+++ /dev/null
-*.wpr
-__pycache__
+++ /dev/null
-from nmigen import Module, Signal, Elaboratable
-from nmigen.lib.coding import Encoder, PriorityEncoder
-
-
-class AddressEncoder(Elaboratable):
- """Address Encoder
-
-    The purpose of this module is to take in a bit vector and
-    encode the position of the set bit(s) into an address. This module
-    combines nmigen's Encoder and PriorityEncoder and reports
-    whether the input has a single bit set, multiple bits set,
-    or no bits set. The output is always the address of the
-    lowest set bit.
-
- Usage:
- The output is valid when either single or multiple match is high.
- Otherwise output is 0.
- """
-
- def __init__(self, width):
- """ Arguments:
- * width: The desired length of the input vector
- """
- # Internal
- self.encoder = Encoder(width)
- self.p_encoder = PriorityEncoder(width)
-
- # Input
- self.i = Signal(width)
-
- # Output
- self.single_match = Signal(1)
- self.multiple_match = Signal(1)
- self.o = Signal(range(width))
-
- def elaborate(self, platform=None):
- m = Module()
-
- # Add internal submodules
- m.submodules.encoder = self.encoder
- m.submodules.p_encoder = self.p_encoder
-
- m.d.comb += [
- self.encoder.i.eq(self.i),
- self.p_encoder.i.eq(self.i)
- ]
-
- # Steps:
- # 1. check if the input vector is non-zero
- # 2. if non-zero, check if single match or multiple match
- # 3. set output line to be lowest value address output
-
-        # If the priority encoder receives an input of 0
- # If n is 1 then the output is not valid
- with m.If(self.p_encoder.n):
- m.d.comb += [
- self.single_match.eq(0),
- self.multiple_match.eq(0),
- self.o.eq(0)
- ]
-        # If the priority encoder receives an input > 0
- with m.Else():
- # Multiple Match if encoder n is invalid
- with m.If(self.encoder.n):
- m.d.comb += [
- self.single_match.eq(0),
- self.multiple_match.eq(1)
- ]
- # Single Match if encoder n is valid
- with m.Else():
- m.d.comb += [
- self.single_match.eq(1),
- self.multiple_match.eq(0)
- ]
- # Always set output based on priority encoder output
- m.d.comb += self.o.eq(self.p_encoder.o)
- return m
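-
-
-# A minimal conversion sketch (an illustrative addition, mirroring the
-# `if __name__` blocks used elsewhere in this directory): expose the encoder
-# on its own so it can be converted with nmigen.cli for inspection. The port
-# list here is chosen purely for illustration.
-if __name__ == '__main__':
-    from nmigen.cli import main
-    enc = AddressEncoder(4)
-    main(enc, ports=[enc.i, enc.single_match, enc.multiple_match, enc.o])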
+++ /dev/null
-from nmigen import Array, Cat, Module, Signal, Elaboratable
-from nmigen.lib.coding import Decoder
-from nmigen.cli import main # , verilog
-
-from .CamEntry import CamEntry
-from .AddressEncoder import AddressEncoder
-
-
-class Cam(Elaboratable):
- """ Content Addressable Memory (CAM)
-
- The purpose of this module is to quickly look up whether an
- entry exists given a data key.
- This module will search for the given data in all internal entries
- and output whether a single or multiple match was found.
-    If a single entry is found, its address is returned and single_match
-    is set HIGH. If multiple entries are found the lowest address is
-    returned and multiple_match is set HIGH. If neither single_match nor
-    multiple_match is HIGH, no match was found. To write
-    to the CAM, set the address bus to the desired entry and set write_enable
-    HIGH. Entry management should be performed one level above this block;
-    only the lookup is performed within.
-
- Notes:
- The read and write operations take one clock cycle to complete.
- Currently the read_warning line is present for interfacing but
- is not necessary for this design. This module is capable of writing
-        in the first cycle, reading on the second, and outputting the correct
- address on the third.
- """
-
- def __init__(self, data_size, cam_size):
- """ Arguments:
- * data_size: (bits) The bit size of the data
- * cam_size: (number) The number of entries in the CAM
- """
-
- # Internal
- self.cam_size = cam_size
- self.encoder = AddressEncoder(cam_size)
- self.decoder = Decoder(cam_size)
- self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))
-
- # Input
- self.enable = Signal(1)
- self.write_enable = Signal(1)
- self.data_in = Signal(data_size) # The data to be written
- self.data_mask = Signal(data_size) # mask for ternary writes
- # address of CAM Entry to write
- self.address_in = Signal(range(cam_size))
-
- # Output
- self.read_warning = Signal(1) # High when a read interrupts a write
- self.single_match = Signal(1) # High when there is only one match
-        self.multiple_match = Signal(1) # High when there are at least two matches
- # The lowest address matched
- self.match_address = Signal(range(cam_size))
-
- def elaborate(self, platform=None):
- m = Module()
- # AddressEncoder for match types and output address
- m.submodules.AddressEncoder = self.encoder
- # Decoder is used to select which entry will be written to
- m.submodules.Decoder = self.decoder
- # CamEntry Array Submodules
-        # Note these are added anonymously
- entry_array = self.entry_array
- m.submodules += entry_array
-
- # Decoder logic
- m.d.comb += [
- self.decoder.i.eq(self.address_in),
- self.decoder.n.eq(0)
- ]
-
- encoder_vector = []
- with m.If(self.enable):
- # Set the key value for every CamEntry
- for index in range(self.cam_size):
-
- # Write Operation
- with m.If(self.write_enable):
- with m.If(self.decoder.o[index]):
- m.d.comb += entry_array[index].command.eq(2)
- with m.Else():
- m.d.comb += entry_array[index].command.eq(0)
-
- # Read Operation
- with m.Else():
- m.d.comb += entry_array[index].command.eq(1)
-
- # Send data input to all entries
- m.d.comb += entry_array[index].data_in.eq(self.data_in)
- # Send all entry matches to encoder
- ematch = entry_array[index].match
- encoder_vector.append(ematch)
-
- # Give input to and accept output from encoder module
- m.d.comb += [
- self.encoder.i.eq(Cat(*encoder_vector)),
- self.single_match.eq(self.encoder.single_match),
- self.multiple_match.eq(self.encoder.multiple_match),
- self.match_address.eq(self.encoder.o)
- ]
-
- # If the CAM is not enabled set all outputs to 0
- with m.Else():
- m.d.comb += [
- self.read_warning.eq(0),
- self.single_match.eq(0),
- self.multiple_match.eq(0),
- self.match_address.eq(0)
- ]
-
- return m
-
- def ports(self):
- return [self.enable, self.write_enable,
- self.data_in, self.data_mask,
- self.read_warning, self.single_match,
- self.multiple_match, self.match_address]
-
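-
-# A hedged testbench sketch (an illustrative addition, not part of the
-# original design): the generator below follows the write-then-read sequence
-# described in the class docstring. Hooking it up to a simulator is left out
-# because the pysim API differs between nmigen releases.
-def cam_write_read_process(cam):
-    # cycle 1: write 0xA into entry 2
-    yield cam.enable.eq(1)
-    yield cam.write_enable.eq(1)
-    yield cam.address_in.eq(2)
-    yield cam.data_in.eq(0xA)
-    yield
-    # cycle 2: issue a lookup for 0xA
-    yield cam.write_enable.eq(0)
-    yield cam.data_in.eq(0xA)
-    yield
-    # cycle 3: single_match should be HIGH with match_address == 2
-    yield
-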
-
-if __name__ == '__main__':
- cam = Cam(4, 4)
- main(cam, ports=cam.ports())
+++ /dev/null
-from nmigen import Module, Signal, Elaboratable
-
-
-class CamEntry(Elaboratable):
- """ Content Addressable Memory (CAM) Entry
-
- The purpose of this module is to represent an entry within a CAM.
-    When given a read command, this module compares the given data
-    against its stored data and outputs whether they match. When given
-    a write command it stores the given data in its internal register.
- """
-
- def __init__(self, data_size):
- """ Arguments:
- * data_size: (bit count) The size of the data
- """
- # Input
- self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset
- self.data_in = Signal(data_size) # Data input when writing
-
- # Output
- self.match = Signal(1) # Result of the internal/input key comparison
- self.data = Signal(data_size)
-
- def elaborate(self, platform=None):
- m = Module()
- with m.Switch(self.command):
- with m.Case("00"):
- m.d.sync += self.match.eq(0)
- with m.Case("01"):
- with m.If(self.data == self.data_in):
- m.d.sync += self.match.eq(1)
- with m.Else():
- m.d.sync += self.match.eq(0)
- with m.Case("10"):
- m.d.sync += [
- self.data.eq(self.data_in),
- self.match.eq(0)
- ]
- with m.Case():
- m.d.sync += [
- self.match.eq(0),
- self.data.eq(0)
- ]
-
- return m
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen.cli import verilog, rtlil
-
-
-class LFSRPolynomial(set):
- """ implements a polynomial for use in LFSR
- """
- def __init__(self, exponents=()):
-        for e in exponents:
-            if not isinstance(e, int):
-                raise TypeError("%s must be an int" % repr(e))
-            if e < 0:
-                raise ValueError("%d must not be negative" % e)
- set.__init__(self, set(exponents).union({0})) # must contain zero
-
- @property
- def max_exponent(self):
- return max(self) # derived from set, so this returns the max exponent
-
- @property
- def exponents(self):
- exponents = list(self) # get elements of set as a list
- exponents.sort(reverse=True)
- return exponents
-
- def __str__(self):
- expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2)
- retval = map(lambda i: expd[min(i,2)].format(i), self.exponents)
- return " + ".join(retval)
-
- def __repr__(self):
- return "LFSRPolynomial(%s)" % self.exponents
-
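-
-# Usage example (doctest-style sketch): the zero exponent is always included,
-# so LFSRPolynomial([4, 3]) represents x^4 + x^3 + 1.
-#
-#     >>> str(LFSRPolynomial([4, 3]))
-#     'x^4 + x^3 + 1'
-#     >>> LFSRPolynomial([4, 3]).max_exponent
-#     4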
-
-# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa
-LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])
-LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])
-LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])
-LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])
-LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])
-LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])
-LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])
-LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])
-LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])
-LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])
-LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])
-LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])
-LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])
-LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])
-LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])
-LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])
-LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])
-LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])
-LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])
-LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])
-LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])
-LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])
-LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])
-
-
-class LFSR(LFSRPolynomial, Elaboratable):
- """ implements a Linear Feedback Shift Register
- """
- def __init__(self, polynomial):
- """ Inputs:
- ------
- :polynomial: the polynomial to feedback on. may be a LFSRPolynomial
- instance or an iterable of ints (list/tuple/generator)
- :enable: enable (set LO to disable. NOTE: defaults to HI)
-
- Outputs:
- -------
- :state: the LFSR state. bitwidth is taken from the polynomial
- maximum exponent.
-
- Note: if an LFSRPolynomial is passed in as the input, because
- LFSRPolynomial is derived from set() it's ok:
- LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
- """
- LFSRPolynomial.__init__(self, polynomial)
- self.state = Signal(self.max_exponent, reset=1)
- self.enable = Signal(reset=1)
-
- def elaborate(self, platform):
- m = Module()
- # do absolutely nothing if the polynomial is empty (always has a zero)
- if self.max_exponent <= 1:
- return m
-
- # create XOR-bunch, select bits from state based on exponent
- feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain)
- for exponent in self:
- if exponent > 0: # don't have to skip, saves CPU cycles though
- feedback ^= self.state[exponent - 1]
-
- # if enabled, shift-and-feedback
- with m.If(self.enable):
- # shift up lower bits by Cat'ing in a new bit zero (feedback)
- newstate = Cat(feedback, self.state[:-1])
- m.d.sync += self.state.eq(newstate)
-
- return m
-
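-
-# Worked example: for LFSR_POLY_4 (x^4 + x^3 + 1) the loop above computes
-# feedback = state[3] ^ state[2], and the next state is
-# Cat(feedback, state[:-1]) -- i.e. a shift up with the XOR fed into bit 0.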
-
-# example: Poly24
-if __name__ == '__main__':
- p24 = rtlil.convert(LFSR(LFSR_POLY_24))
- with open("lfsr2_p24.il", "w") as f:
- f.write(p24)
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Module
-from typing import Iterable, Optional, Iterator, Any, Union
-from typing_extensions import final
-
-
-@final
-class LFSRPolynomial(set):
-    def __init__(self, exponents: Iterable[int] = ()): ...
-    def elements(self) -> Iterable[int]: ...
- @property
- def exponents(self) -> list[int]: ...
- def __str__(self) -> str: ...
- def __repr__(self) -> str: ...
-
-
-@final
-class LFSR:
- def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...
- @property
- def width(self) -> int: ...
- def elaborate(self, platform: Any) -> Module: ...
+++ /dev/null
-verilog:
- python3 Cam.py generate -t v > Cam.v
+++ /dev/null
-from nmigen import Cat, Memory, Module, Signal, Elaboratable
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-
-
-class MemorySet(Elaboratable):
- def __init__(self, data_size, tag_size, set_count, active):
- self.active = active
- input_size = tag_size + data_size # Size of the input data
- memory_width = input_size + 1 # The width of the cache memory
- self.data_size = data_size
- self.tag_size = tag_size
-
- # XXX TODO, use rd-enable and wr-enable?
- self.mem = Memory(width=memory_width, depth=set_count)
- self.r = self.mem.read_port()
- self.w = self.mem.write_port()
-
- # inputs (address)
- self.cset = Signal(range(set_count)) # The set to be checked
- self.tag = Signal(tag_size) # The tag to find
- self.data_i = Signal(data_size) # Incoming data
-
- # outputs
- self.valid = Signal()
- self.data_o = Signal(data_size) # Outgoing data (excludes tag)
-
- def elaborate(self, platform):
- m = Module()
- m.submodules.mem = self.mem
- m.submodules.r = self.r
- m.submodules.w = self.w
-
- # temporaries
- active_bit = Signal()
- tag_valid = Signal()
- data_start = self.active + 1
- data_end = data_start + self.data_size
- tag_start = data_end
- tag_end = tag_start + self.tag_size
-
- # connect the read port address to the set/entry
- read_port = self.r
- m.d.comb += read_port.addr.eq(self.cset)
- # Pull out active bit from data
- data = read_port.data
- m.d.comb += active_bit.eq(data[self.active])
- # Validate given tag vs stored tag
- tag = data[tag_start:tag_end]
- m.d.comb += tag_valid.eq(self.tag == tag)
- # An entry is only valid if the tags match AND
- # is marked as a valid entry
- m.d.comb += self.valid.eq(tag_valid & active_bit)
-
- # output data: TODO, check rd-enable?
- m.d.comb += self.data_o.eq(data[data_start:data_end])
-
- # connect the write port addr to the set/entry (only if write enabled)
- # (which is only done on a match, see SAC.write_entry below)
- write_port = self.w
- with m.If(write_port.en):
- m.d.comb += write_port.addr.eq(self.cset)
- m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag))
-
- return m
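-
-
-# Stored-word layout (derived from the slice bounds above, with active=0):
-#   bit 0                              -> active/valid bit
-#   bits [1 : 1+data_size]             -> data
-#   bits [1+data_size : 1+data_size+tag_size] -> tag
-# which matches the write-side packing Cat(1, self.data_i, self.tag).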
+++ /dev/null
-from nmigen import Module, Signal, Elaboratable
-from nmigen.cli import main
-
-from soc.TLB.PteEntry import PteEntry
-
-
-class PermissionValidator(Elaboratable):
- """ The purpose of this Module is to check the Permissions of a given PTE
- against the requested access permissions.
-
- This module will either validate (by setting the valid bit HIGH)
- the request or find a permission fault and invalidate (by setting
- the valid bit LOW) the request
- """
-
- def __init__(self, asid_size, pte_size):
- """ Arguments:
- * asid_size: (bit count) The size of the asid to be processed
- * pte_size: (bit count) The size of the pte to be processed
-
- Return:
- * valid HIGH when permissions are correct
- """
- # Internal
- self.pte_entry = PteEntry(asid_size, pte_size)
-
- # Input
- self.data = Signal(asid_size + pte_size)
- self.xwr = Signal(3) # Execute, Write, Read
- self.super_mode = Signal(1) # Supervisor Mode
- self.super_access = Signal(1) # Supervisor Access
-        self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
-
- # Output
- self.valid = Signal(1) # Denotes if the permissions are correct
-
- def elaborate(self, platform=None):
- m = Module()
-
- m.submodules.pte_entry = self.pte_entry
-
- m.d.comb += self.pte_entry.i.eq(self.data)
-
- # Check if the entry is valid
- with m.If(self.pte_entry.v):
- # ASID match or Global Permission
- # Note that the MSB bound is exclusive
- with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g):
- # Check Execute, Write, Read (XWR) Permissions
- with m.If(self.pte_entry.xwr == self.xwr):
- # Supervisor Logic
- with m.If(self.super_mode):
- # Valid if entry is not in user mode or supervisor
- # has Supervisor User Memory (SUM) access via the
- # SUM bit in the sstatus register
- m.d.comb += self.valid.eq((~self.pte_entry.u)
- | self.super_access)
- # User logic
- with m.Else():
- # Valid if the entry is in user mode only
- m.d.comb += self.valid.eq(self.pte_entry.u)
- with m.Else():
- m.d.comb += self.valid.eq(0)
- with m.Else():
- m.d.comb += self.valid.eq(0)
- with m.Else():
- m.d.comb += self.valid.eq(0)
- return m
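-
-
-# Worked example (illustrative): with pte_entry.v == 1, a matching ASID
-# (or g set), and xwr equal to the requested xwr, a supervisor access
-# (super_mode == 1) to a user page (u == 1) is only valid when
-# super_access (SUM) is set; a user access is only valid when u == 1.
-# Any other combination drives valid LOW.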
+++ /dev/null
-from nmigen import Module, Signal, Elaboratable
-from nmigen.cli import main
-
-
-class PteEntry(Elaboratable):
- """ The purpose of this Module is to centralize the parsing of Page
- Table Entries (PTE) into one module to prevent common mistakes
- and duplication of code. The control bits are parsed out for
- ease of use.
-
- This module parses according to the standard PTE given by the
- Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
- The Address Space IDentifier (ASID) is appended to the MSB of the input
- and is parsed out as such.
-
-        A valid input Signal would be:
- ASID PTE
- Bits:[78-64][63-0]
-
- The output PTE value will include the control bits.
- """
- def __init__(self, asid_size, pte_size):
- """ Arguments:
- * asid_size: (bit count) The size of the asid to be processed
- * pte_size: (bit count) The size of the pte to be processed
-
- Return:
- * d The Dirty bit from the PTE portion of i
- * a The Accessed bit from the PTE portion of i
- * g The Global bit from the PTE portion of i
- * u The User Mode bit from the PTE portion of i
- * xwr The Execute/Write/Read bit from the PTE portion of i
- * v The Valid bit from the PTE portion of i
- * asid The asid portion of i
- * pte The pte portion of i
- """
- # Internal
- self.asid_start = pte_size
- self.asid_end = pte_size + asid_size
-
- # Input
- self.i = Signal(asid_size + pte_size)
-
- # Output
- self.d = Signal(1) # Dirty bit (From pte)
- self.a = Signal(1) # Accessed bit (From pte)
- self.g = Signal(1) # Global Access (From pte)
- self.u = Signal(1) # User Mode (From pte)
- self.xwr = Signal(3) # Execute Read Write (From pte)
- self.v = Signal(1) # Valid (From pte)
- self.asid = Signal(asid_size) # Associated Address Space IDentifier
- self.pte = Signal(pte_size) # Full Page Table Entry
-
- def elaborate(self, platform=None):
- m = Module()
-        # Pull out all control bits from PTE
- m.d.comb += [
- self.d.eq(self.i[7]),
- self.a.eq(self.i[6]),
- self.g.eq(self.i[5]),
- self.u.eq(self.i[4]),
- self.xwr.eq(self.i[1:4]),
- self.v.eq(self.i[0])
- ]
- m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end])
- m.d.comb += self.pte.eq(self.i[0:self.asid_start])
- return m
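-
-
-# Worked example (for the default TLB sizes, asid_size=15, pte_size=64):
-# i[0]=V, i[1:4]=XWR, i[4]=U, i[5]=G, i[6]=A, i[7]=D, i[0:64]=PTE and
-# i[64:79]=ASID, matching the "ASID[78:64] / PTE[63:0]" layout given in
-# the class docstring.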
+++ /dev/null
-"""
-
-Online simulator of 4-way set-associative cache:
-http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html
-
-Python simulator of a N-way set-associative cache:
-https://github.com/vaskevich/CacheSim/blob/master/cachesim.py
-"""
-
-from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
-from nmigen.compat.genlib import fsm
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-
-from .AddressEncoder import AddressEncoder
-from .MemorySet import MemorySet
-
-# TODO: use a LFSR that advances continuously and picking the bottom
-# few bits from it to select which cache line to replace, instead of PLRU
-# http://bugs.libre-riscv.org/show_bug.cgi?id=71
-from .ariane.plru import PLRU
-from .LFSR import LFSR, LFSR_POLY_24
-
-SA_NA = "00" # no action (none)
-SA_RD = "01" # read
-SA_WR = "10" # write
-
-
-class SetAssociativeCache(Elaboratable):
- """ Set Associative Cache Memory
-
- The purpose of this module is to generate a memory cache given the
- constraints passed in. This will create a n-way set associative cache.
- It is expected for the SV TLB that the VMA will provide the set number
- while the ASID provides the tag (still to be decided).
-
- """
-
- def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
- """ Arguments
- * tag_size (bits): The bit count of the tag
- * data_size (bits): The bit count of the data to be stored
- * set_count (number): The number of sets/entries in the cache
-            * way_count (number): The number of ways (slots) in which data
-                                  can be stored within one set
- * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
- set/entry to write to. otherwise, use a PLRU
- """
- # Internals
- self.lfsr_mode = lfsr
- self.way_count = way_count # The number of slots in one set
- self.tag_size = tag_size # The bit count of the tag
- self.data_size = data_size # The bit count of the data to be stored
-
- # set up Memory array
- self.mem_array = Array() # memory array
- for i in range(way_count):
- ms = MemorySet(data_size, tag_size, set_count, active=0)
- self.mem_array.append(ms)
-
- # Finds valid entries
- self.encoder = AddressEncoder(way_count)
-
- # setup PLRU or LFSR
- if lfsr:
- # LFSR mode
- self.lfsr = LFSR(LFSR_POLY_24)
- else:
- # PLRU mode
- # One block to handle plru calculations
- self.plru = PLRU(way_count)
- self.plru_array = Array() # PLRU data on each set
- for i in range(set_count):
- name = "plru%d" % i
- self.plru_array.append(Signal(self.plru.TLBSZ, name=name))
-
- # Input
- self.enable = Signal(1) # Whether the cache is enabled
- self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX)
- self.cset = Signal(range(set_count)) # The set to be checked
- self.tag = Signal(tag_size) # The tag to find
- self.data_i = Signal(data_size) # The input data
-
- # Output
- self.ready = Signal(1) # 0 => Processing 1 => Ready for commands
- self.hit = Signal(1) # Tag matched one way in the given set
- # Tag matched many ways in the given set
- self.multiple_hit = Signal(1)
- self.data_o = Signal(data_size) # The data linked to the matched tag
-
- def check_tags(self, m):
- """ Validate the tags in the selected set. If one and only one
- tag matches set its state to zero and increment all others
- by one. We only advance to next state if a single hit is found.
- """
- # Vector to store way valid results
- # A zero denotes a way is invalid
- valid_vector = []
- # Loop through memory to prep read/write ports and set valid_vector
- for i in range(self.way_count):
- valid_vector.append(self.mem_array[i].valid)
-
- # Pass encoder the valid vector
- m.d.comb += self.encoder.i.eq(Cat(*valid_vector))
-
- # Only one entry should be marked
- # This is due to already verifying the tags
- # matched and the valid bit is high
- with m.If(self.hit):
- m.next = "FINISHED_READ"
- # Pull out data from the read port
- data = self.mem_array[self.encoder.o].data_o
- m.d.comb += self.data_o.eq(data)
- if not self.lfsr_mode:
- self.access_plru(m)
-
-        # Multiple tags matched: this should not happen; output zero for now.
- with m.Elif(self.multiple_hit):
- # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
- m.d.comb += self.data_o.eq(0)
-
- # No tag matches means no data
- with m.Else():
- # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
- m.d.comb += self.data_o.eq(0)
-
- def access_plru(self, m):
- """ An entry was accessed and the plru tree must now be updated
- """
- # Pull out the set's entry being edited
- plru_entry = self.plru_array[self.cset]
- m.d.comb += [
- # Set the plru data to the current state
- self.plru.plru_tree.eq(plru_entry),
- # Set that the cache was accessed
- self.plru.lu_access_i.eq(1)
- ]
-
- def read(self, m):
- """ Go through the read process of the cache.
- This takes two cycles to complete. First it checks for a valid tag
- and secondly it updates the LRU values.
- """
- with m.FSM() as fsm_read:
- with m.State("READY"):
- m.d.comb += self.ready.eq(0)
- # check_tags will set the state if the conditions are met
- self.check_tags(m)
- with m.State("FINISHED_READ"):
- m.next = "READY"
- m.d.comb += self.ready.eq(1)
- if not self.lfsr_mode:
- plru_tree_o = self.plru.plru_tree_o
- m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)
-
- def write_entry(self, m):
- if not self.lfsr_mode:
- m.d.comb += [ # set cset (mem address) into PLRU
- self.plru.plru_tree.eq(self.plru_array[self.cset]),
- # and connect plru to encoder for write
- self.encoder.i.eq(self.plru.replace_en_o)
- ]
- write_port = self.mem_array[self.encoder.o].w
- else:
- # use the LFSR to generate a random(ish) one of the mem array
- lfsr_output = Signal(range(self.way_count))
- lfsr_random = Signal(range(self.way_count))
- m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits
- # address too big, limit to range of array
-            m.d.comb += lfsr_random.eq(Mux(lfsr_output >= self.way_count,
- lfsr_output - self.way_count,
- lfsr_output))
- write_port = self.mem_array[lfsr_random].w
-
- # then if there is a match from the encoder, enable the selected write
- with m.If(self.encoder.single_match):
- m.d.comb += write_port.en.eq(1)
-
- def write(self, m):
- """ Go through the write process of the cache.
- This takes two cycles to complete. First it writes the entry,
- and secondly it updates the PLRU (in plru mode)
- """
- with m.FSM() as fsm_write:
- with m.State("READY"):
- m.d.comb += self.ready.eq(0)
- self.write_entry(m)
- m.next = "FINISHED_WRITE"
- with m.State("FINISHED_WRITE"):
- m.d.comb += self.ready.eq(1)
- if not self.lfsr_mode:
- plru_entry = self.plru_array[self.cset]
- m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
- m.next = "READY"
-
- def elaborate(self, platform=None):
- m = Module()
-
- # ----
- # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
- # ----
-
- m.submodules.AddressEncoder = self.encoder
- if self.lfsr_mode:
- m.submodules.LFSR = self.lfsr
- else:
- m.submodules.PLRU = self.plru
-
- for i, mem in enumerate(self.mem_array):
- setattr(m.submodules, "mem%d" % i, mem)
-
- # ----
-        # select mode: connect the PLRU to the encoder, or just enable the LFSR
- # ----
-
- if not self.lfsr_mode:
- # Set what entry was hit
- m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
- else:
- # enable LFSR
- m.d.comb += self.lfsr.enable.eq(self.enable)
-
- # ----
- # connect hit/multiple hit to encoder output
- # ----
-
- m.d.comb += [
- self.hit.eq(self.encoder.single_match),
- self.multiple_hit.eq(self.encoder.multiple_match),
- ]
-
- # ----
- # connect incoming data/tag/cset(addr) to mem_array
- # ----
-
- for mem in self.mem_array:
- write_port = mem.w
- m.d.comb += [mem.cset.eq(self.cset),
- mem.tag.eq(self.tag),
- mem.data_i.eq(self.data_i),
- write_port.en.eq(0), # default: disable write
- ]
- # ----
- # Commands: READ/WRITE/TODO
- # ----
-
- with m.If(self.enable):
- with m.Switch(self.command):
- # Search all sets at a particular tag
- with m.Case(SA_RD):
- self.read(m)
- with m.Case(SA_WR):
- self.write(m)
- # Maybe catch multiple tags write here?
- # TODO
- # TODO: invalidate/flush, flush-all?
-
- return m
-
- def ports(self):
- return [self.enable, self.command, self.cset, self.tag, self.data_i,
- self.ready, self.hit, self.multiple_hit, self.data_o]
-
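-
-# Driving sketch (illustrative): hold enable HIGH, drive cset/tag (and data_i
-# for writes), and set command to SA_RD or SA_WR. The hit/multiple_hit flags
-# and data_o reflect the tag check, and ready goes HIGH one cycle later in
-# the FINISHED_READ / FINISHED_WRITE state.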
-
-if __name__ == '__main__':
- sac = SetAssociativeCache(4, 8, 4, 6)
- vl = rtlil.convert(sac, ports=sac.ports())
- with open("SetAssociativeCache.il", "w") as f:
- f.write(vl)
-
- sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True)
- vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports())
- with open("SetAssociativeCacheLFSR.il", "w") as f:
- f.write(vl)
+++ /dev/null
-""" TLB Module
-
- The expected form of the data is:
- * Item (Bits)
- * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
-"""
-
-from nmigen import Memory, Module, Signal, Cat, Elaboratable
-from nmigen.cli import main
-
-from .PermissionValidator import PermissionValidator
-from .Cam import Cam
-
-
-class TLB(Elaboratable):
- def __init__(self, asid_size, vma_size, pte_size, L1_size):
- """ Arguments
- * asid_size: Address Space IDentifier (ASID) typically 15 bits
- * vma_size: Virtual Memory Address (VMA) typically 36 bits
-            * pte_size: Page Table Entry (PTE) typically 64 bits
-            * L1_size: Number of entries in the L1 TLB (CAM/memory depth)
-
- Notes:
- These arguments should represent the largest possible size
- defined by the MODE settings. See
- Volume II: RISC-V Privileged Architectures V1.10 Page 57
- """
-
- # Internal
- self.state = 0
- # L1 Cache Modules
- self.cam_L1 = Cam(vma_size, L1_size)
- self.mem_L1 = Memory(width=asid_size + pte_size, depth=L1_size)
-
- # Permission Validator
- self.perm_validator = PermissionValidator(asid_size, pte_size)
-
- # Inputs
- self.supermode = Signal(1) # Supervisor Mode
- self.super_access = Signal(1) # Supervisor Access
- # 00=None, 01=Search, 10=Write L1, 11=Write L2
- self.command = Signal(2)
- self.xwr = Signal(3) # Execute, Write, Read
- self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
- self.address_L1 = Signal(range(L1_size))
- self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
- self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
- self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
-
- # Outputs
- self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
- self.perm_valid = Signal(1) # Denotes if the permissions are correct
- self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
-
- def search(self, m, read_L1, write_L1):
- """ searches the TLB
- """
- m.d.comb += [
- write_L1.en.eq(0),
- self.cam_L1.write_enable.eq(0),
- self.cam_L1.data_in.eq(self.vma)
- ]
- # Match found in L1 CAM
- match_found = Signal(reset_less=True)
- m.d.comb += match_found.eq(self.cam_L1.single_match
- | self.cam_L1.multiple_match)
- with m.If(match_found):
- # Memory shortcut variables
- mem_address = self.cam_L1.match_address
- # Memory Logic
- m.d.comb += read_L1.addr.eq(mem_address)
- # Permission Validator Logic
- m.d.comb += [
- self.hit.eq(1),
- # Set permission validator data to the correct
- # register file data according to CAM match
- # address
- self.perm_validator.data.eq(read_L1.data),
- # Execute, Read, Write
- self.perm_validator.xwr.eq(self.xwr),
- # Supervisor Mode
- self.perm_validator.super_mode.eq(self.supermode),
-            # Supervisor Access
- self.perm_validator.super_access.eq(self.super_access),
- # Address Space IDentifier (ASID)
- self.perm_validator.asid.eq(self.asid),
- # Output result of permission validation
- self.perm_valid.eq(self.perm_validator.valid)
- ]
- # Only output PTE if permissions are valid
- with m.If(self.perm_validator.valid):
- # XXX TODO - dummy for now
- reg_data = Signal.like(self.pte_out)
- m.d.comb += [
- self.pte_out.eq(reg_data)
- ]
- with m.Else():
- m.d.comb += [
- self.pte_out.eq(0)
- ]
- # Miss Logic
- with m.Else():
- m.d.comb += [
- self.hit.eq(0),
- self.perm_valid.eq(0),
- self.pte_out.eq(0)
- ]
-
- def write_l1(self, m, read_L1, write_L1):
- """ writes to the L1 cache
- """
- # Memory_L1 Logic
- m.d.comb += [
- write_L1.en.eq(1),
- write_L1.addr.eq(self.address_L1),
- # The Cat places arguments from LSB -> MSB
- write_L1.data.eq(Cat(self.pte_in, self.asid))
- ]
- # CAM_L1 Logic
- m.d.comb += [
- self.cam_L1.write_enable.eq(1),
- self.cam_L1.data_in.eq(self.vma), # data_in is sent to all entries
- # self.cam_L1.address_in.eq(todo) # a CAM entry needs to be selected
-
- ]
-
- def elaborate(self, platform):
- m = Module()
- # Add submodules
- # Submodules for L1 Cache
- m.submodules.cam_L1 = self.cam_L1
- m.submodules.read_L1 = read_L1 = self.mem_L1.read_port()
- m.submodules.write_L1 = write_L1 = self.mem_L1.write_port()
-
- # Permission Validator Submodule
-        m.submodules.perm_validator = self.perm_validator
-
- # When MODE specifies translation
- # TODO add in different bit length handling ie prefix 0s
- tlb_enable = Signal(reset_less=True)
- m.d.comb += tlb_enable.eq(self.mode != 0)
-
- with m.If(tlb_enable):
- m.d.comb += [
- self.cam_L1.enable.eq(1)
- ]
- with m.Switch(self.command):
- # Search
- with m.Case("01"):
- self.search(m, read_L1, write_L1)
-
- # Write L1
- # Expected that the miss will be handled in software
- with m.Case("10"):
- self.write_l1(m, read_L1, write_L1)
-
- # TODO
- # with m.Case("11"):
-
- # When disabled
- with m.Else():
- m.d.comb += [
- self.cam_L1.enable.eq(0),
- # XXX TODO - self.reg_file.enable.eq(0),
- self.hit.eq(0),
- self.perm_valid.eq(0), # XXX TODO, check this
- self.pte_out.eq(0)
- ]
- return m
-
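-
-# L1 memory word layout (for the default sizes asid_size=15, pte_size=64):
-# write_l1 stores Cat(self.pte_in, self.asid), so bits [0:64] hold the PTE
-# and bits [64:79] hold the ASID, the same layout PteEntry expects on its
-# input.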
-
-if __name__ == '__main__':
- tlb = TLB(15, 36, 64, 4)
- main(tlb, ports=[tlb.supermode, tlb.super_access, tlb.command,
- tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
- tlb.vma, tlb.pte_in,
- tlb.hit, tlb.perm_valid, tlb.pte_out,
- ] + tlb.cam_L1.ports())
+++ /dev/null
-#include <cstdint>
-#include <iostream>
-#include <cmath>
-
-
-#define NWAY 4
-#define NLINE 256
-#define HIT 0
-#define MISS 1
-#define MS 1000
-/*
-Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
-Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
-four-way set associative - three bits
- each bit represents one branch point in a binary decision tree; let 1
- represent that the left side has been referenced more recently than the
- right side, and 0 vice-versa
- are all 4 lines valid?
- / \
- yes no, use an invalid line
- |
- |
- |
- bit_0 == 0? state | replace ref to | next state
- / \ ------+-------- -------+-----------
- y n 00x | line_0 line_0 | 11_
- / \ 01x | line_1 line_1 | 10_
- bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
- / \ / \ 1x1 | line_3 line_3 | 0_0
- y n y n
- / \ / \ ('x' means ('_' means unchanged)
- line_0 line_1 line_2 line_3 don't care)
- 8-way set associative - 7 = 1+2+4 bits
-16-way set associative - 15 = 1+2+4+8 bits
-32-way set associative - 31 = 1+2+4+8+16 bits
-64-way set associative - 63 = 1+2+4+8+16+32 bits
-*/
-using namespace std;
-struct AddressField {
- uint64_t wd_idx : 2;//Unused
- uint64_t offset : 4;//Unused
- uint64_t index : 8;//NLINE = 256 = 2^8
- uint64_t tag : 50;
-};
-
-union Address {
- uint32_t* p;
- AddressField fields;
-};
-
-struct Cell {
- bool v;
- uint64_t tag;
-
- Cell() : v(false), tag(0) {}
-
- bool isHit(uint64_t tag) {
- return v && (tag == this->tag);
- }
-
- void fetch(uint32_t* address) {
- Address addr;
- addr.p = address;
- addr.fields.offset = 0;
- addr.fields.wd_idx = 0;
- tag = addr.fields.tag;
- v = true;
- }
-};
-
-ostream& operator<<(ostream & out, const Cell& cell) {
- out << " v:" << cell.v << " tag:" << hex << cell.tag;
- return out;
-}
-
-struct Block {
- Cell cell[NWAY];
- uint32_t state;
-    uint64_t *mask;//Mask selecting which state (tree) bits are tested/updated for each way.
- uint64_t *value;
- uint64_t *next_value;
-
- Block() : state(0) {
- switch (NWAY) {
- case 4:
- mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
- value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
- next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
- break;
- case 8:
- mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
- 0b1010001};
- value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
- 0b1010001};
- next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
- 0b0000001, 0b0000000};
- break;
- //TODO - more NWAY goes here.
- default:
- std::cout << "Error definition NWAY = " << NWAY << std::endl;
- }
- }
-
- uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
- for (int i = 0; i < NWAY; ++i) {
- if (cell[i].isHit(tag)) {
- *pway = i;
- return pway;
- }
- }
- return NULL;
- }
-
- void setLRU(uint32_t *address) {
- int way = 0;
- uint32_t st = state;
- for (int i = 0; i < NWAY; ++i) {
- if ((state & mask[i]) == value[i]) {
- state ^= mask[i];
- way = i;
- break;
- }
- }
- cell[way].fetch(address);
- cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
- }
-
- uint32_t *get(uint32_t *address, uint32_t *pway) {
- Address addr;
- addr.p = address;
- uint32_t *d = getByTag(addr.fields.tag, pway);
- if (d != NULL) {
- return &d[addr.fields.offset];
- }
- return d;
- }
-
- int set(uint32_t *address) {
- uint32_t way = 0;
- uint32_t *p = get(address, &way);
- if (p != NULL) {
- printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state);
- state &= ~mask[way];
- printf("%X --> ", state);
- state |= next_value[way];
- printf("%X\n", state);
- // *p = *address; //skip since address is fake.
- return HIT;
- } else {
- setLRU(address);
- return MISS;
- }
- }
-};
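-
-// Worked 4-way example: with state == 0b000, (state & mask[0]) == value[0],
-// so way 0 is chosen as the victim and state ^= mask[0] leaves 0b110,
-// matching the "00x -> line_0 -> 11_" row of the table above. On a later
-// hit to way 0, set() clears the mask bits and ORs in next_value[0] (0b110).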
-
-ostream& operator<<(ostream & out, const Block& block) {
- out << "state:" << block.state << " ";
- for (int i = 0; i<NWAY; i++) {
- out << block.cell[i];
- }
- return out;
-}
-
-struct Cache {
- Block block[NLINE];
- uint32_t count[2];
- Cache() { count[HIT] = 0; count[MISS] = 0; }
-
- void access(uint32_t* address) {
- Address addr;
- addr.p = address;
- Block& b = block[addr.fields.index];
- ++count[b.set(address)];
- }
-
-};
-ostream& operator<<(ostream & out, const Cache& cache) {
- out << "\n==Summary==\n\tHit: " << cache.count[HIT] << " Miss: " << cache.count[MISS] << std::endl;
- for (int i = 0; i < NLINE; i++) {
- out << cache.block[i] << endl;
- }
- return out;
-}
-
-Cache cache;
-void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
-{
- int x, i, j;
- for (i = 0; i < MS; i++) {
- for (j = 0; j < MS; j++) {
- cache.access(res + i*MS +j);
- for (x = 0; x < MS; x++) {
- cache.access(m1 + i*MS + x);
- cache.access(m2 + x*MS + j);
- cache.access(res + i*MS +j);
- // res[i][j] += m1[i][x] * m2[x][j];
- cache.access(res + i*MS +j);
- }
- }
- }
-}
-
-int main()
-{
- uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL; // fake virtual address; don’t access it
- uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL; // fake virtual address; don’t access it
- uint32_t* res = (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
- multiply(m1, m2, res);
- cout << cache << endl;
- return 0;
-}
+++ /dev/null
-from nmigen import Const
-
-INSTR_ADDR_MISALIGNED = Const(0, 64)
-INSTR_ACCESS_FAULT = Const(1, 64)
-ILLEGAL_INSTR = Const(2, 64)
-BREAKPOINT = Const(3, 64)
-LD_ADDR_MISALIGNED = Const(4, 64)
-LD_ACCESS_FAULT = Const(5, 64)
-ST_ADDR_MISALIGNED = Const(6, 64)
-ST_ACCESS_FAULT = Const(7, 64)
-ENV_CALL_UMODE = Const(8, 64) # environment call from user mode
-ENV_CALL_SMODE = Const(9, 64) # environment call from supervisor mode
-ENV_CALL_MMODE = Const(11, 64) # environment call from machine mode
-INSTR_PAGE_FAULT = Const(12, 64) # Instruction page fault
-LOAD_PAGE_FAULT = Const(13, 64) # Load page fault
-STORE_PAGE_FAULT = Const(15, 64) # Store page fault
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: Florian Zaruba, ETH Zurich
-# Date: 12.11.2017
-# Description: Handles cache misses.
-from math import ceil, log
-
-from nmigen import Array, Cat, Const, Elaboratable, Module, ResetSignal, Signal
-from nmigen.lib.coding import Encoder, PriorityEncoder
-
-
-# --------------
-# MISS Handler
-# --------------
-# TODO: the constants from the SystemVerilog packages ariane_pkg and
-# std_cache_pkg (DCACHE_TAG_WIDTH, DCACHE_LINE_WIDTH, DCACHE_SET_ASSOC,
-# DCACHE_INDEX_WIDTH, DCACHE_BYTE_OFFSET, DCACHE_NUM_WORDS, ...) still
-# need to be ported; they are referenced throughout this file.
-
-NR_PORTS = 3
-
-class MissReq(RecordObject):
- def __init__(self, name=None):
- Record.__init__(self, name)
- self.valid = Signal()
- self.addr = Signal(64)
- self.be = Signal(8)
- self.size = Signal(2)
- self.we = Signal()
- self.wdata = Signal(64)
-        self.bypass = Signal()
-
-class CacheLine:
- def __init__(self):
- self.tag = Signal(DCACHE_TAG_WIDTH) # tag array
- self.data = Signal(DCACHE_LINE_WIDTH) # data array
- self.valid = Signal() # state array
- self.dirty = Signal() # state array
-
-# cache line byte enable
-class CLBE:
- def __init__(self):
-        self.tag = Signal((DCACHE_TAG_WIDTH+7)//8)   # byte enable into tag array
-        self.data = Signal((DCACHE_LINE_WIDTH+7)//8) # byte enable into data array
- # bit enable into state array (valid for a pair of dirty/valid bits)
- self.vldrty = Signal(DCACHE_SET_ASSOC)
-
-
-
- # FSM states
-"""
- enum logic [3:0] {
- IDLE, # 0
- FLUSHING, # 1
- FLUSH, # 2
- WB_CACHELINE_FLUSH, # 3
- FLUSH_REQ_STATUS, # 4
- WB_CACHELINE_MISS, # 5
- WAIT_GNT_SRAM, # 6
- MISS, # 7
- REQ_CACHELINE, # 8
- MISS_REPL, # 9
- SAVE_CACHELINE, # A
- INIT, # B
- AMO_LOAD, # C
- AMO_SAVE_LOAD, # D
- AMO_STORE # E
- } state_d, state_q;
-"""
-
-class MissHandler(Elaboratable):
- def __init__(self, NR_PORTS):
- self.NR_PORTS = NR_PORTS
- self.pwid = pwid = ceil(log(NR_PORTS) / log(2))
- self.flush_i = Signal() # flush request
- self.flush_ack_o = Signal() # acknowledge successful flush
- self.miss_o = Signal()
- self.busy_i = Signal() # dcache is busy with something
-
- # Bypass or miss
- self.miss_req_i = Array(MissReq(name="missreq") for i in range(NR_PORTS))
- # Bypass handling
- self.bypass_gnt_o = Signal(NR_PORTS)
- self.bypass_valid_o = Signal(NR_PORTS)
-        self.bypass_data_o = Array(Signal(64, name="bdata_o") \
-                                   for i in range(NR_PORTS))
-
- # AXI port
-        # TODO, port from SystemVerilog:
-        #   output ariane_axi::req_t  axi_bypass_o,
-        #   input  ariane_axi::resp_t axi_bypass_i,
-
- # Miss handling (~> cacheline refill)
- self.miss_gnt_o = Signal(NR_PORTS)
- self.active_serving_o = Signal(NR_PORTS)
-
- self.critical_word_o = Signal(64)
- self.critical_word_valid_o = Signal()
-        # TODO, port from SystemVerilog:
-        #   output ariane_axi::req_t  axi_data_o,
-        #   input  ariane_axi::resp_t axi_data_i,
-
-        self.mshr_addr_i = Array(Signal(56, name="mshr_addr_i") \
-                                 for i in range(NR_PORTS))
- self.mshr_addr_matches_o = Signal(NR_PORTS)
- self.mshr_index_matches_o = Signal(NR_PORTS)
-
- # AMO
- self.amo_req_i = AMOReq()
- self.amo_resp_o = AMOResp()
- # Port to SRAMs, for refill and eviction
- self.req_o = Signal(DCACHE_SET_ASSOC)
- self.addr_o = Signal(DCACHE_INDEX_WIDTH) # address into cache array
- self.data_o = CacheLine()
- self.be_o = CLBE()
- self.data_i = Array(CacheLine() \
- for i in range(DCACHE_SET_ASSOC))
- self.we_o = Signal()
-
- def elaborate(self, platform):
- # Registers
- mshr_t mshr_d, mshr_q;
- logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
- logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
- # cache line to evict
- cache_line_t evict_cl_d, evict_cl_q;
-
- logic serve_amo_d, serve_amo_q;
- # Request from one FSM
- miss_req_valid = Signal(self.NR_PORTS)
- miss_req_bypass = Signal(self.NR_PORTS)
-        miss_req_addr = Array(Signal(64, name="miss_req_addr") \
-                              for i in range(self.NR_PORTS))
-        miss_req_wdata = Array(Signal(64, name="miss_req_wdata") \
-                               for i in range(self.NR_PORTS))
-        miss_req_we = Signal(self.NR_PORTS)
-        miss_req_be = Array(Signal(8, name="miss_req_be") \
-                            for i in range(self.NR_PORTS))
-        miss_req_size = Array(Signal(2, name="miss_req_size") \
-                              for i in range(self.NR_PORTS))
-
- # Cache Line Refill <-> AXI
- req_fsm_miss_valid = Signal()
- req_fsm_miss_addr = Signal(64)
- req_fsm_miss_wdata = Signal(DCACHE_LINE_WIDTH)
- req_fsm_miss_we = Signal()
- req_fsm_miss_be = Signal(DCACHE_LINE_WIDTH//8)
- ariane_axi::ad_req_t req_fsm_miss_req;
- req_fsm_miss_size = Signal(2)
-
- gnt_miss_fsm = Signal()
- valid_miss_fsm = Signal()
- nmiss = DCACHE_LINE_WIDTH//64
-        data_miss_fsm = Array(Signal(64, name="data_miss_fsm") \
-                              for i in range(nmiss))
-
- # Cache Management <-> LFSR
- lfsr_enable = Signal()
- lfsr_oh = Signal(DCACHE_SET_ASSOC)
-        lfsr_bin = Signal(range(DCACHE_SET_ASSOC))  # binary-encoded victim way
- # AMOs
- ariane_pkg::amo_t amo_op;
- amo_operand_a = Signal(64)
- amo_operand_b = Signal(64)
- amo_result_o = Signal(64)
-
- struct packed {
- logic [63:3] address;
- logic valid;
- } reservation_d, reservation_q;
-
- # ------------------------------
- # Cache Management
- # ------------------------------
- evict_way = Signal(DCACHE_SET_ASSOC)
- valid_way = Signal(DCACHE_SET_ASSOC)
-
-        for i in range(DCACHE_SET_ASSOC):
- comb += evict_way[i].eq(data_i[i].valid & data_i[i].dirty)
- comb += valid_way[i].eq(data_i[i].valid)
-
- # ----------------------
- # Default Assignments
- # ----------------------
- # to AXI refill
- req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ;
- req_fsm_miss_size = Const(0b11, 2)
- # core
- serve_amo_d = serve_amo_q;
- # --------------------------------
- # Flush and Miss operation
- # --------------------------------
- state_d = state_q;
- cnt_d = cnt_q;
- evict_way_d = evict_way_q;
- evict_cl_d = evict_cl_q;
- mshr_d = mshr_q;
- # communicate to the requester which unit we are currently serving
- active_serving_o[mshr_q.id] = mshr_q.valid;
- # AMOs
- # silence the unit when not used
- amo_op = amo_req_i.amo_op;
-
- reservation_d = reservation_q;
- with m.FSM() as state_q:
-
- with m.Case("IDLE"):
- # lowest priority are AMOs, wait until everything else
- # is served before going for the AMOs
- with m.If (amo_req_i.req & ~busy_i):
- # 1. Flush the cache
- with m.If(~serve_amo_q):
- m.next = "FLUSH_REQ_STATUS"
-                        comb += serve_amo_d.eq(1)
-                        comb += cnt_d.eq(0)
- # 2. Do the AMO
- with m.Else():
- m.next = "AMO_LOAD"
-                        comb += serve_amo_d.eq(0)
-
- # check if we want to flush and can flush
- # e.g.: we are not busy anymore
- # TODO: Check that the busy flag is indeed needed
- with m.If (flush_i & ~busy_i):
- m.next = "FLUSH_REQ_STATUS"
-                    comb += cnt_d.eq(0)
-
- # check if one of the state machines missed
- for i in range(NR_PORTS):
- # here comes the refill portion of code
- with m.If (miss_req_valid[i] & ~miss_req_bypass[i]):
- m.next = "MISS"
- # we are taking another request so don't
- # take the AMO
-                    comb += serve_amo_d.eq(0)
- # save to MSHR
- wid = DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH
- comb += [ mshr_d.valid.eq(0b1),
- mshr_d.we.eq(miss_req_we[i]),
- mshr_d.id.eq(i),
- mshr_d.addr.eq(miss_req_addr[i][0:wid]),
- mshr_d.wdata.eq(miss_req_wdata[i]),
- mshr_d.be.eq(miss_req_be[i]),
- ]
- break
-
- # ~> we missed on the cache
- with m.Case("MISS"):
- # 1. Check if there is an empty cache-line
- # 2. If not -> evict one
- comb += req_o.eq(1)
-                sync += addr_o.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
- m.next = "MISS_REPL"
- comb += miss_o.eq(1)
-
- # ~> second miss cycle
- with m.Case("MISS_REPL"):
- # if all are valid we need to evict one,
- # pseudo random from LFSR
- with m.If(~(~valid_way).bool()):
- comb += lfsr_enable.eq(0b1)
- comb += evict_way_d.eq(lfsr_oh)
- # do we need to write back the cache line?
- with m.If(data_i[lfsr_bin].dirty):
-                        m.next = "WB_CACHELINE_MISS"
- comb += evict_cl_d.tag.eq(data_i[lfsr_bin].tag)
- comb += evict_cl_d.data.eq(data_i[lfsr_bin].data)
- comb += cnt_d.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
- # no - we can request a cache line now
- with m.Else():
- m.next = "REQ_CACHELINE"
- # we have at least one free way
- with m.Else():
- # get victim cache-line by looking for the
- # first non-valid bit
-                    comb += evict_way_d.eq(get_victim_cl(~valid_way))
- m.next = "REQ_CACHELINE"
-
- # ~> we can just load the cache-line,
-            # the way is stored in evict_way_q
- with m.Case("REQ_CACHELINE"):
- comb += req_fsm_miss_valid .eq(1)
- sync += req_fsm_miss_addr .eq(mshr_q.addr)
-
- with m.If (gnt_miss_fsm):
- m.next = "SAVE_CACHELINE"
- comb += miss_gnt_o[mshr_q.id].eq(1)
-
- # ~> replace the cacheline
- with m.Case("SAVE_CACHELINE"):
- # calculate cacheline offset
-                cl_offset = Signal(range(DCACHE_LINE_WIDTH))
- sync += cl_offset.eq(mshr_q.addr[3:DCACHE_BYTE_OFFSET] << 6)
- # we've got a valid response from refill unit
- with m.If (valid_miss_fsm):
- wid = DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH
- sync += addr_o .eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
- sync += req_o .eq(evict_way_q)
- comb += we_o .eq(1)
- comb += be_o .eq(1)
- sync += be_o.vldrty .eq(evict_way_q)
-                    sync += data_o.tag .eq(mshr_q.addr[DCACHE_INDEX_WIDTH:wid])
- comb += data_o.data .eq(data_miss_fsm)
- comb += data_o.valid.eq(1)
- comb += data_o.dirty.eq(0)
-
- # is this a write?
- with m.If (mshr_q.we):
-                        # Yes, so save the updated data now
- for i in range(8):
- # check if we really want to write
- # the corresponding byte
- with m.If (mshr_q.be[i]):
-                            sync += data_o.data.bit_select(cl_offset + i*8, 8).eq(
-                                        mshr_q.wdata[i*8:(i+1)*8])
- # it's immediately dirty if we write
- comb += data_o.dirty.eq(1)
-
- # reset MSHR
- comb += mshr_d.valid.eq(0)
- # go back to idle
- m.next = 'IDLE'
-
- # ------------------------------
- # Write Back Operation
- # ------------------------------
- # ~> evict a cache line from way saved in evict_way_q
- with m.Case("WB_CACHELINE_FLUSH"):
- with m.Case("WB_CACHELINE_MISS"):
-
- comb += req_fsm_miss_valid .eq(0b1)
-                sync += req_fsm_miss_addr.eq(
-                            Cat(Const(0, DCACHE_BYTE_OFFSET),
-                                cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH],
-                                evict_cl_q.tag))
- comb += req_fsm_miss_be .eq(1)
- comb += req_fsm_miss_we .eq(0b1)
-                sync += req_fsm_miss_wdata .eq(evict_cl_q.data)
-
- # we've got a grant --> this is timing critical, think about it
-                with m.If(gnt_miss_fsm):
- # write status array
- sync += addr_o .eq(cnt_q)
- comb += req_o .eq(0b1)
- comb += we_o .eq(0b1)
-                    comb += data_o.valid.eq(0 if INVALIDATE_ON_FLUSH else 1)
- # invalidate
- sync += be_o.vldrty.eq(evict_way_q)
- # go back to handling the miss or flushing,
- # depending on where we came from
-                    with m.If(state_q.ongoing("WB_CACHELINE_MISS")):
- m.next = "MISS"
- with m.Else():
- m.next = "FLUSH_REQ_STATUS"
-
- # ------------------------------
- # Flushing & Initialization
- # ------------------------------
- # ~> make another request to check the same
- # cache-line if there are still some valid entries
- with m.Case("FLUSH_REQ_STATUS"):
- comb += req_o .eq(1)
- sync += addr_o .eq(cnt_q)
- m.next = "FLUSHING"
-
- with m.Case("FLUSHING"):
- # this has priority
- # at least one of the cache lines is dirty
-                with m.If(evict_way.bool()):
- # evict cache line, look for the first
- # cache-line which is dirty
- comb += evict_way_d.eq(get_victim_cl(evict_way))
- comb += evict_cl_d .eq(data_i[one_hot_to_bin(evict_way)])
- state_d = WB_CACHELINE_FLUSH;
- # not dirty ~> increment and continue
- with m.Else():
- # increment and re-request
- sync += cnt_d.eq(cnt_q + (1 << DCACHE_BYTE_OFFSET))
- m.next = "FLUSH_REQ_STATUS"
- sync += addr_o .eq(cnt_q)
- comb += req_o .eq(1)
-                    comb += be_o.vldrty.eq(1 if INVALIDATE_ON_FLUSH else 0)
- comb += we_o .eq(1)
- # finished with flushing operation, go back to idle
- with m.If (cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] \
- == DCACHE_NUM_WORDS-1):
- # only acknowledge if the flush wasn't
- # triggered by an atomic
- sync += flush_ack_o.eq(~serve_amo_q)
- m.next = "IDLE"
-
- # ~> only called after reset
- with m.Case("INIT"):
- # initialize status array
- sync += addr_o.eq(cnt_q)
- comb += req_o .eq(1)
- comb += we_o .eq(1)
- # only write the dirty array
- comb += be_o.vldrty.eq(1)
- sync += cnt_d .eq(cnt_q + (1 << DCACHE_BYTE_OFFSET))
- # finished initialization
- with m.If (cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] \
-                            == DCACHE_NUM_WORDS-1):
- m.next = "IDLE"
-
- # ----------------------
- # AMOs
- # ----------------------
- # TODO(zarubaf) Move this closer to memory
- # ~> we are here because we need to do the AMO,
- # the cache is clean at this point
- # start by executing the load
- with m.Case("AMO_LOAD"):
- comb += req_fsm_miss_valid.eq(1)
- # address is in operand a
- comb += req_fsm_miss_addr.eq(amo_req_i.operand_a)
- comb += req_fsm_miss_req.eq(ariane_axi::SINGLE_REQ)
- comb += req_fsm_miss_size.eq(amo_req_i.size)
- # the request has been granted
- with m.If(gnt_miss_fsm):
- m.next = "AMO_SAVE_LOAD"
- # save the load value
- with m.Case("AMO_SAVE_LOAD"):
- with m.If (valid_miss_fsm):
- # we are only concerned about the lower 64-bit
- comb += mshr_d.wdata.eq(data_miss_fsm[0])
- m.next = "AMO_STORE"
- # and do the store
- with m.Case("AMO_STORE"):
- load_data = Signal(64)
- # re-align load data
- comb += load_data.eq(data_align(amo_req_i.operand_a[:3],
- mshr_q.wdata))
- # Sign-extend for word operation
- with m.If (amo_req_i.size == 0b10):
- comb += amo_operand_a.eq(sext32(load_data[:32]))
- comb += amo_operand_b.eq(sext32(amo_req_i.operand_b[:32]))
- with m.Else():
- comb += amo_operand_a.eq(load_data)
- comb += amo_operand_b.eq(amo_req_i.operand_b)
-
- # we do not need a store request for load reserved
- # or a failing store conditional
- # we can bail-out without making any further requests
- with m.If ((amo_req_i.amo_op == AMO_LR) | \
- ((amo_req_i.amo_op == AMO_SC) & \
- ((reservation_q.valid & \
- (reservation_q.address != \
- amo_req_i.operand_a[3:64])) | \
- ~reservation_q.valid))):
- comb += req_fsm_miss_valid.eq(0)
- m.next = "IDLE"
- comb += amo_resp_o.ack.eq(1)
- # write-back the result
- comb += amo_resp_o.result.eq(amo_operand_a)
- # we know that the SC failed
- with m.If (amo_req_i.amo_op == AMO_SC):
- comb += amo_resp_o.result.eq(1)
- # also clear the reservation
- comb += reservation_d.valid.eq(0)
- with m.Else():
- comb += req_fsm_miss_valid.eq(1)
-
- comb += req_fsm_miss_we .eq(1)
- comb += req_fsm_miss_req .eq(ariane_axi::SINGLE_REQ)
- comb += req_fsm_miss_size.eq(amo_req_i.size)
- comb += req_fsm_miss_addr.eq(amo_req_i.operand_a)
-
- comb += req_fsm_miss_wdata.eq(
- data_align(amo_req_i.operand_a[0:3], amo_result_o))
- comb += req_fsm_miss_be.eq(
- be_gen(amo_req_i.operand_a[0:3], amo_req_i.size))
-
- # place a reservation on the memory
- with m.If (amo_req_i.amo_op == AMO_LR):
- comb += reservation_d.address.eq(amo_req_i.operand_a[3:64])
- comb += reservation_d.valid.eq(1)
-
- # the request is valid or we didn't need to go for another store
- with m.If (valid_miss_fsm):
- m.next = "IDLE"
- comb += amo_resp_o.ack.eq(1)
- # write-back the result
-                    comb += amo_resp_o.result.eq(amo_operand_a)
-
-                    with m.If(amo_req_i.amo_op == AMO_SC):
- comb += amo_resp_o.result.eq(0)
- # An SC must fail if there is another SC
- # (to any address) between the LR and the SC in
- # program order (even to the same address).
- # in any case destroy the reservation
- comb += reservation_d.valid.eq(0)
-
- # check MSHR for aliasing
-
- comb += mshr_addr_matches_o .eq(0)
-        comb += mshr_index_matches_o.eq(0)
-
- for i in range(NR_PORTS):
- # check mshr for potential matching of other units,
- # exclude the unit currently being served
- with m.If (mshr_q.valid & \
- (mshr_addr_i[i][DCACHE_BYTE_OFFSET:56] == \
- mshr_q.addr[DCACHE_BYTE_OFFSET:56])):
- comb += mshr_addr_matches_o[i].eq(1)
-
- # same as previous, but checking only the index
- with m.If (mshr_q.valid & \
- (mshr_addr_i[i][DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] == \
- mshr_q.addr[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH])):
-                comb += mshr_index_matches_o[i].eq(1)
-
- # --------------------
- # Sequential Process
- # --------------------
-
- """
- #pragma translate_off
- `ifndef VERILATOR
- # assert that cache only hits on one way
- assert property (
- @(posedge clk_i) $onehot0(evict_way_q)) else $warning("Evict-way should be one-hot encoded");
- `endif
- #pragma translate_on
- """
-
- # ----------------------
- # Bypass Arbiter
- # ----------------------
- # Connection Arbiter <-> AXI
- req_fsm_bypass_valid = Signal()
- req_fsm_bypass_addr = Signal(64)
- req_fsm_bypass_wdata = Signal(64)
- req_fsm_bypass_we = Signal()
- req_fsm_bypass_be = Signal(8)
- req_fsm_bypass_size = Signal(2)
- gnt_bypass_fsm = Signal()
- valid_bypass_fsm = Signal()
- data_bypass_fsm = Signal(64)
-        id_fsm_bypass = Signal(range(self.NR_PORTS))
-        id_bypass_fsm = Signal(4)
-        gnt_id_bypass_fsm = Signal(4)
-
- i_bypass_arbiter = ib = AXIArbiter( NR_PORTS, 64)
- comb += [
- # Master Side
- ib.data_req_i .eq( miss_req_valid & miss_req_bypass ),
- ib.address_i .eq( miss_req_addr ),
- ib.data_wdata_i .eq( miss_req_wdata ),
- ib.data_we_i .eq( miss_req_we ),
- ib.data_be_i .eq( miss_req_be ),
- ib.data_size_i .eq( miss_req_size ),
- ib.data_gnt_o .eq( bypass_gnt_o ),
- ib.data_rvalid_o .eq( bypass_valid_o ),
- ib.data_rdata_o .eq( bypass_data_o ),
-            # Slave Side
-            ib.id_i          .eq( id_bypass_fsm[:ib.pwid] ),
-            ib.id_o          .eq( id_fsm_bypass ),
-            ib.gnt_id_i      .eq( gnt_id_bypass_fsm[:ib.pwid] ),
- ib.address_o .eq( req_fsm_bypass_addr ),
- ib.data_wdata_o .eq( req_fsm_bypass_wdata ),
- ib.data_req_o .eq( req_fsm_bypass_valid ),
- ib.data_we_o .eq( req_fsm_bypass_we ),
- ib.data_be_o .eq( req_fsm_bypass_be ),
- ib.data_size_o .eq( req_fsm_bypass_size ),
- ib.data_gnt_i .eq( gnt_bypass_fsm ),
- ib.data_rvalid_i .eq( valid_bypass_fsm ),
- ib.data_rdata_i .eq( data_bypass_fsm ),
- ]
-
- axi_adapter #(
- .DATA_WIDTH ( 64 ),
- .AXI_ID_WIDTH ( 4 ),
- .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
- ) i_bypass_axi_adapter (
- .clk_i,
- .rst_ni,
- .req_i ( req_fsm_bypass_valid ),
- .type_i ( ariane_axi::SINGLE_REQ ),
- .gnt_o ( gnt_bypass_fsm ),
- .addr_i ( req_fsm_bypass_addr ),
- .we_i ( req_fsm_bypass_we ),
- .wdata_i ( req_fsm_bypass_wdata ),
- .be_i ( req_fsm_bypass_be ),
- .size_i ( req_fsm_bypass_size ),
- .id_i ( Cat(id_fsm_bypass, 0, 0) ),
- .valid_o ( valid_bypass_fsm ),
- .rdata_o ( data_bypass_fsm ),
- .gnt_id_o ( gnt_id_bypass_fsm ),
- .id_o ( id_bypass_fsm ),
- .critical_word_o ( ), # not used for single requests
- .critical_word_valid_o ( ), # not used for single requests
- .axi_req_o ( axi_bypass_o ),
- .axi_resp_i ( axi_bypass_i )
- );
-
- # ----------------------
- # Cache Line AXI Refill
- # ----------------------
- axi_adapter #(
- .DATA_WIDTH ( DCACHE_LINE_WIDTH ),
- .AXI_ID_WIDTH ( 4 ),
- .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
- ) i_miss_axi_adapter (
- .clk_i,
- .rst_ni,
- .req_i ( req_fsm_miss_valid ),
- .type_i ( req_fsm_miss_req ),
- .gnt_o ( gnt_miss_fsm ),
- .addr_i ( req_fsm_miss_addr ),
- .we_i ( req_fsm_miss_we ),
- .wdata_i ( req_fsm_miss_wdata ),
- .be_i ( req_fsm_miss_be ),
- .size_i ( req_fsm_miss_size ),
- .id_i ( Const(0b1100, 4) ),
- .gnt_id_o ( ), # open
- .valid_o ( valid_miss_fsm ),
- .rdata_o ( data_miss_fsm ),
- .id_o ( ),
- .critical_word_o,
- .critical_word_valid_o,
- .axi_req_o ( axi_data_o ),
- .axi_resp_i ( axi_data_i )
- );
-
- # -----------------
- # Replacement LFSR
- # -----------------
- lfsr_8bit #(.WIDTH (DCACHE_SET_ASSOC)) i_lfsr (
- .en_i ( lfsr_enable ),
- .refill_way_oh ( lfsr_oh ),
- .refill_way_bin ( lfsr_bin ),
- .*
- );
-
- # -----------------
- # AMO ALU
- # -----------------
- amo_alu i_amo_alu (
- .amo_op_i ( amo_op ),
- .amo_operand_a_i ( amo_operand_a ),
- .amo_operand_b_i ( amo_operand_b ),
- .amo_result_o ( amo_result_o )
- );
-
- # -----------------
- # Struct Split
- # -----------------
-
- for i in range(NR_PORTS):
- miss_req = MissReq()
-        comb += miss_req.eq(miss_req_i[i])
- comb += miss_req_valid [i] .eq(miss_req.valid)
- comb += miss_req_bypass [i] .eq(miss_req.bypass)
- comb += miss_req_addr [i] .eq(miss_req.addr)
- comb += miss_req_wdata [i] .eq(miss_req.wdata)
- comb += miss_req_we [i] .eq(miss_req.we)
- comb += miss_req_be [i] .eq(miss_req.be)
- comb += miss_req_size [i] .eq(miss_req.size)
-
- # --------------
- # AXI Arbiter
-    # --------------
- #
- # Description: Arbitrates access to AXI refill/bypass
- #
-class AXIArbiter:
- def __init__(self, NR_PORTS = 3, DATA_WIDTH = 64):
- self.NR_PORTS = NR_PORTS
- self.DATA_WIDTH = DATA_WIDTH
- self.pwid = pwid = ceil(log(NR_PORTS) / log(2))
- rst_ni = ResetSignal() # Asynchronous reset active low
- # master ports
- self.data_req_i = Signal(NR_PORTS)
-        self.address_i = Array(Signal(64, name="address_i") \
-                               for i in range(NR_PORTS))
-        self.data_wdata_i = Array(Signal(64, name="data_wdata_i") \
-                               for i in range(NR_PORTS))
-        self.data_we_i = Signal(NR_PORTS)
-        self.data_be_i = Array(Signal(DATA_WIDTH//8, name="data_be_i") \
-                               for i in range(NR_PORTS))
-        self.data_size_i = Array(Signal(2, name="data_size_i") \
-                               for i in range(NR_PORTS))
-        self.data_gnt_o = Signal(NR_PORTS)
-        self.data_rvalid_o = Signal(NR_PORTS)
-        self.data_rdata_o = Array(Signal(64, name="data_rdata_o") \
-                               for i in range(NR_PORTS))
-
- # slave port
- self.id_i = Signal(pwid)
- self.id_o = Signal(pwid)
- self.gnt_id_i = Signal(pwid)
- self.data_req_o = Signal()
- self.address_o = Signal(64)
- self.data_wdata_o = Signal(DATA_WIDTH)
- self.data_we_o = Signal()
-        self.data_be_o = Signal(DATA_WIDTH//8)
- self.data_size_o = Signal(2)
- self.data_gnt_i = Signal()
- self.data_rvalid_i = Signal()
- self.data_rdata_i = Signal(DATA_WIDTH)
-
-    def elaborate(self, platform):
-        m = Module()
-        comb, sync = m.d.comb, m.d.sync
-        # original: enum logic [1:0] { IDLE, REQ, SERVING } state_d, state_q;
-        # (replaced by an nmigen FSM below)
-
-        class Packet:
-            def __init__(self, pwid, DATA_WIDTH):
-                self.id = Signal(pwid)
-                self.address = Signal(64)
-                self.data = Signal(64)
-                self.size = Signal(2)
-                self.be = Signal(DATA_WIDTH//8)
-                self.we = Signal()
-
-            def ports(self):
-                return [self.id, self.address, self.data,
-                        self.size, self.be, self.we]
-
-            def eq(self, inp):
-                return [o.eq(i) for (o, i) in zip(self.ports(), inp.ports())]
-
-        request_index = Signal(self.pwid)
-        req_q = Packet(self.pwid, self.DATA_WIDTH)
-        req_d = Packet(self.pwid, self.DATA_WIDTH)
-
-        # request register; req_d defaults to holding the current value
-        comb += req_d.eq(req_q)
-        sync += req_q.eq(req_d)
-
- # request port
- comb += self.address_o .eq(req_q.address)
- comb += self.data_wdata_o .eq(req_q.data)
- comb += self.data_be_o .eq(req_q.be)
- comb += self.data_size_o .eq(req_q.size)
- comb += self.data_we_o .eq(req_q.we)
- comb += self.id_o .eq(req_q.id)
- comb += self.data_gnt_o .eq(0)
-        # read port
-        comb += self.data_rvalid_o .eq(0)
-        # (data_rdata_o defaults to zero; drive only the port being served)
-        comb += self.data_rdata_o[req_q.id].eq(self.data_rdata_i)
-
- m.submodules.pp = pp = PriorityEncoder(self.NR_PORTS)
- comb += pp.i.eq(self.data_req_i) # select one request (priority-based)
- comb += request_index.eq(pp.o)
-
-        # arbitration state machine: IDLE accepts and registers one request,
-        # SERVING holds data_req_o until rvalid comes back
-        with m.FSM() as fsm:
-
-            with m.State("IDLE"):
-                # wait for incoming requests (priority encoder on data_req_i)
-                with m.If(~pp.n): # at least one request bit is set
-                    # grant the selected port (its request bit is known set)
-                    comb += self.data_req_o.eq(1)
-                    comb += self.data_gnt_o.bit_select(request_index, 1).eq(1)
-                    # save the request
-                    comb += req_d.address.eq(self.address_i[request_index])
-                    comb += req_d.id.eq(request_index)
-                    comb += req_d.data.eq(self.data_wdata_i[request_index])
-                    comb += req_d.size.eq(self.data_size_i[request_index])
-                    comb += req_d.be.eq(self.data_be_i[request_index])
-                    comb += req_d.we.eq(self.data_we_i[request_index])
-                    m.next = "SERVING"
-
-                comb += self.address_o    .eq(self.address_i[request_index])
-                comb += self.data_wdata_o .eq(self.data_wdata_i[request_index])
-                comb += self.data_be_o    .eq(self.data_be_i[request_index])
-                comb += self.data_size_o  .eq(self.data_size_i[request_index])
-                comb += self.data_we_o    .eq(self.data_we_i[request_index])
-                comb += self.id_o         .eq(request_index)
-
-            with m.State("SERVING"):
-                comb += self.data_req_o.eq(1)
-                with m.If (self.data_rvalid_i):
-                    comb += self.data_rvalid_o.bit_select(req_q.id, 1).eq(1)
-                    m.next = "IDLE"
-
- # ------------
- # Assertions
- # ------------
-
- """
-#pragma translate_off
-`ifndef VERILATOR
-# make sure that we eventually get an rvalid after we received a grant
-assert property (@(posedge clk_i) data_gnt_i |-> ##[1:$] data_rvalid_i )
- else begin $error("There was a grant without a rvalid"); $stop(); end
-# assert that there is no grant without a request
-assert property (@(negedge clk_i) data_gnt_i |-> data_req_o)
- else begin $error("There was a grant without a request."); $stop(); end
-# assert that the address does not contain X when request is sent
-assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_o)) )
- else begin $error("address contains X when request is set"); $stop(); end
-
-`endif
-#pragma translate_on
-        """
-
-        return m
-
+++ /dev/null
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: Florian Zaruba, ETH Zurich
-# Date: 19/04/2017
-# Description: Memory Management Unit for Ariane, contains TLB and
-# address translation unit. SV48 as defined in
-# Volume II: RISC-V Privileged Architectures V1.10 Page 63
-
-import ariane_pkg::*;
-"""
-
-from nmigen import Const, Signal, Cat, Module, Mux, Elaboratable
-from nmigen.cli import verilog, rtlil
-
-from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
-from tlb import TLB
-from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
- LOAD_PAGE_FAULT, STORE_PAGE_FAULT)
-
-PRIV_LVL_M = Const(0b11, 2)
-PRIV_LVL_S = Const(0b01, 2)
-PRIV_LVL_U = Const(0b00, 2)
-
-
-class RVException:
- def __init__(self):
- self.cause = Signal(64) # cause of exception
- self.tval = Signal(64) # more info of causing exception
- # (e.g.: instruction causing it),
- # address of LD/ST fault
- self.valid = Signal()
-
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
-
- def __iter__(self):
- yield self.cause
- yield self.tval
- yield self.valid
-
- def ports(self):
- return list(self)
-
-
-class ICacheReqI:
- def __init__(self):
- self.fetch_valid = Signal() # address translation valid
- self.fetch_paddr = Signal(64) # physical address in
- self.fetch_exception = RVException() # exception occurred during fetch
-
- def __iter__(self):
- yield self.fetch_valid
- yield self.fetch_paddr
- yield from self.fetch_exception
-
- def ports(self):
- return list(self)
-
-
-class ICacheReqO:
- def __init__(self):
- self.fetch_req = Signal() # address translation request
- self.fetch_vaddr = Signal(64) # virtual address out
-
- def __iter__(self):
- yield self.fetch_req
- yield self.fetch_vaddr
-
- def ports(self):
- return list(self)
-
-
-class MMU(Elaboratable):
- def __init__(self, instr_tlb_entries = 4,
- data_tlb_entries = 4,
- asid_width = 1):
- self.instr_tlb_entries = instr_tlb_entries
- self.data_tlb_entries = data_tlb_entries
- self.asid_width = asid_width
-
- self.flush_i = Signal()
- self.enable_translation_i = Signal()
- self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
- # IF interface
- self.icache_areq_i = ICacheReqO()
- self.icache_areq_o = ICacheReqI()
- # LSU interface
- # this is a more minimalistic interface because the actual addressing
- # logic is handled in the LSU as we distinguish load and stores,
- # what we do here is simple address translation
- self.misaligned_ex_i = RVException()
- self.lsu_req_i = Signal() # request address translation
- self.lsu_vaddr_i = Signal(64) # virtual address in
- self.lsu_is_store_i = Signal() # the translation is requested by a store
- # if we need to walk the page table we can't grant in the same cycle
-
- # Cycle 0
- self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
- # if translation hits in the DTLB
- # Cycle 1
- self.lsu_valid_o = Signal() # translation is valid
- self.lsu_paddr_o = Signal(64) # translated address
- self.lsu_exception_o = RVException() # addr translate threw exception
-
- # General control signals
- self.priv_lvl_i = Signal(2)
- self.ld_st_priv_lvl_i = Signal(2)
- self.sum_i = Signal()
- self.mxr_i = Signal()
- # input logic flag_mprv_i,
- self.satp_ppn_i = Signal(44)
- self.asid_i = Signal(self.asid_width)
- self.flush_tlb_i = Signal()
- # Performance counters
- self.itlb_miss_o = Signal()
- self.dtlb_miss_o = Signal()
- # PTW memory interface
- self.req_port_i = DCacheReqO()
- self.req_port_o = DCacheReqI()
-
- def elaborate(self, platform):
- m = Module()
-
- iaccess_err = Signal() # insufficient priv to access instr page
- daccess_err = Signal() # insufficient priv to access data page
- ptw_active = Signal() # PTW is currently walking a page table
- walking_instr = Signal() # PTW is walking because of an ITLB miss
- ptw_error = Signal() # PTW threw an exception
-
- update_vaddr = Signal(48) # guessed
-        uaddr64 = Cat(update_vaddr, Const(0, 16)) # extend to 64bit with zeros
- update_ptw_itlb = TLBUpdate(self.asid_width)
- update_ptw_dtlb = TLBUpdate(self.asid_width)
-
- itlb_lu_access = Signal()
- itlb_content = PTE()
- itlb_is_2M = Signal()
- itlb_is_1G = Signal()
- itlb_is_512G = Signal()
- itlb_lu_hit = Signal()
-
- dtlb_lu_access = Signal()
- dtlb_content = PTE()
- dtlb_is_2M = Signal()
- dtlb_is_1G = Signal()
- dtlb_is_512G = Signal()
- dtlb_lu_hit = Signal()
-
- # Assignments
- m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
- dtlb_lu_access.eq(self.lsu_req_i)
- ]
-
- # ITLB
- m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
- self.asid_width)
- m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
- i_tlb.update_i.eq(update_ptw_itlb),
- i_tlb.lu_access_i.eq(itlb_lu_access),
- i_tlb.lu_asid_i.eq(self.asid_i),
- i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
- itlb_content.eq(i_tlb.lu_content_o),
- itlb_is_2M.eq(i_tlb.lu_is_2M_o),
- itlb_is_1G.eq(i_tlb.lu_is_1G_o),
- itlb_is_512G.eq(i_tlb.lu_is_512G_o),
- itlb_lu_hit.eq(i_tlb.lu_hit_o),
- ]
-
- # DTLB
- m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
- self.asid_width)
- m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
- d_tlb.update_i.eq(update_ptw_dtlb),
- d_tlb.lu_access_i.eq(dtlb_lu_access),
- d_tlb.lu_asid_i.eq(self.asid_i),
- d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
- dtlb_content.eq(d_tlb.lu_content_o),
- dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
- dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
- dtlb_is_512G.eq(d_tlb.lu_is_512G_o),
- dtlb_lu_hit.eq(d_tlb.lu_hit_o),
- ]
-
- # PTW
- m.submodules.ptw = ptw = PTW(self.asid_width)
- m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
- walking_instr.eq(ptw.walking_instr_o),
- ptw_error.eq(ptw.ptw_error_o),
- ptw.enable_translation_i.eq(self.enable_translation_i),
-
- update_vaddr.eq(ptw.update_vaddr_o),
- update_ptw_itlb.eq(ptw.itlb_update_o),
- update_ptw_dtlb.eq(ptw.dtlb_update_o),
-
- ptw.itlb_access_i.eq(itlb_lu_access),
- ptw.itlb_hit_i.eq(itlb_lu_hit),
- ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
-
- ptw.dtlb_access_i.eq(dtlb_lu_access),
- ptw.dtlb_hit_i.eq(dtlb_lu_hit),
- ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
-
- ptw.req_port_i.eq(self.req_port_i),
- self.req_port_o.eq(ptw.req_port_o),
- ]
-
- # ila_1 i_ila_1 (
- # .clk(clk_i), # input wire clk
- # .probe0({req_port_o.address_tag, req_port_o.address_index}),
- # .probe1(req_port_o.data_req), # input wire [63:0] probe1
- # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2
- # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3
- # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4
- # .probe5(ptw_error), # input wire [1:0] probe5
- # .probe6(update_vaddr), # input wire [0:0] probe6
- # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7
- # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8
- # .probe9(dtlb_lu_access), # input wire [0:0] probe9
- # .probe10(lsu_vaddr_i), # input wire [0:0] probe10
- # .probe11(dtlb_lu_hit), # input wire [0:0] probe11
- # .probe12(itlb_lu_access), # input wire [0:0] probe12
- # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13
- # .probe14(itlb_lu_hit) # input wire [0:0] probe13
- # );
-
- #-----------------------
- # Instruction Interface
- #-----------------------
- # The instruction interface is a simple request response interface
-
- # MMU disabled: just pass through
- m.d.comb += [self.icache_areq_o.fetch_valid.eq(
- self.icache_areq_i.fetch_req),
- # play through in case we disabled address translation
- self.icache_areq_o.fetch_paddr.eq(
- self.icache_areq_i.fetch_vaddr)
- ]
- # two potential exception sources:
- # 1. HPTW threw an exception -> signal with a page fault exception
- # 2. We got an access error because of insufficient permissions ->
- # throw an access exception
- m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
- # Check whether we are allowed to access this memory region
- # from a fetch perspective
-
- # PLATEN TODO: use PermissionValidator instead [we like modules]
- m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
- (((self.priv_lvl_i == PRIV_LVL_U) & \
- ~itlb_content.u) | \
- ((self.priv_lvl_i == PRIV_LVL_S) & \
- itlb_content.u)))
-
- # MMU enabled: address from TLB, request delayed until hit.
- # Error when TLB hit and no access right or TLB hit and
- # translated address not valid (e.g. AXI decode error),
- # or when PTW performs walk due to ITLB miss and raises
- # an error.
- with m.If (self.enable_translation_i):
-            # we work with SV48, so if VM is enabled, check that
-            # all bits [63:47] are equal (a canonical, sign-extended address)
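-            # (illustrative: 0xFFFF_8000_0000_1000 and 0x0000_7FFF_FFFF_F000
-            #  are canonical, bits 63:47 all ones / all zeroes, whereas
-            #  0x0000_8000_0000_0000 is not, bit 47 set but bits 63:48 clear)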
- with m.If (self.icache_areq_i.fetch_req & \
- ~(((~self.icache_areq_i.fetch_vaddr[47:64]) == 0) | \
- (self.icache_areq_i.fetch_vaddr[47:64]) == 0)):
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
- fe.tval.eq(self.icache_areq_i.fetch_vaddr),
- fe.valid.eq(1)
- ]
-
- m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
-
- # 4K page
- paddr = Signal.like(self.icache_areq_o.fetch_paddr)
- paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
- itlb_content.ppn)
- m.d.comb += paddr.eq(paddr4k)
- # Mega page
- with m.If(itlb_is_2M):
- m.d.comb += paddr[12:21].eq(
- self.icache_areq_i.fetch_vaddr[12:21])
- # Giga page
- with m.If(itlb_is_1G):
- m.d.comb += paddr[12:30].eq(
- self.icache_areq_i.fetch_vaddr[12:30])
- m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
- # Tera page
- with m.If(itlb_is_512G):
- m.d.comb += paddr[12:39].eq(
- self.icache_areq_i.fetch_vaddr[12:39])
- m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
-
- # ---------
- # ITLB Hit
- # --------
- # if we hit the ITLB output the request signal immediately
- with m.If(itlb_lu_hit):
- m.d.comb += self.icache_areq_o.fetch_valid.eq(
- self.icache_areq_i.fetch_req)
- # we got an access error
- with m.If (iaccess_err):
- # throw a page fault
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
- fe.tval.eq(self.icache_areq_i.fetch_vaddr),
- fe.valid.eq(1)
- ]
- # ---------
- # ITLB Miss
- # ---------
- # watch out for exceptions happening during walking the page table
- with m.Elif(ptw_active & walking_instr):
- m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
- fe.tval.eq(uaddr64),
- fe.valid.eq(1)
- ]
-
- #-----------------------
- # Data Interface
- #-----------------------
-
- lsu_vaddr = Signal(64)
- dtlb_pte = PTE()
- misaligned_ex = RVException()
- lsu_req = Signal()
- lsu_is_store = Signal()
- dtlb_hit = Signal()
- #dtlb_is_2M = Signal()
- #dtlb_is_1G = Signal()
- #dtlb_is_512 = Signal()
-
- # check if we need to do translation or if we are always
- # ready (e.g.: we are not translating anything)
- m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
- dtlb_lu_hit, 1))
-
- # The data interface is simpler and only consists of a
- # request/response interface
- m.d.comb += [
- # save request and DTLB response
- lsu_vaddr.eq(self.lsu_vaddr_i),
- lsu_req.eq(self.lsu_req_i),
- misaligned_ex.eq(self.misaligned_ex_i),
- dtlb_pte.eq(dtlb_content),
- dtlb_hit.eq(dtlb_lu_hit),
- lsu_is_store.eq(self.lsu_is_store_i),
- #dtlb_is_2M.eq(dtlb_is_2M),
- #dtlb_is_1G.eq(dtlb_is_1G),
- ##dtlb_is_512.eq(self.dtlb_is_512G) #????
- ]
- m.d.sync += [
- self.lsu_paddr_o.eq(lsu_vaddr),
- self.lsu_valid_o.eq(lsu_req),
- self.lsu_exception_o.eq(misaligned_ex),
- ]
-
- sverr = Signal()
- usrerr = Signal()
-
- m.d.comb += [
- # mute misaligned exceptions if there is no request
- # otherwise they will throw accidental exceptions
- misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
-
- # SUM is not set and we are trying to access a user
- # page in supervisor mode
-            sverr.eq((self.ld_st_priv_lvl_i == PRIV_LVL_S) & ~self.sum_i & \
-                     dtlb_pte.u),
- # this is not a user page but we are in user mode and
- # trying to access it
-            usrerr.eq((self.ld_st_priv_lvl_i == PRIV_LVL_U) & ~dtlb_pte.u),
-
- # Check if the User flag is set, then we may only
- # access it in supervisor mode if SUM is enabled
- daccess_err.eq(sverr | usrerr),
- ]
-
- # translation is enabled and no misaligned exception occurred
- with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
- m.d.comb += lsu_req.eq(0)
- # 4K page
- paddr = Signal.like(lsu_vaddr)
-            paddr4k = Cat(lsu_vaddr[0:12], dtlb_pte.ppn)
- m.d.comb += paddr.eq(paddr4k)
- # Mega page
- with m.If(dtlb_is_2M):
- m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
- # Giga page
- with m.If(dtlb_is_1G):
- m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
- m.d.sync += self.lsu_paddr_o.eq(paddr)
- # TODO platen tera_page
-
- # ---------
- # DTLB Hit
- # --------
-            with m.If(dtlb_hit & self.lsu_req_i):
- m.d.comb += lsu_req.eq(1)
- # this is a store
- with m.If (lsu_is_store):
- # check if the page is write-able and
- # we are not violating privileges
- # also check if the dirty flag is set
- with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
- le.tval.eq(lsu_vaddr),
- le.valid.eq(1)
- ]
-
- # this is a load, check for sufficient access
- # privileges - throw a page fault if necessary
- with m.Elif(daccess_err):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
- le.tval.eq(lsu_vaddr),
- le.valid.eq(1)
- ]
- # ---------
- # DTLB Miss
- # ---------
- # watch out for exceptions
- with m.Elif (ptw_active & ~walking_instr):
- # page table walker threw an exception
- with m.If (ptw_error):
- # an error makes the translation valid
- m.d.comb += lsu_req.eq(1)
- # the page table walker can only throw page faults
- with m.If (lsu_is_store):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
- le.tval.eq(uaddr64),
- le.valid.eq(1)
- ]
- with m.Else():
- m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
- le.tval.eq(uaddr64),
- le.valid.eq(1)
- ]
-
- return m
-
- def ports(self):
- return [self.flush_i, self.enable_translation_i,
- self.en_ld_st_translation_i,
- self.lsu_req_i,
- self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
- self.lsu_valid_o, self.lsu_paddr_o,
- self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
- self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
- self.itlb_miss_o, self.dtlb_miss_o] + \
- self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
- self.req_port_i.ports() + self.req_port_o.ports() + \
- self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
-
-if __name__ == '__main__':
- mmu = MMU()
- vl = rtlil.convert(mmu, ports=mmu.ports())
- with open("test_mmu.il", "w") as f:
- f.write(vl)
-
+++ /dev/null
-pseudo-LRU
-
-two-way set associative - one bit
-
-    indicates which line of the two has been referenced more recently
-
-
-four-way set associative - three bits
-
- each bit represents one branch point in a binary decision tree; let 1
- represent that the left side has been referenced more recently than the
- right side, and 0 vice-versa
-
- are all 4 lines valid?
- / \
- yes no, use an invalid line
- |
- |
- |
- bit_0 == 0? state | replace ref to | next state
- / \ ------+-------- -------+-----------
- y n 00x | line_0 line_0 | 11_
- / \ 01x | line_1 line_1 | 10_
- bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
- / \ / \ 1x1 | line_3 line_3 | 0_0
- y n y n
- / \ / \ ('x' means ('_' means unchanged)
- line_0 line_1 line_2 line_3 don't care)
-
- (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
- Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
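-
-the three-bit scheme above, written as a small Python reference model (a
-sketch for illustration only; the function names are made up here, bit_0
-is the root of the tree, bit_1 covers lines 0/1, bit_2 covers lines 2/3):
-
-    def plru4_replace(bits):
-        # bits = [bit_0, bit_1, bit_2]; returns the line to evict
-        if bits[0] == 0:               # left pair referenced less recently
-            return 0 if bits[1] == 0 else 1
-        return 2 if bits[2] == 0 else 3
-
-    def plru4_touch(bits, line):
-        # on a reference to 'line', mark its side of each node on the
-        # path as the more recently used one
-        bits = list(bits)
-        if line in (0, 1):
-            bits[0] = 1
-            bits[1] = 1 if line == 0 else 0
-        else:
-            bits[0] = 0
-            bits[2] = 1 if line == 2 else 0
-        return bits
-
-quick check against the table: from state 000, a reference to line_0 gives
-plru4_touch([0, 0, 0], 0) == [1, 1, 0], and plru4_replace([1, 1, 0]) picks
-line_2, matching rows 00x and 1x0 above.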
-
-
-note that there is a 6-bit encoding for true LRU for four-way set associative
-
- bit 0: bank[1] more recently used than bank[0]
- bit 1: bank[2] more recently used than bank[0]
- bit 2: bank[2] more recently used than bank[1]
- bit 3: bank[3] more recently used than bank[0]
- bit 4: bank[3] more recently used than bank[1]
- bit 5: bank[3] more recently used than bank[2]
-
- this results in 24 valid bit patterns within the 64 possible bit patterns
- (4! possible valid traces for bank references)
-
- e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
-
- you can implement a state machine with a 256x6 ROM (6-bit state encoding
- appended with a 2-bit bank reference input will yield a new 6-bit state),
- and you can implement an LRU bank indicator with a 64x2 ROM
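-
-the same six-bit true-LRU encoding as a Python sketch (again only for
-illustration; PAIRS, lru_touch and lru_victim are names invented here,
-and the 256x6 / 64x2 ROM contents mentioned above can be generated by
-running these two helpers over every state and reference):
-
-    # bit k is set when bank[j] was used more recently than bank[i]
-    PAIRS = [(0, 1), (0, 2), (1, 2), (0, 3), (1, 3), (2, 3)]
-
-    def lru_touch(bits, bank):
-        # a reference to 'bank' makes it more recent than every other bank
-        bits = list(bits)
-        for k, (i, j) in enumerate(PAIRS):
-            if bank == j:
-                bits[k] = 1
-            elif bank == i:
-                bits[k] = 0
-        return bits
-
-    def lru_victim(bits):
-        # the LRU bank is the one that every other bank beats; returns
-        # None for the 40 bit patterns that are not valid encodings
-        for b in range(4):
-            if all(bits[k] == (1 if b == i else 0)
-                   for k, (i, j) in enumerate(PAIRS) if b in (i, j)):
-                return b
-
-touching banks 0, 1, 2, 3 in that order from any starting state ends in
-111111 (the trace example above), and lru_victim of that state is bank 0.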
-
+++ /dev/null
-# moved to nmutil https://git.libre-soc.org/?p=nmutil.git;a=tree
-from nmutil.plru import PLRU
+++ /dev/null
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 24.4.2017
-# Description: Hardware-PTW
-
-/* verilator lint_off WIDTH */
-import ariane_pkg::*;
-
-see linux kernel source:
-
-* "arch/riscv/include/asm/page.h"
-* "arch/riscv/include/asm/mmu_context.h"
-* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
-
-"""
-
-from nmigen import Const, Signal, Cat, Module, Elaboratable
-from nmigen.hdl.ast import ArrayProxy
-from nmigen.cli import verilog, rtlil
-from math import log2
-
-
-DCACHE_SET_ASSOC = 8
-CONFIG_L1D_SIZE = 32*1024
-DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC))
-DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH
-
-ASID_WIDTH = 8
-
-
-class DCacheReqI:
- def __init__(self):
- self.address_index = Signal(DCACHE_INDEX_WIDTH)
- self.address_tag = Signal(DCACHE_TAG_WIDTH)
- self.data_wdata = Signal(64)
- self.data_req = Signal()
- self.data_we = Signal()
- self.data_be = Signal(8)
- self.data_size = Signal(2)
- self.kill_req = Signal()
- self.tag_valid = Signal()
-
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
-
- def ports(self):
- return [self.address_index, self.address_tag,
- self.data_wdata, self.data_req,
- self.data_we, self.data_be, self.data_size,
- self.kill_req, self.tag_valid,
- ]
-
-class DCacheReqO:
- def __init__(self):
- self.data_gnt = Signal()
- self.data_rvalid = Signal()
- self.data_rdata = Signal(64) # actually in PTE object format
-
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
-
- def ports(self):
- return [self.data_gnt, self.data_rvalid, self.data_rdata]
-
-
-class PTE: #(RecordObject):
- def __init__(self):
- self.v = Signal()
- self.r = Signal()
- self.w = Signal()
- self.x = Signal()
- self.u = Signal()
- self.g = Signal()
- self.a = Signal()
- self.d = Signal()
- self.rsw = Signal(2)
- self.ppn = Signal(44)
- self.reserved = Signal(10)
-
- def flatten(self):
- return Cat(*self.ports())
-
- def eq(self, x):
- if isinstance(x, ArrayProxy):
- res = []
- for o in self.ports():
- i = getattr(x, o.name)
- res.append(i)
- x = Cat(*res)
- else:
- x = x.flatten()
- return self.flatten().eq(x)
-
- def __iter__(self):
- """ order is critical so that flatten creates LSB to MSB
- """
- yield self.v
- yield self.r
- yield self.w
- yield self.x
- yield self.u
- yield self.g
- yield self.a
- yield self.d
- yield self.rsw
- yield self.ppn
- yield self.reserved
-
- def ports(self):
- return list(self)
-
-
-class TLBUpdate:
- def __init__(self, asid_width):
- self.valid = Signal() # valid flag
- self.is_2M = Signal()
- self.is_1G = Signal()
- self.is_512G = Signal()
- self.vpn = Signal(36)
- self.asid = Signal(asid_width)
- self.content = PTE()
-
- def flatten(self):
- return Cat(*self.ports())
-
- def eq(self, x):
- return self.flatten().eq(x.flatten())
-
- def ports(self):
-        return [self.valid, self.is_2M, self.is_1G, self.is_512G,
-                self.vpn, self.asid] + self.content.ports()
-
-
-# SV48 defines four levels of page tables
-LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
-LVL2 = Const(1, 2)
-LVL3 = Const(2, 2)
-LVL4 = Const(3, 2)
-
-
-class PTW(Elaboratable):
- def __init__(self, asid_width=8):
- self.asid_width = asid_width
-
- self.flush_i = Signal() # flush everything, we need to do this because
- # actually everything we do is speculative at this stage
- # e.g.: there could be a CSR instruction that changes everything
- self.ptw_active_o = Signal(reset=1) # active if not IDLE
- self.walking_instr_o = Signal() # set when walking for TLB
- self.ptw_error_o = Signal() # set when an error occurred
- self.enable_translation_i = Signal() # CSRs indicate to enable SV48
- self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st
-
- self.lsu_is_store_i = Signal() # translation triggered by store
- # PTW memory interface
- self.req_port_i = DCacheReqO()
- self.req_port_o = DCacheReqI()
-
- # to TLBs, update logic
- self.itlb_update_o = TLBUpdate(asid_width)
- self.dtlb_update_o = TLBUpdate(asid_width)
-
- self.update_vaddr_o = Signal(48)
-
- self.asid_i = Signal(self.asid_width)
- # from TLBs
- # did we miss?
- self.itlb_access_i = Signal()
- self.itlb_hit_i = Signal()
- self.itlb_vaddr_i = Signal(64)
-
- self.dtlb_access_i = Signal()
- self.dtlb_hit_i = Signal()
- self.dtlb_vaddr_i = Signal(64)
- # from CSR file
- self.satp_ppn_i = Signal(44) # ppn from satp
- self.mxr_i = Signal()
- # Performance counters
- self.itlb_miss_o = Signal()
- self.dtlb_miss_o = Signal()
-
-    def ports(self):
-        return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
-                self.enable_translation_i, self.en_ld_st_translation_i,
-                self.lsu_is_store_i,
-                self.update_vaddr_o,
-                self.asid_i,
-                self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
-                self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
-                self.satp_ppn_i, self.mxr_i,
-                self.itlb_miss_o, self.dtlb_miss_o
-                ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports() \
-                  + self.req_port_i.ports() + self.req_port_o.ports()
-
- def elaborate(self, platform):
- m = Module()
-
- # input registers
- data_rvalid = Signal()
- data_rdata = Signal(64)
-
- # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
- # is spec'd in 64-bit binary-format: better to spec as Record?
- pte = PTE()
- m.d.comb += pte.flatten().eq(data_rdata)
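-        # Sv48 PTE bit layout decoded above (LSB first, matching
-        # PTE.__iter__): [0] v, [1] r, [2] w, [3] x, [4] u, [5] g,
-        # [6] a, [7] d, [8:10] rsw, [10:54] ppn, [54:64] reserved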
-
- # SV48 defines four levels of page tables
- ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
- ptw_lvl1 = Signal()
- ptw_lvl2 = Signal()
- ptw_lvl3 = Signal()
- ptw_lvl4 = Signal()
- m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
- ptw_lvl2.eq(ptw_lvl == LVL2),
- ptw_lvl3.eq(ptw_lvl == LVL3),
- ptw_lvl4.eq(ptw_lvl == LVL4)
- ]
-
- # is this an instruction page table walk?
- is_instr_ptw = Signal()
- global_mapping = Signal()
- # latched tag signal
- tag_valid = Signal()
- # register the ASID
- tlb_update_asid = Signal(self.asid_width)
- # register VPN we need to walk, SV48 defines a 48 bit virtual addr
- vaddr = Signal(64)
- # 4 byte aligned physical pointer
- ptw_pptr = Signal(56)
-
- end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH
- m.d.sync += [
- # Assignments
- self.update_vaddr_o.eq(vaddr),
-
- self.walking_instr_o.eq(is_instr_ptw),
- # directly output the correct physical address
- self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
- self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
- # we are never going to kill this request
- self.req_port_o.kill_req.eq(0), # XXX assign comb?
- # we are never going to write with the HPTW
- self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
- # -----------
- # TLB Update
- # -----------
- self.itlb_update_o.vpn.eq(vaddr[12:48]),
- self.dtlb_update_o.vpn.eq(vaddr[12:48]),
- # update the correct page table level
- self.itlb_update_o.is_2M.eq(ptw_lvl3),
- self.itlb_update_o.is_1G.eq(ptw_lvl2),
- self.itlb_update_o.is_512G.eq(ptw_lvl1),
- self.dtlb_update_o.is_2M.eq(ptw_lvl3),
- self.dtlb_update_o.is_1G.eq(ptw_lvl2),
- self.dtlb_update_o.is_512G.eq(ptw_lvl1),
-
- # output the correct ASID
- self.itlb_update_o.asid.eq(tlb_update_asid),
- self.dtlb_update_o.asid.eq(tlb_update_asid),
- # set the global mapping bit
- self.itlb_update_o.content.eq(pte),
- self.itlb_update_o.content.g.eq(global_mapping),
- self.dtlb_update_o.content.eq(pte),
- self.dtlb_update_o.content.g.eq(global_mapping),
-
- self.req_port_o.tag_valid.eq(tag_valid),
- ]
-
- #-------------------
- # Page table walker #needs update
- #-------------------
- # A virtual address va is translated into a physical address pa as
- # follows:
- # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv48,
- # PAGESIZE=2^12 and LEVELS=4.)
- # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
- # (For Sv32, PTESIZE=4.)
- # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
- # access exception.
- # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
- # step 5. Otherwise, this PTE is a pointer to the next level of
- # the page table.
- # Let i=i-1. If i < 0, stop and raise an access exception.
- # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2.
- # 5. A leaf PTE has been found. Determine if the requested memory
- # access is allowed by the pte.r, pte.w, and pte.x bits. If not,
- # stop and raise an access exception. Otherwise, the translation is
- # successful. Set pte.a to 1, and, if the memory access is a
- # store, set pte.d to 1.
- # The translated physical address is given as follows:
- # - pa.pgoff = va.pgoff.
- # - If i > 0, then this is a superpage translation and
- # pa.ppn[i-1:0] = va.vpn[i-1:0].
- # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
- # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned
- # superpage stop and raise a page-fault exception.
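-        # worked example with illustrative numbers: for satp.ppn = 0x1000
-        # and va.vpn[3] = 1, the first PTE is read from
-        # a + vpn[3]*8 = (0x1000 << 12) + 8 = 0x100_0008; if that PTE is a
-        # pointer (r = x = 0), a becomes pte.ppn << 12 and the walk repeats
-        # with vpn[2], then vpn[1], then vpn[0].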
-
- m.d.sync += tag_valid.eq(0)
-
- # default assignments
- m.d.comb += [
- # PTW memory interface
- self.req_port_o.data_req.eq(0),
- self.req_port_o.data_be.eq(Const(0xFF, 8)),
- self.req_port_o.data_size.eq(Const(0b11, 2)),
- self.req_port_o.data_we.eq(0),
- self.ptw_error_o.eq(0),
- self.itlb_update_o.valid.eq(0),
- self.dtlb_update_o.valid.eq(0),
-
- self.itlb_miss_o.eq(0),
- self.dtlb_miss_o.eq(0),
- ]
-
- # ------------
- # State Machine
- # ------------
-
- with m.FSM() as fsm:
-
- with m.State("IDLE"):
- self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
- ptw_pptr, vaddr, tlb_update_asid)
-
- with m.State("WAIT_GRANT"):
- self.grant(m, tag_valid, data_rvalid)
-
- with m.State("PTE_LOOKUP"):
- # we wait for the valid signal
- with m.If(data_rvalid):
- self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4,
- data_rvalid, global_mapping,
- is_instr_ptw, ptw_pptr)
-
- # Propagate error to MMU/LSU
- with m.State("PROPAGATE_ERROR"):
- m.next = "IDLE"
- m.d.comb += self.ptw_error_o.eq(1)
-
- # wait for the rvalid before going back to IDLE
- with m.State("WAIT_RVALID"):
- with m.If(data_rvalid):
- m.next = "IDLE"
-
- m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
- data_rvalid.eq(self.req_port_i.data_rvalid)
- ]
-
- return m
-
- def set_grant_state(self, m):
- # should we have flushed before we got an rvalid,
- # wait for it until going back to IDLE
- with m.If(self.flush_i):
- with m.If (self.req_port_i.data_gnt):
- m.next = "WAIT_RVALID"
- with m.Else():
- m.next = "IDLE"
- with m.Else():
- m.next = "WAIT_GRANT"
-
- def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
- ptw_pptr, vaddr, tlb_update_asid):
- # by default we start with the top-most page table
- m.d.sync += [is_instr_ptw.eq(0),
- ptw_lvl.eq(LVL1),
- global_mapping.eq(0),
- self.ptw_active_o.eq(0), # deactive (IDLE)
- ]
- # work out itlb/dtlb miss
- m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
- self.itlb_access_i & \
- ~self.itlb_hit_i & \
- ~self.dtlb_access_i)
- m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
- self.dtlb_access_i & \
- ~self.dtlb_hit_i)
- # we got an ITLB miss?
- with m.If(self.itlb_miss_o):
-            pptr = Cat(Const(0, 3), self.itlb_vaddr_i[39:48],
-                       self.satp_ppn_i)
- m.d.sync += [ptw_pptr.eq(pptr),
- is_instr_ptw.eq(1),
- vaddr.eq(self.itlb_vaddr_i),
- tlb_update_asid.eq(self.asid_i),
- ]
- self.set_grant_state(m)
-
- # we got a DTLB miss?
- with m.Elif(self.dtlb_miss_o):
-            pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[39:48],
-                       self.satp_ppn_i)
- m.d.sync += [ptw_pptr.eq(pptr),
- vaddr.eq(self.dtlb_vaddr_i),
- tlb_update_asid.eq(self.asid_i),
- ]
- self.set_grant_state(m)
-
- def grant(self, m, tag_valid, data_rvalid):
- # we've got a data WAIT_GRANT so tell the
- # cache that the tag is valid
-
- # send a request out
- m.d.comb += self.req_port_o.data_req.eq(1)
- # wait for the WAIT_GRANT
- with m.If(self.req_port_i.data_gnt):
- # send the tag valid signal one cycle later
- m.d.sync += tag_valid.eq(1)
- # should we have flushed before we got an rvalid,
- # wait for it until going back to IDLE
- with m.If(self.flush_i):
- with m.If (~data_rvalid):
- m.next = "WAIT_RVALID"
- with m.Else():
- m.next = "IDLE"
- with m.Else():
- m.next = "PTE_LOOKUP"
-
- def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4,
- data_rvalid, global_mapping,
- is_instr_ptw, ptw_pptr):
- # temporaries
- pte_rx = Signal(reset_less=True)
- pte_exe = Signal(reset_less=True)
- pte_inv = Signal(reset_less=True)
- pte_a = Signal(reset_less=True)
- st_wd = Signal(reset_less=True)
- m.d.comb += [pte_rx.eq(pte.r | pte.x),
- pte_exe.eq(~pte.x | ~pte.a),
- pte_inv.eq(~pte.v | (~pte.r & pte.w)),
- pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
- st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]
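-        # pte_rx:  leaf PTE (readable or executable)
-        # pte_exe: page is not executable, or its accessed bit is clear
-        # pte_inv: invalid encoding (v clear, or w set without r)
-        # pte_a:   accessed bit set and readable for this access
-        #          (mxr allows loads from execute-only pages)
-        # st_wd:   store to a page that is not writable or not yet dirty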
-
- l1err = Signal(reset_less=True)
- l2err = Signal(reset_less=True)
- l3err = Signal(reset_less=True)
-        m.d.comb += [l3err.eq(ptw_lvl3 & (pte.ppn[0:9] != Const(0, 9))),
-                     l2err.eq(ptw_lvl2 & (pte.ppn[0:18] != Const(0, 18))),
-                     l1err.eq(ptw_lvl1 & (pte.ppn[0:27] != Const(0, 27)))]
-
- # check if the global mapping bit is set
- with m.If (pte.g):
- m.d.sync += global_mapping.eq(1)
-
- m.next = "IDLE"
-
- # -------------
- # Invalid PTE
- # -------------
- # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
- # stop and raise a page-fault exception.
- with m.If (pte_inv):
- m.next = "PROPAGATE_ERROR"
-
- # -----------
- # Valid PTE
- # -----------
-
- # it is a valid PTE
- # if pte.r = 1 or pte.x = 1 it is a valid PTE
- with m.Elif (pte_rx):
- # Valid translation found (either 1G, 2M or 4K)
- with m.If(is_instr_ptw):
- # ------------
- # Update ITLB
- # ------------
- # If page not executable, we can directly raise error.
- # This doesn't put a useless entry into the TLB.
- # The same idea applies to the access flag since we let
- # the access flag be managed by SW.
- with m.If (pte_exe):
- m.next = "IDLE"
- with m.Else():
- m.d.comb += self.itlb_update_o.valid.eq(1)
-
- with m.Else():
- # ------------
- # Update DTLB
- # ------------
- # Check if the access flag has been set, otherwise
- # throw page-fault and let software handle those bits.
- # If page not readable (there are no write-only pages)
- # directly raise an error. This doesn't put a useless
- # entry into the TLB.
- with m.If(pte_a):
- m.d.comb += self.dtlb_update_o.valid.eq(1)
- with m.Else():
- m.next = "PROPAGATE_ERROR"
- # Request is a store: perform additional checks
- # If the request was a store and the page not
- # write-able, raise an error
- # the same applies if the dirty flag is not set
- with m.If (st_wd):
- m.d.comb += self.dtlb_update_o.valid.eq(0)
- m.next = "PROPAGATE_ERROR"
-
- # check if the ppn is correctly aligned: Case (6)
- with m.If(l1err | l2err | l3err):
- m.next = "PROPAGATE_ERROR"
- m.d.comb += [self.dtlb_update_o.valid.eq(0),
- self.itlb_update_o.valid.eq(0)]
-
- # this is a pointer to the next TLB level
- with m.Else():
- # pointer to next level of page table
- with m.If (ptw_lvl1):
- # we are in the second level now
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39], pte.ppn)
- m.d.sync += [ptw_pptr.eq(pptr),
- ptw_lvl.eq(LVL2)
- ]
- with m.If(ptw_lvl2):
- # here we received a pointer to the third level
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn)
- m.d.sync += [ptw_pptr.eq(pptr),
- ptw_lvl.eq(LVL3)
- ]
- with m.If(ptw_lvl3): #guess: shift page levels by one
- # here we received a pointer to the fourth level
- # the last one is near the page offset
- pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn)
- m.d.sync += [ptw_pptr.eq(pptr),
- ptw_lvl.eq(LVL4)
- ]
- self.set_grant_state(m)
-
- with m.If (ptw_lvl4):
- # Should already be the last level
- # page table => Error
- m.d.sync += ptw_lvl.eq(LVL4)
- m.next = "PROPAGATE_ERROR"
-
-
-if __name__ == '__main__':
- ptw = PTW()
- vl = rtlil.convert(ptw, ports=ptw.ports())
- with open("test_ptw.il", "w") as f:
- f.write(vl)
+++ /dev/null
-import sys
-from soc.TLB.ariane.plru import PLRU
-from nmigen.compat.sim import run_simulation
-
-
-def tbench(dut):
- yield
-
-
-if __name__ == "__main__":
- dut = PLRU(4)
- run_simulation(dut, tbench(dut), vcd_name="test_plru.vcd")
- print("PLRU Unit Test Success")
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from soc.TLB.ariane.ptw import PTW, PTE
-
-# unit was changed, test needs to be changed
-
-
-def tbench(dut):
-
- addr = 0x8000000
-
- #pte = PTE()
- # yield pte.v.eq(1)
- # yield pte.r.eq(1)
-
- yield dut.req_port_i.data_gnt.eq(1)
- yield dut.req_port_i.data_rvalid.eq(1)
- yield dut.req_port_i.data_rdata.eq(0x43) # pte.flatten())
-
- # data lookup
- yield dut.en_ld_st_translation_i.eq(1)
- yield dut.asid_i.eq(1)
-
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000000)
-
- yield
- yield
- yield
-
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x200000)
-
- yield
- yield
- yield
-
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000011)
-
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
-
- # data lookup, PTW levels 1-2-3
- addr = 0x4000000
- yield dut.dtlb_vaddr_i.eq(addr)
- yield dut.mxr_i.eq(0x1)
- yield dut.req_port_i.data_gnt.eq(1)
- yield dut.req_port_i.data_rvalid.eq(1)
- # pte.flatten())
- yield dut.req_port_i.data_rdata.eq(0x41 | (addr >> 12) << 10)
-
- yield dut.en_ld_st_translation_i.eq(1)
- yield dut.asid_i.eq(1)
-
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(addr)
-
- yield
- yield
- yield
- yield
- yield
- yield
- yield
- yield
-
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000011)
-
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
- yield
- yield
-
- # instruction lookup
- yield dut.en_ld_st_translation_i.eq(0)
- yield dut.enable_translation_i.eq(1)
- yield dut.asid_i.eq(1)
-
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x800000)
-
- yield
- yield
- yield
-
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x200000)
-
- yield
- yield
- yield
-
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x800011)
-
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
-
- yield
-
-
-def test_ptw():
- dut = PTW()
- run_simulation(dut, tbench(dut), vcd_name="test_ptw.vcd")
- print("PTW Unit Test Success")
-
-
-if __name__ == "__main__":
- test_ptw()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-
-from soc.TLB.ariane.tlb import TLB
-
-
-def set_vaddr(addr):
- yield dut.lu_vaddr_i.eq(addr)
- yield dut.update_i.vpn.eq(addr >> 12)
-
-
-def tbench(dut):
- yield dut.lu_access_i.eq(1)
- yield dut.lu_asid_i.eq(1)
- yield dut.update_i.valid.eq(1)
- yield dut.update_i.is_1G.eq(0)
- yield dut.update_i.is_2M.eq(0)
- yield dut.update_i.asid.eq(1)
- yield dut.update_i.content.ppn.eq(0)
- yield dut.update_i.content.rsw.eq(0)
- yield dut.update_i.content.r.eq(1)
-
- yield
-
- addr = 0x80000
- yield from set_vaddr(addr)
- yield
-
- addr = 0x90001
- yield from set_vaddr(addr)
- yield
-
- addr = 0x28000000
- yield from set_vaddr(addr)
- yield
-
- addr = 0x28000001
- yield from set_vaddr(addr)
-
- addr = 0x28000001
- yield from set_vaddr(addr)
- yield
-
- addr = 0x1000040000
- yield from set_vaddr(addr)
- yield
-
- addr = 0x1000040001
- yield from set_vaddr(addr)
- yield
-
- yield dut.update_i.is_1G.eq(1)
- addr = 0x2040000
- yield from set_vaddr(addr)
- yield
-
- yield dut.update_i.is_1G.eq(1)
- addr = 0x2040001
- yield from set_vaddr(addr)
- yield
-
- yield
-
-
-if __name__ == "__main__":
- dut = TLB()
- run_simulation(dut, tbench(dut), vcd_name="test_tlb.vcd")
- print("TLB Unit Test Success")
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-
-from soc.TLB.ariane.tlb_content import TLBContent
-from soc.TestUtil.test_helper import assert_op, assert_eq
-
-
-def update(dut, a, t, g, m):
- yield dut.replace_en_i.eq(1)
- yield dut.update_i.valid.eq(1)
- yield dut.update_i.is_512G.eq(t)
- yield dut.update_i.is_1G.eq(g)
- yield dut.update_i.is_2M.eq(m)
- yield dut.update_i.vpn.eq(a)
- yield
- yield
-
-
-def check_hit(dut, hit, pagesize):
- hit_d = yield dut.lu_hit_o
- assert_eq("hit", hit_d, hit)
-
- if(hit):
- if(pagesize == "t"):
- hitp = yield dut.lu_is_512G_o
- assert_eq("lu_is_512G_o", hitp, 1)
- elif(pagesize == "g"):
- hitp = yield dut.lu_is_1G_o
- assert_eq("lu_is_1G_o", hitp, 1)
- elif(pagesize == "m"):
- hitp = yield dut.lu_is_2M_o
- assert_eq("lu_is_2M_o", hitp, 1)
-
-
-def addr(a, b, c, d):
- return a | b << 9 | c << 18 | d << 27
-
-
-def tbench(dut):
- yield dut.vpn0.eq(0x0A)
- yield dut.vpn1.eq(0x0B)
- yield dut.vpn2.eq(0x0C)
- yield dut.vpn3.eq(0x0D)
- yield from update(dut, addr(0xFF, 0xFF, 0xFF, 0x0D), 1, 0, 0)
- yield from check_hit(dut, 1, "t")
-
- yield from update(dut, addr(0xFF, 0xFF, 0x0C, 0x0D), 0, 1, 0)
- yield from check_hit(dut, 1, "g")
-
- yield from update(dut, addr(0xFF, 0x0B, 0x0C, 0x0D), 0, 0, 1)
- yield from check_hit(dut, 1, "m")
-
- yield from update(dut, addr(0x0A, 0x0B, 0x0C, 0x0D), 0, 0, 0)
- yield from check_hit(dut, 1, "")
-
- yield from update(dut, addr(0xAA, 0xBB, 0xCC, 0xDD), 0, 0, 0)
- yield from check_hit(dut, 0, "miss")
-
-
-if __name__ == "__main__":
- dut = TLBContent(4, 4)
- #
- run_simulation(dut, tbench(dut), vcd_name="test_tlb_content.vcd")
- print("TLBContent Unit Test Success")
+++ /dev/null
-"""
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-#
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 21.4.2017
-# Description: Translation Lookaside Buffer, SV48
-# fully set-associative
-
-Implementation in c++:
-https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp
-
-Text description:
-https://people.cs.clemson.edu/~mark/464/p_lru.txt
-
-Online simulator:
-http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html
-"""
-from math import log2
-from nmigen import Signal, Module, Cat, Const, Array, Elaboratable
-from nmigen.cli import verilog, rtlil
-from nmigen.lib.coding import Encoder
-
-from soc.TLB.ariane.ptw import TLBUpdate, PTE, ASID_WIDTH
-from soc.TLB.ariane.plru import PLRU
-from soc.TLB.ariane.tlb_content import TLBContent
-
-TLB_ENTRIES = 8
-
-
-class TLB(Elaboratable):
- def __init__(self, tlb_entries=8, asid_width=8):
- self.tlb_entries = tlb_entries
- self.asid_width = asid_width
-
- self.flush_i = Signal() # Flush signal
- # Lookup signals
- self.lu_access_i = Signal()
- self.lu_asid_i = Signal(self.asid_width)
- self.lu_vaddr_i = Signal(64)
- self.lu_content_o = PTE()
- self.lu_is_2M_o = Signal()
- self.lu_is_1G_o = Signal()
- self.lu_is_512G_o = Signal()
- self.lu_hit_o = Signal()
- # Update TLB
- self.pte_width = len(self.lu_content_o.flatten())
- self.update_i = TLBUpdate(asid_width)
-
- def elaborate(self, platform):
- m = Module()
-
-        vpn3 = Signal(9)
- vpn2 = Signal(9)
- vpn1 = Signal(9)
- vpn0 = Signal(9)
-
- # -------------
- # Translation
- # -------------
-
- # SV48 defines four levels of page tables
- m.d.comb += [vpn0.eq(self.lu_vaddr_i[12:21]),
- vpn1.eq(self.lu_vaddr_i[21:30]),
- vpn2.eq(self.lu_vaddr_i[30:39]),
-                     vpn3.eq(self.lu_vaddr_i[39:48]),
- ]
-
- tc = []
- for i in range(self.tlb_entries):
- tlc = TLBContent(self.pte_width, self.asid_width)
- setattr(m.submodules, "tc%d" % i, tlc)
- tc.append(tlc)
- # connect inputs
- tlc.update_i = self.update_i # saves a lot of graphviz links
-            m.d.comb += [tlc.vpn0.eq(vpn0),
-                         tlc.vpn1.eq(vpn1),
-                         tlc.vpn2.eq(vpn2),
-                         tlc.vpn3.eq(vpn3), # 4th level (512G pages)
-                         tlc.flush_i.eq(self.flush_i),
-                         # tlc.update_i.eq(self.update_i),
-                         tlc.lu_asid_i.eq(self.lu_asid_i)]
- tc = Array(tc)
-
- # --------------
- # Select hit
- # --------------
-
- # use Encoder to select hit index
- # XXX TODO: assert that there's only one valid entry (one lu_hit)
- hitsel = Encoder(self.tlb_entries)
- m.submodules.hitsel = hitsel
-
- hits = []
- for i in range(self.tlb_entries):
- hits.append(tc[i].lu_hit_o)
- m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
- idx = hitsel.o
-
- active = Signal(reset_less=True)
- m.d.comb += active.eq(~hitsel.n)
- with m.If(active):
- # active hit, send selected as output
- m.d.comb += [self.lu_is_512G_o.eq(tc[idx].lu_is_512G_o),
- self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
- self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
- self.lu_hit_o.eq(1),
- self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
- ]
-
- # --------------
- # PLRU.
- # --------------
-
- p = PLRU(self.tlb_entries)
- plru_tree = Signal(p.TLBSZ)
- m.submodules.plru = p
-
- # connect PLRU inputs/outputs
- # XXX TODO: assert that there's only one valid entry (one replace_en)
- en = []
- for i in range(self.tlb_entries):
- en.append(tc[i].replace_en_i)
- m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
- p.lu_hit.eq(hitsel.i),
- p.lu_access_i.eq(self.lu_access_i),
- p.plru_tree.eq(plru_tree)]
- m.d.sync += plru_tree.eq(p.plru_tree_o)
-
- # --------------
- # Sanity checks
- # --------------
-
- assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
- "TLB size must be a multiple of 2 and greater than 1"
- assert (self.asid_width >= 1), \
- "ASID width must be at least 1"
-
- return m
-
- """
- # Just for checking
- function int countSetBits(logic[self.tlb_entries-1:0] vector);
- automatic int count = 0;
- foreach (vector[idx]) begin
- count += vector[idx];
- end
- return count;
- endfunction
-
- assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
- else $error("More then one hit in TLB!"); $stop(); end
- assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
- else $error("More then one TLB entry selected for next replace!");
- """
-
- def ports(self):
- return [self.flush_i, self.lu_access_i,
- self.lu_asid_i, self.lu_vaddr_i,
-                self.lu_is_2M_o, self.lu_is_1G_o, self.lu_is_512G_o, self.lu_hit_o
- ] + self.lu_content_o.ports() + self.update_i.ports()
-
-
-if __name__ == '__main__':
- tlb = TLB()
- vl = rtlil.convert(tlb, ports=tlb.ports())
- with open("test_tlb.il", "w") as f:
- f.write(vl)
+++ /dev/null
-from nmigen import Signal, Module, Cat, Const, Elaboratable
-
-from soc.TLB.ariane.ptw import TLBUpdate, PTE
-
-
-class TLBEntry:
- def __init__(self, asid_width):
- self.asid = Signal(asid_width, name="ent_asid")
- # SV48 defines four levels of page tables
- self.vpn0 = Signal(9, name="ent_vpn0")
- self.vpn1 = Signal(9, name="ent_vpn1")
- self.vpn2 = Signal(9, name="ent_vpn2")
- self.vpn3 = Signal(9, name="ent_vpn3")
- self.is_2M = Signal(name="ent_is_2M")
- self.is_1G = Signal(name="ent_is_1G")
- self.is_512G = Signal(name="ent_is_512G")
- self.valid = Signal(name="ent_valid")
-
- def flatten(self):
- return Cat(*self.ports())
-
- def eq(self, x):
- return self.flatten().eq(x.flatten())
-
- def ports(self):
-        return [self.asid, self.vpn0, self.vpn1, self.vpn2, self.vpn3,
-                self.is_2M, self.is_1G, self.is_512G, self.valid]
-
-
-class TLBContent(Elaboratable):
- def __init__(self, pte_width, asid_width):
- self.asid_width = asid_width
- self.pte_width = pte_width
- self.flush_i = Signal() # Flush signal
- # Update TLB
- self.update_i = TLBUpdate(asid_width)
- self.vpn3 = Signal(9)
- self.vpn2 = Signal(9)
- self.vpn1 = Signal(9)
- self.vpn0 = Signal(9)
- self.replace_en_i = Signal() # replace the following entry,
- # set by replacement strategy
- # Lookup signals
- self.lu_asid_i = Signal(asid_width)
- self.lu_content_o = Signal(pte_width)
- self.lu_is_512G_o = Signal()
- self.lu_is_2M_o = Signal()
- self.lu_is_1G_o = Signal()
- self.lu_hit_o = Signal()
-
- def elaborate(self, platform):
- m = Module()
-
- tags = TLBEntry(self.asid_width)
-
- content = Signal(self.pte_width)
-
- m.d.comb += [self.lu_hit_o.eq(0),
- self.lu_is_512G_o.eq(0),
- self.lu_is_2M_o.eq(0),
- self.lu_is_1G_o.eq(0)]
-
- # temporaries for lookup
- asid_ok = Signal(reset_less=True)
- # tags_ok = Signal(reset_less=True)
-
- vpn3_ok = Signal(reset_less=True)
- vpn2_ok = Signal(reset_less=True)
- vpn1_ok = Signal(reset_less=True)
- vpn0_ok = Signal(reset_less=True)
-
- #tags_2M = Signal(reset_less=True)
- vpn0_or_2M = Signal(reset_less=True)
-
- m.d.comb += [
- # compare asid and vpn*
- asid_ok.eq(tags.asid == self.lu_asid_i),
- vpn3_ok.eq(tags.vpn3 == self.vpn3),
- vpn2_ok.eq(tags.vpn2 == self.vpn2),
- vpn1_ok.eq(tags.vpn1 == self.vpn1),
- vpn0_ok.eq(tags.vpn0 == self.vpn0),
- vpn0_or_2M.eq(tags.is_2M | vpn0_ok)
- ]
-
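-        # page-size match requirements checked below:
-        #   512G: vpn3 only     1G: vpn3 and vpn2
-        #   2M:   vpn3, vpn2, vpn1     4K: vpn3, vpn2, vpn1, vpn0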
- with m.If(asid_ok & tags.valid):
- # first level, only vpn3 needs to match
- with m.If(tags.is_512G & vpn3_ok):
- m.d.comb += [self.lu_content_o.eq(content),
- self.lu_is_512G_o.eq(1),
- self.lu_hit_o.eq(1),
- ]
- # second level , second level vpn2 and vpn3 need to match
- with m.Elif(tags.is_1G & vpn2_ok & vpn3_ok):
- m.d.comb += [self.lu_content_o.eq(content),
- self.lu_is_1G_o.eq(1),
- self.lu_hit_o.eq(1),
- ]
-            # not a giga page nor a tera page hit, so check further:
-            # vpn3 and vpn2 must still match at this level
-            with m.Elif(vpn1_ok & vpn2_ok & vpn3_ok):
- # this could be a 2 mega page hit or a 4 kB hit
- # output accordingly
- with m.If(vpn0_or_2M):
- m.d.comb += [self.lu_content_o.eq(content),
- self.lu_is_2M_o.eq(tags.is_2M),
- self.lu_hit_o.eq(1),
- ]
- # ------------------
- # Update or Flush
- # ------------------
-
- # temporaries
- replace_valid = Signal(reset_less=True)
- m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
-
- # flush
- with m.If(self.flush_i):
- # invalidate (flush) conditions: all if zero or just this ASID
-            with m.If((self.lu_asid_i == Const(0, self.asid_width)) |
-                      (self.lu_asid_i == tags.asid)):
- m.d.sync += tags.valid.eq(0)
-
- # normal replacement
- with m.Elif(replace_valid):
- m.d.sync += [ # update tag array
- tags.asid.eq(self.update_i.asid),
- tags.vpn3.eq(self.update_i.vpn[27:36]),
- tags.vpn2.eq(self.update_i.vpn[18:27]),
- tags.vpn1.eq(self.update_i.vpn[9:18]),
- tags.vpn0.eq(self.update_i.vpn[0:9]),
- tags.is_512G.eq(self.update_i.is_512G),
- tags.is_1G.eq(self.update_i.is_1G),
- tags.is_2M.eq(self.update_i.is_2M),
- tags.valid.eq(1),
- # and content as well
- content.eq(self.update_i.content.flatten())
- ]
- return m
-
- def ports(self):
- return [self.flush_i,
- self.lu_asid_i,
- self.lu_is_2M_o, self.lu_is_1G_o, self.lu_is_512G_o, self.lu_hit_o,
- ] + self.update_i.content.ports() + self.update_i.ports()
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from soc.TLB.LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3
-
-from nmigen.back.pysim import Simulator, Delay, Tick
-import unittest
-
-
-class TestLFSR(unittest.TestCase):
- def test_poly(self):
- v = LFSRPolynomial()
- self.assertEqual(repr(v), "LFSRPolynomial([0])")
- self.assertEqual(str(v), "1")
- v = LFSRPolynomial([1])
- self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
- self.assertEqual(str(v), "x + 1")
- v = LFSRPolynomial([0, 1])
- self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
- self.assertEqual(str(v), "x + 1")
- v = LFSRPolynomial([1, 2])
- self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])")
- self.assertEqual(str(v), "x^2 + x + 1")
- v = LFSRPolynomial([2])
- self.assertEqual(repr(v), "LFSRPolynomial([2, 0])")
- self.assertEqual(str(v), "x^2 + 1")
- self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1")
-
- def test_lfsr_3(self):
- module = LFSR(LFSR_POLY_3)
- traces = [module.state, module.enable]
- with Simulator(module,
- vcd_file=open("Waveforms/test_LFSR2.vcd", "w"),
- gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"),
- traces=traces) as sim:
- sim.add_clock(1e-6, phase=0.25e-6)
- delay = Delay(1e-7)
-
- def async_process():
- yield module.enable.eq(0)
- yield Tick()
- self.assertEqual((yield module.state), 0x1)
- yield Tick()
- self.assertEqual((yield module.state), 0x1)
- yield module.enable.eq(1)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x2)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x5)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x3)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x7)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x6)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x4)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x1)
- yield Tick()
-
- sim.add_process(async_process)
- sim.run()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from soc.TLB.AddressEncoder import AddressEncoder
-from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op
-
-
-# This function allows for the easy setting of values to the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# i (Input): The array of single bits to be written
-def set_encoder(dut, i):
- yield dut.i.eq(i)
- yield
-
-# Checks the single match of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# sm (Single Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-
-
-def check_single_match(dut, sm, op):
- out_sm = yield dut.single_match
- assert_op("Single Match", out_sm, sm, op)
-
-# Checks the multiple match of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# mm (Multiple Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-
-
-def check_multiple_match(dut, mm, op):
- out_mm = yield dut.multiple_match
- assert_op("Multiple Match", out_mm, mm, op)
-
-# Checks the output of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# o (Output): The expected output
-# op (Operation): (0 => ==), (1 => !=)
-
-
-def check_output(dut, o, op):
- out_o = yield dut.o
- assert_op("Output", out_o, o, op)
-
-# Checks the state of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# sm (Single Match): The expected match result
-# mm (Multiple Match): The expected match result
-# o (Output): The expected output
-#   sm_op (Operation): Operation for the single match assertion (0 => ==), (1 => !=)
-#   mm_op (Operation): Operation for the multiple match assertion (0 => ==), (1 => !=)
-#   o_op (Operation): Operation for the output assertion (0 => ==), (1 => !=)
-
-
-def check_all(dut, sm, mm, o, sm_op, mm_op, o_op):
- yield from check_single_match(dut, sm, sm_op)
- yield from check_multiple_match(dut, mm, mm_op)
- yield from check_output(dut, o, o_op)
-
-
-def tbench(dut):
- # Check invalid input
- in_val = 0b000
- single_match = 0
- multiple_match = 0
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
-
- # Check single bit
- in_val = 0b001
- single_match = 1
- multiple_match = 0
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
-
- # Check another single bit
- in_val = 0b100
- single_match = 1
- multiple_match = 0
- output = 2
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
-
- # Check multiple match
-    # We expect the address of the lowest set bit to be returned, which is 0
- in_val = 0b101
- single_match = 0
- multiple_match = 1
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
-
- # Check another multiple match
-    # We expect the address of the lowest set bit to be returned, which is 1
- in_val = 0b110
- single_match = 0
- multiple_match = 1
- output = 1
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
-
-
-def test_addr():
- dut = AddressEncoder(4)
- run_simulation(dut, tbench(dut),
- vcd_name="Waveforms/test_address_encoder.vcd")
- print("AddressEncoder Unit Test Success")
-
-
-if __name__ == "__main__":
- test_addr()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-
-from soc.TLB.Cam import Cam
-
-from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op
-
-# This function allows for the easy setting of values to the Cam
-# Arguments:
-# dut: The Cam being tested
-# e (Enable): Whether the block is going to be enabled
-# we (Write Enable): Whether the Cam will write on the next cycle
-# a (Address): Where the data will be written if write enable is high
-# d (Data): Either what we are looking for or will write to the address
-
-
-def set_cam(dut, e, we, a, d):
- yield dut.enable.eq(e)
- yield dut.write_enable.eq(we)
- yield dut.address_in.eq(a)
- yield dut.data_in.eq(d)
- yield
-
-# Checks the multiple match of the Cam
-# Arguments:
-# dut: The Cam being tested
-# mm (Multiple Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-
-
-def check_multiple_match(dut, mm, op):
- out_mm = yield dut.multiple_match
- assert_op("Multiple Match", out_mm, mm, op)
-
-# Checks the single match of the Cam
-# Arguments:
-# dut: The Cam being tested
-# sm (Single Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-
-
-def check_single_match(dut, sm, op):
- out_sm = yield dut.single_match
- assert_op("Single Match", out_sm, sm, op)
-
-# Checks the address output of the Cam
-# Arguments:
-# dut: The Cam being tested
-# ma (Match Address): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-
-
-def check_match_address(dut, ma, op):
- out_ma = yield dut.match_address
- assert_op("Match Address", out_ma, ma, op)
-
-# Checks the state of the Cam
-# Arguments:
-# dut: The Cam being tested
-# sm (Single Match): The expected match result
-# mm (Multiple Match): The expected match result
-# ma: (Match Address): The expected address output
-#   sm_op (Operation): Operation for the single match assertion (0 => ==), (1 => !=)
-#   mm_op (Operation): Operation for the multiple match assertion (0 => ==), (1 => !=)
-#   ma_op (Operation): Operation for the address assertion (0 => ==), (1 => !=)
-
-
-def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op):
- yield from check_multiple_match(dut, mm, mm_op)
- yield from check_single_match(dut, sm, sm_op)
- yield from check_match_address(dut, ma, ma_op)
-
-
-def tbench(dut):
- # NA
- enable = 0
- write_enable = 0
- address = 0
- data = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
-
- # Read Miss Multiple
- # Note that the default starting entry data bits are all 0
- enable = 1
- write_enable = 0
- address = 0
- data = 0
- multiple_match = 1
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_multiple_match(dut, multiple_match, 0)
-
- # Read Miss
- # Note that the default starting entry data bits are all 0
- enable = 1
- write_enable = 0
- address = 0
- data = 1
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
-
- # Write Entry 0
- enable = 1
- write_enable = 1
- address = 0
- data = 4
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
-
- # Read Hit Entry 0
- enable = 1
- write_enable = 0
- address = 0
- data = 4
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
-
- # Search Hit
- enable = 1
- write_enable = 0
- address = 0
- data = 4
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
-
- # Search Miss
- enable = 1
- write_enable = 0
- address = 0
- data = 5
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
-
- # Multiple Match test
- # Write Entry 1
- enable = 1
- write_enable = 1
- address = 1
- data = 5
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
-
- # Write Entry 2
- # Same data as Entry 1
- enable = 1
- write_enable = 1
- address = 2
- data = 5
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
-
- # Read Hit Data 5
- enable = 1
- write_enable = 0
- address = 1
- data = 5
- multiple_match = 1
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
-
- # Verify read_warning is not caused
- # Write Entry 0
- enable = 1
- write_enable = 1
- address = 0
- data = 7
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
-    # Note: no yield here; the read is attempted immediately on the next cycle
-
- # Read Hit Data 7
- enable = 1
- write_enable = 0
- address = 0
- data = 7
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
-
- yield
-
-
-def test_cam():
- dut = Cam(4, 4)
- run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam.vcd")
- print("Cam Unit Test Success")
-
-
-if __name__ == "__main__":
- test_cam()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-
-from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op
-from soc.TLB.CamEntry import CamEntry
-
-# This function allows for the easy setting of values to the Cam Entry
-# Arguments:
-# dut: The CamEntry being tested
-# c (command): NA (0), Read (1), Write (2), Reserve (3)
-# d (data): The data to be set
-
-
-def set_cam_entry(dut, c, d):
- # Write desired values
- yield dut.command.eq(c)
- yield dut.data_in.eq(d)
- yield
- # Reset all lines
- yield dut.command.eq(0)
- yield dut.data_in.eq(0)
- yield
-
-# Checks the data state of the CAM entry
-# Arguments:
-# dut: The CamEntry being tested
-# d (Data): The expected data
-# op (Operation): (0 => ==), (1 => !=)
-
-
-def check_data(dut, d, op):
- out_d = yield dut.data
- assert_op("Data", out_d, d, op)
-
-# Checks the match state of the CAM entry
-# Arguments:
-# dut: The CamEntry being tested
-# m (Match): The expected match
-# op (Operation): (0 => ==), (1 => !=)
-
-
-def check_match(dut, m, op):
- out_m = yield dut.match
- assert_op("Match", out_m, m, op)
-
-# Checks the state of the CAM entry
-# Arguments:
-# dut: The CamEntry being tested
-# d (data): The expected data
-# m (match): The expected match
-# d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=)
-# m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-
-
-def check_all(dut, d, m, d_op, m_op):
- yield from check_data(dut, d, d_op)
- yield from check_match(dut, m, m_op)
-
-# This tbench exercises the CamEntry module by writing various key/data
-# pairs and then reading them back with matching and non-matching keys,
-# verifying that the stored data and the match flag are correct.
-
-
-def tbench(dut):
- # Check write
- command = 2
- data = 1
- match = 0
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 0, 0)
-
- # Check read miss
- command = 1
- data = 2
- match = 0
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 1, 0)
-
- # Check read hit
- command = 1
- data = 1
- match = 1
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 0, 0)
-
- # Check overwrite
- command = 2
- data = 5
- match = 0
- yield from set_cam_entry(dut, command, data)
- yield
- yield from check_all(dut, data, match, 0, 0)
-
- # Check read hit
- command = 1
- data = 5
- match = 1
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 0, 0)
-
- # Check reset
- command = 3
- data = 0
- match = 0
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 0, 0)
-
- # Extra clock cycle for waveform
- yield
-
-
-def test_camentry():
- dut = CamEntry(4)
- run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam_entry.vcd")
- print("CamEntry Unit Test Success")
-
-
-if __name__ == "__main__":
- test_camentry()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-
-from soc.TLB.PermissionValidator import PermissionValidator
-
-from soc.TestUtil.test_helper import assert_op
-
-
-def set_validator(dut, d, xwr, sm, sa, asid):
- yield dut.data.eq(d)
- yield dut.xwr.eq(xwr)
- yield dut.super_mode.eq(sm)
- yield dut.super_access.eq(sa)
- yield dut.asid.eq(asid)
- yield
-
-
-def check_valid(dut, v, op):
- out_v = yield dut.valid
- assert_op("Valid", out_v, v, op)
-
-
-def tbench(dut):
-    # 80 bits are represented; the MSB is ignored as it will be truncated.
-    # The ASID occupies the first 4 hex digits (bits 64-78).
-
- # Test user mode entry valid
- # Global Bit matching ASID
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000031
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
- # Test user mode entry valid
- # Global Bit nonmatching ASID
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000031
- # Ignore MSB it will be truncated
- asid = 0x7FF6
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
- # Test user mode entry invalid
- # Global Bit nonmatching ASID
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000021
- # Ignore MSB it will be truncated
- asid = 0x7FF6
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 0
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
- # Test user mode entry valid
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000011
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
- # Test user mode entry invalid
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000011
- # Ignore MSB it will be truncated
- asid = 0x7FF6
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 0
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
- # Test supervisor mode entry valid
- # The entry is NOT in user mode
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000001
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 1
- super_access = 0
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
- # Test supervisor mode entry invalid
- # The entry is in user mode
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000011
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 1
- super_access = 0
- xwr = 0
- valid = 0
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
- # Test supervisor mode entry valid
- # The entry is NOT in user mode with access
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000001
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 1
- super_access = 1
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
- # Test supervisor mode entry valid
- # The entry is in user mode with access
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000011
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 1
- super_access = 1
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
-
-
-def test_permv():
- dut = PermissionValidator(15, 64)
- run_simulation(dut, tbench(
- dut), vcd_name="Waveforms/test_permission_validator.vcd")
- print("PermissionValidator Unit Test Success")
-
-
-if __name__ == "__main__":
- test_permv()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-
-from soc.TLB.PteEntry import PteEntry
-
-from soc.TestUtil.test_helper import assert_op
-
-
-def set_entry(dut, i):
- yield dut.i.eq(i)
- yield
-
-
-def check_dirty(dut, d, op):
- out_d = yield dut.d
- assert_op("Dirty", out_d, d, op)
-
-
-def check_accessed(dut, a, op):
- out_a = yield dut.a
- assert_op("Accessed", out_a, a, op)
-
-
-def check_global(dut, o, op):
- out = yield dut.g
- assert_op("Global", out, o, op)
-
-
-def check_user(dut, o, op):
- out = yield dut.u
- assert_op("User Mode", out, o, op)
-
-
-def check_xwr(dut, o, op):
- out = yield dut.xwr
- assert_op("XWR", out, o, op)
-
-
-def check_asid(dut, o, op):
- out = yield dut.asid
- assert_op("ASID", out, o, op)
-
-
-def check_pte(dut, o, op):
- out = yield dut.pte
-    assert_op("PTE", out, o, op)
-
-
-def check_valid(dut, v, op):
- out_v = yield dut.v
- assert_op("Valid", out_v, v, op)
-
-
-def check_all(dut, d, a, g, u, xwr, v, asid, pte):
- yield from check_dirty(dut, d, 0)
- yield from check_accessed(dut, a, 0)
- yield from check_global(dut, g, 0)
- yield from check_user(dut, u, 0)
- yield from check_xwr(dut, xwr, 0)
- yield from check_asid(dut, asid, 0)
- yield from check_pte(dut, pte, 0)
- yield from check_valid(dut, v, 0)
-
-
-def tbench(dut):
-    # 80 bits are represented; the MSB is ignored as it will be truncated.
-    # The ASID occupies the first 4 hex digits (bits 64-78).
-
- i = 0x7FFF0000000000000031
- dirty = 0
- access = 0
- glob = 1
- user = 1
- xwr = 0
- valid = 1
- asid = 0x7FFF
- pte = 0x0000000000000031
- yield from set_entry(dut, i)
- yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
-
- i = 0x0FFF00000000000000FF
- dirty = 1
- access = 1
- glob = 1
- user = 1
- xwr = 7
- valid = 1
- asid = 0x0FFF
- pte = 0x00000000000000FF
- yield from set_entry(dut, i)
- yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
-
- i = 0x0721000000001100001F
- dirty = 0
- access = 0
- glob = 0
- user = 1
- xwr = 7
- valid = 1
- asid = 0x0721
- pte = 0x000000001100001F
- yield from set_entry(dut, i)
- yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
-
- yield
-
-
-def test_pteentry():
- dut = PteEntry(15, 64)
- run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_pte_entry.vcd")
- print("PteEntry Unit Test Success")
-
-
-if __name__ == "__main__":
- test_pteentry()
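-
-
-# For reference, the expected values used in tbench above can be derived
-# with a small software decoder.  This is a sketch only: it assumes the
-# field layout implied by the test vectors (the low 64 bits are the PTE
-# with the usual V/XWR/U/G/A/D flag bits, bits 64-78 hold the ASID).
-def decode_pte_entry(i):
-    """Decode an 80-bit input value the way the test vectors expect."""
-    return {
-        "v": (i >> 0) & 1,            # valid
-        "xwr": (i >> 1) & 0b111,      # execute / write / read
-        "u": (i >> 4) & 1,            # user mode
-        "g": (i >> 5) & 1,            # global
-        "a": (i >> 6) & 1,            # accessed
-        "d": (i >> 7) & 1,            # dirty
-        "asid": (i >> 64) & 0x7FFF,   # bits 64-78 (the MSB is truncated)
-        "pte": i & ((1 << 64) - 1),   # low 64 bits: the PTE itself
-    }
-
-
-# e.g. decode_pte_entry(0x7FFF0000000000000031) gives v=1, u=1, g=1,
-# asid=0x7FFF and pte=0x31, matching the first set of expected values above.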
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-
-from soc.TLB.SetAssociativeCache import SetAssociativeCache
-
-from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op
-
-
-def set_sac(dut, e, c, s, t, d):
- yield dut.enable.eq(e)
- yield dut.command.eq(c)
- yield dut.cset.eq(s)
- yield dut.tag.eq(t)
- yield dut.data_i.eq(d)
- yield
-
-
-def tbench(dut):
- enable = 1
- command = 2
- cset = 1
- tag = 2
- data = 3
- yield from set_sac(dut, enable, command, cset, tag, data)
- yield
-
- enable = 1
- command = 2
- cset = 1
- tag = 5
- data = 8
- yield from set_sac(dut, enable, command, cset, tag, data)
- yield
-
-
-def test_assoc_cache():
- dut = SetAssociativeCache(4, 4, 4, 4)
- run_simulation(dut, tbench(
- dut), vcd_name="Waveforms/test_set_associative_cache.vcd")
- print("Set Associative Cache Unit Test Success")
-
-
-if __name__ == "__main__":
- test_assoc_cache()
+++ /dev/null
-#import tracemalloc
-# tracemalloc.start()
-
-from nmigen.compat.sim import run_simulation
-
-from soc.TLB.TLB import TLB
-
-from soc.TestUtil.test_helper import assert_op, assert_eq
-
-# self.supermode = Signal(1) # Supervisor Mode
-# self.super_access = Signal(1) # Supervisor Access
-# self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
-# self.xwr = Signal(3) # Execute, Write, Read
-# self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
-#self.address_L1 = Signal(range(L1_size))
-# self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
-# self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
-# self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
-#
-# self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
-# self.perm_valid = Signal(1) # Denotes if the permissions are correct
-# self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
-
-COMMAND_READ = 1
-COMMAND_WRITE_L1 = 2
-
-# Checks the hit state of the TLB
-# Arguments:
-#   dut: The TLB being tested
-#   d (Hit): The expected hit result
-
-
-def check_hit(dut, d):
- hit_d = yield dut.hit
- #assert_eq("hit", hit_d, d)
-
-
-def tst_command(dut, cmd, xwr, cycles):
- yield dut.command.eq(cmd)
- yield dut.xwr.eq(xwr)
- for i in range(0, cycles):
- yield
-
-
-def tst_write_L1(dut, vma, address_L1, asid, pte_in):
- yield dut.address_L1.eq(address_L1)
- yield dut.asid.eq(asid)
- yield dut.vma.eq(vma)
- yield dut.pte_in.eq(pte_in)
- yield from tst_command(dut, COMMAND_WRITE_L1, 7, 2)
-
-
-def tst_search(dut, vma, found):
- yield dut.vma.eq(vma)
- yield from tst_command(dut, COMMAND_READ, 7, 1)
- yield from check_hit(dut, found)
-
-
-def zero(dut):
- yield dut.supermode.eq(0)
- yield dut.super_access.eq(0)
- yield dut.mode.eq(0)
- yield dut.address_L1.eq(0)
- yield dut.asid.eq(0)
- yield dut.vma.eq(0)
- yield dut.pte_in.eq(0)
-
-
-def tbench(dut):
- yield from zero(dut)
- yield dut.mode.eq(0xF) # enable TLB
- # test hit
- yield from tst_write_L1(dut, 0xFEEDFACE, 0, 0xFFFF, 0xF0F0)
- yield from tst_search(dut, 0xFEEDFACE, 1)
- yield from tst_search(dut, 0xFACEFEED, 0)
-
-
-def test_tlb():
- dut = TLB(15, 36, 64, 8)
- run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_tlb.vcd")
- print("TLB Unit Test Success")
-
-
-if __name__ == "__main__":
- test_tlb()
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal
-
-# TODO: PortInterface and LDSTSplitter are used below and still need to be
-# imported from their defining modules.
-
-
-class DualPortSplitter(Elaboratable):
- """DualPortSplitter
-
- * one incoming PortInterface
- * two *OUTGOING* PortInterfaces
- * uses LDSTSplitter to do it
-
- (actually, thinking about it LDSTSplitter could simply be
- modified to conform to PortInterface: one in, two out)
-
- once that is done each pair of ports may be wired directly
- to the dual ports of L0CacheBuffer
-
- The split is carried out so that, regardless of alignment or
- mis-alignment, outgoing PortInterface[0] takes bit 4 == 0
- of the address, whilst outgoing PortInterface[1] takes
- bit 4 == 1.
-
- PortInterface *may* need to be changed so that the length is
- a binary number (accepting values 1-16).
- """
-
-    def __init__(self, inp):
- self.outp = [PortInterface(name="outp_0"),
- PortInterface(name="outp_1")]
- print(self.outp)
-
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
- m.submodules.splitter = splitter = LDSTSplitter(64, 48, 4)
- self.inp = splitter.pi
- comb += splitter.addr_i.eq(self.inp.addr) # XXX
- #comb += splitter.len_i.eq()
- #comb += splitter.valid_i.eq()
- comb += splitter.is_ld_i.eq(self.inp.is_ld_i)
- comb += splitter.is_st_i.eq(self.inp.is_st_i)
- #comb += splitter.st_data_i.eq()
- #comb += splitter.sld_valid_i.eq()
- #comb += splitter.sld_data_i.eq()
- #comb += splitter.sst_valid_i.eq()
- return m
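-
-
-# The bit-4 steering rule described in the docstring can be illustrated with
-# a tiny combinatorial sketch.  This is not the DualPortSplitter
-# implementation, only an illustration of the intended selection: bit 4 of
-# the address picks which of the two outgoing ports sees the request.
-class PortSelectSketch(Elaboratable):
-    def __init__(self, addr_width=48):
-        self.addr = Signal(addr_width)   # incoming request address
-        self.req = Signal()              # incoming request valid
-        self.req_o = [Signal(name="req_o_0"), Signal(name="req_o_1")]
-
-    def elaborate(self, platform):
-        m = Module()
-        # bit 4 == 0 goes to port 0, bit 4 == 1 goes to port 1
-        m.d.comb += self.req_o[0].eq(self.req & ~self.addr[4])
-        m.d.comb += self.req_o[1].eq(self.req & self.addr[4])
-        return m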
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-# parameter defaults taken from the SystemVerilog module header below
-AXI_ID_WIDTH = 4
-AXI_USER_WIDTH = 4
-
-# module axi4_ar_buffer
-# #(
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid,
-# input logic [31:0] s_axi4_araddr,
-# input logic s_axi4_arvalid,
-# output logic s_axi4_arready,
-# input logic [7:0] s_axi4_arlen,
-# input logic [2:0] s_axi4_arsize,
-# input logic [1:0] s_axi4_arburst,
-# input logic s_axi4_arlock,
-# input logic [2:0] s_axi4_arprot,
-# input logic [3:0] s_axi4_arcache,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser,
-#
-# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid,
-# output logic [31:0] m_axi4_araddr,
-# output logic m_axi4_arvalid,
-# input logic m_axi4_arready,
-# output logic [7:0] m_axi4_arlen,
-# output logic [2:0] m_axi4_arsize,
-# output logic [1:0] m_axi4_arburst,
-# output logic m_axi4_arlock,
-# output logic [2:0] m_axi4_arprot,
-# output logic [3:0] m_axi4_arcache,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser
-# );
-
-
-class axi4_ar_buffer(Elaboratable):
-
- def __init__(self):
- # self.axi4_aclk = Signal() # input
- # self.axi4_arstn = Signal() # input
- self.s_axi4_arid = Signal(AXI_ID_WIDTH) # input
- self.s_axi4_araddr = Signal(32) # input
- self.s_axi4_arvalid = Signal() # input
- self.s_axi4_arready = Signal() # output
- self.s_axi4_arlen = Signal(8) # input
- self.s_axi4_arsize = Signal(3) # input
- self.s_axi4_arburst = Signal(2) # input
- self.s_axi4_arlock = Signal() # input
- self.s_axi4_arprot = Signal(3) # input
- self.s_axi4_arcache = Signal(4) # input
- self.s_axi4_aruser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_arid = Signal(AXI_ID_WIDTH) # output
- self.m_axi4_araddr = Signal(32) # output
- self.m_axi4_arvalid = Signal() # output
- self.m_axi4_arready = Signal() # input
- self.m_axi4_arlen = Signal(8) # output
- self.m_axi4_arsize = Signal(3) # output
- self.m_axi4_arburst = Signal(2) # output
- self.m_axi4_arlock = Signal() # output
- self.m_axi4_arprot = Signal(3) # output
- self.m_axi4_arcache = Signal(4) # output
- self.m_axi4_aruser = Signal(AXI_USER_WIDTH) # output
-
- def elaborate(self, platform=None):
- m = Module()
- # #TODO use record types here
- # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_in;
- # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_out;
-
- # assign data_in [3:0] = s_axi4_arcache;
- # assign data_in [6:4] = s_axi4_arprot;
- # assign data_in [7] = s_axi4_arlock;
- # assign data_in [9:8] = s_axi4_arburst;
- # assign data_in [12:10] = s_axi4_arsize;
- # assign data_in [20:13] = s_axi4_arlen;
- # assign data_in [52:21] = s_axi4_araddr;
- # assign data_in [52+AXI_ID_WIDTH:53] = s_axi4_arid;
- # assign data_in[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH] = s_axi4_aruser;
- #
- # assign m_axi4_arcache = data_out[3:0];
- # assign m_axi4_arprot = data_out[6:4];
- # assign m_axi4_arlock = data_out[7];
- # assign m_axi4_arburst = data_out[9:8];
- # assign m_axi4_arsize = data_out[12:10];
- # assign m_axi4_arlen = data_out[20:13];
- # assign m_axi4_araddr = data_out[52:21];
- # assign m_axi4_arid = data_out[52+AXI_ID_WIDTH:53];
- # assign m_axi4_aruser = data_out[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH];
-
- # m.d.comb += self.m_axi4_arcache.eq(..)
- # m.d.comb += self.m_axi4_arprot.eq(..)
- # m.d.comb += self.m_axi4_arlock.eq(..)
- # m.d.comb += self.m_axi4_arburst.eq(..)
- # m.d.comb += self.m_axi4_arsize.eq(..)
- # m.d.comb += self.m_axi4_arlen.eq(..)
- # m.d.comb += self.m_axi4_araddr.eq(..)
- # m.d.comb += self.m_axi4_arid.eq(..)
- # m.d.comb += self.m_axi4_aruser.eq(..)
- return m
-
-# TODO convert axi_buffer_rab.sv
-#
-# axi_buffer_rab
-# #(
-# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+53 ),
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_buffer
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# .valid_out ( m_axi4_arvalid ),
-# .data_out ( data_out ),
-# .ready_in ( m_axi4_arready ),
-# .valid_in ( s_axi4_arvalid ),
-# .data_in ( data_in ),
-# .ready_out ( s_axi4_arready )
-# );
-#
-
-# endmodule
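-
-
-# One way to express the commented-out data_in packing above in nMigen is a
-# single Cat() in field order, lowest bits first.  This is only a sketch of
-# the still-TODO buffering, not the final implementation; the field order
-# follows the SystemVerilog bit ranges shown above.
-def pack_ar_fields(buf):
-    """Pack the slave-side AR request fields of an axi4_ar_buffer instance
-    into one flat value matching the data_in layout above."""
-    return Cat(buf.s_axi4_arcache,   # data_in[3:0]
-               buf.s_axi4_arprot,    # data_in[6:4]
-               buf.s_axi4_arlock,    # data_in[7]
-               buf.s_axi4_arburst,   # data_in[9:8]
-               buf.s_axi4_arsize,    # data_in[12:10]
-               buf.s_axi4_arlen,     # data_in[20:13]
-               buf.s_axi4_araddr,    # data_in[52:21]
-               buf.s_axi4_arid,      # data_in[52+AXI_ID_WIDTH:53]
-               buf.s_axi4_aruser)    # top AXI_USER_WIDTH bits
-
-# Unpacking data_out onto the m_axi4_* signals is the same Cat() used on the
-# left-hand side of an .eq() assignment.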
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-# parameter defaults taken from the SystemVerilog module header below
-AXI_ADDR_WIDTH = 40
-AXI_ID_WIDTH = 4
-AXI_USER_WIDTH = 4
-
-
-class axi4_ar_sender(Elaboratable):
-
- def __init__(self):
- self.axi4_aclk = Signal() # input
- self.axi4_arstn = Signal() # input
- self.l1_done_o = Signal() # output
- self.l1_accept_i = Signal() # input
- self.l1_drop_i = Signal() # input
- self.l1_save_i = Signal() # input
- self.l2_done_o = Signal() # output
- self.l2_accept_i = Signal() # input
- self.l2_drop_i = Signal() # input
- self.l2_sending_o = Signal() # output
- self.l1_araddr_i = Signal(AXI_ADDR_WIDTH) # input
- self.l2_araddr_i = Signal(AXI_ADDR_WIDTH) # input
- self.s_axi4_arid = Signal(AXI_ID_WIDTH) # input
- self.s_axi4_arvalid = Signal() # input
- self.s_axi4_arready = Signal() # output
- self.s_axi4_arlen = Signal(8) # input
- self.s_axi4_arsize = Signal(3) # input
- self.s_axi4_arburst = Signal(2) # input
- self.s_axi4_arlock = Signal() # input
- self.s_axi4_arprot = Signal(3) # input
- self.s_axi4_arcache = Signal(4) # input
- self.s_axi4_aruser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_arid = Signal(AXI_ID_WIDTH) # output
- self.m_axi4_araddr = Signal(AXI_ADDR_WIDTH) # output
- self.m_axi4_arvalid = Signal() # output
- self.m_axi4_arready = Signal() # input
- self.m_axi4_arlen = Signal(8) # output
- self.m_axi4_arsize = Signal(3) # output
- self.m_axi4_arburst = Signal(2) # output
- self.m_axi4_arlock = Signal() # output
- self.m_axi4_arprot = Signal(3) # output
- self.m_axi4_arcache = Signal(4) # output
- self.m_axi4_aruser = Signal(AXI_USER_WIDTH) # output
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Combinatorial logic for the ENABLE_L2TLB == 0 configuration of the
-        # original SystemVerilog (see below); the L2 TLB path is not
-        # implemented here.
-        l1_save = Signal()
-        m.d.comb += l1_save.eq(0)  # no L2 save slot without an L2 TLB
-        m.d.comb += self.l1_done_o.eq(self.s_axi4_arvalid & self.s_axi4_arready)
-        m.d.comb += self.m_axi4_arvalid.eq(self.s_axi4_arvalid & self.l1_accept_i)
-        m.d.comb += self.s_axi4_arready.eq(
-            (self.m_axi4_arvalid & self.m_axi4_arready) |
-            (self.s_axi4_arvalid & (self.l1_drop_i | l1_save)))
-        m.d.comb += self.m_axi4_aruser.eq(self.s_axi4_aruser)
-        m.d.comb += self.m_axi4_arcache.eq(self.s_axi4_arcache)
-        m.d.comb += self.m_axi4_arprot.eq(self.s_axi4_arprot)
-        m.d.comb += self.m_axi4_arlock.eq(self.s_axi4_arlock)
-        m.d.comb += self.m_axi4_arburst.eq(self.s_axi4_arburst)
-        m.d.comb += self.m_axi4_arsize.eq(self.s_axi4_arsize)
-        m.d.comb += self.m_axi4_arlen.eq(self.s_axi4_arlen)
-        m.d.comb += self.m_axi4_araddr.eq(self.l1_araddr_i)
-        m.d.comb += self.m_axi4_arid.eq(self.s_axi4_arid)
-        m.d.comb += self.l2_sending_o.eq(0)
-        m.d.comb += self.l2_done_o.eq(0)
-        return m
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# module axi4_ar_sender
-# #(
-# parameter AXI_ADDR_WIDTH = 40,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4,
-# parameter ENABLE_L2TLB = 0
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# output logic l1_done_o,
-# input logic l1_accept_i,
-# input logic l1_drop_i,
-# input logic l1_save_i,
-#
-# output logic l2_done_o,
-# input logic l2_accept_i,
-# input logic l2_drop_i,
-# output logic l2_sending_o,
-#
-# input logic [AXI_ADDR_WIDTH-1:0] l1_araddr_i,
-# input logic [AXI_ADDR_WIDTH-1:0] l2_araddr_i,
-#
-# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid,
-# input logic s_axi4_arvalid,
-# output logic s_axi4_arready,
-# input logic [7:0] s_axi4_arlen,
-# input logic [2:0] s_axi4_arsize,
-# input logic [1:0] s_axi4_arburst,
-# input logic s_axi4_arlock,
-# input logic [2:0] s_axi4_arprot,
-# input logic [3:0] s_axi4_arcache,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser,
-#
-# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid,
-# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_araddr,
-# output logic m_axi4_arvalid,
-# input logic m_axi4_arready,
-# output logic [7:0] m_axi4_arlen,
-# output logic [2:0] m_axi4_arsize,
-# output logic [1:0] m_axi4_arburst,
-# output logic m_axi4_arlock,
-# output logic [2:0] m_axi4_arprot,
-# output logic [3:0] m_axi4_arcache,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser
-# );
-#
-# logic l1_save;
-#
-# logic l2_sent;
-# logic l2_available_q;
-#
-# assign l1_save = l1_save_i & l2_available_q;
-#
-# assign l1_done_o = s_axi4_arvalid & s_axi4_arready ;
-#
-# // if 1: accept and forward a transaction translated by L1
-# // 2: drop or save request (if L2 slot not occupied already)
-# assign m_axi4_arvalid = (s_axi4_arvalid & l1_accept_i) |
-# l2_sending_o;
-# assign s_axi4_arready = (m_axi4_arvalid & m_axi4_arready & ~l2_sending_o) |
-# (s_axi4_arvalid & (l1_drop_i | l1_save));
-#
-# generate
-# if (ENABLE_L2TLB == 1) begin
-# logic [AXI_USER_WIDTH-1:0] l2_axi4_aruser ;
-# logic [3:0] l2_axi4_arcache ;
-# logic [3:0] l2_axi4_arregion;
-# logic [3:0] l2_axi4_arqos ;
-# logic [2:0] l2_axi4_arprot ;
-# logic l2_axi4_arlock ;
-# logic [1:0] l2_axi4_arburst ;
-# logic [2:0] l2_axi4_arsize ;
-# logic [7:0] l2_axi4_arlen ;
-# logic [AXI_ID_WIDTH-1:0] l2_axi4_arid ;
-#
-# assign m_axi4_aruser = l2_sending_o ? l2_axi4_aruser : s_axi4_aruser;
-# assign m_axi4_arcache = l2_sending_o ? l2_axi4_arcache : s_axi4_arcache;
-# assign m_axi4_arprot = l2_sending_o ? l2_axi4_arprot : s_axi4_arprot;
-# assign m_axi4_arlock = l2_sending_o ? l2_axi4_arlock : s_axi4_arlock;
-# assign m_axi4_arburst = l2_sending_o ? l2_axi4_arburst : s_axi4_arburst;
-# assign m_axi4_arsize = l2_sending_o ? l2_axi4_arsize : s_axi4_arsize;
-# assign m_axi4_arlen = l2_sending_o ? l2_axi4_arlen : s_axi4_arlen;
-# assign m_axi4_araddr = l2_sending_o ? l2_araddr_i : l1_araddr_i;
-# assign m_axi4_arid = l2_sending_o ? l2_axi4_arid : s_axi4_arid;
-#
-# // Buffer AXI signals in case of L1 miss
-# always @(posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# l2_axi4_aruser <= 'b0;
-# l2_axi4_arcache <= 'b0;
-# l2_axi4_arprot <= 'b0;
-# l2_axi4_arlock <= 1'b0;
-# l2_axi4_arburst <= 'b0;
-# l2_axi4_arsize <= 'b0;
-# l2_axi4_arlen <= 'b0;
-# l2_axi4_arid <= 'b0;
-# end else if (l1_save) begin
-# l2_axi4_aruser <= s_axi4_aruser;
-# l2_axi4_arcache <= s_axi4_arcache;
-# l2_axi4_arprot <= s_axi4_arprot;
-# l2_axi4_arlock <= s_axi4_arlock;
-# l2_axi4_arburst <= s_axi4_arburst;
-# l2_axi4_arsize <= s_axi4_arsize;
-# l2_axi4_arlen <= s_axi4_arlen;
-# l2_axi4_arid <= s_axi4_arid;
-# end
-# end
-#
-# // signal that an l1_save_i can be accepted
-# always @(posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# l2_available_q <= 1'b1;
-# end else if (l2_sent | l2_drop_i) begin
-# l2_available_q <= 1'b1;
-# end else if (l1_save) begin
-# l2_available_q <= 1'b0;
-# end
-# end
-#
-# assign l2_sending_o = l2_accept_i & ~l2_available_q;
-# assign l2_sent = l2_sending_o & m_axi4_arvalid & m_axi4_arready;
-#
-# // if 1: having sent out a transaction translated by L2
-# // 2: drop request (L2 slot is available again)
-# assign l2_done_o = l2_sent | l2_drop_i;
-#
-# end else begin // !`ifdef ENABLE_L2TLB
-# assign m_axi4_aruser = s_axi4_aruser;
-# assign m_axi4_arcache = s_axi4_arcache;
-# assign m_axi4_arprot = s_axi4_arprot;
-# assign m_axi4_arlock = s_axi4_arlock;
-# assign m_axi4_arburst = s_axi4_arburst;
-# assign m_axi4_arsize = s_axi4_arsize;
-# assign m_axi4_arlen = s_axi4_arlen;
-# assign m_axi4_araddr = l1_araddr_i;
-# assign m_axi4_arid = s_axi4_arid;
-#
-# assign l2_sending_o = 1'b0;
-# assign l2_available_q = 1'b0;
-# assign l2_done_o = 1'b0;
-# end // else: !if(ENABLE_L2TLB == 1)
-# endgenerate
-#
-# endmodule
-#
-#
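-
-
-# The "Buffer AXI signals in case of L1 miss" always-block above is a plain
-# save register.  The sketch below shows the same idea in nMigen's sync
-# domain for one field only; it is illustrative and is not wired into the
-# generated axi4_ar_sender class above.
-class ARSaveSketch(Elaboratable):
-    def __init__(self):
-        self.l1_save = Signal()      # save strobe, as l1_save in the SV above
-        self.s_arlen = Signal(8)     # incoming field to preserve
-        self.l2_arlen = Signal(8)    # saved copy used for the L2 retry
-
-    def elaborate(self, platform=None):
-        m = Module()
-        with m.If(self.l1_save):     # capture the request on an L1 miss
-            m.d.sync += self.l2_arlen.eq(self.s_arlen)
-        return m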
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-# parameter defaults taken from the SystemVerilog module header below
-AXI_ID_WIDTH = 4
-AXI_USER_WIDTH = 4
-
-
-class axi4_aw_buffer(Elaboratable):
-
- def __init__(self):
- self.axi4_aclk = Signal() # input
- self.axi4_arstn = Signal() # input
- self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input
- self.s_axi4_awaddr = Signal(32) # input
- self.s_axi4_awvalid = Signal() # input
- self.s_axi4_awready = Signal() # output
- self.s_axi4_awlen = Signal(8) # input
- self.s_axi4_awsize = Signal(3) # input
- self.s_axi4_awburst = Signal(2) # input
- self.s_axi4_awlock = Signal() # input
- self.s_axi4_awprot = Signal(3) # input
- self.s_axi4_awcache = Signal(4) # input
- self.s_axi4_awregion = Signal(4) # input
- self.s_axi4_awqos = Signal(4) # input
- self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output
- self.m_axi4_awaddr = Signal(32) # output
- self.m_axi4_awvalid = Signal() # output
- self.m_axi4_awready = Signal() # input
- self.m_axi4_awlen = Signal(8) # output
- self.m_axi4_awsize = Signal(3) # output
- self.m_axi4_awburst = Signal(2) # output
- self.m_axi4_awlock = Signal() # output
- self.m_axi4_awprot = Signal(3) # output
- self.m_axi4_awcache = Signal(4) # output
- self.m_axi4_awregion = Signal(4) # output
- self.m_axi4_awqos = Signal(4) # output
- self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Pass the AW channel straight through.  The axi_buffer_rab FIFO of
-        # the original SystemVerilog (see below) is still TODO.
-        m.d.comb += self.m_axi4_awcache.eq(self.s_axi4_awcache)
-        m.d.comb += self.m_axi4_awprot.eq(self.s_axi4_awprot)
-        m.d.comb += self.m_axi4_awlock.eq(self.s_axi4_awlock)
-        m.d.comb += self.m_axi4_awburst.eq(self.s_axi4_awburst)
-        m.d.comb += self.m_axi4_awsize.eq(self.s_axi4_awsize)
-        m.d.comb += self.m_axi4_awlen.eq(self.s_axi4_awlen)
-        m.d.comb += self.m_axi4_awaddr.eq(self.s_axi4_awaddr)
-        m.d.comb += self.m_axi4_awregion.eq(self.s_axi4_awregion)
-        m.d.comb += self.m_axi4_awqos.eq(self.s_axi4_awqos)
-        m.d.comb += self.m_axi4_awid.eq(self.s_axi4_awid)
-        m.d.comb += self.m_axi4_awuser.eq(self.s_axi4_awuser)
-        m.d.comb += self.m_axi4_awvalid.eq(self.s_axi4_awvalid)
-        m.d.comb += self.s_axi4_awready.eq(self.m_axi4_awready)
-        return m
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# module axi4_aw_buffer
-# #(
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid,
-# input logic [31:0] s_axi4_awaddr,
-# input logic s_axi4_awvalid,
-# output logic s_axi4_awready,
-# input logic [7:0] s_axi4_awlen,
-# input logic [2:0] s_axi4_awsize,
-# input logic [1:0] s_axi4_awburst,
-# input logic s_axi4_awlock,
-# input logic [2:0] s_axi4_awprot,
-# input logic [3:0] s_axi4_awcache,
-# input logic [3:0] s_axi4_awregion,
-# input logic [3:0] s_axi4_awqos,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser,
-#
-# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid,
-# output logic [31:0] m_axi4_awaddr,
-# output logic m_axi4_awvalid,
-# input logic m_axi4_awready,
-# output logic [7:0] m_axi4_awlen,
-# output logic [2:0] m_axi4_awsize,
-# output logic [1:0] m_axi4_awburst,
-# output logic m_axi4_awlock,
-# output logic [2:0] m_axi4_awprot,
-# output logic [3:0] m_axi4_awcache,
-# output logic [3:0] m_axi4_awregion,
-# output logic [3:0] m_axi4_awqos,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser
-# );
-#
-# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_in;
-# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_out;
-#
-# assign data_in [3:0] = s_axi4_awcache;
-# assign data_in [6:4] = s_axi4_awprot;
-# assign data_in [7] = s_axi4_awlock;
-# assign data_in [9:8] = s_axi4_awburst;
-# assign data_in [12:10] = s_axi4_awsize;
-# assign data_in [20:13] = s_axi4_awlen;
-# assign data_in [52:21] = s_axi4_awaddr;
-# assign data_in [56:53] = s_axi4_awregion;
-# assign data_in [60:57] = s_axi4_awqos;
-# assign data_in [60+AXI_ID_WIDTH:61] = s_axi4_awid;
-# assign data_in [60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH] = s_axi4_awuser;
-#
-# assign m_axi4_awcache = data_out[3:0];
-# assign m_axi4_awprot = data_out[6:4];
-# assign m_axi4_awlock = data_out[7];
-# assign m_axi4_awburst = data_out[9:8];
-# assign m_axi4_awsize = data_out[12:10];
-# assign m_axi4_awlen = data_out[20:13];
-# assign m_axi4_awaddr = data_out[52:21];
-# assign m_axi4_awregion = data_out[56:53];
-# assign m_axi4_awqos = data_out[60:57];
-# assign m_axi4_awid = data_out[60+AXI_ID_WIDTH:61];
-# assign m_axi4_awuser = data_out[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH];
-#
-# axi_buffer_rab
-# #(
-# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+61 ),
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_buffer
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# .valid_out ( m_axi4_awvalid ),
-# .data_out ( data_out ),
-# .ready_in ( m_axi4_awready ),
-# .valid_in ( s_axi4_awvalid ),
-# .data_in ( data_in ),
-# .ready_out ( s_axi4_awready )
-# );
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-# parameter defaults taken from the SystemVerilog module header below
-AXI_ADDR_WIDTH = 40
-AXI_ID_WIDTH = 4
-AXI_USER_WIDTH = 4
-
-
-class axi4_aw_sender(Elaboratable):
-
- def __init__(self):
- self.axi4_aclk = Signal() # input
- self.axi4_arstn = Signal() # input
- self.l1_done_o = Signal() # output
- self.l1_accept_i = Signal() # input
- self.l1_drop_i = Signal() # input
- self.l1_save_i = Signal() # input
- self.l2_done_o = Signal() # output
- self.l2_accept_i = Signal() # input
- self.l2_drop_i = Signal() # input
- self.l2_sending_o = Signal() # output
- self.l1_awaddr_i = Signal(AXI_ADDR_WIDTH) # input
- self.l2_awaddr_i = Signal(AXI_ADDR_WIDTH) # input
- self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input
- self.s_axi4_awvalid = Signal() # input
- self.s_axi4_awready = Signal() # output
- self.s_axi4_awlen = Signal(8) # input
- self.s_axi4_awsize = Signal(3) # input
- self.s_axi4_awburst = Signal(2) # input
- self.s_axi4_awlock = Signal() # input
- self.s_axi4_awprot = Signal(3) # input
- self.s_axi4_awcache = Signal(4) # input
- self.s_axi4_awregion = Signal(4) # input
- self.s_axi4_awqos = Signal(4) # input
- self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output
- self.m_axi4_awaddr = Signal(AXI_ADDR_WIDTH) # output
- self.m_axi4_awvalid = Signal() # output
- self.m_axi4_awready = Signal() # input
- self.m_axi4_awlen = Signal(8) # output
- self.m_axi4_awsize = Signal(3) # output
- self.m_axi4_awburst = Signal(2) # output
- self.m_axi4_awlock = Signal() # output
- self.m_axi4_awprot = Signal(3) # output
- self.m_axi4_awcache = Signal(4) # output
- self.m_axi4_awregion = Signal(4) # output
- self.m_axi4_awqos = Signal(4) # output
- self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Combinatorial logic for the ENABLE_L2TLB == 0 configuration of the
-        # original SystemVerilog (see below); the L2 TLB path is not
-        # implemented here.
-        l1_save = Signal()
-        m.d.comb += l1_save.eq(0)  # no L2 save slot without an L2 TLB
-        m.d.comb += self.l1_done_o.eq(self.s_axi4_awvalid & self.s_axi4_awready)
-        m.d.comb += self.m_axi4_awvalid.eq(self.s_axi4_awvalid & self.l1_accept_i)
-        m.d.comb += self.s_axi4_awready.eq(
-            (self.m_axi4_awvalid & self.m_axi4_awready) |
-            (self.s_axi4_awvalid & (self.l1_drop_i | l1_save)))
-        m.d.comb += self.m_axi4_awuser.eq(self.s_axi4_awuser)
-        m.d.comb += self.m_axi4_awcache.eq(self.s_axi4_awcache)
-        m.d.comb += self.m_axi4_awregion.eq(self.s_axi4_awregion)
-        m.d.comb += self.m_axi4_awqos.eq(self.s_axi4_awqos)
-        m.d.comb += self.m_axi4_awprot.eq(self.s_axi4_awprot)
-        m.d.comb += self.m_axi4_awlock.eq(self.s_axi4_awlock)
-        m.d.comb += self.m_axi4_awburst.eq(self.s_axi4_awburst)
-        m.d.comb += self.m_axi4_awsize.eq(self.s_axi4_awsize)
-        m.d.comb += self.m_axi4_awlen.eq(self.s_axi4_awlen)
-        m.d.comb += self.m_axi4_awaddr.eq(self.l1_awaddr_i)
-        m.d.comb += self.m_axi4_awid.eq(self.s_axi4_awid)
-        m.d.comb += self.l2_sending_o.eq(0)
-        m.d.comb += self.l2_done_o.eq(0)
-        return m
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# module axi4_aw_sender
-# #(
-# parameter AXI_ADDR_WIDTH = 40,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4,
-# parameter ENABLE_L2TLB = 0
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# output logic l1_done_o,
-# input logic l1_accept_i,
-# input logic l1_drop_i,
-# input logic l1_save_i,
-#
-# output logic l2_done_o,
-# input logic l2_accept_i,
-# input logic l2_drop_i,
-# output logic l2_sending_o,
-#
-# input logic [AXI_ADDR_WIDTH-1:0] l1_awaddr_i,
-# input logic [AXI_ADDR_WIDTH-1:0] l2_awaddr_i,
-#
-# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid,
-# input logic s_axi4_awvalid,
-# output logic s_axi4_awready,
-# input logic [7:0] s_axi4_awlen,
-# input logic [2:0] s_axi4_awsize,
-# input logic [1:0] s_axi4_awburst,
-# input logic s_axi4_awlock,
-# input logic [2:0] s_axi4_awprot,
-# input logic [3:0] s_axi4_awcache,
-# input logic [3:0] s_axi4_awregion,
-# input logic [3:0] s_axi4_awqos,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser,
-#
-# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid,
-# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_awaddr,
-# output logic m_axi4_awvalid,
-# input logic m_axi4_awready,
-# output logic [7:0] m_axi4_awlen,
-# output logic [2:0] m_axi4_awsize,
-# output logic [1:0] m_axi4_awburst,
-# output logic m_axi4_awlock,
-# output logic [2:0] m_axi4_awprot,
-# output logic [3:0] m_axi4_awcache,
-# output logic [3:0] m_axi4_awregion,
-# output logic [3:0] m_axi4_awqos,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser
-# );
-#
-# logic l1_save;
-#
-# logic l2_sent;
-# logic l2_available_q;
-#
-# assign l1_save = l1_save_i & l2_available_q;
-#
-# assign l1_done_o = s_axi4_awvalid & s_axi4_awready ;
-#
-# // if 1: accept and forward a transaction translated by L1
-# // 2: drop or save request (if L2 slot not occupied already)
-# assign m_axi4_awvalid = (s_axi4_awvalid & l1_accept_i) |
-# l2_sending_o;
-# assign s_axi4_awready = (m_axi4_awvalid & m_axi4_awready & ~l2_sending_o) |
-# (s_axi4_awvalid & (l1_drop_i | l1_save));
-#
-# generate
-# if (ENABLE_L2TLB == 1) begin
-# logic [AXI_USER_WIDTH-1:0] l2_axi4_awuser ;
-# logic [3:0] l2_axi4_awcache ;
-# logic [3:0] l2_axi4_awregion;
-# logic [3:0] l2_axi4_awqos ;
-# logic [2:0] l2_axi4_awprot ;
-# logic l2_axi4_awlock ;
-# logic [1:0] l2_axi4_awburst ;
-# logic [2:0] l2_axi4_awsize ;
-# logic [7:0] l2_axi4_awlen ;
-# logic [AXI_ID_WIDTH-1:0] l2_axi4_awid ;
-#
-# assign m_axi4_awuser = l2_sending_o ? l2_axi4_awuser : s_axi4_awuser;
-# assign m_axi4_awcache = l2_sending_o ? l2_axi4_awcache : s_axi4_awcache;
-# assign m_axi4_awregion = l2_sending_o ? l2_axi4_awregion : s_axi4_awregion;
-# assign m_axi4_awqos = l2_sending_o ? l2_axi4_awqos : s_axi4_awqos;
-# assign m_axi4_awprot = l2_sending_o ? l2_axi4_awprot : s_axi4_awprot;
-# assign m_axi4_awlock = l2_sending_o ? l2_axi4_awlock : s_axi4_awlock;
-# assign m_axi4_awburst = l2_sending_o ? l2_axi4_awburst : s_axi4_awburst;
-# assign m_axi4_awsize = l2_sending_o ? l2_axi4_awsize : s_axi4_awsize;
-# assign m_axi4_awlen = l2_sending_o ? l2_axi4_awlen : s_axi4_awlen;
-# assign m_axi4_awaddr = l2_sending_o ? l2_awaddr_i : l1_awaddr_i;
-# assign m_axi4_awid = l2_sending_o ? l2_axi4_awid : s_axi4_awid;
-#
-# // buffer AXI signals in case of L1 miss
-# always @(posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# l2_axi4_awuser <= 'b0;
-# l2_axi4_awcache <= 'b0;
-# l2_axi4_awregion <= 'b0;
-# l2_axi4_awqos <= 'b0;
-# l2_axi4_awprot <= 'b0;
-# l2_axi4_awlock <= 1'b0;
-# l2_axi4_awburst <= 'b0;
-# l2_axi4_awsize <= 'b0;
-# l2_axi4_awlen <= 'b0;
-# l2_axi4_awid <= 'b0;
-# end else if (l1_save) begin
-# l2_axi4_awuser <= s_axi4_awuser;
-# l2_axi4_awcache <= s_axi4_awcache;
-# l2_axi4_awregion <= s_axi4_awregion;
-# l2_axi4_awqos <= s_axi4_awqos;
-# l2_axi4_awprot <= s_axi4_awprot;
-# l2_axi4_awlock <= s_axi4_awlock;
-# l2_axi4_awburst <= s_axi4_awburst;
-# l2_axi4_awsize <= s_axi4_awsize;
-# l2_axi4_awlen <= s_axi4_awlen;
-# l2_axi4_awid <= s_axi4_awid;
-# end
-# end
-#
-# // signal that an l1_save_i can be accepted
-# always @(posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# l2_available_q <= 1'b1;
-# end else if (l2_sent | l2_drop_i) begin
-# l2_available_q <= 1'b1;
-# end else if (l1_save) begin
-# l2_available_q <= 1'b0;
-# end
-# end
-#
-# assign l2_sending_o = l2_accept_i & ~l2_available_q;
-# assign l2_sent = l2_sending_o & m_axi4_awvalid & m_axi4_awready;
-#
-# // if 1: having sent out a transaction translated by L2
-# // 2: drop request (L2 slot is available again)
-# assign l2_done_o = l2_sent | l2_drop_i;
-#
-# end else begin // !`ifdef ENABLE_L2TLB
-# assign m_axi4_awuser = s_axi4_awuser;
-# assign m_axi4_awcache = s_axi4_awcache;
-# assign m_axi4_awregion = s_axi4_awregion;
-# assign m_axi4_awqos = s_axi4_awqos;
-# assign m_axi4_awprot = s_axi4_awprot;
-# assign m_axi4_awlock = s_axi4_awlock;
-# assign m_axi4_awburst = s_axi4_awburst;
-# assign m_axi4_awsize = s_axi4_awsize;
-# assign m_axi4_awlen = s_axi4_awlen;
-# assign m_axi4_awaddr = l1_awaddr_i;
-# assign m_axi4_awid = s_axi4_awid;
-#
-# assign l2_sending_o = 1'b0;
-# assign l2_available_q = 1'b0;
-# assign l2_done_o = 1'b0;
-# end // !`ifdef ENABLE_L2TLB
-# endgenerate
-#
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-# parameter defaults taken from the SystemVerilog module header below
-AXI_ID_WIDTH = 4
-AXI_USER_WIDTH = 4
-
-
-class axi4_b_buffer(Elaboratable):
-
- def __init__(self):
- self.axi4_aclk = Signal() # input
- self.axi4_arstn = Signal() # input
- self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output
- self.s_axi4_bresp = Signal(2) # output
- self.s_axi4_bvalid = Signal() # output
- self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output
- self.s_axi4_bready = Signal() # input
- self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input
- self.m_axi4_bresp = Signal(2) # input
- self.m_axi4_bvalid = Signal() # input
- self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_bready = Signal() # output
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Pass the B channel straight through.  The axi_buffer_rab FIFO of
-        # the original SystemVerilog (see below) is still TODO.
-        m.d.comb += self.s_axi4_bresp.eq(self.m_axi4_bresp)
-        m.d.comb += self.s_axi4_bid.eq(self.m_axi4_bid)
-        m.d.comb += self.s_axi4_buser.eq(self.m_axi4_buser)
-        m.d.comb += self.s_axi4_bvalid.eq(self.m_axi4_bvalid)
-        m.d.comb += self.m_axi4_bready.eq(self.s_axi4_bready)
-        return m
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# module axi4_b_buffer
-# #(
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid,
-# output logic [1:0] s_axi4_bresp,
-# output logic s_axi4_bvalid,
-# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser,
-# input logic s_axi4_bready,
-#
-# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid,
-# input logic [1:0] m_axi4_bresp,
-# input logic m_axi4_bvalid,
-# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser,
-# output logic m_axi4_bready
-# );
-#
-# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_in;
-# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_out;
-#
-# assign data_in [1:0] = m_axi4_bresp;
-# assign data_in [AXI_ID_WIDTH+1:2] = m_axi4_bid;
-# assign data_in[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2] = m_axi4_buser;
-#
-# assign s_axi4_buser = data_out[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2];
-# assign s_axi4_bid = data_out[AXI_ID_WIDTH+1:2];
-# assign s_axi4_bresp = data_out[1:0];
-#
-# axi_buffer_rab
-# #(
-# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+2 ),
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_buffer
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# .valid_out( s_axi4_bvalid ),
-# .data_out ( data_out ),
-# .ready_in ( s_axi4_bready ),
-# .valid_in ( m_axi4_bvalid ),
-# .data_in ( data_in ),
-# .ready_out( m_axi4_bready )
-# );
-#
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-# parameter defaults taken from the SystemVerilog module header below
-AXI_ID_WIDTH = 10
-AXI_USER_WIDTH = 4
-
-
-class axi4_b_sender(Elaboratable):
-
- def __init__(self):
- self.axi4_aclk = Signal() # input
- self.axi4_arstn = Signal() # input
- self.drop_i = Signal() # input
- self.done_o = Signal() # output
- self.id_i = Signal(AXI_ID_WIDTH) # input
- self.prefetch_i = Signal() # input
- self.hit_i = Signal() # input
- self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output
- self.s_axi4_bresp = Signal(2) # output
- self.s_axi4_bvalid = Signal() # output
- self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output
- self.s_axi4_bready = Signal() # input
- self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input
- self.m_axi4_bresp = Signal(2) # input
- self.m_axi4_bvalid = Signal() # input
- self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_bready = Signal() # output
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Minimal pass-through of the B channel.  The response-dropping
-        # logic of the original SystemVerilog (FIFO plus "dropping" register,
-        # see below) is still TODO.
-        m.d.comb += self.done_o.eq(self.drop_i)
-        m.d.comb += self.s_axi4_buser.eq(self.m_axi4_buser)
-        m.d.comb += self.s_axi4_bid.eq(self.m_axi4_bid)
-        m.d.comb += self.s_axi4_bresp.eq(self.m_axi4_bresp)
-        m.d.comb += self.s_axi4_bvalid.eq(self.m_axi4_bvalid)
-        m.d.comb += self.m_axi4_bready.eq(self.s_axi4_bready)
-        return m
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# module axi4_b_sender
-# #(
-# parameter AXI_ID_WIDTH = 10,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# input logic drop_i,
-# output logic done_o,
-# input logic [AXI_ID_WIDTH-1:0] id_i,
-# input logic prefetch_i,
-# input logic hit_i,
-#
-# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid,
-# output logic [1:0] s_axi4_bresp,
-# output logic s_axi4_bvalid,
-# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser,
-# input logic s_axi4_bready,
-#
-# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid,
-# input logic [1:0] m_axi4_bresp,
-# input logic m_axi4_bvalid,
-# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser,
-# output logic m_axi4_bready
-# );
-#
-# logic fifo_valid;
-# logic fifo_pop;
-# logic fifo_push;
-# logic fifo_ready;
-# logic [AXI_ID_WIDTH-1:0] id;
-# logic prefetch;
-# logic hit;
-#
-# logic dropping;
-#
-# axi_buffer_rab
-# #(
-# .DATA_WIDTH ( 2+AXI_ID_WIDTH ),
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_fifo
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# // Pop
-# .data_out ( {prefetch, hit, id} ),
-# .valid_out ( fifo_valid ),
-# .ready_in ( fifo_pop ),
-# // Push
-# .valid_in ( fifo_push ),
-# .data_in ( {prefetch_i, hit_i, id_i} ),
-# .ready_out ( fifo_ready )
-# );
-#
-# assign fifo_push = drop_i & fifo_ready;
-# assign done_o = fifo_push;
-#
-# assign fifo_pop = dropping & s_axi4_bready;
-#
-# always @ (posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# dropping <= 1'b0;
-# end else begin
-# if (fifo_valid && ~dropping)
-# dropping <= 1'b1;
-# else if (fifo_pop)
-# dropping <= 1'b0;
-# end
-# end
-#
-# assign s_axi4_buser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_buser;
-# assign s_axi4_bid = dropping ? id : m_axi4_bid;
-#
-# assign s_axi4_bresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, multi, prot
-# (dropping & prefetch ) ? 2'b10 : // prefetch miss
-# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot
-# (dropping ) ? 2'b10 : // non-prefetch miss
-# m_axi4_bresp;
-#
-# assign s_axi4_bvalid = dropping | m_axi4_bvalid;
-# assign m_axi4_bready = ~dropping & s_axi4_bready;
-#
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class axi4_r_buffer(Elaboratable):
-
-    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
-        # Parameter defaults match the original SystemVerilog module below.
-        self.axi4_aclk = Signal()  # input
-        self.axi4_arstn = Signal()  # input
-        self.s_axi4_rid = Signal(AXI_ID_WIDTH)  # output
-        self.s_axi4_rresp = Signal(2)  # output
-        self.s_axi4_rdata = Signal(AXI_DATA_WIDTH)  # output
-        self.s_axi4_rlast = Signal()  # output
-        self.s_axi4_rvalid = Signal()  # output
-        self.s_axi4_ruser = Signal(AXI_USER_WIDTH)  # output
-        self.s_axi4_rready = Signal()  # input
-        self.m_axi4_rid = Signal(AXI_ID_WIDTH)  # input
-        self.m_axi4_rresp = Signal(2)  # input
-        self.m_axi4_rdata = Signal(AXI_DATA_WIDTH)  # input
-        self.m_axi4_rlast = Signal()  # input
-        self.m_axi4_rvalid = Signal()  # input
-        self.m_axi4_ruser = Signal(AXI_USER_WIDTH)  # input
-        self.m_axi4_rready = Signal()  # output
-        # Packed R-channel payload running through the FIFO (see SV source below).
-        self.data_in = Signal(AXI_DATA_WIDTH + AXI_ID_WIDTH + AXI_USER_WIDTH + 3)
-        self.data_out = Signal(AXI_DATA_WIDTH + AXI_ID_WIDTH + AXI_USER_WIDTH + 3)
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Pack the master-side R beat exactly as the SV source below does:
-        # {ruser, rdata, rid, rlast, rresp}, LSB first.
-        m.d.comb += self.data_in.eq(Cat(self.m_axi4_rresp, self.m_axi4_rlast,
-                                        self.m_axi4_rid, self.m_axi4_rdata,
-                                        self.m_axi4_ruser))
-        # Unpack the slave-side R beat from the FIFO output.
-        m.d.comb += Cat(self.s_axi4_rresp, self.s_axi4_rlast,
-                        self.s_axi4_rid, self.s_axi4_rdata,
-                        self.s_axi4_ruser).eq(self.data_out)
-        # TODO: the axi_buffer_rab instance (BUFFER_DEPTH=4) that carries
-        # data_in/data_out and the valid/ready handshake still needs porting
-        # (see the sketch after this class).
-        return m
-
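-# Editor's note: the SystemVerilog below simply runs the packed R beat through
-# an axi_buffer_rab FIFO.  A minimal sketch of that hookup (not part of the
-# generated file), assuming it is added inside elaborate() and that
-# axi_buffer_rab has been ported and imported with DATA_WIDTH/BUFFER_DEPTH
-# constructor arguments:
-#
-#         buf = axi_buffer_rab(DATA_WIDTH=len(self.data_in), BUFFER_DEPTH=4)
-#         m.submodules.u_buffer = buf
-#         m.d.comb += [
-#             buf.data_in.eq(self.data_in),
-#             buf.valid_in.eq(self.m_axi4_rvalid),
-#             self.m_axi4_rready.eq(buf.ready_out),
-#             self.data_out.eq(buf.data_out),
-#             self.s_axi4_rvalid.eq(buf.valid_out),
-#             buf.ready_in.eq(self.s_axi4_rready),
-#         ]
-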
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# module axi4_r_buffer
-# #(
-# parameter AXI_DATA_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid,
-# output logic [1:0] s_axi4_rresp,
-# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
-# output logic s_axi4_rlast,
-# output logic s_axi4_rvalid,
-# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser,
-# input logic s_axi4_rready,
-#
-# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid,
-# input logic [1:0] m_axi4_rresp,
-# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata,
-# input logic m_axi4_rlast,
-# input logic m_axi4_rvalid,
-# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser,
-# output logic m_axi4_rready
-# );
-#
-# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_in;
-# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_out;
-#
-# localparam ID_START = 3;
-# localparam ID_END = AXI_ID_WIDTH-1 + ID_START;
-# localparam DATA_START = ID_END + 1;
-# localparam DATA_END = AXI_DATA_WIDTH-1 + DATA_START;
-# localparam USER_START = DATA_END + 1;
-# localparam USER_END = AXI_USER_WIDTH-1 + USER_START;
-#
-# assign data_in [1:0] = m_axi4_rresp;
-# assign data_in [2] = m_axi4_rlast;
-# assign data_in [ID_END:ID_START] = m_axi4_rid;
-# assign data_in[DATA_END:DATA_START] = m_axi4_rdata;
-# assign data_in[USER_END:USER_START] = m_axi4_ruser;
-#
-# assign s_axi4_rresp = data_out [1:0];
-# assign s_axi4_rlast = data_out [2];
-# assign s_axi4_rid = data_out [ID_END:ID_START];
-# assign s_axi4_rdata = data_out[DATA_END:DATA_START];
-# assign s_axi4_ruser = data_out[USER_END:USER_START];
-#
-# axi_buffer_rab
-# #(
-# .DATA_WIDTH ( AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3 ),
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_buffer
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# // Pop
-# .valid_out ( s_axi4_rvalid ),
-# .data_out ( data_out ),
-# .ready_in ( s_axi4_rready ),
-# // Push
-# .valid_in ( m_axi4_rvalid ),
-# .data_in ( data_in ),
-# .ready_out ( m_axi4_rready )
-# );
-#
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Mux, Elaboratable
-
-
-class axi4_r_sender(Elaboratable):
-
-    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
-        # Parameter defaults match the original SystemVerilog module below.
-        self.axi4_aclk = Signal()  # input
-        self.axi4_arstn = Signal()  # input
-        self.drop_i = Signal()  # input
-        self.drop_len_i = Signal(8)  # input
-        self.done_o = Signal()  # output
-        self.id_i = Signal(AXI_ID_WIDTH)  # input
-        self.prefetch_i = Signal()  # input
-        self.hit_i = Signal()  # input
-        self.s_axi4_rid = Signal(AXI_ID_WIDTH)  # output
-        self.s_axi4_rresp = Signal(2)  # output
-        self.s_axi4_rdata = Signal(AXI_DATA_WIDTH)  # output
-        self.s_axi4_rlast = Signal()  # output
-        self.s_axi4_rvalid = Signal()  # output
-        self.s_axi4_ruser = Signal(AXI_USER_WIDTH)  # output
-        self.s_axi4_rready = Signal()  # input
-        self.m_axi4_rid = Signal(AXI_ID_WIDTH)  # input
-        self.m_axi4_rresp = Signal(2)  # input
-        self.m_axi4_rdata = Signal(AXI_DATA_WIDTH)  # input
-        self.m_axi4_rlast = Signal()  # input
-        self.m_axi4_rvalid = Signal()  # input
-        self.m_axi4_ruser = Signal(AXI_USER_WIDTH)  # input
-        self.m_axi4_rready = Signal()  # output
-        # Internal signals (drop FIFO interface and FSM state, see SV source below).
-        self.fifo_push = Signal()
-        self.fifo_pop = Signal()
-        self.fifo_valid = Signal()
-        self.fifo_ready = Signal()
-        self.id = Signal(AXI_ID_WIDTH)
-        self.len = Signal(8)
-        self.prefetch = Signal()
-        self.hit = Signal()
-        self.dropping = Signal()
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Push a drop request into the FIFO whenever it can be accepted.
-        m.d.comb += self.fifo_push.eq(self.drop_i & self.fifo_ready)
-        m.d.comb += self.done_o.eq(self.fifo_push)
-        # Read data always comes straight from the master side.
-        m.d.comb += self.s_axi4_rdata.eq(self.m_axi4_rdata)
-        # While dropping, fake the R beat; otherwise pass the master side through.
-        m.d.comb += self.s_axi4_ruser.eq(Mux(self.dropping, 0, self.m_axi4_ruser))
-        m.d.comb += self.s_axi4_rid.eq(Mux(self.dropping, self.id, self.m_axi4_rid))
-        m.d.comb += self.s_axi4_rresp.eq(Mux(self.dropping,
-                                             Mux(self.prefetch & self.hit, 0b00, 0b10),
-                                             self.m_axi4_rresp))
-        m.d.comb += self.s_axi4_rvalid.eq(self.dropping | self.m_axi4_rvalid)
-        m.d.comb += self.m_axi4_rready.eq(~self.dropping & self.s_axi4_rready)
-        # TODO: the drop FIFO and the FORWARDING/DROPPING state machine that
-        # drive 'dropping', 's_axi4_rlast' and 'fifo_pop' still need porting
-        # from the SystemVerilog kept below (see the sketch after this class).
-        return m
-
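-# Editor's note: sv2nmigen did not translate the burst-tracking state machine
-# of this module.  A minimal nmigen sketch of it (not part of the generated
-# file), assuming it is added inside elaborate() and that the default 'sync'
-# domain is clocked by axi4_aclk; it is slightly simplified in that it checks
-# the registered burst flag, whereas the SV source checks the value updated in
-# the same cycle:
-#
-#         drop_cnt = Signal(8)
-#         burst_ongoing = Signal()
-#         with m.FSM():
-#             with m.State("FORWARDING"):
-#                 m.d.comb += self.s_axi4_rlast.eq(self.m_axi4_rlast)
-#                 # Remember whether a read burst is currently in flight.
-#                 with m.If(self.m_axi4_rvalid & self.m_axi4_rready):
-#                     m.d.sync += burst_ongoing.eq(~self.m_axi4_rlast)
-#                 # Start dropping once the FIFO holds a request and no burst is ongoing.
-#                 with m.If(~burst_ongoing & self.fifo_valid):
-#                     m.d.sync += drop_cnt.eq(self.len)
-#                     m.next = "DROPPING"
-#             with m.State("DROPPING"):
-#                 m.d.comb += self.dropping.eq(1)
-#                 m.d.comb += self.s_axi4_rlast.eq(drop_cnt == 0)
-#                 # Handshake on the slave interface.
-#                 with m.If(self.s_axi4_rready):
-#                     m.d.sync += drop_cnt.eq(drop_cnt - 1)
-#                     with m.If(drop_cnt == 0):
-#                         m.d.sync += drop_cnt.eq(0)
-#                         m.d.comb += self.fifo_pop.eq(1)
-#                         m.next = "FORWARDING"
-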
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# //import CfMath::log2;
-#
-# module axi4_r_sender
-# #(
-# parameter AXI_DATA_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# input logic drop_i,
-# input logic [7:0] drop_len_i,
-# output logic done_o,
-# input logic [AXI_ID_WIDTH-1:0] id_i,
-# input logic prefetch_i,
-# input logic hit_i,
-#
-# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid,
-# output logic [1:0] s_axi4_rresp,
-# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
-# output logic s_axi4_rlast,
-# output logic s_axi4_rvalid,
-# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser,
-# input logic s_axi4_rready,
-#
-# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid,
-# input logic [1:0] m_axi4_rresp,
-# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata,
-# input logic m_axi4_rlast,
-# input logic m_axi4_rvalid,
-# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser,
-# output logic m_axi4_rready
-# );
-#
-# localparam BUFFER_DEPTH = 16;
-#
-# logic fifo_valid;
-# logic fifo_pop;
-# logic fifo_push;
-# logic fifo_ready;
-# logic [AXI_ID_WIDTH-1:0] id;
-# logic [7:0] len;
-# logic prefetch;
-# logic hit;
-#
-# logic dropping;
-#
-# enum logic [1:0] { FORWARDING, DROPPING }
-# state_d, state_q;
-# logic burst_ongoing_d, burst_ongoing_q;
-# logic [7:0] drop_cnt_d, drop_cnt_q;
-#
-# axi_buffer_rab
-# #(
-# .DATA_WIDTH ( 2+AXI_ID_WIDTH+8 ),
-# .BUFFER_DEPTH ( BUFFER_DEPTH )
-# )
-# u_fifo
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# // Pop
-# .data_out ( {prefetch, hit, id, len} ),
-# .valid_out ( fifo_valid ),
-# .ready_in ( fifo_pop ),
-# // Push
-# .valid_in ( fifo_push ),
-# .data_in ( {prefetch_i, hit_i, id_i, drop_len_i} ),
-# .ready_out ( fifo_ready )
-# );
-#
-# assign fifo_push = drop_i & fifo_ready;
-# assign done_o = fifo_push;
-#
-# always_comb begin
-# burst_ongoing_d = burst_ongoing_q;
-# drop_cnt_d = drop_cnt_q;
-# dropping = 1'b0;
-# s_axi4_rlast = 1'b0;
-# fifo_pop = 1'b0;
-# state_d = state_q;
-#
-# case (state_q)
-# FORWARDING: begin
-# s_axi4_rlast = m_axi4_rlast;
-# // Remember whether there is currently a burst ongoing.
-# if (m_axi4_rvalid && m_axi4_rready) begin
-# if (m_axi4_rlast) begin
-# burst_ongoing_d = 1'b0;
-# end else begin
-# burst_ongoing_d = 1'b1;
-# end
-# end
-# // If there is no burst ongoing and the FIFO has a drop request ready, process it.
-# if (!burst_ongoing_d && fifo_valid) begin
-# drop_cnt_d = len;
-# state_d = DROPPING;
-# end
-# end
-#
-# DROPPING: begin
-# dropping = 1'b1;
-# s_axi4_rlast = (drop_cnt_q == '0);
-# // Handshake on slave interface
-# if (s_axi4_rready) begin
-# drop_cnt_d -= 1;
-# if (drop_cnt_q == '0) begin
-# drop_cnt_d = '0;
-# fifo_pop = 1'b1;
-# state_d = FORWARDING;
-# end
-# end
-# end
-#
-# default: begin
-# state_d = FORWARDING;
-# end
-# endcase
-# end
-#
-# assign s_axi4_rdata = m_axi4_rdata;
-#
-# assign s_axi4_ruser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_ruser;
-# assign s_axi4_rid = dropping ? id : m_axi4_rid;
-#
-# assign s_axi4_rresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, multi, prot
-# (dropping & prefetch ) ? 2'b10 : // prefetch miss
-# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot
-# (dropping ) ? 2'b10 : // non-prefetch miss
-# m_axi4_rresp;
-#
-# assign s_axi4_rvalid = dropping | m_axi4_rvalid;
-# assign m_axi4_rready = ~dropping & s_axi4_rready;
-#
-# always_ff @(posedge axi4_aclk, negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# burst_ongoing_q <= 1'b0;
-# drop_cnt_q <= 'b0;
-# state_q <= FORWARDING;
-# end else begin
-# burst_ongoing_q <= burst_ongoing_d;
-# drop_cnt_q <= drop_cnt_d;
-# state_q <= state_d;
-# end
-# end
-#
-# endmodule
-#
-#
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class axi4_w_buffer(Elaboratable):
-
-    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4,
-                 ENABLE_L2TLB=0, HUM_BUFFER_DEPTH=16):
-        # Parameter defaults match the original SystemVerilog module below.
-        self.ENABLE_L2TLB = ENABLE_L2TLB
-        self.HUM_BUFFER_DEPTH = HUM_BUFFER_DEPTH
-        self.axi4_aclk = Signal()  # input
-        self.axi4_arstn = Signal()  # input
-        self.l1_done_o = Signal()  # output
-        self.l1_accept_i = Signal()  # input
-        self.l1_save_i = Signal()  # input
-        self.l1_drop_i = Signal()  # input
-        self.l1_master_i = Signal()  # input
-        self.l1_id_i = Signal(AXI_ID_WIDTH)  # input
-        self.l1_len_i = Signal(8)  # input
-        self.l1_prefetch_i = Signal()  # input
-        self.l1_hit_i = Signal()  # input
-        self.l2_done_o = Signal()  # output
-        self.l2_accept_i = Signal()  # input
-        self.l2_drop_i = Signal()  # input
-        self.l2_master_i = Signal()  # input
-        self.l2_id_i = Signal(AXI_ID_WIDTH)  # input
-        self.l2_len_i = Signal(8)  # input
-        self.l2_prefetch_i = Signal()  # input
-        self.l2_hit_i = Signal()  # input
-        self.master_select_o = Signal()  # output
-        self.input_stall_o = Signal()  # output
-        self.output_stall_o = Signal()  # output
-        self.b_drop_o = Signal()  # output
-        self.b_done_i = Signal()  # input
-        self.id_o = Signal(AXI_ID_WIDTH)  # output
-        self.prefetch_o = Signal()  # output
-        self.hit_o = Signal()  # output
-        self.s_axi4_wdata = Signal(AXI_DATA_WIDTH)  # input
-        self.s_axi4_wvalid = Signal()  # input
-        self.s_axi4_wready = Signal()  # output
-        self.s_axi4_wstrb = Signal(AXI_DATA_WIDTH // 8)  # input
-        self.s_axi4_wlast = Signal()  # input
-        self.s_axi4_wuser = Signal(AXI_USER_WIDTH)  # input
-        self.m_axi4_wdata = Signal(AXI_DATA_WIDTH)  # output
-        self.m_axi4_wvalid = Signal()  # output
-        self.m_axi4_wready = Signal()  # input
-        self.m_axi4_wstrb = Signal(AXI_DATA_WIDTH // 8)  # output
-        self.m_axi4_wlast = Signal()  # output
-        self.m_axi4_wuser = Signal(AXI_USER_WIDTH)  # output
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # TODO: port the input buffer, the L1/L2 FIFOs and the HUM-buffer FSM
-        # from the SystemVerilog kept below (a starting sketch follows this class).
-        return m
-
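-# Editor's note: elaborate() is still empty; the full W buffer (input buffer,
-# L1/L2 FIFOs and the HUM-buffer FSM) lives in the SystemVerilog kept below.
-# As a starting point, a minimal sketch (not part of the generated file) of the
-# b_drop handshake register used in the ENABLE_L2TLB == 0 configuration,
-# assuming the default 'sync' domain and a local helper signal b_drop_set as in
-# the SV source:
-#
-#         b_drop_set = Signal()
-#         with m.If(self.b_done_i):
-#             m.d.sync += self.b_drop_o.eq(0)
-#         with m.Elif(b_drop_set):
-#             m.d.sync += self.b_drop_o.eq(1)
-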
-
-#
-# //import CfMath::log2;
-#
-# module axi4_w_buffer
-# #(
-# parameter AXI_DATA_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4,
-# parameter ENABLE_L2TLB = 0,
-# parameter HUM_BUFFER_DEPTH = 16
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-#
-# // L1 & L2 interfaces
-# output logic l1_done_o,
-# input logic l1_accept_i,
-# input logic l1_save_i,
-# input logic l1_drop_i,
-# input logic l1_master_i,
-# input logic [AXI_ID_WIDTH-1:0] l1_id_i,
-# input logic [7:0] l1_len_i,
-# input logic l1_prefetch_i,
-# input logic l1_hit_i,
-#
-# output logic l2_done_o,
-# input logic l2_accept_i,
-# input logic l2_drop_i,
-# input logic l2_master_i,
-# input logic [AXI_ID_WIDTH-1:0] l2_id_i,
-# input logic [7:0] l2_len_i,
-# input logic l2_prefetch_i,
-# input logic l2_hit_i,
-#
-# output logic master_select_o,
-# output logic input_stall_o,
-# output logic output_stall_o,
-#
-# // B sender interface
-# output logic b_drop_o,
-# input logic b_done_i,
-# output logic [AXI_ID_WIDTH-1:0] id_o,
-# output logic prefetch_o,
-# output logic hit_o,
-#
-# // AXI W channel interfaces
-# input logic [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
-# input logic s_axi4_wvalid,
-# output logic s_axi4_wready,
-# input logic [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
-# input logic s_axi4_wlast,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_wuser,
-#
-# output logic [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
-# output logic m_axi4_wvalid,
-# input logic m_axi4_wready,
-# output logic [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
-# output logic m_axi4_wlast,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_wuser
-# );
-#
-"""
-
- localparam BUFFER_WIDTH = AXI_DATA_WIDTH+AXI_USER_WIDTH+AXI_DATA_WIDTH/8+1;
-
- localparam INPUT_BUFFER_DEPTH = 4;
- localparam L1_FIFO_DEPTH = 8;
- localparam L2_FIFO_DEPTH = 4;
-
- logic [AXI_DATA_WIDTH-1:0] axi4_wdata;
- logic axi4_wvalid;
- logic axi4_wready;
- logic [AXI_DATA_WIDTH/8-1:0] axi4_wstrb;
- logic axi4_wlast;
- logic [AXI_USER_WIDTH-1:0] axi4_wuser;
-
- logic l1_fifo_valid_out;
- logic l1_fifo_ready_in;
- logic l1_fifo_valid_in;
- logic l1_fifo_ready_out;
-
- logic l1_req;
- logic l1_accept_cur, l1_save_cur, l1_drop_cur;
- logic l1_master_cur;
- logic [AXI_ID_WIDTH-1:0] l1_id_cur;
- logic [7:0] l1_len_cur;
- logic l1_hit_cur, l1_prefetch_cur;
- logic l1_save_in, l1_save_out;
- logic [log2(L1_FIFO_DEPTH)-1:0] n_l1_save_SP;
-
- logic l2_fifo_valid_out;
- logic l2_fifo_ready_in;
- logic l2_fifo_valid_in;
- logic l2_fifo_ready_out;
-
- logic l2_req;
- logic l2_accept_cur, l2_drop_cur;
- logic l2_master_cur;
- logic [AXI_ID_WIDTH-1:0] l2_id_cur;
- logic [7:0] l2_len_cur;
- logic l2_hit_cur, l2_prefetch_cur;
-
- logic fifo_select, fifo_select_SN, fifo_select_SP;
- logic w_done;
- logic b_drop_set;
-
- // HUM buffer signals
- logic hum_buf_ready_out;
- logic hum_buf_valid_in;
- logic hum_buf_ready_in;
- logic hum_buf_valid_out;
- logic hum_buf_underfull;
-
- logic [AXI_DATA_WIDTH-1:0] hum_buf_wdata;
- logic [AXI_DATA_WIDTH/8-1:0] hum_buf_wstrb;
- logic hum_buf_wlast;
- logic [AXI_USER_WIDTH-1:0] hum_buf_wuser;
-
- logic hum_buf_drop_req_SN, hum_buf_drop_req_SP;
- logic [7:0] hum_buf_drop_len_SN, hum_buf_drop_len_SP;
- logic hum_buf_almost_full;
-
- logic stop_store;
- logic wlast_in, wlast_out;
- logic signed [3:0] n_wlast_SN, n_wlast_SP;
- logic block_forwarding;
-
- // Search FSM
- typedef enum logic [3:0] {STORE, BYPASS,
- WAIT_L1_BYPASS_YES, WAIT_L2_BYPASS_YES,
- WAIT_L1_BYPASS_NO, WAIT_L2_BYPASS_NO,
- FLUSH, DISCARD,
- DISCARD_FINISH}
- hum_buf_state_t;
- hum_buf_state_t hum_buf_SP; // Present state
- hum_buf_state_t hum_buf_SN; // Next state
-
- axi_buffer_rab
- #(
- .DATA_WIDTH ( BUFFER_WIDTH ),
- .BUFFER_DEPTH ( INPUT_BUFFER_DEPTH )
- )
- u_input_buf
- (
- .clk ( axi4_aclk ),
- .rstn ( axi4_arstn ),
- // Push
- .data_in ( {s_axi4_wuser, s_axi4_wstrb, s_axi4_wdata, s_axi4_wlast} ),
- .valid_in ( s_axi4_wvalid ),
- .ready_out ( s_axi4_wready ),
- // Pop
- .data_out ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ),
- .valid_out ( axi4_wvalid ),
- .ready_in ( axi4_wready )
- );
-
- axi_buffer_rab
- #(
- .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+4 ),
- .BUFFER_DEPTH ( L1_FIFO_DEPTH )
- )
- u_l1_fifo
- (
- .clk ( axi4_aclk ),
- .rstn ( axi4_arstn ),
- // Push
- .data_in ( {l1_prefetch_i, l1_hit_i, l1_id_i, l1_len_i, l1_master_i, l1_accept_i, l1_save_i, l1_drop_i} ),
- .valid_in ( l1_fifo_valid_in ),
- .ready_out ( l1_fifo_ready_out ),
- // Pop
- .data_out ( {l1_prefetch_cur, l1_hit_cur, l1_id_cur, l1_len_cur, l1_master_cur, l1_accept_cur, l1_save_cur, l1_drop_cur} ),
- .valid_out ( l1_fifo_valid_out ),
- .ready_in ( l1_fifo_ready_in )
- );
-
- // Push upon receiving new requests from the TLB.
- assign l1_req = l1_accept_i | l1_save_i | l1_drop_i;
- assign l1_fifo_valid_in = l1_req & l1_fifo_ready_out;
-
- // Signal handshake
- assign l1_done_o = l1_fifo_valid_in;
- assign l2_done_o = l2_fifo_valid_in;
-
- // Stall AW input of L1 TLB
- assign input_stall_o = ~(l1_fifo_ready_out & l2_fifo_ready_out);
-
- // Interface b_drop signals + handshake
- always_comb begin
- if (fifo_select == 1'b0) begin
- prefetch_o = l1_prefetch_cur;
- hit_o = l1_hit_cur;
- id_o = l1_id_cur;
-
- l1_fifo_ready_in = w_done | b_done_i;
- l2_fifo_ready_in = 1'b0;
- end else begin
- prefetch_o = l2_prefetch_cur;
- hit_o = l2_hit_cur;
- id_o = l2_id_cur;
-
- l1_fifo_ready_in = 1'b0;
- l2_fifo_ready_in = w_done | b_done_i;
- end
- end
-
- // Detect when an L1 transaction save request enters or exits the L1 FIFO.
- assign l1_save_in = l1_fifo_valid_in & l1_save_i;
- assign l1_save_out = l1_fifo_ready_in & l1_save_cur;
-
- // Count the number of L1 transactions to save in the L1 FIFO.
- always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
- if (axi4_arstn == 0) begin
- n_l1_save_SP <= '0;
- end else if (l1_save_in ^ l1_save_out) begin
- if (l1_save_in) begin
- n_l1_save_SP <= n_l1_save_SP + 1'b1;
- end else if (l1_save_out) begin
- n_l1_save_SP <= n_l1_save_SP - 1'b1;
- end
- end
- end
-
- // Stall forwarding of AW L1 hits if:
- // 1. The HUM buffer cannot be bypassed.
- // 2. There are multiple L1 save requests in the FIFO, i.e., multiple L2 outputs pending.
- assign output_stall_o = (n_l1_save_SP > 1) || (block_forwarding == 1'b1);
-
- generate
- if (ENABLE_L2TLB == 1) begin : HUM_BUFFER
-
- axi_buffer_rab_bram
- #(
- .DATA_WIDTH ( BUFFER_WIDTH ),
- .BUFFER_DEPTH ( HUM_BUFFER_DEPTH )
- )
- u_hum_buf
- (
- .clk ( axi4_aclk ),
- .rstn ( axi4_arstn ),
- // Push
- .data_in ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ),
- .valid_in ( hum_buf_valid_in ),
- .ready_out ( hum_buf_ready_out ),
- // Pop
- .data_out ( {hum_buf_wuser, hum_buf_wstrb, hum_buf_wdata, hum_buf_wlast} ),
- .valid_out ( hum_buf_valid_out ),
- .ready_in ( hum_buf_ready_in ),
- // Clear
- .almost_full ( hum_buf_almost_full ),
- .underfull ( hum_buf_underfull ),
- .drop_req ( hum_buf_drop_req_SP ),
- .drop_len ( hum_buf_drop_len_SP )
- );
-
- axi_buffer_rab
- #(
- .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+3 ),
- .BUFFER_DEPTH ( L2_FIFO_DEPTH )
- )
- u_l2_fifo
- (
- .clk ( axi4_aclk ),
- .rstn ( axi4_arstn ),
- // Push
- .data_in ( {l2_prefetch_i, l2_hit_i, l2_id_i, l2_len_i, l2_master_i, l2_accept_i, l2_drop_i} ),
- .valid_in ( l2_fifo_valid_in ),
- .ready_out ( l2_fifo_ready_out ),
- // Pop
- .data_out ( {l2_prefetch_cur, l2_hit_cur, l2_id_cur, l2_len_cur, l2_master_cur, l2_accept_cur, l2_drop_cur} ),
- .valid_out ( l2_fifo_valid_out ),
- .ready_in ( l2_fifo_ready_in )
- );
-
- // Push upon receiving new result from TLB.
- assign l2_req = l2_accept_i | l2_drop_i;
- assign l2_fifo_valid_in = l2_req & l2_fifo_ready_out;
-
- assign wlast_in = axi4_wlast & hum_buf_valid_in & hum_buf_ready_out;
- assign wlast_out = hum_buf_wlast & hum_buf_valid_out & hum_buf_ready_in;
-
- always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
- if (axi4_arstn == 0) begin
- fifo_select_SP <= 1'b0;
- hum_buf_drop_len_SP <= 'b0;
- hum_buf_drop_req_SP <= 1'b0;
- hum_buf_SP <= STORE;
- n_wlast_SP <= 'b0;
- end else begin
- fifo_select_SP <= fifo_select_SN;
- hum_buf_drop_len_SP <= hum_buf_drop_len_SN;
- hum_buf_drop_req_SP <= hum_buf_drop_req_SN;
- hum_buf_SP <= hum_buf_SN;
- n_wlast_SP <= n_wlast_SN;
- end
- end
-
- always_comb begin
- n_wlast_SN = n_wlast_SP;
- if (hum_buf_drop_req_SP) begin // Happens exactly once per burst to be dropped.
- n_wlast_SN -= 1;
- end
- if (wlast_in) begin
- n_wlast_SN += 1;
- end
- if (wlast_out) begin
- n_wlast_SN -= 1;
- end
- end
-
- always_comb begin : HUM_BUFFER_FSM
- hum_buf_SN = hum_buf_SP;
-
- m_axi4_wlast = 1'b0;
- m_axi4_wdata = 'b0;
- m_axi4_wstrb = 'b0;
- m_axi4_wuser = 'b0;
-
- m_axi4_wvalid = 1'b0;
- axi4_wready = 1'b0;
-
- hum_buf_valid_in = 1'b0;
- hum_buf_ready_in = 1'b0;
-
- hum_buf_drop_req_SN = hum_buf_drop_req_SP;
- hum_buf_drop_len_SN = hum_buf_drop_len_SP;
- master_select_o = 1'b0;
-
- w_done = 1'b0; // read from FIFO without handshake with B sender
- b_drop_o = 1'b0; // send data from FIFO to B sender (with handshake)
- fifo_select = 1'b0;
-
- fifo_select_SN = fifo_select_SP;
- stop_store = 1'b0;
-
- block_forwarding = 1'b0;
-
- unique case (hum_buf_SP)
-
- STORE : begin
- // Simply store the data in the buffer.
- hum_buf_valid_in = axi4_wvalid & hum_buf_ready_out;
- axi4_wready = hum_buf_ready_out;
-
- // We have got a full burst in the HUM buffer, thus stop storing.
- if (wlast_in & !hum_buf_underfull | (n_wlast_SP > $signed(0))) begin
- hum_buf_SN = WAIT_L1_BYPASS_YES;
-
- // The buffer is full, thus wait for decision.
- end else if (~hum_buf_ready_out) begin
- hum_buf_SN = WAIT_L1_BYPASS_NO;
- end
-
- // Avoid the forwarding of L1 hits until we know whether we can bypass.
- if (l1_fifo_valid_out & l1_save_cur) begin
- block_forwarding = 1'b1;
- end
- end
-
- WAIT_L1_BYPASS_YES : begin
- // Wait for orders from L1 TLB.
- if (l1_fifo_valid_out) begin
-
- // L1 hit - forward data from buffer
- if (l1_accept_cur) begin
- m_axi4_wlast = hum_buf_wlast;
- m_axi4_wdata = hum_buf_wdata;
- m_axi4_wstrb = hum_buf_wstrb;
- m_axi4_wuser = hum_buf_wuser;
-
- m_axi4_wvalid = hum_buf_valid_out;
- hum_buf_ready_in = m_axi4_wready;
-
- master_select_o = l1_master_cur;
-
- // Detect last data beat.
- if (wlast_out) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- hum_buf_SN = STORE;
- end
-
- // L1 miss - wait for L2
- end else if (l1_save_cur) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- hum_buf_SN = WAIT_L2_BYPASS_YES;
-
- // L1 prefetch, prot, multi - drop data
- end else if (l1_drop_cur) begin
- fifo_select_SN = 1'b0; // L1
- hum_buf_drop_req_SN = 1'b1;
- hum_buf_drop_len_SN = l1_len_cur;
- hum_buf_SN = FLUSH;
- end
- end
- end
-
- WAIT_L2_BYPASS_YES : begin
- // Wait for orders from L2 TLB.
- if (l2_fifo_valid_out) begin
-
- // L2 hit - forward data from buffer
- if (l2_accept_cur) begin
- m_axi4_wlast = hum_buf_wlast;
- m_axi4_wdata = hum_buf_wdata;
- m_axi4_wstrb = hum_buf_wstrb;
- m_axi4_wuser = hum_buf_wuser;
-
- m_axi4_wvalid = hum_buf_valid_out;
- hum_buf_ready_in = m_axi4_wready;
-
- master_select_o = l2_master_cur;
-
- // Detect last data beat.
- if (wlast_out) begin
- fifo_select = 1'b1;
- w_done = 1'b1;
- hum_buf_SN = STORE;
- end
-
- // L2 miss/prefetch hit
- end else if (l2_drop_cur) begin
- fifo_select_SN = 1'b1; // L2
- hum_buf_drop_req_SN = 1'b1;
- hum_buf_drop_len_SN = l2_len_cur;
- hum_buf_SN = FLUSH;
- end
-
- // While we wait for orders from L2 TLB, we can still drop and accept L1 transactions.
- end else if (l1_fifo_valid_out) begin
-
- // L1 hit
- if (l1_accept_cur) begin
- hum_buf_SN = BYPASS;
-
- // L1 prefetch/prot/multi
- end else if (l1_drop_cur) begin
- hum_buf_SN = DISCARD;
- end
- end
- end
-
- FLUSH : begin
- // Clear HUM buffer flush request.
- hum_buf_drop_req_SN = 1'b0;
-
- // perform handshake with B sender
- fifo_select = fifo_select_SP;
- b_drop_o = 1'b1;
- if (b_done_i) begin
- hum_buf_SN = STORE;
- end
- end
-
- BYPASS : begin
- // Forward one full transaction from input buffer.
- m_axi4_wlast = axi4_wlast;
- m_axi4_wdata = axi4_wdata;
- m_axi4_wstrb = axi4_wstrb;
- m_axi4_wuser = axi4_wuser;
-
- m_axi4_wvalid = axi4_wvalid;
- axi4_wready = m_axi4_wready;
-
- master_select_o = l1_master_cur;
-
- // We have got a full transaction.
- if (axi4_wlast & axi4_wready & axi4_wvalid) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- hum_buf_SN = WAIT_L2_BYPASS_YES;
- end
- end
-
- DISCARD : begin
- // Discard one full transaction from input buffer.
- axi4_wready = 1'b1;
-
- // We have got a full transaction.
- if (axi4_wlast & axi4_wready & axi4_wvalid) begin
- // Try to perform handshake with B sender.
- fifo_select = 1'b0;
- b_drop_o = 1'b1;
- // We cannot wait here due to axi4_wready.
- if (b_done_i) begin
- hum_buf_SN = WAIT_L2_BYPASS_YES;
- end else begin
- hum_buf_SN = DISCARD_FINISH;
- end
- end
- end
-
- DISCARD_FINISH : begin
- // Perform handshake with B sender.
- fifo_select = 1'b0;
- b_drop_o = 1'b1;
- if (b_done_i) begin
- hum_buf_SN = WAIT_L2_BYPASS_YES;
- end
- end
-
- WAIT_L1_BYPASS_NO : begin
- // Do not allow the forwarding of L1 hits.
- block_forwarding = 1'b1;
-
- // Wait for orders from L1 TLB.
- if (l1_fifo_valid_out) begin
-
- // L1 hit - forward data from/through HUM buffer and refill the buffer
- if (l1_accept_cur) begin
- // Forward data from HUM buffer.
- m_axi4_wlast = hum_buf_wlast;
- m_axi4_wdata = hum_buf_wdata;
- m_axi4_wstrb = hum_buf_wstrb;
- m_axi4_wuser = hum_buf_wuser;
-
- m_axi4_wvalid = hum_buf_valid_out;
- hum_buf_ready_in = m_axi4_wready;
-
- master_select_o = l1_master_cur;
-
- // Refill the HUM buffer. Stop when buffer full.
- stop_store = ~hum_buf_ready_out;
- hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ;
- axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out;
-
- // Detect last data beat.
- if (wlast_out) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- if (~hum_buf_ready_out | hum_buf_almost_full) begin
- hum_buf_SN = WAIT_L1_BYPASS_NO;
- end else begin
- hum_buf_SN = STORE;
- end
- end
-
- // Allow the forwarding of L1 hits.
- block_forwarding = 1'b0;
-
- // L1 miss - wait for L2
- end else if (l1_save_cur) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- hum_buf_SN = WAIT_L2_BYPASS_NO;
-
- // L1 prefetch, prot, multi - drop data
- end else if (l1_drop_cur) begin
- fifo_select_SN = 1'b0; // L1
- hum_buf_drop_req_SN = 1'b1;
- hum_buf_drop_len_SN = l1_len_cur;
- hum_buf_SN = FLUSH;
-
- // Allow the forwarding of L1 hits.
- block_forwarding = 1'b0;
- end
- end
- end
-
- WAIT_L2_BYPASS_NO : begin
- // Do not allow the forwarding of L1 hits.
- block_forwarding = 1'b1;
-
- // Wait for orders from L2 TLB.
- if (l2_fifo_valid_out) begin
-
- // L2 hit - forward first part from HUM buffer, rest from input buffer
- if (l2_accept_cur) begin
- // Forward data from HUM buffer.
- m_axi4_wlast = hum_buf_wlast;
- m_axi4_wdata = hum_buf_wdata;
- m_axi4_wstrb = hum_buf_wstrb;
- m_axi4_wuser = hum_buf_wuser;
-
- m_axi4_wvalid = hum_buf_valid_out;
- hum_buf_ready_in = m_axi4_wready;
-
- master_select_o = l2_master_cur;
-
- // Refill the HUM buffer. Stop when buffer full.
- stop_store = ~hum_buf_ready_out;
- hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ;
- axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out;
-
- // Detect last data beat.
- if (wlast_out) begin
- fifo_select = 1'b1;
- w_done = 1'b1;
- if (~hum_buf_ready_out | hum_buf_almost_full) begin
- hum_buf_SN = WAIT_L1_BYPASS_NO;
- end else begin
- hum_buf_SN = STORE;
- end
- end
-
- // Allow the forwarding of L1 hits.
- block_forwarding = 1'b0;
-
- // L2 miss/prefetch hit - drop data
- end else if (l2_drop_cur) begin
- fifo_select_SN = 1'b1; // L2
- hum_buf_drop_req_SN = 1'b1;
- hum_buf_drop_len_SN = l2_len_cur;
- hum_buf_SN = FLUSH;
-
- // Allow the forwarding of L1 hits.
- block_forwarding = 1'b0;
- end
- end
- end
-
-
- default: begin
- hum_buf_SN = STORE;
- end
-
- endcase // hum_buf_SP
- end // HUM_BUFFER_FSM
-
- assign b_drop_set = 1'b0;
-
- end else begin // HUM_BUFFER
-
- // register to perform the handshake with B sender
- always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
- if (axi4_arstn == 0) begin
- b_drop_o <= 1'b0;
- end else if (b_done_i) begin
- b_drop_o <= 1'b0;
- end else if (b_drop_set) begin
- b_drop_o <= 1'b1;
- end
- end
-
- always_comb begin : OUTPUT_CTRL
-
- fifo_select = 1'b0;
- w_done = 1'b0;
- b_drop_set = 1'b0;
-
- m_axi4_wlast = 1'b0;
- m_axi4_wdata = 'b0;
- m_axi4_wstrb = 'b0;
- m_axi4_wuser = 'b0;
-
- m_axi4_wvalid = 1'b0;
- axi4_wready = 1'b0;
-
- if (l1_fifo_valid_out) begin
- // forward data
- if (l1_accept_cur) begin
- m_axi4_wlast = axi4_wlast;
- m_axi4_wdata = axi4_wdata;
- m_axi4_wstrb = axi4_wstrb;
- m_axi4_wuser = axi4_wuser;
-
- m_axi4_wvalid = axi4_wvalid;
- axi4_wready = m_axi4_wready;
-
- // Simply pop from FIFO upon last data beat.
- w_done = axi4_wlast & axi4_wvalid & axi4_wready;
-
- // discard entire burst
- end else if (b_drop_o == 1'b0) begin
- axi4_wready = 1'b1;
-
- // Simply pop from FIFO upon last data beat. Perform handshake with B sender.
- if (axi4_wlast & axi4_wvalid & axi4_wready)
- b_drop_set = 1'b1;
- end
- end
-
- end // OUTPUT_CTRL
-
- assign master_select_o = l1_master_cur;
- assign l2_fifo_ready_out = 1'b1;
- assign block_forwarding = 1'b0;
-
- // unused signals
- assign hum_buf_ready_out = 1'b0;
- assign hum_buf_valid_in = 1'b0;
- assign hum_buf_ready_in = 1'b0;
- assign hum_buf_valid_out = 1'b0;
- assign hum_buf_wdata = 'b0;
- assign hum_buf_wstrb = 'b0;
- assign hum_buf_wlast = 1'b0;
- assign hum_buf_wuser = 'b0;
- assign hum_buf_drop_len_SN = 'b0;
- assign hum_buf_drop_req_SN = 1'b0;
- assign hum_buf_almost_full = 1'b0;
-
- assign l2_fifo_valid_in = 1'b0;
- assign l2_fifo_valid_out = 1'b0;
- assign l2_prefetch_cur = 1'b0;
- assign l2_hit_cur = 1'b0;
- assign l2_id_cur = 'b0;
- assign l2_len_cur = 'b0;
- assign l2_master_cur = 1'b0;
- assign l2_accept_cur = 1'b0;
- assign l2_drop_cur = 1'b0;
-
- assign l2_req = 1'b0;
-
- assign fifo_select_SN = 1'b0;
- assign fifo_select_SP = 1'b0;
-
- assign stop_store = 1'b0;
- assign n_wlast_SP = 'b0;
- assign wlast_in = 1'b0;
- assign wlast_out = 1'b0;
-
- end // HUM_BUFFER
-
- endgenerate
-"""
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class axi4_w_sender(Elaboratable):
-
-    def __init__(self, AXI_DATA_WIDTH=32, AXI_USER_WIDTH=2):
-        # Parameter defaults match the original SystemVerilog module below.
-        self.axi4_aclk = Signal()  # input
-        self.axi4_arstn = Signal()  # input
-        self.s_axi4_wdata = Signal(AXI_DATA_WIDTH)  # input
-        self.s_axi4_wvalid = Signal()  # input
-        self.s_axi4_wready = Signal()  # output
-        self.s_axi4_wstrb = Signal(AXI_DATA_WIDTH // 8)  # input
-        self.s_axi4_wlast = Signal()  # input
-        self.s_axi4_wuser = Signal(AXI_USER_WIDTH)  # input
-        self.m_axi4_wdata = Signal(AXI_DATA_WIDTH)  # output
-        self.m_axi4_wvalid = Signal()  # output
-        self.m_axi4_wready = Signal()  # input
-        self.m_axi4_wstrb = Signal(AXI_DATA_WIDTH // 8)  # output
-        self.m_axi4_wlast = Signal()  # output
-        self.m_axi4_wuser = Signal(AXI_USER_WIDTH)  # output
-
- def elaborate(self, platform=None):
- m = Module()
- m.d.comb += self.m_axi4_wdata.eq(self.s_axi4_wdata)
- m.d.comb += self.m_axi4_wstrb.eq(self.s_axi4_wstrb)
- m.d.comb += self.m_axi4_wlast.eq(self.s_axi4_wlast)
- m.d.comb += self.m_axi4_wuser.eq(self.s_axi4_wuser)
- m.d.comb += self.m_axi4_wvalid.eq(self.s_axi4_wvalid)
- m.d.comb += self.s_axi4_wready.eq(self.m_axi4_wready)
- return m
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# module axi4_w_sender
-# #(
-# parameter AXI_DATA_WIDTH = 32,
-# parameter AXI_USER_WIDTH = 2
-# )
-# (
-# input axi4_aclk,
-# input axi4_arstn,
-#
-# input [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
-# input s_axi4_wvalid,
-# output s_axi4_wready,
-# input [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
-# input s_axi4_wlast,
-# input [AXI_USER_WIDTH-1:0] s_axi4_wuser,
-#
-# output [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
-# output m_axi4_wvalid,
-# input m_axi4_wready,
-# output [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
-# output m_axi4_wlast,
-# output [AXI_USER_WIDTH-1:0] m_axi4_wuser
-# );
-#
-# assign m_axi4_wdata = s_axi4_wdata;
-# assign m_axi4_wstrb = s_axi4_wstrb;
-# assign m_axi4_wlast = s_axi4_wlast;
-# assign m_axi4_wuser = s_axi4_wuser;
-#
-# assign m_axi4_wvalid = s_axi4_wvalid;
-# assign s_axi4_wready = m_axi4_wready;
-#
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Mux, Array, Elaboratable
-
-
-class axi_buffer_rab(Elaboratable):
-
-    def __init__(self, DATA_WIDTH, BUFFER_DEPTH):
-        # The SV module leaves DATA_WIDTH/BUFFER_DEPTH to the instantiating module.
-        self.BUFFER_DEPTH = BUFFER_DEPTH
-        self.clk = Signal()  # input
-        self.rstn = Signal()  # input
-        self.data_out = Signal(DATA_WIDTH)  # output
-        self.valid_out = Signal()  # output
-        self.ready_in = Signal()  # input
-        self.valid_in = Signal()  # input
-        self.data_in = Signal(DATA_WIDTH)  # input
-        self.ready_out = Signal()  # output
-        # Internal FIFO state (see the SV source below).
-        self.full = Signal()
-        self.elements = Signal(range(BUFFER_DEPTH + 1))
-        self.pointer_in = Signal(range(BUFFER_DEPTH))
-        self.pointer_out = Signal(range(BUFFER_DEPTH))
-        self.buffer = Array(Signal(DATA_WIDTH) for _ in range(BUFFER_DEPTH))
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # Combinational status and data outputs, as in the SV source below.
-        m.d.comb += self.full.eq(self.elements == self.BUFFER_DEPTH)
-        m.d.comb += self.data_out.eq(self.buffer[self.pointer_out])
-        m.d.comb += self.valid_out.eq(self.elements != 0)
-        m.d.comb += self.ready_out.eq(~self.full)
-        # The pointer and element-count updates still need porting (see the sketch below).
-        return m
-
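-# Editor's note: the combinational outputs above follow the SystemVerilog kept
-# below; the sequential pointer and element-count updates are still missing.
-# A minimal sketch (not part of the generated file), assuming it is added
-# inside elaborate() and that the default 'sync' domain is clocked by clk with
-# rstn as its reset:
-#
-#         # Element counter: one out and none in, or none out and one in.
-#         with m.If(self.ready_in & self.valid_out & (~self.valid_in | self.full)):
-#             m.d.sync += self.elements.eq(self.elements - 1)
-#         with m.Elif((~self.valid_out | ~self.ready_in) & self.valid_in & ~self.full):
-#             m.d.sync += self.elements.eq(self.elements + 1)
-#         # Write side: store the beat and advance the input pointer (wrapping).
-#         with m.If(self.valid_in & ~self.full):
-#             m.d.sync += self.buffer[self.pointer_in].eq(self.data_in)
-#             m.d.sync += self.pointer_in.eq(
-#                 Mux(self.pointer_in == self.BUFFER_DEPTH - 1, 0, self.pointer_in + 1))
-#         # Read side: advance the output pointer after a successful pop (wrapping).
-#         with m.If(self.ready_in & self.valid_out):
-#             m.d.sync += self.pointer_out.eq(
-#                 Mux(self.pointer_out == self.BUFFER_DEPTH - 1, 0, self.pointer_out + 1))
-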
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# //import CfMath::log2;
-#
-# module axi_buffer_rab
-# //#(
-# // parameter DATA_WIDTH,
-# // parameter BUFFER_DEPTH
-# //)
-# (
-# input logic clk,
-# input logic rstn,
-#
-# // Downstream port
-# output logic [DATA_WIDTH-1:0] data_out,
-# output logic valid_out,
-# input logic ready_in,
-#
-# // Upstream port
-# input logic valid_in,
-# input logic [DATA_WIDTH-1:0] data_in,
-# output logic ready_out
-# );
-#
-# localparam integer LOG_BUFFER_DEPTH = log2(BUFFER_DEPTH);
-#
-# // Internal data structures
-# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_in; // location to which we last wrote
-# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_out; // location from which we last sent
-# reg [LOG_BUFFER_DEPTH : 0] elements; // number of elements in the buffer
-# reg [DATA_WIDTH - 1 : 0] buffer [BUFFER_DEPTH - 1 : 0];
-#
-# wire full;
-#
-# integer loop1;
-#
-# assign full = (elements == BUFFER_DEPTH);
-#
-# always @(posedge clk or negedge rstn)
-# begin: elements_sequential
-# if (rstn == 1'b0)
-# elements <= 0;
-# else
-# begin
-# // ------------------
-# // Are we filling up?
-# // ------------------
-# // One out, none in
-# if (ready_in && valid_out && (!valid_in || full))
-# elements <= elements - 1;
-# // None out, one in
-# else if ((!valid_out || !ready_in) && valid_in && !full)
-# elements <= elements + 1;
-# // Else, either one out and one in, or none out and none in - stays unchanged
-# end
-# end
-#
-# always @(posedge clk or negedge rstn)
-# begin: buffers_sequential
-# if (rstn == 1'b0)
-# begin
-# for (loop1 = 0 ; loop1 < BUFFER_DEPTH ; loop1 = loop1 + 1)
-# buffer[loop1] <= 0;
-# end
-# else
-# begin
-# // Update the memory
-# if (valid_in && !full)
-# buffer[pointer_in] <= data_in;
-# end
-# end
-#
-# always @(posedge clk or negedge rstn)
-# begin: sequential
-# if (rstn == 1'b0)
-# begin
-# pointer_out <= 0;
-# pointer_in <= 0;
-# end
-# else
-# begin
-# // ------------------------------------
-# // Check what to do with the input side
-# // ------------------------------------
-# // We have some input, increase by 1 the input pointer
-# if (valid_in && !full)
-# begin
-# if (pointer_in == $unsigned(BUFFER_DEPTH - 1))
-# pointer_in <= 0;
-# else
-# pointer_in <= pointer_in + 1;
-# end
-# // Else we don't have any input, the input pointer stays the same
-#
-# // -------------------------------------
-# // Check what to do with the output side
-# // -------------------------------------
-# // We had pushed one flit out, we can try to go for the next one
-# if (ready_in && valid_out)
-# begin
-# if (pointer_out == $unsigned(BUFFER_DEPTH - 1))
-# pointer_out <= 0;
-# else
-# pointer_out <= pointer_out + 1;
-# end
-# // Else stay on the same output location
-# end
-# end
-#
-# // Update output ports
-# assign data_out = buffer[pointer_out];
-# assign valid_out = (elements != 0);
-#
-# assign ready_out = ~full;
-#
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class axi_buffer_rab_bram(Elaboratable):
-
-    def __init__(self, DATA_WIDTH, BUFFER_DEPTH):
-        # As in axi_buffer_rab, the widths come from the instantiating module.
-        self.BUFFER_DEPTH = BUFFER_DEPTH
-        self.clk = Signal()  # input
-        self.rstn = Signal()  # input
-        self.data_out = Signal(DATA_WIDTH)  # output
-        self.valid_out = Signal()  # output
-        self.ready_in = Signal()  # input
-        self.valid_in = Signal()  # input
-        self.data_in = Signal(DATA_WIDTH)  # input
-        self.ready_out = Signal()  # output
-        self.almost_full = Signal()  # output
-        self.underfull = Signal()  # output
-        self.drop_req = Signal()  # input
-        self.drop_len = Signal(8)  # input
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # TODO: port the BRAM-backed FIFO with drop support from the
-        # SystemVerilog kept below (a starting sketch follows this class).
-        return m
-
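-# Editor's note: elaborate() is still empty; the BRAM-based FIFO with drop
-# support lives in the SystemVerilog kept below.  A minimal sketch (not part of
-# the generated file) of its element counter, which may go negative after a
-# drop, assuming the default 'sync' domain; the counter width is chosen
-# generously here rather than derived from BUFFER_DEPTH as in the SV source:
-#
-#         n_elems = Signal(signed(8))
-#         nxt = n_elems
-#         # Dropping removes drop_len + 1 items (AXI-style length encoding).
-#         nxt = Mux(self.drop_req, nxt - (self.drop_len + 1), nxt)
-#         nxt = Mux(self.valid_in & self.ready_out, nxt + 1, nxt)
-#         nxt = Mux(self.valid_out & self.ready_in, nxt - 1, nxt)
-#         m.d.sync += n_elems.eq(nxt)
-#         m.d.comb += [
-#             self.valid_out.eq(n_elems > 0),
-#             self.underfull.eq(n_elems < 0),
-#             self.almost_full.eq(n_elems == self.BUFFER_DEPTH - 1),
-#             self.ready_out.eq(n_elems != self.BUFFER_DEPTH),
-#         ]
-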
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# ////import CfMath::log2;
-#
-# module axi_buffer_rab_bram
-# //#(
-# // parameter DATA_WIDTH,
-# // parameter BUFFER_DEPTH
-# // )
-# (
-# input logic clk,
-# input logic rstn,
-#
-# // Downstream port
-# output logic [DATA_WIDTH-1:0] data_out,
-# output logic valid_out,
-# input logic ready_in,
-#
-# // Upstream port
-# input logic valid_in,
-# input logic [DATA_WIDTH-1:0] data_in,
-# output logic ready_out,
-#
-# // Status and drop control
-# output logic almost_full,
-# output logic underfull,
-# input logic drop_req,
-# // Number of items to drop. As for AXI lengths, counting starts at zero, i.e., `drop_len == 0`
-# // and `drop_req` means drop one item.
-# input logic [7:0] drop_len
-# );
-#
-""" #docstring_begin
- // The BRAM needs to be in "write-first" mode for first-word fall-through FIFO behavior.
- // To still push and pop simultaneously if the buffer is full, we internally increase the
- // buffer depth by 1.
- localparam ACT_BUFFER_DEPTH = BUFFER_DEPTH+1;
- localparam ACT_LOG_BUFFER_DEPTH = log2(ACT_BUFFER_DEPTH+1);
-
- /**
- * Internal data structures
- */
- // Location to which we last wrote
- logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_in_d, ptr_in_q;
- // Location from which we last sent
- logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_d, ptr_out_q;
- // Required for fall-through behavior on the first word
- logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_bram;
- // Number of elements in the buffer. Can be negative if elements that have been dropped have not
- // yet been written.
- logic signed [ACT_LOG_BUFFER_DEPTH:0] n_elems_d, n_elems_q;
-
- logic [DATA_WIDTH-1:0] data_out_bram, data_out_q;
- logic valid_out_q;
-
- logic full;
-
- assign almost_full = (n_elems_q == BUFFER_DEPTH-1);
- assign full = (n_elems_q == BUFFER_DEPTH);
-
- always_ff @(posedge clk, negedge rstn) begin
- if (~rstn) begin
- n_elems_q <= '0;
- ptr_in_q <= '0;
- ptr_out_q <= '0;
- end else begin
- n_elems_q <= n_elems_d;
- ptr_in_q <= ptr_in_d;
- ptr_out_q <= ptr_out_d;
- end
- end
-
- // Update the number of elements.
- always_comb begin
- n_elems_d = n_elems_q;
- if (drop_req) begin
- n_elems_d -= (drop_len + 1);
- end
- if (valid_in && ready_out) begin
- n_elems_d += 1;
- end
- if (valid_out && ready_in) begin
- n_elems_d -= 1;
- end
- end
-
- // Update the output pointer.
- always_comb begin
- ptr_out_d = ptr_out_q;
- if (drop_req) begin
- if ((ptr_out_q + drop_len + 1) > (ACT_BUFFER_DEPTH - 1)) begin
- ptr_out_d = drop_len + 1 - (ACT_BUFFER_DEPTH - ptr_out_q);
- end else begin
- ptr_out_d += (drop_len + 1);
- end
- end
- if (valid_out && ready_in) begin
- if (ptr_out_d == (ACT_BUFFER_DEPTH - 1)) begin
- ptr_out_d = '0;
- end else begin
- ptr_out_d += 1;
- end
- end
- end
-
- // The BRAM has a read latency of one cycle, so apply the new address one cycle earlier for
- // first-word fall-through FIFO behavior.
- //assign ptr_out_bram = (ptr_out_q == (ACT_BUFFER_DEPTH-1)) ? '0 : (ptr_out_q + 1);
- assign ptr_out_bram = ptr_out_d;
-
- // Update the input pointer.
- always_comb begin
- ptr_in_d = ptr_in_q;
- if (valid_in && ready_out) begin
- if (ptr_in_d == (ACT_BUFFER_DEPTH - 1)) begin
- ptr_in_d = '0;
- end else begin
- ptr_in_d += 1;
- end
- end
- end
-
- // Update output ports.
- assign valid_out = (n_elems_q > $signed(0));
- assign underfull = (n_elems_q < $signed(0));
- assign ready_out = ~full;
-
- ram_tp_write_first #(
- .ADDR_WIDTH ( ACT_LOG_BUFFER_DEPTH ),
- .DATA_WIDTH ( DATA_WIDTH )
- )
- ram_tp_write_first_0
- (
- .clk ( clk ),
- .we ( valid_in & ~full ),
- .addr0 ( ptr_in_q ),
- .addr1 ( ptr_out_bram ),
- .d_i ( data_in ),
- .d0_o ( ),
- .d1_o ( data_out_bram )
- );
-
- // When reading from and writing to the same address on both ports ("write-read collision"),
- // the data on the read port is invalid during the write cycle. In this implementation,
- // this can happen only when the buffer is empty. Thus, we forward the data from a
- // register in this case.
- always @(posedge clk) begin
- if (rstn == 1'b0) begin
- data_out_q <= 'b0;
- end else if ( (ptr_out_bram == ptr_in_q) && (valid_in && !full) ) begin
- data_out_q <= data_in;
- end
- end
-
- always @(posedge clk) begin
- if (rstn == 1'b0) begin
- valid_out_q <= 'b0;
- end else begin
- valid_out_q <= valid_out;
- end
- end
-
- // Drive output data
- always_comb begin
- if (valid_out && !valid_out_q) begin // We have just written to an empty FIFO
- data_out = data_out_q;
- end else begin
- data_out = data_out_bram;
- end
- end
-
-"""
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class axi_rab_cfg(Elaboratable):
-
-    def __init__(self, N_PORTS=3, N_REGS=196, ADDR_WIDTH_VIRT=32,
-                 AXI_DATA_WIDTH=64, AXI_ADDR_WIDTH=32, MISS_META_WIDTH=10):
-        # Parameter defaults match the original SystemVerilog module below.
-        self.Clk_CI = Signal()  # input
-        self.Rst_RBI = Signal()  # input
-        self.s_axi_awaddr = Signal(AXI_ADDR_WIDTH)  # input
-        self.s_axi_awvalid = Signal()  # input
-        self.s_axi_awready = Signal()  # output
-        self.s_axi_wdata = Signal(AXI_DATA_WIDTH)  # input
-        self.s_axi_wstrb = Signal(AXI_DATA_WIDTH // 8)  # input
-        self.s_axi_wvalid = Signal()  # input
-        self.s_axi_wready = Signal()  # output
-        self.s_axi_bresp = Signal(2)  # output
-        self.s_axi_bvalid = Signal()  # output
-        self.s_axi_bready = Signal()  # input
-        self.s_axi_araddr = Signal(AXI_ADDR_WIDTH)  # input
-        self.s_axi_arvalid = Signal()  # input
-        self.s_axi_arready = Signal()  # output
-        self.s_axi_rdata = Signal(AXI_DATA_WIDTH)  # output
-        self.s_axi_rresp = Signal(2)  # output
-        self.s_axi_rvalid = Signal()  # output
-        self.s_axi_rready = Signal()  # input
-        self.L1Cfg_DO = Signal(64 * N_REGS)  # output, N_REGS x 64-bit slice registers
-        self.L1AllowMultiHit_SO = Signal()  # output
-        self.MissAddr_DI = Signal(ADDR_WIDTH_VIRT)  # input
-        self.MissMeta_DI = Signal(MISS_META_WIDTH)  # input
-        self.Miss_SI = Signal()  # input
-        self.MhFifoFull_SO = Signal()  # output
-        self.wdata_l2 = Signal(N_PORTS * AXI_DATA_WIDTH)  # output
-        self.waddr_l2 = Signal(N_PORTS * AXI_ADDR_WIDTH)  # output
-        self.wren_l2 = Signal(N_PORTS)  # output
-
-    def elaborate(self, platform=None):
-        m = Module()
-        # TODO: port the AXI-Lite register file, the miss-handling FIFO and the
-        # L2 write interface from the SystemVerilog kept below (a starting sketch follows).
-        return m
-
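-# Editor's note: elaborate() is still empty; the AXI-Lite register file and the
-# miss-handling logic live in the SystemVerilog kept below.  As a starting
-# point, a minimal sketch (not part of the generated file) of the AW-channel
-# handshake registers, assuming it is added inside elaborate() and that the
-# default 'sync' domain is clocked by Clk_CI with Rst_RBI as its reset; the
-# local signal names mirror the SV source:
-#
-#         awready = Signal(reset=1)
-#         awaddr_done = Signal()
-#         awaddr_reg = Signal.like(self.s_axi_awaddr)
-#         wresp_done = Signal()
-#         with m.If(awready & self.s_axi_awvalid):
-#             m.d.sync += [awready.eq(0), awaddr_done.eq(1),
-#                          awaddr_reg.eq(self.s_axi_awaddr)]
-#         with m.Elif(awaddr_done & wresp_done):
-#             m.d.sync += [awready.eq(1), awaddr_done.eq(0)]
-#         m.d.comb += self.s_axi_awready.eq(awready)
-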
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# // --=========================================================================--
-# //
-# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗
-# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝
-# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗
-# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║
-# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝
-# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝
-# //
-# //
-# // Author: Pirmin Vogel - vogelpi@iis.ee.ethz.ch
-# //
-# // Purpose : AXI4-Lite configuration and miss handling interface for RAB
-# //
-# // --=========================================================================--
-#
-# //import CfMath::log2;
-#
-# module axi_rab_cfg
-# #(
-# parameter N_PORTS = 3,
-# parameter N_REGS = 196,
-# parameter N_L2_SETS = 32,
-# parameter N_L2_SET_ENTRIES= 32,
-# parameter ADDR_WIDTH_PHYS = 40,
-# parameter ADDR_WIDTH_VIRT = 32,
-# parameter N_FLAGS = 4,
-# parameter AXI_DATA_WIDTH = 64,
-# parameter AXI_ADDR_WIDTH = 32,
-# parameter MISS_META_WIDTH = 10, // <= FIFO_WIDTH
-# parameter MH_FIFO_DEPTH = 16
-# )
-# (
-# input logic Clk_CI,
-# input logic Rst_RBI,
-#
-# // AXI Lite interface
-# input logic [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
-# input logic s_axi_awvalid,
-# output logic s_axi_awready,
-# input logic [AXI_DATA_WIDTH/8-1:0][7:0] s_axi_wdata,
-# input logic [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
-# input logic s_axi_wvalid,
-# output logic s_axi_wready,
-# output logic [1:0] s_axi_bresp,
-# output logic s_axi_bvalid,
-# input logic s_axi_bready,
-# input logic [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
-# input logic s_axi_arvalid,
-# output logic s_axi_arready,
-# output logic [AXI_DATA_WIDTH-1:0] s_axi_rdata,
-# output logic [1:0] s_axi_rresp,
-# output logic s_axi_rvalid,
-# input logic s_axi_rready,
-#
-# // Slice configuration
-# output logic [N_REGS-1:0][63:0] L1Cfg_DO,
-# output logic L1AllowMultiHit_SO,
-#
-# // Miss handling
-# input logic [ADDR_WIDTH_VIRT-1:0] MissAddr_DI,
-# input logic [MISS_META_WIDTH-1:0] MissMeta_DI,
-# input logic Miss_SI,
-# output logic MhFifoFull_SO,
-#
-# // L2 TLB
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] wdata_l2,
-# output logic [N_PORTS-1:0] [AXI_ADDR_WIDTH-1:0] waddr_l2,
-# output logic [N_PORTS-1:0] wren_l2
-# );
-#
-""" #docstring_begin
-
- localparam ADDR_LSB = log2(64/8); // 64 even if the AXI Lite interface is 32,
- // because RAB slices are 64 bit wide.
- localparam ADDR_MSB = log2(N_REGS)+ADDR_LSB-1;
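- // Editor's note (assuming CfMath::log2 is a ceiling log2): with the default
- // N_REGS = 196 this gives ADDR_LSB = 3 and ADDR_MSB = 8 + 3 - 1 = 10, i.e.
- // slice register i sits at byte offset i*8 and is selected by awaddr[10:3].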
-
- localparam L2SINGLE_AMAP_SIZE = 16'h4000; // Maximum 2048 TLB entries in L2
-
- localparam integer N_L2_ENTRIES = N_L2_SETS * N_L2_SET_ENTRIES;
-
- localparam logic [AXI_ADDR_WIDTH-1:0] L2_VA_MAX_ADDR = (N_L2_ENTRIES-1) << 2;
-
- logic [AXI_DATA_WIDTH/8-1:0][7:0] L1Cfg_DP[N_REGS]; // [Byte][Bit]
- genvar j;
-
- // █████╗ ██╗ ██╗██╗██╗ ██╗ ██╗ ██╗████████╗███████╗
- // ██╔══██╗╚██╗██╔╝██║██║ ██║ ██║ ██║╚══██╔══╝██╔════╝
- // ███████║ ╚███╔╝ ██║███████║█████╗██║ ██║ ██║ █████╗
- // ██╔══██║ ██╔██╗ ██║╚════██║╚════╝██║ ██║ ██║ ██╔══╝
- // ██║ ██║██╔╝ ██╗██║ ██║ ███████╗██║ ██║ ███████╗
- // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚══════╝
- //
- logic [AXI_ADDR_WIDTH-1:0] awaddr_reg;
- logic awaddr_done_rise;
- logic awaddr_done_reg;
- logic awaddr_done_reg_dly;
-
- logic [AXI_DATA_WIDTH/8-1:0][7:0] wdata_reg;
- logic [AXI_DATA_WIDTH/8-1:0] wstrb_reg;
- logic wdata_done_rise;
- logic wdata_done_reg;
- logic wdata_done_reg_dly;
-
- logic wresp_done_reg;
- logic wresp_running_reg;
-
- logic [AXI_ADDR_WIDTH-1:0] araddr_reg;
- logic araddr_done_reg;
-
- logic [AXI_DATA_WIDTH-1:0] rdata_reg;
- logic rresp_done_reg;
- logic rresp_running_reg;
-
- logic awready;
- logic wready;
- logic bvalid;
-
- logic arready;
- logic rvalid;
-
- logic wren;
- logic wren_l1;
-
- assign wren = ( wdata_done_rise & awaddr_done_reg ) | ( awaddr_done_rise & wdata_done_reg );
- assign wdata_done_rise = wdata_done_reg & ~wdata_done_reg_dly;
- assign awaddr_done_rise = awaddr_done_reg & ~awaddr_done_reg_dly;
-
- // reg_dly
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- wdata_done_reg_dly <= 1'b0;
- awaddr_done_reg_dly <= 1'b0;
- end
- else
- begin
- wdata_done_reg_dly <= wdata_done_reg;
- awaddr_done_reg_dly <= awaddr_done_reg;
- end
- end
-
- // AW Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- awaddr_done_reg <= 1'b0;
- awaddr_reg <= '0;
- awready <= 1'b1;
- end
- else
- begin
- if (awready && s_axi_awvalid)
- begin
- awready <= 1'b0;
- awaddr_done_reg <= 1'b1;
- awaddr_reg <= s_axi_awaddr;
- end
- else if (awaddr_done_reg && wresp_done_reg)
- begin
- awready <= 1'b1;
- awaddr_done_reg <= 1'b0;
- end
- end
- end
-
- // W Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- wdata_done_reg <= 1'b0;
- wready <= 1'b1;
- wdata_reg <= '0;
- wstrb_reg <= '0;
- end
- else
- begin
- if (wready && s_axi_wvalid)
- begin
- wready <= 1'b0;
- wdata_done_reg <= 1'b1;
- wdata_reg <= s_axi_wdata;
- wstrb_reg <= s_axi_wstrb;
- end
- else if (wdata_done_reg && wresp_done_reg)
- begin
- wready <= 1'b1;
- wdata_done_reg <= 1'b0;
- end
- end
- end
-
- // B Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- bvalid <= 1'b0;
- wresp_done_reg <= 1'b0;
- wresp_running_reg <= 1'b0;
- end
- else
- begin
- if (awaddr_done_reg && wdata_done_reg && !wresp_done_reg)
- begin
- if (!wresp_running_reg)
- begin
- bvalid <= 1'b1;
- wresp_running_reg <= 1'b1;
- end
- else if (s_axi_bready)
- begin
- bvalid <= 1'b0;
- wresp_done_reg <= 1'b1;
- wresp_running_reg <= 1'b0;
- end
- end
- else
- begin
- bvalid <= 1'b0;
- wresp_done_reg <= 1'b0;
- wresp_running_reg <= 1'b0;
- end
- end
- end
-
- // AR Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- araddr_done_reg <= 1'b0;
- arready <= 1'b1;
- araddr_reg <= '0;
- end
- else
- begin
- if (arready && s_axi_arvalid)
- begin
- arready <= 1'b0;
- araddr_done_reg <= 1'b1;
- araddr_reg <= s_axi_araddr;
- end
- else if (araddr_done_reg && rresp_done_reg)
- begin
- arready <= 1'b1;
- araddr_done_reg <= 1'b0;
- end
- end
- end
-
- // R Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- rresp_done_reg <= 1'b0;
- rvalid <= 1'b0;
- rresp_running_reg <= 1'b0;
- end
- else
- begin
- if (araddr_done_reg && !rresp_done_reg)
- begin
- if (!rresp_running_reg)
- begin
- rvalid <= 1'b1;
- rresp_running_reg <= 1'b1;
- end
- else if (s_axi_rready)
- begin
- rvalid <= 1'b0;
- rresp_done_reg <= 1'b1;
- rresp_running_reg <= 1'b0;
- end
- end
- else
- begin
- rvalid <= 1'b0;
- rresp_done_reg <= 1'b0;
- rresp_running_reg <= 1'b0;
- end
- end
- end
-
- // ██╗ ██╗ ██████╗███████╗ ██████╗ ██████╗ ███████╗ ██████╗
- // ██║ ███║ ██╔════╝██╔════╝██╔════╝ ██╔══██╗██╔════╝██╔════╝
- // ██║ ╚██║ ██║ █████╗ ██║ ███╗ ██████╔╝█████╗ ██║ ███╗
- // ██║ ██║ ██║ ██╔══╝ ██║ ██║ ██╔══██╗██╔══╝ ██║ ██║
- // ███████╗██║ ╚██████╗██║ ╚██████╔╝ ██║ ██║███████╗╚██████╔╝
- // ╚══════╝╚═╝ ╚═════╝╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ ╚═════╝
- //
- assign wren_l1 = wren && (awaddr_reg < L2SINGLE_AMAP_SIZE);
-
- always @( posedge Clk_CI or negedge Rst_RBI )
- begin
- var integer idx_reg, idx_byte;
- if ( Rst_RBI == 1'b0 )
- begin
- for ( idx_reg = 0; idx_reg < N_REGS; idx_reg++ )
- L1Cfg_DP[idx_reg] <= '0;
- end
- else if ( wren_l1 )
- begin
- if ( awaddr_reg[ADDR_LSB+1] == 1'b0 ) begin // VIRT_ADDR
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
- if ( (idx_byte < ADDR_WIDTH_VIRT/8) ) begin
- if ( wstrb_reg[idx_byte] ) begin
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte];
- end
- end
- else begin // Let synthesizer optimize away unused registers.
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
- end
- end
- end
- else if ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b10 ) begin // PHYS_ADDR
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
- if ( (idx_byte < ADDR_WIDTH_PHYS/8) ) begin
- if ( wstrb_reg[idx_byte] ) begin
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte];
- end
- end
- else begin // Let synthesizer optimize away unused registers.
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
- end
- end
- end
- else begin // ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b11 ) // FLAGS
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
- if ( (idx_byte < 1) ) begin
- if ( wstrb_reg[idx_byte] ) begin
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte] & { {{8-N_FLAGS}{1'b0}}, {{N_FLAGS}{1'b1}} };
- end
- end
- else begin // Let synthesizer optimize away unused registers.
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
- end
- end
- end
- end
- end // always @ ( posedge Clk_CI or negedge Rst_RBI )
-
- generate
- // Mask unused bits -> Synthesizer should optimize away unused registers
- for( j=0; j<N_REGS; j++ ) begin
- if ( j[1] == 1'b0 ) // VIRT_ADDR
- assign L1Cfg_DO[j] = { {{64-ADDR_WIDTH_VIRT}{1'b0}},{ADDR_WIDTH_VIRT{1'b1}} } & L1Cfg_DP[j];
- else if ( j[1:0] == 2'b10 ) // PHYS_ADDR
- assign L1Cfg_DO[j] = { {{64-ADDR_WIDTH_PHYS}{1'b0}},{ADDR_WIDTH_PHYS{1'b1}} } & L1Cfg_DP[j];
- else // if ( j[1:0] == 2'b11 ) // FLAGS
- assign L1Cfg_DO[j] = { {{64-N_FLAGS}{1'b0}},{N_FLAGS{1'b1}} } & L1Cfg_DP[j];
- end
- endgenerate
-
- always_comb
- begin
- if ( araddr_reg[ADDR_LSB-1] == 1'b1 ) // read upper 32 bit, for debugging over 32-bit interface
- rdata_reg = { {32'h00000000},{L1Cfg_DO[araddr_reg[ADDR_MSB:ADDR_LSB]][63:32]} };
- else
- rdata_reg = L1Cfg_DO[araddr_reg[ADDR_MSB:ADDR_LSB]];
- end
-
- assign s_axi_awready = awready;
- assign s_axi_wready = wready;
-
- assign s_axi_bresp = 2'b00;
- assign s_axi_bvalid = bvalid;
-
- assign s_axi_arready = arready;
- assign s_axi_rresp = 2'b00;
- assign s_axi_rvalid = rvalid;
-
- // ██╗ ██████╗ ██████╗███████╗ ██████╗
- // ██║ ╚════██╗ ██╔════╝██╔════╝██╔════╝
- // ██║ █████╔╝ ██║ █████╗ ██║ ███╗
- // ██║ ██╔═══╝ ██║ ██╔══╝ ██║ ██║
- // ███████╗███████╗ ╚██████╗██║ ╚██████╔╝
- // ╚══════╝╚══════╝ ╚═════╝╚═╝ ╚═════╝
- //
- logic [N_PORTS-1:0] l2_addr_is_in_va_rams;
- logic [N_PORTS-1:0] upper_word_is_written;
- logic [N_PORTS-1:0] lower_word_is_written;
- generate
- for( j=0; j< N_PORTS; j++)
- begin
- if (AXI_DATA_WIDTH == 64) begin
- assign l2_addr_is_in_va_rams[j] = (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg[log2(L2SINGLE_AMAP_SIZE)-1:0] <= L2_VA_MAX_ADDR);
- assign upper_word_is_written[j] = (wstrb_reg[7:4] != 4'b0000);
- assign lower_word_is_written[j] = (wstrb_reg[3:0] != 4'b0000);
- end else begin
- assign l2_addr_is_in_va_rams[j] = 1'b0;
- assign upper_word_is_written[j] = 1'b0;
- assign lower_word_is_written[j] = 1'b0;
- end
-
- always @( posedge Clk_CI or negedge Rst_RBI ) begin
- var integer idx_byte, off_byte;
- if ( Rst_RBI == 1'b0 )
- begin
- wren_l2[j] <= 1'b0;
- wdata_l2[j] <= '0;
- end
- else if (wren)
- begin
- if ( (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg < (j+2)*L2SINGLE_AMAP_SIZE) && (|wstrb_reg) )
- wren_l2[j] <= 1'b1;
- if (AXI_DATA_WIDTH == 32) begin
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ )
- wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte] & {8{wstrb_reg[idx_byte]}};
- end
- else if (AXI_DATA_WIDTH == 64) begin
- if (lower_word_is_written[j] == 1'b1)
- off_byte = 0;
- else
- off_byte = 4;
- // always put the payload in the lower word and set upper word to 0
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8/2; idx_byte++ )
- wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte+off_byte] & {8{wstrb_reg[idx_byte+off_byte]}};
- wdata_l2[j][AXI_DATA_WIDTH-1:AXI_DATA_WIDTH/2] <= 'b0;
- end
- // pragma translate_off
- else
- $fatal(1, "Unsupported AXI_DATA_WIDTH!");
- // pragma translate_on
- end
- else
- wren_l2[j] <= '0;
- end // always @ ( posedge Clk_CI or negedge Rst_RBI )
-
- // Properly align the 32-bit word address when writing from 64-bit interface:
- // Depending on the system, the incoming address may or may not be aligned to
- // the 64-bit word when writing the upper 32-bit word.
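- // For example, a 64-bit beat whose strobes select only bytes 7:4 targets the
- // upper 32-bit word: its payload is moved into the lower half of wdata_l2, and
- // bit 0 of waddr_l2 is forced to 1 so the RAM sees the odd 32-bit word address.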
- always_comb begin
- waddr_l2[j] = (awaddr_reg -(j+1)*L2SINGLE_AMAP_SIZE)/4;
- if (wren_l2[j]) begin
- if (AXI_DATA_WIDTH == 64) begin
- if (upper_word_is_written[j] == 1'b1) begin
- // address must be non-aligned
- waddr_l2[j][0] = 1'b1;
- end
- end
- // pragma translate_off
- else if (AXI_DATA_WIDTH != 32) begin
- $fatal(1, "Unsupported AXI_DATA_WIDTH!");
- end
- // pragma translate_on
- end
- end
-
- // Assert that only one 32-bit word is ever written at a time to VA RAMs on 64-bit data
- // systems.
- // pragma translate_off
- always_ff @ (posedge Clk_CI) begin
- if (AXI_DATA_WIDTH == 64) begin
- if (l2_addr_is_in_va_rams[j]) begin
- if (upper_word_is_written[j]) begin
- assert (!lower_word_is_written[j])
- else $error("Unsupported write across two 32-bit words to VA RAMs!");
- end
- else if (lower_word_is_written[j]) begin
- assert (!upper_word_is_written[j])
- else $error("Unsupported write across two 32-bit words to VA RAMs!");
- end
- end
- end
- end
- // pragma translate_on
-
- end // for (j=0; j< N_PORTS; j++)
- endgenerate
-
- // ███╗ ███╗██╗ ██╗ ███████╗██╗███████╗ ██████╗ ███████╗
- // ████╗ ████║██║ ██║ ██╔════╝██║██╔════╝██╔═══██╗██╔════╝
- // ██╔████╔██║███████║ █████╗ ██║█████╗ ██║ ██║███████╗
- // ██║╚██╔╝██║██╔══██║ ██╔══╝ ██║██╔══╝ ██║ ██║╚════██║
- // ██║ ╚═╝ ██║██║ ██║ ██║ ██║██║ ╚██████╔╝███████║
- // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═════╝ ╚══════╝
- //
- logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDin_D;
- logic AddrFifoWen_S;
- logic AddrFifoRen_S;
- logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDout_D;
- logic AddrFifoFull_S;
- logic AddrFifoEmpty_S;
- logic AddrFifoEmpty_SB;
- logic AddrFifoFull_SB;
-
- logic [MISS_META_WIDTH-1:0] MetaFifoDin_D;
- logic MetaFifoWen_S;
- logic MetaFifoRen_S;
- logic [MISS_META_WIDTH-1:0] MetaFifoDout_D;
- logic MetaFifoFull_S;
- logic MetaFifoEmpty_S;
- logic MetaFifoEmpty_SB;
- logic MetaFifoFull_SB;
-
- logic FifosDisabled_S;
- logic ConfRegWen_S;
- logic [1:0] ConfReg_DN;
- logic [1:0] ConfReg_DP;
-
- logic [AXI_DATA_WIDTH-1:0] wdata_reg_vec;
-
- assign FifosDisabled_S = ConfReg_DP[0];
- assign L1AllowMultiHit_SO = ConfReg_DP[1];
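- // Bit 0 of the configuration register disables pushing new misses into the
- // FIFOs; bit 1 drives L1AllowMultiHit_SO.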
-
- assign AddrFifoEmpty_S = ~AddrFifoEmpty_SB;
- assign MetaFifoEmpty_S = ~MetaFifoEmpty_SB;
-
- assign AddrFifoFull_S = ~AddrFifoFull_SB;
- assign MetaFifoFull_S = ~MetaFifoFull_SB;
-
- assign MhFifoFull_SO = (AddrFifoWen_S & AddrFifoFull_S) | (MetaFifoWen_S & MetaFifoFull_S);
-
- generate
- for ( j=0; j<AXI_DATA_WIDTH/8; j++ )
- assign wdata_reg_vec[(j+1)*8-1:j*8] = wdata_reg[j];
- endgenerate
-
- // write address FIFO
- always_comb
- begin
- AddrFifoWen_S = 1'b0;
- AddrFifoDin_D = 'b0;
- if ( (Miss_SI == 1'b1) && (FifosDisabled_S == 1'b0) ) // register a new miss
- begin
- AddrFifoWen_S = 1'b1;
- AddrFifoDin_D = MissAddr_DI;
- end
- else if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 'b0) && (FifosDisabled_S == 1'b0)) // write request from AXI interface
- begin
- AddrFifoWen_S = 1'b1;
- AddrFifoDin_D = wdata_reg_vec[ADDR_WIDTH_VIRT-1:0];
- end
- end
-
- // write meta FIFO
- always_comb
- begin
- MetaFifoWen_S = 1'b0;
- MetaFifoDin_D = 'b0;
- if ( (Miss_SI == 1'b1) && (FifosDisabled_S == 1'b0) ) // register a new miss
- begin
- MetaFifoWen_S = 1'b1;
- MetaFifoDin_D[MISS_META_WIDTH-1:0] = MissMeta_DI;
- end
- else if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 4'h8) && (FifosDisabled_S == 1'b0) ) // write request from AXI interface
- begin
- MetaFifoWen_S = 1'b1;
- MetaFifoDin_D = wdata_reg_vec[MISS_META_WIDTH-1:0];
- end
- end
-
- // write configuration register
- always_comb
- begin
- ConfRegWen_S = 1'b0;
- ConfReg_DN = 1'b0;
- if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 8'h10) ) // write request from AXI interface
- begin
- ConfRegWen_S = 1'b1;
- ConfReg_DN = wdata_reg_vec[$high(ConfReg_DN):0];
- end
- end
-
- // AXI read data
- always_comb
- begin
- s_axi_rdata = rdata_reg; // read L1 config
- AddrFifoRen_S = 1'b0;
- MetaFifoRen_S = 1'b0;
- if ( rvalid == 1'b1 )
- begin
- // read address FIFO
- if ( araddr_reg[ADDR_MSB:0] == 'b0 )
- begin
- s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
- s_axi_rdata[ADDR_WIDTH_VIRT-1:0] = AddrFifoDout_D;
- if ( AddrFifoEmpty_S == 1'b0 )
- AddrFifoRen_S = 1'b1;
- end
- // read meta FIFO
- else if ( araddr_reg[ADDR_MSB:0] == 4'h8 )
- begin
- s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
- s_axi_rdata[31] = MetaFifoEmpty_S;
- s_axi_rdata[MISS_META_WIDTH-1:0] = MetaFifoDout_D;
- if ( MetaFifoEmpty_S == 1'b0 )
- MetaFifoRen_S = 1'b1;
- end
- // read configuration register
- else if ( araddr_reg[ADDR_MSB:0] == 8'h10 )
- begin
- s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
- s_axi_rdata[$high(ConfReg_DP):0] = ConfReg_DP;
- end
- end // if ( rvalid == 1'b1 )
- end // always_comb begin
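- // Summary of the read decode above: offset 0x0 pops the miss-address FIFO,
- // offset 0x8 pops the miss-meta FIFO (bit 31 of the returned word flags an
- // empty FIFO), and offset 0x10 returns the configuration register.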
-
- // configuration register
- always_ff @(posedge Clk_CI or negedge Rst_RBI) begin
- if (Rst_RBI == 1'b0)
- begin
- ConfReg_DP <= 'b0;
- end
- else if (ConfRegWen_S == 1'b1)
- begin
- ConfReg_DP <= ConfReg_DN;
- end
- end
-
- generic_fifo
- #(
- .DATA_WIDTH ( ADDR_WIDTH_VIRT ),
- .DATA_DEPTH ( MH_FIFO_DEPTH )
- )
- fifo_addr_i
- (
- .clk ( Clk_CI ),
- .rst_n ( Rst_RBI ),
- .data_i ( AddrFifoDin_D ),
- .valid_i ( AddrFifoWen_S & AddrFifoFull_SB ),
- .grant_o ( AddrFifoFull_SB ),
- .data_o ( AddrFifoDout_D ),
- .valid_o ( AddrFifoEmpty_SB ),
- .grant_i ( AddrFifoRen_S ),
- .test_mode_i ( 1'b0 )
- );
-
- generic_fifo
- #(
- .DATA_WIDTH ( MISS_META_WIDTH ),
- .DATA_DEPTH ( MH_FIFO_DEPTH )
- )
- fifo_meta_i
- (
- .clk ( Clk_CI ),
- .rst_n ( Rst_RBI ),
- .data_i ( MetaFifoDin_D ),
- .valid_i ( MetaFifoWen_S & MetaFifoFull_SB ),
- .grant_o ( MetaFifoFull_SB ),
- .data_o ( MetaFifoDout_D ),
- .valid_o ( MetaFifoEmpty_SB ),
- .grant_i ( MetaFifoRen_S ),
- .test_mode_i ( 1'b0 )
- );
-"""
-#
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class axi_rab_top(Elaboratable):
-
- def __init__(self, N_PORTS=2, AXI_LITE_ADDR_WIDTH=32, AXI_LITE_DATA_WIDTH=64):
- # Parameter defaults mirror the SystemVerilog module parameters further
- # below; sv2nmigen emitted these names without defining them.
- self.Clk_CI = Signal() # input
- self.NonGatedClk_CI = Signal() # input
- self.Rst_RBI = Signal() # input
- self.s_axi4_awid = Signal() # input
- self.s_axi4_awaddr = Signal() # input
- self.s_axi4_awvalid = Signal(N_PORTS) # input
- self.s_axi4_awready = Signal(N_PORTS) # output
- self.s_axi4_awlen = Signal() # input
- self.s_axi4_awsize = Signal() # input
- self.s_axi4_awburst = Signal() # input
- self.s_axi4_awlock = Signal(N_PORTS) # input
- self.s_axi4_awprot = Signal() # input
- self.s_axi4_awcache = Signal() # input
- self.s_axi4_awregion = Signal() # input
- self.s_axi4_awqos = Signal() # input
- self.s_axi4_awuser = Signal() # input
- self.s_axi4_wdata = Signal() # input
- self.s_axi4_wvalid = Signal(N_PORTS) # input
- self.s_axi4_wready = Signal(N_PORTS) # output
- self.s_axi4_wstrb = Signal() # input
- self.s_axi4_wlast = Signal(N_PORTS) # input
- self.s_axi4_wuser = Signal() # input
- self.s_axi4_bid = Signal() # output
- self.s_axi4_bresp = Signal() # output
- self.s_axi4_bvalid = Signal(N_PORTS) # output
- self.s_axi4_buser = Signal() # output
- self.s_axi4_bready = Signal(N_PORTS) # input
- self.s_axi4_arid = Signal() # input
- self.s_axi4_araddr = Signal() # input
- self.s_axi4_arvalid = Signal(N_PORTS) # input
- self.s_axi4_arready = Signal(N_PORTS) # output
- self.s_axi4_arlen = Signal() # input
- self.s_axi4_arsize = Signal() # input
- self.s_axi4_arburst = Signal() # input
- self.s_axi4_arlock = Signal(N_PORTS) # input
- self.s_axi4_arprot = Signal() # input
- self.s_axi4_arcache = Signal() # input
- self.s_axi4_aruser = Signal() # input
- self.s_axi4_rid = Signal() # output
- self.s_axi4_rdata = Signal() # output
- self.s_axi4_rresp = Signal() # output
- self.s_axi4_rvalid = Signal(N_PORTS) # output
- self.s_axi4_rready = Signal(N_PORTS) # input
- self.s_axi4_rlast = Signal(N_PORTS) # output
- self.s_axi4_ruser = Signal() # output
- self.m0_axi4_awid = Signal() # output
- self.m0_axi4_awaddr = Signal() # output
- self.m0_axi4_awvalid = Signal(N_PORTS) # output
- self.m0_axi4_awready = Signal(N_PORTS) # input
- self.m0_axi4_awlen = Signal() # output
- self.m0_axi4_awsize = Signal() # output
- self.m0_axi4_awburst = Signal() # output
- self.m0_axi4_awlock = Signal(N_PORTS) # output
- self.m0_axi4_awprot = Signal() # output
- self.m0_axi4_awcache = Signal() # output
- self.m0_axi4_awregion = Signal() # output
- self.m0_axi4_awqos = Signal() # output
- self.m0_axi4_awuser = Signal() # output
- self.m0_axi4_wdata = Signal() # output
- self.m0_axi4_wvalid = Signal(N_PORTS) # output
- self.m0_axi4_wready = Signal(N_PORTS) # input
- self.m0_axi4_wstrb = Signal() # output
- self.m0_axi4_wlast = Signal(N_PORTS) # output
- self.m0_axi4_wuser = Signal() # output
- self.m0_axi4_bid = Signal() # input
- self.m0_axi4_bresp = Signal() # input
- self.m0_axi4_bvalid = Signal(N_PORTS) # input
- self.m0_axi4_buser = Signal() # input
- self.m0_axi4_bready = Signal(N_PORTS) # output
- self.m0_axi4_arid = Signal() # output
- self.m0_axi4_araddr = Signal() # output
- self.m0_axi4_arvalid = Signal(N_PORTS) # output
- self.m0_axi4_arready = Signal(N_PORTS) # input
- self.m0_axi4_arlen = Signal() # output
- self.m0_axi4_arsize = Signal() # output
- self.m0_axi4_arburst = Signal() # output
- self.m0_axi4_arlock = Signal(N_PORTS) # output
- self.m0_axi4_arprot = Signal() # output
- self.m0_axi4_arcache = Signal() # output
- self.m0_axi4_aruser = Signal() # output
- self.m0_axi4_rid = Signal() # input
- self.m0_axi4_rdata = Signal() # input
- self.m0_axi4_rresp = Signal() # input
- self.m0_axi4_rvalid = Signal(N_PORTS) # input
- self.m0_axi4_rready = Signal(N_PORTS) # output
- self.m0_axi4_rlast = Signal(N_PORTS) # input
- self.m0_axi4_ruser = Signal() # input
- self.m1_axi4_awid = Signal() # output
- self.m1_axi4_awaddr = Signal() # output
- self.m1_axi4_awvalid = Signal(N_PORTS) # output
- self.m1_axi4_awready = Signal(N_PORTS) # input
- self.m1_axi4_awlen = Signal() # output
- self.m1_axi4_awsize = Signal() # output
- self.m1_axi4_awburst = Signal() # output
- self.m1_axi4_awlock = Signal(N_PORTS) # output
- self.m1_axi4_awprot = Signal() # output
- self.m1_axi4_awcache = Signal() # output
- self.m1_axi4_awregion = Signal() # output
- self.m1_axi4_awqos = Signal() # output
- self.m1_axi4_awuser = Signal() # output
- self.m1_axi4_wdata = Signal() # output
- self.m1_axi4_wvalid = Signal(N_PORTS) # output
- self.m1_axi4_wready = Signal(N_PORTS) # input
- self.m1_axi4_wstrb = Signal() # output
- self.m1_axi4_wlast = Signal(N_PORTS) # output
- self.m1_axi4_wuser = Signal() # output
- self.m1_axi4_bid = Signal() # input
- self.m1_axi4_bresp = Signal() # input
- self.m1_axi4_bvalid = Signal(N_PORTS) # input
- self.m1_axi4_buser = Signal() # input
- self.m1_axi4_bready = Signal(N_PORTS) # output
- self.m1_axi4_arid = Signal() # output
- self.m1_axi4_araddr = Signal() # output
- self.m1_axi4_arvalid = Signal(N_PORTS) # output
- self.m1_axi4_arready = Signal(N_PORTS) # input
- self.m1_axi4_arlen = Signal() # output
- self.m1_axi4_arsize = Signal() # output
- self.m1_axi4_arburst = Signal() # output
- self.m1_axi4_arlock = Signal(N_PORTS) # output
- self.m1_axi4_arprot = Signal() # output
- self.m1_axi4_arcache = Signal() # output
- self.m1_axi4_aruser = Signal() # output
- self.m1_axi4_rid = Signal() # input
- self.m1_axi4_rdata = Signal() # input
- self.m1_axi4_rresp = Signal() # input
- self.m1_axi4_rvalid = Signal(N_PORTS) # input
- self.m1_axi4_rready = Signal(N_PORTS) # output
- self.m1_axi4_rlast = Signal(N_PORTS) # input
- self.m1_axi4_ruser = Signal() # input
- self.s_axi4lite_awaddr = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.s_axi4lite_awvalid = Signal() # input
- self.s_axi4lite_awready = Signal() # output
- self.s_axi4lite_wdata = Signal(AXI_LITE_DATA_WIDTH) # input
- self.s_axi4lite_wvalid = Signal() # input
- self.s_axi4lite_wready = Signal() # output
- self.s_axi4lite_wstrb = Signal(AXI_LITE_DATA_WIDTH // 8) # input, [AXI_LITE_DATA_WIDTH/8-1:0] in the SV source
- self.s_axi4lite_bresp = Signal(2) # output
- self.s_axi4lite_bvalid = Signal() # output
- self.s_axi4lite_bready = Signal() # input
- self.s_axi4lite_araddr = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.s_axi4lite_arvalid = Signal() # input
- self.s_axi4lite_arready = Signal() # output
- self.s_axi4lite_rdata = Signal(AXI_LITE_DATA_WIDTH) # output
- self.s_axi4lite_rresp = Signal(2) # output
- self.s_axi4lite_rvalid = Signal() # output
- self.s_axi4lite_rready = Signal() # input
- self.int_miss = Signal(N_PORTS) # output
- self.int_multi = Signal(N_PORTS) # output
- self.int_prot = Signal(N_PORTS) # output
- self.int_mhf_full = Signal() # output
-
- def elaborate(self, platform=None):
- m = Module()
- return m
-
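-
-# Minimal usage sketch, assuming the parameter defaults added to __init__ above:
-# nmigen.cli.main() can then drive the stub (its elaborate() still returns an
-# empty module), and only a few scalar ports are listed here for brevity.
-if __name__ == "__main__":
-    from nmigen.cli import main
-
-    rab = axi_rab_top()
-    main(rab, ports=[rab.Clk_CI, rab.Rst_RBI,
-                     rab.s_axi4lite_awvalid, rab.s_axi4lite_awready,
-                     rab.int_mhf_full])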
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# // --=========================================================================--
-# //
-# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ████████╗ ██████╗ ██████╗
-# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ╚══██╔══╝██╔═══██╗██╔══██╗
-# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝
-# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔═══╝
-# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ██║ ╚██████╔╝██║
-# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═════╝ ╚═╝
-# //
-# // --=========================================================================--
-# /*
-# * axi_rab_top
-# *
-# * The remapping address block (RAB) performs address translation for AXI
-# * transactions arriving at the input port and forwards them to different
-# * downstream AXI ports.
-# *
-# * The five AXI channels are each buffered on the input side using a FIFO,
-# * described in axi4_XX_buffer. The RAB lookup result is merged into the
-# * AXI transaction via the axi4_XX_sender instances, which manage upstream
-# * error signaling for failed lookups.
-# *
-# * Address translation is performed based on data stored in up to two
-# * translation lookaside buffers (TLBs), which are private per RAB port (each
-# * of which has two AXI master ports and one AXI slave port). These TLBs
-# * are managed in software through the AXI-Lite interface.
-# *
-# * If ACP is enabled, the `cache_coherent` flag in the TLBs is used to
-# * multiplex between the two ports. If ACP is disabled, only the first master
-# * port is used. In this case, the `cache_coherent` flag is used to set the
-# * AxCACHE signals of the AXI bus accordingly.
-# *
-# * Authors:
-# * Antonio Pullini <pullinia@iis.ee.ethz.ch>
-# * Conrad Burchert <bconrad@ethz.ch>
-# * Maheshwara Sharma <msharma@student.ethz.ch>
-# * Andreas Kurth <akurth@iis.ee.ethz.ch>
-# * Johannes Weinbuch <jweinbuch@student.ethz.ch>
-# * Pirmin Vogel <vogelpi@iis.ee.ethz.ch>
-# */
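-#
-# The `cache_coherent` steering described above can be pictured with a small
-# nmigen sketch (it reuses the imports at the top of this file; the names
-# MasterSelectSketch, en_acp, master_select and axcache are illustrative
-# placeholders, and the burst-dependent AxCACHE values are left out):
-class MasterSelectSketch(Elaboratable):
-    def __init__(self, en_acp=False):
-        self.en_acp = en_acp            # compile-time choice, like `EN_ACP below
-        self.cache_coherent = Signal()  # per-transaction flag from the TLB lookup
-        self.master_select = Signal()   # 0 -> first master port, 1 -> second (ACP)
-        self.axcache = Signal(4)        # AxCACHE driven on the first master port
-
-    def elaborate(self, platform=None):
-        m = Module()
-        if self.en_acp:
-            # With ACP, coherent traffic is steered to the second master port
-            # and the first port keeps a static AxCACHE value.
-            m.d.comb += self.master_select.eq(self.cache_coherent)
-            m.d.comb += self.axcache.eq(0b0011)
-        else:
-            # Without ACP, only the first master port is used and the flag
-            # selects the AxCACHE value instead.
-            m.d.comb += self.master_select.eq(0)
-            with m.If(self.cache_coherent):
-                m.d.comb += self.axcache.eq(0b1111)
-            with m.Else():
-                m.d.comb += self.axcache.eq(0b0011)
-        return m
-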
-#
-# //`include "pulp_soc_defines.sv"
-#
-# ////import CfMath::log2;
-#
-# module axi_rab_top
-#
-# // Parameters {{{
-# #(
-# parameter N_PORTS = 2,
-# parameter N_L2_SETS = 32,
-# parameter N_L2_SET_ENTRIES = 32,
-# parameter AXI_DATA_WIDTH = 64,
-# parameter AXI_S_ADDR_WIDTH = 32,
-# parameter AXI_M_ADDR_WIDTH = 40,
-# parameter AXI_LITE_DATA_WIDTH = 64,
-# parameter AXI_LITE_ADDR_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 10,
-# parameter AXI_USER_WIDTH = 6,
-# parameter MH_FIFO_DEPTH = 16
-# )
-# // }}}
-#
-# // Ports {{{
-# (
-#
-# input logic Clk_CI, // This clock may be gated.
-# input logic NonGatedClk_CI,
-# input logic Rst_RBI,
-#
-# // For every slave port there are two master ports. The master
-# // port to use can be set using the master_select flag of the protection
-# // bits of a slice
-#
-# // AXI4 Slave {{{
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_awid,
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_awaddr,
-# input logic [N_PORTS-1:0] s_axi4_awvalid,
-# output logic [N_PORTS-1:0] s_axi4_awready,
-# input logic [N_PORTS-1:0] [7:0] s_axi4_awlen,
-# input logic [N_PORTS-1:0] [2:0] s_axi4_awsize,
-# input logic [N_PORTS-1:0] [1:0] s_axi4_awburst,
-# input logic [N_PORTS-1:0] s_axi4_awlock,
-# input logic [N_PORTS-1:0] [2:0] s_axi4_awprot,
-# input logic [N_PORTS-1:0] [3:0] s_axi4_awcache,
-# input logic [N_PORTS-1:0] [3:0] s_axi4_awregion,
-# input logic [N_PORTS-1:0] [3:0] s_axi4_awqos,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_awuser,
-#
-# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
-# input logic [N_PORTS-1:0] s_axi4_wvalid,
-# output logic [N_PORTS-1:0] s_axi4_wready,
-# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
-# input logic [N_PORTS-1:0] s_axi4_wlast,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_wuser,
-#
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_bid,
-# output logic [N_PORTS-1:0] [1:0] s_axi4_bresp,
-# output logic [N_PORTS-1:0] s_axi4_bvalid,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_buser,
-# input logic [N_PORTS-1:0] s_axi4_bready,
-#
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_arid,
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_araddr,
-# input logic [N_PORTS-1:0] s_axi4_arvalid,
-# output logic [N_PORTS-1:0] s_axi4_arready,
-# input logic [N_PORTS-1:0] [7:0] s_axi4_arlen,
-# input logic [N_PORTS-1:0] [2:0] s_axi4_arsize,
-# input logic [N_PORTS-1:0] [1:0] s_axi4_arburst,
-# input logic [N_PORTS-1:0] s_axi4_arlock,
-# input logic [N_PORTS-1:0] [2:0] s_axi4_arprot,
-# input logic [N_PORTS-1:0] [3:0] s_axi4_arcache,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_aruser,
-#
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_rid,
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
-# output logic [N_PORTS-1:0] [1:0] s_axi4_rresp,
-# output logic [N_PORTS-1:0] s_axi4_rvalid,
-# input logic [N_PORTS-1:0] s_axi4_rready,
-# output logic [N_PORTS-1:0] s_axi4_rlast,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_ruser,
-# // }}}
-#
-# // AXI4 Master 0 {{{
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_awid,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_awaddr,
-# output logic [N_PORTS-1:0] m0_axi4_awvalid,
-# input logic [N_PORTS-1:0] m0_axi4_awready,
-# output logic [N_PORTS-1:0] [7:0] m0_axi4_awlen,
-# output logic [N_PORTS-1:0] [2:0] m0_axi4_awsize,
-# output logic [N_PORTS-1:0] [1:0] m0_axi4_awburst,
-# output logic [N_PORTS-1:0] m0_axi4_awlock,
-# output logic [N_PORTS-1:0] [2:0] m0_axi4_awprot,
-# output logic [N_PORTS-1:0] [3:0] m0_axi4_awcache,
-# output logic [N_PORTS-1:0] [3:0] m0_axi4_awregion,
-# output logic [N_PORTS-1:0] [3:0] m0_axi4_awqos,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_awuser,
-#
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_wdata,
-# output logic [N_PORTS-1:0] m0_axi4_wvalid,
-# input logic [N_PORTS-1:0] m0_axi4_wready,
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m0_axi4_wstrb,
-# output logic [N_PORTS-1:0] m0_axi4_wlast,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_wuser,
-#
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_bid,
-# input logic [N_PORTS-1:0] [1:0] m0_axi4_bresp,
-# input logic [N_PORTS-1:0] m0_axi4_bvalid,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_buser,
-# output logic [N_PORTS-1:0] m0_axi4_bready,
-#
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_arid,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_araddr,
-# output logic [N_PORTS-1:0] m0_axi4_arvalid,
-# input logic [N_PORTS-1:0] m0_axi4_arready,
-# output logic [N_PORTS-1:0] [7:0] m0_axi4_arlen,
-# output logic [N_PORTS-1:0] [2:0] m0_axi4_arsize,
-# output logic [N_PORTS-1:0] [1:0] m0_axi4_arburst,
-# output logic [N_PORTS-1:0] m0_axi4_arlock,
-# output logic [N_PORTS-1:0] [2:0] m0_axi4_arprot,
-# output logic [N_PORTS-1:0] [3:0] m0_axi4_arcache,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_aruser,
-#
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_rid,
-# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_rdata,
-# input logic [N_PORTS-1:0] [1:0] m0_axi4_rresp,
-# input logic [N_PORTS-1:0] m0_axi4_rvalid,
-# output logic [N_PORTS-1:0] m0_axi4_rready,
-# input logic [N_PORTS-1:0] m0_axi4_rlast,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_ruser,
-# // }}}
-#
-# // AXI4 Master 1 {{{
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_awid,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_awaddr,
-# output logic [N_PORTS-1:0] m1_axi4_awvalid,
-# input logic [N_PORTS-1:0] m1_axi4_awready,
-# output logic [N_PORTS-1:0] [7:0] m1_axi4_awlen,
-# output logic [N_PORTS-1:0] [2:0] m1_axi4_awsize,
-# output logic [N_PORTS-1:0] [1:0] m1_axi4_awburst,
-# output logic [N_PORTS-1:0] m1_axi4_awlock,
-# output logic [N_PORTS-1:0] [2:0] m1_axi4_awprot,
-# output logic [N_PORTS-1:0] [3:0] m1_axi4_awcache,
-# output logic [N_PORTS-1:0] [3:0] m1_axi4_awregion,
-# output logic [N_PORTS-1:0] [3:0] m1_axi4_awqos,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_awuser,
-#
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_wdata,
-# output logic [N_PORTS-1:0] m1_axi4_wvalid,
-# input logic [N_PORTS-1:0] m1_axi4_wready,
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m1_axi4_wstrb,
-# output logic [N_PORTS-1:0] m1_axi4_wlast,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_wuser,
-#
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_bid,
-# input logic [N_PORTS-1:0] [1:0] m1_axi4_bresp,
-# input logic [N_PORTS-1:0] m1_axi4_bvalid,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_buser,
-# output logic [N_PORTS-1:0] m1_axi4_bready,
-#
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_arid,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_araddr,
-# output logic [N_PORTS-1:0] m1_axi4_arvalid,
-# input logic [N_PORTS-1:0] m1_axi4_arready,
-# output logic [N_PORTS-1:0] [7:0] m1_axi4_arlen,
-# output logic [N_PORTS-1:0] [2:0] m1_axi4_arsize,
-# output logic [N_PORTS-1:0] [1:0] m1_axi4_arburst,
-# output logic [N_PORTS-1:0] m1_axi4_arlock,
-# output logic [N_PORTS-1:0] [2:0] m1_axi4_arprot,
-# output logic [N_PORTS-1:0] [3:0] m1_axi4_arcache,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_aruser,
-#
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_rid,
-# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_rdata,
-# input logic [N_PORTS-1:0] [1:0] m1_axi4_rresp,
-# input logic [N_PORTS-1:0] m1_axi4_rvalid,
-# output logic [N_PORTS-1:0] m1_axi4_rready,
-# input logic [N_PORTS-1:0] m1_axi4_rlast,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_ruser,
-# // }}}
-#
-# // AXI 4 Lite Slave (Configuration Interface) {{{
-# // AXI4-Lite port to setup the rab slices
-# // use this to program the configuration registers
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_awaddr,
-# input logic s_axi4lite_awvalid,
-# output logic s_axi4lite_awready,
-#
-# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_wdata,
-# input logic s_axi4lite_wvalid,
-# output logic s_axi4lite_wready,
-# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi4lite_wstrb,
-#
-# output logic [1:0] s_axi4lite_bresp,
-# output logic s_axi4lite_bvalid,
-# input logic s_axi4lite_bready,
-#
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_araddr,
-# input logic s_axi4lite_arvalid,
-# output logic s_axi4lite_arready,
-#
-# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_rdata,
-# output logic [1:0] s_axi4lite_rresp,
-# output logic s_axi4lite_rvalid,
-# input logic s_axi4lite_rready,
-# // }}}
-#
-# // BRAMs {{{
-# //`ifdef RAB_AX_LOG_EN
-# // BramPort.Slave ArBram_PS,
-# // BramPort.Slave AwBram_PS,
-# //`endif
-# // }}}
-#
-# // Logger Control {{{
-# //`ifdef RAB_AX_LOG_EN
-# // input logic LogEn_SI,
-# // input logic ArLogClr_SI,
-# // input logic AwLogClr_SI,
-# // output logic ArLogRdy_SO,
-# // output logic AwLogRdy_SO,
-# //`endif
-# // }}}
-#
-# // Interrupt Outputs {{{
-# // Interrupt lines to handle misses, collisions of slices/multiple hits,
-# // protection faults and overflow of the miss handling fifo
-# //`ifdef RAB_AX_LOG_EN
-# // output logic int_ar_log_full,
-# // output logic int_aw_log_full,
-# //`endif
-# output logic [N_PORTS-1:0] int_miss,
-# output logic [N_PORTS-1:0] int_multi,
-# output logic [N_PORTS-1:0] int_prot,
-# output logic int_mhf_full
-# // }}}
-#
-# );
-#
-"""#docstring_begin
-
- // }}}
-
- // Signals {{{
- // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗
- // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝
- // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗
- // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║
- // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║
- // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝
- //
-
- // Internal AXI4 lines; these connect the buffers on the slave side to the RAB
- // core and to the multiplexers that switch between the two master outputs.
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_awid;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_awaddr;
- logic [N_PORTS-1:0] int_awvalid;
- logic [N_PORTS-1:0] int_awready;
- logic [N_PORTS-1:0] [7:0] int_awlen;
- logic [N_PORTS-1:0] [2:0] int_awsize;
- logic [N_PORTS-1:0] [1:0] int_awburst;
- logic [N_PORTS-1:0] int_awlock;
- logic [N_PORTS-1:0] [2:0] int_awprot;
- logic [N_PORTS-1:0] [3:0] int_awcache;
- logic [N_PORTS-1:0] [3:0] int_awregion;
- logic [N_PORTS-1:0] [3:0] int_awqos;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_awuser;
-
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_wdata;
- logic [N_PORTS-1:0] int_wvalid;
- logic [N_PORTS-1:0] int_wready;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] int_wstrb;
- logic [N_PORTS-1:0] int_wlast;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_wuser;
-
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_bid;
- logic [N_PORTS-1:0] [1:0] int_bresp;
- logic [N_PORTS-1:0] int_bvalid;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_buser;
- logic [N_PORTS-1:0] int_bready;
-
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_arid;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_araddr;
- logic [N_PORTS-1:0] int_arvalid;
- logic [N_PORTS-1:0] int_arready;
- logic [N_PORTS-1:0] [7:0] int_arlen;
- logic [N_PORTS-1:0] [2:0] int_arsize;
- logic [N_PORTS-1:0] [1:0] int_arburst;
- logic [N_PORTS-1:0] int_arlock;
- logic [N_PORTS-1:0] [2:0] int_arprot;
- logic [N_PORTS-1:0] [3:0] int_arcache;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_aruser;
-
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_rid;
- logic [N_PORTS-1:0] [1:0] int_rresp;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_rdata;
- logic [N_PORTS-1:0] int_rlast;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_ruser;
- logic [N_PORTS-1:0] int_rvalid;
- logic [N_PORTS-1:0] int_rready;
-
- // rab_core outputs
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_wtrans_addr;
- logic [N_PORTS-1:0] int_wtrans_accept;
- logic [N_PORTS-1:0] int_wtrans_drop;
- logic [N_PORTS-1:0] int_wtrans_miss;
- logic [N_PORTS-1:0] int_wtrans_sent;
- logic [N_PORTS-1:0] int_wtrans_cache_coherent;
- logic [N_PORTS-1:0] int_wmaster_select;
-
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_rtrans_addr;
- logic [N_PORTS-1:0] int_rtrans_accept;
- logic [N_PORTS-1:0] int_rtrans_drop;
- logic [N_PORTS-1:0] int_rtrans_miss;
- logic [N_PORTS-1:0] int_rtrans_sent;
- logic [N_PORTS-1:0] int_rtrans_cache_coherent;
- logic [N_PORTS-1:0] int_rmaster_select;
-
- logic [N_PORTS-1:0] w_master_select;
-
- // Internal master0 AXI4 lines. These connect the first master port to the
- // multiplexers.
- // For the read address, write address and write data channels, the payload
- // lines are ignored whenever valid is not set, so only the valid and ready
- // signals need to be multiplexed.
- logic [N_PORTS-1:0] int_m0_awvalid;
- logic [N_PORTS-1:0] int_m0_awready;
-
- logic [N_PORTS-1:0] int_m0_wvalid;
- logic [N_PORTS-1:0] int_m0_wready;
-
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_bid;
- logic [N_PORTS-1:0] [1:0] int_m0_bresp;
- logic [N_PORTS-1:0] int_m0_bvalid;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_buser;
- logic [N_PORTS-1:0] int_m0_bready;
-
- logic [N_PORTS-1:0] int_m0_arvalid;
- logic [N_PORTS-1:0] int_m0_arready;
-
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_rid;
- logic [N_PORTS-1:0] [1:0] int_m0_rresp;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m0_rdata;
- logic [N_PORTS-1:0] int_m0_rlast;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_ruser;
- logic [N_PORTS-1:0] int_m0_rready;
- logic [N_PORTS-1:0] int_m0_rvalid;
-
- logic [N_PORTS-1:0] l1_m0_ar_accept;
- logic [N_PORTS-1:0] l1_m0_ar_drop;
- logic [N_PORTS-1:0] l1_m0_ar_save;
- logic [N_PORTS-1:0] l1_m0_ar_done;
- logic [N_PORTS-1:0] l2_m0_ar_accept;
- logic [N_PORTS-1:0] l2_m0_ar_drop;
- logic [N_PORTS-1:0] l2_m0_ar_done;
- logic [N_PORTS-1:0] l2_m0_ar_sending;
-
- logic [N_PORTS-1:0] l1_m0_aw_accept;
- logic [N_PORTS-1:0] l1_m0_aw_drop;
- logic [N_PORTS-1:0] l1_m0_aw_save;
- logic [N_PORTS-1:0] l1_m0_aw_done;
- logic [N_PORTS-1:0] l2_m0_aw_accept;
- logic [N_PORTS-1:0] l2_m0_aw_drop;
- logic [N_PORTS-1:0] l2_m0_aw_done;
- logic [N_PORTS-1:0] l2_m0_aw_sending;
-
- // Internal master1 AXI4 lines. These connect the second master port to the
- // multiplexers.
- // For the read address, write address and write data channels, the payload
- // lines are ignored whenever valid is not set, so only the valid and ready
- // signals need to be multiplexed.
- logic [N_PORTS-1:0] int_m1_awvalid;
- logic [N_PORTS-1:0] int_m1_awready;
-
- logic [N_PORTS-1:0] int_m1_wvalid;
- logic [N_PORTS-1:0] int_m1_wready;
-
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_bid;
- logic [N_PORTS-1:0] [1:0] int_m1_bresp;
- logic [N_PORTS-1:0] int_m1_bvalid;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_buser;
- logic [N_PORTS-1:0] int_m1_bready;
-
- logic [N_PORTS-1:0] int_m1_arvalid;
- logic [N_PORTS-1:0] int_m1_arready;
-
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_rid;
- logic [N_PORTS-1:0] [1:0] int_m1_rresp;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m1_rdata;
- logic [N_PORTS-1:0] int_m1_rlast;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_ruser;
- logic [N_PORTS-1:0] int_m1_rvalid;
- logic [N_PORTS-1:0] int_m1_rready;
-
- logic [N_PORTS-1:0] l1_m1_ar_accept;
- logic [N_PORTS-1:0] l1_m1_ar_drop;
- logic [N_PORTS-1:0] l1_m1_ar_save;
- logic [N_PORTS-1:0] l1_m1_ar_done;
- logic [N_PORTS-1:0] l2_m1_ar_accept;
- logic [N_PORTS-1:0] l2_m1_ar_drop;
- logic [N_PORTS-1:0] l2_m1_ar_done;
-
- logic [N_PORTS-1:0] l1_m1_aw_accept;
- logic [N_PORTS-1:0] l1_m1_aw_drop;
- logic [N_PORTS-1:0] l1_m1_aw_save;
- logic [N_PORTS-1:0] l1_m1_aw_done;
- logic [N_PORTS-1:0] l2_m1_aw_accept;
- logic [N_PORTS-1:0] l2_m1_aw_drop;
- logic [N_PORTS-1:0] l2_m1_aw_done;
-
- // L1 outputs
- logic [N_PORTS-1:0] rab_miss; // L1 RAB miss
- logic [N_PORTS-1:0] rab_prot;
- logic [N_PORTS-1:0] rab_multi;
- logic [N_PORTS-1:0] rab_prefetch;
-
- //
- // Signals used to support L2 TLB
- //
- // L2 RAM configuration signals
- logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] L2CfgWData_D;
- logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] L2CfgWAddr_D;
- logic [N_PORTS-1:0] L2CfgWE_S;
-
- // L1 output and drop Buffer
- logic [N_PORTS-1:0] L1OutRwType_D, L1DropRwType_DP;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L1OutUser_D, L1DropUser_DP;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L1OutId_D, L1DropId_DP;
- logic [N_PORTS-1:0] [7:0] L1OutLen_D, L1DropLen_DP;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L1OutAddr_D, L1DropAddr_DP;
- logic [N_PORTS-1:0] L1OutProt_D, L1DropProt_DP;
- logic [N_PORTS-1:0] L1OutMulti_D, L1DropMulti_DP;
- logic [N_PORTS-1:0] L1DropEn_S;
- logic [N_PORTS-1:0] L1DropPrefetch_S;
-
- logic [N_PORTS-1:0] L1DropValid_SN, L1DropValid_SP;
-
- // L2 input Buffer
- logic [N_PORTS-1:0] L2InRwType_DP;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2InUser_DP;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2InId_DP;
- logic [N_PORTS-1:0] [7:0] L2InLen_DP;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2InAddr_DP;
- logic [N_PORTS-1:0] L2InEn_S;
-
- // L2 output Buffer
- logic [N_PORTS-1:0] L2OutRwType_DP;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2OutUser_DP;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2OutId_DP;
- logic [N_PORTS-1:0] [7:0] L2OutLen_DP;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2OutInAddr_DP;
-
- logic [N_PORTS-1:0] L2OutHit_SN, L2OutHit_SP;
- logic [N_PORTS-1:0] L2OutMiss_SN, L2OutMiss_SP;
- logic [N_PORTS-1:0] L2OutProt_SN, L2OutProt_SP;
- logic [N_PORTS-1:0] L2OutMulti_SN, L2OutMulti_SP;
- logic [N_PORTS-1:0] L2OutCC_SN, L2OutCC_SP;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] L2OutAddr_DN, L2OutAddr_DP;
-
- logic [N_PORTS-1:0] L2OutValid_SN, L2OutValid_SP;
- logic [N_PORTS-1:0] L2OutPrefetch_S;
- logic [N_PORTS-1:0] L2OutReady_S;
- logic [N_PORTS-1:0] L2OutEn_S;
-
- // L2 outputs
- logic [N_PORTS-1:0] L2Busy_S;
- logic [N_PORTS-1:0] L2OutValid_S;
-
- logic [N_PORTS-1:0] L2Miss_S;
-
- // Signals for interfacing the AXI modules
- logic [N_PORTS-1:0] l1_ar_accept;
- logic [N_PORTS-1:0] l1_aw_accept;
- logic [N_PORTS-1:0] l1_w_accept;
- logic [N_PORTS-1:0] l1_xw_accept;
-
- logic [N_PORTS-1:0] l1_ar_drop;
- logic [N_PORTS-1:0] l1_aw_drop;
- logic [N_PORTS-1:0] l1_w_drop;
- logic [N_PORTS-1:0] l1_xw_drop;
-
- logic [N_PORTS-1:0] l1_ar_save;
- logic [N_PORTS-1:0] l1_aw_save;
- logic [N_PORTS-1:0] l1_w_save;
- logic [N_PORTS-1:0] l1_xw_save;
-
- logic [N_PORTS-1:0] l1_ar_done;
- logic [N_PORTS-1:0] l1_r_done;
- logic [N_PORTS-1:0] l1_r_drop;
- logic [N_PORTS-1:0] lx_r_drop;
- logic [N_PORTS-1:0] lx_r_done;
-
- logic [N_PORTS-1:0] l1_aw_done;
- logic [N_PORTS-1:0] l1_w_done;
- logic [N_PORTS-1:0] l1_xw_done;
- logic [N_PORTS-1:0] l1_aw_done_SP;
- logic [N_PORTS-1:0] l1_w_done_SP;
-
- logic [N_PORTS-1:0] l2_ar_accept;
- logic [N_PORTS-1:0] l2_aw_accept;
- logic [N_PORTS-1:0] l2_w_accept;
- logic [N_PORTS-1:0] l2_xw_accept;
-
- logic [N_PORTS-1:0] l2_ar_drop;
- logic [N_PORTS-1:0] l2_r_drop;
- logic [N_PORTS-1:0] l2_xr_drop;
- logic [N_PORTS-1:0] l2_aw_drop;
- logic [N_PORTS-1:0] l2_w_drop;
- logic [N_PORTS-1:0] l2_xw_drop;
-
- logic [N_PORTS-1:0] l2_aw_done;
- logic [N_PORTS-1:0] l2_w_done;
- logic [N_PORTS-1:0] l2_xw_done;
- logic [N_PORTS-1:0] l2_aw_done_SP;
- logic [N_PORTS-1:0] l2_w_done_SP;
-
- logic [N_PORTS-1:0] l2_ar_done;
- logic [N_PORTS-1:0] l2_r_done;
- logic [N_PORTS-1:0] l2_xr_done;
- logic [N_PORTS-1:0] l2_ar_done_SP;
- logic [N_PORTS-1:0] l2_r_done_SP;
-
- logic [N_PORTS-1:0] l1_mx_aw_done;
- logic [N_PORTS-1:0] l1_mx_ar_done;
- logic [N_PORTS-1:0] l1_m0_aw_done_SP;
- logic [N_PORTS-1:0] l1_m0_ar_done_SP;
- logic [N_PORTS-1:0] l1_m1_aw_done_SP;
- logic [N_PORTS-1:0] l1_m1_ar_done_SP;
-
- logic [N_PORTS-1:0] l2_mx_aw_done;
- logic [N_PORTS-1:0] l2_mx_ar_done;
- logic [N_PORTS-1:0] l2_m0_aw_done_SP;
- logic [N_PORTS-1:0] l2_m0_ar_done_SP;
- logic [N_PORTS-1:0] l2_m1_aw_done_SP;
- logic [N_PORTS-1:0] l2_m1_ar_done_SP;
-
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] l1_id_drop, lx_id_drop, b_id_drop;
- logic [N_PORTS-1:0] [7:0] l1_len_drop, lx_len_drop;
- logic [N_PORTS-1:0] l1_prefetch_drop, lx_prefetch_drop, b_prefetch_drop;
- logic [N_PORTS-1:0] l1_hit_drop, lx_hit_drop, b_hit_drop;
-
- logic [N_PORTS-1:0] b_drop;
- logic [N_PORTS-1:0] b_done;
-
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_aw_addr;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_ar_addr;
-
- logic [N_PORTS-1:0] l2_cache_coherent;
- logic [N_PORTS-1:0] l2_master_select;
-
- logic [N_PORTS-1:0] aw_in_stall;
- logic [N_PORTS-1:0] aw_out_stall;
-
- genvar i;
-
- // RRESP FSM
- typedef enum logic {IDLE, BUSY} r_resp_mux_ctrl_state_t;
- r_resp_mux_ctrl_state_t [N_PORTS-1:0] RRespMuxCtrl_SN, RRespMuxCtrl_SP;
- logic [N_PORTS-1:0] RRespSel_SN, RRespSel_SP;
- logic [N_PORTS-1:0] RRespBurst_S;
- logic [N_PORTS-1:0] RRespSelIm_S;
-
- // }}}
-
- // Local parameters {{{
-
- // Enable L2 for select ports
- localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY;
-
- // L2TLB parameters
- localparam integer HUM_BUFFER_DEPTH = (N_L2_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS)+13;
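- // For instance, with the default N_L2_SET_ENTRIES = 32 and assuming
- // `RAB_L2_N_PAR_VA_RAMS is defined as 4 in pulp_soc_defines.sv (not shown
- // here), this evaluates to 32/2/4 + 13 = 17 entries.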
-
- // }}}
-
- // Derive `master_select` from cache coherency flag. {{{
- `ifdef EN_ACP
- assign int_wmaster_select = int_wtrans_cache_coherent;
- assign int_rmaster_select = int_rtrans_cache_coherent;
- assign l2_master_select = l2_cache_coherent;
- `else
- assign int_wmaster_select = '0;
- assign int_rmaster_select = '0;
- assign l2_master_select = '0;
- `endif
- // }}}
-
- // Buf and Send {{{
- // ██████╗ ██╗ ██╗███████╗ ██╗ ███████╗███████╗███╗ ██╗██████╗
- // ██╔══██╗██║ ██║██╔════╝ ██║ ██╔════╝██╔════╝████╗ ██║██╔══██╗
- // ██████╔╝██║ ██║█████╗ ████████╗ ███████╗█████╗ ██╔██╗ ██║██║ ██║
- // ██╔══██╗██║ ██║██╔══╝ ██╔═██╔═╝ ╚════██║██╔══╝ ██║╚██╗██║██║ ██║
- // ██████╔╝╚██████╔╝██║ ██████║ ███████║███████╗██║ ╚████║██████╔╝
- // ╚═════╝ ╚═════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚═════╝
- //
- logic[N_PORTS-1:0] m0_write_is_burst, m0_read_is_burst;
- logic[N_PORTS-1:0] m1_write_is_burst, m1_read_is_burst;
-
- generate for (i = 0; i < N_PORTS; i++) begin : BUF_AND_SEND
-
- // Write Address channel (aw) {{{
- /*
- * write address channel (aw)
- *
- * ██╗ ██╗██████╗ ██╗████████╗███████╗ █████╗ ██████╗ ██████╗ ██████╗
- * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗
- * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ███████║██║ ██║██║ ██║██████╔╝
- * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██║██║ ██║██║ ██║██╔══██╗
- * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║██████╔╝██████╔╝██║ ██║
- * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
- *
- */
-
- axi4_aw_buffer
- #(
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_aw_buffer
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_awid ( s_axi4_awid[i] ),
- .s_axi4_awaddr ( s_axi4_awaddr[i] ),
- .s_axi4_awvalid ( s_axi4_awvalid[i] ),
- .s_axi4_awready ( s_axi4_awready[i] ),
- .s_axi4_awlen ( s_axi4_awlen[i] ),
- .s_axi4_awsize ( s_axi4_awsize[i] ),
- .s_axi4_awburst ( s_axi4_awburst[i] ),
- .s_axi4_awlock ( s_axi4_awlock[i] ),
- .s_axi4_awprot ( s_axi4_awprot[i] ),
- .s_axi4_awcache ( s_axi4_awcache[i] ),
- .s_axi4_awregion ( s_axi4_awregion[i] ),
- .s_axi4_awqos ( s_axi4_awqos[i] ),
- .s_axi4_awuser ( s_axi4_awuser[i] ),
- .m_axi4_awid ( int_awid[i] ),
- .m_axi4_awaddr ( int_awaddr[i] ),
- .m_axi4_awvalid ( int_awvalid[i] ),
- .m_axi4_awready ( int_awready[i] ),
- .m_axi4_awlen ( int_awlen[i] ),
- .m_axi4_awsize ( int_awsize[i] ),
- .m_axi4_awburst ( int_awburst[i] ),
- .m_axi4_awlock ( int_awlock[i] ),
- .m_axi4_awprot ( int_awprot[i] ),
- .m_axi4_awcache ( int_awcache[i] ),
- .m_axi4_awregion ( int_awregion[i] ),
- .m_axi4_awqos ( int_awqos[i] ),
- .m_axi4_awuser ( int_awuser[i] )
- );
-
- axi4_aw_sender
- #(
- .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
- .ENABLE_L2TLB ( ENABLE_L2TLB[i] )
- )
- u_aw_sender_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .l1_done_o ( l1_m0_aw_done[i] ),
- .l1_accept_i ( l1_m0_aw_accept[i] ),
- .l1_drop_i ( l1_m0_aw_drop[i] ),
- .l1_save_i ( l1_m0_aw_save[i] ),
- .l2_done_o ( l2_m0_aw_done[i] ),
- .l2_accept_i ( l2_m0_aw_accept[i] ),
- .l2_drop_i ( l2_m0_aw_drop[i] ),
- .l2_sending_o ( l2_m0_aw_sending[i] ),
- .l1_awaddr_i ( int_wtrans_addr[i] ),
- .l2_awaddr_i ( l2_aw_addr[i] ),
- .s_axi4_awid ( int_awid[i] ),
- .s_axi4_awvalid ( int_m0_awvalid[i] ),
- .s_axi4_awready ( int_m0_awready[i] ),
- .s_axi4_awlen ( int_awlen[i] ),
- .s_axi4_awsize ( int_awsize[i] ),
- .s_axi4_awburst ( int_awburst[i] ),
- .s_axi4_awlock ( int_awlock[i] ),
- .s_axi4_awprot ( int_awprot[i] ),
- .s_axi4_awcache ( int_awcache[i] ),
- .s_axi4_awregion ( int_awregion[i] ),
- .s_axi4_awqos ( int_awqos[i] ),
- .s_axi4_awuser ( int_awuser[i] ),
- .m_axi4_awid ( m0_axi4_awid[i] ),
- .m_axi4_awaddr ( m0_axi4_awaddr[i] ),
- .m_axi4_awvalid ( m0_axi4_awvalid[i] ),
- .m_axi4_awready ( m0_axi4_awready[i] ),
- .m_axi4_awlen ( m0_axi4_awlen[i] ),
- .m_axi4_awsize ( m0_axi4_awsize[i] ),
- .m_axi4_awburst ( m0_axi4_awburst[i] ),
- .m_axi4_awlock ( m0_axi4_awlock[i] ),
- .m_axi4_awprot ( m0_axi4_awprot[i] ),
- .m_axi4_awcache ( ),
- .m_axi4_awregion ( m0_axi4_awregion[i] ),
- .m_axi4_awqos ( m0_axi4_awqos[i] ),
- .m_axi4_awuser ( m0_axi4_awuser[i] )
- );
-
- // The AXCACHE signals are set according to burstiness and cache coherence or statically
- // when not connected to ACP on Zynq (implemented below).
- assign m0_write_is_burst[i] = (m0_axi4_awlen[i] != {8{1'b0}}) && (m0_axi4_awburst[i] != 2'b00);
- `ifndef EN_ACP
- always_comb begin
- if ( (l2_m0_aw_sending[i] & l2_cache_coherent[i]) | int_wtrans_cache_coherent[i]) begin
- if (m0_write_is_burst[i]) begin
- m0_axi4_awcache[i] = 4'b0111;
- end else begin
- m0_axi4_awcache[i] = 4'b1111;
- end
- end else begin
- m0_axi4_awcache[i] = 4'b0011;
- end
- end
- `else
- assign m0_axi4_awcache[i] = 4'b0011;
- `endif
-
- axi4_aw_sender
- #(
- .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
- .ENABLE_L2TLB ( ENABLE_L2TLB[i] )
- )
- u_aw_sender_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .l1_accept_i ( l1_m1_aw_accept[i] ),
- .l1_drop_i ( l1_m1_aw_drop[i] ),
- .l1_save_i ( l1_m1_aw_save[i] ),
- .l1_done_o ( l1_m1_aw_done[i] ),
- .l2_accept_i ( l2_m1_aw_accept[i] ),
- .l2_drop_i ( l2_m1_aw_drop[i] ),
- .l2_done_o ( l2_m1_aw_done[i] ),
- .l2_sending_o ( ), // just helps to set axcache
- .l1_awaddr_i ( int_wtrans_addr[i] ),
- .l2_awaddr_i ( l2_aw_addr[i] ),
- .s_axi4_awid ( int_awid[i] ),
- .s_axi4_awvalid ( int_m1_awvalid[i] ),
- .s_axi4_awready ( int_m1_awready[i] ),
- .s_axi4_awlen ( int_awlen[i] ),
- .s_axi4_awsize ( int_awsize[i] ),
- .s_axi4_awburst ( int_awburst[i] ),
- .s_axi4_awlock ( int_awlock[i] ),
- .s_axi4_awprot ( int_awprot[i] ),
- .s_axi4_awcache ( int_awcache[i] ),
- .s_axi4_awregion ( int_awregion[i] ),
- .s_axi4_awqos ( int_awqos[i] ),
- .s_axi4_awuser ( int_awuser[i] ),
- .m_axi4_awid ( m1_axi4_awid[i] ),
- .m_axi4_awaddr ( m1_axi4_awaddr[i] ),
- .m_axi4_awvalid ( m1_axi4_awvalid[i] ),
- .m_axi4_awready ( m1_axi4_awready[i] ),
- .m_axi4_awlen ( m1_axi4_awlen[i] ),
- .m_axi4_awsize ( m1_axi4_awsize[i] ),
- .m_axi4_awburst ( m1_axi4_awburst[i] ),
- .m_axi4_awlock ( m1_axi4_awlock[i] ),
- .m_axi4_awprot ( m1_axi4_awprot[i] ),
- .m_axi4_awcache ( ),
- .m_axi4_awregion ( m1_axi4_awregion[i] ),
- .m_axi4_awqos ( m1_axi4_awqos[i] ),
- .m_axi4_awuser ( m1_axi4_awuser[i] )
- );
-
- // The AXCACHE signals are set according to burstiness and cache coherence or statically
- // when not connected to ACP on Zynq (implemented below).
- assign m1_write_is_burst[i] = (m1_axi4_awlen[i] != {8{1'b0}}) && (m1_axi4_awburst[i] != 2'b00);
- `ifdef EN_ACP
- always_comb begin
- if (m1_write_is_burst[i]) begin
- m1_axi4_awcache[i] = 4'b1011;
- end else begin
- m1_axi4_awcache[i] = 4'b1111;
- end
- end
- `else
- assign m1_axi4_awcache[i] = 4'b0011;
- `endif
-
- // }}}
-
- // Write Data channel (w) {{{
- /*
- * write data channel (w)
- *
- * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ █████╗ ████████╗ █████╗
- * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗
- * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██║ ██║███████║ ██║ ███████║
- * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██║ ██║██╔══██║ ██║ ██╔══██║
- * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██████╔╝██║ ██║ ██║ ██║ ██║
- * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝
- *
- */
- axi4_w_buffer
- #(
- .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
- .ENABLE_L2TLB ( ENABLE_L2TLB[i] ),
- .HUM_BUFFER_DEPTH ( HUM_BUFFER_DEPTH )
- )
- u_w_buffer
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
-
- // L1 interface
- .l1_done_o ( l1_w_done[i] ),
- .l1_accept_i ( l1_w_accept[i] ),
- .l1_save_i ( l1_w_save[i] ),
- .l1_drop_i ( l1_w_drop[i] ),
- .l1_master_i ( int_wmaster_select[i] ),
- .l1_id_i ( l1_id_drop[i] ),
- .l1_len_i ( l1_len_drop[i] ),
- .l1_prefetch_i ( l1_prefetch_drop[i] ),
- .l1_hit_i ( l1_hit_drop[i] ),
-
- // L2 interface
- .l2_done_o ( l2_w_done[i] ),
- .l2_accept_i ( l2_w_accept[i] ),
- .l2_drop_i ( l2_w_drop[i] ),
- .l2_master_i ( l2_master_select[i] ),
- .l2_id_i ( lx_id_drop[i] ),
- .l2_len_i ( lx_len_drop[i] ),
- .l2_prefetch_i ( lx_prefetch_drop[i] ),
- .l2_hit_i ( lx_hit_drop[i] ),
-
- // Top-level control outputs
- .master_select_o ( w_master_select[i] ),
- .input_stall_o ( aw_in_stall[i] ), // stall L1 AW input if request buffers full
- .output_stall_o ( aw_out_stall[i] ), // stall L1 AW hit forwarding if bypass not possible
-
- // B sender interface
- .b_drop_o ( b_drop[i] ),
- .b_done_i ( b_done[i] ),
- .id_o ( b_id_drop[i] ),
- .prefetch_o ( b_prefetch_drop[i] ),
- .hit_o ( b_hit_drop[i] ),
-
- // AXI W channel interfaces
- .s_axi4_wdata ( s_axi4_wdata[i] ),
- .s_axi4_wvalid ( s_axi4_wvalid[i] ),
- .s_axi4_wready ( s_axi4_wready[i] ),
- .s_axi4_wstrb ( s_axi4_wstrb[i] ),
- .s_axi4_wlast ( s_axi4_wlast[i] ),
- .s_axi4_wuser ( s_axi4_wuser[i] ),
- .m_axi4_wdata ( int_wdata[i] ),
- .m_axi4_wvalid ( int_wvalid[i] ),
- .m_axi4_wready ( int_wready[i] ),
- .m_axi4_wstrb ( int_wstrb[i] ),
- .m_axi4_wlast ( int_wlast[i] ),
- .m_axi4_wuser ( int_wuser[i] )
- );
-
- axi4_w_sender
- #(
- .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_w_sender_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_wdata ( int_wdata[i] ),
- .s_axi4_wvalid ( int_m0_wvalid[i] ),
- .s_axi4_wready ( int_m0_wready[i] ),
- .s_axi4_wstrb ( int_wstrb[i] ),
- .s_axi4_wlast ( int_wlast[i] ),
- .s_axi4_wuser ( int_wuser[i] ),
- .m_axi4_wdata ( m0_axi4_wdata[i] ),
- .m_axi4_wvalid ( m0_axi4_wvalid[i] ),
- .m_axi4_wready ( m0_axi4_wready[i] ),
- .m_axi4_wstrb ( m0_axi4_wstrb[i] ),
- .m_axi4_wlast ( m0_axi4_wlast[i] ),
- .m_axi4_wuser ( m0_axi4_wuser[i] )
- );
-
- axi4_w_sender
- #(
- .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
-
- )
- u_w_sender_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_wdata ( int_wdata[i] ),
- .s_axi4_wvalid ( int_m1_wvalid[i] ),
- .s_axi4_wready ( int_m1_wready[i] ),
- .s_axi4_wstrb ( int_wstrb[i] ),
- .s_axi4_wlast ( int_wlast[i] ),
- .s_axi4_wuser ( int_wuser[i] ),
- .m_axi4_wdata ( m1_axi4_wdata[i] ),
- .m_axi4_wvalid ( m1_axi4_wvalid[i] ),
- .m_axi4_wready ( m1_axi4_wready[i] ),
- .m_axi4_wstrb ( m1_axi4_wstrb[i] ),
- .m_axi4_wlast ( m1_axi4_wlast[i] ),
- .m_axi4_wuser ( m1_axi4_wuser[i] )
- );
-
- /*
- * Multiplexer to switch between the two output master ports on the write data (w) channel
- */
- always_comb begin
- /* Only one output can be selected at any time */
- if (w_master_select[i] == 1'b0) begin
- int_m0_wvalid[i] = int_wvalid[i];
- int_m1_wvalid[i] = 1'b0;
- int_wready[i] = int_m0_wready[i];
- end else begin
- int_m0_wvalid[i] = 1'b0;
- int_m1_wvalid[i] = int_wvalid[i];
- int_wready[i] = int_m1_wready[i];
- end
- end
-
- // }}}
-
- // Write Response channel (b) {{{
- /*
- * write response channel (b)
- *
- * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ ███████╗███████╗██████╗
- * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔════╝██╔════╝██╔══██╗
- * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██████╔╝█████╗ ███████╗██████╔╝
- * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██╗██╔══╝ ╚════██║██╔═══╝
- * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║███████╗███████║██║
- * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝
- *
- */
- axi4_b_buffer
- #(
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_b_buffer_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_bid ( int_m0_bid[i] ),
- .s_axi4_bresp ( int_m0_bresp[i] ),
- .s_axi4_bvalid ( int_m0_bvalid[i] ),
- .s_axi4_buser ( int_m0_buser[i] ),
- .s_axi4_bready ( int_m0_bready[i] ),
- .m_axi4_bid ( m0_axi4_bid[i] ),
- .m_axi4_bresp ( m0_axi4_bresp[i] ),
- .m_axi4_bvalid ( m0_axi4_bvalid[i] ),
- .m_axi4_buser ( m0_axi4_buser[i] ),
- .m_axi4_bready ( m0_axi4_bready[i] )
- );
-
- axi4_b_buffer
- #(
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_b_buffer_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_bid ( int_m1_bid[i] ),
- .s_axi4_bresp ( int_m1_bresp[i] ),
- .s_axi4_bvalid ( int_m1_bvalid[i] ),
- .s_axi4_buser ( int_m1_buser[i] ),
- .s_axi4_bready ( int_m1_bready[i] ),
- .m_axi4_bid ( m1_axi4_bid[i] ),
- .m_axi4_bresp ( m1_axi4_bresp[i] ),
- .m_axi4_bvalid ( m1_axi4_bvalid[i] ),
- .m_axi4_buser ( m1_axi4_buser[i] ),
- .m_axi4_bready ( m1_axi4_bready[i] )
- );
-
- axi4_b_sender
- #(
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_b_sender
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .drop_i ( b_drop[i] ),
- .done_o ( b_done[i] ),
- .id_i ( b_id_drop[i] ),
- .prefetch_i ( b_prefetch_drop[i] ),
- .hit_i ( b_hit_drop[i] ),
- .s_axi4_bid ( s_axi4_bid[i] ),
- .s_axi4_bresp ( s_axi4_bresp[i] ),
- .s_axi4_bvalid ( s_axi4_bvalid[i] ),
- .s_axi4_buser ( s_axi4_buser[i] ),
- .s_axi4_bready ( s_axi4_bready[i] ),
- .m_axi4_bid ( int_bid[i] ),
- .m_axi4_bresp ( int_bresp[i] ),
- .m_axi4_bvalid ( int_bvalid[i] ),
- .m_axi4_buser ( int_buser[i] ),
- .m_axi4_bready ( int_bready[i] )
- );
-
- /*
- * Multiplexer to switch between the two output master ports on the write response (b) channel
- */
- always_comb begin
- /* Output 1 always gets priority, so if it has something to send, connect
- it and let output 0 wait by deasserting its bready */
- if (int_m1_bvalid[i] == 1'b1) begin
- int_m0_bready[i] = 1'b0;
- int_m1_bready[i] = int_bready[i];
-
- int_bid[i] = int_m1_bid[i];
- int_bresp[i] = int_m1_bresp[i];
- int_buser[i] = int_m1_buser[i];
- int_bvalid[i] = int_m1_bvalid[i];
- end else begin
- int_m0_bready[i] = int_bready[i];
- int_m1_bready[i] = 1'b0;
-
- int_bid[i] = int_m0_bid[i];
- int_bresp[i] = int_m0_bresp[i];
- int_buser[i] = int_m0_buser[i];
- int_bvalid[i] = int_m0_bvalid[i];
- end
- end
-
- // }}}
-
- // Read Address channel (ar) {{{
- /*
- * read address channel (ar)
- *
- * ██████╗ ███████╗ █████╗ ██████╗ █████╗ ██████╗ ██████╗ ██████╗
- * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗
- * ██████╔╝█████╗ ███████║██║ ██║ ███████║██║ ██║██║ ██║██████╔╝
- * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██║██║ ██║██║ ██║██╔══██╗
- * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║██████╔╝██████╔╝██║ ██║
- * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
- *
- */
- axi4_ar_buffer
- #(
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_ar_buffer
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_arid ( s_axi4_arid[i] ),
- .s_axi4_araddr ( s_axi4_araddr[i] ),
- .s_axi4_arvalid ( s_axi4_arvalid[i] ),
- .s_axi4_arready ( s_axi4_arready[i] ),
- .s_axi4_arlen ( s_axi4_arlen[i] ),
- .s_axi4_arsize ( s_axi4_arsize[i] ),
- .s_axi4_arburst ( s_axi4_arburst[i] ),
- .s_axi4_arlock ( s_axi4_arlock[i] ),
- .s_axi4_arprot ( s_axi4_arprot[i] ),
- .s_axi4_arcache ( s_axi4_arcache[i] ),
- .s_axi4_aruser ( s_axi4_aruser[i] ),
- .m_axi4_arid ( int_arid[i] ),
- .m_axi4_araddr ( int_araddr[i] ),
- .m_axi4_arvalid ( int_arvalid[i] ),
- .m_axi4_arready ( int_arready[i] ),
- .m_axi4_arlen ( int_arlen[i] ),
- .m_axi4_arsize ( int_arsize[i] ),
- .m_axi4_arburst ( int_arburst[i] ),
- .m_axi4_arlock ( int_arlock[i] ),
- .m_axi4_arprot ( int_arprot[i] ),
- .m_axi4_arcache ( int_arcache[i] ),
- .m_axi4_aruser ( int_aruser[i] )
- );
-
- axi4_ar_sender
- #(
- .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
- .ENABLE_L2TLB ( ENABLE_L2TLB[i] )
- )
- u_ar_sender_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .l1_done_o ( l1_m0_ar_done[i] ),
- .l1_accept_i ( l1_m0_ar_accept[i] ),
- .l1_drop_i ( l1_m0_ar_drop[i] ),
- .l1_save_i ( l1_m0_ar_save[i] ),
- .l2_done_o ( l2_m0_ar_done[i] ),
- .l2_accept_i ( l2_m0_ar_accept[i] ),
- .l2_drop_i ( l2_m0_ar_drop[i] ),
- .l2_sending_o ( l2_m0_ar_sending[i] ),
- .l1_araddr_i ( int_rtrans_addr[i] ),
- .l2_araddr_i ( l2_ar_addr[i] ),
- .s_axi4_arid ( int_arid[i] ),
- .s_axi4_arvalid ( int_m0_arvalid[i] ),
- .s_axi4_arready ( int_m0_arready[i] ),
- .s_axi4_arlen ( int_arlen[i] ),
- .s_axi4_arsize ( int_arsize[i] ),
- .s_axi4_arburst ( int_arburst[i] ),
- .s_axi4_arlock ( int_arlock[i] ),
- .s_axi4_arprot ( int_arprot[i] ),
- .s_axi4_arcache ( int_arcache[i] ),
- .s_axi4_aruser ( int_aruser[i] ),
- .m_axi4_arid ( m0_axi4_arid[i] ),
- .m_axi4_araddr ( m0_axi4_araddr[i] ),
- .m_axi4_arvalid ( m0_axi4_arvalid[i] ),
- .m_axi4_arready ( m0_axi4_arready[i] ),
- .m_axi4_arlen ( m0_axi4_arlen[i] ),
- .m_axi4_arsize ( m0_axi4_arsize[i] ),
- .m_axi4_arburst ( m0_axi4_arburst[i] ),
- .m_axi4_arlock ( m0_axi4_arlock[i] ),
- .m_axi4_arprot ( m0_axi4_arprot[i] ),
- .m_axi4_arcache ( ),
- .m_axi4_aruser ( m0_axi4_aruser[i] )
- );
-
- // The AXCACHE signals are set according to burstiness and cache coherence or statically
- // when not connected to ACP on Zynq (implemented below).
- assign m0_read_is_burst[i] = (m0_axi4_arlen[i] != {8{1'b0}}) && (m0_axi4_arburst[i] != 2'b00);
- `ifndef EN_ACP
- always_comb begin
- if ( (l2_m0_ar_sending[i] & l2_cache_coherent[i]) | int_rtrans_cache_coherent[i]) begin
- if (m0_read_is_burst[i]) begin
- m0_axi4_arcache[i] = 4'b1011;
- end else begin
- m0_axi4_arcache[i] = 4'b1111;
- end
- end else begin
- m0_axi4_arcache[i] = 4'b0011;
- end
- end
- `else
- assign m0_axi4_arcache[i] = 4'b0011;
- `endif
-
- axi4_ar_sender
- #(
- .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
- .ENABLE_L2TLB ( ENABLE_L2TLB[i] )
- )
- u_ar_sender_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .l1_done_o ( l1_m1_ar_done[i] ),
- .l1_accept_i ( l1_m1_ar_accept[i] ),
- .l1_drop_i ( l1_m1_ar_drop[i] ),
- .l1_save_i ( l1_m1_ar_save[i] ),
- .l2_done_o ( l2_m1_ar_done[i] ),
- .l2_accept_i ( l2_m1_ar_accept[i] ),
- .l2_drop_i ( l2_m1_ar_drop[i] ),
- .l2_sending_o ( ), // just helps to set axcache
- .l1_araddr_i ( int_rtrans_addr[i] ),
- .l2_araddr_i ( l2_ar_addr[i] ),
- .s_axi4_arid ( int_arid[i] ),
- .s_axi4_arvalid ( int_m1_arvalid[i] ),
- .s_axi4_arready ( int_m1_arready[i] ),
- .s_axi4_arlen ( int_arlen[i] ),
- .s_axi4_arsize ( int_arsize[i] ),
- .s_axi4_arburst ( int_arburst[i] ),
- .s_axi4_arlock ( int_arlock[i] ),
- .s_axi4_arprot ( int_arprot[i] ),
- .s_axi4_arcache ( int_arcache[i] ),
- .s_axi4_aruser ( int_aruser[i] ),
- .m_axi4_arid ( m1_axi4_arid[i] ),
- .m_axi4_araddr ( m1_axi4_araddr[i] ),
- .m_axi4_arvalid ( m1_axi4_arvalid[i] ),
- .m_axi4_arready ( m1_axi4_arready[i] ),
- .m_axi4_arlen ( m1_axi4_arlen[i] ),
- .m_axi4_arsize ( m1_axi4_arsize[i] ),
- .m_axi4_arburst ( m1_axi4_arburst[i] ),
- .m_axi4_arlock ( m1_axi4_arlock[i] ),
- .m_axi4_arprot ( m1_axi4_arprot[i] ),
- .m_axi4_arcache ( ),
- .m_axi4_aruser ( m1_axi4_aruser[i] )
- );
-
- // The AXCACHE signals are set according to burstiness and cache coherence or statically
- // when not connected to ACP on Zynq (implemented below).
- assign m1_read_is_burst[i] = (m1_axi4_arlen[i] != {8{1'b0}}) && (m1_axi4_arburst[i] != 2'b00);
- `ifdef EN_ACP
- always_comb begin
- if (m1_read_is_burst[i]) begin
- m1_axi4_arcache[i] = 4'b1011;
- end else begin
- m1_axi4_arcache[i] = 4'b1111;
- end
- end
- `else
- assign m1_axi4_arcache[i] = 4'b0011;
- `endif
-
- // }}}
-
- // Read Response channel (r) {{{
- /*
- * read response channel (r)
- *
- * ██████╗ ███████╗ █████╗ ██████╗ ██████╗ ███████╗███████╗██████╗
- * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔════╝██╔════╝██╔══██╗
- * ██████╔╝█████╗ ███████║██║ ██║ ██████╔╝█████╗ ███████╗██████╔╝
- * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██╗██╔══╝ ╚════██║██╔═══╝
- * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║███████╗███████║██║
- * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝
- *
- */
- axi4_r_buffer
- #(
- .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_r_buffer_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_rid ( int_m0_rid[i] ),
- .s_axi4_rresp ( int_m0_rresp[i] ),
- .s_axi4_rdata ( int_m0_rdata[i] ),
- .s_axi4_rlast ( int_m0_rlast[i] ),
- .s_axi4_rvalid ( int_m0_rvalid[i] ),
- .s_axi4_ruser ( int_m0_ruser[i] ),
- .s_axi4_rready ( int_m0_rready[i] ),
- .m_axi4_rid ( m0_axi4_rid[i] ),
- .m_axi4_rresp ( m0_axi4_rresp[i] ),
- .m_axi4_rdata ( m0_axi4_rdata[i] ),
- .m_axi4_rlast ( m0_axi4_rlast[i] ),
- .m_axi4_rvalid ( m0_axi4_rvalid[i] ),
- .m_axi4_ruser ( m0_axi4_ruser[i] ),
- .m_axi4_rready ( m0_axi4_rready[i] )
- );
-
- axi4_r_buffer
- #(
- .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_r_buffer_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_rid ( int_m1_rid[i] ),
- .s_axi4_rresp ( int_m1_rresp[i] ),
- .s_axi4_rdata ( int_m1_rdata[i] ),
- .s_axi4_rlast ( int_m1_rlast[i] ),
- .s_axi4_rvalid ( int_m1_rvalid[i] ),
- .s_axi4_ruser ( int_m1_ruser[i] ),
- .s_axi4_rready ( int_m1_rready[i] ),
- .m_axi4_rid ( m1_axi4_rid[i] ),
- .m_axi4_rresp ( m1_axi4_rresp[i] ),
- .m_axi4_rdata ( m1_axi4_rdata[i] ),
- .m_axi4_rlast ( m1_axi4_rlast[i] ),
- .m_axi4_rvalid ( m1_axi4_rvalid[i] ),
- .m_axi4_ruser ( m1_axi4_ruser[i] ),
- .m_axi4_rready ( m1_axi4_rready[i] )
- );
-
- axi4_r_sender
- #(
- .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_r_sender
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .drop_i ( lx_r_drop[i] ),
- .drop_len_i ( lx_len_drop[i] ),
- .done_o ( lx_r_done[i] ),
- .id_i ( lx_id_drop[i] ),
- .prefetch_i ( lx_prefetch_drop[i] ),
- .hit_i ( lx_hit_drop[i] ),
- .s_axi4_rid ( s_axi4_rid[i] ),
- .s_axi4_rresp ( s_axi4_rresp[i] ),
- .s_axi4_rdata ( s_axi4_rdata[i] ),
- .s_axi4_rlast ( s_axi4_rlast[i] ),
- .s_axi4_rvalid ( s_axi4_rvalid[i] ),
- .s_axi4_ruser ( s_axi4_ruser[i] ),
- .s_axi4_rready ( s_axi4_rready[i] ),
- .m_axi4_rid ( int_rid[i] ),
- .m_axi4_rresp ( int_rresp[i] ),
- .m_axi4_rdata ( int_rdata[i] ),
- .m_axi4_rlast ( int_rlast[i] ),
- .m_axi4_rvalid ( int_rvalid[i] ),
- .m_axi4_ruser ( int_ruser[i] ),
- .m_axi4_rready ( int_rready[i] )
- );
-
- /*
- * Multiplexer to switch between the two output master ports on the read response(r) channel
- *
- * Do not perform read burst interleaving as the DMA does not support it. This means we can only
- * switch between the two masters upon sending rlast or when idle.
- *
- * However, if the downstream already performs burst interleaving, this cannot be undone here.
- * Also, the downstream may interleave a burst response with a single-beat transaction. In this
- * case, the FSM below falls out of the burst mode. To avoid it performing burst interleaving
- * after such an event, it gives priority to the master which received the last burst in case
- * both have a burst ready (rvalid).
- *
- * Order of priority:
- * 1. Ongoing burst transaction
- * 2. Single-beat transaction on Master 1.
- * 3. Single-beat transaction on Master 0.
- * 4. Burst transaction on master that received the last burst.
- */
- // Select signal
- always_ff @(posedge Clk_CI) begin
- if (Rst_RBI == 0) begin
- RRespSel_SP[i] <= 1'b0;
- end else begin
- RRespSel_SP[i] <= RRespSel_SN[i];
- end
- end
-
- // FSM
- always_comb begin : RRespMuxFsm
- RRespMuxCtrl_SN[i] = RRespMuxCtrl_SP[i];
- RRespSel_SN[i] = RRespSel_SP[i];
-
- RRespBurst_S[i] = 1'b0;
- RRespSelIm_S[i] = 1'b0;
-
- unique case (RRespMuxCtrl_SP[i])
-
- IDLE: begin
- // immediately forward single-beat transactions
- if (int_m1_rvalid[i] && int_m1_rlast[i])
- RRespSelIm_S[i] = 1'b1;
- else if (int_m0_rvalid[i] && int_m0_rlast[i])
- RRespSelIm_S[i] = 1'b0;
-
- // bursts - they also start immediately
- else if (int_m1_rvalid[i] || int_m0_rvalid[i]) begin
- RRespMuxCtrl_SN[i] = BUSY;
-
- // in case both are ready, continue with the master that had the last burst
- if (int_m1_rvalid[i] && int_m0_rvalid[i]) begin
- RRespSel_SN[i] = RRespSel_SP[i];
- RRespSelIm_S[i] = RRespSel_SP[i];
- end else if (int_m1_rvalid[i]) begin
- RRespSel_SN[i] = 1'b1;
- RRespSelIm_S[i] = 1'b1;
- end else begin
- RRespSel_SN[i] = 1'b0;
- RRespSelIm_S[i] = 1'b0;
- end
- end
- end
-
- BUSY: begin
- RRespBurst_S[i] = 1'b1;
- // detect last handshake of currently ongoing transfer
- if (int_rvalid[i] && int_rready[i] && int_rlast[i])
- RRespMuxCtrl_SN[i] = IDLE;
- end
-
- default: begin
- RRespMuxCtrl_SN[i] = IDLE;
- end
-
- endcase
- end
-
- // FSM state
- always_ff @(posedge Clk_CI) begin
- if (Rst_RBI == 0) begin
- RRespMuxCtrl_SP[i] <= IDLE;
- end else begin
- RRespMuxCtrl_SP[i] <= RRespMuxCtrl_SN[i];
- end
- end
-
- // Actual multiplexer
- always_comb begin
- if ( (RRespBurst_S[i] && RRespSel_SP[i]) || (!RRespBurst_S[i] && RRespSelIm_S[i]) ) begin
- int_m0_rready[i] = 1'b0;
- int_m1_rready[i] = int_rready[i];
-
- int_rid[i] = int_m1_rid[i];
- int_rresp[i] = int_m1_rresp[i];
- int_rdata[i] = int_m1_rdata[i];
- int_rlast[i] = int_m1_rlast[i];
- int_ruser[i] = int_m1_ruser[i];
- int_rvalid[i] = int_m1_rvalid[i];
- end else begin
- int_m0_rready[i] = int_rready[i];
- int_m1_rready[i] = 1'b0;
-
- int_rid[i] = int_m0_rid[i];
- int_rresp[i] = int_m0_rresp[i];
- int_rdata[i] = int_m0_rdata[i];
- int_rlast[i] = int_m0_rlast[i];
- int_ruser[i] = int_m0_ruser[i];
- int_rvalid[i] = int_m0_rvalid[i];
- end
- end
-
- end // BUF & SEND
-
- // }}}
-
- endgenerate // BUF & SEND }}}
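The read-response multiplexer implemented above switches between the two master ports with the stated priority (ongoing burst first, then single-beat on Master 1, then Master 0, then the master that received the last burst). As an orientation aid for the nmigen port, the final combinational mux stage could look roughly as follows; this is a minimal one-port sketch with illustrative signal names (rdata/rvalid/rready only), not the generated module's real interface.

from nmigen import Elaboratable, Module, Signal


class RRespMux(Elaboratable):
    """Final r-channel mux stage: forward Master 1 when sel is set, else Master 0."""
    def __init__(self, data_width=64):
        self.sel = Signal()        # 1 selects Master 1, 0 selects Master 0
        self.rready = Signal()
        self.m0_rvalid = Signal()
        self.m0_rdata = Signal(data_width)
        self.m0_rready = Signal()
        self.m1_rvalid = Signal()
        self.m1_rdata = Signal(data_width)
        self.m1_rready = Signal()
        self.rvalid = Signal()
        self.rdata = Signal(data_width)

    def elaborate(self, platform=None):
        m = Module()
        with m.If(self.sel):
            m.d.comb += [
                self.m0_rready.eq(0),
                self.m1_rready.eq(self.rready),
                self.rvalid.eq(self.m1_rvalid),
                self.rdata.eq(self.m1_rdata),
            ]
        with m.Else():
            m.d.comb += [
                self.m0_rready.eq(self.rready),
                self.m1_rready.eq(0),
                self.rvalid.eq(self.m0_rvalid),
                self.rdata.eq(self.m0_rdata),
            ]
        return m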
-
- // Log {{{
-
-`ifdef RAB_AX_LOG_EN
- AxiBramLogger
- #(
- .AXI_ID_BITW ( AXI_ID_WIDTH ),
- .AXI_ADDR_BITW ( AXI_S_ADDR_WIDTH ),
- .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES )
- )
- u_aw_logger
- (
- .Clk_CI ( NonGatedClk_CI ),
- .TimestampClk_CI ( Clk_CI ),
- .Rst_RBI ( Rst_RBI ),
- .AxiValid_SI ( s_axi4_awvalid[1] ),
- .AxiReady_SI ( s_axi4_awready[1] ),
- .AxiId_DI ( s_axi4_awid[1] ),
- .AxiAddr_DI ( s_axi4_awaddr[1] ),
- .AxiLen_DI ( s_axi4_awlen[1] ),
- .Clear_SI ( AwLogClr_SI ),
- .LogEn_SI ( LogEn_SI ),
- .Full_SO ( int_aw_log_full ),
- .Ready_SO ( AwLogRdy_SO ),
- .Bram_PS ( AwBram_PS )
- );
-
- AxiBramLogger
- #(
- .AXI_ID_BITW ( AXI_ID_WIDTH ),
- .AXI_ADDR_BITW ( AXI_S_ADDR_WIDTH ),
- .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES )
- )
- u_ar_logger
- (
- .Clk_CI ( NonGatedClk_CI ),
- .TimestampClk_CI ( Clk_CI ),
- .Rst_RBI ( Rst_RBI ),
- .AxiValid_SI ( s_axi4_arvalid[1] ),
- .AxiReady_SI ( s_axi4_arready[1] ),
- .AxiId_DI ( s_axi4_arid[1] ),
- .AxiAddr_DI ( s_axi4_araddr[1] ),
- .AxiLen_DI ( s_axi4_arlen[1] ),
- .Clear_SI ( ArLogClr_SI ),
- .LogEn_SI ( LogEn_SI ),
- .Full_SO ( int_ar_log_full ),
- .Ready_SO ( ArLogRdy_SO ),
- .Bram_PS ( ArBram_PS )
- );
-`endif
-
- // }}}
-
- // RAB Core {{{
- // ██████╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗ ███████╗
- // ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔═══██╗██╔══██╗██╔════╝
- // ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝█████╗
- // ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔══██╗██╔══╝
- // ██║ ██║██║ ██║██████╔╝ ╚██████╗╚██████╔╝██║ ██║███████╗
- // ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝
- //
- /*
- * rab_core
- *
- * The rab_core translates addresses. It has two ports that can be used independently;
- * internally, however, they compete for lookup bandwidth, as lookups are serialized.
- *
- * The type input marks a transaction as read (0) or write (1) and is checked against the
- * protection flags. If they do not match, an interrupt is raised on the int_prot line.
- */
-
- rab_core
- #(
- .N_PORTS ( N_PORTS ),
- .N_L2_SETS ( N_L2_SETS ),
- .N_L2_SET_ENTRIES ( N_L2_SET_ENTRIES ),
- .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
- .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ),
- .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
- .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ),
- .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
- .MH_FIFO_DEPTH ( MH_FIFO_DEPTH )
- )
- u_rab_core
- (
- .Clk_CI ( Clk_CI ),
- .Rst_RBI ( Rst_RBI ),
-
- // Config IF
- .s_axi_awaddr ( s_axi4lite_awaddr ),
- .s_axi_awvalid ( s_axi4lite_awvalid ),
- .s_axi_awready ( s_axi4lite_awready ),
- .s_axi_wdata ( s_axi4lite_wdata ),
- .s_axi_wstrb ( s_axi4lite_wstrb ),
- .s_axi_wvalid ( s_axi4lite_wvalid ),
- .s_axi_wready ( s_axi4lite_wready ),
- .s_axi_bresp ( s_axi4lite_bresp ),
- .s_axi_bvalid ( s_axi4lite_bvalid ),
- .s_axi_bready ( s_axi4lite_bready ),
- .s_axi_araddr ( s_axi4lite_araddr ),
- .s_axi_arvalid ( s_axi4lite_arvalid ),
- .s_axi_arready ( s_axi4lite_arready ),
- .s_axi_rready ( s_axi4lite_rready ),
- .s_axi_rdata ( s_axi4lite_rdata ),
- .s_axi_rresp ( s_axi4lite_rresp ),
- .s_axi_rvalid ( s_axi4lite_rvalid ),
-
- // L1 miss info outputs -> L2 TLB arbitration
- .int_miss ( rab_miss ),
- .int_multi ( rab_multi ),
- .int_prot ( rab_prot ),
- .int_prefetch ( rab_prefetch ),
- .int_mhf_full ( int_mhf_full ),
-
- // L1 transaction info outputs -> L2 TLB arbitration
- .int_axaddr_o ( L1OutAddr_D ),
- .int_axid_o ( L1OutId_D ),
- .int_axlen_o ( L1OutLen_D ),
- .int_axuser_o ( L1OutUser_D ),
-
- // Write Req IF
- .port1_addr ( int_awaddr ),
- .port1_id ( int_awid ),
- .port1_len ( int_awlen ),
- .port1_size ( int_awsize ),
- .port1_addr_valid ( int_awvalid & ~aw_in_stall ), // avoid the FSM accepting new AW requests
- .port1_type ( {N_PORTS{1'b1}} ),
- .port1_user ( int_awuser ),
- .port1_sent ( int_wtrans_sent ), // signal done to L1 FSM
- .port1_out_addr ( int_wtrans_addr ),
- .port1_cache_coherent ( int_wtrans_cache_coherent ),
- .port1_accept ( int_wtrans_accept ),
- .port1_drop ( int_wtrans_drop ),
- .port1_miss ( int_wtrans_miss ),
-
- // Read Req IF
- .port2_addr ( int_araddr ),
- .port2_id ( int_arid ),
- .port2_len ( int_arlen ),
- .port2_size ( int_arsize ),
- .port2_addr_valid ( int_arvalid ),
- .port2_type ( {N_PORTS{1'b0}} ),
- .port2_user ( int_aruser ),
- .port2_sent ( int_rtrans_sent ), // signal done to L1 FSM
- .port2_out_addr ( int_rtrans_addr ),
- .port2_cache_coherent ( int_rtrans_cache_coherent ),
- .port2_accept ( int_rtrans_accept ),
- .port2_drop ( int_rtrans_drop ),
- .port2_miss ( int_rtrans_miss ),
-
- // L2 miss info inputs -> axi_rab_cfg
- .miss_l2_i ( L2Miss_S ),
- .miss_l2_addr_i ( L2OutInAddr_DP ),
- .miss_l2_id_i ( L2OutId_DP ),
- .miss_l2_user_i ( L2OutUser_DP ),
-
- // L2 config outputs
- .wdata_l2_o ( L2CfgWData_D ),
- .waddr_l2_o ( L2CfgWAddr_D ),
- .wren_l2_o ( L2CfgWE_S )
- );
-
- // }}}
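The protection check referred to in the comment above amounts to a single combinational expression: a write (type 1) against an entry without write permission, or a read (type 0) against an entry without read permission, raises prot. A hedged nmigen sketch with illustrative names; the real check is distributed across the slice/RAM logic inside rab_core, so this is only a reading aid.

from nmigen import Elaboratable, Module, Signal


class ProtCheck(Elaboratable):
    def __init__(self):
        self.trans_type = Signal()   # 0 = read, 1 = write (cf. port1_type/port2_type)
        self.allow_read = Signal()   # protection flags of the matching entry
        self.allow_write = Signal()
        self.hit = Signal()
        self.prot = Signal()         # would drive the int_prot interrupt upstream

    def elaborate(self, platform=None):
        m = Module()
        m.d.comb += self.prot.eq(
            self.hit & ((self.trans_type & ~self.allow_write) |
                        (~self.trans_type & ~self.allow_read)))
        return m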
-
- // AX SPLITS {{{
- // █████╗ ██╗ ██╗ ███████╗██████╗ ██╗ ██╗████████╗
- // ██╔══██╗╚██╗██╔╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝
- // ███████║ ╚███╔╝ ███████╗██████╔╝██║ ██║ ██║
- // ██╔══██║ ██╔██╗ ╚════██║██╔═══╝ ██║ ██║ ██║
- // ██║ ██║██╔╝ ██╗ ███████║██║ ███████╗██║ ██║
- // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝
- //
- /**
- * Multiplex the two output master ports of the Read Address and Write Address (AR/AW) channels.
- *
- * Use the `int_xmaster_select` signal to route the signals to either Master 0 (to memory) or
- * Master 1 (to ACP). In case of an L1 miss: Route the signals to both masters. They shall be
- * saved until the L2 outputs are available.
- */
- generate for (i = 0; i < N_PORTS; i++) begin : AX_SPLIT
-
- /*
- * When accepting L1 transactions, we must just do so on the selected master. Drop requests must
- * be performed on any one of the two masters. Save requests must be performed by both masters.
- */
- always_comb begin : AW_L1_SPLIT
-
- // TLB handshake
- l1_m0_aw_accept[i] = 1'b0;
- l1_m1_aw_accept[i] = 1'b0;
- l1_m0_aw_drop[i] = 1'b0;
- l1_m1_aw_drop[i] = 1'b0;
- l1_m0_aw_save[i] = 1'b0;
- l1_m1_aw_save[i] = 1'b0;
-
- l1_mx_aw_done[i] = 1'b0;
-
- // AXI sender input handshake
- int_m0_awvalid[i] = 1'b0;
- int_m1_awvalid[i] = 1'b0;
- int_awready[i] = 1'b0;
-
- // accept on selected master only
- if (l1_aw_accept[i]) begin
- if (int_wmaster_select[i]) begin
- l1_m1_aw_accept[i] = 1'b1;
- l1_mx_aw_done[i] = l1_m1_aw_done[i];
-
- int_m1_awvalid[i] = int_awvalid[i];
- int_awready[i] = int_m1_awready[i];
-
- end else begin
- l1_m0_aw_accept[i] = 1'b1;
- l1_mx_aw_done[i] = l1_m0_aw_done[i];
-
- int_m0_awvalid[i] = int_awvalid[i];
- int_awready[i] = int_m0_awready[i];
- end
-
- // drop on Master 0 only
- end else if (l1_aw_drop[i]) begin
- l1_m0_aw_drop[i] = 1'b1;
- l1_mx_aw_done[i] = l1_m0_aw_done[i];
-
- int_m0_awvalid[i] = int_awvalid[i];
- int_awready[i] = l1_m0_aw_done[i];
-
- // save on both masters
- end else if (l1_aw_save[i]) begin
- // split save
- l1_m0_aw_save[i] = ~l1_m0_aw_done_SP[i];
- l1_m1_aw_save[i] = ~l1_m1_aw_done_SP[i];
-
- // combine done
- l1_mx_aw_done[i] = l1_m0_aw_done_SP[i] & l1_m1_aw_done_SP[i];
-
- int_m0_awvalid[i] = int_awvalid[i];
- int_m1_awvalid[i] = int_awvalid[i];
- int_awready[i] = l1_mx_aw_done[i];
- end
- end
-
- // signal back to handshake splitter
- assign l1_aw_done[i] = l1_mx_aw_done[i];
-
- always_ff @(posedge Clk_CI) begin : L1_MX_AW_DONE_REG
- if (Rst_RBI == 0) begin
- l1_m0_aw_done_SP[i] <= 1'b0;
- l1_m1_aw_done_SP[i] <= 1'b0;
- end else if (l1_mx_aw_done[i]) begin
- l1_m0_aw_done_SP[i] <= 1'b0;
- l1_m1_aw_done_SP[i] <= 1'b0;
- end else begin
- l1_m0_aw_done_SP[i] <= l1_m0_aw_done_SP[i] | l1_m0_aw_done[i];
- l1_m1_aw_done_SP[i] <= l1_m1_aw_done_SP[i] | l1_m1_aw_done[i];
- end
- end
-
- /*
- * When accepting L2 transactions, we must drop the corresponding transaction from the other
- * master to make it available again for save requests from L1_DROP_SAVE.
- */
- always_comb begin : AW_L2_SPLIT
-
- l2_m0_aw_accept[i] = 1'b0;
- l2_m1_aw_accept[i] = 1'b0;
- l2_m0_aw_drop[i] = 1'b0;
- l2_m1_aw_drop[i] = 1'b0;
-
- // de-assert request signals individually upon handshakes
- if (l2_aw_accept[i]) begin
- if (l2_master_select[i]) begin
- l2_m1_aw_accept[i] = ~l2_m1_aw_done_SP[i];
- l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i];
-
- end else begin
- l2_m0_aw_accept[i] = ~l2_m0_aw_done_SP[i];
- l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i];
-
- end
- end else begin
- l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0;
- l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0;
-
- end
-
- // combine done
- l2_mx_aw_done[i] = l2_m0_aw_done_SP[i] & l2_m1_aw_done_SP[i];
-
- l2_aw_done[i] = l2_mx_aw_done[i];
- end
-
- always_ff @(posedge Clk_CI) begin : L2_MX_AW_DONE_REG
- if (Rst_RBI == 0) begin
- l2_m0_aw_done_SP[i] <= 1'b0;
- l2_m1_aw_done_SP[i] <= 1'b0;
- end else if (l2_mx_aw_done[i]) begin
- l2_m0_aw_done_SP[i] <= 1'b0;
- l2_m1_aw_done_SP[i] <= 1'b0;
- end else begin
- l2_m0_aw_done_SP[i] <= l2_m0_aw_done_SP[i] | l2_m0_aw_done[i];
- l2_m1_aw_done_SP[i] <= l2_m1_aw_done_SP[i] | l2_m1_aw_done[i];
- end
- end
-
- /*
- * When accepting L1 transactions, we must just do so on the selected master. Drop requests must
- * be performed on any one of the two masters. Save requests must be performed by both masters.
- */
- always_comb begin : AR_L1_SPLIT
-
- // TLB handshake
- l1_m0_ar_accept[i] = 1'b0;
- l1_m1_ar_accept[i] = 1'b0;
- l1_m0_ar_drop[i] = 1'b0;
- l1_m1_ar_drop[i] = 1'b0;
- l1_m0_ar_save[i] = 1'b0;
- l1_m1_ar_save[i] = 1'b0;
-
- l1_mx_ar_done[i] = 1'b0;
-
- // AXI sender input handshake
- int_m0_arvalid[i] = 1'b0;
- int_m1_arvalid[i] = 1'b0;
- int_arready[i] = 1'b0;
-
- // accept on selected master only
- if (l1_ar_accept[i]) begin
- if (int_rmaster_select[i]) begin
- l1_m1_ar_accept[i] = 1'b1;
- l1_mx_ar_done[i] = l1_m1_ar_done[i];
-
- int_m1_arvalid[i] = int_arvalid[i];
- int_arready[i] = int_m1_arready[i];
-
- end else begin
- l1_m0_ar_accept[i] = 1'b1;
- l1_mx_ar_done[i] = l1_m0_ar_done[i];
-
- int_m0_arvalid[i] = int_arvalid[i];
- int_arready[i] = int_m0_arready[i];
- end
-
- // drop on Master 0 only
- end else if (l1_ar_drop[i]) begin
- l1_m0_ar_drop[i] = 1'b1;
- l1_mx_ar_done[i] = l1_m0_ar_done[i];
-
- int_m0_arvalid[i] = int_arvalid[i];
- int_arready[i] = l1_m0_ar_done[i];
-
- // save on both masters
- end else if (l1_ar_save[i]) begin
- // split save
- l1_m0_ar_save[i] = ~l1_m0_ar_done_SP[i];
- l1_m1_ar_save[i] = ~l1_m1_ar_done_SP[i];
-
- // combine done
- l1_mx_ar_done[i] = l1_m0_ar_done_SP[i] & l1_m1_ar_done_SP[i];
-
- int_m0_arvalid[i] = int_arvalid[i];
- int_m1_arvalid[i] = int_arvalid[i];
- int_arready[i] = l1_mx_ar_done[i];
- end
- end
-
- // signal back to handshake splitter
- assign l1_ar_done[i] = l1_mx_ar_done[i];
-
- always_ff @(posedge Clk_CI) begin : L1_MX_AR_DONE_REG
- if (Rst_RBI == 0) begin
- l1_m0_ar_done_SP[i] <= 1'b0;
- l1_m1_ar_done_SP[i] <= 1'b0;
- end else if (l1_mx_ar_done[i]) begin
- l1_m0_ar_done_SP[i] <= 1'b0;
- l1_m1_ar_done_SP[i] <= 1'b0;
- end else begin
- l1_m0_ar_done_SP[i] <= l1_m0_ar_done_SP[i] | l1_m0_ar_done[i];
- l1_m1_ar_done_SP[i] <= l1_m1_ar_done_SP[i] | l1_m1_ar_done[i];
- end
- end
-
- /*
- * When accepting L2 transactions, we must drop the corresponding transaction from the other
- * master to make it available again for save requests from L1_DROP_SAVE.
- */
- always_comb begin : AR_L2_SPLIT
-
- l2_m0_ar_accept[i] = 1'b0;
- l2_m1_ar_accept[i] = 1'b0;
- l2_m0_ar_drop[i] = 1'b0;
- l2_m1_ar_drop[i] = 1'b0;
-
- // de-assert request signals individually upon handshakes
- if (l2_ar_accept[i]) begin
- if (l2_master_select[i]) begin
- l2_m1_ar_accept[i] = ~l2_m1_ar_done_SP[i];
- l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i];
-
- end else begin
- l2_m0_ar_accept[i] = ~l2_m0_ar_done_SP[i];
- l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i];
-
- end
- end else if (l2_ar_drop[i]) begin
- l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0;
- l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0;
-
- end
-
- // combine done
- l2_mx_ar_done[i] = l2_m0_ar_done_SP[i] & l2_m1_ar_done_SP[i];
-
- l2_ar_done[i] = l2_mx_ar_done[i];
- end
-
- always_ff @(posedge Clk_CI) begin : L2_MX_AR_DONE_REG
- if (Rst_RBI == 0) begin
- l2_m0_ar_done_SP[i] <= 1'b0;
- l2_m1_ar_done_SP[i] <= 1'b0;
- end else if (l2_mx_ar_done[i]) begin
- l2_m0_ar_done_SP[i] <= 1'b0;
- l2_m1_ar_done_SP[i] <= 1'b0;
- end else begin
- l2_m0_ar_done_SP[i] <= l2_m0_ar_done_SP[i] | l2_m0_ar_done[i];
- l2_m1_ar_done_SP[i] <= l2_m1_ar_done_SP[i] | l2_m1_ar_done[i];
- end
- end
-
- end // AX_SPLIT
- endgenerate // AX_SPLIT
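The save path above relies on a small reusable pattern: per-master done pulses are latched into sticky _SP bits, the combined done is the AND of the two sticky bits, and the sticky bits are cleared once the combined done fires. The same pattern reappears in the handshake splits further below. A hedged nmigen sketch of just that pattern, with illustrative names:

from nmigen import Elaboratable, Module, Signal


class CombinedDone(Elaboratable):
    """Latch per-master done pulses and report a combined done once both arrived."""
    def __init__(self):
        self.m0_done = Signal()  # single-cycle pulse from master 0
        self.m1_done = Signal()  # single-cycle pulse from master 1
        self.mx_done = Signal()  # combined done, clears the sticky bits

    def elaborate(self, platform=None):
        m = Module()
        m0_done_sp = Signal()
        m1_done_sp = Signal()
        m.d.comb += self.mx_done.eq(m0_done_sp & m1_done_sp)
        with m.If(self.mx_done):
            m.d.sync += [m0_done_sp.eq(0), m1_done_sp.eq(0)]
        with m.Else():
            m.d.sync += [
                m0_done_sp.eq(m0_done_sp | self.m0_done),
                m1_done_sp.eq(m1_done_sp | self.m1_done),
            ]
        return m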
-
- // }}}
-
- // HANDSHAKE SPLITS {{{
- // ██╗ ██╗███████╗ ███████╗██████╗ ██╗ ██╗████████╗
- // ██║ ██║██╔════╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝
- // ███████║███████╗ ███████╗██████╔╝██║ ██║ ██║
- // ██╔══██║╚════██║ ╚════██║██╔═══╝ ██║ ██║ ██║
- // ██║ ██║███████║ ███████║██║ ███████╗██║ ██║
- // ╚═╝ ╚═╝╚══════╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝
- //
- /*
- * We need to perform combined handshakes with multiple AXI modules
- * upon transactions drops, accepts, saves etc. from two TLBs.
- */
- generate for (i = 0; i < N_PORTS; i++) begin : HANDSHAKE_SPLIT
-
- assign l1_xw_accept[i] = int_wtrans_accept[i] & ~aw_out_stall[i];
- assign int_wtrans_sent[i] = l1_xw_done[i];
-
- assign l1_ar_accept[i] = int_rtrans_accept[i];
- assign int_rtrans_sent[i] = l1_ar_done[i];
-
- /*
- * L1 AW sender + W buffer handshake split
- */
- // forward
- assign l1_aw_accept[i] = l1_xw_accept[i] & ~l1_aw_done_SP[i];
- assign l1_w_accept[i] = l1_xw_accept[i] & ~l1_w_done_SP[i];
-
- assign l1_aw_save[i] = l1_xw_save[i] & ~l1_aw_done_SP[i];
- assign l1_w_save[i] = l1_xw_save[i] & ~l1_w_done_SP[i];
-
- assign l1_aw_drop[i] = l1_xw_drop[i] & ~l1_aw_done_SP[i];
- assign l1_w_drop[i] = l1_xw_drop[i] & ~l1_w_done_SP[i];
-
- // backward
- assign l1_xw_done[i] = l1_aw_done_SP[i] & l1_w_done_SP[i];
-
- always_ff @(posedge Clk_CI) begin : L1_XW_HS_SPLIT
- if (Rst_RBI == 0) begin
- l1_aw_done_SP[i] <= 1'b0;
- l1_w_done_SP[i] <= 1'b0;
- end else if (l1_xw_done[i]) begin
- l1_aw_done_SP[i] <= 1'b0;
- l1_w_done_SP[i] <= 1'b0;
- end else begin
- l1_aw_done_SP[i] <= l1_aw_done_SP[i] | l1_aw_done[i];
- l1_w_done_SP[i] <= l1_w_done_SP[i] | l1_w_done[i];
- end
- end
-
- if (ENABLE_L2TLB[i] == 1) begin : L2_HS_SPLIT
-
- /*
- * L1 AR sender + R sender handshake split
- *
- * AR and R do not need to be strictly in sync. We thus use separate handshakes.
- * But the handshake signals for the R sender are multiplexed with those for
- * the L2. However, L2_ACCEPT_DROP_SAVE always has higher priority.
- */
- assign lx_r_drop[i] = l2_r_drop[i] | l1_r_drop[i];
- assign l1_r_done[i] = l2_r_drop[i] ? 1'b0 : lx_r_done[i];
- assign l2_r_done[i] = l2_r_drop[i] ? lx_r_done[i] : 1'b0;
-
- /*
- * L2 AW sender + W buffer handshake split
- */
- // forward
- assign l2_aw_accept[i] = l2_xw_accept[i] & ~l2_aw_done_SP[i];
- assign l2_w_accept[i] = l2_xw_accept[i] & ~l2_w_done_SP[i];
-
- assign l2_aw_drop[i] = l2_xw_drop[i] & ~l2_aw_done_SP[i];
- assign l2_w_drop[i] = l2_xw_drop[i] & ~l2_w_done_SP[i];
-
- // backward
- assign l2_xw_done[i] = l2_aw_done_SP[i] & l2_w_done_SP[i];
-
- always_ff @(posedge Clk_CI) begin : L2_XW_HS_SPLIT
- if (Rst_RBI == 0) begin
- l2_aw_done_SP[i] <= 1'b0;
- l2_w_done_SP[i] <= 1'b0;
- end else if (l2_xw_done[i]) begin
- l2_aw_done_SP[i] <= 1'b0;
- l2_w_done_SP[i] <= 1'b0;
- end else begin
- l2_aw_done_SP[i] <= l2_aw_done_SP[i] | l2_aw_done[i];
- l2_w_done_SP[i] <= l2_w_done_SP[i] | l2_w_done[i];
- end
- end
-
- /*
- * L2 AR + R sender handshake split
- */
- // forward
- assign l2_ar_drop[i] = l2_xr_drop[i] & ~l2_ar_done_SP[i];
- assign l2_r_drop[i] = l2_xr_drop[i] & ~l2_r_done_SP[i];
-
- // backward - make sure to always clear L2_XR_HS_SPLIT
- always_comb begin
- if (l2_xr_drop[i]) begin
- l2_xr_done[i] = l2_ar_done_SP[i] & l2_r_done_SP[i];
- end else begin
- l2_xr_done[i] = l2_ar_done_SP[i];
- end
- end
-
- always_ff @(posedge Clk_CI) begin : L2_XR_HS_SPLIT
- if (Rst_RBI == 0) begin
- l2_ar_done_SP[i] <= 1'b0;
- l2_r_done_SP[i] <= 1'b0;
- end else if (l2_xr_done[i]) begin
- l2_ar_done_SP[i] <= 1'b0;
- l2_r_done_SP[i] <= 1'b0;
- end else begin
- l2_ar_done_SP[i] <= l2_ar_done_SP[i] | l2_ar_done[i];
- l2_r_done_SP[i] <= l2_r_done_SP[i] | l2_r_done[i];
- end
- end
-
- end else begin // if (ENABLE_L2TLB[i] == 1)
-
- assign lx_r_drop[i] = l1_r_drop[i];
- assign l1_r_done[i] = lx_r_done[i];
-
- assign l2_aw_accept[i] = 1'b0;
- assign l2_w_accept[i] = 1'b0;
- assign l2_aw_drop[i] = 1'b0;
- assign l2_w_drop[i] = 1'b0;
- assign l2_xw_done[i] = 1'b0;
- assign l2_aw_done_SP[i] = 1'b0;
- assign l2_w_done_SP[i] = 1'b0;
-
- assign l2_ar_accept[i] = 1'b0;
- assign l2_ar_drop[i] = 1'b0;
- assign l2_r_drop[i] = 1'b0;
- assign l2_xr_done[i] = 1'b0;
- assign l2_r_done[i] = 1'b0;
- assign l2_ar_done_SP[i] = 1'b0;
- assign l2_r_done_SP[i] = 1'b0;
-
- end // if (ENABLE_L2TLB[i] == 1)
-
- end // HANDSHAKE_SPLIT
- endgenerate // HANDSHAKE_SPLIT
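The priority rule described above (an L2 drop wins over an L1 drop on the shared R sender) is just a pair of muxes on the done handshake. A minimal nmigen fragment, assuming one port and the same signal meanings as in the code above:

from nmigen import Module, Mux, Signal

m = Module()
l1_r_drop = Signal()
l2_r_drop = Signal()
lx_r_drop = Signal()
lx_r_done = Signal()
l1_r_done = Signal()
l2_r_done = Signal()

m.d.comb += [
    lx_r_drop.eq(l1_r_drop | l2_r_drop),         # either level may drop
    l1_r_done.eq(Mux(l2_r_drop, 0, lx_r_done)),  # route done back to L1 ...
    l2_r_done.eq(Mux(l2_r_drop, lx_r_done, 0)),  # ... unless L2 claimed the drop
]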
-
- // }}}
-
- // L2 TLB {{{
- // ██╗ ██████╗ ████████╗██╗ ██████╗
- // ██║ ╚════██╗ ╚══██╔══╝██║ ██╔══██╗
- // ██║ █████╔╝ ██║ ██║ ██████╔╝
- // ██║ ██╔═══╝ ██║ ██║ ██╔══██╗
- // ███████╗███████╗ ██║ ███████╗██████╔╝
- // ╚══════╝╚══════╝ ╚═╝ ╚══════╝╚═════╝
- //
- /*
- * l2_tlb
- *
- * The L2 TLB translates addresses upon misses in the L1 TLB (rab_core).
- *
- * It supports one ongoing translation at a time. If an L1 miss occurs while the L2 is busy,
- * the L1 is stalled until the L2 is available again.
- *
- */
- generate for (i = 0; i < N_PORTS; i++) begin : L2_TLB
- if (ENABLE_L2TLB[i] == 1) begin : L2_TLB
-
- /*
- * L1 output selector
- */
- assign L1OutRwType_D[i] = int_wtrans_drop[i] ? 1'b1 : 1'b0;
- assign L1OutProt_D[i] = rab_prot[i];
- assign L1OutMulti_D[i] = rab_multi[i];
-
- /*
- * L1 output control + L1_DROP_BUF, L2_IN_BUF management
- *
- * Forward the L1 drop request to AR/AW sender modules if
- * 1. the transaction needs to be dropped (L1 multi, prot, prefetch), or
- * 2. if a lookup in the L2 TLB is required (L1 miss) and the input buffer is not full.
- *
- * The AR/AW senders do not support more than 1 outstanding L1 miss. The push back towards
- * the upstream is realized by not accepting the save request (saving the L1 transaction)
- * in the senders as long as the L2 TLB is busy or has valid output. This ultimately
- * blocks the L1 TLB.
- *
- * Together with the AW drop/save, we also perform the W drop/save, as AW and W must
- * absolutely remain in order. In contrast, the R drop is performed separately in
- * L2_ACCEPT_DROP_SAVE below.
- */
- always_comb begin : L1_DROP_SAVE
-
- l1_ar_drop[i] = 1'b0;
- l1_ar_save[i] = 1'b0;
- l1_xw_drop[i] = 1'b0;
- l1_xw_save[i] = 1'b0;
-
- l1_id_drop[i] = L1OutId_D[i];
- l1_len_drop[i] = L1OutLen_D[i];
- l1_prefetch_drop[i] = rab_prefetch[i];
- l1_hit_drop[i] = 1'b1; // there are no drops for L1 misses
-
- L1DropEn_S[i] = 1'b0;
- L2InEn_S[i] = 1'b0;
-
- if ( rab_prot[i] | rab_multi[i] | rab_prefetch[i] ) begin
- // 1. Drop
- l1_ar_drop[i] = int_rtrans_drop[i] & ~L1DropValid_SP[i];
- l1_xw_drop[i] = int_wtrans_drop[i] & ~L1DropValid_SP[i];
-
- // Store to L1_DROP_BUF upon handshake
- L1DropEn_S[i] = (l1_ar_drop[i] & l1_ar_done[i]) |
- (l1_xw_drop[i] & l1_xw_done[i]);
-
- end else if ( rab_miss[i] ) begin
- // 2. Save - Make sure L2 is really available.
- l1_ar_save[i] = int_rtrans_drop[i] & ~L2Busy_S[i];
- l1_xw_save[i] = int_wtrans_drop[i] & ~L2Busy_S[i];
-
- // Store to L2_IN_BUF upon handshake - triggers the L2 TLB
- L2InEn_S[i] = (l1_ar_save[i] & l1_ar_done[i]) |
- (l1_xw_save[i] & l1_xw_done[i]);
- end
- end
-
- /*
- * L2 output control + L2_OUT_BUF management + R/B sender control + W buffer control
- *
- * Perform L1 R transaction drops unless the L2 output buffer holds valid data. The AXI specs
- * require the B response to be sent only after consuming/discarding the corresponding data
- * in the W channel. Thus, we only send L2 drop request to the W buffer here. The drop
- * request to the B sender is then sent by the W buffer autonomously.
- *
- * L1 AW/W drop requests are managed by L1_DROP_SAVE.
- */
- always_comb begin : L2_ACCEPT_DROP_SAVE
-
- l2_ar_addr[i] = 'b0;
- l2_aw_addr[i] = 'b0;
- l2_ar_accept[i] = 1'b0;
- l2_xr_drop[i] = 1'b0;
- l2_xw_accept[i] = 1'b0;
- l2_xw_drop[i] = 1'b0;
-
- l1_r_drop[i] = 1'b0;
-
- lx_id_drop[i] = 'b0;
- lx_len_drop[i] = 'b0;
- lx_prefetch_drop[i] = 1'b0;
- lx_hit_drop[i] = 1'b0;
-
- L1DropValid_SN[i] = L1DropValid_SP[i] | L1DropEn_S[i];
- L2OutValid_SN[i] = L2OutValid_SP[i];
- L2OutReady_S[i] = 1'b0;
- L2OutEn_S[i] = 1'b0;
-
- L2Miss_S[i] = 1'b0;
- int_multi[i] = 1'b0;
- int_prot[i] = 1'b0;
-
- if (L2OutValid_SP[i] == 1'b0) begin
-
- // Drop L1 from R senders
- if (L1DropValid_SP[i] == 1'b1) begin
-
- // Only perform the R sender drop here.
- if (~L1DropRwType_DP[i]) begin
-
- l1_r_drop[i] = 1'b1;
- lx_id_drop[i] = L1DropId_DP[i];
- lx_len_drop[i] = L1DropLen_DP[i];
- lx_prefetch_drop[i] = L1DropPrefetch_S[i];
- lx_hit_drop[i] = 1'b1; // there are no drops for L1 misses
-
- // Invalidate L1_DROP_BUF upon handshake
- if ( l1_r_drop[i] & l1_r_done[i] ) begin
-
- L1DropValid_SN[i] = 1'b0;
- int_prot[i] = L1DropProt_DP[i];
- int_multi[i] = L1DropMulti_DP[i];
- end
-
- end else begin
- // Invalidate L1_DROP_BUF
- L1DropValid_SN[i] = 1'b0;
- int_prot[i] = L1DropProt_DP[i];
- int_multi[i] = L1DropMulti_DP[i];
- end
- end
-
- end else begin // L2_OUT_BUF has valid data
-
- if ( L2OutHit_SP[i] & ~(L2OutPrefetch_S[i] | L2OutProt_SP[i] | L2OutMulti_SP[i]) ) begin
-
- l2_ar_addr[i] = L2OutAddr_DP[i];
- l2_aw_addr[i] = L2OutAddr_DP[i];
-
- l2_ar_accept[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1;
- l2_xw_accept[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0;
-
- // Invalidate L2_OUT_BUF upon handshake
- L2OutValid_SN[i] = ~( (l2_ar_accept[i] & l2_ar_done[i]) |
- (l2_xw_accept[i] & l2_xw_done[i]) );
- end else begin
-
- lx_id_drop[i] = L2OutId_DP[i];
- lx_len_drop[i] = L2OutLen_DP[i];
- lx_prefetch_drop[i] = L2OutPrefetch_S[i];
- lx_hit_drop[i] = L2OutHit_SP[i];
-
- // The l2_xr_drop will also perform the handshake with the R sender
- l2_xr_drop[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1;
- l2_xw_drop[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0;
-
- // Invalidate L1_DROP_BUF upon handshake
- if ( (l2_xr_drop[i] & l2_xr_done[i]) | (l2_xw_drop[i] & l2_xw_done[i]) ) begin
-
- L2OutValid_SN[i] = 1'b0;
- L2Miss_S[i] = ~L2OutHit_SP[i];
- int_prot[i] = L2OutProt_SP[i];
- int_multi[i] = L2OutMulti_SP[i];
- end
- end
- end
-
- // Only accept new L2 output after ongoing drops have finished.
- if ( (l2_xr_drop[i] == l2_xr_done[i]) &
- (l2_xw_drop[i] == l2_xw_done[i]) &
- (l1_r_drop[i] == l1_r_done[i] ) ) begin
- // Store to L2_OUT_BUF upon handshake with L2 TLB module
- if ( (L2OutValid_SP[i] == 1'b0) && (L2OutValid_S[i] == 1'b1) ) begin
- L2OutValid_SN[i] = 1'b1;
- L2OutReady_S[i] = 1'b1;
- L2OutEn_S[i] = 1'b1;
- end
- end
- end
-
- /*
- * L1 drop buffer
- *
- * Used in case of multi, prot and prefetch hits in the L1 TLB.
- */
- always_ff @(posedge Clk_CI) begin : L1_DROP_BUF
- if (Rst_RBI == 0) begin
- L1DropProt_DP[i] <= 1'b0;
- L1DropMulti_DP[i] <= 1'b0;
- L1DropRwType_DP[i] <= 1'b0;
- L1DropUser_DP[i] <= 'b0;
- L1DropId_DP[i] <= 'b0;
- L1DropLen_DP[i] <= 'b0;
- L1DropAddr_DP[i] <= 'b0;
- end else if (L1DropEn_S[i] == 1'b1) begin
- L1DropProt_DP[i] <= L1OutProt_D[i] ;
- L1DropMulti_DP[i] <= L1OutMulti_D[i] ;
- L1DropRwType_DP[i] <= L1OutRwType_D[i];
- L1DropUser_DP[i] <= L1OutUser_D[i] ;
- L1DropId_DP[i] <= L1OutId_D[i] ;
- L1DropLen_DP[i] <= L1OutLen_D[i] ;
- L1DropAddr_DP[i] <= L1OutAddr_D[i] ;
- end
- end // always_ff @ (posedge Clk_CI)
-
- /*
- * L2 input buffer
- *
- * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB.
- */
- always_ff @(posedge Clk_CI) begin : L2_IN_BUF
- if (Rst_RBI == 0) begin
- L2InRwType_DP[i] <= 1'b0;
- L2InUser_DP[i] <= 'b0;
- L2InId_DP[i] <= 'b0;
- L2InLen_DP[i] <= 'b0;
- L2InAddr_DP[i] <= 'b0;
- end else if (L2InEn_S[i] == 1'b1) begin
- L2InRwType_DP[i] <= L1OutRwType_D[i];
- L2InUser_DP[i] <= L1OutUser_D[i] ;
- L2InId_DP[i] <= L1OutId_D[i] ;
- L2InLen_DP[i] <= L1OutLen_D[i] ;
- L2InAddr_DP[i] <= L1OutAddr_D[i] ;
- end
- end // always_ff @ (posedge Clk_CI)
-
- l2_tlb
- #(
- .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ),
- .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
- .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ),
- .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ),
- .N_SETS ( `RAB_L2_N_SETS ),
- .N_OFFSETS ( `RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS ),
- .N_PAR_VA_RAMS ( `RAB_L2_N_PAR_VA_RAMS ),
- .HIT_OFFSET_STORE_WIDTH ( log2(`RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS) )
- )
- u_l2_tlb
- (
- .clk_i ( Clk_CI ),
- .rst_ni ( Rst_RBI ),
-
- // Config inputs
- .we_i ( L2CfgWE_S[i] ),
- .waddr_i ( L2CfgWAddr_D[i] ),
- .wdata_i ( L2CfgWData_D[i] ),
-
- // Request input
- .start_i ( L2InEn_S[i] ),
- .busy_o ( L2Busy_S[i] ),
- .rw_type_i ( L2InRwType_DP[i] ),
- .in_addr_i ( L2InAddr_DP[i] ),
-
- // Response output
- .out_ready_i ( L2OutReady_S[i] ),
- .out_valid_o ( L2OutValid_S[i] ),
- .hit_o ( L2OutHit_SN[i] ),
- .miss_o ( L2OutMiss_SN[i] ),
- .prot_o ( L2OutProt_SN[i] ),
- .multi_o ( L2OutMulti_SN[i] ),
- .cache_coherent_o ( L2OutCC_SN[i] ),
- .out_addr_o ( L2OutAddr_DN[i] )
- );
-
- /*
- * L2 output buffer
- *
- * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB.
- */
- always_ff @(posedge Clk_CI) begin : L2_OUT_BUF
- if (Rst_RBI == 0) begin
- L2OutRwType_DP[i] <= 1'b0;
- L2OutUser_DP[i] <= 'b0;
- L2OutLen_DP[i] <= 'b0;
- L2OutId_DP[i] <= 'b0;
- L2OutInAddr_DP[i] <= 'b0;
-
- L2OutHit_SP[i] <= 1'b0;
- L2OutMiss_SP[i] <= 1'b0;
- L2OutProt_SP[i] <= 1'b0;
- L2OutMulti_SP[i] <= 1'b0;
- L2OutCC_SP[i] <= 1'b0;
- L2OutAddr_DP[i] <= 'b0;
- end else if (L2OutEn_S[i] == 1'b1) begin
- L2OutRwType_DP[i] <= L2InRwType_DP[i];
- L2OutUser_DP[i] <= L2InUser_DP[i] ;
- L2OutLen_DP[i] <= L2InLen_DP[i] ;
- L2OutId_DP[i] <= L2InId_DP[i] ;
- L2OutInAddr_DP[i] <= L2InAddr_DP[i] ;
-
- L2OutHit_SP[i] <= L2OutHit_SN[i] ;
- L2OutMiss_SP[i] <= L2OutMiss_SN[i] ;
- L2OutProt_SP[i] <= L2OutProt_SN[i] ;
- L2OutMulti_SP[i] <= L2OutMulti_SN[i];
- L2OutCC_SP[i] <= L2OutCC_SN[i] ;
- L2OutAddr_DP[i] <= L2OutAddr_DN[i] ;
- end
- end // always_ff @ (posedge Clk_CI)
-
- always_ff @(posedge Clk_CI) begin : BUF_VALID
- if (Rst_RBI == 0) begin
- L1DropValid_SP[i] = 1'b0;
- L2OutValid_SP[i] = 1'b0;
- end else begin
- L1DropValid_SP[i] = L1DropValid_SN[i];
- L2OutValid_SP[i] = L2OutValid_SN[i];
- end
- end
-
- always_comb begin : BUF_TO_PREFETCH
- // L1 Drop Buf
- if (L1DropUser_DP[i] == {AXI_USER_WIDTH{1'b1}})
- L1DropPrefetch_S[i] = 1'b1;
- else
- L1DropPrefetch_S[i] = 1'b0;
-
- // L2 Out Buf
- if (L2OutUser_DP[i] == {AXI_USER_WIDTH{1'b1}})
- L2OutPrefetch_S[i] = 1'b1;
- else
- L2OutPrefetch_S[i] = 1'b0;
- end
-
- assign l2_cache_coherent[i] = L2OutCC_SP[i];
- assign int_miss[i] = L2Miss_S[i];
-
- end else begin : L2_TLB_STUB // if (ENABLE_L2TLB[i] == 1)
-
- assign l1_ar_drop[i] = int_rtrans_drop[i];
- assign l1_r_drop[i] = int_rtrans_drop[i];
- assign l1_xw_drop[i] = int_wtrans_drop[i];
-
- assign l1_ar_save[i] = 1'b0;
- assign l1_xw_save[i] = 1'b0;
- assign l2_xw_accept[i] = 1'b0;
- assign l2_xr_drop[i] = 1'b0;
- assign l2_xw_drop[i] = 1'b0;
-
- assign l2_ar_addr[i] = 'b0;
- assign l2_aw_addr[i] = 'b0;
-
- assign l1_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] :
- int_rtrans_drop[i] ? int_arid[i] :
- '0;
- assign l1_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] :
- int_rtrans_drop[i] ? int_arlen[i] :
- '0;
- assign l1_prefetch_drop[i] = rab_prefetch[i];
- assign l1_hit_drop[i] = ~rab_miss[i];
-
- assign lx_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] :
- int_rtrans_drop[i] ? int_arid[i] :
- '0;
- assign lx_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] :
- int_rtrans_drop[i] ? int_arlen[i] :
- '0;
- assign lx_prefetch_drop[i] = rab_prefetch[i];
- assign lx_hit_drop[i] = ~rab_miss[i];
-
- assign l2_cache_coherent[i] = 1'b0;
-
- assign int_miss[i] = rab_miss[i];
- assign int_prot[i] = rab_prot[i];
- assign int_multi[i] = rab_multi[i];
-
- // unused signals
- assign L2Miss_S[i] = 1'b0;
-
- assign L1OutRwType_D[i] = 1'b0;
- assign L1OutProt_D[i] = 1'b0;
- assign L1OutMulti_D[i] = 1'b0;
-
- assign L1DropRwType_DP[i] = 1'b0;
- assign L1DropUser_DP[i] = 'b0;
- assign L1DropId_DP[i] = 'b0;
- assign L1DropLen_DP[i] = 'b0;
- assign L1DropAddr_DP[i] = 'b0;
- assign L1DropProt_DP[i] = 1'b0;
- assign L1DropMulti_DP[i] = 1'b0;
-
- assign L1DropEn_S[i] = 1'b0;
- assign L1DropPrefetch_S[i] = 1'b0;
- assign L1DropValid_SN[i] = 1'b0;
- assign L1DropValid_SP[i] = 1'b0;
-
- assign L2InRwType_DP[i] = 1'b0;
- assign L2InUser_DP[i] = 'b0;
- assign L2InId_DP[i] = 'b0;
- assign L2InLen_DP[i] = 'b0;
- assign L2InAddr_DP[i] = 'b0;
-
- assign L2InEn_S[i] = 1'b0;
-
- assign L2OutHit_SN[i] = 1'b0;
- assign L2OutMiss_SN[i] = 1'b0;
- assign L2OutProt_SN[i] = 1'b0;
- assign L2OutMulti_SN[i] = 1'b0;
- assign L2OutCC_SN[i] = 1'b0;
- assign L2OutAddr_DN[i] = 'b0;
-
- assign L2OutRwType_DP[i] = 1'b0;
- assign L2OutUser_DP[i] = 'b0;
- assign L2OutId_DP[i] = 'b0;
- assign L2OutLen_DP[i] = 'b0;
- assign L2OutInAddr_DP[i] = 'b0;
- assign L2OutHit_SP[i] = 1'b0;
- assign L2OutMiss_SP[i] = 1'b0;
- assign L2OutProt_SP[i] = 1'b0;
- assign L2OutMulti_SP[i] = 1'b0;
- assign L2OutCC_SP[i] = 1'b0;
- assign L2OutAddr_DP[i] = 'b0;
-
- assign L2OutEn_S[i] = 1'b0;
- assign L2OutPrefetch_S[i] = 1'b0;
- assign L2Busy_S[i] = 1'b0;
- assign L2OutValid_S[i] = 1'b0;
- assign L2OutValid_SN[i] = 1'b0;
- assign L2OutValid_SP[i] = 1'b0;
- assign L2OutReady_S[i] = 1'b0;
-
- end // !`ifdef ENABLE_L2TLB
- end // for (i = 0; i < N_PORTS; i++)
- endgenerate
-
-// }}}
-"""
-# endmodule
-#
-#
-# // vim: ts=2 sw=2 sts=2 et nosmartindent autoindent foldmethod=marker
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class check_ram(Elaboratable):
-
- def __init__(self, ADDR_WIDTH=32, RAM_DATA_WIDTH=32, PAGE_SIZE=4096, SET_WIDTH=5, OFFSET_WIDTH=4):
- # Parameter defaults follow the commented-out SystemVerilog header below.
- self.clk_i = Signal() # input
- self.rst_ni = Signal() # input
- self.in_addr = Signal(ADDR_WIDTH) # input
- self.rw_type = Signal() # input
- self.ram_we = Signal() # input
- self.port0_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1) # input
- self.port1_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1) # input
- self.ram_wdata = Signal(RAM_DATA_WIDTH) # input
- self.output_sent = Signal() # input
- self.output_valid = Signal() # input
- self.offset_addr_d = Signal(OFFSET_WIDTH) # input
- self.hit_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1) # output
- self.master = Signal() # output
- self.hit = Signal() # output
- self.multi_hit = Signal() # output
- self.prot = Signal() # output
-
- def elaborate(self, platform=None):
- m = Module()
- return m
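The generated elaborate() is an empty stub; all of the behaviour lives in the SystemVerilog reproduced below. As a reading aid, the per-port hit comparison (valid bit set and virtual-address tag match) could be written in nmigen roughly like this. It is a sketch only: names are illustrative, and the width follows VA_RAM_DATA_WIDTH = AXI_S_ADDR_WIDTH - IGNORE_LSB + 4 as derived in l2_tlb.

from nmigen import Module, Signal

ADDR_WIDTH = 32                                # AXI_S_ADDR_WIDTH
IGNORE_LSB = 12                                # log2(PAGE_SIZE) for 4 kiB pages
RAM_DATA_WIDTH = ADDR_WIDTH - IGNORE_LSB + 4   # VA_RAM_DATA_WIDTH in l2_tlb

m = Module()
in_addr = Signal(ADDR_WIDTH)
port0_data_o = Signal(RAM_DATA_WIDTH)
port0_hit = Signal()

# Bit 0 of the RAM word is the valid flag; bits [RAM_DATA_WIDTH-1:4] hold the VA tag.
m.d.comb += port0_hit.eq(
    port0_data_o[0] &
    (in_addr[IGNORE_LSB:ADDR_WIDTH] == port0_data_o[4:RAM_DATA_WIDTH]))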
-
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# //import CfMath::log2;
-#
-# //`define MULTI_HIT_FULL_SET
-#
-# module check_ram
-# //#(
-# // parameter ADDR_WIDTH = 32,
-# // parameter RAM_DATA_WIDTH = 32,
-# // parameter PAGE_SIZE = 4096, // 4kB
-# // parameter SET_WIDTH = 5,
-# // parameter OFFSET_WIDTH = 4
-# // )
-# (
-# input logic clk_i,
-# input logic rst_ni,
-# input logic [ADDR_WIDTH-1:0] in_addr,
-# input logic rw_type, // 1 => write, 0=> read
-# input logic ram_we,
-# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr,
-# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr,
-# input logic [RAM_DATA_WIDTH-1:0] ram_wdata,
-# input logic output_sent,
-# input logic output_valid,
-# input logic [OFFSET_WIDTH-1:0] offset_addr_d,
-# output logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr,
-# output logic master,
-# output logic hit,
-# output logic multi_hit,
-# output logic prot
-# );
-#
-""" #docstring_begin
-
- localparam IGNORE_LSB = log2(PAGE_SIZE); // 12
-
- logic [RAM_DATA_WIDTH-1:0] port0_data_o, port1_data_o; // RAM read data outputs
- logic port0_hit, port1_hit; // Ram output matches in_addr
-
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr_saved, port1_addr_saved;
-
- // Hit FSM Signals
- typedef enum logic {SEARCH, HIT} hit_state_t;
- hit_state_t hit_SP; // Hit FSM state
- hit_state_t hit_SN; // Hit FSM next state
-
- // Multi Hit FSM signals
-`ifdef MULTI_HIT_FULL_SET
- typedef enum logic[1:0] {NO_HITS, ONE_HIT, MULTI_HIT} multi_state_t;
- multi_state_t multi_SP; // Multi Hit FSM state
- multi_state_t multi_SN; // Multi Hit FSM next state
-
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_saved;
- logic master_saved;
-`endif
-
- //// --------------- Block RAM (Dual Port) -------------- ////
-
- // The outputs of the BRAMs are only valid if in the previous cycle:
- // 1. the inputs were valid, and
- // 2. the BRAM was not written to.
- // Otherwise, the outputs must be ignored which is controlled by the output_valid signal.
- // This signal is driven by the upper-level L2 TLB module.
- ram_tp_no_change #(
- .ADDR_WIDTH( SET_WIDTH+OFFSET_WIDTH+1 ),
- .DATA_WIDTH( RAM_DATA_WIDTH )
- )
- ram_tp_no_change_0
- (
- .clk ( clk_i ),
- .we ( ram_we ),
- .addr0 ( port0_addr ),
- .addr1 ( port1_addr ),
- .d_i ( ram_wdata ),
- .d0_o ( port0_data_o ),
- .d1_o ( port1_data_o )
- );
-
- //// Check Ram Outputs
- assign port0_hit = (port0_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port0_data_o[RAM_DATA_WIDTH-1:4]);
- assign port1_hit = (port1_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port1_data_o[RAM_DATA_WIDTH-1:4]);
- //// ----------------------------------------------------- /////
-
- //// ------------------- Check if Hit ------------------------ ////
- // FSM
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- hit_SP <= SEARCH;
- end else begin
- hit_SP <= hit_SN;
- end
- end
-
- always_ff @(posedge clk_i, negedge rst_ni) begin
- if (!rst_ni) begin
- port0_addr_saved <= '0;
- port1_addr_saved <= '0;
- end else begin
- port0_addr_saved <= port0_addr;
- port1_addr_saved <= port1_addr;
- end
- end
-
- always_comb begin
- hit_SN = hit_SP;
- hit = 1'b0;
- hit_addr = 0;
- master = 1'b0;
- unique case(hit_SP)
- SEARCH :
- if (output_valid)
- if (port0_hit || port1_hit) begin
- hit_SN = HIT;
- hit = 1'b1;
- hit_addr = port0_hit ? {port0_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} :
- port1_hit ? {port1_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} :
- 0;
- master = port0_hit ? port0_data_o[3] :
- port1_hit ? port1_data_o[3] :
- 1'b0;
- end
-
- HIT : begin
-`ifdef MULTI_HIT_FULL_SET // Since the search continues after the first hit, it needs to be saved to be accessed later.
- hit = 1'b1;
- hit_addr = hit_addr_saved;
- master = master_saved;
-`endif
- if (output_sent)
- hit_SN = SEARCH;
- end
-
- default : begin
- hit_SN = SEARCH;
- end
- endcase // case (hit_SP)
- end // always_comb begin
-
- //// ------------------------------------------- ////
-
- assign prot = output_valid && port0_hit ? ((~port0_data_o[2] && rw_type) || (~port0_data_o[1] && ~rw_type)) :
- output_valid && port1_hit ? ((~port1_data_o[2] && rw_type) || (~port1_data_o[1] && ~rw_type)) :
- 1'b0;
-
- //// ------------------- Multi ------------------- ////
-`ifdef MULTI_HIT_FULL_SET
-
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- hit_addr_saved <= 0;
- master_saved <= 1'b0;
- end else if (output_valid) begin
- hit_addr_saved <= hit_addr;
- master_saved <= master;
- end
- end
-
- // FSM
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- multi_SP <= NO_HITS;
- end else begin
- multi_SP <= multi_SN;
- end
- end
-
- always_comb begin
- multi_SN = multi_SP;
- multi_hit = 1'b0;
- unique case(multi_SP)
- NO_HITS :
- if(output_valid && (port0_hit && port1_hit)) begin
- multi_SN = MULTI_HIT;
- multi_hit = 1'b1;
- end else if(output_valid && (port0_hit || port1_hit))
- multi_SN = ONE_HIT;
-
- ONE_HIT :
- if(output_valid && (port0_hit || port1_hit)) begin
- multi_SN = MULTI_HIT;
- multi_hit = 1'b1;
- end else if (output_sent)
- multi_SN = NO_HITS;
-
- MULTI_HIT : begin
- multi_hit = 1'b1;
- if (output_sent)
- multi_SN = NO_HITS;
- end
-
- endcase // case (multi_SP)
- end // always_comb begin
-
-`else // !`ifdef MULTI_HIT_FULL_SET
- assign multi_hit = output_valid && port0_hit && port1_hit;
-`endif // !`ifdef MULTI_HIT_FULL_SET
- //// ------------------------------------------- ////
-"""
-# endmodule
-#
-#
+++ /dev/null
-class CoreConfig:
- def __init__(self):
- self.N_SLICES = 16
- self.N_REGS = 4*self.N_SLICES
- self.ADDR_WIDTH_PHYS = 40
- self.ADDR_WIDTH_VIRT = 32
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class fsm(Elaboratable):
-
- def __init__(self, AXI_M_ADDR_WIDTH=40, AXI_S_ADDR_WIDTH=32, AXI_ID_WIDTH=8, AXI_USER_WIDTH=6):
- # Parameter defaults follow the commented-out SystemVerilog header below.
- self.Clk_CI = Signal() # input
- self.Rst_RBI = Signal() # input
- self.port1_addr_valid_i = Signal() # input
- self.port2_addr_valid_i = Signal() # input
- self.port1_sent_i = Signal() # input
- self.port2_sent_i = Signal() # input
- self.select_i = Signal() # input
- self.no_hit_i = Signal() # input
- self.multi_hit_i = Signal() # input
- self.no_prot_i = Signal() # input
- self.prefetch_i = Signal() # input
- self.out_addr_i = Signal(AXI_M_ADDR_WIDTH) # input
- self.cache_coherent_i = Signal() # input
- self.port1_accept_o = Signal() # output
- self.port1_drop_o = Signal() # output
- self.port1_miss_o = Signal() # output
- self.port2_accept_o = Signal() # output
- self.port2_drop_o = Signal() # output
- self.port2_miss_o = Signal() # output
- self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output
- self.cache_coherent_o = Signal() # output
- self.miss_o = Signal() # output
- self.multi_o = Signal() # output
- self.prot_o = Signal() # output
- self.prefetch_o = Signal() # output
- self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input
- self.in_id_i = Signal(AXI_ID_WIDTH) # input
- self.in_len_i = Signal(8) # input
- self.in_user_i = Signal(AXI_USER_WIDTH) # input
- self.in_addr_o = Signal(AXI_S_ADDR_WIDTH) # output
- self.in_id_o = Signal(AXI_ID_WIDTH) # output
- self.in_len_o = Signal(8) # output
- self.in_user_o = Signal(AXI_USER_WIDTH) # output
-
- def elaborate(self, platform=None):
- m = Module()
- return m
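The elaborate() stub is empty here as well. The accept/drop decision that the SystemVerilog FSM below takes in its IDLE state reduces to a few combinational expressions; a hedged nmigen sketch with illustrative names (no output registering or port selection):

from nmigen import Module, Signal

m = Module()
no_hit_i = Signal()
multi_hit_i = Signal()
no_prot_i = Signal()
prefetch_i = Signal()
accept_o = Signal()
drop_o = Signal()
miss_o = Signal()

must_drop = Signal()
m.d.comb += [
    # drop on a miss, a multi hit, a protection violation, or a prefetch
    must_drop.eq(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i),
    accept_o.eq(~must_drop),
    drop_o.eq(must_drop),
    miss_o.eq(no_hit_i),
]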
-
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# //`timescale 1ns / 1ps
-#
-# module fsm
-# #(
-# parameter AXI_M_ADDR_WIDTH = 40,
-# parameter AXI_S_ADDR_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 8,
-# parameter AXI_USER_WIDTH = 6
-# )
-# (
-# input logic Clk_CI,
-# input logic Rst_RBI,
-#
-# input logic port1_addr_valid_i,
-# input logic port2_addr_valid_i,
-# input logic port1_sent_i,
-# input logic port2_sent_i,
-# input logic select_i,
-# input logic no_hit_i,
-# input logic multi_hit_i,
-# input logic no_prot_i,
-# input logic prefetch_i,
-# input logic [AXI_M_ADDR_WIDTH-1:0] out_addr_i,
-# input logic cache_coherent_i,
-# output logic port1_accept_o,
-# output logic port1_drop_o,
-# output logic port1_miss_o,
-# output logic port2_accept_o,
-# output logic port2_drop_o,
-# output logic port2_miss_o,
-# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o,
-# output logic cache_coherent_o,
-# output logic miss_o,
-# output logic multi_o,
-# output logic prot_o,
-# output logic prefetch_o,
-# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i,
-# input logic [AXI_ID_WIDTH-1:0] in_id_i,
-# input logic [7:0] in_len_i,
-# input logic [AXI_USER_WIDTH-1:0] in_user_i,
-# output logic [AXI_S_ADDR_WIDTH-1:0] in_addr_o,
-# output logic [AXI_ID_WIDTH-1:0] in_id_o,
-# output logic [7:0] in_len_o,
-# output logic [AXI_USER_WIDTH-1:0] in_user_o
-# );
-#
-""" #docstring_begin
-
- //-------------Internal Signals----------------------
-
- typedef enum logic {IDLE, WAIT} state_t;
- logic state_SP; // Present state
- logic state_SN; // Next State
-
- logic port1_accept_SN;
- logic port1_drop_SN;
- logic port1_miss_SN;
- logic port2_accept_SN;
- logic port2_drop_SN;
- logic port2_miss_SN;
- logic miss_SN;
- logic multi_SN;
- logic prot_SN;
- logic prefetch_SN;
- logic cache_coherent_SN;
- logic [AXI_M_ADDR_WIDTH-1:0] out_addr_DN;
-
- logic out_reg_en_S;
-
- //----------FSM comb------------------------------
-
- always_comb begin: FSM_COMBO
- state_SN = state_SP;
-
- port1_accept_SN = 1'b0;
- port1_drop_SN = 1'b0;
- port1_miss_SN = 1'b0;
- port2_accept_SN = 1'b0;
- port2_drop_SN = 1'b0;
- port2_miss_SN = 1'b0;
- miss_SN = 1'b0;
- multi_SN = 1'b0;
- prot_SN = 1'b0;
- prefetch_SN = 1'b0;
- cache_coherent_SN = 1'b0;
- out_addr_DN = '0;
-
- out_reg_en_S = 1'b0; // by default hold register output
-
- unique case(state_SP)
- IDLE :
- if ( (port1_addr_valid_i & select_i) | (port2_addr_valid_i & ~select_i) ) begin
- out_reg_en_S = 1'b1;
- state_SN = WAIT;
-
- // Select inputs for output registers
- if (port1_addr_valid_i & select_i) begin
- port1_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
- port1_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
- port1_miss_SN = no_hit_i;
- port2_accept_SN = 1'b0;
- port2_drop_SN = 1'b0;
- port2_miss_SN = 1'b0;
- end else if (port2_addr_valid_i & ~select_i) begin
- port1_accept_SN = 1'b0;
- port1_drop_SN = 1'b0;
- port1_miss_SN = 1'b0;
- port2_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
- port2_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
- port2_miss_SN = no_hit_i;
- end
-
- miss_SN = port1_miss_SN | port2_miss_SN;
- multi_SN = multi_hit_i;
- prot_SN = ~no_prot_i;
- prefetch_SN = ~no_hit_i & prefetch_i;
-
- cache_coherent_SN = cache_coherent_i;
- out_addr_DN = out_addr_i;
- end
-
- WAIT :
- if ( port1_sent_i | port2_sent_i ) begin
- out_reg_en_S = 1'b1; // "clear" the register
- state_SN = IDLE;
- end
-
- default : begin
- state_SN = IDLE;
- end
- endcase
- end
-
- //----------FSM seq-------------------------------
-
- always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: FSM_SEQ
- if (Rst_RBI == 1'b0)
- state_SP <= IDLE;
- else
- state_SP <= state_SN;
- end
-
- //----------Output seq--------------------------
-
- always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: OUTPUT_SEQ
- if (Rst_RBI == 1'b0) begin
- port1_accept_o = 1'b0;
- port1_drop_o = 1'b0;
- port1_miss_o = 1'b0;
- port2_accept_o = 1'b0;
- port2_drop_o = 1'b0;
- port2_miss_o = 1'b0;
- miss_o = 1'b0;
- multi_o = 1'b0;
- prot_o = 1'b0;
- prefetch_o = 1'b0;
- cache_coherent_o = 1'b0;
- out_addr_o = '0;
- in_addr_o = '0;
- in_id_o = '0;
- in_len_o = '0;
- in_user_o = '0;
- end else if (out_reg_en_S == 1'b1) begin
- port1_accept_o = port1_accept_SN;
- port1_drop_o = port1_drop_SN;
- port1_miss_o = port1_miss_SN;
- port2_accept_o = port2_accept_SN;
- port2_drop_o = port2_drop_SN;
- port2_miss_o = port2_miss_SN;
- miss_o = miss_SN;
- multi_o = multi_SN;
- prot_o = prot_SN;
- prefetch_o = prefetch_SN;
- cache_coherent_o = cache_coherent_SN;
- out_addr_o = out_addr_DN;
- in_addr_o = in_addr_i;
- in_id_o = in_id_i;
- in_len_o = in_len_i;
- in_user_o = in_user_i;
- end
- end // block: OUTPUT_SEQ
-"""
-#
-# endmodule
-#
-#
+++ /dev/null
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class l2_tlb(Elaboratable):
-
- def __init__(self, AXI_S_ADDR_WIDTH=32, AXI_M_ADDR_WIDTH=40, AXI_LITE_DATA_WIDTH=64, AXI_LITE_ADDR_WIDTH=32):
- # Parameter defaults follow the commented-out SystemVerilog header below.
- self.clk_i = Signal() # input
- self.rst_ni = Signal() # input
- self.we_i = Signal() # input
- self.waddr_i = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.wdata_i = Signal(AXI_LITE_DATA_WIDTH) # input
- self.start_i = Signal() # input
- self.busy_o = Signal() # output
- self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input
- self.rw_type_i = Signal() # input
- self.out_ready_i = Signal() # input
- self.out_valid_o = Signal() # output
- self.hit_o = Signal() # output
- self.miss_o = Signal() # output
- self.prot_o = Signal() # output
- self.multi_o = Signal() # output
- self.cache_coherent_o = Signal() # output
- self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output
-
- def elaborate(self, platform=None):
- m = Module()
- return m
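For reference, the RAM geometry that the SystemVerilog below derives via localparams can be reproduced in plain Python, which is handy when sizing the nmigen port. The values shown assume the commented-out parameter defaults and assume that CfMath::log2 behaves as a ceiling log2.

from math import ceil, log2


def clog2(x):
    # assumed equivalent of CfMath::log2 (ceiling log2)
    return max(1, ceil(log2(x)))


N_SETS, N_OFFSETS, N_PAR_VA_RAMS, PAGE_SIZE = 32, 4, 4, 4096
AXI_S_ADDR_WIDTH, AXI_M_ADDR_WIDTH = 32, 40

VA_RAM_DEPTH      = N_SETS * N_OFFSETS * 2              # 256
PA_RAM_DEPTH      = VA_RAM_DEPTH * N_PAR_VA_RAMS        # 1024
VA_RAM_ADDR_WIDTH = clog2(VA_RAM_DEPTH)                 # 8
PA_RAM_ADDR_WIDTH = clog2(PA_RAM_DEPTH)                 # 10
SET_WIDTH         = clog2(N_SETS)                       # 5
OFFSET_WIDTH      = clog2(N_OFFSETS)                    # 2
IGNORE_LSB        = clog2(PAGE_SIZE)                    # 12
VA_RAM_DATA_WIDTH = AXI_S_ADDR_WIDTH - IGNORE_LSB + 4   # 24
PA_RAM_DATA_WIDTH = AXI_M_ADDR_WIDTH - IGNORE_LSB       # 28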
-
-
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# //`include "pulp_soc_defines.sv"
-#
-# ////import CfMath::log2;
-#
-# //`define MULTI_HIT_FULL_SET // Enable full multi hit detection. Always the entire set is searched.
-# //`define MULTI_HIT_CUR_CYCLE // Enable partial multi hit detection. Only multi hits in the same search cycle are detected.
-#
-# //`ifdef MULTI_HIT_FULL_SET
-# // `ifndef MULTI_HIT_CUR_CYCLE
-# // `define MULTI_HIT_CUR_CYCLE
-# // `endif
-# //`endif
-#
-# module l2_tlb
-# //#(
-# // parameter AXI_S_ADDR_WIDTH = 32,
-# // parameter AXI_M_ADDR_WIDTH = 40,
-# // parameter AXI_LITE_DATA_WIDTH = 64,
-# // parameter AXI_LITE_ADDR_WIDTH = 32,
-# // parameter N_SETS = 32,
-# // parameter N_OFFSETS = 4, //per port. There are 2 ports.
-# // parameter PAGE_SIZE = 4096, // 4kB
-# // parameter N_PAR_VA_RAMS = 4,
-# // parameter HIT_OFFSET_STORE_WIDTH = 2 // Num of bits of VA RAM offset stored. This should not be greater than OFFSET_WIDTH
-# // )
-# (
-# input logic clk_i,
-# input logic rst_ni,
-#
-# input logic we_i,
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] waddr_i,
-# input logic [AXI_LITE_DATA_WIDTH-1:0] wdata_i,
-#
-# input logic start_i,
-# output logic busy_o,
-# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i,
-# input logic rw_type_i, //1 => write, 0=> read
-#
-# input logic out_ready_i,
-# output logic out_valid_o,
-# output logic hit_o,
-# output logic miss_o,
-# output logic prot_o,
-# output logic multi_o,
-# output logic cache_coherent_o,
-# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o
-# );
-#
-""" #docstring_begin
-
- localparam VA_RAM_DEPTH = N_SETS * N_OFFSETS * 2;
- localparam PA_RAM_DEPTH = VA_RAM_DEPTH * N_PAR_VA_RAMS;
- localparam VA_RAM_ADDR_WIDTH = log2(VA_RAM_DEPTH);
- localparam PA_RAM_ADDR_WIDTH = log2(PA_RAM_DEPTH);
- localparam SET_WIDTH = log2(N_SETS);
- localparam OFFSET_WIDTH = log2(N_OFFSETS);
- localparam LL_WIDTH = log2(N_PAR_VA_RAMS);
- localparam IGNORE_LSB = log2(PAGE_SIZE);
-
- localparam VA_RAM_DATA_WIDTH = AXI_S_ADDR_WIDTH - IGNORE_LSB + 4;
- localparam PA_RAM_DATA_WIDTH = AXI_M_ADDR_WIDTH - IGNORE_LSB;
-
- logic [N_PAR_VA_RAMS-1:0] hit, prot, multi_hit, cache_coherent;
- logic [N_PAR_VA_RAMS-1:0] ram_we;
- logic last_search, last_search_next;
- logic first_search, first_search_next;
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] ram_waddr;
- logic [N_PAR_VA_RAMS-1:0][SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr;
- logic pa_ram_we;
- logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr, pa_port0_waddr; // PA RAM read, Write addr;
- logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr_reg_SN, pa_port0_raddr_reg_SP; // registered addresses, needed for WAIT_ON_WRITE;
- logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_addr; // PA RAM addr
- logic [PA_RAM_DATA_WIDTH-1:0] pa_port0_data, pa_data, pa_port0_data_reg; // PA RAM data
- logic pa_ram_store_data_SN, pa_ram_store_data_SP;
- logic hit_top, prot_top, multi_hit_top, first_hit_top;
- logic output_sent;
- int hit_block_num;
-
- logic searching, search_done;
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, port0_raddr; // VA RAM port0 addr
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr; // VA RAM port1 addr
- logic [OFFSET_WIDTH-1:0] offset_addr, offset_addr_d;
- logic [OFFSET_WIDTH-1:0] offset_start_addr, offset_end_addr;
- logic [SET_WIDTH-1:0] set_num;
-
- logic va_output_valid;
- logic searching_q;
-
- genvar z;
-
- // Search FSM
- typedef enum logic [1:0] {IDLE, SEARCH, DONE} search_state_t;
- search_state_t search_SP; // Present state
- search_state_t search_SN; // Next State
-
- // Output FSM
- typedef enum logic [1:0] {OUT_IDLE, SEND_OUTPUT, WAIT_ON_WRITE} out_state_t;
- out_state_t out_SP; // Present state
- out_state_t out_SN; // Next State
-
- logic miss_next;
- logic hit_next;
- logic prot_next;
- logic multi_next;
- logic cache_coherent_next;
-
- // Generate the VA Block rams and their surrounding logic
- generate
- for (z = 0; z < N_PAR_VA_RAMS; z++) begin : VA_RAMS
- check_ram
- #(
- .ADDR_WIDTH ( AXI_S_ADDR_WIDTH ),
- .RAM_DATA_WIDTH ( VA_RAM_DATA_WIDTH ),
- .PAGE_SIZE ( PAGE_SIZE ),
- .SET_WIDTH ( SET_WIDTH ),
- .OFFSET_WIDTH ( OFFSET_WIDTH )
- )
- u_check_ram
- (
- .clk_i ( clk_i ),
- .rst_ni ( rst_ni ),
- .in_addr ( in_addr_i ),
- .rw_type ( rw_type_i ),
- .ram_we ( ram_we[z] ),
- .port0_addr ( port0_addr ),
- .port1_addr ( port1_addr ),
- .ram_wdata ( wdata_i[VA_RAM_DATA_WIDTH-1:0] ),
- .output_sent ( output_sent ),
- .output_valid ( va_output_valid ),
- .offset_addr_d ( offset_addr_d ),
- .hit_addr ( hit_addr[z] ),
- .master ( cache_coherent[z] ),
- .hit ( hit[z] ),
- .multi_hit ( multi_hit[z] ),
- .prot ( prot[z] )
- );
- end // for (z = 0; z < N_PORTS; z++)
- endgenerate
-
- ////////////////// ---------------- Control and Address --------------- ////////////////////////
- // FSM
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- search_SP <= IDLE;
- end else begin
- search_SP <= search_SN;
- end
- end
-
- always_comb begin : SEARCH_FSM
- search_SN = search_SP;
- busy_o = 1'b0;
- searching = 1'b0;
- search_done = 1'b0;
- last_search_next = 1'b0;
- first_search_next = first_search;
-
- unique case (search_SP)
- IDLE : begin
- if (start_i) begin
- search_SN = SEARCH;
- first_search_next = 1'b1;
- end
- end
-
- SEARCH : begin
- busy_o = 1'b1;
-
- // detect last search cycle
- if ( (first_search == 1'b0) && (offset_addr == offset_end_addr) )
- last_search_next = 1'b1;
-
- // pause search during VA RAM reconfigration
- if (|ram_we) begin
- searching = 1'b0;
- end else begin
- searching = 1'b1;
- first_search_next = 1'b0;
- end
-
- if (va_output_valid) begin
- // stop search
-`ifdef MULTI_HIT_FULL_SET
- if (last_search | prot_top | multi_hit_top) begin
-`else
- if (last_search | prot_top | multi_hit_top | hit_top ) begin
-`endif
- search_SN = DONE;
- search_done = 1'b1;
- end
- end
- end
-
- DONE : begin
- busy_o = 1'b1;
- if (out_valid_o & out_ready_i)
- search_SN = IDLE;
- end
-
- default : begin
- search_SN = IDLE;
- end
- endcase // case (prot_SP)
- end // always_comb begin
-
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- last_search <= 1'b0;
- first_search <= 1'b0;
- end else begin
- last_search <= last_search_next;
- first_search <= first_search_next;
- end
- end
-
- /*
- * VA RAM address generation
- *
- * The input address and set number, and thus the offset start address, are available in the
- * cycle after the start signal. The buffered offset_addr becomes available one cycle later.
- * During the first search cycle, we therefore directly use offset_addr_start for the lookup.
- */
- assign set_num = in_addr_i[SET_WIDTH+IGNORE_LSB -1 : IGNORE_LSB];
-
- assign port0_raddr[OFFSET_WIDTH] = 1'b0;
- assign port1_addr [OFFSET_WIDTH] = 1'b1;
-
- assign port0_raddr[OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr;
- assign port1_addr [OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr;
-
- assign port0_raddr[SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num;
- assign port1_addr [SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num;
-
- assign port0_addr = ram_we ? ram_waddr : port0_raddr;
-
- // The outputs of the BRAMs are only valid if in the previous cycle:
- // 1. the inputs were valid, and
- // 2. the BRAMs were not written to.
- // Otherwise, the outputs must be ignored.
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- searching_q <= 1'b0;
- end else begin
- searching_q <= searching;
- end
- end
- assign va_output_valid = searching_q;
-
- // Address offset for looking up the VA RAMs
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- offset_addr <= 0;
- end else if (first_search) begin
- offset_addr <= offset_start_addr + 1'b1;
- end else if (searching) begin
- offset_addr <= offset_addr + 1'b1;
- end
- end
-
- // Delayed address offest for looking up the PA RAM upon a hit in the VA RAMs
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- offset_addr_d <= 0;
- end else if (first_search) begin
- offset_addr_d <= offset_start_addr;
- end else if (searching) begin
- offset_addr_d <= offset_addr_d + 1'b1;
- end
- end
-
- // Store the offset addr for hit to reduce latency for next search.
- generate
- if (HIT_OFFSET_STORE_WIDTH > 0) begin : OFFSET_STORE
-`ifndef MULTI_HIT_FULL_SET
- logic [N_SETS-1:0][HIT_OFFSET_STORE_WIDTH-1:0] hit_offset_addr; // Contains offset addr for previous hit for every SET.
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_reg;
-
- assign offset_start_addr = { hit_offset_addr[set_num] , {{OFFSET_WIDTH-HIT_OFFSET_STORE_WIDTH}{1'b0}} };
- assign offset_end_addr = hit_offset_addr[set_num]-1'b1;
-
- // Register the hit addr
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- hit_addr_reg <= 0;
- end else if (hit_top) begin
- hit_addr_reg <= hit_addr[hit_block_num];
- end
- end
-
- // Store hit addr for each set. The next search in the same set will start from the saved addr.
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- hit_offset_addr <= 0;
- end else if (hit_o) begin
- hit_offset_addr[set_num][HIT_OFFSET_STORE_WIDTH-1:0] <= hit_addr_reg[OFFSET_WIDTH-1 : (OFFSET_WIDTH - HIT_OFFSET_STORE_WIDTH)];
- end
- end
-`else // No need to store offset if full multi hit detection is enabled because the entire SET is searched.
- assign offset_start_addr = 0;
- assign offset_end_addr = {OFFSET_WIDTH{1'b1}};
-`endif
- end else begin // if (HIT_OFFSET_STORE_WIDTH > 0)
- assign offset_start_addr = 0;
- assign offset_end_addr = {OFFSET_WIDTH{1'b1}};
- end
- endgenerate
-
- assign prot_top = |prot;
-
- //////////////////////////////////////////////////////////////////////////////////////
- // check for hit, multi hit
- // In case of a multi hit, the hit_block_num indicates the lowest VA RAM with a hit.
- // In case of a multi hit in the same VA RAM, Port 0 is given priority.
- always_comb begin : HIT_CHECK
- hit_top = |hit;
- hit_block_num = 0;
- first_hit_top = 1'b0;
- multi_hit_top = 1'b0;
- for (int i=N_PAR_VA_RAMS-1; i>=0; i--) begin
- if (hit[i] == 1'b1) begin
-`ifdef MULTI_HIT_CUR_CYCLE
- if (multi_hit[i] | first_hit_top ) begin
- multi_hit_top = 1'b1;
- end
-`endif
- first_hit_top = 1'b1;
- hit_block_num = i;
- end
- end // for (int i=0; i<N_PAR_VA_RAMS; i++)
- end // always_comb begin
-
- ///////////////////// ------------- Outputs ------------ //////////////////////////////////
- //// FSM
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- out_SP <= OUT_IDLE;
- pa_ram_store_data_SP <= 1'b0;
- pa_port0_raddr_reg_SP <= 'b0;
- end else begin
- out_SP <= out_SN;
- pa_ram_store_data_SP <= pa_ram_store_data_SN;
- pa_port0_raddr_reg_SP <= pa_port0_raddr_reg_SN;
- end
- end
-
- always_comb begin : OUTPUT_FSM
- out_SN = out_SP;
-
- miss_next = miss_o;
- prot_next = prot_o;
- multi_next = multi_o;
- hit_next = hit_o;
- cache_coherent_next = cache_coherent_o;
- pa_port0_raddr_reg_SN = pa_port0_raddr_reg_SP;
-
- pa_port0_raddr = 'b0;
- pa_ram_store_data_SN = 1'b0;
-
- out_valid_o = 1'b0;
- output_sent = 1'b0;
-
- unique case (out_SP)
- OUT_IDLE : begin
- hit_next = 1'b0;
- miss_next = 1'b0;
- prot_next = 1'b0;
- multi_next = 1'b0;
- cache_coherent_next = 1'b0;
-
- // abort transaction
- if ((search_done & ~hit_top) | prot_top | multi_hit_top) begin
- out_SN = SEND_OUTPUT;
-
- if (search_done & ~hit_top) begin
- miss_next = 1'b1;
- end
- if (prot_top) begin
- prot_next = 1'b1;
- hit_next = 1'b1;
- end
- if (multi_hit_top) begin
- multi_next = 1'b1;
- hit_next = 1'b1;
- end
-
- // read PA RAM
- end else if (search_done & hit_top) begin
- hit_next = 1'b1;
- cache_coherent_next = cache_coherent[hit_block_num];
- pa_port0_raddr = (N_PAR_VA_RAMS * hit_addr[hit_block_num]) + hit_block_num;
- pa_port0_raddr_reg_SN = pa_port0_raddr;
-
- // read PA RAM now
- if (~pa_ram_we) begin
- out_SN = SEND_OUTPUT;
- pa_ram_store_data_SN = 1'b1;
-
- // read PA RAM after PA RAM reconfiguration
- end else begin // pa_ram_we
- out_SN = WAIT_ON_WRITE;
-
- end
- end
- end
-
- WAIT_ON_WRITE : begin
- if ( ~pa_ram_we ) begin
- out_SN = SEND_OUTPUT;
- pa_port0_raddr = pa_port0_raddr_reg_SP;
- pa_ram_store_data_SN = 1'b1;
- end
- end
-
- SEND_OUTPUT : begin
- out_valid_o = 1'b1;
- if (out_ready_i) begin
- out_SN = OUT_IDLE;
- output_sent = 1'b1;
- end
- end
-
- default : begin
- out_SN = OUT_IDLE;
- end
-
- endcase // case (out_SP)
- end // always_comb begin
-
- //// Output signals
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- miss_o <= 1'b0;
- prot_o <= 1'b0;
- multi_o <= 1'b0;
- hit_o <= 1'b0;
- cache_coherent_o <= 1'b0;
- end else begin
- miss_o <= miss_next;
- prot_o <= prot_next;
- multi_o <= multi_next;
- hit_o <= hit_next;
- cache_coherent_o <= cache_coherent_next;
- end
- end
-
- ///////////////////////////////////////////////////////////////////////////////////////////////////
-
-
- ///////////////////// --------------- Physical Address -------------- ////////////////////////////
-
- /// PA Block RAM
- ram_tp_no_change #(
- .ADDR_WIDTH( PA_RAM_ADDR_WIDTH ),
- .DATA_WIDTH( PA_RAM_DATA_WIDTH )
- )
- pa_ram
- (
- .clk ( clk_i ),
- .we ( pa_ram_we ),
- .addr0 ( pa_port0_addr ),
- .addr1 ( '0 ),
- .d_i ( wdata_i[PA_RAM_DATA_WIDTH-1:0] ),
- .d0_o ( pa_port0_data ),
- .d1_o ( )
- );
-
- assign out_addr_o[IGNORE_LSB-1:0] = in_addr_i[IGNORE_LSB-1:0];
- assign out_addr_o[AXI_M_ADDR_WIDTH-1:IGNORE_LSB] = pa_data;
-
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- pa_port0_data_reg <= 0;
- end else if (pa_ram_store_data_SP) begin
- pa_port0_data_reg <= pa_port0_data;
- end
- end
-
- assign pa_data = pa_ram_store_data_SP ? pa_port0_data : pa_port0_data_reg;
-
-/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-///// Write enable for all block rams
-generate if (LL_WIDTH != 0) begin
- always_comb begin
- var reg[LL_WIDTH:0] para;
- var int para_int;
- for (para = 0; para < N_PAR_VA_RAMS; para=para+1'b1) begin
- para_int = int'(para);
- ram_we[para_int] = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b0) && (waddr_i[LL_WIDTH-1:0] == para);
- end
- end
-end else begin
- assign ram_we[0] = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b0);
-end
-
-endgenerate
-
-// Addresses are word, not byte addresses
-assign pa_ram_we = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b1); //waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] will be 0 for all VA writes and 1 for all PA writes
-assign ram_waddr = waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH-1:LL_WIDTH];
-assign pa_port0_waddr = waddr_i[PA_RAM_ADDR_WIDTH-1:0];
-assign pa_port0_addr = pa_ram_we ? pa_port0_waddr : pa_port0_raddr;
-
-"""
-# endmodule
-#
-# // vim: ts=3 sw=3 sts=3 et nosmartindent autoindent foldmethod=marker tw=100
-#
-#
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-
-# this file has been generated by sv2nmigen
-
-#
-# //`include "pulp_soc_defines.sv"
-#
-# ////import CfMath::log2;
-#
-# //`define MY_ARRAY_SUM(MY_ARRAY,ARRAY_SIZE) ( (ARRAY_SIZE==1) ? MY_ARRAY[0] : (ARRAY_SIZE==2) ? MY_ARRAY[0] + MY_ARRAY[1] : (ARRAY_SIZE==3) ? MY_ARRAY[0] + MY_ARRAY[1] + MY_ARRAY[2] : (ARRAY_SIZE==4) ? MY_ARRAY[0] + MY_ARRAY[1] + MY_ARRAY[2] + MY_ARRAY[3] : 0 )
-#
-
-# module rab_core
-# #(
-# parameter N_PORTS = 3,
-# parameter N_L2_SETS = 32,
-# parameter N_L2_SET_ENTRIES = 32,
-# parameter AXI_DATA_WIDTH = 64,
-# parameter AXI_S_ADDR_WIDTH = 32,
-# parameter AXI_M_ADDR_WIDTH = 40,
-# parameter AXI_LITE_DATA_WIDTH = 64,
-# parameter AXI_LITE_ADDR_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 8,
-# parameter AXI_USER_WIDTH = 6,
-# parameter MH_FIFO_DEPTH = 16
-# )
-# (
-# input logic Clk_CI,
-# input logic Rst_RBI,
-#
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi_awaddr,
-# input logic s_axi_awvalid,
-# output logic s_axi_awready,
-#
-# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi_wdata,
-# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi_wstrb,
-# input logic s_axi_wvalid,
-# output logic s_axi_wready,
-#
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi_araddr,
-# input logic s_axi_arvalid,
-# output logic s_axi_arready,
-#
-# input logic s_axi_rready,
-# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi_rdata,
-# output logic [1:0] s_axi_rresp,
-# output logic s_axi_rvalid,
-#
-# output logic [1:0] s_axi_bresp,
-# output logic s_axi_bvalid,
-# input logic s_axi_bready,
-#
-# output logic [N_PORTS-1:0] int_miss,
-# output logic [N_PORTS-1:0] int_prot,
-# output logic [N_PORTS-1:0] int_multi,
-# output logic [N_PORTS-1:0] int_prefetch,
-# output logic int_mhf_full,
-#
-# output logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_axaddr_o,
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_axid_o,
-# output logic [N_PORTS-1:0] [7:0] int_axlen_o,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_axuser_o,
-#
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] port1_addr,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] port1_id,
-# input logic [N_PORTS-1:0] [7:0] port1_len,
-# input logic [N_PORTS-1:0] [2:0] port1_size,
-# input logic [N_PORTS-1:0] port1_addr_valid,
-# input logic [N_PORTS-1:0] port1_type,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] port1_user,
-# input logic [N_PORTS-1:0] port1_sent,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] port1_out_addr,
-# output logic [N_PORTS-1:0] port1_cache_coherent,
-# output logic [N_PORTS-1:0] port1_accept,
-# output logic [N_PORTS-1:0] port1_drop,
-# output logic [N_PORTS-1:0] port1_miss,
-#
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] port2_addr,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] port2_id,
-# input logic [N_PORTS-1:0] [7:0] port2_len,
-# input logic [N_PORTS-1:0] [2:0] port2_size,
-# input logic [N_PORTS-1:0] port2_addr_valid,
-# input logic [N_PORTS-1:0] port2_type,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] port2_user,
-# input logic [N_PORTS-1:0] port2_sent,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] port2_out_addr,
-# output logic [N_PORTS-1:0] port2_cache_coherent,
-# output logic [N_PORTS-1:0] port2_accept,
-# output logic [N_PORTS-1:0] port2_drop,
-# output logic [N_PORTS-1:0] port2_miss,
-#
-# input logic [N_PORTS-1:0] miss_l2_i,
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] miss_l2_addr_i,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] miss_l2_id_i,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] miss_l2_user_i,
-#
-# output logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] wdata_l2_o,
-# output logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] waddr_l2_o,
-# output logic [N_PORTS-1:0] wren_l2_o
-# );
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class rab_core(Elaboratable):
-
- def __init__(self):
- self.s_axi_awaddr = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.s_axi_awvalid = Signal() # input
- self.s_axi_awready = Signal() # output
- self.s_axi_wdata = Signal(AXI_LITE_DATA_WIDTH) # input
- self.s_axi_wstrb = Signal(FIXME) # input
- self.s_axi_wvalid = Signal() # input
- self.s_axi_wready = Signal() # output
- self.s_axi_araddr = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.s_axi_arvalid = Signal() # input
- self.s_axi_arready = Signal() # output
- self.s_axi_rready = Signal() # input
- self.s_axi_rdata = Signal(AXI_LITE_DATA_WIDTH) # output
- self.s_axi_rresp = Signal(2) # output
- self.s_axi_rvalid = Signal() # output
- self.s_axi_bresp = Signal(2) # output
- self.s_axi_bvalid = Signal() # output
- self.s_axi_bready = Signal() # input
- self.int_miss = Signal(N_PORTS) # output
- self.int_prot = Signal(N_PORTS) # output
- self.int_multi = Signal(N_PORTS) # output
- self.int_prefetch = Signal(N_PORTS) # output
- self.int_mhf_full = Signal() # output
- self.int_axaddr_o = Signal() # output
- self.int_axid_o = Signal() # output
- self.int_axlen_o = Signal() # output
- self.int_axuser_o = Signal() # output
- self.port1_addr = Signal() # input
- self.port1_id = Signal() # input
- self.port1_len = Signal() # input
- self.port1_size = Signal() # input
- self.port1_addr_valid = Signal(N_PORTS) # input
- self.port1_type = Signal(N_PORTS) # input
- self.port1_user = Signal() # input
- self.port1_sent = Signal(N_PORTS) # input
- self.port1_out_addr = Signal() # output
- self.port1_cache_coherent = Signal(N_PORTS) # output
- self.port1_accept = Signal(N_PORTS) # output
- self.port1_drop = Signal(N_PORTS) # output
- self.port1_miss = Signal(N_PORTS) # output
- self.port2_addr = Signal() # input
- self.port2_id = Signal() # input
- self.port2_len = Signal() # input
- self.port2_size = Signal() # input
- self.port2_addr_valid = Signal(N_PORTS) # input
- self.port2_type = Signal(N_PORTS) # input
- self.port2_user = Signal() # input
- self.port2_sent = Signal(N_PORTS) # input
- self.port2_out_addr = Signal() # output
- self.port2_cache_coherent = Signal(N_PORTS) # output
- self.port2_accept = Signal(N_PORTS) # output
- self.port2_drop = Signal(N_PORTS) # output
- self.port2_miss = Signal(N_PORTS) # output
- self.miss_l2_i = Signal(N_PORTS) # input
- self.miss_l2_addr_i = Signal() # input
- self.miss_l2_id_i = Signal() # input
- self.miss_l2_user_i = Signal() # input
- self.wdata_l2_o = Signal() # output
- self.waddr_l2_o = Signal() # output
- self.wren_l2_o = Signal(N_PORTS) # output
-
- def elaborate(self, platform=None):
- m = Module()
- return m
-
-
-"""
-
-
- // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗
- // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝
- // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗
- // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║
- // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║
- // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝
- // signals
-
- localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY;
-
- localparam integer N_SLICES[N_PORTS-1:0] = `N_SLICES_ARRAY;
- localparam N_SLICES_TOT = `MY_ARRAY_SUM(N_SLICES,N_PORTS);
- localparam N_SLICES_MAX = `N_SLICES_MAX;
-
- localparam N_REGS = 4*N_SLICES_TOT + 4;
- localparam AXI_SIZE_WIDTH = log2(AXI_DATA_WIDTH/8);
-
- localparam PORT_ID_WIDTH = (N_PORTS < 2) ? 1 : log2(N_PORTS);
- localparam MISS_META_WIDTH = PORT_ID_WIDTH + AXI_USER_WIDTH + AXI_ID_WIDTH;
-
- logic [N_PORTS-1:0] [15:0] p1_burst_size;
- logic [N_PORTS-1:0] [15:0] p2_burst_size;
-
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p1_align_addr;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p2_align_addr;
-
- logic [N_PORTS-1:0] [AXI_SIZE_WIDTH-1:0] p1_mask;
- logic [N_PORTS-1:0] [AXI_SIZE_WIDTH-1:0] p2_mask;
-
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p1_max_addr;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p2_max_addr;
-
- logic [N_PORTS-1:0] p1_prefetch;
- logic [N_PORTS-1:0] p2_prefetch;
-
- logic [N_PORTS-1:0] int_rw;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_addr_min;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_addr_max;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_id;
- logic [N_PORTS-1:0] [7:0] int_len;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_user;
-
- logic [N_PORTS-1:0] hit;
- logic [N_PORTS-1:0] prot;
- logic [N_PORTS-1:0] prefetch;
-
- logic [N_PORTS-1:0] no_hit;
- logic [N_PORTS-1:0] no_prot;
-
- logic [N_PORTS-1:0] [N_SLICES_MAX-1:0] hit_slices;
- logic [N_PORTS-1:0] [N_SLICES_MAX-1:0] prot_slices;
-
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] out_addr;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] out_addr_reg;
-
- logic [N_PORTS-1:0] cache_coherent;
- logic [N_PORTS-1:0] cache_coherent_reg;
-
- logic [N_PORTS-1:0] select;
- reg [N_PORTS-1:0] curr_priority;
-
- reg [N_PORTS-1:0] multi_hit;
-
- logic [N_PORTS-1:0] miss_valid_mhf;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] miss_addr_mhf;
- logic [N_PORTS-1:0] [MISS_META_WIDTH-1:0] miss_meta_mhf;
-
- logic [N_REGS-1:0] [63:0] int_cfg_regs;
- logic [N_PORTS-1:0] [4*N_SLICES_MAX-1:0] [63:0] int_cfg_regs_slices;
-
- logic L1AllowMultiHit_S;
-
- genvar z;
-
- // █████╗ ███████╗███████╗██╗ ██████╗ ███╗ ██╗███╗ ███╗███████╗███╗ ██╗████████╗███████╗
- // ██╔══██╗██╔════╝██╔════╝██║██╔════╝ ████╗ ██║████╗ ████║██╔════╝████╗ ██║╚══██╔══╝██╔════╝
- // ███████║███████╗███████╗██║██║ ███╗██╔██╗ ██║██╔████╔██║█████╗ ██╔██╗ ██║ ██║ ███████╗
- // ██╔══██║╚════██║╚════██║██║██║ ██║██║╚██╗██║██║╚██╔╝██║██╔══╝ ██║╚██╗██║ ██║ ╚════██║
- // ██║ ██║███████║███████║██║╚██████╔╝██║ ╚████║██║ ╚═╝ ██║███████╗██║ ╚████║ ██║ ███████║
- // ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝
- // assignments
-
- always_comb
- begin : PORT_SELECT
- var integer idx;
-
- for (idx=0; idx<N_PORTS; idx++) begin
-
- // select = 1 -> port1 active
- // select = 0 -> port2 active
- select[idx] = (curr_priority[idx] & port1_addr_valid[idx]) | ~port2_addr_valid[idx];
-
- p1_burst_size[idx] = (port1_len[idx] + 1) << port1_size[idx];
- p2_burst_size[idx] = (port2_len[idx] + 1) << port2_size[idx];
-
- // align min addr for max addr computation to allow for smart AXI bursts around the 4k boundary
- if (port1_size[idx] == 3'b001)
- p1_mask[idx] = 3'b110;
- else if (port1_size[idx] == 3'b010)
- p1_mask[idx] = 3'b100;
- else if (port1_size[idx] == 3'b011)
- p1_mask[idx] = 3'b000;
- else
- p1_mask[idx] = 3'b111;
-
- p1_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port1_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH];
- p1_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port1_addr[idx][AXI_SIZE_WIDTH-1:0] & p1_mask[idx];
-
- if (port2_size[idx] == 3'b001)
- p2_mask[idx] = 3'b110;
- else if (port2_size[idx] == 3'b010)
- p2_mask[idx] = 3'b100;
- else if (port2_size[idx] == 3'b011)
- p2_mask[idx] = 3'b000;
- else
- p2_mask[idx] = 3'b111;
-
- if (port1_user[idx] == {AXI_USER_WIDTH{1'b1}})
- p1_prefetch[idx] = 1'b1;
- else
- p1_prefetch[idx] = 1'b0;
-
- if (port2_user[idx] == {AXI_USER_WIDTH{1'b1}})
- p2_prefetch[idx] = 1'b1;
- else
- p2_prefetch[idx] = 1'b0;
-
- p2_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port2_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH];
- p2_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port2_addr[idx][AXI_SIZE_WIDTH-1:0] & p2_mask[idx];
-
- p1_max_addr[idx] = p1_align_addr[idx] + p1_burst_size[idx] - 1;
- p2_max_addr[idx] = p2_align_addr[idx] + p2_burst_size[idx] - 1;
-
- int_addr_min[idx] = select[idx] ? port1_addr[idx] : port2_addr[idx];
- int_addr_max[idx] = select[idx] ? p1_max_addr[idx] : p2_max_addr[idx];
- int_rw[idx] = select[idx] ? port1_type[idx] : port2_type[idx];
- int_id[idx] = select[idx] ? port1_id[idx] : port2_id[idx];
- int_len[idx] = select[idx] ? port1_len[idx] : port2_len[idx];
- int_user[idx] = select[idx] ? port1_user[idx] : port2_user[idx];
- prefetch[idx] = select[idx] ? p1_prefetch[idx] : p2_prefetch[idx];
-
- hit [idx] = | hit_slices [idx];
- prot[idx] = | prot_slices[idx];
-
- no_hit [idx] = ~hit [idx];
- no_prot[idx] = ~prot[idx];
-
- port1_out_addr[idx] = out_addr_reg[idx];
- port2_out_addr[idx] = out_addr_reg[idx];
-
- port1_cache_coherent[idx] = cache_coherent_reg[idx];
- port2_cache_coherent[idx] = cache_coherent_reg[idx];
- end
- end
-
- always_comb
- begin
- var integer idx_port, idx_slice;
- var integer reg_num;
- reg_num=0;
- for ( idx_port = 0; idx_port < N_PORTS; idx_port++ ) begin
- for ( idx_slice = 0; idx_slice < 4*N_SLICES[idx_port]; idx_slice++ ) begin
- int_cfg_regs_slices[idx_port][idx_slice] = int_cfg_regs[4+reg_num];
- reg_num++;
- end
- // int_cfg_regs_slices[idx_port][N_SLICES_MAX:N_SLICES[idx_port]] will be dangling
- // Fix to zero. Synthesis will remove these signals.
- // int_cfg_regs_slices[idx_port][4*N_SLICES_MAX-1:4*N_SLICES[idx_port]] = 0;
- end
- end
-
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin : PORT_PRIORITY
- var integer idx;
- if (Rst_RBI == 1'b0)
- curr_priority = 'h0;
- else begin
- for (idx=0; idx<N_PORTS; idx++) begin
- if (port1_accept[idx] || port1_drop[idx])
- curr_priority[idx] = 1'b1;
- else if (port2_accept[idx] || port2_drop[idx])
- curr_priority[idx] = 1'b0;
- end
- end
- end
-
- // find port that misses
- logic [PORT_ID_WIDTH-1:0] PortIdx_D; // index of the first missing port
- var integer idx_miss;
- always_comb begin : MHF_PORT_SELECT
- PortIdx_D = 'b0;
- for (idx_miss = 0; idx_miss < N_PORTS; idx_miss++) begin
- if (miss_valid_mhf[idx_miss] == 1'b1) begin
- PortIdx_D = idx_miss;
- break;
- end
- end
- end // always_comb begin
-
- // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗
- // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝
- // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗
- // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║
- // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝
- // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝
- axi_rab_cfg
- #(
- .N_PORTS ( N_PORTS ),
- .N_REGS ( N_REGS ),
- .N_L2_SETS ( N_L2_SETS ),
- .N_L2_SET_ENTRIES( N_L2_SET_ENTRIES ),
- .ADDR_WIDTH_PHYS ( AXI_M_ADDR_WIDTH ),
- .ADDR_WIDTH_VIRT ( AXI_S_ADDR_WIDTH ),
- .N_FLAGS ( 4 ),
- .AXI_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ),
- .AXI_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ),
- .MISS_META_WIDTH ( MISS_META_WIDTH ),
- .MH_FIFO_DEPTH ( MH_FIFO_DEPTH )
- )
- u_axi_rab_cfg
- (
- .Clk_CI ( Clk_CI ),
- .Rst_RBI ( Rst_RBI ),
- .s_axi_awaddr ( s_axi_awaddr ),
- .s_axi_awvalid ( s_axi_awvalid ),
- .s_axi_wdata ( s_axi_wdata ),
- .s_axi_wstrb ( s_axi_wstrb ),
- .s_axi_wvalid ( s_axi_wvalid ),
- .s_axi_bready ( s_axi_bready ),
- .s_axi_araddr ( s_axi_araddr ),
- .s_axi_arvalid ( s_axi_arvalid ),
- .s_axi_rready ( s_axi_rready ),
- .s_axi_arready ( s_axi_arready ),
- .s_axi_rdata ( s_axi_rdata ),
- .s_axi_rresp ( s_axi_rresp ),
- .s_axi_rvalid ( s_axi_rvalid ),
- .s_axi_wready ( s_axi_wready ),
- .s_axi_bresp ( s_axi_bresp ),
- .s_axi_bvalid ( s_axi_bvalid ),
- .s_axi_awready ( s_axi_awready ),
- .L1Cfg_DO ( int_cfg_regs ),
- .L1AllowMultiHit_SO ( L1AllowMultiHit_S ),
- .MissAddr_DI ( miss_addr_mhf[PortIdx_D] ),
- .MissMeta_DI ( miss_meta_mhf[PortIdx_D] ),
- .Miss_SI ( miss_valid_mhf[PortIdx_D] ),
- .MhFifoFull_SO ( int_mhf_full ),
- .wdata_l2 ( wdata_l2_o ),
- .waddr_l2 ( waddr_l2_o ),
- .wren_l2 ( wren_l2_o )
- );
-
- generate for (z = 0; z < N_PORTS; z++) begin : MHF_TLB_SELECT
- if (ENABLE_L2TLB[z] == 1) begin // L2 TLB is enabled
- assign miss_valid_mhf[z] = miss_l2_i[z];
- assign miss_addr_mhf[z] = miss_l2_addr_i[z];
- assign miss_meta_mhf[z] = {miss_l2_user_i[z], PortIdx_D, miss_l2_id_i[z]};
- end else begin// L2 TLB is disabled
- assign miss_valid_mhf[z] = int_miss[z];
- assign miss_addr_mhf[z] = int_addr_min[z];
- assign miss_meta_mhf[z] = {int_user[z], PortIdx_D, int_id[z]};
- end
- end
- endgenerate
-
- // ███████╗██╗ ██╗ ██████╗███████╗ ████████╗ ██████╗ ██████╗
- // ██╔════╝██║ ██║██╔════╝██╔════╝ ╚══██╔══╝██╔═══██╗██╔══██╗
- // ███████╗██║ ██║██║ █████╗ ██║ ██║ ██║██████╔╝
- // ╚════██║██║ ██║██║ ██╔══╝ ██║ ██║ ██║██╔═══╝
- // ███████║███████╗██║╚██████╗███████╗ ██║ ╚██████╔╝██║
- // ╚══════╝╚══════╝╚═╝ ╚═════╝╚══════╝ ╚═╝ ╚═════╝ ╚═╝
- generate for (z = 0; z < N_PORTS; z++) begin : SLICE_TOP_GEN
- slice_top
- #(
- .N_SLICES ( N_SLICES[z] ),
- .N_REGS ( 4*N_SLICES[z] ),
- .ADDR_WIDTH_PHYS ( AXI_M_ADDR_WIDTH ),
- .ADDR_WIDTH_VIRT ( AXI_S_ADDR_WIDTH )
- )
- u_slice_top
- (
- .int_cfg_regs ( int_cfg_regs_slices[z][4*N_SLICES[z]-1:0] ),
- .int_rw ( int_rw[z] ),
- .int_addr_min ( int_addr_min[z] ),
- .int_addr_max ( int_addr_max[z] ),
- .multi_hit_allow ( L1AllowMultiHit_S ),
- .multi_hit ( multi_hit[z] ),
- .prot ( prot_slices[z][N_SLICES[z]-1:0] ),
- .hit ( hit_slices [z][N_SLICES[z]-1:0] ),
- .cache_coherent ( cache_coherent[z] ),
- .out_addr ( out_addr[z] )
- );
- // hit_slices [N_SLICES_MAX-1:N_SLICES_MAX-N_SLICES[z]] will be dangling
- // prot_slices[N_SLICES_MAX-1:N_SLICES_MAX-N_SLICES[z]] will be dangling
- // Fix to zero. Synthesis will remove these signals.
- if ( N_SLICES[z] < N_SLICES_MAX ) begin
- assign hit_slices [z][N_SLICES_MAX-1:N_SLICES[z]] = 0;
- assign prot_slices[z][N_SLICES_MAX-1:N_SLICES[z]] = 0;
- end
- end // for (z = 0; z < N_PORTS; z++)
- endgenerate
-
- // ███████╗███████╗███╗ ███╗
- // ██╔════╝██╔════╝████╗ ████║
- // █████╗ ███████╗██╔████╔██║
- // ██╔══╝ ╚════██║██║╚██╔╝██║
- // ██║ ███████║██║ ╚═╝ ██║
- // ╚═╝ ╚══════╝╚═╝ ╚═╝
- //
- generate for (z = 0; z < N_PORTS; z++) begin : FSM_GEN
- fsm
- #(
- .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
- .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ),
- .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
- .AXI_USER_WIDTH ( AXI_USER_WIDTH )
- )
- u_fsm
- (
- .Clk_CI ( Clk_CI ),
- .Rst_RBI ( Rst_RBI ),
- .port1_addr_valid_i ( port1_addr_valid[z] ),
- .port2_addr_valid_i ( port2_addr_valid[z] ),
- .port1_sent_i ( port1_sent[z] ),
- .port2_sent_i ( port2_sent[z] ),
- .select_i ( select[z] ),
- .no_hit_i ( no_hit[z] ),
- .multi_hit_i ( multi_hit[z] ),
- .no_prot_i ( no_prot[z] ),
- .prefetch_i ( prefetch[z] ),
- .out_addr_i ( out_addr[z] ),
- .cache_coherent_i ( cache_coherent[z] ),
- .port1_accept_o ( port1_accept[z] ),
- .port1_drop_o ( port1_drop[z] ),
- .port1_miss_o ( port1_miss[z] ),
- .port2_accept_o ( port2_accept[z] ),
- .port2_drop_o ( port2_drop[z] ),
- .port2_miss_o ( port2_miss[z] ),
- .out_addr_o ( out_addr_reg[z] ),
- .cache_coherent_o ( cache_coherent_reg[z] ),
- .miss_o ( int_miss[z] ),
- .multi_o ( int_multi[z] ),
- .prot_o ( int_prot[z] ),
- .prefetch_o ( int_prefetch[z] ),
- .in_addr_i ( int_addr_min[z] ),
- .in_id_i ( int_id[z] ),
- .in_len_i ( int_len[z] ),
- .in_user_i ( int_user[z] ),
- .in_addr_o ( int_axaddr_o[z] ),
- .in_id_o ( int_axid_o[z] ),
- .in_len_o ( int_axlen_o[z] ),
- .in_user_o ( int_axuser_o[z] )
- );
- end
- endgenerate
-
-"""
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# module rab_slice
-# #(
-# parameter ADDR_WIDTH_PHYS = 40,
-# parameter ADDR_WIDTH_VIRT = 32
-# )
-# (
-# input logic [ADDR_WIDTH_VIRT-1:0] cfg_min,
-# input logic [ADDR_WIDTH_VIRT-1:0] cfg_max,
-# input logic [ADDR_WIDTH_PHYS-1:0] cfg_offset,
-# input logic cfg_wen,
-# input logic cfg_ren,
-# input logic cfg_en,
-# input logic in_trans_type,
-# input logic [ADDR_WIDTH_VIRT-1:0] in_addr_min,
-# input logic [ADDR_WIDTH_VIRT-1:0] in_addr_max,
-# output logic out_hit,
-# output logic out_prot,
-# output logic [ADDR_WIDTH_PHYS-1:0] out_addr
-# );
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-
-
-class rab_slice(Elaboratable):
-
- def __init__(self, params): # pass config object
- # TODO parameters
- self.params = params
- self.cfg_min = Signal(params.ADDR_WIDTH_VIRT) # input
- self.cfg_max = Signal(params.ADDR_WIDTH_VIRT) # input
- self.cfg_offset = Signal(params.ADDR_WIDTH_PHYS) # input
- self.cfg_wen = Signal() # input
- self.cfg_ren = Signal() # input
- self.cfg_en = Signal() # input
- self.in_trans_type = Signal() # input
- self.in_addr_min = Signal(params.ADDR_WIDTH_VIRT) # input
- self.in_addr_max = Signal(params.ADDR_WIDTH_VIRT) # input
- self.out_hit = Signal() # output
- self.out_prot = Signal() # output
- self.out_addr = Signal(params.ADDR_WIDTH_PHYS) # output
-
- def elaborate(self, platform=None):
- m = Module()
- min_above_min = Signal()
- min_below_max = Signal()
- max_below_max = Signal()
-
- # assign min_above_min = (in_addr_min >= cfg_min) ? 1'b1 : 1'b0;
- # assign min_below_max = (in_addr_min <= cfg_max) ? 1'b1 : 1'b0;
- # assign max_below_max = (in_addr_max <= cfg_max) ? 1'b1 : 1'b0;
- # assign out_hit = cfg_en & min_above_min & min_below_max & max_below_max;
- # assign out_prot = out_hit & ((in_trans_type & ~cfg_wen) | (~in_trans_type & ~cfg_ren));
- # assign out_addr = in_addr_min - cfg_min + cfg_offset;
- m.d.comb += [
- min_above_min.eq(self.in_addr_min >= self.cfg_min),
- min_below_max.eq(self.in_addr_min <= self.cfg_max),
- max_below_max.eq(self.in_addr_max <= self.cfg_max),
- self.out_hit.eq(self.cfg_en & min_above_min &
- min_below_max & max_below_max),
- self.out_prot.eq(self.out_hit & (
- (self.in_trans_type & ~self.cfg_wen) | (~self.in_trans_type & ~self.cfg_ren))),
- self.out_addr.eq(self.in_addr_min - self.cfg_min + self.cfg_offset)
- ]
-
- return m
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# /*
-# * ram_tp_no_change
-# *
-# * This code implements a parameterizable two-port memory. Port 0 can read and
-# * write while Port 1 can read only. The Xilinx tools will infer a BRAM with
-# * Port 0 in "no change" mode, i.e., during a write, it retains the last read
-# * value on the output. Port 1 (read-only) is in "write first" mode. Still, it
-# * outputs the old data during the write cycle. Note: Port 1 outputs invalid
-# * data in the cycle after the write when reading the same address.
-# *
-# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
-# */
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen import Memory
-
-import math
-
-#
-# module ram_tp_no_change
-# #(
-ADDR_WIDTH = 10
-DATA_WIDTH = 36
-# )
-# (
-# input clk,
-# input we,
-# input [ADDR_WIDTH-1:0] addr0,
-# input [ADDR_WIDTH-1:0] addr1,
-# input [DATA_WIDTH-1:0] d_i,
-# output [DATA_WIDTH-1:0] d0_o,
-# output [DATA_WIDTH-1:0] d1_o
-# );
-
-
-class ram_tp_no_change(Elaboratable):
-
- def __init__(self):
- self.we = Signal() # input
- self.addr0 = Signal(ADDR_WIDTH) # input
- self.addr1 = Signal(ADDR_WIDTH) # input
- self.d_i = Signal(DATA_WIDTH) # input
- self.d0_o = Signal(DATA_WIDTH) # output
- self.d1_o = Signal(DATA_WIDTH) # output
-
- DEPTH = int(math.pow(2, ADDR_WIDTH))
- self.ram = Memory(width=DATA_WIDTH, depth=DEPTH)
- #
- # localparam DEPTH = 2**ADDR_WIDTH;
- #
- # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
- # reg [DATA_WIDTH-1:0] d0;
- # reg [DATA_WIDTH-1:0] d1;
- #
- # always_ff @(posedge clk) begin
- # if(we == 1'b1) begin
- # ram[addr0] <= d_i;
- # end else begin
- # only change data if we==false
- # d0 <= ram[addr0];
- # end
- # d1 <= ram[addr1];
- # end
- #
- # assign d0_o = d0;
- # assign d1_o = d1;
- #
-
- def elaborate(self, platform=None):
- m = Module()
- m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
- m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
- m.submodules.write_ram = write_ram = self.ram.write_port()
-
- # write port
- m.d.comb += write_ram.en.eq(self.we)
- m.d.comb += write_ram.addr.eq(self.addr0)
- m.d.comb += write_ram.data.eq(self.d_i)
-
- # read ports
- m.d.comb += read_ram0.addr.eq(self.addr0)
- m.d.comb += read_ram1.addr.eq(self.addr1)
- with m.If(self.we == 0):
- m.d.sync += self.d0_o.eq(read_ram0.data)
- m.d.sync += self.d1_o.eq(read_ram1.data)
-
- return m
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-#
-# /*
-# * ram_tp_write_first
-# *
-# * This code implements a parameterizable two-port memory. Port 0 can read and
-# * write while Port 1 can read only. Xilinx Vivado will infer a BRAM in
-# * "write first" mode, i.e., upon a read and write to the same address, the
-# * new value is read. Note: Port 1 outputs invalid data in the cycle after
-# * the write when reading the same address.
-# *
-# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
-# */
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen import Memory
-
-import math
-#
-# module ram_tp_write_first
-# #(
-ADDR_WIDTH = 10
-DATA_WIDTH = 36
-# )
-# (
-# input clk,
-# input we,
-# input [ADDR_WIDTH-1:0] addr0,
-# input [ADDR_WIDTH-1:0] addr1,
-# input [DATA_WIDTH-1:0] d_i,
-# output [DATA_WIDTH-1:0] d0_o,
-# output [DATA_WIDTH-1:0] d1_o
-# );
-
-
-class ram_tp_write_first(Elaboratable):
-
- def __init__(self):
- self.we = Signal() # input
- self.addr0 = Signal(ADDR_WIDTH) # input
- self.addr1 = Signal(ADDR_WIDTH) # input
- self.d_i = Signal(DATA_WIDTH) # input
- self.d0_o = Signal(DATA_WIDTH) # output
- self.d1_o = Signal(DATA_WIDTH) # output
-
- DEPTH = int(math.pow(2, ADDR_WIDTH))
- self.ram = Memory(width=DATA_WIDTH, depth=DEPTH)
-
- #
- # localparam DEPTH = 2**ADDR_WIDTH;
- #
- # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
- # reg [ADDR_WIDTH-1:0] raddr0;
- # reg [ADDR_WIDTH-1:0] raddr1;
- #
- # always_ff @(posedge clk) begin
- # if(we == 1'b1) begin
- # ram[addr0] <= d_i;
- # end
- # raddr0 <= addr0;
- # raddr1 <= addr1;
- # end
- #
- # assign d0_o = ram[raddr0];
- # assign d1_o = ram[raddr1];
- #
-
- def elaborate(self, platform=None):
- m = Module()
- m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
- m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
- m.submodules.write_ram = write_ram = self.ram.write_port()
-
- # write port
- m.d.comb += write_ram.en.eq(self.we)
- m.d.comb += write_ram.addr.eq(self.addr0)
- m.d.comb += write_ram.data.eq(self.d_i)
-
- # read ports
- m.d.comb += read_ram0.addr.eq(self.addr0)
- m.d.comb += read_ram1.addr.eq(self.addr1)
- m.d.sync += self.d0_o.eq(read_ram0.data)
- m.d.sync += self.d1_o.eq(read_ram1.data)
-
- return m
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-
-# this file has been generated by sv2nmigen
-
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-import rab_slice
-import coreconfig
-
-#
-# module slice_top
-# //#(
-# // parameter N_SLICES = 16,
-# // parameter N_REGS = 4*N_SLICES,
-# // parameter ADDR_WIDTH_PHYS = 40,
-# // parameter ADDR_WIDTH_VIRT = 32
-# // )
-# (
-# input logic [N_REGS-1:0] [63:0] int_cfg_regs,
-# input logic int_rw,
-# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_min,
-# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_max,
-# input logic multi_hit_allow,
-# output logic multi_hit,
-# output logic [N_SLICES-1:0] prot,
-# output logic [N_SLICES-1:0] hit,
-# output logic cache_coherent,
-# output logic [ADDR_WIDTH_PHYS-1:0] out_addr
-# );
-#
-
-
-class slice_top(Elaboratable):
-
- def __init__(self):
- # FIXME self.int_cfg_regs = Signal() # input
- self.params = coreconfig.CoreConfig() # rename ?
- self.int_rw = Signal() # input
- self.int_addr_min = Signal(self.params.ADDR_WIDTH_VIRT) # input
- self.int_addr_max = Signal(self.params.ADDR_WIDTH_VIRT) # input
- self.multi_hit_allow = Signal() # input
- self.multi_hit = Signal() # output
- self.prot = Signal(self.params.N_SLICES) # output
- self.hit = Signal(self.params.N_SLICES) # output
- self.cache_coherent = Signal() # output
- self.out_addr = Signal(self.params.ADDR_WIDTH_PHYS) # output
-
- def elaborate(self, platform=None):
- m = Module()
-
- first_hit = Signal()
-
- for i in range(self.params.N_SLICES):
- # TODO pass params / core config here
- u_slice = rab_slice.rab_slice(self.params)
- setattr(m.submodules, "u_slice%d" % i, u_slice)
- # TODO set param and connect ports
-
- # In case of a multi hit, the lowest slice with a hit is selected.
- # TODO always_comb begin : HIT_CHECK
- m.d.comb += [
- first_hit.eq(0),
- self.multi_hit.eq(0),
- self.out_addr.eq(0),
- self.cache_coherent.eq(0)]
-
- for j in range(self.params.N_SLICES):
- with m.If(self.hit[j] == 1):
- with m.If(first_hit == 1):
- with m.If(self.multi_hit_allow == 0):
- m.d.comb += [self.multi_hit.eq(1)]
- with m.Elif(first_hit == 1):
- m.d.comb += [first_hit.eq(1)
- # only output first slice that was hit
- # SV self.out_addr.eq(slice_out_addr[ADDR_WIDTH_PHYS*j + : ADDR_WIDTH_PHYS]),
- # SV self.cache_coherent.eq(int_cfg_regs[4*j+3][3]),
- ]
- return m
-
- # TODO translate generate statement
-
-
-"""
- logic [ADDR_WIDTH_PHYS*N_SLICES-1:0] slice_out_addr;
-
- generate
- for ( i=0; i<N_SLICES; i++ )
- begin
- rab_slice
- #(
- .ADDR_WIDTH_PHYS ( ADDR_WIDTH_PHYS ),
- .ADDR_WIDTH_VIRT ( ADDR_WIDTH_VIRT )
- )
- u_slice
- (
- .cfg_min ( int_cfg_regs[4*i] [ADDR_WIDTH_VIRT-1:0] ),
- .cfg_max ( int_cfg_regs[4*i+1][ADDR_WIDTH_VIRT-1:0] ),
- .cfg_offset ( int_cfg_regs[4*i+2][ADDR_WIDTH_PHYS-1:0] ),
- .cfg_wen ( int_cfg_regs[4*i+3][2] ),
- .cfg_ren ( int_cfg_regs[4*i+3][1] ),
- .cfg_en ( int_cfg_regs[4*i+3][0] ),
- .in_trans_type ( int_rw ),
- .in_addr_min ( int_addr_min ),
- .in_addr_max ( int_addr_max ),
- .out_addr ( slice_out_addr[ADDR_WIDTH_PHYS*i+ADDR_WIDTH_PHYS-1:ADDR_WIDTH_PHYS*i] ),
- .out_prot ( prot[i] ),
- .out_hit ( hit[i] )
- );
- end
- endgenerate
-
- // In case of a multi hit, the lowest slice with a hit is selected.
- always_comb begin : HIT_CHECK
- first_hit = 0;
- multi_hit = 0;
- out_addr = '0;
- cache_coherent = 0;
- for (j = 0; j < N_SLICES; j++) begin
- if (hit[j] == 1'b1) begin
- if (first_hit == 1'b1) begin
- if (multi_hit_allow == 1'b0) begin
- multi_hit = 1'b1;
- end
- end else begin
- first_hit = 1'b1;
- out_addr = slice_out_addr[ADDR_WIDTH_PHYS*j +: ADDR_WIDTH_PHYS];
- cache_coherent = int_cfg_regs[4*j+3][3];
- end
- end
- end
- end
-"""
-
-# sv 2 migen: TODO add translate code for generate statements and for loops inside always_comb
+++ /dev/null
-from ram_tp_write_first import ram_tp_write_first
-from nmigen.compat.sim import run_simulation
-import sys
-sys.path.append("../")
-
-
-def tbench(dut):
- yield dut.we.eq(1)
- for i in range(0, 255):
- yield dut.addr0.eq(i)
- yield dut.d_i.eq(i)
- yield
-
-
-if __name__ == "__main__":
- dut = ram_tp_write_first()
- run_simulation(dut, tbench(dut), vcd_name="ram_tp_write_first.vcd")
- print("ram_tp_write_first Unit Test Success")
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-import sys
-sys.path.append("../")
-# sys.path.append("../../../TestUtil")
-from slice_top import slice_top
-
-def tbench(dut):
- yield
-
-
-if __name__ == "__main__":
- dut = slice_top()
- run_simulation(dut, tbench(dut), vcd_name="test_slice_top.vcd")
- print("slice_top Unit Test Success")
+++ /dev/null
-from soc.decoder.power_enums import (Function, Form, InternalOp,
- In1Sel, In2Sel, In3Sel, OutSel,
- RC, LdstLen, CryIn, get_csv,
- single_bit_flags,
- get_signal_name, default_values)
-import math
-
-
-class MemorySim:
- def __init__(self, bytes_per_word=8):
- self.mem = {}
- self.bytes_per_word = bytes_per_word
- self.word_log2 = math.ceil(math.log2(bytes_per_word))
-
- def _get_shifter_mask(self, width, remainder):
- shifter = ((self.bytes_per_word - width) - remainder) * \
- 8 # bits per byte
- mask = (1 << (width * 8)) - 1
- return shifter, mask
-
- # TODO: Implement ld/st of lesser width
- def ld(self, address, width=8):
- remainder = address & (self.bytes_per_word - 1)
- address = address >> self.word_log2
- assert remainder & (width - 1) == 0, "Unaligned access unsupported!"
- if address in self.mem:
- val = self.mem[address]
- else:
- val = 0
-
- if width != self.bytes_per_word:
- shifter, mask = self._get_shifter_mask(width, remainder)
- val = val & (mask << shifter)
- val >>= shifter
- print("Read {:x} from addr {:x}".format(val, address))
- return val
-
- def st(self, address, value, width=8):
- remainder = address & (self.bytes_per_word - 1)
- address = address >> self.word_log2
- assert remainder & (width - 1) == 0, "Unaligned access unsupported!"
- print("Writing {:x} to addr {:x}".format(value, address))
- if width != self.bytes_per_word:
- if address in self.mem:
- val = self.mem[address]
- else:
- val = 0
- shifter, mask = self._get_shifter_mask(width, remainder)
- val &= ~(mask << shifter)
- val |= value << shifter
- self.mem[address] = val
- else:
- self.mem[address] = value
-
-
-class RegFile:
- def __init__(self):
- self.regfile = [0] * 32
- self.sprs = {}
-
- def write_reg(self, regnum, value):
- all1s = (1 << 64)-1 # 64 bits worth of 1s
- value &= all1s
- print("Writing {:x} to reg r{}".format(value, regnum))
- self.regfile[regnum] = value
-
- def read_reg(self, regnum):
- val = self.regfile[regnum]
- print("Read {:x} from reg r{}".format(val, regnum))
- return val
-
- def assert_gpr(self, gpr, val):
- reg_val = self.read_reg(gpr)
- msg = "reg r{} got {:x}, expecting {:x}".format(
- gpr, reg_val, val)
- assert reg_val == val, msg
-
- def assert_gprs(self, gprs):
- for k, v in list(gprs.items()):
- self.assert_gpr(k, v)
-
- def set_xer(self, result, operanda, operandb):
- xer = 0
- if result & 1 << 64:
- xer |= XER.CA
-
- self.xer = xer
-
-
-class InternalOpSimulator:
- def __init__(self):
- self.mem_sim = MemorySim()
- self.regfile = RegFile()
-
- def execute_alu_op(self, op1, op2, internal_op, carry=0):
- print(internal_op)
- if internal_op == InternalOp.OP_ADD.value:
- return op1 + op2 + carry
- elif internal_op == InternalOp.OP_AND.value:
- return op1 & op2
- elif internal_op == InternalOp.OP_OR.value:
- return op1 | op2
- elif internal_op == InternalOp.OP_MUL_L64.value:
- return op1 * op2
- else:
- assert False, "Not implemented"
-
- def update_cr0(self, result):
- if result == 0:
- self.cr0 = 0b001
- elif result >> 63:
- self.cr0 = 0b100
- else:
- self.cr0 = 0b010
- print("update_cr0", self.cr0)
-
- def alu_op(self, pdecode2):
- all1s = (1 << 64)-1 # 64 bits worth of 1s
- internal_op = yield pdecode2.dec.op.internal_op
- operand1 = 0
- operand2 = 0
- result = 0
- carry = 0
- r1_ok = yield pdecode2.e.read_reg1.ok
- r2_ok = yield pdecode2.e.read_reg2.ok
- r3_ok = yield pdecode2.e.read_reg3.ok
- imm_ok = yield pdecode2.e.imm_data.ok
- if r1_ok:
- r1_sel = yield pdecode2.e.read_reg1.data
- operand1 = self.regfile.read_reg(r1_sel)
- elif r3_ok:
- r3_sel = yield pdecode2.e.read_reg3.data
- operand1 = self.regfile.read_reg(r3_sel)
- if r2_ok:
- r2_sel = yield pdecode2.e.read_reg2.data
- operand2 = self.regfile.read_reg(r2_sel)
- if imm_ok:
- operand2 = yield pdecode2.e.imm_data.data
-
- inv_a = yield pdecode2.dec.op.inv_a
- if inv_a:
- operand1 = (~operand1) & all1s
-
- cry_in = yield pdecode2.dec.op.cry_in
- if cry_in == CryIn.ONE.value:
- carry = 1
- elif cry_in == CryIn.CA.value:
- carry = self.carry_out
-
- # TODO rc_sel = yield pdecode2.dec.op.rc_sel
- result = self.execute_alu_op(operand1, operand2, internal_op,
- carry=carry)
-
- cry_out = yield pdecode2.dec.op.cry_out
- rc = yield pdecode2.e.rc.data
-
- if rc:
- self.update_cr0(result)
- if cry_out == 1:
- self.carry_out = (result >> 64)
- print("setting carry_out", self.carry_out)
-
- ro_ok = yield pdecode2.e.write_reg.ok
- if ro_ok:
- ro_sel = yield pdecode2.e.write_reg.data
- self.regfile.write_reg(ro_sel, result)
-
- def mem_op(self, pdecode2):
- internal_op = yield pdecode2.dec.op.internal_op
- addr_reg = yield pdecode2.e.read_reg1.data
- addr = self.regfile.read_reg(addr_reg)
-
- imm_ok = yield pdecode2.e.imm_data.ok
- r2_ok = yield pdecode2.e.read_reg2.ok
- width = yield pdecode2.e.data_len
- if imm_ok:
- imm = yield pdecode2.e.imm_data.data
- addr += imm
- elif r2_ok:
- r2_sel = yield pdecode2.e.read_reg2.data
- addr += self.regfile.read_reg(r2_sel)
- if internal_op == InternalOp.OP_STORE.value:
- val_reg = yield pdecode2.e.read_reg3.data
- val = self.regfile.read_reg(val_reg)
- self.mem_sim.st(addr, val, width)
- elif internal_op == InternalOp.OP_LOAD.value:
- dest_reg = yield pdecode2.e.write_reg.data
- val = self.mem_sim.ld(addr, width)
- self.regfile.write_reg(dest_reg, val)
-
- def execute_op(self, pdecode2):
- function = yield pdecode2.dec.op.function_unit
- if function == Function.ALU.value:
- yield from self.alu_op(pdecode2)
- elif function == Function.LDST.value:
- yield from self.mem_op(pdecode2)
--- /dev/null
+*.wpr
+__pycache__
--- /dev/null
+from nmigen import Module, Signal, Elaboratable
+from nmigen.lib.coding import Encoder, PriorityEncoder
+
+
+class AddressEncoder(Elaboratable):
+ """Address Encoder
+
+ The purpose of this module is to take in a vector and
+ encode the bits that are one hot into an address. This module
+ combines both nmigen's Encoder and PriorityEncoder and will state
+ whether the input line has a single bit hot, multiple bits hot,
+ or no bits hot. The output line will always have the lowest value
+ address output.
+
+ Usage:
+ The output is valid when either single or multiple match is high.
+ Otherwise output is 0.
+ """
+
+ def __init__(self, width):
+ """ Arguments:
+ * width: The desired length of the input vector
+ """
+ # Internal
+ self.encoder = Encoder(width)
+ self.p_encoder = PriorityEncoder(width)
+
+ # Input
+ self.i = Signal(width)
+
+ # Output
+ self.single_match = Signal(1)
+ self.multiple_match = Signal(1)
+ self.o = Signal(range(width))
+
+ def elaborate(self, platform=None):
+ m = Module()
+
+ # Add internal submodules
+ m.submodules.encoder = self.encoder
+ m.submodules.p_encoder = self.p_encoder
+
+ m.d.comb += [
+ self.encoder.i.eq(self.i),
+ self.p_encoder.i.eq(self.i)
+ ]
+
+ # Steps:
+ # 1. check if the input vector is non-zero
+ # 2. if non-zero, check if single match or multiple match
+ # 3. set output line to be lowest value address output
+
+        # If the priority encoder receives an input of 0
+ # If n is 1 then the output is not valid
+ with m.If(self.p_encoder.n):
+ m.d.comb += [
+ self.single_match.eq(0),
+ self.multiple_match.eq(0),
+ self.o.eq(0)
+ ]
+        # If the priority encoder receives an input > 0
+ with m.Else():
+ # Multiple Match if encoder n is invalid
+ with m.If(self.encoder.n):
+ m.d.comb += [
+ self.single_match.eq(0),
+ self.multiple_match.eq(1)
+ ]
+ # Single Match if encoder n is valid
+ with m.Else():
+ m.d.comb += [
+ self.single_match.eq(1),
+ self.multiple_match.eq(0)
+ ]
+ # Always set output based on priority encoder output
+ m.d.comb += self.o.eq(self.p_encoder.o)
+ return m
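+
+
+# Worked example (an illustration, not part of the original module): for a
+# 4-bit AddressEncoder,
+#   i = 0b0100 -> single_match=1, multiple_match=0, o=2
+#   i = 0b0110 -> single_match=0, multiple_match=1, o=1 (lowest hot bit wins)
+#   i = 0b0000 -> single_match=0, multiple_match=0, o=0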
--- /dev/null
+from nmigen import Array, Cat, Module, Signal, Elaboratable
+from nmigen.lib.coding import Decoder
+from nmigen.cli import main # , verilog
+
+from .CamEntry import CamEntry
+from .AddressEncoder import AddressEncoder
+
+
+class Cam(Elaboratable):
+ """ Content Addressable Memory (CAM)
+
+ The purpose of this module is to quickly look up whether an
+ entry exists given a data key.
+ This module will search for the given data in all internal entries
+ and output whether a single or multiple match was found.
+    If a single entry is found its address will be returned and single_match
+    is set HIGH. If multiple entries are found the lowest address is
+    returned and multiple_match is set HIGH. If neither single_match nor
+    multiple_match is HIGH this implies no match was found. To write
+    to the CAM set the address bus to the desired entry and set write_enable
+    HIGH. Entry management should be performed one level above this block
+ as lookup is performed within.
+
+ Notes:
+ The read and write operations take one clock cycle to complete.
+ Currently the read_warning line is present for interfacing but
+ is not necessary for this design. This module is capable of writing
+    in the first cycle, reading on the second, and outputting the correct
+ address on the third.
+ """
+
+ def __init__(self, data_size, cam_size):
+ """ Arguments:
+ * data_size: (bits) The bit size of the data
+ * cam_size: (number) The number of entries in the CAM
+ """
+
+ # Internal
+ self.cam_size = cam_size
+ self.encoder = AddressEncoder(cam_size)
+ self.decoder = Decoder(cam_size)
+ self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))
+
+ # Input
+ self.enable = Signal(1)
+ self.write_enable = Signal(1)
+        self.data_in = Signal(data_size) # The data to be written or searched for
+ self.data_mask = Signal(data_size) # mask for ternary writes
+ # address of CAM Entry to write
+ self.address_in = Signal(range(cam_size))
+
+ # Output
+ self.read_warning = Signal(1) # High when a read interrupts a write
+ self.single_match = Signal(1) # High when there is only one match
+        self.multiple_match = Signal(1) # High when there are at least two matches
+ # The lowest address matched
+ self.match_address = Signal(range(cam_size))
+
+ def elaborate(self, platform=None):
+ m = Module()
+ # AddressEncoder for match types and output address
+ m.submodules.AddressEncoder = self.encoder
+ # Decoder is used to select which entry will be written to
+ m.submodules.Decoder = self.decoder
+ # CamEntry Array Submodules
+        # Note: these are added anonymously
+ entry_array = self.entry_array
+ m.submodules += entry_array
+
+ # Decoder logic
+ m.d.comb += [
+ self.decoder.i.eq(self.address_in),
+ self.decoder.n.eq(0)
+ ]
+
+ encoder_vector = []
+ with m.If(self.enable):
+ # Set the key value for every CamEntry
+ for index in range(self.cam_size):
+
+ # Write Operation
+ with m.If(self.write_enable):
+ with m.If(self.decoder.o[index]):
+ m.d.comb += entry_array[index].command.eq(2)
+ with m.Else():
+ m.d.comb += entry_array[index].command.eq(0)
+
+ # Read Operation
+ with m.Else():
+ m.d.comb += entry_array[index].command.eq(1)
+
+ # Send data input to all entries
+ m.d.comb += entry_array[index].data_in.eq(self.data_in)
+ # Send all entry matches to encoder
+ ematch = entry_array[index].match
+ encoder_vector.append(ematch)
+
+ # Give input to and accept output from encoder module
+ m.d.comb += [
+ self.encoder.i.eq(Cat(*encoder_vector)),
+ self.single_match.eq(self.encoder.single_match),
+ self.multiple_match.eq(self.encoder.multiple_match),
+ self.match_address.eq(self.encoder.o)
+ ]
+
+ # If the CAM is not enabled set all outputs to 0
+ with m.Else():
+ m.d.comb += [
+ self.read_warning.eq(0),
+ self.single_match.eq(0),
+ self.multiple_match.eq(0),
+ self.match_address.eq(0)
+ ]
+
+ return m
+
+ def ports(self):
+ return [self.enable, self.write_enable,
+ self.data_in, self.data_mask,
+ self.read_warning, self.single_match,
+ self.multiple_match, self.match_address]
+
+
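+# A rough simulation sketch (an illustrative assumption, not part of the
+# original design): a generator testbench in the style of the unit tests in
+# this tree, meant to be driven by nmigen.compat.sim.run_simulation, e.g.
+#     dut = Cam(4, 4)
+#     run_simulation(dut, cam_tbench(dut), vcd_name="cam.vcd")
+# It writes a value into entry 0, then searches for it; per the docstring
+# above, the match outputs become valid on the third cycle.
+def cam_tbench(dut):
+    # cycle 1: write 0xA into entry 0
+    yield dut.enable.eq(1)
+    yield dut.write_enable.eq(1)
+    yield dut.address_in.eq(0)
+    yield dut.data_in.eq(0xA)
+    yield
+    # cycle 2: drop write_enable so every entry compares against data_in
+    yield dut.write_enable.eq(0)
+    yield
+    # cycle 3: single_match should be high and match_address should read 0
+    yield
+
+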
+if __name__ == '__main__':
+ cam = Cam(4, 4)
+ main(cam, ports=cam.ports())
--- /dev/null
+from nmigen import Module, Signal, Elaboratable
+
+
+class CamEntry(Elaboratable):
+ """ Content Addressable Memory (CAM) Entry
+
+ The purpose of this module is to represent an entry within a CAM.
+ This module when given a read command will compare the given data
+ and output whether a match was found or not. When given a write
+ command it will write the given data into internal registers.
+ """
+
+ def __init__(self, data_size):
+ """ Arguments:
+ * data_size: (bit count) The size of the data
+ """
+ # Input
+        self.command = Signal(2) # 00 => NA, 01 => Read, 10 => Write, 11 => Reset
+ self.data_in = Signal(data_size) # Data input when writing
+
+ # Output
+ self.match = Signal(1) # Result of the internal/input key comparison
+ self.data = Signal(data_size)
+
+ def elaborate(self, platform=None):
+ m = Module()
+ with m.Switch(self.command):
+ with m.Case("00"):
+ m.d.sync += self.match.eq(0)
+ with m.Case("01"):
+ with m.If(self.data == self.data_in):
+ m.d.sync += self.match.eq(1)
+ with m.Else():
+ m.d.sync += self.match.eq(0)
+ with m.Case("10"):
+ m.d.sync += [
+ self.data.eq(self.data_in),
+ self.match.eq(0)
+ ]
+ with m.Case():
+ m.d.sync += [
+ self.match.eq(0),
+ self.data.eq(0)
+ ]
+
+ return m
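+
+
+# Worked example (an illustration, not part of the original file): with
+# command=0b10 and data_in=0x5, the next clock edge stores 0x5 in data and
+# clears match; switching to command=0b01 while still presenting data_in=0x5
+# drives match high on the following edge. command=0b00 only clears match,
+# and any other value (e.g. 0b11) resets both data and match to zero.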
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen.cli import verilog, rtlil
+
+
+class LFSRPolynomial(set):
+ """ implements a polynomial for use in LFSR
+ """
+ def __init__(self, exponents=()):
+ for e in exponents:
+ assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
+ assert (e >= 0), ValueError("%d must not be negative" % e)
+ set.__init__(self, set(exponents).union({0})) # must contain zero
+
+ @property
+ def max_exponent(self):
+ return max(self) # derived from set, so this returns the max exponent
+
+ @property
+ def exponents(self):
+ exponents = list(self) # get elements of set as a list
+ exponents.sort(reverse=True)
+ return exponents
+
+ def __str__(self):
+ expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2)
+ retval = map(lambda i: expd[min(i,2)].format(i), self.exponents)
+ return " + ".join(retval)
+
+ def __repr__(self):
+ return "LFSRPolynomial(%s)" % self.exponents
+
+
+# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa
+LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])
+LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])
+LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])
+LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])
+LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])
+LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])
+LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])
+LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])
+LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])
+LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])
+LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])
+LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])
+LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])
+LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])
+LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])
+LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])
+LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])
+LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])
+LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])
+LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])
+LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])
+LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])
+LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])
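+
+# For example (illustrative of __str__/__repr__ above):
+#   str(LFSR_POLY_4)  == "x^4 + x^3 + 1"
+#   repr(LFSR_POLY_4) == "LFSRPolynomial([4, 3, 0])"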
+
+
+class LFSR(LFSRPolynomial, Elaboratable):
+ """ implements a Linear Feedback Shift Register
+ """
+ def __init__(self, polynomial):
+ """ Inputs:
+ ------
+ :polynomial: the polynomial to feedback on. may be a LFSRPolynomial
+ instance or an iterable of ints (list/tuple/generator)
+ :enable: enable (set LO to disable. NOTE: defaults to HI)
+
+ Outputs:
+ -------
+ :state: the LFSR state. bitwidth is taken from the polynomial
+ maximum exponent.
+
+ Note: if an LFSRPolynomial is passed in as the input, because
+ LFSRPolynomial is derived from set() it's ok:
+ LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
+ """
+ LFSRPolynomial.__init__(self, polynomial)
+ self.state = Signal(self.max_exponent, reset=1)
+ self.enable = Signal(reset=1)
+
+ def elaborate(self, platform):
+ m = Module()
+ # do absolutely nothing if the polynomial is empty (always has a zero)
+ if self.max_exponent <= 1:
+ return m
+
+ # create XOR-bunch, select bits from state based on exponent
+ feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain)
+ for exponent in self:
+            if exponent > 0:  # exponent 0 is the feedback term, not a tap
+ feedback ^= self.state[exponent - 1]
+
+ # if enabled, shift-and-feedback
+ with m.If(self.enable):
+ # shift up lower bits by Cat'ing in a new bit zero (feedback)
+ newstate = Cat(feedback, self.state[:-1])
+ m.d.sync += self.state.eq(newstate)
+
+ return m
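+
+
+# A minimal pure-Python model of the same shift-and-feedback step, useful as
+# a reference when checking simulation output (illustrative only; the helper
+# name and the example below are not part of the hardware description).
+# width corresponds to the polynomial's maximum exponent.
+def _lfsr_step_model(state, polynomial, width):
+    feedback = 0
+    for exponent in polynomial:
+        if exponent > 0:  # exponent 0 is the feedback term, not a tap
+            feedback ^= (state >> (exponent - 1)) & 1
+    # shift left, drop the old top bit, insert feedback as the new bit 0
+    return ((state << 1) & ((1 << width) - 1)) | feedback
+
+# e.g. repeatedly applying it with LFSR_POLY_3 (width 3) from state 1 walks
+# through all 7 non-zero states before returning to 1.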
+
+
+# example: Poly24
+if __name__ == '__main__':
+ p24 = rtlil.convert(LFSR(LFSR_POLY_24))
+ with open("lfsr2_p24.il", "w") as f:
+ f.write(p24)
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Module
+from typing import Iterable, Optional, Iterator, Any, Union
+from typing_extensions import final
+
+
+@final
+class LFSRPolynomial(set):
+    def __init__(self, exponents: Iterable[int] = ()): ...
+    def elements(self) -> Iterable[int]: ...
+ @property
+ def exponents(self) -> list[int]: ...
+ def __str__(self) -> str: ...
+ def __repr__(self) -> str: ...
+
+
+@final
+class LFSR:
+ def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...
+ @property
+ def width(self) -> int: ...
+ def elaborate(self, platform: Any) -> Module: ...
--- /dev/null
+verilog:
+ python3 Cam.py generate -t v > Cam.v
--- /dev/null
+from nmigen import Cat, Memory, Module, Signal, Elaboratable
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+
+
+class MemorySet(Elaboratable):
+ def __init__(self, data_size, tag_size, set_count, active):
+ self.active = active
+ input_size = tag_size + data_size # Size of the input data
+ memory_width = input_size + 1 # The width of the cache memory
+ self.data_size = data_size
+ self.tag_size = tag_size
+
+ # XXX TODO, use rd-enable and wr-enable?
+ self.mem = Memory(width=memory_width, depth=set_count)
+ self.r = self.mem.read_port()
+ self.w = self.mem.write_port()
+
+ # inputs (address)
+ self.cset = Signal(range(set_count)) # The set to be checked
+ self.tag = Signal(tag_size) # The tag to find
+ self.data_i = Signal(data_size) # Incoming data
+
+ # outputs
+ self.valid = Signal()
+ self.data_o = Signal(data_size) # Outgoing data (excludes tag)
+
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.mem = self.mem
+ m.submodules.r = self.r
+ m.submodules.w = self.w
+
+ # temporaries
+ active_bit = Signal()
+ tag_valid = Signal()
+ data_start = self.active + 1
+ data_end = data_start + self.data_size
+ tag_start = data_end
+ tag_end = tag_start + self.tag_size
+
+ # connect the read port address to the set/entry
+ read_port = self.r
+ m.d.comb += read_port.addr.eq(self.cset)
+ # Pull out active bit from data
+ data = read_port.data
+ m.d.comb += active_bit.eq(data[self.active])
+ # Validate given tag vs stored tag
+ tag = data[tag_start:tag_end]
+ m.d.comb += tag_valid.eq(self.tag == tag)
+ # An entry is only valid if the tags match AND
+ # is marked as a valid entry
+ m.d.comb += self.valid.eq(tag_valid & active_bit)
+
+ # output data: TODO, check rd-enable?
+ m.d.comb += self.data_o.eq(data[data_start:data_end])
+
+ # connect the write port addr to the set/entry (only if write enabled)
+ # (which is only done on a match, see SAC.write_entry below)
+ write_port = self.w
+ with m.If(write_port.en):
+ m.d.comb += write_port.addr.eq(self.cset)
+ m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag))
+
+ return m
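+
+
+# Illustrative sketch of the row layout produced by elaborate() above when
+# active=0 (the value SetAssociativeCache passes in): bit 0 is the active
+# bit, the data occupies the bits directly above it and the tag sits on top.
+# _pack_row is a hypothetical helper mirroring Cat(1, data_i, tag); it is
+# not used by the hardware.
+def _pack_row(tag, data, data_size):
+    return 1 | (data << 1) | (tag << (1 + data_size))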
--- /dev/null
+from nmigen import Module, Signal, Elaboratable
+from nmigen.cli import main
+
+from soc.TLB.PteEntry import PteEntry
+
+
+class PermissionValidator(Elaboratable):
+ """ The purpose of this Module is to check the Permissions of a given PTE
+ against the requested access permissions.
+
+ This module will either validate (by setting the valid bit HIGH)
+ the request or find a permission fault and invalidate (by setting
+ the valid bit LOW) the request
+ """
+
+ def __init__(self, asid_size, pte_size):
+ """ Arguments:
+ * asid_size: (bit count) The size of the asid to be processed
+ * pte_size: (bit count) The size of the pte to be processed
+
+ Return:
+ * valid HIGH when permissions are correct
+ """
+ # Internal
+ self.pte_entry = PteEntry(asid_size, pte_size)
+
+ # Input
+ self.data = Signal(asid_size + pte_size)
+ self.xwr = Signal(3) # Execute, Write, Read
+ self.super_mode = Signal(1) # Supervisor Mode
+ self.super_access = Signal(1) # Supervisor Access
+        self.asid = Signal(asid_size)  # Address Space IDentifier (ASID)
+
+ # Output
+ self.valid = Signal(1) # Denotes if the permissions are correct
+
+ def elaborate(self, platform=None):
+ m = Module()
+
+ m.submodules.pte_entry = self.pte_entry
+
+ m.d.comb += self.pte_entry.i.eq(self.data)
+
+ # Check if the entry is valid
+ with m.If(self.pte_entry.v):
+ # ASID match or Global Permission
+ # Note that the MSB bound is exclusive
+ with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g):
+ # Check Execute, Write, Read (XWR) Permissions
+ with m.If(self.pte_entry.xwr == self.xwr):
+ # Supervisor Logic
+ with m.If(self.super_mode):
+ # Valid if entry is not in user mode or supervisor
+ # has Supervisor User Memory (SUM) access via the
+ # SUM bit in the sstatus register
+ m.d.comb += self.valid.eq((~self.pte_entry.u)
+ | self.super_access)
+ # User logic
+ with m.Else():
+ # Valid if the entry is in user mode only
+ m.d.comb += self.valid.eq(self.pte_entry.u)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ return m
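+
+
+# Pure-Python sketch of the same combinational decision, handy as a reference
+# model in tests (hypothetical helper; the argument names are illustrative and
+# it is not used by the hardware above).
+def _permission_ok(v, g, u, xwr_entry, xwr_req, asid_entry, asid_req,
+                   super_mode, super_access):
+    if not v:                                  # entry must be valid
+        return False
+    if not (g or asid_entry == asid_req):      # global or matching ASID
+        return False
+    if xwr_entry != xwr_req:                   # XWR permissions must match
+        return False
+    if super_mode:                             # supervisor: no user pages,
+        return (not u) or bool(super_access)   # unless SUM access is granted
+    return bool(u)                             # user: user pages only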
--- /dev/null
+from nmigen import Module, Signal, Elaboratable
+from nmigen.cli import main
+
+
+class PteEntry(Elaboratable):
+ """ The purpose of this Module is to centralize the parsing of Page
+ Table Entries (PTE) into one module to prevent common mistakes
+ and duplication of code. The control bits are parsed out for
+ ease of use.
+
+ This module parses according to the standard PTE given by the
+ Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
+ The Address Space IDentifier (ASID) is appended to the MSB of the input
+ and is parsed out as such.
+
+    A valid input Signal would be:
+ ASID PTE
+ Bits:[78-64][63-0]
+
+ The output PTE value will include the control bits.
+ """
+ def __init__(self, asid_size, pte_size):
+ """ Arguments:
+ * asid_size: (bit count) The size of the asid to be processed
+ * pte_size: (bit count) The size of the pte to be processed
+
+ Return:
+ * d The Dirty bit from the PTE portion of i
+ * a The Accessed bit from the PTE portion of i
+ * g The Global bit from the PTE portion of i
+ * u The User Mode bit from the PTE portion of i
+            * xwr The Execute/Write/Read bits from the PTE portion of i
+ * v The Valid bit from the PTE portion of i
+ * asid The asid portion of i
+ * pte The pte portion of i
+ """
+ # Internal
+ self.asid_start = pte_size
+ self.asid_end = pte_size + asid_size
+
+ # Input
+ self.i = Signal(asid_size + pte_size)
+
+ # Output
+ self.d = Signal(1) # Dirty bit (From pte)
+ self.a = Signal(1) # Accessed bit (From pte)
+ self.g = Signal(1) # Global Access (From pte)
+ self.u = Signal(1) # User Mode (From pte)
+        self.xwr = Signal(3) # Execute Write Read (From pte)
+ self.v = Signal(1) # Valid (From pte)
+ self.asid = Signal(asid_size) # Associated Address Space IDentifier
+ self.pte = Signal(pte_size) # Full Page Table Entry
+
+ def elaborate(self, platform=None):
+ m = Module()
+        # Pull out all control bits from the PTE
+ m.d.comb += [
+ self.d.eq(self.i[7]),
+ self.a.eq(self.i[6]),
+ self.g.eq(self.i[5]),
+ self.u.eq(self.i[4]),
+ self.xwr.eq(self.i[1:4]),
+ self.v.eq(self.i[0])
+ ]
+ m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end])
+ m.d.comb += self.pte.eq(self.i[0:self.asid_start])
+ return m
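+
+
+# Pure-Python illustration of the slicing performed in elaborate() above
+# (hypothetical helper, not used by the hardware; the default sizes are only
+# an example matching the rest of this TLB code).
+def _split_pte(value, asid_size=15, pte_size=64):
+    pte = value & ((1 << pte_size) - 1)
+    asid = (value >> pte_size) & ((1 << asid_size) - 1)
+    return {
+        "v": pte & 1,                # i[0]
+        "xwr": (pte >> 1) & 0b111,   # i[1:4]
+        "u": (pte >> 4) & 1,         # i[4]
+        "g": (pte >> 5) & 1,         # i[5]
+        "a": (pte >> 6) & 1,         # i[6]
+        "d": (pte >> 7) & 1,         # i[7]
+        "asid": asid,
+        "pte": pte,
+    }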
--- /dev/null
+"""
+
+Online simulator of 4-way set-associative cache:
+http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html
+
+Python simulator of a N-way set-associative cache:
+https://github.com/vaskevich/CacheSim/blob/master/cachesim.py
+"""
+
+from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
+from nmigen.compat.genlib import fsm
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+
+from .AddressEncoder import AddressEncoder
+from .MemorySet import MemorySet
+
+# TODO: use a LFSR that advances continuously and picking the bottom
+# few bits from it to select which cache line to replace, instead of PLRU
+# http://bugs.libre-riscv.org/show_bug.cgi?id=71
+from .ariane.plru import PLRU
+from .LFSR import LFSR, LFSR_POLY_24
+
+SA_NA = "00" # no action (none)
+SA_RD = "01" # read
+SA_WR = "10" # write
+
+
+class SetAssociativeCache(Elaboratable):
+ """ Set Associative Cache Memory
+
+ The purpose of this module is to generate a memory cache given the
+ constraints passed in. This will create a n-way set associative cache.
+ It is expected for the SV TLB that the VMA will provide the set number
+ while the ASID provides the tag (still to be decided).
+
+ """
+
+ def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
+ """ Arguments
+ * tag_size (bits): The bit count of the tag
+ * data_size (bits): The bit count of the data to be stored
+ * set_count (number): The number of sets/entries in the cache
+ * way_count (number): The number of slots a data can be stored
+ in one set
+ * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
+ set/entry to write to. otherwise, use a PLRU
+ """
+ # Internals
+ self.lfsr_mode = lfsr
+ self.way_count = way_count # The number of slots in one set
+ self.tag_size = tag_size # The bit count of the tag
+ self.data_size = data_size # The bit count of the data to be stored
+
+ # set up Memory array
+ self.mem_array = Array() # memory array
+ for i in range(way_count):
+ ms = MemorySet(data_size, tag_size, set_count, active=0)
+ self.mem_array.append(ms)
+
+ # Finds valid entries
+ self.encoder = AddressEncoder(way_count)
+
+ # setup PLRU or LFSR
+ if lfsr:
+ # LFSR mode
+ self.lfsr = LFSR(LFSR_POLY_24)
+ else:
+ # PLRU mode
+ # One block to handle plru calculations
+ self.plru = PLRU(way_count)
+ self.plru_array = Array() # PLRU data on each set
+ for i in range(set_count):
+ name = "plru%d" % i
+ self.plru_array.append(Signal(self.plru.TLBSZ, name=name))
+
+ # Input
+ self.enable = Signal(1) # Whether the cache is enabled
+ self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX)
+ self.cset = Signal(range(set_count)) # The set to be checked
+ self.tag = Signal(tag_size) # The tag to find
+ self.data_i = Signal(data_size) # The input data
+
+ # Output
+ self.ready = Signal(1) # 0 => Processing 1 => Ready for commands
+ self.hit = Signal(1) # Tag matched one way in the given set
+ # Tag matched many ways in the given set
+ self.multiple_hit = Signal(1)
+ self.data_o = Signal(data_size) # The data linked to the matched tag
+
+ def check_tags(self, m):
+        """ Validate the tags in the selected set. If one and only one
+            tag matches, output its data and update the replacement state
+            (PLRU mode). We only advance to the next state on a single hit.
+ """
+ # Vector to store way valid results
+ # A zero denotes a way is invalid
+ valid_vector = []
+ # Loop through memory to prep read/write ports and set valid_vector
+ for i in range(self.way_count):
+ valid_vector.append(self.mem_array[i].valid)
+
+ # Pass encoder the valid vector
+ m.d.comb += self.encoder.i.eq(Cat(*valid_vector))
+
+ # Only one entry should be marked
+ # This is due to already verifying the tags
+ # matched and the valid bit is high
+ with m.If(self.hit):
+ m.next = "FINISHED_READ"
+ # Pull out data from the read port
+ data = self.mem_array[self.encoder.o].data_o
+ m.d.comb += self.data_o.eq(data)
+ if not self.lfsr_mode:
+ self.access_plru(m)
+
+        # Multiple tags matched?! This should not happen; return no data
+ with m.Elif(self.multiple_hit):
+ # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
+ m.d.comb += self.data_o.eq(0)
+
+ # No tag matches means no data
+ with m.Else():
+ # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
+ m.d.comb += self.data_o.eq(0)
+
+ def access_plru(self, m):
+ """ An entry was accessed and the plru tree must now be updated
+ """
+ # Pull out the set's entry being edited
+ plru_entry = self.plru_array[self.cset]
+ m.d.comb += [
+ # Set the plru data to the current state
+ self.plru.plru_tree.eq(plru_entry),
+ # Set that the cache was accessed
+ self.plru.lu_access_i.eq(1)
+ ]
+
+ def read(self, m):
+ """ Go through the read process of the cache.
+ This takes two cycles to complete. First it checks for a valid tag
+ and secondly it updates the LRU values.
+ """
+ with m.FSM() as fsm_read:
+ with m.State("READY"):
+ m.d.comb += self.ready.eq(0)
+ # check_tags will set the state if the conditions are met
+ self.check_tags(m)
+ with m.State("FINISHED_READ"):
+ m.next = "READY"
+ m.d.comb += self.ready.eq(1)
+ if not self.lfsr_mode:
+ plru_tree_o = self.plru.plru_tree_o
+ m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)
+
+ def write_entry(self, m):
+ if not self.lfsr_mode:
+ m.d.comb += [ # set cset (mem address) into PLRU
+ self.plru.plru_tree.eq(self.plru_array[self.cset]),
+ # and connect plru to encoder for write
+ self.encoder.i.eq(self.plru.replace_en_o)
+ ]
+ write_port = self.mem_array[self.encoder.o].w
+ else:
+ # use the LFSR to generate a random(ish) one of the mem array
+ lfsr_output = Signal(range(self.way_count))
+ lfsr_random = Signal(range(self.way_count))
+ m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits
+            # address too big, limit to range of array (>= so that an index
+            # equal to way_count also wraps back into range)
+            m.d.comb += lfsr_random.eq(Mux(lfsr_output >= self.way_count,
+ lfsr_output - self.way_count,
+ lfsr_output))
+ write_port = self.mem_array[lfsr_random].w
+
+ # then if there is a match from the encoder, enable the selected write
+ with m.If(self.encoder.single_match):
+ m.d.comb += write_port.en.eq(1)
+
+ def write(self, m):
+ """ Go through the write process of the cache.
+ This takes two cycles to complete. First it writes the entry,
+ and secondly it updates the PLRU (in plru mode)
+ """
+ with m.FSM() as fsm_write:
+ with m.State("READY"):
+ m.d.comb += self.ready.eq(0)
+ self.write_entry(m)
+ m.next = "FINISHED_WRITE"
+ with m.State("FINISHED_WRITE"):
+ m.d.comb += self.ready.eq(1)
+ if not self.lfsr_mode:
+ plru_entry = self.plru_array[self.cset]
+ m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
+ m.next = "READY"
+
+ def elaborate(self, platform=None):
+ m = Module()
+
+ # ----
+ # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
+ # ----
+
+ m.submodules.AddressEncoder = self.encoder
+ if self.lfsr_mode:
+ m.submodules.LFSR = self.lfsr
+ else:
+ m.submodules.PLRU = self.plru
+
+ for i, mem in enumerate(self.mem_array):
+ setattr(m.submodules, "mem%d" % i, mem)
+
+ # ----
+        # select mode: in PLRU mode, feed the hit way from the encoder to
+        # the PLRU; in LFSR mode, simply let the LFSR run while enabled
+ # ----
+
+ if not self.lfsr_mode:
+ # Set what entry was hit
+ m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
+ else:
+ # enable LFSR
+ m.d.comb += self.lfsr.enable.eq(self.enable)
+
+ # ----
+ # connect hit/multiple hit to encoder output
+ # ----
+
+ m.d.comb += [
+ self.hit.eq(self.encoder.single_match),
+ self.multiple_hit.eq(self.encoder.multiple_match),
+ ]
+
+ # ----
+ # connect incoming data/tag/cset(addr) to mem_array
+ # ----
+
+ for mem in self.mem_array:
+ write_port = mem.w
+ m.d.comb += [mem.cset.eq(self.cset),
+ mem.tag.eq(self.tag),
+ mem.data_i.eq(self.data_i),
+ write_port.en.eq(0), # default: disable write
+ ]
+ # ----
+ # Commands: READ/WRITE/TODO
+ # ----
+
+ with m.If(self.enable):
+ with m.Switch(self.command):
+ # Search all sets at a particular tag
+ with m.Case(SA_RD):
+ self.read(m)
+ with m.Case(SA_WR):
+ self.write(m)
+ # Maybe catch multiple tags write here?
+ # TODO
+ # TODO: invalidate/flush, flush-all?
+
+ return m
+
+ def ports(self):
+ return [self.enable, self.command, self.cset, self.tag, self.data_i,
+ self.ready, self.hit, self.multiple_hit, self.data_o]
+
+
+if __name__ == '__main__':
+ sac = SetAssociativeCache(4, 8, 4, 6)
+ vl = rtlil.convert(sac, ports=sac.ports())
+ with open("SetAssociativeCache.il", "w") as f:
+ f.write(vl)
+
+ sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True)
+ vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports())
+ with open("SetAssociativeCacheLFSR.il", "w") as f:
+ f.write(vl)
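+
+    # The .il files written above are in yosys RTLIL/ILANG form and can be
+    # inspected with, for example:
+    #   yosys -p 'read_ilang SetAssociativeCache.il; stat'
+    # (assumes a local yosys installation)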
--- /dev/null
+""" TLB Module
+
+ The expected form of the data is:
+ * Item (Bits)
+ * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
+"""
+
+from nmigen import Memory, Module, Signal, Cat, Elaboratable
+from nmigen.cli import main
+
+from .PermissionValidator import PermissionValidator
+from .Cam import Cam
+
+
+class TLB(Elaboratable):
+ def __init__(self, asid_size, vma_size, pte_size, L1_size):
+ """ Arguments
+ * asid_size: Address Space IDentifier (ASID) typically 15 bits
+ * vma_size: Virtual Memory Address (VMA) typically 36 bits
+            * pte_size: Page Table Entry (PTE) typically 64 bits
+            * L1_size: Number of entries in the L1 CAM/cache
+
+ Notes:
+ These arguments should represent the largest possible size
+ defined by the MODE settings. See
+ Volume II: RISC-V Privileged Architectures V1.10 Page 57
+ """
+
+ # Internal
+ self.state = 0
+ # L1 Cache Modules
+ self.cam_L1 = Cam(vma_size, L1_size)
+ self.mem_L1 = Memory(width=asid_size + pte_size, depth=L1_size)
+
+ # Permission Validator
+ self.perm_validator = PermissionValidator(asid_size, pte_size)
+
+ # Inputs
+ self.supermode = Signal(1) # Supervisor Mode
+ self.super_access = Signal(1) # Supervisor Access
+ # 00=None, 01=Search, 10=Write L1, 11=Write L2
+ self.command = Signal(2)
+ self.xwr = Signal(3) # Execute, Write, Read
+ self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
+ self.address_L1 = Signal(range(L1_size))
+ self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
+ self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
+ self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
+
+ # Outputs
+ self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
+ self.perm_valid = Signal(1) # Denotes if the permissions are correct
+ self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
+
+ def search(self, m, read_L1, write_L1):
+ """ searches the TLB
+ """
+ m.d.comb += [
+ write_L1.en.eq(0),
+ self.cam_L1.write_enable.eq(0),
+ self.cam_L1.data_in.eq(self.vma)
+ ]
+ # Match found in L1 CAM
+ match_found = Signal(reset_less=True)
+ m.d.comb += match_found.eq(self.cam_L1.single_match
+ | self.cam_L1.multiple_match)
+ with m.If(match_found):
+ # Memory shortcut variables
+ mem_address = self.cam_L1.match_address
+ # Memory Logic
+ m.d.comb += read_L1.addr.eq(mem_address)
+ # Permission Validator Logic
+ m.d.comb += [
+ self.hit.eq(1),
+ # Set permission validator data to the correct
+ # register file data according to CAM match
+ # address
+ self.perm_validator.data.eq(read_L1.data),
+ # Execute, Read, Write
+ self.perm_validator.xwr.eq(self.xwr),
+ # Supervisor Mode
+ self.perm_validator.super_mode.eq(self.supermode),
+                # Supervisor Access
+ self.perm_validator.super_access.eq(self.super_access),
+ # Address Space IDentifier (ASID)
+ self.perm_validator.asid.eq(self.asid),
+ # Output result of permission validation
+ self.perm_valid.eq(self.perm_validator.valid)
+ ]
+ # Only output PTE if permissions are valid
+ with m.If(self.perm_validator.valid):
+ # XXX TODO - dummy for now
+ reg_data = Signal.like(self.pte_out)
+ m.d.comb += [
+ self.pte_out.eq(reg_data)
+ ]
+ with m.Else():
+ m.d.comb += [
+ self.pte_out.eq(0)
+ ]
+ # Miss Logic
+ with m.Else():
+ m.d.comb += [
+ self.hit.eq(0),
+ self.perm_valid.eq(0),
+ self.pte_out.eq(0)
+ ]
+
+ def write_l1(self, m, read_L1, write_L1):
+ """ writes to the L1 cache
+ """
+ # Memory_L1 Logic
+ m.d.comb += [
+ write_L1.en.eq(1),
+ write_L1.addr.eq(self.address_L1),
+ # The Cat places arguments from LSB -> MSB
+ write_L1.data.eq(Cat(self.pte_in, self.asid))
+ ]
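+        # Resulting row layout in mem_L1 (Cat packs LSB first): the PTE sits
+        # in the low pte_size bits with the ASID directly above it, e.g. for
+        # the sizes used in __main__ below: [63:0] = PTE, [78:64] = ASID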
+ # CAM_L1 Logic
+ m.d.comb += [
+ self.cam_L1.write_enable.eq(1),
+ self.cam_L1.data_in.eq(self.vma), # data_in is sent to all entries
+ # self.cam_L1.address_in.eq(todo) # a CAM entry needs to be selected
+
+ ]
+
+ def elaborate(self, platform):
+ m = Module()
+ # Add submodules
+ # Submodules for L1 Cache
+ m.submodules.cam_L1 = self.cam_L1
+ m.submodules.read_L1 = read_L1 = self.mem_L1.read_port()
+ m.submodules.write_L1 = write_L1 = self.mem_L1.write_port()
+
+ # Permission Validator Submodule
+        m.submodules.perm_validator = self.perm_validator
+
+ # When MODE specifies translation
+ # TODO add in different bit length handling ie prefix 0s
+ tlb_enable = Signal(reset_less=True)
+ m.d.comb += tlb_enable.eq(self.mode != 0)
+
+ with m.If(tlb_enable):
+ m.d.comb += [
+ self.cam_L1.enable.eq(1)
+ ]
+ with m.Switch(self.command):
+ # Search
+ with m.Case("01"):
+ self.search(m, read_L1, write_L1)
+
+ # Write L1
+ # Expected that the miss will be handled in software
+ with m.Case("10"):
+ self.write_l1(m, read_L1, write_L1)
+
+ # TODO
+ # with m.Case("11"):
+
+ # When disabled
+ with m.Else():
+ m.d.comb += [
+ self.cam_L1.enable.eq(0),
+ # XXX TODO - self.reg_file.enable.eq(0),
+ self.hit.eq(0),
+ self.perm_valid.eq(0), # XXX TODO, check this
+ self.pte_out.eq(0)
+ ]
+ return m
+
+
+if __name__ == '__main__':
+ tlb = TLB(15, 36, 64, 4)
+ main(tlb, ports=[tlb.supermode, tlb.super_access, tlb.command,
+ tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
+ tlb.vma, tlb.pte_in,
+ tlb.hit, tlb.perm_valid, tlb.pte_out,
+ ] + tlb.cam_L1.ports())
--- /dev/null
+#include <cstdint>
+#include <iostream>
+#include <cmath>
+
+
+#define NWAY 4
+#define NLINE 256
+#define HIT 0
+#define MISS 1
+#define MS 1000
+/*
+Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
+Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
+four-way set associative - three bits
+ each bit represents one branch point in a binary decision tree; let 1
+ represent that the left side has been referenced more recently than the
+ right side, and 0 vice-versa
+ are all 4 lines valid?
+ / \
+ yes no, use an invalid line
+ |
+ |
+ |
+ bit_0 == 0? state | replace ref to | next state
+ / \ ------+-------- -------+-----------
+ y n 00x | line_0 line_0 | 11_
+ / \ 01x | line_1 line_1 | 10_
+ bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
+ / \ / \ 1x1 | line_3 line_3 | 0_0
+ y n y n
+ / \ / \ ('x' means ('_' means unchanged)
+ line_0 line_1 line_2 line_3 don't care)
+ 8-way set associative - 7 = 1+2+4 bits
+16-way set associative - 15 = 1+2+4+8 bits
+32-way set associative - 31 = 1+2+4+8+16 bits
+64-way set associative - 63 = 1+2+4+8+16+32 bits
+*/
+using namespace std;
+struct AddressField {
+ uint64_t wd_idx : 2;//Unused
+ uint64_t offset : 4;//Unused
+ uint64_t index : 8;//NLINE = 256 = 2^8
+ uint64_t tag : 50;
+};
+
+union Address {
+ uint32_t* p;
+ AddressField fields;
+};
+
+struct Cell {
+ bool v;
+ uint64_t tag;
+
+ Cell() : v(false), tag(0) {}
+
+ bool isHit(uint64_t tag) {
+ return v && (tag == this->tag);
+ }
+
+ void fetch(uint32_t* address) {
+ Address addr;
+ addr.p = address;
+ addr.fields.offset = 0;
+ addr.fields.wd_idx = 0;
+ tag = addr.fields.tag;
+ v = true;
+ }
+};
+
+ostream& operator<<(ostream & out, const Cell& cell) {
+ out << " v:" << cell.v << " tag:" << hex << cell.tag;
+ return out;
+}
+
+struct Block {
+ Cell cell[NWAY];
+ uint32_t state;
+    uint64_t *mask; // Masks selecting the state bits that are relevant to each way.
+ uint64_t *value;
+ uint64_t *next_value;
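+    // Descriptive summary of the tables below (see the TreePLRU notes above):
+    //   - (state & mask[i]) == value[i] identifies way i as the PLRU victim;
+    //   - setLRU() then flips those bits (state ^= mask[i]) so the tree
+    //     points away from the way that was just filled;
+    //   - on a hit, set() clears the masked bits and ORs in next_value[i],
+    //     again steering the tree away from the most recently used way.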
+
+ Block() : state(0) {
+ switch (NWAY) {
+ case 4:
+ mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
+ value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
+ next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
+ break;
+ case 8:
+ mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
+ 0b1010001};
+ value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
+ 0b1010001};
+ next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
+ 0b0000001, 0b0000000};
+ break;
+ //TODO - more NWAY goes here.
+ default:
+ std::cout << "Error definition NWAY = " << NWAY << std::endl;
+ }
+ }
+
+ uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
+ for (int i = 0; i < NWAY; ++i) {
+ if (cell[i].isHit(tag)) {
+ *pway = i;
+ return pway;
+ }
+ }
+ return NULL;
+ }
+
+ void setLRU(uint32_t *address) {
+ int way = 0;
+ uint32_t st = state;
+ for (int i = 0; i < NWAY; ++i) {
+ if ((state & mask[i]) == value[i]) {
+ state ^= mask[i];
+ way = i;
+ break;
+ }
+ }
+ cell[way].fetch(address);
+ cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
+ }
+
+ uint32_t *get(uint32_t *address, uint32_t *pway) {
+ Address addr;
+ addr.p = address;
+ uint32_t *d = getByTag(addr.fields.tag, pway);
+ if (d != NULL) {
+ return &d[addr.fields.offset];
+ }
+ return d;
+ }
+
+ int set(uint32_t *address) {
+ uint32_t way = 0;
+ uint32_t *p = get(address, &way);
+ if (p != NULL) {
+ printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state);
+ state &= ~mask[way];
+ printf("%X --> ", state);
+ state |= next_value[way];
+ printf("%X\n", state);
+ // *p = *address; //skip since address is fake.
+ return HIT;
+ } else {
+ setLRU(address);
+ return MISS;
+ }
+ }
+};
+
+ostream& operator<<(ostream & out, const Block& block) {
+ out << "state:" << block.state << " ";
+ for (int i = 0; i<NWAY; i++) {
+ out << block.cell[i];
+ }
+ return out;
+}
+
+struct Cache {
+ Block block[NLINE];
+ uint32_t count[2];
+ Cache() { count[HIT] = 0; count[MISS] = 0; }
+
+ void access(uint32_t* address) {
+ Address addr;
+ addr.p = address;
+ Block& b = block[addr.fields.index];
+ ++count[b.set(address)];
+ }
+
+};
+ostream& operator<<(ostream & out, const Cache& cache) {
+ out << "\n==Summary==\n\tHit: " << cache.count[HIT] << " Miss: " << cache.count[MISS] << std::endl;
+ for (int i = 0; i < NLINE; i++) {
+ out << cache.block[i] << endl;
+ }
+ return out;
+}
+
+Cache cache;
+void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
+{
+ int x, i, j;
+ for (i = 0; i < MS; i++) {
+ for (j = 0; j < MS; j++) {
+ cache.access(res + i*MS +j);
+ for (x = 0; x < MS; x++) {
+ cache.access(m1 + i*MS + x);
+ cache.access(m2 + x*MS + j);
+ cache.access(res + i*MS +j);
+ // res[i][j] += m1[i][x] * m2[x][j];
+ cache.access(res + i*MS +j);
+ }
+ }
+ }
+}
+
+int main()
+{
+ uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL; // fake virtual address; don’t access it
+ uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL; // fake virtual address; don’t access it
+ uint32_t* res = (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
+ multiply(m1, m2, res);
+ cout << cache << endl;
+ return 0;
+}
--- /dev/null
+from nmigen import Const
+
+INSTR_ADDR_MISALIGNED = Const(0, 64)
+INSTR_ACCESS_FAULT = Const(1, 64)
+ILLEGAL_INSTR = Const(2, 64)
+BREAKPOINT = Const(3, 64)
+LD_ADDR_MISALIGNED = Const(4, 64)
+LD_ACCESS_FAULT = Const(5, 64)
+ST_ADDR_MISALIGNED = Const(6, 64)
+ST_ACCESS_FAULT = Const(7, 64)
+ENV_CALL_UMODE = Const(8, 64) # environment call from user mode
+ENV_CALL_SMODE = Const(9, 64) # environment call from supervisor mode
+ENV_CALL_MMODE = Const(11, 64) # environment call from machine mode
+INSTR_PAGE_FAULT = Const(12, 64) # Instruction page fault
+LOAD_PAGE_FAULT = Const(13, 64) # Load page fault
+STORE_PAGE_FAULT = Const(15, 64) # Store page fault
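+
+# Usage sketch (illustrative only): these are plain 64-bit Consts, so inside a
+# module they can be assigned directly to a 64-bit cause Signal, e.g.
+#   m.d.sync += exception.cause.eq(LOAD_PAGE_FAULT)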
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: Florian Zaruba, ETH Zurich
+# Date: 12.11.2017
+# Description: Handles cache misses.
+from math import ceil, log
+
+from nmigen import Array, Cat, Const, Elaboratable, ResetSignal, Signal
+from nmigen.lib.coding import Encoder, PriorityEncoder
+
+
+# --------------
+# MISS Handler
+# --------------
+# from the original SystemVerilog (not yet ported):
+#   import ariane_pkg::*;
+#   import std_cache_pkg::*;
+
+NR_PORTS = 3
+
+class MissReq(RecordObject):
+ def __init__(self, name=None):
+ Record.__init__(self, name)
+ self.valid = Signal()
+ self.addr = Signal(64)
+ self.be = Signal(8)
+ self.size = Signal(2)
+ self.we = Signal()
+ self.wdata = Signal(64)
+        self.bypass = Signal()
+
+class CacheLine:
+ def __init__(self):
+ self.tag = Signal(DCACHE_TAG_WIDTH) # tag array
+ self.data = Signal(DCACHE_LINE_WIDTH) # data array
+ self.valid = Signal() # state array
+ self.dirty = Signal() # state array
+
+# cache line byte enable
+class CLBE:
+ def __init__(self):
+        self.tag = Signal((DCACHE_TAG_WIDTH+7)//8)   # byte enable into tag array
+        self.data = Signal((DCACHE_LINE_WIDTH+7)//8) # byte enable into data array
+ # bit enable into state array (valid for a pair of dirty/valid bits)
+ self.vldrty = Signal(DCACHE_SET_ASSOC)
+
+
+
+ # FSM states
+"""
+ enum logic [3:0] {
+ IDLE, # 0
+ FLUSHING, # 1
+ FLUSH, # 2
+ WB_CACHELINE_FLUSH, # 3
+ FLUSH_REQ_STATUS, # 4
+ WB_CACHELINE_MISS, # 5
+ WAIT_GNT_SRAM, # 6
+ MISS, # 7
+ REQ_CACHELINE, # 8
+ MISS_REPL, # 9
+ SAVE_CACHELINE, # A
+ INIT, # B
+ AMO_LOAD, # C
+ AMO_SAVE_LOAD, # D
+ AMO_STORE # E
+ } state_d, state_q;
+"""
+
+class MissHandler(Elaboratable):
+ def __init__(self, NR_PORTS):
+ self.NR_PORTS = NR_PORTS
+ self.pwid = pwid = ceil(log(NR_PORTS) / log(2))
+ self.flush_i = Signal() # flush request
+ self.flush_ack_o = Signal() # acknowledge successful flush
+ self.miss_o = Signal()
+ self.busy_i = Signal() # dcache is busy with something
+
+ # Bypass or miss
+ self.miss_req_i = Array(MissReq(name="missreq") for i in range(NR_PORTS))
+ # Bypass handling
+ self.bypass_gnt_o = Signal(NR_PORTS)
+ self.bypass_valid_o = Signal(NR_PORTS)
+        self.bypass_data_o = Array(Signal(64, name="bdata_o")
+                                   for i in range(NR_PORTS))
+
+        # AXI port (still to be ported from the SystemVerilog original):
+        # output ariane_axi::req_t axi_bypass_o,
+        # input  ariane_axi::resp_t axi_bypass_i,
+
+ # Miss handling (~> cacheline refill)
+ self.miss_gnt_o = Signal(NR_PORTS)
+ self.active_serving_o = Signal(NR_PORTS)
+
+ self.critical_word_o = Signal(64)
+ self.critical_word_valid_o = Signal()
+        # still to be ported from the SystemVerilog original:
+        # output ariane_axi::req_t axi_data_o,
+        # input  ariane_axi::resp_t axi_data_i,
+
+        self.mshr_addr_i = Array(Signal(56, name="mshr_addr_i")
+                                 for i in range(NR_PORTS))
+ self.mshr_addr_matches_o = Signal(NR_PORTS)
+ self.mshr_index_matches_o = Signal(NR_PORTS)
+
+ # AMO
+ self.amo_req_i = AMOReq()
+ self.amo_resp_o = AMOResp()
+ # Port to SRAMs, for refill and eviction
+ self.req_o = Signal(DCACHE_SET_ASSOC)
+ self.addr_o = Signal(DCACHE_INDEX_WIDTH) # address into cache array
+ self.data_o = CacheLine()
+ self.be_o = CLBE()
+ self.data_i = Array(CacheLine() \
+ for i in range(DCACHE_SET_ASSOC))
+ self.we_o = Signal()
+
+ def elaborate(self, platform):
+ # Registers
+ mshr_t mshr_d, mshr_q;
+ logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
+ logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
+ # cache line to evict
+ cache_line_t evict_cl_d, evict_cl_q;
+
+ logic serve_amo_d, serve_amo_q;
+ # Request from one FSM
+ miss_req_valid = Signal(self.NR_PORTS)
+ miss_req_bypass = Signal(self.NR_PORTS)
+        miss_req_addr  = Array(Signal(64, name="miss_req_addr")
+                               for i in range(self.NR_PORTS))
+        miss_req_wdata = Array(Signal(64, name="miss_req_wdata")
+                               for i in range(self.NR_PORTS))
+        miss_req_we    = Signal(self.NR_PORTS)
+        miss_req_be    = Array(Signal(8, name="miss_req_be")
+                               for i in range(self.NR_PORTS))
+        miss_req_size  = Array(Signal(2, name="miss_req_size")
+                               for i in range(self.NR_PORTS))
+
+ # Cache Line Refill <-> AXI
+ req_fsm_miss_valid = Signal()
+ req_fsm_miss_addr = Signal(64)
+ req_fsm_miss_wdata = Signal(DCACHE_LINE_WIDTH)
+ req_fsm_miss_we = Signal()
+ req_fsm_miss_be = Signal(DCACHE_LINE_WIDTH//8)
+ ariane_axi::ad_req_t req_fsm_miss_req;
+ req_fsm_miss_size = Signal(2)
+
+ gnt_miss_fsm = Signal()
+ valid_miss_fsm = Signal()
+ nmiss = DCACHE_LINE_WIDTH//64
+        data_miss_fsm = Array(Signal(64, name="data_miss_fsm")
+                              for i in range(nmiss))
+
+ # Cache Management <-> LFSR
+ lfsr_enable = Signal()
+ lfsr_oh = Signal(DCACHE_SET_ASSOC)
+ lfsr_bin = Signal($clog2(DCACHE_SET_ASSOC-1))
+ # AMOs
+ ariane_pkg::amo_t amo_op;
+ amo_operand_a = Signal(64)
+ amo_operand_b = Signal(64)
+ amo_result_o = Signal(64)
+
+ struct packed {
+ logic [63:3] address;
+ logic valid;
+ } reservation_d, reservation_q;
+
+ # ------------------------------
+ # Cache Management
+ # ------------------------------
+ evict_way = Signal(DCACHE_SET_ASSOC)
+ valid_way = Signal(DCACHE_SET_ASSOC)
+
+ for (i in range(DCACHE_SET_ASSOC):
+ comb += evict_way[i].eq(data_i[i].valid & data_i[i].dirty)
+ comb += valid_way[i].eq(data_i[i].valid)
+
+ # ----------------------
+ # Default Assignments
+ # ----------------------
+ # to AXI refill
+ req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ;
+ req_fsm_miss_size = Const(0b11, 2)
+ # core
+ serve_amo_d = serve_amo_q;
+ # --------------------------------
+ # Flush and Miss operation
+ # --------------------------------
+ state_d = state_q;
+ cnt_d = cnt_q;
+ evict_way_d = evict_way_q;
+ evict_cl_d = evict_cl_q;
+ mshr_d = mshr_q;
+ # communicate to the requester which unit we are currently serving
+ active_serving_o[mshr_q.id] = mshr_q.valid;
+ # AMOs
+ # silence the unit when not used
+ amo_op = amo_req_i.amo_op;
+
+ reservation_d = reservation_q;
+ with m.FSM() as state_q:
+
+ with m.Case("IDLE"):
+ # lowest priority are AMOs, wait until everything else
+ # is served before going for the AMOs
+ with m.If (amo_req_i.req & ~busy_i):
+ # 1. Flush the cache
+ with m.If(~serve_amo_q):
+ m.next = "FLUSH_REQ_STATUS"
+ serve_amo_d.eq(0b1
+ cnt_d.eq(0
+ # 2. Do the AMO
+ with m.Else():
+ m.next = "AMO_LOAD"
+ serve_amo_d.eq(0b0
+
+ # check if we want to flush and can flush
+ # e.g.: we are not busy anymore
+ # TODO: Check that the busy flag is indeed needed
+ with m.If (flush_i & ~busy_i):
+ m.next = "FLUSH_REQ_STATUS"
+ cnt_d = 0
+
+ # check if one of the state machines missed
+ for i in range(NR_PORTS):
+ # here comes the refill portion of code
+ with m.If (miss_req_valid[i] & ~miss_req_bypass[i]):
+ m.next = "MISS"
+ # we are taking another request so don't
+ # take the AMO
+ serve_amo_d = 0b0;
+ # save to MSHR
+ wid = DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH
+ comb += [ mshr_d.valid.eq(0b1),
+ mshr_d.we.eq(miss_req_we[i]),
+ mshr_d.id.eq(i),
+ mshr_d.addr.eq(miss_req_addr[i][0:wid]),
+ mshr_d.wdata.eq(miss_req_wdata[i]),
+ mshr_d.be.eq(miss_req_be[i]),
+ ]
+ break
+
+ # ~> we missed on the cache
+ with m.Case("MISS"):
+ # 1. Check if there is an empty cache-line
+ # 2. If not -> evict one
+ comb += req_o.eq(1)
+ sync += addr_o.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH]
+ m.next = "MISS_REPL"
+ comb += miss_o.eq(1)
+
+ # ~> second miss cycle
+ with m.Case("MISS_REPL"):
+ # if all are valid we need to evict one,
+ # pseudo random from LFSR
+ with m.If(~(~valid_way).bool()):
+ comb += lfsr_enable.eq(0b1)
+ comb += evict_way_d.eq(lfsr_oh)
+ # do we need to write back the cache line?
+ with m.If(data_i[lfsr_bin].dirty):
+ state_d = WB_CACHELINE_MISS;
+ comb += evict_cl_d.tag.eq(data_i[lfsr_bin].tag)
+ comb += evict_cl_d.data.eq(data_i[lfsr_bin].data)
+ comb += cnt_d.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
+ # no - we can request a cache line now
+ with m.Else():
+ m.next = "REQ_CACHELINE"
+ # we have at least one free way
+ with m.Else():
+ # get victim cache-line by looking for the
+ # first non-valid bit
+ comb += evict_way_d.eq(get_victim_cl(~valid_way)
+ m.next = "REQ_CACHELINE"
+
+ # ~> we can just load the cache-line,
+ # the way is store in evict_way_q
+ with m.Case("REQ_CACHELINE"):
+ comb += req_fsm_miss_valid .eq(1)
+ sync += req_fsm_miss_addr .eq(mshr_q.addr)
+
+ with m.If (gnt_miss_fsm):
+ m.next = "SAVE_CACHELINE"
+ comb += miss_gnt_o[mshr_q.id].eq(1)
+
+ # ~> replace the cacheline
+ with m.Case("SAVE_CACHELINE"):
+ # calculate cacheline offset
+ automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
+ sync += cl_offset.eq(mshr_q.addr[3:DCACHE_BYTE_OFFSET] << 6)
+ # we've got a valid response from refill unit
+ with m.If (valid_miss_fsm):
+ wid = DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH
+ sync += addr_o .eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
+ sync += req_o .eq(evict_way_q)
+ comb += we_o .eq(1)
+ comb += be_o .eq(1)
+ sync += be_o.vldrty .eq(evict_way_q)
+ sync += data_o.tag .eq(mshr_q.addr[DCACHE_INDEX_WIDTH:wid]
+ comb += data_o.data .eq(data_miss_fsm)
+ comb += data_o.valid.eq(1)
+ comb += data_o.dirty.eq(0)
+
+ # is this a write?
+ with m.If (mshr_q.we):
+ # Yes, so safe the updated data now
+ for i in range(8):
+ # check if we really want to write
+ # the corresponding byte
+ with m.If (mshr_q.be[i]):
+ sync += data_o.data[(cl_offset + i*8) +: 8].eq(mshr_q.wdata[i];
+ # it's immediately dirty if we write
+ comb += data_o.dirty.eq(1)
+
+ # reset MSHR
+ comb += mshr_d.valid.eq(0)
+ # go back to idle
+ m.next = 'IDLE'
+
+ # ------------------------------
+ # Write Back Operation
+ # ------------------------------
+ # ~> evict a cache line from way saved in evict_way_q
+ with m.Case("WB_CACHELINE_FLUSH"):
+ with m.Case("WB_CACHELINE_MISS"):
+
+ comb += req_fsm_miss_valid .eq(0b1)
+ sync += req_fsm_miss_addr .eq({evict_cl_q.tag, cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{0b0}}};
+ comb += req_fsm_miss_be .eq(1)
+ comb += req_fsm_miss_we .eq(0b1)
+ sync += req_fsm_miss_wdata .eq(evict_cl_q.data;
+
+ # we've got a grant --> this is timing critical, think about it
+ if (gnt_miss_fsm) begin
+ # write status array
+ sync += addr_o .eq(cnt_q)
+ comb += req_o .eq(0b1)
+ comb += we_o .eq(0b1)
+ comb += data_o.valid.eq(INVALIDATE_ON_FLUSH ? 0b0 : 0b1)
+ # invalidate
+ sync += be_o.vldrty.eq(evict_way_q)
+ # go back to handling the miss or flushing,
+ # depending on where we came from
+ with m.If(state_q == WB_CACHELINE_MISS):
+ m.next = "MISS"
+ with m.Else():
+ m.next = "FLUSH_REQ_STATUS"
+
+ # ------------------------------
+ # Flushing & Initialization
+ # ------------------------------
+ # ~> make another request to check the same
+ # cache-line if there are still some valid entries
+ with m.Case("FLUSH_REQ_STATUS"):
+ comb += req_o .eq(1)
+ sync += addr_o .eq(cnt_q)
+ m.next = "FLUSHING"
+
+ with m.Case("FLUSHING"):
+ # this has priority
+ # at least one of the cache lines is dirty
+ with m.If(~evict_way):
+ # evict cache line, look for the first
+ # cache-line which is dirty
+ comb += evict_way_d.eq(get_victim_cl(evict_way))
+ comb += evict_cl_d .eq(data_i[one_hot_to_bin(evict_way)])
+ state_d = WB_CACHELINE_FLUSH;
+ # not dirty ~> increment and continue
+ with m.Else():
+ # increment and re-request
+ sync += cnt_d.eq(cnt_q + (1 << DCACHE_BYTE_OFFSET))
+ m.next = "FLUSH_REQ_STATUS"
+ sync += addr_o .eq(cnt_q)
+ comb += req_o .eq(1)
+ comb += be_o.vldrty.eq(INVALIDATE_ON_FLUSH ? 1 : 0)
+ comb += we_o .eq(1)
+ # finished with flushing operation, go back to idle
+ with m.If (cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] \
+ == DCACHE_NUM_WORDS-1):
+ # only acknowledge if the flush wasn't
+ # triggered by an atomic
+ sync += flush_ack_o.eq(~serve_amo_q)
+ m.next = "IDLE"
+
+ # ~> only called after reset
+ with m.Case("INIT"):
+ # initialize status array
+ sync += addr_o.eq(cnt_q)
+ comb += req_o .eq(1)
+ comb += we_o .eq(1)
+ # only write the dirty array
+ comb += be_o.vldrty.eq(1)
+ sync += cnt_d .eq(cnt_q + (1 << DCACHE_BYTE_OFFSET))
+ # finished initialization
+ with m.If (cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] \
+ == DCACHE_NUM_WORDS-1)
+ m.next = "IDLE"
+
+ # ----------------------
+ # AMOs
+ # ----------------------
+ # TODO(zarubaf) Move this closer to memory
+ # ~> we are here because we need to do the AMO,
+ # the cache is clean at this point
+ # start by executing the load
+ with m.Case("AMO_LOAD"):
+ comb += req_fsm_miss_valid.eq(1)
+ # address is in operand a
+ comb += req_fsm_miss_addr.eq(amo_req_i.operand_a)
+ comb += req_fsm_miss_req.eq(ariane_axi::SINGLE_REQ)
+ comb += req_fsm_miss_size.eq(amo_req_i.size)
+ # the request has been granted
+ with m.If(gnt_miss_fsm):
+ m.next = "AMO_SAVE_LOAD"
+ # save the load value
+ with m.Case("AMO_SAVE_LOAD"):
+ with m.If (valid_miss_fsm):
+ # we are only concerned about the lower 64-bit
+ comb += mshr_d.wdata.eq(data_miss_fsm[0])
+ m.next = "AMO_STORE"
+ # and do the store
+ with m.Case("AMO_STORE"):
+ load_data = Signal(64)
+ # re-align load data
+ comb += load_data.eq(data_align(amo_req_i.operand_a[:3],
+ mshr_q.wdata))
+ # Sign-extend for word operation
+ with m.If (amo_req_i.size == 0b10):
+ comb += amo_operand_a.eq(sext32(load_data[:32]))
+ comb += amo_operand_b.eq(sext32(amo_req_i.operand_b[:32]))
+ with m.Else():
+ comb += amo_operand_a.eq(load_data)
+ comb += amo_operand_b.eq(amo_req_i.operand_b)
+
+ # we do not need a store request for load reserved
+ # or a failing store conditional
+ # we can bail-out without making any further requests
+ with m.If ((amo_req_i.amo_op == AMO_LR) | \
+ ((amo_req_i.amo_op == AMO_SC) & \
+ ((reservation_q.valid & \
+ (reservation_q.address != \
+ amo_req_i.operand_a[3:64])) | \
+ ~reservation_q.valid))):
+ comb += req_fsm_miss_valid.eq(0)
+ m.next = "IDLE"
+ comb += amo_resp_o.ack.eq(1)
+ # write-back the result
+ comb += amo_resp_o.result.eq(amo_operand_a)
+ # we know that the SC failed
+ with m.If (amo_req_i.amo_op == AMO_SC):
+ comb += amo_resp_o.result.eq(1)
+ # also clear the reservation
+ comb += reservation_d.valid.eq(0)
+ with m.Else():
+ comb += req_fsm_miss_valid.eq(1)
+
+ comb += req_fsm_miss_we .eq(1)
+ comb += req_fsm_miss_req .eq(ariane_axi::SINGLE_REQ)
+ comb += req_fsm_miss_size.eq(amo_req_i.size)
+ comb += req_fsm_miss_addr.eq(amo_req_i.operand_a)
+
+ comb += req_fsm_miss_wdata.eq(
+ data_align(amo_req_i.operand_a[0:3], amo_result_o))
+ comb += req_fsm_miss_be.eq(
+ be_gen(amo_req_i.operand_a[0:3], amo_req_i.size))
+
+ # place a reservation on the memory
+ with m.If (amo_req_i.amo_op == AMO_LR):
+ comb += reservation_d.address.eq(amo_req_i.operand_a[3:64])
+ comb += reservation_d.valid.eq(1)
+
+ # the request is valid or we didn't need to go for another store
+ with m.If (valid_miss_fsm):
+ m.next = "IDLE"
+ comb += amo_resp_o.ack.eq(1)
+ # write-back the result
+ comb += amo_resp_o.result.eq(amo_operand_a;
+
+ if (amo_req_i.amo_op == AMO_SC) begin
+ comb += amo_resp_o.result.eq(0)
+ # An SC must fail if there is another SC
+ # (to any address) between the LR and the SC in
+ # program order (even to the same address).
+ # in any case destroy the reservation
+ comb += reservation_d.valid.eq(0)
+
+ # check MSHR for aliasing
+
+ comb += mshr_addr_matches_o .eq(0)
+ comb += mshr_index_matches_o.eq()
+
+ for i in range(NR_PORTS):
+ # check mshr for potential matching of other units,
+ # exclude the unit currently being served
+ with m.If (mshr_q.valid & \
+ (mshr_addr_i[i][DCACHE_BYTE_OFFSET:56] == \
+ mshr_q.addr[DCACHE_BYTE_OFFSET:56])):
+ comb += mshr_addr_matches_o[i].eq(1)
+
+ # same as previous, but checking only the index
+ with m.If (mshr_q.valid & \
+ (mshr_addr_i[i][DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] == \
+ mshr_q.addr[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH])):
+ mshr_index_matches_o[i].eq(1)
+
+ # --------------------
+ # Sequential Process
+ # --------------------
+
+ """
+ #pragma translate_off
+ `ifndef VERILATOR
+ # assert that cache only hits on one way
+ assert property (
+ @(posedge clk_i) $onehot0(evict_way_q)) else $warning("Evict-way should be one-hot encoded");
+ `endif
+ #pragma translate_on
+ """
+
+ # ----------------------
+ # Bypass Arbiter
+ # ----------------------
+ # Connection Arbiter <-> AXI
+ req_fsm_bypass_valid = Signal()
+ req_fsm_bypass_addr = Signal(64)
+ req_fsm_bypass_wdata = Signal(64)
+ req_fsm_bypass_we = Signal()
+ req_fsm_bypass_be = Signal(8)
+ req_fsm_bypass_size = Signal(2)
+ gnt_bypass_fsm = Signal()
+ valid_bypass_fsm = Signal()
+ data_bypass_fsm = Signal(64)
+ logic [$clog2(NR_PORTS)-1:0] id_fsm_bypass;
+ logic [3:0] id_bypass_fsm;
+ logic [3:0] gnt_id_bypass_fsm;
+
+ i_bypass_arbiter = ib = AXIArbiter( NR_PORTS, 64)
+ comb += [
+ # Master Side
+ ib.data_req_i .eq( miss_req_valid & miss_req_bypass ),
+ ib.address_i .eq( miss_req_addr ),
+ ib.data_wdata_i .eq( miss_req_wdata ),
+ ib.data_we_i .eq( miss_req_we ),
+ ib.data_be_i .eq( miss_req_be ),
+ ib.data_size_i .eq( miss_req_size ),
+ ib.data_gnt_o .eq( bypass_gnt_o ),
+ ib.data_rvalid_o .eq( bypass_valid_o ),
+ ib.data_rdata_o .eq( bypass_data_o ),
+ # Slave Sid
+ ib.id_i .eq( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ),
+ ib.id_o .eq( id_fsm_bypass ),
+ ib.gnt_id_i .eq( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ),
+ ib.address_o .eq( req_fsm_bypass_addr ),
+ ib.data_wdata_o .eq( req_fsm_bypass_wdata ),
+ ib.data_req_o .eq( req_fsm_bypass_valid ),
+ ib.data_we_o .eq( req_fsm_bypass_we ),
+ ib.data_be_o .eq( req_fsm_bypass_be ),
+ ib.data_size_o .eq( req_fsm_bypass_size ),
+ ib.data_gnt_i .eq( gnt_bypass_fsm ),
+ ib.data_rvalid_i .eq( valid_bypass_fsm ),
+ ib.data_rdata_i .eq( data_bypass_fsm ),
+ ]
+
+ axi_adapter #(
+ .DATA_WIDTH ( 64 ),
+ .AXI_ID_WIDTH ( 4 ),
+ .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
+ ) i_bypass_axi_adapter (
+ .clk_i,
+ .rst_ni,
+ .req_i ( req_fsm_bypass_valid ),
+ .type_i ( ariane_axi::SINGLE_REQ ),
+ .gnt_o ( gnt_bypass_fsm ),
+ .addr_i ( req_fsm_bypass_addr ),
+ .we_i ( req_fsm_bypass_we ),
+ .wdata_i ( req_fsm_bypass_wdata ),
+ .be_i ( req_fsm_bypass_be ),
+ .size_i ( req_fsm_bypass_size ),
+ .id_i ( Cat(id_fsm_bypass, 0, 0) ),
+ .valid_o ( valid_bypass_fsm ),
+ .rdata_o ( data_bypass_fsm ),
+ .gnt_id_o ( gnt_id_bypass_fsm ),
+ .id_o ( id_bypass_fsm ),
+ .critical_word_o ( ), # not used for single requests
+ .critical_word_valid_o ( ), # not used for single requests
+ .axi_req_o ( axi_bypass_o ),
+ .axi_resp_i ( axi_bypass_i )
+ );
+
+ # ----------------------
+ # Cache Line AXI Refill
+ # ----------------------
+ axi_adapter #(
+ .DATA_WIDTH ( DCACHE_LINE_WIDTH ),
+ .AXI_ID_WIDTH ( 4 ),
+ .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET )
+ ) i_miss_axi_adapter (
+ .clk_i,
+ .rst_ni,
+ .req_i ( req_fsm_miss_valid ),
+ .type_i ( req_fsm_miss_req ),
+ .gnt_o ( gnt_miss_fsm ),
+ .addr_i ( req_fsm_miss_addr ),
+ .we_i ( req_fsm_miss_we ),
+ .wdata_i ( req_fsm_miss_wdata ),
+ .be_i ( req_fsm_miss_be ),
+ .size_i ( req_fsm_miss_size ),
+ .id_i ( Const(0b1100, 4) ),
+ .gnt_id_o ( ), # open
+ .valid_o ( valid_miss_fsm ),
+ .rdata_o ( data_miss_fsm ),
+ .id_o ( ),
+ .critical_word_o,
+ .critical_word_valid_o,
+ .axi_req_o ( axi_data_o ),
+ .axi_resp_i ( axi_data_i )
+ );
+
+ # -----------------
+ # Replacement LFSR
+ # -----------------
+ lfsr_8bit #(.WIDTH (DCACHE_SET_ASSOC)) i_lfsr (
+ .en_i ( lfsr_enable ),
+ .refill_way_oh ( lfsr_oh ),
+ .refill_way_bin ( lfsr_bin ),
+ .*
+ );
+
+ # -----------------
+ # AMO ALU
+ # -----------------
+ amo_alu i_amo_alu (
+ .amo_op_i ( amo_op ),
+ .amo_operand_a_i ( amo_operand_a ),
+ .amo_operand_b_i ( amo_operand_b ),
+ .amo_result_o ( amo_result_o )
+ );
+
+ # -----------------
+ # Struct Split
+ # -----------------
+
+ for i in range(NR_PORTS):
+ miss_req = MissReq()
+ comb += miss_req.eq(miss_req_i[i]);
+ comb += miss_req_valid [i] .eq(miss_req.valid)
+ comb += miss_req_bypass [i] .eq(miss_req.bypass)
+ comb += miss_req_addr [i] .eq(miss_req.addr)
+ comb += miss_req_wdata [i] .eq(miss_req.wdata)
+ comb += miss_req_we [i] .eq(miss_req.we)
+ comb += miss_req_be [i] .eq(miss_req.be)
+ comb += miss_req_size [i] .eq(miss_req.size)
+
+ # --------------
+ # AXI Arbiter
+ # --------------s
+ #
+ # Description: Arbitrates access to AXI refill/bypass
+ #
+class AXIArbiter:
+ def __init__(self, NR_PORTS = 3, DATA_WIDTH = 64):
+ self.NR_PORTS = NR_PORTS
+ self.DATA_WIDTH = DATA_WIDTH
+ self.pwid = pwid = ceil(log(NR_PORTS) / log(2))
+        self.rst_ni = ResetSignal()  # Asynchronous reset active low
+ # master ports
+ self.data_req_i = Signal(NR_PORTS)
+        self.address_i = Array(Signal(64, name="address_i")
+                               for i in range(NR_PORTS))
+        self.data_wdata_i = Array(Signal(64, name="data_wdata_i")
+                                  for i in range(NR_PORTS))
+        self.data_we_i = Signal(NR_PORTS)
+        self.data_be_i = Array(Signal(DATA_WIDTH//8, name="data_be_i")
+                               for i in range(NR_PORTS))
+        self.data_size_i = Array(Signal(2, name="data_size_i")
+                                 for i in range(NR_PORTS))
+        self.data_gnt_o = Signal(NR_PORTS)
+        self.data_rvalid_o = Signal(NR_PORTS)
+        self.data_rdata_o = Array(Signal(64, name="data_rdata_o")
+                                  for i in range(NR_PORTS))
+
+ # slave port
+ self.id_i = Signal(pwid)
+ self.id_o = Signal(pwid)
+ self.gnt_id_i = Signal(pwid)
+ self.data_req_o = Signal()
+ self.address_o = Signal(64)
+ self.data_wdata_o = Signal(DATA_WIDTH)
+ self.data_we_o = Signal()
+        self.data_be_o = Signal(DATA_WIDTH//8)
+ self.data_size_o = Signal(2)
+ self.data_gnt_i = Signal()
+ self.data_rvalid_i = Signal()
+ self.data_rdata_i = Signal(DATA_WIDTH)
+
+ def elaborate(self, platform):
+ #enum logic [1:0] { IDLE, REQ, SERVING } state_d, state_q;
+
+ class Packet:
+ def __init__(self, pwid, DATA_WIDTH):
+ self.id = Signal(pwid)
+ self.address = Signal(64)
+ self.data = Signal(64)
+ self.size = Signal(2)
+                self.be = Signal(DATA_WIDTH//8)
+ self.we = Signal()
+
+ request_index = Signal(self.pwid)
+ req_q = Packet(self.pwid, self.DATA_WIDTH)
+ req_d = Packet(self.pwid, self.DATA_WIDTH)
+
+ # request register
+ sync += req_q.eq(req_d)
+
+ # request port
+ comb += self.address_o .eq(req_q.address)
+ comb += self.data_wdata_o .eq(req_q.data)
+ comb += self.data_be_o .eq(req_q.be)
+ comb += self.data_size_o .eq(req_q.size)
+ comb += self.data_we_o .eq(req_q.we)
+ comb += self.id_o .eq(req_q.id)
+ comb += self.data_gnt_o .eq(0)
+ # read port
+ comb += self.data_rvalid_o .eq(0)
+ comb += self.data_rdata_o .eq(0)
+ comb += self.data_rdata_o[req_q.id].eq(data_rdata_i)
+
+ m.submodules.pp = pp = PriorityEncoder(self.NR_PORTS)
+ comb += pp.i.eq(self.data_req_i) # select one request (priority-based)
+ comb += request_index.eq(pp.o)
+
+ with m.Switch("state") as s:
+
+ with m.Case("IDLE"):
+ # wait for incoming requests (priority encoder data_req_i)
+ with m.If(~pp.n): # one output valid from encoder
+ comb += self.data_req_o .eq(self.data_req_i[i])
+ comb += self.data_gnt_o[i].eq(self.data_req_i[i])
+ # save the request
+ comb += req_d.address.eq(self.address_i[i])
+ comb += req_d.id.eq(request_index)
+ comb += req_d.data.eq(self.data_wdata_i[i])
+ comb += req_d.size.eq(self.data_size_i[i])
+ comb += req_d.be.eq(self.data_be_i[i])
+ comb += req_d.we.eq(self.data_we_i[i])
+ m.next = "SERVING"
+
+ comb += self.address_o .eq(self.address_i[request_index])
+ comb += self.data_wdata_o .eq(self.data_wdata_i[request_index])
+ comb += self.data_be_o .eq(self.data_be_i[request_index])
+ comb += self.data_size_o .eq(self.data_size_i[request_index])
+ comb += self.data_we_o .eq(self.data_we_i[request_index])
+ comb += self.id_o .eq(request_index)
+
+ with m.Case("SERVING"):
+ comb += self.data_req_o.eq(1)
+ with m.If (self.data_rvalid_i):
+ comb += self.data_rvalid_o[req_q.id].eq(1)
+ m.next = "IDLE"
+
+ # ------------
+ # Assertions
+ # ------------
+
+ """
+#pragma translate_off
+`ifndef VERILATOR
+# make sure that we eventually get an rvalid after we received a grant
+assert property (@(posedge clk_i) data_gnt_i |-> ##[1:$] data_rvalid_i )
+ else begin $error("There was a grant without a rvalid"); $stop(); end
+# assert that there is no grant without a request
+assert property (@(negedge clk_i) data_gnt_i |-> data_req_o)
+ else begin $error("There was a grant without a request."); $stop(); end
+# assert that the address does not contain X when request is sent
+assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_o)) )
+ else begin $error("address contains X when request is set"); $stop(); end
+
+`endif
+#pragma translate_on
+ """
+
--- /dev/null
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: Florian Zaruba, ETH Zurich
+# Date: 19/04/2017
+# Description: Memory Management Unit for Ariane, contains TLB and
+# address translation unit. SV48 as defined in
+# Volume II: RISC-V Privileged Architectures V1.10 Page 63
+
+import ariane_pkg::*;
+"""
+
+from nmigen import Const, Signal, Cat, Module, Mux
+from nmigen.cli import verilog, rtlil
+
+from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
+from tlb import TLB
+from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
+ LOAD_PAGE_FAULT, STORE_PAGE_FAULT)
+
+PRIV_LVL_M = Const(0b11, 2)
+PRIV_LVL_S = Const(0b01, 2)
+PRIV_LVL_U = Const(0b00, 2)
+
+
+class RVException:
+ def __init__(self):
+ self.cause = Signal(64) # cause of exception
+ self.tval = Signal(64) # more info of causing exception
+ # (e.g.: instruction causing it),
+ # address of LD/ST fault
+ self.valid = Signal()
+
+ def eq(self, inp):
+ res = []
+ for (o, i) in zip(self.ports(), inp.ports()):
+ res.append(o.eq(i))
+ return res
+
+ def __iter__(self):
+ yield self.cause
+ yield self.tval
+ yield self.valid
+
+ def ports(self):
+ return list(self)
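+
+# Usage sketch (illustrative): because eq() returns a list of assignments over
+# all ports, two exception records can be connected inside an elaborate() with
+# e.g. m.d.comb += self.lsu_exception_o.eq(self.misaligned_ex_i)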
+
+
+class ICacheReqI:
+ def __init__(self):
+ self.fetch_valid = Signal() # address translation valid
+ self.fetch_paddr = Signal(64) # physical address in
+ self.fetch_exception = RVException() # exception occurred during fetch
+
+ def __iter__(self):
+ yield self.fetch_valid
+ yield self.fetch_paddr
+ yield from self.fetch_exception
+
+ def ports(self):
+ return list(self)
+
+
+class ICacheReqO:
+ def __init__(self):
+ self.fetch_req = Signal() # address translation request
+ self.fetch_vaddr = Signal(64) # virtual address out
+
+ def __iter__(self):
+ yield self.fetch_req
+ yield self.fetch_vaddr
+
+ def ports(self):
+ return list(self)
+
+
+class MMU(Elaboratable):
+ def __init__(self, instr_tlb_entries = 4,
+ data_tlb_entries = 4,
+ asid_width = 1):
+ self.instr_tlb_entries = instr_tlb_entries
+ self.data_tlb_entries = data_tlb_entries
+ self.asid_width = asid_width
+
+ self.flush_i = Signal()
+ self.enable_translation_i = Signal()
+ self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
+ # IF interface
+ self.icache_areq_i = ICacheReqO()
+ self.icache_areq_o = ICacheReqI()
+ # LSU interface
+ # this is a more minimal interface because the actual addressing
+ # logic is handled in the LSU, which distinguishes loads and stores;
+ # what we do here is simple address translation
+ self.misaligned_ex_i = RVException()
+ self.lsu_req_i = Signal() # request address translation
+ self.lsu_vaddr_i = Signal(64) # virtual address in
+ self.lsu_is_store_i = Signal() # the translation is requested by a store
+ # if we need to walk the page table we can't grant in the same cycle
+
+ # Cycle 0
+ self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
+ # if translation hits in the DTLB
+ # Cycle 1
+ self.lsu_valid_o = Signal() # translation is valid
+ self.lsu_paddr_o = Signal(64) # translated address
+ self.lsu_exception_o = RVException() # addr translate threw exception
+
+ # General control signals
+ self.priv_lvl_i = Signal(2)
+ self.ld_st_priv_lvl_i = Signal(2)
+ self.sum_i = Signal()
+ self.mxr_i = Signal()
+ # input logic flag_mprv_i,
+ self.satp_ppn_i = Signal(44)
+ self.asid_i = Signal(self.asid_width)
+ self.flush_tlb_i = Signal()
+ # Performance counters
+ self.itlb_miss_o = Signal()
+ self.dtlb_miss_o = Signal()
+ # PTW memory interface
+ self.req_port_i = DCacheReqO()
+ self.req_port_o = DCacheReqI()
+
+ def elaborate(self, platform):
+ m = Module()
+
+ iaccess_err = Signal() # insufficient priv to access instr page
+ daccess_err = Signal() # insufficient priv to access data page
+ ptw_active = Signal() # PTW is currently walking a page table
+ walking_instr = Signal() # PTW is walking because of an ITLB miss
+ ptw_error = Signal() # PTW threw an exception
+
+ update_vaddr = Signal(48) # guessed
+ uaddr64 = Cat(update_vaddr, Const(0, 16)) # zero-extend 48-bit vaddr to 64
+ update_ptw_itlb = TLBUpdate(self.asid_width)
+ update_ptw_dtlb = TLBUpdate(self.asid_width)
+
+ itlb_lu_access = Signal()
+ itlb_content = PTE()
+ itlb_is_2M = Signal()
+ itlb_is_1G = Signal()
+ itlb_is_512G = Signal()
+ itlb_lu_hit = Signal()
+
+ dtlb_lu_access = Signal()
+ dtlb_content = PTE()
+ dtlb_is_2M = Signal()
+ dtlb_is_1G = Signal()
+ dtlb_is_512G = Signal()
+ dtlb_lu_hit = Signal()
+
+ # Assignments
+ m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
+ dtlb_lu_access.eq(self.lsu_req_i)
+ ]
+
+ # ITLB
+ m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
+ self.asid_width)
+ m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
+ i_tlb.update_i.eq(update_ptw_itlb),
+ i_tlb.lu_access_i.eq(itlb_lu_access),
+ i_tlb.lu_asid_i.eq(self.asid_i),
+ i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
+ itlb_content.eq(i_tlb.lu_content_o),
+ itlb_is_2M.eq(i_tlb.lu_is_2M_o),
+ itlb_is_1G.eq(i_tlb.lu_is_1G_o),
+ itlb_is_512G.eq(i_tlb.lu_is_512G_o),
+ itlb_lu_hit.eq(i_tlb.lu_hit_o),
+ ]
+
+ # DTLB
+ m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
+ self.asid_width)
+ m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
+ d_tlb.update_i.eq(update_ptw_dtlb),
+ d_tlb.lu_access_i.eq(dtlb_lu_access),
+ d_tlb.lu_asid_i.eq(self.asid_i),
+ d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
+ dtlb_content.eq(d_tlb.lu_content_o),
+ dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
+ dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
+ dtlb_is_512G.eq(d_tlb.lu_is_512G_o),
+ dtlb_lu_hit.eq(d_tlb.lu_hit_o),
+ ]
+
+ # PTW
+ m.submodules.ptw = ptw = PTW(self.asid_width)
+ m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
+ walking_instr.eq(ptw.walking_instr_o),
+ ptw_error.eq(ptw.ptw_error_o),
+ ptw.enable_translation_i.eq(self.enable_translation_i),
+
+ update_vaddr.eq(ptw.update_vaddr_o),
+ update_ptw_itlb.eq(ptw.itlb_update_o),
+ update_ptw_dtlb.eq(ptw.dtlb_update_o),
+
+ ptw.itlb_access_i.eq(itlb_lu_access),
+ ptw.itlb_hit_i.eq(itlb_lu_hit),
+ ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
+
+ ptw.dtlb_access_i.eq(dtlb_lu_access),
+ ptw.dtlb_hit_i.eq(dtlb_lu_hit),
+ ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
+
+ ptw.req_port_i.eq(self.req_port_i),
+ self.req_port_o.eq(ptw.req_port_o),
+ ]
+
+ # ila_1 i_ila_1 (
+ # .clk(clk_i), # input wire clk
+ # .probe0({req_port_o.address_tag, req_port_o.address_index}),
+ # .probe1(req_port_o.data_req), # input wire [63:0] probe1
+ # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2
+ # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3
+ # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4
+ # .probe5(ptw_error), # input wire [1:0] probe5
+ # .probe6(update_vaddr), # input wire [0:0] probe6
+ # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7
+ # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8
+ # .probe9(dtlb_lu_access), # input wire [0:0] probe9
+ # .probe10(lsu_vaddr_i), # input wire [0:0] probe10
+ # .probe11(dtlb_lu_hit), # input wire [0:0] probe11
+ # .probe12(itlb_lu_access), # input wire [0:0] probe12
+ # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13
+ # .probe14(itlb_lu_hit) # input wire [0:0] probe13
+ # );
+
+ #-----------------------
+ # Instruction Interface
+ #-----------------------
+ # The instruction interface is a simple request response interface
+
+ # MMU disabled: just pass through
+ m.d.comb += [self.icache_areq_o.fetch_valid.eq(
+ self.icache_areq_i.fetch_req),
+ # pass through in case address translation is disabled
+ self.icache_areq_o.fetch_paddr.eq(
+ self.icache_areq_i.fetch_vaddr)
+ ]
+ # two potential exception sources:
+ # 1. HPTW threw an exception -> signal with a page fault exception
+ # 2. We got an access error because of insufficient permissions ->
+ # throw an access exception
+ m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
+ # Check whether we are allowed to access this memory region
+ # from a fetch perspective
+
+ # PLATEN TODO: use PermissionValidator instead [we like modules]
+ m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
+ (((self.priv_lvl_i == PRIV_LVL_U) & \
+ ~itlb_content.u) | \
+ ((self.priv_lvl_i == PRIV_LVL_S) & \
+ itlb_content.u)))
+
+ # MMU enabled: address from TLB, request delayed until hit.
+ # Error when TLB hit and no access right or TLB hit and
+ # translated address not valid (e.g. AXI decode error),
+ # or when PTW performs walk due to ITLB miss and raises
+ # an error.
+ with m.If (self.enable_translation_i):
+ # we work with SV48, so if VM is enabled, check that
+ # bits [63:47] of the virtual address are all equal (canonical)
+ with m.If (self.icache_areq_i.fetch_req & \
+ ~(((~self.icache_areq_i.fetch_vaddr[47:64]) == 0) | \
+ ((self.icache_areq_i.fetch_vaddr[47:64]) == 0))):
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
+ fe.tval.eq(self.icache_areq_i.fetch_vaddr),
+ fe.valid.eq(1)
+ ]
+
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
+
+ # 4K page
+ paddr = Signal.like(self.icache_areq_o.fetch_paddr)
+ paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
+ itlb_content.ppn)
+ m.d.comb += paddr.eq(paddr4k)
+ # Mega page
+ with m.If(itlb_is_2M):
+ m.d.comb += paddr[12:21].eq(
+ self.icache_areq_i.fetch_vaddr[12:21])
+ # Giga page
+ with m.If(itlb_is_1G):
+ m.d.comb += paddr[12:30].eq(
+ self.icache_areq_i.fetch_vaddr[12:30])
+ m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
+ # Tera page
+ with m.If(itlb_is_512G):
+ m.d.comb += paddr[12:39].eq(
+ self.icache_areq_i.fetch_vaddr[12:39])
+ m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
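+ # Note on the composition above: a 4K leaf takes the whole ppn from
+ # the PTE; 2M, 1G and 512G superpages keep the low 21, 30 and 39
+ # address bits respectively from the virtual address and only the
+ # upper ppn bits from the PTE.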
+
+ # ---------
+ # ITLB Hit
+ # --------
+ # if we hit the ITLB output the request signal immediately
+ with m.If(itlb_lu_hit):
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(
+ self.icache_areq_i.fetch_req)
+ # we got an access error
+ with m.If (iaccess_err):
+ # throw a page fault
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
+ fe.tval.eq(self.icache_areq_i.fetch_vaddr),
+ fe.valid.eq(1)
+ ]
+ # ---------
+ # ITLB Miss
+ # ---------
+ # watch out for exceptions happening during walking the page table
+ with m.Elif(ptw_active & walking_instr):
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
+ fe.tval.eq(uaddr64),
+ fe.valid.eq(1)
+ ]
+
+ #-----------------------
+ # Data Interface
+ #-----------------------
+
+ lsu_vaddr = Signal(64)
+ dtlb_pte = PTE()
+ misaligned_ex = RVException()
+ lsu_req = Signal()
+ lsu_is_store = Signal()
+ dtlb_hit = Signal()
+ #dtlb_is_2M = Signal()
+ #dtlb_is_1G = Signal()
+ #dtlb_is_512 = Signal()
+
+ # check if we need to do translation or if we are always
+ # ready (e.g.: we are not translating anything)
+ m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
+ dtlb_lu_hit, 1))
+
+ # The data interface is simpler and only consists of a
+ # request/response interface
+ m.d.comb += [
+ # save request and DTLB response
+ lsu_vaddr.eq(self.lsu_vaddr_i),
+ lsu_req.eq(self.lsu_req_i),
+ misaligned_ex.eq(self.misaligned_ex_i),
+ dtlb_pte.eq(dtlb_content),
+ dtlb_hit.eq(dtlb_lu_hit),
+ lsu_is_store.eq(self.lsu_is_store_i),
+ #dtlb_is_2M.eq(dtlb_is_2M),
+ #dtlb_is_1G.eq(dtlb_is_1G),
+ ##dtlb_is_512.eq(self.dtlb_is_512G) #????
+ ]
+ m.d.sync += [
+ self.lsu_paddr_o.eq(lsu_vaddr),
+ self.lsu_valid_o.eq(lsu_req),
+ self.lsu_exception_o.eq(misaligned_ex),
+ ]
+
+ sverr = Signal()
+ usrerr = Signal()
+
+ m.d.comb += [
+ # mute misaligned exceptions if there is no request
+ # otherwise they will throw accidental exceptions
+ misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
+
+ # SUM is not set and we are trying to access a user
+ # page in supervisor mode
+ sverr.eq((self.ld_st_priv_lvl_i == PRIV_LVL_S) & ~self.sum_i & \
+ dtlb_pte.u),
+ # this is not a user page but we are in user mode and
+ # trying to access it
+ usrerr.eq((self.ld_st_priv_lvl_i == PRIV_LVL_U) & ~dtlb_pte.u),
+
+ # Check if the User flag is set, then we may only
+ # access it in supervisor mode if SUM is enabled
+ daccess_err.eq(sverr | usrerr),
+ ]
+
+ # translation is enabled and no misaligned exception occurred
+ with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
+ m.d.comb += lsu_req.eq(0)
+ # 4K page
+ paddr = Signal.like(lsu_vaddr)
+ paddr4k = Cat(lsu_vaddr[0:12], dtlb_pte.ppn) # data side: use the DTLB PTE
+ m.d.comb += paddr.eq(paddr4k)
+ # Mega page
+ with m.If(dtlb_is_2M):
+ m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
+ # Giga page
+ with m.If(dtlb_is_1G):
+ m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
+ m.d.sync += self.lsu_paddr_o.eq(paddr)
+ # TODO platen tera_page
+
+ # ---------
+ # DTLB Hit
+ # --------
+ with m.If(dtlb_hit & lsu_req):
+ m.d.comb += lsu_req.eq(1)
+ # this is a store
+ with m.If (lsu_is_store):
+ # check if the page is write-able and
+ # we are not violating privileges
+ # also check if the dirty flag is set
+ with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
+ le = self.lsu_exception_o
+ m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
+ le.tval.eq(lsu_vaddr),
+ le.valid.eq(1)
+ ]
+
+ # this is a load, check for sufficient access
+ # privileges - throw a page fault if necessary
+ with m.Elif(daccess_err):
+ le = self.lsu_exception_o
+ m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
+ le.tval.eq(lsu_vaddr),
+ le.valid.eq(1)
+ ]
+ # ---------
+ # DTLB Miss
+ # ---------
+ # watch out for exceptions
+ with m.Elif (ptw_active & ~walking_instr):
+ # page table walker threw an exception
+ with m.If (ptw_error):
+ # an error makes the translation valid
+ m.d.comb += lsu_req.eq(1)
+ # the page table walker can only throw page faults
+ le = self.lsu_exception_o
+ with m.If (lsu_is_store):
+ m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
+ le.tval.eq(uaddr64),
+ le.valid.eq(1)
+ ]
+ with m.Else():
+ m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
+ le.tval.eq(uaddr64),
+ le.valid.eq(1)
+ ]
+
+ return m
+
+ def ports(self):
+ return [self.flush_i, self.enable_translation_i,
+ self.en_ld_st_translation_i,
+ self.lsu_req_i,
+ self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
+ self.lsu_valid_o, self.lsu_paddr_o,
+ self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
+ self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
+ self.itlb_miss_o, self.dtlb_miss_o] + \
+ self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
+ self.req_port_i.ports() + self.req_port_o.ports() + \
+ self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
+
+if __name__ == '__main__':
+ mmu = MMU()
+ vl = rtlil.convert(mmu, ports=mmu.ports())
+ with open("test_mmu.il", "w") as f:
+ f.write(vl)
+
--- /dev/null
+pseudo-LRU
+
+two-way set associative - one bit
+
+ indicates which line of the two has been referenced more recently
+
+
+four-way set associative - three bits
+
+ each bit represents one branch point in a binary decision tree; let 1
+ represent that the left side has been referenced more recently than the
+ right side, and 0 vice-versa
+
+ are all 4 lines valid?
+ / \
+ yes no, use an invalid line
+ |
+ |
+ |
+ bit_0 == 0? state | replace ref to | next state
+ / \ ------+-------- -------+-----------
+ y n 00x | line_0 line_0 | 11_
+ / \ 01x | line_1 line_1 | 10_
+ bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
+ / \ / \ 1x1 | line_3 line_3 | 0_0
+ y n y n
+ / \ / \ ('x' means ('_' means unchanged)
+ line_0 line_1 line_2 line_3 don't care)
+
+ (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
+ Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
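+
+ a minimal Python sketch of the three-bit scheme above (the state is
+ [bit_0, bit_1, bit_2] and line numbering follows the table; the names
+ are illustrative only):
+
+     def plru4_touch(state, line):
+         # update the tree bits after a reference to 'line' (0..3)
+         b0, b1, b2 = state
+         if line in (0, 1):
+             b0 = 1                    # left pair referenced more recently
+             b1 = 1 if line == 0 else 0
+         else:
+             b0 = 0                    # right pair referenced more recently
+             b2 = 1 if line == 2 else 0
+         return [b0, b1, b2]
+
+     def plru4_victim(state):
+         # pick the pseudo-LRU line, assuming all four lines are valid
+         b0, b1, b2 = state
+         if b0 == 0:                   # left pair is least recently used
+             return 0 if b1 == 0 else 1
+         return 2 if b2 == 0 else 3    # right pair is least recently used
+
+ e.g. starting from state 000, touching line_0 and then line_2 gives
+ 110 and then 011, after which plru4_victim picks line_1 (matching the
+ "01x -> line_1" row of the table)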
+
+
+note that there is a 6-bit encoding for true LRU for four-way set associative
+
+ bit 0: bank[1] more recently used than bank[0]
+ bit 1: bank[2] more recently used than bank[0]
+ bit 2: bank[2] more recently used than bank[1]
+ bit 3: bank[3] more recently used than bank[0]
+ bit 4: bank[3] more recently used than bank[1]
+ bit 5: bank[3] more recently used than bank[2]
+
+ this results in 24 valid bit patterns within the 64 possible bit patterns
+ (4! possible valid traces for bank references)
+
+ e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
+
+ you can implement a state machine with a 256x6 ROM (6-bit state encoding
+ appended with a 2-bit bank reference input will yield a new 6-bit state),
+ and you can implement an LRU bank indicator with a 64x2 ROM
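+
+ a small Python sketch of this 6-bit encoding (pair_bits[(i, j)] holds
+ the bit "bank[i] more recently used than bank[j]" for i > j; purely
+ illustrative):
+
+     PAIRS = [(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2)]
+
+     def encode(trace):
+         # trace lists the banks from LRU to MRU, e.g. [0, 1, 2, 3]
+         rank = {bank: pos for pos, bank in enumerate(trace)}
+         return {(i, j): int(rank[i] > rank[j]) for (i, j) in PAIRS}
+
+     def lru_bank(bits):
+         # the LRU bank is the one that every other bank "beats"
+         for j in range(4):
+             if all(bits[(i, j)] if i > j else 1 - bits[(j, i)]
+                    for i in range(4) if i != j):
+                 return j
+
+     assert encode([0, 1, 2, 3]) == {p: 1 for p in PAIRS}   # i.e. 111111
+     assert lru_bank(encode([0, 1, 2, 3])) == 0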
+
--- /dev/null
+# moved to nmutil https://git.libre-soc.org/?p=nmutil.git;a=tree
+from nmutil.plru import PLRU
--- /dev/null
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 24.4.2017
+# Description: Hardware-PTW
+
+/* verilator lint_off WIDTH */
+import ariane_pkg::*;
+
+see linux kernel source:
+
+* "arch/riscv/include/asm/page.h"
+* "arch/riscv/include/asm/mmu_context.h"
+* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
+
+"""
+
+from nmigen import Const, Signal, Cat, Module, Elaboratable
+from nmigen.hdl.ast import ArrayProxy
+from nmigen.cli import verilog, rtlil
+from math import log2
+
+
+DCACHE_SET_ASSOC = 8
+CONFIG_L1D_SIZE = 32*1024
+DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC))
+DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH
+
+ASID_WIDTH = 8
+
+
+class DCacheReqI:
+ def __init__(self):
+ self.address_index = Signal(DCACHE_INDEX_WIDTH)
+ self.address_tag = Signal(DCACHE_TAG_WIDTH)
+ self.data_wdata = Signal(64)
+ self.data_req = Signal()
+ self.data_we = Signal()
+ self.data_be = Signal(8)
+ self.data_size = Signal(2)
+ self.kill_req = Signal()
+ self.tag_valid = Signal()
+
+ def eq(self, inp):
+ res = []
+ for (o, i) in zip(self.ports(), inp.ports()):
+ res.append(o.eq(i))
+ return res
+
+ def ports(self):
+ return [self.address_index, self.address_tag,
+ self.data_wdata, self.data_req,
+ self.data_we, self.data_be, self.data_size,
+ self.kill_req, self.tag_valid,
+ ]
+
+class DCacheReqO:
+ def __init__(self):
+ self.data_gnt = Signal()
+ self.data_rvalid = Signal()
+ self.data_rdata = Signal(64) # actually in PTE object format
+
+ def eq(self, inp):
+ res = []
+ for (o, i) in zip(self.ports(), inp.ports()):
+ res.append(o.eq(i))
+ return res
+
+ def ports(self):
+ return [self.data_gnt, self.data_rvalid, self.data_rdata]
+
+
+class PTE: #(RecordObject):
+ def __init__(self):
+ self.v = Signal()
+ self.r = Signal()
+ self.w = Signal()
+ self.x = Signal()
+ self.u = Signal()
+ self.g = Signal()
+ self.a = Signal()
+ self.d = Signal()
+ self.rsw = Signal(2)
+ self.ppn = Signal(44)
+ self.reserved = Signal(10)
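+ # Field order follows the RISC-V Sv48 PTE layout, LSB first:
+ # bit 0 V, 1 R, 2 W, 3 X, 4 U, 5 G, 6 A, 7 D, bits 8-9 RSW,
+ # bits 10-53 PPN, bits 54-63 reserved. flatten() below depends on
+ # __iter__ yielding the fields in exactly this order.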
+
+ def flatten(self):
+ return Cat(*self.ports())
+
+ def eq(self, x):
+ if isinstance(x, ArrayProxy):
+ res = []
+ for o in self.ports():
+ i = getattr(x, o.name)
+ res.append(i)
+ x = Cat(*res)
+ else:
+ x = x.flatten()
+ return self.flatten().eq(x)
+
+ def __iter__(self):
+ """ order is critical so that flatten creates LSB to MSB
+ """
+ yield self.v
+ yield self.r
+ yield self.w
+ yield self.x
+ yield self.u
+ yield self.g
+ yield self.a
+ yield self.d
+ yield self.rsw
+ yield self.ppn
+ yield self.reserved
+
+ def ports(self):
+ return list(self)
+
+
+class TLBUpdate:
+ def __init__(self, asid_width):
+ self.valid = Signal() # valid flag
+ self.is_2M = Signal()
+ self.is_1G = Signal()
+ self.is_512G = Signal()
+ self.vpn = Signal(36)
+ self.asid = Signal(asid_width)
+ self.content = PTE()
+
+ def flatten(self):
+ return Cat(*self.ports())
+
+ def eq(self, x):
+ return self.flatten().eq(x.flatten())
+
+ def ports(self):
+ return [self.valid, self.is_2M, self.is_1G, self.is_512G,
+ self.vpn, self.asid] + self.content.ports()
+
+
+# SV48 defines four levels of page tables
+LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
+LVL2 = Const(1, 2)
+LVL3 = Const(2, 2)
+LVL4 = Const(3, 2)
+
+
+class PTW(Elaboratable):
+ def __init__(self, asid_width=8):
+ self.asid_width = asid_width
+
+ self.flush_i = Signal() # flush everything, we need to do this because
+ # actually everything we do is speculative at this stage
+ # e.g.: there could be a CSR instruction that changes everything
+ self.ptw_active_o = Signal(reset=1) # active if not IDLE
+ self.walking_instr_o = Signal() # set when walking for TLB
+ self.ptw_error_o = Signal() # set when an error occurred
+ self.enable_translation_i = Signal() # CSRs indicate to enable SV48
+ self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st
+
+ self.lsu_is_store_i = Signal() # translation triggered by store
+ # PTW memory interface
+ self.req_port_i = DCacheReqO()
+ self.req_port_o = DCacheReqI()
+
+ # to TLBs, update logic
+ self.itlb_update_o = TLBUpdate(asid_width)
+ self.dtlb_update_o = TLBUpdate(asid_width)
+
+ self.update_vaddr_o = Signal(48)
+
+ self.asid_i = Signal(self.asid_width)
+ # from TLBs
+ # did we miss?
+ self.itlb_access_i = Signal()
+ self.itlb_hit_i = Signal()
+ self.itlb_vaddr_i = Signal(64)
+
+ self.dtlb_access_i = Signal()
+ self.dtlb_hit_i = Signal()
+ self.dtlb_vaddr_i = Signal(64)
+ # from CSR file
+ self.satp_ppn_i = Signal(44) # ppn from satp
+ self.mxr_i = Signal()
+ # Performance counters
+ self.itlb_miss_o = Signal()
+ self.dtlb_miss_o = Signal()
+
+ def ports(self):
+ # record-style interfaces are expanded into their individual signals
+ # so that rtlil/verilog conversion sees plain ports
+ return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
+ self.enable_translation_i, self.en_ld_st_translation_i,
+ self.lsu_is_store_i,
+ self.update_vaddr_o,
+ self.asid_i,
+ self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
+ self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
+ self.satp_ppn_i, self.mxr_i,
+ self.itlb_miss_o, self.dtlb_miss_o
+ ] + self.req_port_i.ports() + self.req_port_o.ports() + \
+ self.itlb_update_o.ports() + self.dtlb_update_o.ports()
+
+ def elaborate(self, platform):
+ m = Module()
+
+ # input registers
+ data_rvalid = Signal()
+ data_rdata = Signal(64)
+
+ # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
+ # is spec'd in 64-bit binary-format: better to spec as Record?
+ pte = PTE()
+ m.d.comb += pte.flatten().eq(data_rdata)
+
+ # SV48 defines four levels of page tables
+ ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
+ ptw_lvl1 = Signal()
+ ptw_lvl2 = Signal()
+ ptw_lvl3 = Signal()
+ ptw_lvl4 = Signal()
+ m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
+ ptw_lvl2.eq(ptw_lvl == LVL2),
+ ptw_lvl3.eq(ptw_lvl == LVL3),
+ ptw_lvl4.eq(ptw_lvl == LVL4)
+ ]
+
+ # is this an instruction page table walk?
+ is_instr_ptw = Signal()
+ global_mapping = Signal()
+ # latched tag signal
+ tag_valid = Signal()
+ # register the ASID
+ tlb_update_asid = Signal(self.asid_width)
+ # register VPN we need to walk, SV48 defines a 48 bit virtual addr
+ vaddr = Signal(64)
+ # 4 byte aligned physical pointer
+ ptw_pptr = Signal(56)
+
+ end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH
+ m.d.sync += [
+ # Assignments
+ self.update_vaddr_o.eq(vaddr),
+
+ self.walking_instr_o.eq(is_instr_ptw),
+ # directly output the correct physical address
+ self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
+ self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
+ # we are never going to kill this request
+ self.req_port_o.kill_req.eq(0), # XXX assign comb?
+ # we are never going to write with the HPTW
+ self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
+ # -----------
+ # TLB Update
+ # -----------
+ self.itlb_update_o.vpn.eq(vaddr[12:48]),
+ self.dtlb_update_o.vpn.eq(vaddr[12:48]),
+ # update the correct page table level
+ self.itlb_update_o.is_2M.eq(ptw_lvl3),
+ self.itlb_update_o.is_1G.eq(ptw_lvl2),
+ self.itlb_update_o.is_512G.eq(ptw_lvl1),
+ self.dtlb_update_o.is_2M.eq(ptw_lvl3),
+ self.dtlb_update_o.is_1G.eq(ptw_lvl2),
+ self.dtlb_update_o.is_512G.eq(ptw_lvl1),
+
+ # output the correct ASID
+ self.itlb_update_o.asid.eq(tlb_update_asid),
+ self.dtlb_update_o.asid.eq(tlb_update_asid),
+ # set the global mapping bit
+ self.itlb_update_o.content.eq(pte),
+ self.itlb_update_o.content.g.eq(global_mapping),
+ self.dtlb_update_o.content.eq(pte),
+ self.dtlb_update_o.content.g.eq(global_mapping),
+
+ self.req_port_o.tag_valid.eq(tag_valid),
+ ]
+
+ #-------------------
+ # Page table walker #needs update
+ #-------------------
+ # A virtual address va is translated into a physical address pa as
+ # follows:
+ # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv48,
+ # PAGESIZE=2^12 and LEVELS=4.)
+ # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
+ # (For Sv48, PTESIZE=8.)
+ # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
+ # access exception.
+ # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
+ # step 5. Otherwise, this PTE is a pointer to the next level of
+ # the page table.
+ # Let i=i-1. If i < 0, stop and raise an access exception.
+ # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2.
+ # 5. A leaf PTE has been found. Determine if the requested memory
+ # access is allowed by the pte.r, pte.w, and pte.x bits. If not,
+ # stop and raise an access exception. Otherwise, the translation is
+ # successful. Set pte.a to 1, and, if the memory access is a
+ # store, set pte.d to 1.
+ # The translated physical address is given as follows:
+ # - pa.pgoff = va.pgoff.
+ # - If i > 0, then this is a superpage translation and
+ # pa.ppn[i-1:0] = va.vpn[i-1:0].
+ # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
+ # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned
+ # superpage stop and raise a page-fault exception.
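+ #
+ # Purely as an illustration (not used by the hardware below), the
+ # algorithm above boils down to roughly the following software model;
+ # read_pte() and PageFault are hypothetical stand-ins:
+ #
+ #   a = satp_ppn << 12                     # step 1 (LEVELS = 4 for Sv48)
+ #   for i in reversed(range(4)):           # i = 3, 2, 1, 0
+ #       vpn_i = (vaddr >> (12 + 9 * i)) & 0x1ff
+ #       pte = read_pte(a + vpn_i * 8)      # step 2 (PTESIZE = 8)
+ #       if not pte.v or (not pte.r and pte.w):
+ #           raise PageFault()              # step 3
+ #       if pte.r or pte.x:
+ #           break                          # step 5: leaf found at level i
+ #       a = pte.ppn << 12                  # step 4: descend one level
+ #   else:
+ #       raise PageFault()                  # ran out of levels
+ #   low = 12 + 9 * i                       # offset + superpage bits
+ #   pa = ((pte.ppn >> (9 * i)) << low) | (vaddr & ((1 << low) - 1))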
+
+ m.d.sync += tag_valid.eq(0)
+
+ # default assignments
+ m.d.comb += [
+ # PTW memory interface
+ self.req_port_o.data_req.eq(0),
+ self.req_port_o.data_be.eq(Const(0xFF, 8)),
+ self.req_port_o.data_size.eq(Const(0b11, 2)),
+ self.req_port_o.data_we.eq(0),
+ self.ptw_error_o.eq(0),
+ self.itlb_update_o.valid.eq(0),
+ self.dtlb_update_o.valid.eq(0),
+
+ self.itlb_miss_o.eq(0),
+ self.dtlb_miss_o.eq(0),
+ ]
+
+ # ------------
+ # State Machine
+ # ------------
+
+ with m.FSM() as fsm:
+
+ with m.State("IDLE"):
+ self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
+ ptw_pptr, vaddr, tlb_update_asid)
+
+ with m.State("WAIT_GRANT"):
+ self.grant(m, tag_valid, data_rvalid)
+
+ with m.State("PTE_LOOKUP"):
+ # we wait for the valid signal
+ with m.If(data_rvalid):
+ self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4,
+ data_rvalid, global_mapping,
+ is_instr_ptw, ptw_pptr)
+
+ # Propagate error to MMU/LSU
+ with m.State("PROPAGATE_ERROR"):
+ m.next = "IDLE"
+ m.d.comb += self.ptw_error_o.eq(1)
+
+ # wait for the rvalid before going back to IDLE
+ with m.State("WAIT_RVALID"):
+ with m.If(data_rvalid):
+ m.next = "IDLE"
+
+ m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
+ data_rvalid.eq(self.req_port_i.data_rvalid)
+ ]
+
+ return m
+
+ def set_grant_state(self, m):
+ # should we have flushed before we got an rvalid,
+ # wait for it until going back to IDLE
+ with m.If(self.flush_i):
+ with m.If (self.req_port_i.data_gnt):
+ m.next = "WAIT_RVALID"
+ with m.Else():
+ m.next = "IDLE"
+ with m.Else():
+ m.next = "WAIT_GRANT"
+
+ def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
+ ptw_pptr, vaddr, tlb_update_asid):
+ # by default we start with the top-most page table
+ m.d.sync += [is_instr_ptw.eq(0),
+ ptw_lvl.eq(LVL1),
+ global_mapping.eq(0),
+ self.ptw_active_o.eq(0), # deactive (IDLE)
+ ]
+ # work out itlb/dtlb miss
+ m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
+ self.itlb_access_i & \
+ ~self.itlb_hit_i & \
+ ~self.dtlb_access_i)
+ m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
+ self.dtlb_access_i & \
+ ~self.dtlb_hit_i)
+ # we got an ITLB miss?
+ with m.If(self.itlb_miss_o):
+ # root page table: index with vpn[3] = vaddr[39:48]
+ pptr = Cat(Const(0, 3), self.itlb_vaddr_i[39:48],
+ self.satp_ppn_i)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ is_instr_ptw.eq(1),
+ vaddr.eq(self.itlb_vaddr_i),
+ tlb_update_asid.eq(self.asid_i),
+ ]
+ self.set_grant_state(m)
+
+ # we got a DTLB miss?
+ with m.Elif(self.dtlb_miss_o):
+ # root page table: index with vpn[3] = vaddr[39:48]
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[39:48],
+ self.satp_ppn_i)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ vaddr.eq(self.dtlb_vaddr_i),
+ tlb_update_asid.eq(self.asid_i),
+ ]
+ self.set_grant_state(m)
+
+ def grant(self, m, tag_valid, data_rvalid):
+ # we've got a data WAIT_GRANT so tell the
+ # cache that the tag is valid
+
+ # send a request out
+ m.d.comb += self.req_port_o.data_req.eq(1)
+ # wait for the WAIT_GRANT
+ with m.If(self.req_port_i.data_gnt):
+ # send the tag valid signal one cycle later
+ m.d.sync += tag_valid.eq(1)
+ # should we have flushed before we got an rvalid,
+ # wait for it until going back to IDLE
+ with m.If(self.flush_i):
+ with m.If (~data_rvalid):
+ m.next = "WAIT_RVALID"
+ with m.Else():
+ m.next = "IDLE"
+ with m.Else():
+ m.next = "PTE_LOOKUP"
+
+ def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4,
+ data_rvalid, global_mapping,
+ is_instr_ptw, ptw_pptr):
+ # temporaries
+ pte_rx = Signal(reset_less=True)
+ pte_exe = Signal(reset_less=True)
+ pte_inv = Signal(reset_less=True)
+ pte_a = Signal(reset_less=True)
+ st_wd = Signal(reset_less=True)
+ m.d.comb += [pte_rx.eq(pte.r | pte.x), # leaf PTE (readable or executable)
+ pte_exe.eq(~pte.x | ~pte.a), # not executable, or A bit clear
+ pte_inv.eq(~pte.v | (~pte.r & pte.w)), # invalid encoding
+ pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))), # accessed and readable (or X with MXR)
+ st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))] # store to non-writable or non-dirty page
+
+ l1err = Signal(reset_less=True)
+ l2err = Signal(reset_less=True)
+ l3err = Signal(reset_less=True)
+ m.d.comb += [l3err.eq(ptw_lvl3 & (pte.ppn[0:9] != Const(0, 9))),
+ l2err.eq(ptw_lvl2 & (pte.ppn[0:18] != Const(0, 18))),
+ l1err.eq(ptw_lvl1 & (pte.ppn[0:27] != Const(0, 27)))]
+
+ # check if the global mapping bit is set
+ with m.If (pte.g):
+ m.d.sync += global_mapping.eq(1)
+
+ m.next = "IDLE"
+
+ # -------------
+ # Invalid PTE
+ # -------------
+ # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
+ # stop and raise a page-fault exception.
+ with m.If (pte_inv):
+ m.next = "PROPAGATE_ERROR"
+
+ # -----------
+ # Valid PTE
+ # -----------
+
+ # it is a valid PTE
+ # if pte.r = 1 or pte.x = 1 it is a valid PTE
+ with m.Elif (pte_rx):
+ # Valid translation found (either 1G, 2M or 4K)
+ with m.If(is_instr_ptw):
+ # ------------
+ # Update ITLB
+ # ------------
+ # If page not executable, we can directly raise error.
+ # This doesn't put a useless entry into the TLB.
+ # The same idea applies to the access flag since we let
+ # the access flag be managed by SW.
+ with m.If (pte_exe):
+ # non-executable or A=0: raise an error right away
+ m.next = "PROPAGATE_ERROR"
+ with m.Else():
+ m.d.comb += self.itlb_update_o.valid.eq(1)
+
+ with m.Else():
+ # ------------
+ # Update DTLB
+ # ------------
+ # Check if the access flag has been set, otherwise
+ # throw page-fault and let software handle those bits.
+ # If page not readable (there are no write-only pages)
+ # directly raise an error. This doesn't put a useless
+ # entry into the TLB.
+ with m.If(pte_a):
+ m.d.comb += self.dtlb_update_o.valid.eq(1)
+ with m.Else():
+ m.next = "PROPAGATE_ERROR"
+ # Request is a store: perform additional checks
+ # If the request was a store and the page not
+ # write-able, raise an error
+ # the same applies if the dirty flag is not set
+ with m.If (st_wd):
+ m.d.comb += self.dtlb_update_o.valid.eq(0)
+ m.next = "PROPAGATE_ERROR"
+
+ # check if the ppn is correctly aligned: Case (6)
+ with m.If(l1err | l2err | l3err):
+ m.next = "PROPAGATE_ERROR"
+ m.d.comb += [self.dtlb_update_o.valid.eq(0),
+ self.itlb_update_o.valid.eq(0)]
+
+ # this is a pointer to the next TLB level
+ with m.Else():
+ # pointer to next level of page table
+ with m.If (ptw_lvl1):
+ # we are in the second level now
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39], pte.ppn)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ ptw_lvl.eq(LVL2)
+ ]
+ with m.If(ptw_lvl2):
+ # here we received a pointer to the third level
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ ptw_lvl.eq(LVL3)
+ ]
+ with m.If(ptw_lvl3): #guess: shift page levels by one
+ # here we received a pointer to the fourth level
+ # the last one is near the page offset
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ ptw_lvl.eq(LVL4)
+ ]
+ self.set_grant_state(m)
+
+ with m.If (ptw_lvl4):
+ # Should already be the last level
+ # page table => Error
+ m.d.sync += ptw_lvl.eq(LVL4)
+ m.next = "PROPAGATE_ERROR"
+
+
+if __name__ == '__main__':
+ ptw = PTW()
+ vl = rtlil.convert(ptw, ports=ptw.ports())
+ with open("test_ptw.il", "w") as f:
+ f.write(vl)
--- /dev/null
+import sys
+from soc.TLB.ariane.plru import PLRU
+from nmigen.compat.sim import run_simulation
+
+
+def tbench(dut):
+ yield
+
+
+if __name__ == "__main__":
+ dut = PLRU(4)
+ run_simulation(dut, tbench(dut), vcd_name="test_plru.vcd")
+ print("PLRU Unit Test Success")
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from soc.TLB.ariane.ptw import PTW, PTE
+
+# unit was changed, test needs to be changed
+
+
+def tbench(dut):
+
+ addr = 0x8000000
+
+ #pte = PTE()
+ # yield pte.v.eq(1)
+ # yield pte.r.eq(1)
+
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield dut.req_port_i.data_rvalid.eq(1)
+ yield dut.req_port_i.data_rdata.eq(0x43) # pte.flatten())
+
+ # data lookup
+ yield dut.en_ld_st_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000000)
+
+ yield
+ yield
+ yield
+
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x200000)
+
+ yield
+ yield
+ yield
+
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000011)
+
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+
+ # data lookup, PTW levels 1-2-3
+ addr = 0x4000000
+ yield dut.dtlb_vaddr_i.eq(addr)
+ yield dut.mxr_i.eq(0x1)
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield dut.req_port_i.data_rvalid.eq(1)
+ # pte.flatten())
+ yield dut.req_port_i.data_rdata.eq(0x41 | (addr >> 12) << 10)
+
+ yield dut.en_ld_st_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(addr)
+
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000011)
+
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+ yield
+ yield
+
+ # instruction lookup
+ yield dut.en_ld_st_translation_i.eq(0)
+ yield dut.enable_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x800000)
+
+ yield
+ yield
+ yield
+
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x200000)
+
+ yield
+ yield
+ yield
+
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x800011)
+
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+
+ yield
+
+
+def test_ptw():
+ dut = PTW()
+ run_simulation(dut, tbench(dut), vcd_name="test_ptw.vcd")
+ print("PTW Unit Test Success")
+
+
+if __name__ == "__main__":
+ test_ptw()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+
+from soc.TLB.ariane.tlb import TLB
+
+
+def set_vaddr(addr):
+ yield dut.lu_vaddr_i.eq(addr)
+ yield dut.update_i.vpn.eq(addr >> 12)
+
+
+def tbench(dut):
+ yield dut.lu_access_i.eq(1)
+ yield dut.lu_asid_i.eq(1)
+ yield dut.update_i.valid.eq(1)
+ yield dut.update_i.is_1G.eq(0)
+ yield dut.update_i.is_2M.eq(0)
+ yield dut.update_i.asid.eq(1)
+ yield dut.update_i.content.ppn.eq(0)
+ yield dut.update_i.content.rsw.eq(0)
+ yield dut.update_i.content.r.eq(1)
+
+ yield
+
+ addr = 0x80000
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x90001
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x28000000
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x28000001
+ yield from set_vaddr(addr)
+
+ addr = 0x28000001
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x1000040000
+ yield from set_vaddr(addr)
+ yield
+
+ addr = 0x1000040001
+ yield from set_vaddr(addr)
+ yield
+
+ yield dut.update_i.is_1G.eq(1)
+ addr = 0x2040000
+ yield from set_vaddr(addr)
+ yield
+
+ yield dut.update_i.is_1G.eq(1)
+ addr = 0x2040001
+ yield from set_vaddr(addr)
+ yield
+
+ yield
+
+
+if __name__ == "__main__":
+ dut = TLB()
+ run_simulation(dut, tbench(dut), vcd_name="test_tlb.vcd")
+ print("TLB Unit Test Success")
--- /dev/null
+from nmigen.compat.sim import run_simulation
+
+from soc.TLB.ariane.tlb_content import TLBContent
+from soc.TestUtil.test_helper import assert_op, assert_eq
+
+
+def update(dut, a, t, g, m):
+ yield dut.replace_en_i.eq(1)
+ yield dut.update_i.valid.eq(1)
+ yield dut.update_i.is_512G.eq(t)
+ yield dut.update_i.is_1G.eq(g)
+ yield dut.update_i.is_2M.eq(m)
+ yield dut.update_i.vpn.eq(a)
+ yield
+ yield
+
+
+def check_hit(dut, hit, pagesize):
+ hit_d = yield dut.lu_hit_o
+ assert_eq("hit", hit_d, hit)
+
+ if(hit):
+ if(pagesize == "t"):
+ hitp = yield dut.lu_is_512G_o
+ assert_eq("lu_is_512G_o", hitp, 1)
+ elif(pagesize == "g"):
+ hitp = yield dut.lu_is_1G_o
+ assert_eq("lu_is_1G_o", hitp, 1)
+ elif(pagesize == "m"):
+ hitp = yield dut.lu_is_2M_o
+ assert_eq("lu_is_2M_o", hitp, 1)
+
+
+def addr(a, b, c, d):
+ return a | b << 9 | c << 18 | d << 27
+
+
+def tbench(dut):
+ yield dut.vpn0.eq(0x0A)
+ yield dut.vpn1.eq(0x0B)
+ yield dut.vpn2.eq(0x0C)
+ yield dut.vpn3.eq(0x0D)
+ yield from update(dut, addr(0xFF, 0xFF, 0xFF, 0x0D), 1, 0, 0)
+ yield from check_hit(dut, 1, "t")
+
+ yield from update(dut, addr(0xFF, 0xFF, 0x0C, 0x0D), 0, 1, 0)
+ yield from check_hit(dut, 1, "g")
+
+ yield from update(dut, addr(0xFF, 0x0B, 0x0C, 0x0D), 0, 0, 1)
+ yield from check_hit(dut, 1, "m")
+
+ yield from update(dut, addr(0x0A, 0x0B, 0x0C, 0x0D), 0, 0, 0)
+ yield from check_hit(dut, 1, "")
+
+ yield from update(dut, addr(0xAA, 0xBB, 0xCC, 0xDD), 0, 0, 0)
+ yield from check_hit(dut, 0, "miss")
+
+
+if __name__ == "__main__":
+ dut = TLBContent(4, 4)
+ #
+ run_simulation(dut, tbench(dut), vcd_name="test_tlb_content.vcd")
+ print("TLBContent Unit Test Success")
--- /dev/null
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 21.4.2017
+# Description: Translation Lookaside Buffer, SV48
+# fully set-associative
+
+Implementation in c++:
+https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp
+
+Text description:
+https://people.cs.clemson.edu/~mark/464/p_lru.txt
+
+Online simulator:
+http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html
+"""
+from math import log2
+from nmigen import Signal, Module, Cat, Const, Array, Elaboratable
+from nmigen.cli import verilog, rtlil
+from nmigen.lib.coding import Encoder
+
+from soc.TLB.ariane.ptw import TLBUpdate, PTE, ASID_WIDTH
+from soc.TLB.ariane.plru import PLRU
+from soc.TLB.ariane.tlb_content import TLBContent
+
+TLB_ENTRIES = 8
+
+
+class TLB(Elaboratable):
+ def __init__(self, tlb_entries=8, asid_width=8):
+ self.tlb_entries = tlb_entries
+ self.asid_width = asid_width
+
+ self.flush_i = Signal() # Flush signal
+ # Lookup signals
+ self.lu_access_i = Signal()
+ self.lu_asid_i = Signal(self.asid_width)
+ self.lu_vaddr_i = Signal(64)
+ self.lu_content_o = PTE()
+ self.lu_is_2M_o = Signal()
+ self.lu_is_1G_o = Signal()
+ self.lu_is_512G_o = Signal()
+ self.lu_hit_o = Signal()
+ # Update TLB
+ self.pte_width = len(self.lu_content_o.flatten())
+ self.update_i = TLBUpdate(asid_width)
+
+ def elaborate(self, platform):
+ m = Module()
+
+ vpn3 = Signal(9) # SV48 4th-level VPN
+ vpn2 = Signal(9)
+ vpn1 = Signal(9)
+ vpn0 = Signal(9)
+
+ # -------------
+ # Translation
+ # -------------
+
+ # SV48 defines four levels of page tables
+ m.d.comb += [vpn0.eq(self.lu_vaddr_i[12:21]),
+ vpn1.eq(self.lu_vaddr_i[21:30]),
+ vpn2.eq(self.lu_vaddr_i[30:39]),
+ vpn3.eq(self.lu_vaddr_i[39:48]),
+ ]
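+ # SV48 virtual-address layout, for reference:
+ # [0:12] page offset, [12:21] vpn0, [21:30] vpn1,
+ # [30:39] vpn2, [39:48] vpn3; bits [48:64] must equal bit 47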
+
+ tc = []
+ for i in range(self.tlb_entries):
+ tlc = TLBContent(self.pte_width, self.asid_width)
+ setattr(m.submodules, "tc%d" % i, tlc)
+ tc.append(tlc)
+ # connect inputs
+ tlc.update_i = self.update_i # saves a lot of graphviz links
+ m.d.comb += [tlc.vpn0.eq(vpn0),
+ tlc.vpn1.eq(vpn1),
+ tlc.vpn2.eq(vpn2),
+ tlc.vpn3.eq(vpn3), # 4th level (SV48)
+ tlc.flush_i.eq(self.flush_i),
+ # tlc.update_i.eq(self.update_i),
+ tlc.lu_asid_i.eq(self.lu_asid_i)]
+ tc = Array(tc)
+
+ # --------------
+ # Select hit
+ # --------------
+
+ # use Encoder to select hit index
+ # XXX TODO: assert that there's only one valid entry (one lu_hit)
+ hitsel = Encoder(self.tlb_entries)
+ m.submodules.hitsel = hitsel
+
+ hits = []
+ for i in range(self.tlb_entries):
+ hits.append(tc[i].lu_hit_o)
+ m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
+ idx = hitsel.o
+
+ active = Signal(reset_less=True)
+ m.d.comb += active.eq(~hitsel.n)
+ with m.If(active):
+ # active hit, send selected as output
+ m.d.comb += [self.lu_is_512G_o.eq(tc[idx].lu_is_512G_o),
+ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
+ self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
+ self.lu_hit_o.eq(1),
+ self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
+ ]
+
+ # --------------
+ # PLRU.
+ # --------------
+
+ p = PLRU(self.tlb_entries)
+ plru_tree = Signal(p.TLBSZ)
+ m.submodules.plru = p
+
+ # connect PLRU inputs/outputs
+ # XXX TODO: assert that there's only one valid entry (one replace_en)
+ en = []
+ for i in range(self.tlb_entries):
+ en.append(tc[i].replace_en_i)
+ m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
+ p.lu_hit.eq(hitsel.i),
+ p.lu_access_i.eq(self.lu_access_i),
+ p.plru_tree.eq(plru_tree)]
+ m.d.sync += plru_tree.eq(p.plru_tree_o)
+
+ # --------------
+ # Sanity checks
+ # --------------
+
+ assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
+ "TLB size must be a multiple of 2 and greater than 1"
+ assert (self.asid_width >= 1), \
+ "ASID width must be at least 1"
+
+ return m
+
+ """
+ # Just for checking
+ function int countSetBits(logic[self.tlb_entries-1:0] vector);
+ automatic int count = 0;
+ foreach (vector[idx]) begin
+ count += vector[idx];
+ end
+ return count;
+ endfunction
+
+ assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
+ else $error("More then one hit in TLB!"); $stop(); end
+ assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
+ else $error("More then one TLB entry selected for next replace!");
+ """
+
+ def ports(self):
+ return [self.flush_i, self.lu_access_i,
+ self.lu_asid_i, self.lu_vaddr_i,
+ self.lu_is_2M_o, self.lu_is_1G_o, self.lu_is_512G_o, self.lu_hit_o
+ ] + self.lu_content_o.ports() + self.update_i.ports()
+
+
+if __name__ == '__main__':
+ tlb = TLB()
+ vl = rtlil.convert(tlb, ports=tlb.ports())
+ with open("test_tlb.il", "w") as f:
+ f.write(vl)
--- /dev/null
+from nmigen import Signal, Module, Cat, Const, Elaboratable
+
+from soc.TLB.ariane.ptw import TLBUpdate, PTE
+
+
+class TLBEntry:
+ def __init__(self, asid_width):
+ self.asid = Signal(asid_width, name="ent_asid")
+ # SV48 defines four levels of page tables
+ self.vpn0 = Signal(9, name="ent_vpn0")
+ self.vpn1 = Signal(9, name="ent_vpn1")
+ self.vpn2 = Signal(9, name="ent_vpn2")
+ self.vpn3 = Signal(9, name="ent_vpn3")
+ self.is_2M = Signal(name="ent_is_2M")
+ self.is_1G = Signal(name="ent_is_1G")
+ self.is_512G = Signal(name="ent_is_512G")
+ self.valid = Signal(name="ent_valid")
+
+ def flatten(self):
+ return Cat(*self.ports())
+
+ def eq(self, x):
+ return self.flatten().eq(x.flatten())
+
+ def ports(self):
+ return [self.asid, self.vpn0, self.vpn1, self.vpn2, self.vpn3,
+ self.is_2M, self.is_1G, self.is_512G, self.valid]
+
+
+class TLBContent(Elaboratable):
+ def __init__(self, pte_width, asid_width):
+ self.asid_width = asid_width
+ self.pte_width = pte_width
+ self.flush_i = Signal() # Flush signal
+ # Update TLB
+ self.update_i = TLBUpdate(asid_width)
+ self.vpn3 = Signal(9)
+ self.vpn2 = Signal(9)
+ self.vpn1 = Signal(9)
+ self.vpn0 = Signal(9)
+ self.replace_en_i = Signal() # replace the following entry,
+ # set by replacement strategy
+ # Lookup signals
+ self.lu_asid_i = Signal(asid_width)
+ self.lu_content_o = Signal(pte_width)
+ self.lu_is_512G_o = Signal()
+ self.lu_is_2M_o = Signal()
+ self.lu_is_1G_o = Signal()
+ self.lu_hit_o = Signal()
+
+ def elaborate(self, platform):
+ m = Module()
+
+ tags = TLBEntry(self.asid_width)
+
+ content = Signal(self.pte_width)
+
+ m.d.comb += [self.lu_hit_o.eq(0),
+ self.lu_is_512G_o.eq(0),
+ self.lu_is_2M_o.eq(0),
+ self.lu_is_1G_o.eq(0)]
+
+ # temporaries for lookup
+ asid_ok = Signal(reset_less=True)
+ # tags_ok = Signal(reset_less=True)
+
+ vpn3_ok = Signal(reset_less=True)
+ vpn2_ok = Signal(reset_less=True)
+ vpn1_ok = Signal(reset_less=True)
+ vpn0_ok = Signal(reset_less=True)
+
+ #tags_2M = Signal(reset_less=True)
+ vpn0_or_2M = Signal(reset_less=True)
+
+ m.d.comb += [
+ # compare asid and vpn*
+ asid_ok.eq(tags.asid == self.lu_asid_i),
+ vpn3_ok.eq(tags.vpn3 == self.vpn3),
+ vpn2_ok.eq(tags.vpn2 == self.vpn2),
+ vpn1_ok.eq(tags.vpn1 == self.vpn1),
+ vpn0_ok.eq(tags.vpn0 == self.vpn0),
+ vpn0_or_2M.eq(tags.is_2M | vpn0_ok)
+ ]
+
+ with m.If(asid_ok & tags.valid):
+ # first level, only vpn3 needs to match
+ with m.If(tags.is_512G & vpn3_ok):
+ m.d.comb += [self.lu_content_o.eq(content),
+ self.lu_is_512G_o.eq(1),
+ self.lu_hit_o.eq(1),
+ ]
+ # second level: vpn2 and vpn3 need to match
+ with m.Elif(tags.is_1G & vpn2_ok & vpn3_ok):
+ m.d.comb += [self.lu_content_o.eq(content),
+ self.lu_is_1G_o.eq(1),
+ self.lu_hit_o.eq(1),
+ ]
+ # not a giga page hit nor a tera page hit so check further
+ with m.Elif(vpn1_ok):
+ # this could be a 2 mega page hit or a 4 kB hit
+ # output accordingly
+ with m.If(vpn0_or_2M):
+ m.d.comb += [self.lu_content_o.eq(content),
+ self.lu_is_2M_o.eq(tags.is_2M),
+ self.lu_hit_o.eq(1),
+ ]
+ # ------------------
+ # Update or Flush
+ # ------------------
+
+ # temporaries
+ replace_valid = Signal(reset_less=True)
+ m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
+
+ # flush
+ with m.If(self.flush_i):
+ # invalidate (flush) conditions: all if zero or just this ASID
+ with m.If((self.lu_asid_i == Const(0, self.asid_width)) |
+ (self.lu_asid_i == tags.asid)):
+ m.d.sync += tags.valid.eq(0)
+
+ # normal replacement
+ with m.Elif(replace_valid):
+ m.d.sync += [ # update tag array
+ tags.asid.eq(self.update_i.asid),
+ tags.vpn3.eq(self.update_i.vpn[27:36]),
+ tags.vpn2.eq(self.update_i.vpn[18:27]),
+ tags.vpn1.eq(self.update_i.vpn[9:18]),
+ tags.vpn0.eq(self.update_i.vpn[0:9]),
+ tags.is_512G.eq(self.update_i.is_512G),
+ tags.is_1G.eq(self.update_i.is_1G),
+ tags.is_2M.eq(self.update_i.is_2M),
+ tags.valid.eq(1),
+ # and content as well
+ content.eq(self.update_i.content.flatten())
+ ]
+ return m
+
+ def ports(self):
+ return [self.flush_i,
+ self.lu_asid_i,
+ self.lu_is_2M_o, self.lu_is_1G_o, self.lu_is_512G_o, self.lu_hit_o,
+ ] + self.update_i.content.ports() + self.update_i.ports()
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from soc.TLB.LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3
+
+from nmigen.back.pysim import Simulator, Delay, Tick
+import unittest
+
+
+class TestLFSR(unittest.TestCase):
+ def test_poly(self):
+ v = LFSRPolynomial()
+ self.assertEqual(repr(v), "LFSRPolynomial([0])")
+ self.assertEqual(str(v), "1")
+ v = LFSRPolynomial([1])
+ self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
+ self.assertEqual(str(v), "x + 1")
+ v = LFSRPolynomial([0, 1])
+ self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
+ self.assertEqual(str(v), "x + 1")
+ v = LFSRPolynomial([1, 2])
+ self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])")
+ self.assertEqual(str(v), "x^2 + x + 1")
+ v = LFSRPolynomial([2])
+ self.assertEqual(repr(v), "LFSRPolynomial([2, 0])")
+ self.assertEqual(str(v), "x^2 + 1")
+ self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1")
+
+ def test_lfsr_3(self):
+ module = LFSR(LFSR_POLY_3)
+ traces = [module.state, module.enable]
+ with Simulator(module,
+ vcd_file=open("Waveforms/test_LFSR2.vcd", "w"),
+ gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"),
+ traces=traces) as sim:
+ sim.add_clock(1e-6, phase=0.25e-6)
+ delay = Delay(1e-7)
+
+ def async_process():
+ yield module.enable.eq(0)
+ yield Tick()
+ self.assertEqual((yield module.state), 0x1)
+ yield Tick()
+ self.assertEqual((yield module.state), 0x1)
+ yield module.enable.eq(1)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x2)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x5)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x3)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x7)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x6)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x4)
+ yield Tick()
+ yield delay
+ self.assertEqual((yield module.state), 0x1)
+ yield Tick()
+
+ sim.add_process(async_process)
+ sim.run()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from soc.TLB.AddressEncoder import AddressEncoder
+from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op
+
+
+# This function allows for the easy setting of values to the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# i (Input): The array of single bits to be written
+def set_encoder(dut, i):
+ yield dut.i.eq(i)
+ yield
+
+# Checks the single match of the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# sm (Single Match): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+
+
+def check_single_match(dut, sm, op):
+ out_sm = yield dut.single_match
+ assert_op("Single Match", out_sm, sm, op)
+
+# Checks the multiple match of the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# mm (Multiple Match): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+
+
+def check_multiple_match(dut, mm, op):
+ out_mm = yield dut.multiple_match
+ assert_op("Multiple Match", out_mm, mm, op)
+
+# Checks the output of the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# o (Output): The expected output
+# op (Operation): (0 => ==), (1 => !=)
+
+
+def check_output(dut, o, op):
+ out_o = yield dut.o
+ assert_op("Output", out_o, o, op)
+
+# Checks the state of the AddressEncoder
+# Arguments:
+# dut: The AddressEncoder being tested
+# sm (Single Match): The expected match result
+# mm (Multiple Match): The expected match result
+# o (Output): The expected output
+# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+# o_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+
+
+def check_all(dut, sm, mm, o, sm_op, mm_op, o_op):
+ yield from check_single_match(dut, sm, sm_op)
+ yield from check_multiple_match(dut, mm, mm_op)
+ yield from check_output(dut, o, o_op)
+
+
+def tbench(dut):
+ # Check invalid input
+ in_val = 0b000
+ single_match = 0
+ multiple_match = 0
+ output = 0
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+
+ # Check single bit
+ in_val = 0b001
+ single_match = 1
+ multiple_match = 0
+ output = 0
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+
+ # Check another single bit
+ in_val = 0b100
+ single_match = 1
+ multiple_match = 0
+ output = 2
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+
+ # Check multiple match
+ # We expected the lowest bit to be returned which is address 0
+ in_val = 0b101
+ single_match = 0
+ multiple_match = 1
+ output = 0
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+
+ # Check another multiple match
+ # We expected the lowest bit to be returned which is address 1
+ in_val = 0b110
+ single_match = 0
+ multiple_match = 1
+ output = 1
+ yield from set_encoder(dut, in_val)
+ yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
+
+
+def test_addr():
+ dut = AddressEncoder(4)
+ run_simulation(dut, tbench(dut),
+ vcd_name="Waveforms/test_address_encoder.vcd")
+ print("AddressEncoder Unit Test Success")
+
+
+if __name__ == "__main__":
+ test_addr()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+
+from soc.TLB.Cam import Cam
+
+from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op
+
+# This function allows for the easy setting of values to the Cam
+# Arguments:
+# dut: The Cam being tested
+# e (Enable): Whether the block is going to be enabled
+# we (Write Enable): Whether the Cam will write on the next cycle
+# a (Address): Where the data will be written if write enable is high
+# d (Data): Either what we are looking for or will write to the address
+
+
+def set_cam(dut, e, we, a, d):
+ yield dut.enable.eq(e)
+ yield dut.write_enable.eq(we)
+ yield dut.address_in.eq(a)
+ yield dut.data_in.eq(d)
+ yield
+
+# Checks the multiple match of the Cam
+# Arguments:
+# dut: The Cam being tested
+# mm (Multiple Match): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+
+
+def check_multiple_match(dut, mm, op):
+ out_mm = yield dut.multiple_match
+ assert_op("Multiple Match", out_mm, mm, op)
+
+# Checks the single match of the Cam
+# Arguments:
+# dut: The Cam being tested
+# sm (Single Match): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+
+
+def check_single_match(dut, sm, op):
+ out_sm = yield dut.single_match
+ assert_op("Single Match", out_sm, sm, op)
+
+# Checks the address output of the Cam
+# Arguments:
+# dut: The Cam being tested
+# ma (Match Address): The expected match result
+# op (Operation): (0 => ==), (1 => !=)
+
+
+def check_match_address(dut, ma, op):
+ out_ma = yield dut.match_address
+ assert_op("Match Address", out_ma, ma, op)
+
+# Checks the state of the Cam
+# Arguments:
+# dut: The Cam being tested
+# sm (Single Match): The expected match result
+# mm (Multiple Match): The expected match result
+# ma: (Match Address): The expected address output
+# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+# ma_op (Operation): Operation for the address assertion (0 => ==), (1 => !=)
+
+
+def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op):
+ yield from check_multiple_match(dut, mm, mm_op)
+ yield from check_single_match(dut, sm, sm_op)
+ yield from check_match_address(dut, ma, ma_op)
+
+
+def tbench(dut):
+ # NA
+ enable = 0
+ write_enable = 0
+ address = 0
+ data = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+
+ # Read Miss Multiple
+ # Note that the default starting entry data bits are all 0
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 0
+ multiple_match = 1
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_multiple_match(dut, multiple_match, 0)
+
+ # Read Miss
+ # Note that the default starting entry data bits are all 0
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 1
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+
+ # Write Entry 0
+ enable = 1
+ write_enable = 1
+ address = 0
+ data = 4
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+
+ # Read Hit Entry 0
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 4
+ multiple_match = 0
+ single_match = 1
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
+
+ # Search Hit
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 4
+ multiple_match = 0
+ single_match = 1
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
+
+ # Search Miss
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 5
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+
+ # Multiple Match test
+ # Write Entry 1
+ enable = 1
+ write_enable = 1
+ address = 1
+ data = 5
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+
+ # Write Entry 2
+ # Same data as Entry 1
+ enable = 1
+ write_enable = 1
+ address = 2
+ data = 5
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+
+ # Read Hit Data 5
+ enable = 1
+ write_enable = 0
+ address = 1
+ data = 5
+ multiple_match = 1
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
+
+ # Verify read_warning is not caused
+ # Write Entry 0
+ enable = 1
+ write_enable = 1
+ address = 0
+ data = 7
+ multiple_match = 0
+ single_match = 0
+ yield from set_cam(dut, enable, write_enable, address, data)
+ # Note there is no yield we immediately attempt to read in the next cycle
+
+ # Read Hit Data 7
+ enable = 1
+ write_enable = 0
+ address = 0
+ data = 7
+ multiple_match = 0
+ single_match = 1
+ yield from set_cam(dut, enable, write_enable, address, data)
+ yield
+ yield from check_single_match(dut, single_match, 0)
+
+ yield
+
+
+def test_cam():
+ dut = Cam(4, 4)
+ run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam.vcd")
+ print("Cam Unit Test Success")
+
+
+if __name__ == "__main__":
+ test_cam()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+
+from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op
+from soc.TLB.CamEntry import CamEntry
+
+# This function allows for the easy setting of values to the Cam Entry
+# Arguments:
+# dut: The CamEntry being tested
+# c (command): NA (0), Read (1), Write (2), Reserve (3)
+# d (data): The data to be set
+
+
+def set_cam_entry(dut, c, d):
+ # Write desired values
+ yield dut.command.eq(c)
+ yield dut.data_in.eq(d)
+ yield
+ # Reset all lines
+ yield dut.command.eq(0)
+ yield dut.data_in.eq(0)
+ yield
+
+# Checks the data state of the CAM entry
+# Arguments:
+# dut: The CamEntry being tested
+# d (Data): The expected data
+# op (Operation): (0 => ==), (1 => !=)
+
+
+def check_data(dut, d, op):
+ out_d = yield dut.data
+ assert_op("Data", out_d, d, op)
+
+# Checks the match state of the CAM entry
+# Arguments:
+# dut: The CamEntry being tested
+# m (Match): The expected match
+# op (Operation): (0 => ==), (1 => !=)
+
+
+def check_match(dut, m, op):
+ out_m = yield dut.match
+ assert_op("Match", out_m, m, op)
+
+# Checks the state of the CAM entry
+# Arguments:
+# dut: The CamEntry being tested
+# d (data): The expected data
+# m (match): The expected match
+# d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=)
+# m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
+
+
+def check_all(dut, d, m, d_op, m_op):
+ yield from check_data(dut, d, d_op)
+ yield from check_match(dut, m, m_op)
+
+# This tbench goes through the paces of testing the CamEntry module
+# It writes several key/data pairs and then reads them back with matching and
+# non-matching keys to verify the stored data and the match flag.
+
+
+def tbench(dut):
+ # Check write
+ command = 2
+ data = 1
+ match = 0
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 0, 0)
+
+ # Check read miss
+ command = 1
+ data = 2
+ match = 0
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 1, 0)
+
+ # Check read hit
+ command = 1
+ data = 1
+ match = 1
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 0, 0)
+
+ # Check overwrite
+ command = 2
+ data = 5
+ match = 0
+ yield from set_cam_entry(dut, command, data)
+ yield
+ yield from check_all(dut, data, match, 0, 0)
+
+ # Check read hit
+ command = 1
+ data = 5
+ match = 1
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 0, 0)
+
+ # Check reset
+ command = 3
+ data = 0
+ match = 0
+ yield from set_cam_entry(dut, command, data)
+ yield from check_all(dut, data, match, 0, 0)
+
+ # Extra clock cycle for waveform
+ yield
+
+
+def test_camentry():
+ dut = CamEntry(4)
+ run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam_entry.vcd")
+ print("CamEntry Unit Test Success")
+
+
+if __name__ == "__main__":
+ test_camentry()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+
+from soc.TLB.PermissionValidator import PermissionValidator
+
+from soc.TestUtil.test_helper import assert_op
+
+
+def set_validator(dut, d, xwr, sm, sa, asid):
+ yield dut.data.eq(d)
+ yield dut.xwr.eq(xwr)
+ yield dut.super_mode.eq(sm)
+ yield dut.super_access.eq(sa)
+ yield dut.asid.eq(asid)
+ yield
+
+
+def check_valid(dut, v, op):
+ out_v = yield dut.valid
+ assert_op("Valid", out_v, v, op)
+
+
+def tbench(dut):
+    # 80 bits are represented; the MSB is ignored as it will be truncated.
+    # The ASID is the first 4 hex digits (bits 64-78); see the sketch below.
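+    # A hedged decoding sketch (an assumption, based on the test values below
+    # and the usual RISC-V PTE flag order V, XWR, U, G in the low byte):
+    #   value = 0x7FFF0000000000000031
+    #   asid  = (value >> 64) & 0x7FFF   # -> 0x7FFF
+    #   flags = value & 0xFF             # -> 0x31: V=1, U=1, G=1, XWR=0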
+
+ # Test user mode entry valid
+ # Global Bit matching ASID
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000031
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+ # Test user mode entry valid
+ # Global Bit nonmatching ASID
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000031
+ # Ignore MSB it will be truncated
+ asid = 0x7FF6
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+ # Test user mode entry invalid
+ # Global Bit nonmatching ASID
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000021
+ # Ignore MSB it will be truncated
+ asid = 0x7FF6
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 0
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+ # Test user mode entry valid
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000011
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+ # Test user mode entry invalid
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000011
+ # Ignore MSB it will be truncated
+ asid = 0x7FF6
+ super_mode = 0
+ super_access = 0
+ xwr = 0
+ valid = 0
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+ # Test supervisor mode entry valid
+ # The entry is NOT in user mode
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000001
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 1
+ super_access = 0
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+ # Test supervisor mode entry invalid
+ # The entry is in user mode
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000011
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 1
+ super_access = 0
+ xwr = 0
+ valid = 0
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+ # Test supervisor mode entry valid
+ # The entry is NOT in user mode with access
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000001
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 1
+ super_access = 1
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+ # Test supervisor mode entry valid
+ # The entry is in user mode with access
+ # Ensure that user mode and valid is enabled!
+ data = 0x7FFF0000000000000011
+ # Ignore MSB it will be truncated
+ asid = 0x7FFF
+ super_mode = 1
+ super_access = 1
+ xwr = 0
+ valid = 1
+ yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
+ yield from check_valid(dut, valid, 0)
+
+
+def test_permv():
+ dut = PermissionValidator(15, 64)
+ run_simulation(dut, tbench(
+ dut), vcd_name="Waveforms/test_permission_validator.vcd")
+ print("PermissionValidator Unit Test Success")
+
+
+if __name__ == "__main__":
+ test_permv()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+
+from soc.TLB.PteEntry import PteEntry
+
+from soc.TestUtil.test_helper import assert_op
+
+
+def set_entry(dut, i):
+ yield dut.i.eq(i)
+ yield
+
+
+def check_dirty(dut, d, op):
+ out_d = yield dut.d
+ assert_op("Dirty", out_d, d, op)
+
+
+def check_accessed(dut, a, op):
+ out_a = yield dut.a
+ assert_op("Accessed", out_a, a, op)
+
+
+def check_global(dut, o, op):
+ out = yield dut.g
+ assert_op("Global", out, o, op)
+
+
+def check_user(dut, o, op):
+ out = yield dut.u
+ assert_op("User Mode", out, o, op)
+
+
+def check_xwr(dut, o, op):
+ out = yield dut.xwr
+ assert_op("XWR", out, o, op)
+
+
+def check_asid(dut, o, op):
+ out = yield dut.asid
+ assert_op("ASID", out, o, op)
+
+
+def check_pte(dut, o, op):
+ out = yield dut.pte
+    assert_op("PTE", out, o, op)
+
+
+def check_valid(dut, v, op):
+ out_v = yield dut.v
+ assert_op("Valid", out_v, v, op)
+
+
+def check_all(dut, d, a, g, u, xwr, v, asid, pte):
+ yield from check_dirty(dut, d, 0)
+ yield from check_accessed(dut, a, 0)
+ yield from check_global(dut, g, 0)
+ yield from check_user(dut, u, 0)
+ yield from check_xwr(dut, xwr, 0)
+ yield from check_asid(dut, asid, 0)
+ yield from check_pte(dut, pte, 0)
+ yield from check_valid(dut, v, 0)
+
+
+def tbench(dut):
+    # 80 bits are represented; the MSB is ignored as it will be truncated.
+    # The ASID is the first 4 hex digits (bits 64-78); see the sketch below.
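+    # A hedged sketch of the field split assumed by these checks (standard
+    # RISC-V PTE flag order in the low byte: V, XWR, U, G, A, D):
+    #   i     = 0x0FFF00000000000000FF
+    #   asid  = (i >> 64) & 0x7FFF    # -> 0x0FFF
+    #   pte   = i & ((1 << 64) - 1)   # -> 0x00000000000000FF
+    #   flags = i & 0xFF              # -> V=1, XWR=7, U=1, G=1, A=1, D=1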
+
+ i = 0x7FFF0000000000000031
+ dirty = 0
+ access = 0
+ glob = 1
+ user = 1
+ xwr = 0
+ valid = 1
+ asid = 0x7FFF
+ pte = 0x0000000000000031
+ yield from set_entry(dut, i)
+ yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
+
+ i = 0x0FFF00000000000000FF
+ dirty = 1
+ access = 1
+ glob = 1
+ user = 1
+ xwr = 7
+ valid = 1
+ asid = 0x0FFF
+ pte = 0x00000000000000FF
+ yield from set_entry(dut, i)
+ yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
+
+ i = 0x0721000000001100001F
+ dirty = 0
+ access = 0
+ glob = 0
+ user = 1
+ xwr = 7
+ valid = 1
+ asid = 0x0721
+ pte = 0x000000001100001F
+ yield from set_entry(dut, i)
+ yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte)
+
+ yield
+
+
+def test_pteentry():
+ dut = PteEntry(15, 64)
+ run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_pte_entry.vcd")
+ print("PteEntry Unit Test Success")
+
+
+if __name__ == "__main__":
+ test_pteentry()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+
+from soc.TLB.SetAssociativeCache import SetAssociativeCache
+
+from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op
+
+
+def set_sac(dut, e, c, s, t, d):
+ yield dut.enable.eq(e)
+ yield dut.command.eq(c)
+ yield dut.cset.eq(s)
+ yield dut.tag.eq(t)
+ yield dut.data_i.eq(d)
+ yield
+
+
+def tbench(dut):
+ enable = 1
+ command = 2
+ cset = 1
+ tag = 2
+ data = 3
+ yield from set_sac(dut, enable, command, cset, tag, data)
+ yield
+
+ enable = 1
+ command = 2
+ cset = 1
+ tag = 5
+ data = 8
+ yield from set_sac(dut, enable, command, cset, tag, data)
+ yield
+
+
+def test_assoc_cache():
+ dut = SetAssociativeCache(4, 4, 4, 4)
+ run_simulation(dut, tbench(
+ dut), vcd_name="Waveforms/test_set_associative_cache.vcd")
+ print("Set Associative Cache Unit Test Success")
+
+
+if __name__ == "__main__":
+ test_assoc_cache()
--- /dev/null
+#import tracemalloc
+# tracemalloc.start()
+
+from nmigen.compat.sim import run_simulation
+
+from soc.TLB.TLB import TLB
+
+from soc.TestUtil.test_helper import assert_op, assert_eq
+
+# self.supermode = Signal(1) # Supervisor Mode
+# self.super_access = Signal(1) # Supervisor Access
+# self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
+# self.xwr = Signal(3) # Execute, Write, Read
+# self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
+#self.address_L1 = Signal(range(L1_size))
+# self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
+# self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
+# self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
+#
+# self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
+# self.perm_valid = Signal(1) # Denotes if the permissions are correct
+# self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
+
+COMMAND_READ = 1
+COMMAND_WRITE_L1 = 2
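+# Command encoding, per the signal notes above: 0 = None, 1 = Search,
+# 2 = Write L1, 3 = Write L2. COMMAND_READ therefore issues a Search.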
+
+# Checks the hit state of the TLB
+# Arguments:
+# dut: The TLB being tested
+# d (Hit): The expected hit value
+
+
+def check_hit(dut, d):
+ hit_d = yield dut.hit
+ #assert_eq("hit", hit_d, d)
+
+
+def tst_command(dut, cmd, xwr, cycles):
+ yield dut.command.eq(cmd)
+ yield dut.xwr.eq(xwr)
+ for i in range(0, cycles):
+ yield
+
+
+def tst_write_L1(dut, vma, address_L1, asid, pte_in):
+ yield dut.address_L1.eq(address_L1)
+ yield dut.asid.eq(asid)
+ yield dut.vma.eq(vma)
+ yield dut.pte_in.eq(pte_in)
+ yield from tst_command(dut, COMMAND_WRITE_L1, 7, 2)
+
+
+def tst_search(dut, vma, found):
+ yield dut.vma.eq(vma)
+ yield from tst_command(dut, COMMAND_READ, 7, 1)
+ yield from check_hit(dut, found)
+
+
+def zero(dut):
+ yield dut.supermode.eq(0)
+ yield dut.super_access.eq(0)
+ yield dut.mode.eq(0)
+ yield dut.address_L1.eq(0)
+ yield dut.asid.eq(0)
+ yield dut.vma.eq(0)
+ yield dut.pte_in.eq(0)
+
+
+def tbench(dut):
+ yield from zero(dut)
+ yield dut.mode.eq(0xF) # enable TLB
+ # test hit
+ yield from tst_write_L1(dut, 0xFEEDFACE, 0, 0xFFFF, 0xF0F0)
+ yield from tst_search(dut, 0xFEEDFACE, 1)
+ yield from tst_search(dut, 0xFACEFEED, 0)
+
+
+def test_tlb():
+ dut = TLB(15, 36, 64, 8)
+ run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_tlb.vcd")
+ print("TLB Unit Test Success")
+
+
+if __name__ == "__main__":
+ test_tlb()
--- /dev/null
+from nmigen import Elaboratable, Module
+
+# NOTE: PortInterface and LDSTSplitter are also required here; their import
+# paths depend on the rest of this repository and are left as an assumption.
+
+
+class DualPortSplitter(Elaboratable):
+ """DualPortSplitter
+
+ * one incoming PortInterface
+ * two *OUTGOING* PortInterfaces
+ * uses LDSTSplitter to do it
+
+ (actually, thinking about it LDSTSplitter could simply be
+ modified to conform to PortInterface: one in, two out)
+
+ once that is done each pair of ports may be wired directly
+ to the dual ports of L0CacheBuffer
+
+ The split is carried out so that, regardless of alignment or
+ mis-alignment, outgoing PortInterface[0] takes bit 4 == 0
+ of the address, whilst outgoing PortInterface[1] takes
+ bit 4 == 1.
+
+ PortInterface *may* need to be changed so that the length is
+ a binary number (accepting values 1-16).
+ """
+
+    def __init__(self, inp):
+ self.outp = [PortInterface(name="outp_0"),
+ PortInterface(name="outp_1")]
+ print(self.outp)
+
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ m.submodules.splitter = splitter = LDSTSplitter(64, 48, 4)
+ self.inp = splitter.pi
+ comb += splitter.addr_i.eq(self.inp.addr) # XXX
+ #comb += splitter.len_i.eq()
+ #comb += splitter.valid_i.eq()
+ comb += splitter.is_ld_i.eq(self.inp.is_ld_i)
+ comb += splitter.is_st_i.eq(self.inp.is_st_i)
+ #comb += splitter.st_data_i.eq()
+ #comb += splitter.sld_valid_i.eq()
+ #comb += splitter.sld_data_i.eq()
+ #comb += splitter.sst_valid_i.eq()
+ return m
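+
+# A minimal sketch (an assumption, not part of the generated class above) of
+# the bit-4 routing described in the docstring; `addr` and `sel` are
+# illustrative names only:
+#
+#     with m.If(addr[4] == 0):
+#         comb += self.outp[0].sel.eq(1)
+#     with m.Else():
+#         comb += self.outp[1].sel.eq(1)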
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+# module axi4_ar_buffer
+# #(
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid,
+# input logic [31:0] s_axi4_araddr,
+# input logic s_axi4_arvalid,
+# output logic s_axi4_arready,
+# input logic [7:0] s_axi4_arlen,
+# input logic [2:0] s_axi4_arsize,
+# input logic [1:0] s_axi4_arburst,
+# input logic s_axi4_arlock,
+# input logic [2:0] s_axi4_arprot,
+# input logic [3:0] s_axi4_arcache,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser,
+#
+# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid,
+# output logic [31:0] m_axi4_araddr,
+# output logic m_axi4_arvalid,
+# input logic m_axi4_arready,
+# output logic [7:0] m_axi4_arlen,
+# output logic [2:0] m_axi4_arsize,
+# output logic [1:0] m_axi4_arburst,
+# output logic m_axi4_arlock,
+# output logic [2:0] m_axi4_arprot,
+# output logic [3:0] m_axi4_arcache,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser
+# );
+
+
+class axi4_ar_buffer(Elaboratable):
+
+    def __init__(self, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # parameter defaults taken from the SV module header above
+ # self.axi4_aclk = Signal() # input
+ # self.axi4_arstn = Signal() # input
+ self.s_axi4_arid = Signal(AXI_ID_WIDTH) # input
+ self.s_axi4_araddr = Signal(32) # input
+ self.s_axi4_arvalid = Signal() # input
+ self.s_axi4_arready = Signal() # output
+ self.s_axi4_arlen = Signal(8) # input
+ self.s_axi4_arsize = Signal(3) # input
+ self.s_axi4_arburst = Signal(2) # input
+ self.s_axi4_arlock = Signal() # input
+ self.s_axi4_arprot = Signal(3) # input
+ self.s_axi4_arcache = Signal(4) # input
+ self.s_axi4_aruser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_arid = Signal(AXI_ID_WIDTH) # output
+ self.m_axi4_araddr = Signal(32) # output
+ self.m_axi4_arvalid = Signal() # output
+ self.m_axi4_arready = Signal() # input
+ self.m_axi4_arlen = Signal(8) # output
+ self.m_axi4_arsize = Signal(3) # output
+ self.m_axi4_arburst = Signal(2) # output
+ self.m_axi4_arlock = Signal() # output
+ self.m_axi4_arprot = Signal(3) # output
+ self.m_axi4_arcache = Signal(4) # output
+ self.m_axi4_aruser = Signal(AXI_USER_WIDTH) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ # #TODO use record types here
+ # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_in;
+ # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_out;
+
+ # assign data_in [3:0] = s_axi4_arcache;
+ # assign data_in [6:4] = s_axi4_arprot;
+ # assign data_in [7] = s_axi4_arlock;
+ # assign data_in [9:8] = s_axi4_arburst;
+ # assign data_in [12:10] = s_axi4_arsize;
+ # assign data_in [20:13] = s_axi4_arlen;
+ # assign data_in [52:21] = s_axi4_araddr;
+ # assign data_in [52+AXI_ID_WIDTH:53] = s_axi4_arid;
+ # assign data_in[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH] = s_axi4_aruser;
+ #
+ # assign m_axi4_arcache = data_out[3:0];
+ # assign m_axi4_arprot = data_out[6:4];
+ # assign m_axi4_arlock = data_out[7];
+ # assign m_axi4_arburst = data_out[9:8];
+ # assign m_axi4_arsize = data_out[12:10];
+ # assign m_axi4_arlen = data_out[20:13];
+ # assign m_axi4_araddr = data_out[52:21];
+ # assign m_axi4_arid = data_out[52+AXI_ID_WIDTH:53];
+ # assign m_axi4_aruser = data_out[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH];
+
+ # m.d.comb += self.m_axi4_arcache.eq(..)
+ # m.d.comb += self.m_axi4_arprot.eq(..)
+ # m.d.comb += self.m_axi4_arlock.eq(..)
+ # m.d.comb += self.m_axi4_arburst.eq(..)
+ # m.d.comb += self.m_axi4_arsize.eq(..)
+ # m.d.comb += self.m_axi4_arlen.eq(..)
+ # m.d.comb += self.m_axi4_araddr.eq(..)
+ # m.d.comb += self.m_axi4_arid.eq(..)
+ # m.d.comb += self.m_axi4_aruser.eq(..)
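+
+        # A hedged sketch of how the packing shown in the SV comments above
+        # could be expressed with nmigen's Cat() once a buffer submodule is
+        # ported (`data_in` is illustrative; no such signal exists here yet):
+        #
+        #     data_in = Cat(self.s_axi4_arcache, self.s_axi4_arprot,
+        #                   self.s_axi4_arlock, self.s_axi4_arburst,
+        #                   self.s_axi4_arsize, self.s_axi4_arlen,
+        #                   self.s_axi4_araddr, self.s_axi4_arid,
+        #                   self.s_axi4_aruser)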
+ return m
+
+# TODO convert axi_buffer_rab.sv
+#
+# axi_buffer_rab
+# #(
+# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+53 ),
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_buffer
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# .valid_out ( m_axi4_arvalid ),
+# .data_out ( data_out ),
+# .ready_in ( m_axi4_arready ),
+# .valid_in ( s_axi4_arvalid ),
+# .data_in ( data_in ),
+# .ready_out ( s_axi4_arready )
+# );
+#
+
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_ar_sender(Elaboratable):
+
+    def __init__(self, AXI_ADDR_WIDTH=40, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # parameter defaults taken from the SV module header below
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.l1_done_o = Signal() # output
+ self.l1_accept_i = Signal() # input
+ self.l1_drop_i = Signal() # input
+ self.l1_save_i = Signal() # input
+ self.l2_done_o = Signal() # output
+ self.l2_accept_i = Signal() # input
+ self.l2_drop_i = Signal() # input
+ self.l2_sending_o = Signal() # output
+ self.l1_araddr_i = Signal(AXI_ADDR_WIDTH) # input
+ self.l2_araddr_i = Signal(AXI_ADDR_WIDTH) # input
+ self.s_axi4_arid = Signal(AXI_ID_WIDTH) # input
+ self.s_axi4_arvalid = Signal() # input
+ self.s_axi4_arready = Signal() # output
+ self.s_axi4_arlen = Signal(8) # input
+ self.s_axi4_arsize = Signal(3) # input
+ self.s_axi4_arburst = Signal(2) # input
+ self.s_axi4_arlock = Signal() # input
+ self.s_axi4_arprot = Signal(3) # input
+ self.s_axi4_arcache = Signal(4) # input
+ self.s_axi4_aruser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_arid = Signal(AXI_ID_WIDTH) # output
+ self.m_axi4_araddr = Signal(AXI_ADDR_WIDTH) # output
+ self.m_axi4_arvalid = Signal() # output
+ self.m_axi4_arready = Signal() # input
+ self.m_axi4_arlen = Signal(8) # output
+ self.m_axi4_arsize = Signal(3) # output
+ self.m_axi4_arburst = Signal(2) # output
+ self.m_axi4_arlock = Signal() # output
+ self.m_axi4_arprot = Signal(3) # output
+ self.m_axi4_arcache = Signal(4) # output
+ self.m_axi4_aruser = Signal(AXI_USER_WIDTH) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+        # sv2nmigen emitted untranslatable placeholders here; the pass-through
+        # assignments that could be recovered follow below, and the handshake
+        # outputs still to be ported from the SV below are kept as comments:
+        # m.d.comb += self.l1_done_o.eq(...)
+        # m.d.comb += self.m_axi4_arvalid.eq(...)
+        # m.d.comb += self.s_axi4_arready.eq(...)
+ m.d.comb += self.m_axi4_aruser.eq(self.s_axi4_aruser)
+ m.d.comb += self.m_axi4_arcache.eq(self.s_axi4_arcache)
+ m.d.comb += self.m_axi4_arprot.eq(self.s_axi4_arprot)
+ m.d.comb += self.m_axi4_arlock.eq(self.s_axi4_arlock)
+ m.d.comb += self.m_axi4_arburst.eq(self.s_axi4_arburst)
+ m.d.comb += self.m_axi4_arsize.eq(self.s_axi4_arsize)
+ m.d.comb += self.m_axi4_arlen.eq(self.s_axi4_arlen)
+ m.d.comb += self.m_axi4_araddr.eq(self.l1_araddr_i)
+ m.d.comb += self.m_axi4_arid.eq(self.s_axi4_arid)
+        # ENABLE_L2TLB == 0 path from the SV below: tie the L2 handshake
+        # outputs low (l2_available_q is internal to the SV and omitted here)
+        m.d.comb += self.l2_sending_o.eq(0)
+        m.d.comb += self.l2_done_o.eq(0)
+ return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# module axi4_ar_sender
+# #(
+# parameter AXI_ADDR_WIDTH = 40,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4,
+# parameter ENABLE_L2TLB = 0
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# output logic l1_done_o,
+# input logic l1_accept_i,
+# input logic l1_drop_i,
+# input logic l1_save_i,
+#
+# output logic l2_done_o,
+# input logic l2_accept_i,
+# input logic l2_drop_i,
+# output logic l2_sending_o,
+#
+# input logic [AXI_ADDR_WIDTH-1:0] l1_araddr_i,
+# input logic [AXI_ADDR_WIDTH-1:0] l2_araddr_i,
+#
+# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid,
+# input logic s_axi4_arvalid,
+# output logic s_axi4_arready,
+# input logic [7:0] s_axi4_arlen,
+# input logic [2:0] s_axi4_arsize,
+# input logic [1:0] s_axi4_arburst,
+# input logic s_axi4_arlock,
+# input logic [2:0] s_axi4_arprot,
+# input logic [3:0] s_axi4_arcache,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser,
+#
+# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid,
+# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_araddr,
+# output logic m_axi4_arvalid,
+# input logic m_axi4_arready,
+# output logic [7:0] m_axi4_arlen,
+# output logic [2:0] m_axi4_arsize,
+# output logic [1:0] m_axi4_arburst,
+# output logic m_axi4_arlock,
+# output logic [2:0] m_axi4_arprot,
+# output logic [3:0] m_axi4_arcache,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser
+# );
+#
+# logic l1_save;
+#
+# logic l2_sent;
+# logic l2_available_q;
+#
+# assign l1_save = l1_save_i & l2_available_q;
+#
+# assign l1_done_o = s_axi4_arvalid & s_axi4_arready ;
+#
+# // if 1: accept and forward a transaction translated by L1
+# // 2: drop or save request (if L2 slot not occupied already)
+# assign m_axi4_arvalid = (s_axi4_arvalid & l1_accept_i) |
+# l2_sending_o;
+# assign s_axi4_arready = (m_axi4_arvalid & m_axi4_arready & ~l2_sending_o) |
+# (s_axi4_arvalid & (l1_drop_i | l1_save));
+#
+# generate
+# if (ENABLE_L2TLB == 1) begin
+# logic [AXI_USER_WIDTH-1:0] l2_axi4_aruser ;
+# logic [3:0] l2_axi4_arcache ;
+# logic [3:0] l2_axi4_arregion;
+# logic [3:0] l2_axi4_arqos ;
+# logic [2:0] l2_axi4_arprot ;
+# logic l2_axi4_arlock ;
+# logic [1:0] l2_axi4_arburst ;
+# logic [2:0] l2_axi4_arsize ;
+# logic [7:0] l2_axi4_arlen ;
+# logic [AXI_ID_WIDTH-1:0] l2_axi4_arid ;
+#
+# assign m_axi4_aruser = l2_sending_o ? l2_axi4_aruser : s_axi4_aruser;
+# assign m_axi4_arcache = l2_sending_o ? l2_axi4_arcache : s_axi4_arcache;
+# assign m_axi4_arprot = l2_sending_o ? l2_axi4_arprot : s_axi4_arprot;
+# assign m_axi4_arlock = l2_sending_o ? l2_axi4_arlock : s_axi4_arlock;
+# assign m_axi4_arburst = l2_sending_o ? l2_axi4_arburst : s_axi4_arburst;
+# assign m_axi4_arsize = l2_sending_o ? l2_axi4_arsize : s_axi4_arsize;
+# assign m_axi4_arlen = l2_sending_o ? l2_axi4_arlen : s_axi4_arlen;
+# assign m_axi4_araddr = l2_sending_o ? l2_araddr_i : l1_araddr_i;
+# assign m_axi4_arid = l2_sending_o ? l2_axi4_arid : s_axi4_arid;
+#
+# // Buffer AXI signals in case of L1 miss
+# always @(posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# l2_axi4_aruser <= 'b0;
+# l2_axi4_arcache <= 'b0;
+# l2_axi4_arprot <= 'b0;
+# l2_axi4_arlock <= 1'b0;
+# l2_axi4_arburst <= 'b0;
+# l2_axi4_arsize <= 'b0;
+# l2_axi4_arlen <= 'b0;
+# l2_axi4_arid <= 'b0;
+# end else if (l1_save) begin
+# l2_axi4_aruser <= s_axi4_aruser;
+# l2_axi4_arcache <= s_axi4_arcache;
+# l2_axi4_arprot <= s_axi4_arprot;
+# l2_axi4_arlock <= s_axi4_arlock;
+# l2_axi4_arburst <= s_axi4_arburst;
+# l2_axi4_arsize <= s_axi4_arsize;
+# l2_axi4_arlen <= s_axi4_arlen;
+# l2_axi4_arid <= s_axi4_arid;
+# end
+# end
+#
+# // signal that an l1_save_i can be accepted
+# always @(posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# l2_available_q <= 1'b1;
+# end else if (l2_sent | l2_drop_i) begin
+# l2_available_q <= 1'b1;
+# end else if (l1_save) begin
+# l2_available_q <= 1'b0;
+# end
+# end
+#
+# assign l2_sending_o = l2_accept_i & ~l2_available_q;
+# assign l2_sent = l2_sending_o & m_axi4_arvalid & m_axi4_arready;
+#
+# // if 1: having sent out a transaction translated by L2
+# // 2: drop request (L2 slot is available again)
+# assign l2_done_o = l2_sent | l2_drop_i;
+#
+# end else begin // !`ifdef ENABLE_L2TLB
+# assign m_axi4_aruser = s_axi4_aruser;
+# assign m_axi4_arcache = s_axi4_arcache;
+# assign m_axi4_arprot = s_axi4_arprot;
+# assign m_axi4_arlock = s_axi4_arlock;
+# assign m_axi4_arburst = s_axi4_arburst;
+# assign m_axi4_arsize = s_axi4_arsize;
+# assign m_axi4_arlen = s_axi4_arlen;
+# assign m_axi4_araddr = l1_araddr_i;
+# assign m_axi4_arid = s_axi4_arid;
+#
+# assign l2_sending_o = 1'b0;
+# assign l2_available_q = 1'b0;
+# assign l2_done_o = 1'b0;
+# end // else: !if(ENABLE_L2TLB == 1)
+# endgenerate
+#
+# endmodule
+#
+#
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_aw_buffer(Elaboratable):
+
+    def __init__(self, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # parameter defaults taken from the SV module header below
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input
+ self.s_axi4_awaddr = Signal(32) # input
+ self.s_axi4_awvalid = Signal() # input
+ self.s_axi4_awready = Signal() # output
+ self.s_axi4_awlen = Signal(8) # input
+ self.s_axi4_awsize = Signal(3) # input
+ self.s_axi4_awburst = Signal(2) # input
+ self.s_axi4_awlock = Signal() # input
+ self.s_axi4_awprot = Signal(3) # input
+ self.s_axi4_awcache = Signal(4) # input
+ self.s_axi4_awregion = Signal(4) # input
+ self.s_axi4_awqos = Signal(4) # input
+ self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output
+ self.m_axi4_awaddr = Signal(32) # output
+ self.m_axi4_awvalid = Signal() # output
+ self.m_axi4_awready = Signal() # input
+ self.m_axi4_awlen = Signal(8) # output
+ self.m_axi4_awsize = Signal(3) # output
+ self.m_axi4_awburst = Signal(2) # output
+ self.m_axi4_awlock = Signal() # output
+ self.m_axi4_awprot = Signal(3) # output
+ self.m_axi4_awcache = Signal(4) # output
+ self.m_axi4_awregion = Signal(4) # output
+ self.m_axi4_awqos = Signal(4) # output
+ self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+        # sv2nmigen emitted untranslatable placeholders here: the s_axi4_aw*
+        # inputs (awcache, awprot, awlock, awburst, awsize, awlen, awaddr,
+        # awregion, awqos, awid, awuser) are meant to be packed into the
+        # buffer's data_in, and the corresponding m_axi4_aw* outputs unpacked
+        # from data_out, exactly as in the SV below. Until that buffer is
+        # ported, no assignments are made.
+ return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# module axi4_aw_buffer
+# #(
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid,
+# input logic [31:0] s_axi4_awaddr,
+# input logic s_axi4_awvalid,
+# output logic s_axi4_awready,
+# input logic [7:0] s_axi4_awlen,
+# input logic [2:0] s_axi4_awsize,
+# input logic [1:0] s_axi4_awburst,
+# input logic s_axi4_awlock,
+# input logic [2:0] s_axi4_awprot,
+# input logic [3:0] s_axi4_awcache,
+# input logic [3:0] s_axi4_awregion,
+# input logic [3:0] s_axi4_awqos,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser,
+#
+# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid,
+# output logic [31:0] m_axi4_awaddr,
+# output logic m_axi4_awvalid,
+# input logic m_axi4_awready,
+# output logic [7:0] m_axi4_awlen,
+# output logic [2:0] m_axi4_awsize,
+# output logic [1:0] m_axi4_awburst,
+# output logic m_axi4_awlock,
+# output logic [2:0] m_axi4_awprot,
+# output logic [3:0] m_axi4_awcache,
+# output logic [3:0] m_axi4_awregion,
+# output logic [3:0] m_axi4_awqos,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser
+# );
+#
+# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_in;
+# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_out;
+#
+# assign data_in [3:0] = s_axi4_awcache;
+# assign data_in [6:4] = s_axi4_awprot;
+# assign data_in [7] = s_axi4_awlock;
+# assign data_in [9:8] = s_axi4_awburst;
+# assign data_in [12:10] = s_axi4_awsize;
+# assign data_in [20:13] = s_axi4_awlen;
+# assign data_in [52:21] = s_axi4_awaddr;
+# assign data_in [56:53] = s_axi4_awregion;
+# assign data_in [60:57] = s_axi4_awqos;
+# assign data_in [60+AXI_ID_WIDTH:61] = s_axi4_awid;
+# assign data_in [60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH] = s_axi4_awuser;
+#
+# assign m_axi4_awcache = data_out[3:0];
+# assign m_axi4_awprot = data_out[6:4];
+# assign m_axi4_awlock = data_out[7];
+# assign m_axi4_awburst = data_out[9:8];
+# assign m_axi4_awsize = data_out[12:10];
+# assign m_axi4_awlen = data_out[20:13];
+# assign m_axi4_awaddr = data_out[52:21];
+# assign m_axi4_awregion = data_out[56:53];
+# assign m_axi4_awqos = data_out[60:57];
+# assign m_axi4_awid = data_out[60+AXI_ID_WIDTH:61];
+# assign m_axi4_awuser = data_out[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH];
+#
+# axi_buffer_rab
+# #(
+# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+61 ),
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_buffer
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# .valid_out ( m_axi4_awvalid ),
+# .data_out ( data_out ),
+# .ready_in ( m_axi4_awready ),
+# .valid_in ( s_axi4_awvalid ),
+# .data_in ( data_in ),
+# .ready_out ( s_axi4_awready )
+# );
+# endmodule
+#
+#
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_aw_sender(Elaboratable):
+
+    def __init__(self, AXI_ADDR_WIDTH=40, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # parameter defaults taken from the SV module header below
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.l1_done_o = Signal() # output
+ self.l1_accept_i = Signal() # input
+ self.l1_drop_i = Signal() # input
+ self.l1_save_i = Signal() # input
+ self.l2_done_o = Signal() # output
+ self.l2_accept_i = Signal() # input
+ self.l2_drop_i = Signal() # input
+ self.l2_sending_o = Signal() # output
+ self.l1_awaddr_i = Signal(AXI_ADDR_WIDTH) # input
+ self.l2_awaddr_i = Signal(AXI_ADDR_WIDTH) # input
+ self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input
+ self.s_axi4_awvalid = Signal() # input
+ self.s_axi4_awready = Signal() # output
+ self.s_axi4_awlen = Signal(8) # input
+ self.s_axi4_awsize = Signal(3) # input
+ self.s_axi4_awburst = Signal(2) # input
+ self.s_axi4_awlock = Signal() # input
+ self.s_axi4_awprot = Signal(3) # input
+ self.s_axi4_awcache = Signal(4) # input
+ self.s_axi4_awregion = Signal(4) # input
+ self.s_axi4_awqos = Signal(4) # input
+ self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output
+ self.m_axi4_awaddr = Signal(AXI_ADDR_WIDTH) # output
+ self.m_axi4_awvalid = Signal() # output
+ self.m_axi4_awready = Signal() # input
+ self.m_axi4_awlen = Signal(8) # output
+ self.m_axi4_awsize = Signal(3) # output
+ self.m_axi4_awburst = Signal(2) # output
+ self.m_axi4_awlock = Signal() # output
+ self.m_axi4_awprot = Signal(3) # output
+ self.m_axi4_awcache = Signal(4) # output
+ self.m_axi4_awregion = Signal(4) # output
+ self.m_axi4_awqos = Signal(4) # output
+ self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+        # sv2nmigen emitted untranslatable placeholders here; the pass-through
+        # assignments that could be recovered follow below, and the handshake
+        # outputs still to be ported from the SV below are kept as comments:
+        # m.d.comb += self.l1_done_o.eq(...)
+        # m.d.comb += self.m_axi4_awvalid.eq(...)
+        # m.d.comb += self.s_axi4_awready.eq(...)
+ m.d.comb += self.m_axi4_awuser.eq(self.s_axi4_awuser)
+ m.d.comb += self.m_axi4_awcache.eq(self.s_axi4_awcache)
+ m.d.comb += self.m_axi4_awregion.eq(self.s_axi4_awregion)
+ m.d.comb += self.m_axi4_awqos.eq(self.s_axi4_awqos)
+ m.d.comb += self.m_axi4_awprot.eq(self.s_axi4_awprot)
+ m.d.comb += self.m_axi4_awlock.eq(self.s_axi4_awlock)
+ m.d.comb += self.m_axi4_awburst.eq(self.s_axi4_awburst)
+ m.d.comb += self.m_axi4_awsize.eq(self.s_axi4_awsize)
+ m.d.comb += self.m_axi4_awlen.eq(self.s_axi4_awlen)
+ m.d.comb += self.m_axi4_awaddr.eq(self.l1_awaddr_i)
+ m.d.comb += self.m_axi4_awid.eq(self.s_axi4_awid)
+        # ENABLE_L2TLB == 0 path from the SV below: tie the L2 handshake
+        # outputs low (l2_available_q is internal to the SV and omitted here)
+        m.d.comb += self.l2_sending_o.eq(0)
+        m.d.comb += self.l2_done_o.eq(0)
+ return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# module axi4_aw_sender
+# #(
+# parameter AXI_ADDR_WIDTH = 40,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4,
+# parameter ENABLE_L2TLB = 0
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# output logic l1_done_o,
+# input logic l1_accept_i,
+# input logic l1_drop_i,
+# input logic l1_save_i,
+#
+# output logic l2_done_o,
+# input logic l2_accept_i,
+# input logic l2_drop_i,
+# output logic l2_sending_o,
+#
+# input logic [AXI_ADDR_WIDTH-1:0] l1_awaddr_i,
+# input logic [AXI_ADDR_WIDTH-1:0] l2_awaddr_i,
+#
+# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid,
+# input logic s_axi4_awvalid,
+# output logic s_axi4_awready,
+# input logic [7:0] s_axi4_awlen,
+# input logic [2:0] s_axi4_awsize,
+# input logic [1:0] s_axi4_awburst,
+# input logic s_axi4_awlock,
+# input logic [2:0] s_axi4_awprot,
+# input logic [3:0] s_axi4_awcache,
+# input logic [3:0] s_axi4_awregion,
+# input logic [3:0] s_axi4_awqos,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser,
+#
+# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid,
+# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_awaddr,
+# output logic m_axi4_awvalid,
+# input logic m_axi4_awready,
+# output logic [7:0] m_axi4_awlen,
+# output logic [2:0] m_axi4_awsize,
+# output logic [1:0] m_axi4_awburst,
+# output logic m_axi4_awlock,
+# output logic [2:0] m_axi4_awprot,
+# output logic [3:0] m_axi4_awcache,
+# output logic [3:0] m_axi4_awregion,
+# output logic [3:0] m_axi4_awqos,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser
+# );
+#
+# logic l1_save;
+#
+# logic l2_sent;
+# logic l2_available_q;
+#
+# assign l1_save = l1_save_i & l2_available_q;
+#
+# assign l1_done_o = s_axi4_awvalid & s_axi4_awready ;
+#
+# // if 1: accept and forward a transaction translated by L1
+# // 2: drop or save request (if L2 slot not occupied already)
+# assign m_axi4_awvalid = (s_axi4_awvalid & l1_accept_i) |
+# l2_sending_o;
+# assign s_axi4_awready = (m_axi4_awvalid & m_axi4_awready & ~l2_sending_o) |
+# (s_axi4_awvalid & (l1_drop_i | l1_save));
+#
+# generate
+# if (ENABLE_L2TLB == 1) begin
+# logic [AXI_USER_WIDTH-1:0] l2_axi4_awuser ;
+# logic [3:0] l2_axi4_awcache ;
+# logic [3:0] l2_axi4_awregion;
+# logic [3:0] l2_axi4_awqos ;
+# logic [2:0] l2_axi4_awprot ;
+# logic l2_axi4_awlock ;
+# logic [1:0] l2_axi4_awburst ;
+# logic [2:0] l2_axi4_awsize ;
+# logic [7:0] l2_axi4_awlen ;
+# logic [AXI_ID_WIDTH-1:0] l2_axi4_awid ;
+#
+# assign m_axi4_awuser = l2_sending_o ? l2_axi4_awuser : s_axi4_awuser;
+# assign m_axi4_awcache = l2_sending_o ? l2_axi4_awcache : s_axi4_awcache;
+# assign m_axi4_awregion = l2_sending_o ? l2_axi4_awregion : s_axi4_awregion;
+# assign m_axi4_awqos = l2_sending_o ? l2_axi4_awqos : s_axi4_awqos;
+# assign m_axi4_awprot = l2_sending_o ? l2_axi4_awprot : s_axi4_awprot;
+# assign m_axi4_awlock = l2_sending_o ? l2_axi4_awlock : s_axi4_awlock;
+# assign m_axi4_awburst = l2_sending_o ? l2_axi4_awburst : s_axi4_awburst;
+# assign m_axi4_awsize = l2_sending_o ? l2_axi4_awsize : s_axi4_awsize;
+# assign m_axi4_awlen = l2_sending_o ? l2_axi4_awlen : s_axi4_awlen;
+# assign m_axi4_awaddr = l2_sending_o ? l2_awaddr_i : l1_awaddr_i;
+# assign m_axi4_awid = l2_sending_o ? l2_axi4_awid : s_axi4_awid;
+#
+# // buffer AXI signals in case of L1 miss
+# always @(posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# l2_axi4_awuser <= 'b0;
+# l2_axi4_awcache <= 'b0;
+# l2_axi4_awregion <= 'b0;
+# l2_axi4_awqos <= 'b0;
+# l2_axi4_awprot <= 'b0;
+# l2_axi4_awlock <= 1'b0;
+# l2_axi4_awburst <= 'b0;
+# l2_axi4_awsize <= 'b0;
+# l2_axi4_awlen <= 'b0;
+# l2_axi4_awid <= 'b0;
+# end else if (l1_save) begin
+# l2_axi4_awuser <= s_axi4_awuser;
+# l2_axi4_awcache <= s_axi4_awcache;
+# l2_axi4_awregion <= s_axi4_awregion;
+# l2_axi4_awqos <= s_axi4_awqos;
+# l2_axi4_awprot <= s_axi4_awprot;
+# l2_axi4_awlock <= s_axi4_awlock;
+# l2_axi4_awburst <= s_axi4_awburst;
+# l2_axi4_awsize <= s_axi4_awsize;
+# l2_axi4_awlen <= s_axi4_awlen;
+# l2_axi4_awid <= s_axi4_awid;
+# end
+# end
+#
+# // signal that an l1_save_i can be accepted
+# always @(posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# l2_available_q <= 1'b1;
+# end else if (l2_sent | l2_drop_i) begin
+# l2_available_q <= 1'b1;
+# end else if (l1_save) begin
+# l2_available_q <= 1'b0;
+# end
+# end
+#
+# assign l2_sending_o = l2_accept_i & ~l2_available_q;
+# assign l2_sent = l2_sending_o & m_axi4_awvalid & m_axi4_awready;
+#
+# // if 1: having sent out a transaction translated by L2
+# // 2: drop request (L2 slot is available again)
+# assign l2_done_o = l2_sent | l2_drop_i;
+#
+# end else begin // !`ifdef ENABLE_L2TLB
+# assign m_axi4_awuser = s_axi4_awuser;
+# assign m_axi4_awcache = s_axi4_awcache;
+# assign m_axi4_awregion = s_axi4_awregion;
+# assign m_axi4_awqos = s_axi4_awqos;
+# assign m_axi4_awprot = s_axi4_awprot;
+# assign m_axi4_awlock = s_axi4_awlock;
+# assign m_axi4_awburst = s_axi4_awburst;
+# assign m_axi4_awsize = s_axi4_awsize;
+# assign m_axi4_awlen = s_axi4_awlen;
+# assign m_axi4_awaddr = l1_awaddr_i;
+# assign m_axi4_awid = s_axi4_awid;
+#
+# assign l2_sending_o = 1'b0;
+# assign l2_available_q = 1'b0;
+# assign l2_done_o = 1'b0;
+# end // !`ifdef ENABLE_L2TLB
+# endgenerate
+#
+# endmodule
+#
+#
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_b_buffer(Elaboratable):
+
+    def __init__(self, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # parameter defaults taken from the SV module header below
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output
+ self.s_axi4_bresp = Signal(2) # output
+ self.s_axi4_bvalid = Signal() # output
+ self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output
+ self.s_axi4_bready = Signal() # input
+ self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input
+ self.m_axi4_bresp = Signal(2) # input
+ self.m_axi4_bvalid = Signal() # input
+ self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_bready = Signal() # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+        # sv2nmigen emitted untranslatable placeholders here: m_axi4_bresp,
+        # m_axi4_bid and m_axi4_buser are meant to be packed into the buffer's
+        # data_in, and s_axi4_bresp/bid/buser unpacked from data_out, as in
+        # the SV below. Until that buffer is ported, no assignments are made.
+ return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# module axi4_b_buffer
+# #(
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid,
+# output logic [1:0] s_axi4_bresp,
+# output logic s_axi4_bvalid,
+# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser,
+# input logic s_axi4_bready,
+#
+# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid,
+# input logic [1:0] m_axi4_bresp,
+# input logic m_axi4_bvalid,
+# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser,
+# output logic m_axi4_bready
+# );
+#
+# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_in;
+# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_out;
+#
+# assign data_in [1:0] = m_axi4_bresp;
+# assign data_in [AXI_ID_WIDTH+1:2] = m_axi4_bid;
+# assign data_in[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2] = m_axi4_buser;
+#
+# assign s_axi4_buser = data_out[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2];
+# assign s_axi4_bid = data_out[AXI_ID_WIDTH+1:2];
+# assign s_axi4_bresp = data_out[1:0];
+#
+# axi_buffer_rab
+# #(
+# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+2 ),
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_buffer
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# .valid_out( s_axi4_bvalid ),
+# .data_out ( data_out ),
+# .ready_in ( s_axi4_bready ),
+# .valid_in ( m_axi4_bvalid ),
+# .data_in ( data_in ),
+# .ready_out( m_axi4_bready )
+# );
+#
+# endmodule
+#
+#
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_b_sender(Elaboratable):
+
+    def __init__(self, AXI_ID_WIDTH=10, AXI_USER_WIDTH=4):
+        # parameter defaults taken from the SV module header below
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.drop_i = Signal() # input
+ self.done_o = Signal() # output
+ self.id_i = Signal(AXI_ID_WIDTH) # input
+ self.prefetch_i = Signal() # input
+ self.hit_i = Signal() # input
+ self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output
+ self.s_axi4_bresp = Signal(2) # output
+ self.s_axi4_bvalid = Signal() # output
+ self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output
+ self.s_axi4_bready = Signal() # input
+ self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input
+ self.m_axi4_bresp = Signal(2) # input
+ self.m_axi4_bvalid = Signal() # input
+ self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_bready = Signal() # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+        # sv2nmigen emitted untranslatable placeholders here; the FIFO,
+        # dropping state machine and output muxes from the SV below still
+        # need to be ported by hand, so they are left as comments for now:
+        # m.d.comb += self.done_o.eq(...)
+        # m.d.comb += self.s_axi4_bid.eq(...)
+        # m.d.comb += self.s_axi4_bresp.eq(...)
+        # m.d.comb += self.s_axi4_buser.eq(...)
+        # m.d.comb += self.s_axi4_bvalid.eq(...)
+        # m.d.comb += self.m_axi4_bready.eq(...)
+ return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# module axi4_b_sender
+# #(
+# parameter AXI_ID_WIDTH = 10,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# input logic drop_i,
+# output logic done_o,
+# input logic [AXI_ID_WIDTH-1:0] id_i,
+# input logic prefetch_i,
+# input logic hit_i,
+#
+# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid,
+# output logic [1:0] s_axi4_bresp,
+# output logic s_axi4_bvalid,
+# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser,
+# input logic s_axi4_bready,
+#
+# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid,
+# input logic [1:0] m_axi4_bresp,
+# input logic m_axi4_bvalid,
+# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser,
+# output logic m_axi4_bready
+# );
+#
+# logic fifo_valid;
+# logic fifo_pop;
+# logic fifo_push;
+# logic fifo_ready;
+# logic [AXI_ID_WIDTH-1:0] id;
+# logic prefetch;
+# logic hit;
+#
+# logic dropping;
+#
+# axi_buffer_rab
+# #(
+# .DATA_WIDTH ( 2+AXI_ID_WIDTH ),
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_fifo
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# // Pop
+# .data_out ( {prefetch, hit, id} ),
+# .valid_out ( fifo_valid ),
+# .ready_in ( fifo_pop ),
+# // Push
+# .valid_in ( fifo_push ),
+# .data_in ( {prefetch_i, hit_i, id_i} ),
+# .ready_out ( fifo_ready )
+# );
+#
+# assign fifo_push = drop_i & fifo_ready;
+# assign done_o = fifo_push;
+#
+# assign fifo_pop = dropping & s_axi4_bready;
+#
+# always @ (posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# dropping <= 1'b0;
+# end else begin
+# if (fifo_valid && ~dropping)
+# dropping <= 1'b1;
+# else if (fifo_pop)
+# dropping <= 1'b0;
+# end
+# end
+#
+# assign s_axi4_buser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_buser;
+# assign s_axi4_bid = dropping ? id : m_axi4_bid;
+#
+# assign s_axi4_bresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, mutli, prot
+# (dropping & prefetch ) ? 2'b10 : // prefetch miss
+# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot
+# (dropping ) ? 2'b10 : // non-prefetch miss
+# m_axi4_bresp;
+#
+# assign s_axi4_bvalid = dropping | m_axi4_bvalid;
+# assign m_axi4_bready = ~dropping & s_axi4_bready;
+#
+# endmodule
+#
+#
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_r_buffer(Elaboratable):
+
+    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # parameter defaults taken from the SV module header below
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output
+ self.s_axi4_rresp = Signal(2) # output
+ self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output
+ self.s_axi4_rlast = Signal() # output
+ self.s_axi4_rvalid = Signal() # output
+ self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output
+ self.s_axi4_rready = Signal() # input
+ self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input
+ self.m_axi4_rresp = Signal(2) # input
+ self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input
+ self.m_axi4_rlast = Signal() # input
+ self.m_axi4_rvalid = Signal() # input
+ self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_rready = Signal() # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+        # sv2nmigen emitted untranslatable placeholders here: the m_axi4_r*
+        # inputs are meant to be packed into the buffer's data_in and the
+        # s_axi4_r* outputs unpacked from data_out, exactly as in the SV
+        # below. Until that buffer is ported, no assignments are made.
+ return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# module axi4_r_buffer
+# #(
+# parameter AXI_DATA_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid,
+# output logic [1:0] s_axi4_rresp,
+# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
+# output logic s_axi4_rlast,
+# output logic s_axi4_rvalid,
+# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser,
+# input logic s_axi4_rready,
+#
+# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid,
+# input logic [1:0] m_axi4_rresp,
+# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata,
+# input logic m_axi4_rlast,
+# input logic m_axi4_rvalid,
+# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser,
+# output logic m_axi4_rready
+# );
+#
+# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_in;
+# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_out;
+#
+# localparam ID_START = 3;
+# localparam ID_END = AXI_ID_WIDTH-1 + ID_START;
+# localparam DATA_START = ID_END + 1;
+# localparam DATA_END = AXI_DATA_WIDTH-1 + DATA_START;
+# localparam USER_START = DATA_END + 1;
+# localparam USER_END = AXI_USER_WIDTH-1 + USER_START;
+#
+# assign data_in [1:0] = m_axi4_rresp;
+# assign data_in [2] = m_axi4_rlast;
+# assign data_in [ID_END:ID_START] = m_axi4_rid;
+# assign data_in[DATA_END:DATA_START] = m_axi4_rdata;
+# assign data_in[USER_END:USER_START] = m_axi4_ruser;
+#
+# assign s_axi4_rresp = data_out [1:0];
+# assign s_axi4_rlast = data_out [2];
+# assign s_axi4_rid = data_out [ID_END:ID_START];
+# assign s_axi4_rdata = data_out[DATA_END:DATA_START];
+# assign s_axi4_ruser = data_out[USER_END:USER_START];
+#
+# axi_buffer_rab
+# #(
+# .DATA_WIDTH ( AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3 ),
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_buffer
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# // Pop
+# .valid_out ( s_axi4_rvalid ),
+# .data_out ( data_out ),
+# .ready_in ( s_axi4_rready ),
+# // Push
+# .valid_in ( m_axi4_rvalid ),
+# .data_in ( data_in ),
+# .ready_out ( m_axi4_rready )
+# );
+#
+# endmodule
+#
+#
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_r_sender(Elaboratable):
+
+    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # parameter defaults taken from the SV module header below
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.drop_i = Signal() # input
+ self.drop_len_i = Signal(8) # input
+ self.done_o = Signal() # output
+ self.id_i = Signal(AXI_ID_WIDTH) # input
+ self.prefetch_i = Signal() # input
+ self.hit_i = Signal() # input
+ self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output
+ self.s_axi4_rresp = Signal(2) # output
+ self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output
+ self.s_axi4_rlast = Signal() # output
+ self.s_axi4_rvalid = Signal() # output
+ self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output
+ self.s_axi4_rready = Signal() # input
+ self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input
+ self.m_axi4_rresp = Signal(2) # input
+ self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input
+ self.m_axi4_rlast = Signal() # input
+ self.m_axi4_rvalid = Signal() # input
+ self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_rready = Signal() # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+        # sv2nmigen emitted untranslatable placeholders here; the FIFO and
+        # dropping state machine from the SV below still need to be ported,
+        # so only the direct data pass-through is kept:
+        m.d.comb += self.s_axi4_rdata.eq(self.m_axi4_rdata)
+        # m.d.comb += self.done_o.eq(...)
+        # m.d.comb += self.s_axi4_ruser.eq(...)
+        # m.d.comb += self.s_axi4_rid.eq(...)
+        # m.d.comb += self.s_axi4_rresp.eq(...)
+        # m.d.comb += self.s_axi4_rvalid.eq(...)
+        # m.d.comb += self.m_axi4_rready.eq(...)
+ return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# //import CfMath::log2;
+#
+# module axi4_r_sender
+# #(
+# parameter AXI_DATA_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# input logic drop_i,
+# input logic [7:0] drop_len_i,
+# output logic done_o,
+# input logic [AXI_ID_WIDTH-1:0] id_i,
+# input logic prefetch_i,
+# input logic hit_i,
+#
+# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid,
+# output logic [1:0] s_axi4_rresp,
+# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
+# output logic s_axi4_rlast,
+# output logic s_axi4_rvalid,
+# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser,
+# input logic s_axi4_rready,
+#
+# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid,
+# input logic [1:0] m_axi4_rresp,
+# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata,
+# input logic m_axi4_rlast,
+# input logic m_axi4_rvalid,
+# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser,
+# output logic m_axi4_rready
+# );
+#
+# localparam BUFFER_DEPTH = 16;
+#
+# logic fifo_valid;
+# logic fifo_pop;
+# logic fifo_push;
+# logic fifo_ready;
+# logic [AXI_ID_WIDTH-1:0] id;
+# logic [7:0] len;
+# logic prefetch;
+# logic hit;
+#
+# logic dropping;
+#
+# enum logic [1:0] { FORWARDING, DROPPING }
+# state_d, state_q;
+# logic burst_ongoing_d, burst_ongoing_q;
+# logic [7:0] drop_cnt_d, drop_cnt_q;
+#
+# axi_buffer_rab
+# #(
+# .DATA_WIDTH ( 2+AXI_ID_WIDTH+8 ),
+# .BUFFER_DEPTH ( BUFFER_DEPTH )
+# )
+# u_fifo
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# // Pop
+# .data_out ( {prefetch, hit, id, len} ),
+# .valid_out ( fifo_valid ),
+# .ready_in ( fifo_pop ),
+# // Push
+# .valid_in ( fifo_push ),
+# .data_in ( {prefetch_i, hit_i, id_i, drop_len_i} ),
+# .ready_out ( fifo_ready )
+# );
+#
+# assign fifo_push = drop_i & fifo_ready;
+# assign done_o = fifo_push;
+#
+# always_comb begin
+# burst_ongoing_d = burst_ongoing_q;
+# drop_cnt_d = drop_cnt_q;
+# dropping = 1'b0;
+# s_axi4_rlast = 1'b0;
+# fifo_pop = 1'b0;
+# state_d = state_q;
+#
+# case (state_q)
+# FORWARDING: begin
+# s_axi4_rlast = m_axi4_rlast;
+# // Remember whether there is currently a burst ongoing.
+# if (m_axi4_rvalid && m_axi4_rready) begin
+# if (m_axi4_rlast) begin
+# burst_ongoing_d = 1'b0;
+# end else begin
+# burst_ongoing_d = 1'b1;
+# end
+# end
+# // If there is no burst ongoing and the FIFO has a drop request ready, process it.
+# if (!burst_ongoing_d && fifo_valid) begin
+# drop_cnt_d = len;
+# state_d = DROPPING;
+# end
+# end
+#
+# DROPPING: begin
+# dropping = 1'b1;
+# s_axi4_rlast = (drop_cnt_q == '0);
+# // Handshake on slave interface
+# if (s_axi4_rready) begin
+# drop_cnt_d -= 1;
+# if (drop_cnt_q == '0) begin
+# drop_cnt_d = '0;
+# fifo_pop = 1'b1;
+# state_d = FORWARDING;
+# end
+# end
+# end
+#
+# default: begin
+# state_d = FORWARDING;
+# end
+# endcase
+# end
+#
+# assign s_axi4_rdata = m_axi4_rdata;
+#
+# assign s_axi4_ruser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_ruser;
+# assign s_axi4_rid = dropping ? id : m_axi4_rid;
+#
+#   assign s_axi4_rresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, multi, prot
+# (dropping & prefetch ) ? 2'b10 : // prefetch miss
+# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot
+# (dropping ) ? 2'b10 : // non-prefetch miss
+# m_axi4_rresp;
+#
+# assign s_axi4_rvalid = dropping | m_axi4_rvalid;
+# assign m_axi4_rready = ~dropping & s_axi4_rready;
+#
+# always_ff @(posedge axi4_aclk, negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# burst_ongoing_q <= 1'b0;
+# drop_cnt_q <= 'b0;
+# state_q <= FORWARDING;
+# end else begin
+# burst_ongoing_q <= burst_ongoing_d;
+# drop_cnt_q <= drop_cnt_d;
+# state_q <= state_d;
+# end
+# end
+#
+# endmodule
+#
+#
+#
+#
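+# The FORWARDING/DROPPING machine above maps naturally onto nmigen's m.FSM().
+# Below is a hedged sketch of just the drop-counter handshake: the signal
+# names are assumptions for illustration, and the drop FIFO, burst_ongoing
+# tracking and rresp muxing are omitted.
+
+def _r_drop_fsm_sketch():
+    from nmigen import Module, Signal
+    m = Module()
+    fifo_valid = Signal()      # a drop request is waiting in the FIFO
+    fifo_pop = Signal()
+    drop_len = Signal(8)       # length popped from the drop FIFO (not modelled)
+    s_rready = Signal()
+    burst_ongoing = Signal()   # tracking omitted in this sketch
+    drop_cnt = Signal(8)
+    dropping = Signal()
+    s_rlast = Signal()
+    with m.FSM():
+        with m.State("FORWARDING"):
+            # Start dropping only between bursts, when a request is pending.
+            with m.If(~burst_ongoing & fifo_valid):
+                m.d.sync += drop_cnt.eq(drop_len)
+                m.next = "DROPPING"
+        with m.State("DROPPING"):
+            m.d.comb += dropping.eq(1)
+            m.d.comb += s_rlast.eq(drop_cnt == 0)
+            with m.If(s_rready):
+                m.d.sync += drop_cnt.eq(drop_cnt - 1)
+                with m.If(drop_cnt == 0):
+                    m.d.sync += drop_cnt.eq(0)
+                    m.d.comb += fifo_pop.eq(1)
+                    m.next = "FORWARDING"
+    return m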
--- /dev/null
+# this file has been generated by sv2nmigen
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_w_buffer(Elaboratable):
+
+    def __init__(self, AXI_DATA_WIDTH=32, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+        # Parameter defaults follow the SV module below.
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.l1_done_o = Signal() # output
+ self.l1_accept_i = Signal() # input
+ self.l1_save_i = Signal() # input
+ self.l1_drop_i = Signal() # input
+ self.l1_master_i = Signal() # input
+ self.l1_id_i = Signal(AXI_ID_WIDTH) # input
+ self.l1_len_i = Signal(8) # input
+ self.l1_prefetch_i = Signal() # input
+ self.l1_hit_i = Signal() # input
+ self.l2_done_o = Signal() # output
+ self.l2_accept_i = Signal() # input
+ self.l2_drop_i = Signal() # input
+ self.l2_master_i = Signal() # input
+ self.l2_id_i = Signal(AXI_ID_WIDTH) # input
+ self.l2_len_i = Signal(8) # input
+ self.l2_prefetch_i = Signal() # input
+ self.l2_hit_i = Signal() # input
+ self.master_select_o = Signal() # output
+ self.input_stall_o = Signal() # output
+ self.output_stall_o = Signal() # output
+ self.b_drop_o = Signal() # output
+ self.b_done_i = Signal() # input
+ self.id_o = Signal(AXI_ID_WIDTH) # output
+ self.prefetch_o = Signal() # output
+ self.hit_o = Signal() # output
+ self.s_axi4_wdata = Signal(AXI_DATA_WIDTH) # input
+ self.s_axi4_wvalid = Signal() # input
+ self.s_axi4_wready = Signal() # output
+        self.s_axi4_wstrb = Signal(AXI_DATA_WIDTH // 8)  # input
+ self.s_axi4_wlast = Signal() # input
+ self.s_axi4_wuser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_wdata = Signal(AXI_DATA_WIDTH) # output
+ self.m_axi4_wvalid = Signal() # output
+ self.m_axi4_wready = Signal() # input
+        self.m_axi4_wstrb = Signal(AXI_DATA_WIDTH // 8)  # output
+ self.m_axi4_wlast = Signal() # output
+ self.m_axi4_wuser = Signal(AXI_USER_WIDTH) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+
+
+#
+# //import CfMath::log2;
+#
+# module axi4_w_buffer
+# #(
+# parameter AXI_DATA_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4,
+# parameter ENABLE_L2TLB = 0,
+# parameter HUM_BUFFER_DEPTH = 16
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+#
+# // L1 & L2 interfaces
+# output logic l1_done_o,
+# input logic l1_accept_i,
+# input logic l1_save_i,
+# input logic l1_drop_i,
+# input logic l1_master_i,
+# input logic [AXI_ID_WIDTH-1:0] l1_id_i,
+# input logic [7:0] l1_len_i,
+# input logic l1_prefetch_i,
+# input logic l1_hit_i,
+#
+# output logic l2_done_o,
+# input logic l2_accept_i,
+# input logic l2_drop_i,
+# input logic l2_master_i,
+# input logic [AXI_ID_WIDTH-1:0] l2_id_i,
+# input logic [7:0] l2_len_i,
+# input logic l2_prefetch_i,
+# input logic l2_hit_i,
+#
+# output logic master_select_o,
+# output logic input_stall_o,
+# output logic output_stall_o,
+#
+# // B sender interface
+# output logic b_drop_o,
+# input logic b_done_i,
+# output logic [AXI_ID_WIDTH-1:0] id_o,
+# output logic prefetch_o,
+# output logic hit_o,
+#
+# // AXI W channel interfaces
+# input logic [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
+# input logic s_axi4_wvalid,
+# output logic s_axi4_wready,
+# input logic [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
+# input logic s_axi4_wlast,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_wuser,
+#
+# output logic [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
+# output logic m_axi4_wvalid,
+# input logic m_axi4_wready,
+# output logic [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
+# output logic m_axi4_wlast,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_wuser
+# );
+#
+"""
+
+ localparam BUFFER_WIDTH = AXI_DATA_WIDTH+AXI_USER_WIDTH+AXI_DATA_WIDTH/8+1;
+
+ localparam INPUT_BUFFER_DEPTH = 4;
+ localparam L1_FIFO_DEPTH = 8;
+ localparam L2_FIFO_DEPTH = 4;
+
+ logic [AXI_DATA_WIDTH-1:0] axi4_wdata;
+ logic axi4_wvalid;
+ logic axi4_wready;
+ logic [AXI_DATA_WIDTH/8-1:0] axi4_wstrb;
+ logic axi4_wlast;
+ logic [AXI_USER_WIDTH-1:0] axi4_wuser;
+
+ logic l1_fifo_valid_out;
+ logic l1_fifo_ready_in;
+ logic l1_fifo_valid_in;
+ logic l1_fifo_ready_out;
+
+ logic l1_req;
+ logic l1_accept_cur, l1_save_cur, l1_drop_cur;
+ logic l1_master_cur;
+ logic [AXI_ID_WIDTH-1:0] l1_id_cur;
+ logic [7:0] l1_len_cur;
+ logic l1_hit_cur, l1_prefetch_cur;
+ logic l1_save_in, l1_save_out;
+ logic [log2(L1_FIFO_DEPTH)-1:0] n_l1_save_SP;
+
+ logic l2_fifo_valid_out;
+ logic l2_fifo_ready_in;
+ logic l2_fifo_valid_in;
+ logic l2_fifo_ready_out;
+
+ logic l2_req;
+ logic l2_accept_cur, l2_drop_cur;
+ logic l2_master_cur;
+ logic [AXI_ID_WIDTH-1:0] l2_id_cur;
+ logic [7:0] l2_len_cur;
+ logic l2_hit_cur, l2_prefetch_cur;
+
+ logic fifo_select, fifo_select_SN, fifo_select_SP;
+ logic w_done;
+ logic b_drop_set;
+
+ // HUM buffer signals
+ logic hum_buf_ready_out;
+ logic hum_buf_valid_in;
+ logic hum_buf_ready_in;
+ logic hum_buf_valid_out;
+ logic hum_buf_underfull;
+
+ logic [AXI_DATA_WIDTH-1:0] hum_buf_wdata;
+ logic [AXI_DATA_WIDTH/8-1:0] hum_buf_wstrb;
+ logic hum_buf_wlast;
+ logic [AXI_USER_WIDTH-1:0] hum_buf_wuser;
+
+ logic hum_buf_drop_req_SN, hum_buf_drop_req_SP;
+ logic [7:0] hum_buf_drop_len_SN, hum_buf_drop_len_SP;
+ logic hum_buf_almost_full;
+
+ logic stop_store;
+ logic wlast_in, wlast_out;
+ logic signed [3:0] n_wlast_SN, n_wlast_SP;
+ logic block_forwarding;
+
+ // Search FSM
+ typedef enum logic [3:0] {STORE, BYPASS,
+ WAIT_L1_BYPASS_YES, WAIT_L2_BYPASS_YES,
+ WAIT_L1_BYPASS_NO, WAIT_L2_BYPASS_NO,
+ FLUSH, DISCARD,
+ DISCARD_FINISH}
+ hum_buf_state_t;
+ hum_buf_state_t hum_buf_SP; // Present state
+  hum_buf_state_t hum_buf_SN; // Next state
+
+ axi_buffer_rab
+ #(
+ .DATA_WIDTH ( BUFFER_WIDTH ),
+ .BUFFER_DEPTH ( INPUT_BUFFER_DEPTH )
+ )
+ u_input_buf
+ (
+ .clk ( axi4_aclk ),
+ .rstn ( axi4_arstn ),
+ // Push
+ .data_in ( {s_axi4_wuser, s_axi4_wstrb, s_axi4_wdata, s_axi4_wlast} ),
+ .valid_in ( s_axi4_wvalid ),
+ .ready_out ( s_axi4_wready ),
+ // Pop
+ .data_out ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ),
+ .valid_out ( axi4_wvalid ),
+ .ready_in ( axi4_wready )
+ );
+
+ axi_buffer_rab
+ #(
+ .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+4 ),
+ .BUFFER_DEPTH ( L1_FIFO_DEPTH )
+ )
+ u_l1_fifo
+ (
+ .clk ( axi4_aclk ),
+ .rstn ( axi4_arstn ),
+ // Push
+ .data_in ( {l1_prefetch_i, l1_hit_i, l1_id_i, l1_len_i, l1_master_i, l1_accept_i, l1_save_i, l1_drop_i} ),
+ .valid_in ( l1_fifo_valid_in ),
+ .ready_out ( l1_fifo_ready_out ),
+ // Pop
+ .data_out ( {l1_prefetch_cur, l1_hit_cur, l1_id_cur, l1_len_cur, l1_master_cur, l1_accept_cur, l1_save_cur, l1_drop_cur} ),
+ .valid_out ( l1_fifo_valid_out ),
+ .ready_in ( l1_fifo_ready_in )
+ );
+
+ // Push upon receiving new requests from the TLB.
+ assign l1_req = l1_accept_i | l1_save_i | l1_drop_i;
+ assign l1_fifo_valid_in = l1_req & l1_fifo_ready_out;
+
+ // Signal handshake
+ assign l1_done_o = l1_fifo_valid_in;
+ assign l2_done_o = l2_fifo_valid_in;
+
+ // Stall AW input of L1 TLB
+ assign input_stall_o = ~(l1_fifo_ready_out & l2_fifo_ready_out);
+
+ // Interface b_drop signals + handshake
+ always_comb begin
+ if (fifo_select == 1'b0) begin
+ prefetch_o = l1_prefetch_cur;
+ hit_o = l1_hit_cur;
+ id_o = l1_id_cur;
+
+ l1_fifo_ready_in = w_done | b_done_i;
+ l2_fifo_ready_in = 1'b0;
+ end else begin
+ prefetch_o = l2_prefetch_cur;
+ hit_o = l2_hit_cur;
+ id_o = l2_id_cur;
+
+ l1_fifo_ready_in = 1'b0;
+ l2_fifo_ready_in = w_done | b_done_i;
+ end
+ end
+
+ // Detect when an L1 transaction save request enters or exits the L1 FIFO.
+ assign l1_save_in = l1_fifo_valid_in & l1_save_i;
+ assign l1_save_out = l1_fifo_ready_in & l1_save_cur;
+
+ // Count the number of L1 transaction to save in the L1 FIFO.
+ always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
+ if (axi4_arstn == 0) begin
+ n_l1_save_SP <= '0;
+ end else if (l1_save_in ^ l1_save_out) begin
+ if (l1_save_in) begin
+ n_l1_save_SP <= n_l1_save_SP + 1'b1;
+ end else if (l1_save_out) begin
+ n_l1_save_SP <= n_l1_save_SP - 1'b1;
+ end
+ end
+ end
+
+ // Stall forwarding of AW L1 hits if:
+ // 1. The HUM buffer does not allow to be bypassed.
+ // 2. There are multiple L1 save requests in the FIFO, i.e., multiple L2 outputs pending.
+ assign output_stall_o = (n_l1_save_SP > 1) || (block_forwarding == 1'b1);
+
+ generate
+ if (ENABLE_L2TLB == 1) begin : HUM_BUFFER
+
+ axi_buffer_rab_bram
+ #(
+ .DATA_WIDTH ( BUFFER_WIDTH ),
+ .BUFFER_DEPTH ( HUM_BUFFER_DEPTH )
+ )
+ u_hum_buf
+ (
+ .clk ( axi4_aclk ),
+ .rstn ( axi4_arstn ),
+ // Push
+ .data_in ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ),
+ .valid_in ( hum_buf_valid_in ),
+ .ready_out ( hum_buf_ready_out ),
+ // Pop
+ .data_out ( {hum_buf_wuser, hum_buf_wstrb, hum_buf_wdata, hum_buf_wlast} ),
+ .valid_out ( hum_buf_valid_out ),
+ .ready_in ( hum_buf_ready_in ),
+ // Clear
+ .almost_full ( hum_buf_almost_full ),
+ .underfull ( hum_buf_underfull ),
+ .drop_req ( hum_buf_drop_req_SP ),
+ .drop_len ( hum_buf_drop_len_SP )
+ );
+
+ axi_buffer_rab
+ #(
+ .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+3 ),
+ .BUFFER_DEPTH ( L2_FIFO_DEPTH )
+ )
+ u_l2_fifo
+ (
+ .clk ( axi4_aclk ),
+ .rstn ( axi4_arstn ),
+ // Push
+ .data_in ( {l2_prefetch_i, l2_hit_i, l2_id_i, l2_len_i, l2_master_i, l2_accept_i, l2_drop_i} ),
+ .valid_in ( l2_fifo_valid_in ),
+ .ready_out ( l2_fifo_ready_out ),
+ // Pop
+ .data_out ( {l2_prefetch_cur, l2_hit_cur, l2_id_cur, l2_len_cur, l2_master_cur, l2_accept_cur, l2_drop_cur} ),
+ .valid_out ( l2_fifo_valid_out ),
+ .ready_in ( l2_fifo_ready_in )
+ );
+
+ // Push upon receiving new result from TLB.
+ assign l2_req = l2_accept_i | l2_drop_i;
+ assign l2_fifo_valid_in = l2_req & l2_fifo_ready_out;
+
+ assign wlast_in = axi4_wlast & hum_buf_valid_in & hum_buf_ready_out;
+ assign wlast_out = hum_buf_wlast & hum_buf_valid_out & hum_buf_ready_in;
+
+ always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
+ if (axi4_arstn == 0) begin
+ fifo_select_SP <= 1'b0;
+ hum_buf_drop_len_SP <= 'b0;
+ hum_buf_drop_req_SP <= 1'b0;
+ hum_buf_SP <= STORE;
+ n_wlast_SP <= 'b0;
+ end else begin
+ fifo_select_SP <= fifo_select_SN;
+ hum_buf_drop_len_SP <= hum_buf_drop_len_SN;
+ hum_buf_drop_req_SP <= hum_buf_drop_req_SN;
+ hum_buf_SP <= hum_buf_SN;
+ n_wlast_SP <= n_wlast_SN;
+ end
+ end
+
+ always_comb begin
+ n_wlast_SN = n_wlast_SP;
+ if (hum_buf_drop_req_SP) begin // Happens exactly once per burst to be dropped.
+ n_wlast_SN -= 1;
+ end
+ if (wlast_in) begin
+ n_wlast_SN += 1;
+ end
+ if (wlast_out) begin
+ n_wlast_SN -= 1;
+ end
+ end
+
+ always_comb begin : HUM_BUFFER_FSM
+ hum_buf_SN = hum_buf_SP;
+
+ m_axi4_wlast = 1'b0;
+ m_axi4_wdata = 'b0;
+ m_axi4_wstrb = 'b0;
+ m_axi4_wuser = 'b0;
+
+ m_axi4_wvalid = 1'b0;
+ axi4_wready = 1'b0;
+
+ hum_buf_valid_in = 1'b0;
+ hum_buf_ready_in = 1'b0;
+
+ hum_buf_drop_req_SN = hum_buf_drop_req_SP;
+ hum_buf_drop_len_SN = hum_buf_drop_len_SP;
+ master_select_o = 1'b0;
+
+ w_done = 1'b0; // read from FIFO without handshake with B sender
+ b_drop_o = 1'b0; // send data from FIFO to B sender (with handshake)
+ fifo_select = 1'b0;
+
+ fifo_select_SN = fifo_select_SP;
+ stop_store = 1'b0;
+
+ block_forwarding = 1'b0;
+
+ unique case (hum_buf_SP)
+
+ STORE : begin
+ // Simply store the data in the buffer.
+ hum_buf_valid_in = axi4_wvalid & hum_buf_ready_out;
+ axi4_wready = hum_buf_ready_out;
+
+ // We have got a full burst in the HUM buffer, thus stop storing.
+ if (wlast_in & !hum_buf_underfull | (n_wlast_SP > $signed(0))) begin
+ hum_buf_SN = WAIT_L1_BYPASS_YES;
+
+ // The buffer is full, thus wait for decision.
+ end else if (~hum_buf_ready_out) begin
+ hum_buf_SN = WAIT_L1_BYPASS_NO;
+ end
+
+ // Avoid the forwarding of L1 hits until we know whether we can bypass.
+ if (l1_fifo_valid_out & l1_save_cur) begin
+ block_forwarding = 1'b1;
+ end
+ end
+
+ WAIT_L1_BYPASS_YES : begin
+ // Wait for orders from L1 TLB.
+ if (l1_fifo_valid_out) begin
+
+ // L1 hit - forward data from buffer
+ if (l1_accept_cur) begin
+ m_axi4_wlast = hum_buf_wlast;
+ m_axi4_wdata = hum_buf_wdata;
+ m_axi4_wstrb = hum_buf_wstrb;
+ m_axi4_wuser = hum_buf_wuser;
+
+ m_axi4_wvalid = hum_buf_valid_out;
+ hum_buf_ready_in = m_axi4_wready;
+
+ master_select_o = l1_master_cur;
+
+ // Detect last data beat.
+ if (wlast_out) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ hum_buf_SN = STORE;
+ end
+
+ // L1 miss - wait for L2
+ end else if (l1_save_cur) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ hum_buf_SN = WAIT_L2_BYPASS_YES;
+
+ // L1 prefetch, prot, multi - drop data
+ end else if (l1_drop_cur) begin
+ fifo_select_SN = 1'b0; // L1
+ hum_buf_drop_req_SN = 1'b1;
+ hum_buf_drop_len_SN = l1_len_cur;
+ hum_buf_SN = FLUSH;
+ end
+ end
+ end
+
+ WAIT_L2_BYPASS_YES : begin
+ // Wait for orders from L2 TLB.
+ if (l2_fifo_valid_out) begin
+
+ // L2 hit - forward data from buffer
+ if (l2_accept_cur) begin
+ m_axi4_wlast = hum_buf_wlast;
+ m_axi4_wdata = hum_buf_wdata;
+ m_axi4_wstrb = hum_buf_wstrb;
+ m_axi4_wuser = hum_buf_wuser;
+
+ m_axi4_wvalid = hum_buf_valid_out;
+ hum_buf_ready_in = m_axi4_wready;
+
+ master_select_o = l2_master_cur;
+
+ // Detect last data beat.
+ if (wlast_out) begin
+ fifo_select = 1'b1;
+ w_done = 1'b1;
+ hum_buf_SN = STORE;
+ end
+
+ // L2 miss/prefetch hit
+ end else if (l2_drop_cur) begin
+ fifo_select_SN = 1'b1; // L2
+ hum_buf_drop_req_SN = 1'b1;
+ hum_buf_drop_len_SN = l2_len_cur;
+ hum_buf_SN = FLUSH;
+ end
+
+ // While we wait for orders from L2 TLB, we can still drop and accept L1 transactions.
+ end else if (l1_fifo_valid_out) begin
+
+ // L1 hit
+ if (l1_accept_cur) begin
+ hum_buf_SN = BYPASS;
+
+ // L1 prefetch/prot/multi
+ end else if (l1_drop_cur) begin
+ hum_buf_SN = DISCARD;
+ end
+ end
+ end
+
+ FLUSH : begin
+ // Clear HUM buffer flush request.
+ hum_buf_drop_req_SN = 1'b0;
+
+ // perform handshake with B sender
+ fifo_select = fifo_select_SP;
+ b_drop_o = 1'b1;
+ if (b_done_i) begin
+ hum_buf_SN = STORE;
+ end
+ end
+
+ BYPASS : begin
+ // Forward one full transaction from input buffer.
+ m_axi4_wlast = axi4_wlast;
+ m_axi4_wdata = axi4_wdata;
+ m_axi4_wstrb = axi4_wstrb;
+ m_axi4_wuser = axi4_wuser;
+
+ m_axi4_wvalid = axi4_wvalid;
+ axi4_wready = m_axi4_wready;
+
+ master_select_o = l1_master_cur;
+
+ // We have got a full transaction.
+ if (axi4_wlast & axi4_wready & axi4_wvalid) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ hum_buf_SN = WAIT_L2_BYPASS_YES;
+ end
+ end
+
+ DISCARD : begin
+ // Discard one full transaction from input buffer.
+ axi4_wready = 1'b1;
+
+ // We have got a full transaction.
+ if (axi4_wlast & axi4_wready & axi4_wvalid) begin
+ // Try to perform handshake with B sender.
+ fifo_select = 1'b0;
+ b_drop_o = 1'b1;
+ // We cannot wait here due to axi4_wready.
+ if (b_done_i) begin
+ hum_buf_SN = WAIT_L2_BYPASS_YES;
+ end else begin
+ hum_buf_SN = DISCARD_FINISH;
+ end
+ end
+ end
+
+ DISCARD_FINISH : begin
+ // Perform handshake with B sender.
+ fifo_select = 1'b0;
+ b_drop_o = 1'b1;
+ if (b_done_i) begin
+ hum_buf_SN = WAIT_L2_BYPASS_YES;
+ end
+ end
+
+ WAIT_L1_BYPASS_NO : begin
+ // Do not allow the forwarding of L1 hits.
+ block_forwarding = 1'b1;
+
+ // Wait for orders from L1 TLB.
+ if (l1_fifo_valid_out) begin
+
+ // L1 hit - forward data from/through HUM buffer and refill the buffer
+ if (l1_accept_cur) begin
+ // Forward data from HUM buffer.
+ m_axi4_wlast = hum_buf_wlast;
+ m_axi4_wdata = hum_buf_wdata;
+ m_axi4_wstrb = hum_buf_wstrb;
+ m_axi4_wuser = hum_buf_wuser;
+
+ m_axi4_wvalid = hum_buf_valid_out;
+ hum_buf_ready_in = m_axi4_wready;
+
+ master_select_o = l1_master_cur;
+
+ // Refill the HUM buffer. Stop when buffer full.
+ stop_store = ~hum_buf_ready_out;
+ hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ;
+ axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out;
+
+ // Detect last data beat.
+ if (wlast_out) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ if (~hum_buf_ready_out | hum_buf_almost_full) begin
+ hum_buf_SN = WAIT_L1_BYPASS_NO;
+ end else begin
+ hum_buf_SN = STORE;
+ end
+ end
+
+ // Allow the forwarding of L1 hits.
+ block_forwarding = 1'b0;
+
+ // L1 miss - wait for L2
+ end else if (l1_save_cur) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ hum_buf_SN = WAIT_L2_BYPASS_NO;
+
+ // L1 prefetch, prot, multi - drop data
+ end else if (l1_drop_cur) begin
+ fifo_select_SN = 1'b0; // L1
+ hum_buf_drop_req_SN = 1'b1;
+ hum_buf_drop_len_SN = l1_len_cur;
+ hum_buf_SN = FLUSH;
+
+ // Allow the forwarding of L1 hits.
+ block_forwarding = 1'b0;
+ end
+ end
+ end
+
+ WAIT_L2_BYPASS_NO : begin
+ // Do not allow the forwarding of L1 hits.
+ block_forwarding = 1'b1;
+
+ // Wait for orders from L2 TLB.
+ if (l2_fifo_valid_out) begin
+
+ // L2 hit - forward first part from HUM buffer, rest from input buffer
+ if (l2_accept_cur) begin
+ // Forward data from HUM buffer.
+ m_axi4_wlast = hum_buf_wlast;
+ m_axi4_wdata = hum_buf_wdata;
+ m_axi4_wstrb = hum_buf_wstrb;
+ m_axi4_wuser = hum_buf_wuser;
+
+ m_axi4_wvalid = hum_buf_valid_out;
+ hum_buf_ready_in = m_axi4_wready;
+
+ master_select_o = l2_master_cur;
+
+ // Refill the HUM buffer. Stop when buffer full.
+ stop_store = ~hum_buf_ready_out;
+ hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ;
+ axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out;
+
+ // Detect last data beat.
+ if (wlast_out) begin
+ fifo_select = 1'b1;
+ w_done = 1'b1;
+ if (~hum_buf_ready_out | hum_buf_almost_full) begin
+ hum_buf_SN = WAIT_L1_BYPASS_NO;
+ end else begin
+ hum_buf_SN = STORE;
+ end
+ end
+
+ // Allow the forwarding of L1 hits.
+ block_forwarding = 1'b0;
+
+ // L2 miss/prefetch hit - drop data
+ end else if (l2_drop_cur) begin
+ fifo_select_SN = 1'b1; // L2
+ hum_buf_drop_req_SN = 1'b1;
+ hum_buf_drop_len_SN = l2_len_cur;
+ hum_buf_SN = FLUSH;
+
+ // Allow the forwarding of L1 hits.
+ block_forwarding = 1'b0;
+ end
+ end
+ end
+
+
+ default: begin
+ hum_buf_SN = STORE;
+ end
+
+ endcase // hum_buf_SP
+ end // HUM_BUFFER_FSM
+
+ assign b_drop_set = 1'b0;
+
+ end else begin // HUM_BUFFER
+
+ // register to perform the handshake with B sender
+ always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
+ if (axi4_arstn == 0) begin
+ b_drop_o <= 1'b0;
+ end else if (b_done_i) begin
+ b_drop_o <= 1'b0;
+ end else if (b_drop_set) begin
+          b_drop_o <= 1'b1;
+ end
+ end
+
+ always_comb begin : OUTPUT_CTRL
+
+ fifo_select = 1'b0;
+ w_done = 1'b0;
+ b_drop_set = 1'b0;
+
+ m_axi4_wlast = 1'b0;
+ m_axi4_wdata = 'b0;
+ m_axi4_wstrb = 'b0;
+ m_axi4_wuser = 'b0;
+
+ m_axi4_wvalid = 1'b0;
+ axi4_wready = 1'b0;
+
+ if (l1_fifo_valid_out) begin
+ // forward data
+ if (l1_accept_cur) begin
+ m_axi4_wlast = axi4_wlast;
+ m_axi4_wdata = axi4_wdata;
+ m_axi4_wstrb = axi4_wstrb;
+ m_axi4_wuser = axi4_wuser;
+
+ m_axi4_wvalid = axi4_wvalid;
+ axi4_wready = m_axi4_wready;
+
+ // Simply pop from FIFO upon last data beat.
+ w_done = axi4_wlast & axi4_wvalid & axi4_wready;
+
+ // discard entire burst
+ end else if (b_drop_o == 1'b0) begin
+ axi4_wready = 1'b1;
+
+ // Simply pop from FIFO upon last data beat. Perform handshake with B sender.
+ if (axi4_wlast & axi4_wvalid & axi4_wready)
+ b_drop_set = 1'b1;
+ end
+ end
+
+ end // OUTPUT_CTRL
+
+ assign master_select_o = l1_master_cur;
+ assign l2_fifo_ready_out = 1'b1;
+ assign block_forwarding = 1'b0;
+
+ // unused signals
+ assign hum_buf_ready_out = 1'b0;
+ assign hum_buf_valid_in = 1'b0;
+ assign hum_buf_ready_in = 1'b0;
+ assign hum_buf_valid_out = 1'b0;
+ assign hum_buf_wdata = 'b0;
+ assign hum_buf_wstrb = 'b0;
+ assign hum_buf_wlast = 1'b0;
+ assign hum_buf_wuser = 'b0;
+ assign hum_buf_drop_len_SN = 'b0;
+ assign hum_buf_drop_req_SN = 1'b0;
+ assign hum_buf_almost_full = 1'b0;
+
+ assign l2_fifo_valid_in = 1'b0;
+ assign l2_fifo_valid_out = 1'b0;
+ assign l2_prefetch_cur = 1'b0;
+ assign l2_hit_cur = 1'b0;
+ assign l2_id_cur = 'b0;
+ assign l2_len_cur = 'b0;
+ assign l2_master_cur = 1'b0;
+ assign l2_accept_cur = 1'b0;
+ assign l2_drop_cur = 1'b0;
+
+ assign l2_req = 1'b0;
+
+ assign fifo_select_SN = 1'b0;
+ assign fifo_select_SP = 1'b0;
+
+ assign stop_store = 1'b0;
+ assign n_wlast_SP = 'b0;
+ assign wlast_in = 1'b0;
+ assign wlast_out = 1'b0;
+
+ end // HUM_BUFFER
+
+ endgenerate
+"""
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi4_w_sender(Elaboratable):
+
+    def __init__(self, AXI_DATA_WIDTH=32, AXI_USER_WIDTH=2):
+        # Parameter defaults and port widths follow the SV module below.
+        self.axi4_aclk = Signal()  # input
+        self.axi4_arstn = Signal()  # input
+        self.s_axi4_wdata = Signal(AXI_DATA_WIDTH)  # input
+        self.s_axi4_wvalid = Signal()  # input
+        self.s_axi4_wready = Signal()  # output
+        self.s_axi4_wstrb = Signal(AXI_DATA_WIDTH // 8)  # input
+        self.s_axi4_wlast = Signal()  # input
+        self.s_axi4_wuser = Signal(AXI_USER_WIDTH)  # input
+        self.m_axi4_wdata = Signal(AXI_DATA_WIDTH)  # output
+        self.m_axi4_wvalid = Signal()  # output
+        self.m_axi4_wready = Signal()  # input
+        self.m_axi4_wstrb = Signal(AXI_DATA_WIDTH // 8)  # output
+        self.m_axi4_wlast = Signal()  # output
+        self.m_axi4_wuser = Signal(AXI_USER_WIDTH)  # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ m.d.comb += self.m_axi4_wdata.eq(self.s_axi4_wdata)
+ m.d.comb += self.m_axi4_wstrb.eq(self.s_axi4_wstrb)
+ m.d.comb += self.m_axi4_wlast.eq(self.s_axi4_wlast)
+ m.d.comb += self.m_axi4_wuser.eq(self.s_axi4_wuser)
+ m.d.comb += self.m_axi4_wvalid.eq(self.s_axi4_wvalid)
+ m.d.comb += self.s_axi4_wready.eq(self.m_axi4_wready)
+ return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# module axi4_w_sender
+# #(
+# parameter AXI_DATA_WIDTH = 32,
+# parameter AXI_USER_WIDTH = 2
+# )
+# (
+# input axi4_aclk,
+# input axi4_arstn,
+#
+# input [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
+# input s_axi4_wvalid,
+# output s_axi4_wready,
+# input [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
+# input s_axi4_wlast,
+# input [AXI_USER_WIDTH-1:0] s_axi4_wuser,
+#
+# output [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
+# output m_axi4_wvalid,
+# input m_axi4_wready,
+# output [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
+# output m_axi4_wlast,
+# output [AXI_USER_WIDTH-1:0] m_axi4_wuser
+# );
+#
+# assign m_axi4_wdata = s_axi4_wdata;
+# assign m_axi4_wstrb = s_axi4_wstrb;
+# assign m_axi4_wlast = s_axi4_wlast;
+# assign m_axi4_wuser = s_axi4_wuser;
+#
+# assign m_axi4_wvalid = s_axi4_wvalid;
+# assign s_axi4_wready = m_axi4_wready;
+#
+# endmodule
+#
+#
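+# A convenience entry point for inspecting this pass-through module.  This is
+# a hedged sketch: it assumes the `nmigen.cli.main` helper used by other
+# modules in this repository is available, and it only runs when the file is
+# executed directly.
+
+if __name__ == "__main__":
+    from nmigen.cli import main
+    sender = axi4_w_sender()
+    main(sender, ports=[sender.s_axi4_wdata, sender.s_axi4_wvalid,
+                        sender.s_axi4_wready, sender.s_axi4_wstrb,
+                        sender.s_axi4_wlast, sender.s_axi4_wuser,
+                        sender.m_axi4_wdata, sender.m_axi4_wvalid,
+                        sender.m_axi4_wready, sender.m_axi4_wstrb,
+                        sender.m_axi4_wlast, sender.m_axi4_wuser])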
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi_buffer_rab(Elaboratable):
+
+    def __init__(self, DATA_WIDTH, BUFFER_DEPTH):
+        # DATA_WIDTH and BUFFER_DEPTH mirror the (commented-out) parameters
+        # of the SV module below.
+        self.DATA_WIDTH = DATA_WIDTH
+        self.BUFFER_DEPTH = BUFFER_DEPTH
+        self.clk = Signal()  # input
+        self.rstn = Signal()  # input
+        self.data_out = Signal(DATA_WIDTH)  # output
+        self.valid_out = Signal()  # output
+        self.ready_in = Signal()  # input
+        self.valid_in = Signal()  # input
+        self.data_in = Signal(DATA_WIDTH)  # input
+        self.ready_out = Signal()  # output
+
+    def elaborate(self, platform=None):
+        m = Module()
+        # Only the status outputs of the SV below are translated so far; the
+        # element counter, pointers and storage are not yet driven (see the
+        # hand-translated sketch appended after the SV source below).
+        elements = Signal(range(self.BUFFER_DEPTH + 1))
+        full = Signal()
+        m.d.comb += full.eq(elements == self.BUFFER_DEPTH)
+        m.d.comb += self.valid_out.eq(elements != 0)
+        m.d.comb += self.ready_out.eq(~full)
+        return m
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# //import CfMath::log2;
+#
+# module axi_buffer_rab
+# //#(
+# // parameter DATA_WIDTH,
+# // parameter BUFFER_DEPTH
+# //)
+# (
+# input logic clk,
+# input logic rstn,
+#
+# // Downstream port
+# output logic [DATA_WIDTH-1:0] data_out,
+# output logic valid_out,
+# input logic ready_in,
+#
+# // Upstream port
+# input logic valid_in,
+# input logic [DATA_WIDTH-1:0] data_in,
+# output logic ready_out
+# );
+#
+# localparam integer LOG_BUFFER_DEPTH = log2(BUFFER_DEPTH);
+#
+# // Internal data structures
+# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_in; // location to which we last wrote
+# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_out; // location from which we last sent
+# reg [LOG_BUFFER_DEPTH : 0] elements; // number of elements in the buffer
+# reg [DATA_WIDTH - 1 : 0] buffer [BUFFER_DEPTH - 1 : 0];
+#
+# wire full;
+#
+# integer loop1;
+#
+# assign full = (elements == BUFFER_DEPTH);
+#
+# always @(posedge clk or negedge rstn)
+# begin: elements_sequential
+# if (rstn == 1'b0)
+# elements <= 0;
+# else
+# begin
+# // ------------------
+# // Are we filling up?
+# // ------------------
+# // One out, none in
+# if (ready_in && valid_out && (!valid_in || full))
+# elements <= elements - 1;
+# // None out, one in
+# else if ((!valid_out || !ready_in) && valid_in && !full)
+# elements <= elements + 1;
+# // Else, either one out and one in, or none out and none in - stays unchanged
+# end
+# end
+#
+# always @(posedge clk or negedge rstn)
+# begin: buffers_sequential
+# if (rstn == 1'b0)
+# begin
+# for (loop1 = 0 ; loop1 < BUFFER_DEPTH ; loop1 = loop1 + 1)
+# buffer[loop1] <= 0;
+# end
+# else
+# begin
+# // Update the memory
+# if (valid_in && !full)
+# buffer[pointer_in] <= data_in;
+# end
+# end
+#
+# always @(posedge clk or negedge rstn)
+# begin: sequential
+# if (rstn == 1'b0)
+# begin
+# pointer_out <= 0;
+# pointer_in <= 0;
+# end
+# else
+# begin
+# // ------------------------------------
+# // Check what to do with the input side
+# // ------------------------------------
+# // We have some input, increase by 1 the input pointer
+# if (valid_in && !full)
+# begin
+# if (pointer_in == $unsigned(BUFFER_DEPTH - 1))
+# pointer_in <= 0;
+# else
+# pointer_in <= pointer_in + 1;
+# end
+# // Else we don't have any input, the input pointer stays the same
+#
+# // -------------------------------------
+# // Check what to do with the output side
+# // -------------------------------------
+# // We had pushed one flit out, we can try to go for the next one
+# if (ready_in && valid_out)
+# begin
+# if (pointer_out == $unsigned(BUFFER_DEPTH - 1))
+# pointer_out <= 0;
+# else
+# pointer_out <= pointer_out + 1;
+# end
+# // Else stay on the same output location
+# end
+# end
+#
+# // Update output ports
+# assign data_out = buffer[pointer_out];
+# assign valid_out = (elements != 0);
+#
+# assign ready_out = ~full;
+#
+# endmodule
+#
+#
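+# The ring-buffer FIFO above translates almost mechanically into nmigen.  The
+# class below is a hand-written, hedged sketch of that translation (it is not
+# sv2nmigen output): it uses nmigen's default "sync" clock domain in place of
+# the explicit clk/rstn ports and asynchronous reset of the SV.
+
+from nmigen import Array, Elaboratable, Module, Signal
+
+
+class _AxiBufferRabSketch(Elaboratable):
+
+    def __init__(self, data_width, buffer_depth):
+        self.data_width = data_width
+        self.buffer_depth = buffer_depth
+        # Downstream port
+        self.data_out = Signal(data_width)
+        self.valid_out = Signal()
+        self.ready_in = Signal()
+        # Upstream port
+        self.valid_in = Signal()
+        self.data_in = Signal(data_width)
+        self.ready_out = Signal()
+
+    def elaborate(self, platform=None):
+        m = Module()
+        depth = self.buffer_depth
+        storage = Array(Signal(self.data_width, name="buf%d" % i)
+                        for i in range(depth))
+        ptr_in = Signal(range(depth))       # write index
+        ptr_out = Signal(range(depth))      # read index
+        elements = Signal(range(depth + 1))
+        full = Signal()
+
+        m.d.comb += [
+            full.eq(elements == depth),
+            self.data_out.eq(storage[ptr_out]),
+            self.valid_out.eq(elements != 0),
+            self.ready_out.eq(~full),
+        ]
+
+        push = self.valid_in & ~full
+        pop = self.ready_in & self.valid_out
+        # Element counter: one in, one out, or unchanged, as in the SV above.
+        with m.If(pop & ~push):
+            m.d.sync += elements.eq(elements - 1)
+        with m.Elif(push & ~pop):
+            m.d.sync += elements.eq(elements + 1)
+        # Write side: store the word and advance/wrap the write index.
+        with m.If(push):
+            m.d.sync += storage[ptr_in].eq(self.data_in)
+            with m.If(ptr_in == depth - 1):
+                m.d.sync += ptr_in.eq(0)
+            with m.Else():
+                m.d.sync += ptr_in.eq(ptr_in + 1)
+        # Read side: advance/wrap the read index after each output handshake.
+        with m.If(pop):
+            with m.If(ptr_out == depth - 1):
+                m.d.sync += ptr_out.eq(0)
+            with m.Else():
+                m.d.sync += ptr_out.eq(ptr_out + 1)
+        return m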
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi_buffer_rab_bram(Elaboratable):
+
+    def __init__(self, DATA_WIDTH, BUFFER_DEPTH):
+        # DATA_WIDTH and BUFFER_DEPTH mirror the (commented-out) parameters
+        # of the SV module below.
+        self.DATA_WIDTH = DATA_WIDTH
+        self.BUFFER_DEPTH = BUFFER_DEPTH
+ self.clk = Signal() # input
+ self.rstn = Signal() # input
+ self.data_out = Signal(DATA_WIDTH) # output
+ self.valid_out = Signal() # output
+ self.ready_in = Signal() # input
+ self.valid_in = Signal() # input
+ self.data_in = Signal(DATA_WIDTH) # input
+ self.ready_out = Signal() # output
+ self.almost_full = Signal() # output
+ self.underfull = Signal() # output
+ self.drop_req = Signal() # input
+ self.drop_len = Signal(8) # input
+
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# ////import CfMath::log2;
+#
+# module axi_buffer_rab_bram
+# //#(
+# // parameter DATA_WIDTH,
+# // parameter BUFFER_DEPTH
+# // )
+# (
+# input logic clk,
+# input logic rstn,
+#
+# // Downstream port
+# output logic [DATA_WIDTH-1:0] data_out,
+# output logic valid_out,
+# input logic ready_in,
+#
+# // Upstream port
+# input logic valid_in,
+# input logic [DATA_WIDTH-1:0] data_in,
+# output logic ready_out,
+#
+# // Status and drop control
+# output logic almost_full,
+# output logic underfull,
+# input logic drop_req,
+# // Number of items to drop. As for AXI lengths, counting starts at zero, i.e., `drop_len == 0`
+# // and `drop_req` means drop one item.
+# input logic [7:0] drop_len
+# );
+#
+""" #docstring_begin
+ // The BRAM needs to be in "write-first" mode for first-word fall-through FIFO behavior.
+ // To still push and pop simultaneously if the buffer is full, we internally increase the
+ // buffer depth by 1.
+ localparam ACT_BUFFER_DEPTH = BUFFER_DEPTH+1;
+ localparam ACT_LOG_BUFFER_DEPTH = log2(ACT_BUFFER_DEPTH+1);
+
+ /**
+ * Internal data structures
+ */
+ // Location to which we last wrote
+ logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_in_d, ptr_in_q;
+ // Location from which we last sent
+ logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_d, ptr_out_q;
+ // Required for fall-through behavior on the first word
+ logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_bram;
+ // Number of elements in the buffer. Can be negative if elements that have been dropped have not
+ // yet been written.
+ logic signed [ACT_LOG_BUFFER_DEPTH:0] n_elems_d, n_elems_q;
+
+ logic [DATA_WIDTH-1:0] data_out_bram, data_out_q;
+ logic valid_out_q;
+
+ logic full;
+
+ assign almost_full = (n_elems_q == BUFFER_DEPTH-1);
+ assign full = (n_elems_q == BUFFER_DEPTH);
+
+ always_ff @(posedge clk, negedge rstn) begin
+ if (~rstn) begin
+ n_elems_q <= '0;
+ ptr_in_q <= '0;
+ ptr_out_q <= '0;
+ end else begin
+ n_elems_q <= n_elems_d;
+ ptr_in_q <= ptr_in_d;
+ ptr_out_q <= ptr_out_d;
+ end
+ end
+
+ // Update the number of elements.
+ always_comb begin
+ n_elems_d = n_elems_q;
+ if (drop_req) begin
+ n_elems_d -= (drop_len + 1);
+ end
+ if (valid_in && ready_out) begin
+ n_elems_d += 1;
+ end
+ if (valid_out && ready_in) begin
+ n_elems_d -= 1;
+ end
+ end
+
+ // Update the output pointer.
+ always_comb begin
+ ptr_out_d = ptr_out_q;
+ if (drop_req) begin
+ if ((ptr_out_q + drop_len + 1) > (ACT_BUFFER_DEPTH - 1)) begin
+ ptr_out_d = drop_len + 1 - (ACT_BUFFER_DEPTH - ptr_out_q);
+ end else begin
+ ptr_out_d += (drop_len + 1);
+ end
+ end
+ if (valid_out && ready_in) begin
+ if (ptr_out_d == (ACT_BUFFER_DEPTH - 1)) begin
+ ptr_out_d = '0;
+ end else begin
+ ptr_out_d += 1;
+ end
+ end
+ end
+
+ // The BRAM has a read latency of one cycle, so apply the new address one cycle earlier for
+ // first-word fall-through FIFO behavior.
+ //assign ptr_out_bram = (ptr_out_q == (ACT_BUFFER_DEPTH-1)) ? '0 : (ptr_out_q + 1);
+ assign ptr_out_bram = ptr_out_d;
+
+ // Update the input pointer.
+ always_comb begin
+ ptr_in_d = ptr_in_q;
+ if (valid_in && ready_out) begin
+ if (ptr_in_d == (ACT_BUFFER_DEPTH - 1)) begin
+ ptr_in_d = '0;
+ end else begin
+ ptr_in_d += 1;
+ end
+ end
+ end
+
+ // Update output ports.
+ assign valid_out = (n_elems_q > $signed(0));
+ assign underfull = (n_elems_q < $signed(0));
+ assign ready_out = ~full;
+
+ ram_tp_write_first #(
+ .ADDR_WIDTH ( ACT_LOG_BUFFER_DEPTH ),
+ .DATA_WIDTH ( DATA_WIDTH )
+ )
+ ram_tp_write_first_0
+ (
+ .clk ( clk ),
+ .we ( valid_in & ~full ),
+ .addr0 ( ptr_in_q ),
+ .addr1 ( ptr_out_bram ),
+ .d_i ( data_in ),
+ .d0_o ( ),
+ .d1_o ( data_out_bram )
+ );
+
+  // When reading from and writing to the same address on both ports ("Write-Read Collision"),
+  // the data on the read port is invalid (during the write cycle). In this implementation,
+  // this can happen only when the buffer is empty. Thus, we forward the data from a
+  // register in this case.
+ always @(posedge clk) begin
+ if (rstn == 1'b0) begin
+ data_out_q <= 'b0;
+ end else if ( (ptr_out_bram == ptr_in_q) && (valid_in && !full) ) begin
+ data_out_q <= data_in;
+ end
+ end
+
+ always @(posedge clk) begin
+ if (rstn == 1'b0) begin
+ valid_out_q <= 'b0;
+ end else begin
+ valid_out_q <= valid_out;
+ end
+ end
+
+ // Drive output data
+ always_comb begin
+ if (valid_out && !valid_out_q) begin // We have just written to an empty FIFO
+ data_out = data_out_q;
+ end else begin
+ data_out = data_out_bram;
+ end
+ end
+
+"""
+# endmodule
+#
+#
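+# The "Write-Read Collision" workaround above (capture data_in in a side
+# register when the BRAM read and write addresses collide on an empty FIFO,
+# and serve the first word from that register) looks roughly like this in
+# nmigen.  A hedged sketch operating on caller-provided signals; all names
+# here are assumptions:
+
+def _collision_bypass_sketch(m, data_in, data_out, data_out_bram,
+                             ptr_out_bram, ptr_in, valid_in, full, valid_out):
+    from nmigen import Signal
+    data_out_q = Signal.like(data_in)
+    valid_out_q = Signal()
+    # Capture the written word when the read and write addresses collide
+    # (with this pointer scheme that can only happen while the FIFO is empty).
+    with m.If((ptr_out_bram == ptr_in) & valid_in & ~full):
+        m.d.sync += data_out_q.eq(data_in)
+    m.d.sync += valid_out_q.eq(valid_out)
+    # We have just written into an empty FIFO: the BRAM read data is not yet
+    # valid, so forward from the capture register for this first word.
+    with m.If(valid_out & ~valid_out_q):
+        m.d.comb += data_out.eq(data_out_q)
+    with m.Else():
+        m.d.comb += data_out.eq(data_out_bram)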
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi_rab_cfg(Elaboratable):
+
+    def __init__(self, N_PORTS=3, AXI_DATA_WIDTH=64, AXI_ADDR_WIDTH=32,
+                 ADDR_WIDTH_VIRT=32, MISS_META_WIDTH=10):
+        # Parameter defaults follow the SV module below.
+ self.Clk_CI = Signal() # input
+ self.Rst_RBI = Signal() # input
+ self.s_axi_awaddr = Signal(AXI_ADDR_WIDTH) # input
+ self.s_axi_awvalid = Signal() # input
+ self.s_axi_awready = Signal() # output
+ self.s_axi_wdata = Signal() # input
+        self.s_axi_wstrb = Signal(AXI_DATA_WIDTH // 8)  # input
+ self.s_axi_wvalid = Signal() # input
+ self.s_axi_wready = Signal() # output
+ self.s_axi_bresp = Signal(2) # output
+ self.s_axi_bvalid = Signal() # output
+ self.s_axi_bready = Signal() # input
+ self.s_axi_araddr = Signal(AXI_ADDR_WIDTH) # input
+ self.s_axi_arvalid = Signal() # input
+ self.s_axi_arready = Signal() # output
+ self.s_axi_rdata = Signal(AXI_DATA_WIDTH) # output
+ self.s_axi_rresp = Signal(2) # output
+ self.s_axi_rvalid = Signal() # output
+ self.s_axi_rready = Signal() # input
+ self.L1Cfg_DO = Signal() # output
+ self.L1AllowMultiHit_SO = Signal() # output
+ self.MissAddr_DI = Signal(ADDR_WIDTH_VIRT) # input
+ self.MissMeta_DI = Signal(MISS_META_WIDTH) # input
+ self.Miss_SI = Signal() # input
+ self.MhFifoFull_SO = Signal() # output
+ self.wdata_l2 = Signal() # output
+ self.waddr_l2 = Signal() # output
+ self.wren_l2 = Signal(N_PORTS) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# // --=========================================================================--
+# //
+# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗
+# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝
+# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗
+# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║
+# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝
+# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝
+# //
+# //
+# // Author: Pirmin Vogel - vogelpi@iis.ee.ethz.ch
+# //
+# // Purpose : AXI4-Lite configuration and miss handling interface for RAB
+# //
+# // --=========================================================================--
+#
+# //import CfMath::log2;
+#
+# module axi_rab_cfg
+# #(
+# parameter N_PORTS = 3,
+# parameter N_REGS = 196,
+# parameter N_L2_SETS = 32,
+# parameter N_L2_SET_ENTRIES= 32,
+# parameter ADDR_WIDTH_PHYS = 40,
+# parameter ADDR_WIDTH_VIRT = 32,
+# parameter N_FLAGS = 4,
+# parameter AXI_DATA_WIDTH = 64,
+# parameter AXI_ADDR_WIDTH = 32,
+# parameter MISS_META_WIDTH = 10, // <= FIFO_WIDTH
+# parameter MH_FIFO_DEPTH = 16
+# )
+# (
+# input logic Clk_CI,
+# input logic Rst_RBI,
+#
+# // AXI Lite interface
+# input logic [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
+# input logic s_axi_awvalid,
+# output logic s_axi_awready,
+# input logic [AXI_DATA_WIDTH/8-1:0][7:0] s_axi_wdata,
+# input logic [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
+# input logic s_axi_wvalid,
+# output logic s_axi_wready,
+# output logic [1:0] s_axi_bresp,
+# output logic s_axi_bvalid,
+# input logic s_axi_bready,
+# input logic [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
+# input logic s_axi_arvalid,
+# output logic s_axi_arready,
+# output logic [AXI_DATA_WIDTH-1:0] s_axi_rdata,
+# output logic [1:0] s_axi_rresp,
+# output logic s_axi_rvalid,
+# input logic s_axi_rready,
+#
+# // Slice configuration
+# output logic [N_REGS-1:0][63:0] L1Cfg_DO,
+# output logic L1AllowMultiHit_SO,
+#
+# // Miss handling
+# input logic [ADDR_WIDTH_VIRT-1:0] MissAddr_DI,
+# input logic [MISS_META_WIDTH-1:0] MissMeta_DI,
+# input logic Miss_SI,
+# output logic MhFifoFull_SO,
+#
+# // L2 TLB
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] wdata_l2,
+# output logic [N_PORTS-1:0] [AXI_ADDR_WIDTH-1:0] waddr_l2,
+# output logic [N_PORTS-1:0] wren_l2
+# );
+#
+""" #docstring_begin
+
+ localparam ADDR_LSB = log2(64/8); // 64 even if the AXI Lite interface is 32,
+ // because RAB slices are 64 bit wide.
+ localparam ADDR_MSB = log2(N_REGS)+ADDR_LSB-1;
+
+ localparam L2SINGLE_AMAP_SIZE = 16'h4000; // Maximum 2048 TLB entries in L2
+
+ localparam integer N_L2_ENTRIES = N_L2_SETS * N_L2_SET_ENTRIES;
+
+ localparam logic [AXI_ADDR_WIDTH-1:0] L2_VA_MAX_ADDR = (N_L2_ENTRIES-1) << 2;
+
+ logic [AXI_DATA_WIDTH/8-1:0][7:0] L1Cfg_DP[N_REGS]; // [Byte][Bit]
+ genvar j;
+
+ // █████╗ ██╗ ██╗██╗██╗ ██╗ ██╗ ██╗████████╗███████╗
+ // ██╔══██╗╚██╗██╔╝██║██║ ██║ ██║ ██║╚══██╔══╝██╔════╝
+ // ███████║ ╚███╔╝ ██║███████║█████╗██║ ██║ ██║ █████╗
+ // ██╔══██║ ██╔██╗ ██║╚════██║╚════╝██║ ██║ ██║ ██╔══╝
+ // ██║ ██║██╔╝ ██╗██║ ██║ ███████╗██║ ██║ ███████╗
+ // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚══════╝
+ //
+ logic [AXI_ADDR_WIDTH-1:0] awaddr_reg;
+ logic awaddr_done_rise;
+ logic awaddr_done_reg;
+ logic awaddr_done_reg_dly;
+
+ logic [AXI_DATA_WIDTH/8-1:0][7:0] wdata_reg;
+ logic [AXI_DATA_WIDTH/8-1:0] wstrb_reg;
+ logic wdata_done_rise;
+ logic wdata_done_reg;
+ logic wdata_done_reg_dly;
+
+ logic wresp_done_reg;
+ logic wresp_running_reg;
+
+ logic [AXI_ADDR_WIDTH-1:0] araddr_reg;
+ logic araddr_done_reg;
+
+ logic [AXI_DATA_WIDTH-1:0] rdata_reg;
+ logic rresp_done_reg;
+ logic rresp_running_reg;
+
+ logic awready;
+ logic wready;
+ logic bvalid;
+
+ logic arready;
+ logic rvalid;
+
+ logic wren;
+ logic wren_l1;
+
+ assign wren = ( wdata_done_rise & awaddr_done_reg ) | ( awaddr_done_rise & wdata_done_reg );
+ assign wdata_done_rise = wdata_done_reg & ~wdata_done_reg_dly;
+ assign awaddr_done_rise = awaddr_done_reg & ~awaddr_done_reg_dly;
+
+ // reg_dly
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ wdata_done_reg_dly <= 1'b0;
+ awaddr_done_reg_dly <= 1'b0;
+ end
+ else
+ begin
+ wdata_done_reg_dly <= wdata_done_reg;
+ awaddr_done_reg_dly <= awaddr_done_reg;
+ end
+ end
+
+ // AW Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ awaddr_done_reg <= 1'b0;
+ awaddr_reg <= '0;
+ awready <= 1'b1;
+ end
+ else
+ begin
+ if (awready && s_axi_awvalid)
+ begin
+ awready <= 1'b0;
+ awaddr_done_reg <= 1'b1;
+ awaddr_reg <= s_axi_awaddr;
+ end
+ else if (awaddr_done_reg && wresp_done_reg)
+ begin
+ awready <= 1'b1;
+ awaddr_done_reg <= 1'b0;
+ end
+ end
+ end
+
+ // W Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ wdata_done_reg <= 1'b0;
+ wready <= 1'b1;
+ wdata_reg <= '0;
+ wstrb_reg <= '0;
+ end
+ else
+ begin
+ if (wready && s_axi_wvalid)
+ begin
+ wready <= 1'b0;
+ wdata_done_reg <= 1'b1;
+ wdata_reg <= s_axi_wdata;
+ wstrb_reg <= s_axi_wstrb;
+ end
+ else if (wdata_done_reg && wresp_done_reg)
+ begin
+ wready <= 1'b1;
+ wdata_done_reg <= 1'b0;
+ end
+ end
+ end
+
+ // B Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ bvalid <= 1'b0;
+ wresp_done_reg <= 1'b0;
+ wresp_running_reg <= 1'b0;
+ end
+ else
+ begin
+ if (awaddr_done_reg && wdata_done_reg && !wresp_done_reg)
+ begin
+ if (!wresp_running_reg)
+ begin
+ bvalid <= 1'b1;
+ wresp_running_reg <= 1'b1;
+ end
+ else if (s_axi_bready)
+ begin
+ bvalid <= 1'b0;
+ wresp_done_reg <= 1'b1;
+ wresp_running_reg <= 1'b0;
+ end
+ end
+ else
+ begin
+ bvalid <= 1'b0;
+ wresp_done_reg <= 1'b0;
+ wresp_running_reg <= 1'b0;
+ end
+ end
+ end
+
+ // AR Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ araddr_done_reg <= 1'b0;
+ arready <= 1'b1;
+ araddr_reg <= '0;
+ end
+ else
+ begin
+ if (arready && s_axi_arvalid)
+ begin
+ arready <= 1'b0;
+ araddr_done_reg <= 1'b1;
+ araddr_reg <= s_axi_araddr;
+ end
+ else if (araddr_done_reg && rresp_done_reg)
+ begin
+ arready <= 1'b1;
+ araddr_done_reg <= 1'b0;
+ end
+ end
+ end
+
+ // R Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ rresp_done_reg <= 1'b0;
+ rvalid <= 1'b0;
+ rresp_running_reg <= 1'b0;
+ end
+ else
+ begin
+ if (araddr_done_reg && !rresp_done_reg)
+ begin
+ if (!rresp_running_reg)
+ begin
+ rvalid <= 1'b1;
+ rresp_running_reg <= 1'b1;
+ end
+ else if (s_axi_rready)
+ begin
+ rvalid <= 1'b0;
+ rresp_done_reg <= 1'b1;
+ rresp_running_reg <= 1'b0;
+ end
+ end
+ else
+ begin
+ rvalid <= 1'b0;
+ rresp_done_reg <= 1'b0;
+ rresp_running_reg <= 1'b0;
+ end
+ end
+ end
+
+ // ██╗ ██╗ ██████╗███████╗ ██████╗ ██████╗ ███████╗ ██████╗
+ // ██║ ███║ ██╔════╝██╔════╝██╔════╝ ██╔══██╗██╔════╝██╔════╝
+ // ██║ ╚██║ ██║ █████╗ ██║ ███╗ ██████╔╝█████╗ ██║ ███╗
+ // ██║ ██║ ██║ ██╔══╝ ██║ ██║ ██╔══██╗██╔══╝ ██║ ██║
+ // ███████╗██║ ╚██████╗██║ ╚██████╔╝ ██║ ██║███████╗╚██████╔╝
+ // ╚══════╝╚═╝ ╚═════╝╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ ╚═════╝
+ //
+ assign wren_l1 = wren && (awaddr_reg < L2SINGLE_AMAP_SIZE);
+
+ always @( posedge Clk_CI or negedge Rst_RBI )
+ begin
+ var integer idx_reg, idx_byte;
+ if ( Rst_RBI == 1'b0 )
+ begin
+ for ( idx_reg = 0; idx_reg < N_REGS; idx_reg++ )
+ L1Cfg_DP[idx_reg] <= '0;
+ end
+ else if ( wren_l1 )
+ begin
+ if ( awaddr_reg[ADDR_LSB+1] == 1'b0 ) begin // VIRT_ADDR
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
+ if ( (idx_byte < ADDR_WIDTH_VIRT/8) ) begin
+ if ( wstrb_reg[idx_byte] ) begin
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte];
+ end
+ end
+ else begin // Let synthesizer optimize away unused registers.
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
+ end
+ end
+ end
+ else if ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b10 ) begin // PHYS_ADDR
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
+ if ( (idx_byte < ADDR_WIDTH_PHYS/8) ) begin
+ if ( wstrb_reg[idx_byte] ) begin
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte];
+ end
+ end
+ else begin // Let synthesizer optimize away unused registers.
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
+ end
+ end
+ end
+ else begin // ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b11 ) // FLAGS
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
+ if ( (idx_byte < 1) ) begin
+ if ( wstrb_reg[idx_byte] ) begin
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte] & { {{8-N_FLAGS}{1'b0}}, {{N_FLAGS}{1'b1}} };
+ end
+ end
+ else begin // Let synthesizer optimize away unused registers.
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
+ end
+ end
+ end
+ end
+ end // always @ ( posedge Clk_CI or negedge Rst_RBI )
+
+ generate
+ // Mask unused bits -> Synthesizer should optimize away unused registers
+ for( j=0; j<N_REGS; j++ ) begin
+ if ( j[1] == 1'b0 ) // VIRT_ADDR
+ assign L1Cfg_DO[j] = { {{64-ADDR_WIDTH_VIRT}{1'b0}},{ADDR_WIDTH_VIRT{1'b1}} } & L1Cfg_DP[j];
+ else if ( j[1:0] == 2'b10 ) // PHYS_ADDR
+ assign L1Cfg_DO[j] = { {{64-ADDR_WIDTH_PHYS}{1'b0}},{ADDR_WIDTH_PHYS{1'b1}} } & L1Cfg_DP[j];
+ else // if ( j[1:0] == 2'b11 ) // FLAGS
+ assign L1Cfg_DO[j] = { {{64-N_FLAGS}{1'b0}},{N_FLAGS{1'b1}} } & L1Cfg_DP[j];
+ end
+ endgenerate
+
+ always_comb
+ begin
+ if ( araddr_reg[ADDR_LSB-1] == 1'b1 ) // read upper 32 bit, for debugging over 32-bit interface
+ rdata_reg = { {32'h00000000},{L1Cfg_DO[araddr_reg[ADDR_MSB:ADDR_LSB]][63:32]} };
+ else
+ rdata_reg = L1Cfg_DO[araddr_reg[ADDR_MSB:ADDR_LSB]];
+ end
+
+ assign s_axi_awready = awready;
+ assign s_axi_wready = wready;
+
+ assign s_axi_bresp = 2'b00;
+ assign s_axi_bvalid = bvalid;
+
+ assign s_axi_arready = arready;
+ assign s_axi_rresp = 2'b00;
+ assign s_axi_rvalid = rvalid;
+
+ // ██╗ ██████╗ ██████╗███████╗ ██████╗
+ // ██║ ╚════██╗ ██╔════╝██╔════╝██╔════╝
+ // ██║ █████╔╝ ██║ █████╗ ██║ ███╗
+ // ██║ ██╔═══╝ ██║ ██╔══╝ ██║ ██║
+ // ███████╗███████╗ ╚██████╗██║ ╚██████╔╝
+ // ╚══════╝╚══════╝ ╚═════╝╚═╝ ╚═════╝
+ //
+ logic [N_PORTS-1:0] l2_addr_is_in_va_rams;
+ logic [N_PORTS-1:0] upper_word_is_written;
+ logic [N_PORTS-1:0] lower_word_is_written;
+ generate
+ for( j=0; j< N_PORTS; j++)
+ begin
+ if (AXI_DATA_WIDTH == 64) begin
+ assign l2_addr_is_in_va_rams[j] = (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg[log2(L2SINGLE_AMAP_SIZE)-1:0] <= L2_VA_MAX_ADDR);
+ assign upper_word_is_written[j] = (wstrb_reg[7:4] != 4'b0000);
+ assign lower_word_is_written[j] = (wstrb_reg[3:0] != 4'b0000);
+ end else begin
+ assign l2_addr_is_in_va_rams[j] = 1'b0;
+ assign upper_word_is_written[j] = 1'b0;
+ assign lower_word_is_written[j] = 1'b0;
+ end
+
+ always @( posedge Clk_CI or negedge Rst_RBI ) begin
+ var integer idx_byte, off_byte;
+ if ( Rst_RBI == 1'b0 )
+ begin
+ wren_l2[j] <= 1'b0;
+ wdata_l2[j] <= '0;
+ end
+ else if (wren)
+ begin
+ if ( (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg < (j+2)*L2SINGLE_AMAP_SIZE) && (|wstrb_reg) )
+ wren_l2[j] <= 1'b1;
+ if (AXI_DATA_WIDTH == 32) begin
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ )
+ wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte] & {8{wstrb_reg[idx_byte]}};
+ end
+ else if (AXI_DATA_WIDTH == 64) begin
+ if (lower_word_is_written[j] == 1'b1)
+ off_byte = 0;
+ else
+ off_byte = 4;
+ // always put the payload in the lower word and set upper word to 0
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8/2; idx_byte++ )
+ wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte+off_byte] & {8{wstrb_reg[idx_byte+off_byte]}};
+ wdata_l2[j][AXI_DATA_WIDTH-1:AXI_DATA_WIDTH/2] <= 'b0;
+ end
+ // pragma translate_off
+ else
+ $fatal(1, "Unsupported AXI_DATA_WIDTH!");
+ // pragma translate_on
+ end
+ else
+ wren_l2[j] <= '0;
+ end // always @ ( posedge Clk_CI or negedge Rst_RBI )
+
+ // Properly align the 32-bit word address when writing from 64-bit interface:
+ // Depending on the system, the incoming address is (non-)aligned to the 64-bit
+ // word when writing the upper 32-bit word.
+ always_comb begin
+ waddr_l2[j] = (awaddr_reg -(j+1)*L2SINGLE_AMAP_SIZE)/4;
+ if (wren_l2[j]) begin
+ if (AXI_DATA_WIDTH == 64) begin
+ if (upper_word_is_written[j] == 1'b1) begin
+ // address must be non-aligned
+ waddr_l2[j][0] = 1'b1;
+ end
+ end
+ // pragma translate_off
+ else if (AXI_DATA_WIDTH != 32) begin
+ $fatal(1, "Unsupported AXI_DATA_WIDTH!");
+ end
+ // pragma translate_on
+ end
+ end
+
+ // Assert that only one 32-bit word is ever written at a time to VA RAMs on 64-bit data
+ // systems.
+ // pragma translate_off
+ always_ff @ (posedge Clk_CI) begin
+ if (AXI_DATA_WIDTH == 64) begin
+ if (l2_addr_is_in_va_rams[j]) begin
+ if (upper_word_is_written[j]) begin
+ assert (!lower_word_is_written[j])
+ else $error("Unsupported write across two 32-bit words to VA RAMs!");
+ end
+ else if (lower_word_is_written[j]) begin
+ assert (!upper_word_is_written[j])
+ else $error("Unsupported write across two 32-bit words to VA RAMs!");
+ end
+ end
+ end
+ end
+ // pragma translate_on
+
+ end // for (j=0; j< N_PORTS; j++)
+ endgenerate
+
+ // ███╗ ███╗██╗ ██╗ ███████╗██╗███████╗ ██████╗ ███████╗
+ // ████╗ ████║██║ ██║ ██╔════╝██║██╔════╝██╔═══██╗██╔════╝
+ // ██╔████╔██║███████║ █████╗ ██║█████╗ ██║ ██║███████╗
+ // ██║╚██╔╝██║██╔══██║ ██╔══╝ ██║██╔══╝ ██║ ██║╚════██║
+ // ██║ ╚═╝ ██║██║ ██║ ██║ ██║██║ ╚██████╔╝███████║
+ // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═════╝ ╚══════╝
+ //
+ logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDin_D;
+ logic AddrFifoWen_S;
+ logic AddrFifoRen_S;
+ logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDout_D;
+ logic AddrFifoFull_S;
+ logic AddrFifoEmpty_S;
+ logic AddrFifoEmpty_SB;
+ logic AddrFifoFull_SB;
+
+ logic [MISS_META_WIDTH-1:0] MetaFifoDin_D;
+ logic MetaFifoWen_S;
+ logic MetaFifoRen_S;
+ logic [MISS_META_WIDTH-1:0] MetaFifoDout_D;
+ logic MetaFifoFull_S;
+ logic MetaFifoEmpty_S;
+ logic MetaFifoEmpty_SB;
+ logic MetaFifoFull_SB;
+
+ logic FifosDisabled_S;
+ logic ConfRegWen_S;
+ logic [1:0] ConfReg_DN;
+ logic [1:0] ConfReg_DP;
+
+ logic [AXI_DATA_WIDTH-1:0] wdata_reg_vec;
+
+ assign FifosDisabled_S = ConfReg_DP[0];
+ assign L1AllowMultiHit_SO = ConfReg_DP[1];
+
+ assign AddrFifoEmpty_S = ~AddrFifoEmpty_SB;
+ assign MetaFifoEmpty_S = ~MetaFifoEmpty_SB;
+
+ assign AddrFifoFull_S = ~AddrFifoFull_SB;
+ assign MetaFifoFull_S = ~MetaFifoFull_SB;
+
+ assign MhFifoFull_SO = (AddrFifoWen_S & AddrFifoFull_S) | (MetaFifoWen_S & MetaFifoFull_S);
+
+ generate
+ for ( j=0; j<AXI_DATA_WIDTH/8; j++ )
+ assign wdata_reg_vec[(j+1)*8-1:j*8] = wdata_reg[j];
+ endgenerate
+
+ // write address FIFO
+ always_comb
+ begin
+ AddrFifoWen_S = 1'b0;
+ AddrFifoDin_D = 'b0;
+ if ( (Miss_SI == 1'b1) && (FifosDisabled_S == 1'b0) ) // register a new miss
+ begin
+ AddrFifoWen_S = 1'b1;
+ AddrFifoDin_D = MissAddr_DI;
+ end
+ else if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 'b0) && (FifosDisabled_S == 1'b0)) // write request from AXI interface
+ begin
+ AddrFifoWen_S = 1'b1;
+ AddrFifoDin_D = wdata_reg_vec[ADDR_WIDTH_VIRT-1:0];
+ end
+ end
+
+ // write meta FIFO
+ always_comb
+ begin
+ MetaFifoWen_S = 1'b0;
+ MetaFifoDin_D = 'b0;
+ if ( (Miss_SI == 1'b1) && (FifosDisabled_S == 1'b0) ) // register a new miss
+ begin
+ MetaFifoWen_S = 1'b1;
+ MetaFifoDin_D[MISS_META_WIDTH-1:0] = MissMeta_DI;
+ end
+ else if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 4'h8) && (FifosDisabled_S == 1'b0) ) // write request from AXI interface
+ begin
+ MetaFifoWen_S = 1'b1;
+ MetaFifoDin_D = wdata_reg_vec[MISS_META_WIDTH-1:0];
+ end
+ end
+
+ // write configuration register
+ always_comb
+ begin
+ ConfRegWen_S = 1'b0;
+ ConfReg_DN = 1'b0;
+ if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 8'h10) ) // write request from AXI interface
+ begin
+ ConfRegWen_S = 1'b1;
+ ConfReg_DN = wdata_reg_vec[$high(ConfReg_DN):0];
+ end
+ end
+
+ // AXI read data
+ always_comb
+ begin
+ s_axi_rdata = rdata_reg; // read L1 config
+ AddrFifoRen_S = 1'b0;
+ MetaFifoRen_S = 1'b0;
+ if ( rvalid == 1'b1 )
+ begin
+ // read address FIFO
+ if ( araddr_reg[ADDR_MSB:0] == 'b0 )
+ begin
+ s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
+ s_axi_rdata[ADDR_WIDTH_VIRT-1:0] = AddrFifoDout_D;
+ if ( AddrFifoEmpty_S == 1'b0 )
+ AddrFifoRen_S = 1'b1;
+ end
+ // read meta FIFO
+ else if ( araddr_reg[ADDR_MSB:0] == 4'h8 )
+ begin
+ s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
+ s_axi_rdata[31] = MetaFifoEmpty_S;
+ s_axi_rdata[MISS_META_WIDTH-1:0] = MetaFifoDout_D;
+ if ( MetaFifoEmpty_S == 1'b0 )
+ MetaFifoRen_S = 1'b1;
+ end
+ // read configuration register
+ else if ( araddr_reg[ADDR_MSB:0] == 8'h10 )
+ begin
+ s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
+ s_axi_rdata[$high(ConfReg_DP):0] = ConfReg_DP;
+ end
+ end // if ( rvalid == 1'b1 )
+ end // always_comb begin
+
+ // configuration register
+ always_ff @(posedge Clk_CI or negedge Rst_RBI) begin
+ if (Rst_RBI == 1'b0)
+ begin
+ ConfReg_DP <= 'b0;
+ end
+ else if (ConfRegWen_S == 1'b1)
+ begin
+ ConfReg_DP <= ConfReg_DN;
+ end
+ end
+
+ generic_fifo
+ #(
+ .DATA_WIDTH ( ADDR_WIDTH_VIRT ),
+ .DATA_DEPTH ( MH_FIFO_DEPTH )
+ )
+ fifo_addr_i
+ (
+ .clk ( Clk_CI ),
+ .rst_n ( Rst_RBI ),
+ .data_i ( AddrFifoDin_D ),
+ .valid_i ( AddrFifoWen_S & AddrFifoFull_SB ),
+ .grant_o ( AddrFifoFull_SB ),
+ .data_o ( AddrFifoDout_D ),
+ .valid_o ( AddrFifoEmpty_SB ),
+ .grant_i ( AddrFifoRen_S ),
+ .test_mode_i ( 1'b0 )
+ );
+
+ generic_fifo
+ #(
+ .DATA_WIDTH ( MISS_META_WIDTH ),
+ .DATA_DEPTH ( MH_FIFO_DEPTH )
+ )
+ fifo_meta_i
+ (
+ .clk ( Clk_CI ),
+ .rst_n ( Rst_RBI ),
+ .data_i ( MetaFifoDin_D ),
+ .valid_i ( MetaFifoWen_S & MetaFifoFull_SB ),
+ .grant_o ( MetaFifoFull_SB ),
+ .data_o ( MetaFifoDout_D ),
+ .valid_o ( MetaFifoEmpty_SB ),
+ .grant_i ( MetaFifoRen_S ),
+ .test_mode_i ( 1'b0 )
+ );
+"""
+#
+# endmodule
+#
+#
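+# Quick sanity check of the configuration address decode above (plain Python,
+# not part of the generated module; it assumes CfMath::log2 behaves like a
+# ceiling log2).  With the default N_REGS = 196 and 64-bit slice registers,
+# ADDR_LSB = 3 and ADDR_MSB = 10; everything below L2SINGLE_AMAP_SIZE targets
+# the L1 registers, and port j's L2 RAMs start at (j+1)*L2SINGLE_AMAP_SIZE.
+
+def _cfg_addr_decode_example(addr, n_regs=196, l2single_amap_size=0x4000):
+    import math
+    addr_lsb = int(math.ceil(math.log2(64 // 8)))                  # = 3
+    addr_msb = int(math.ceil(math.log2(n_regs))) + addr_lsb - 1    # = 10
+    if addr < l2single_amap_size:
+        # L1: index of the 64-bit configuration word being addressed.
+        return ("L1", (addr >> addr_lsb) & ((1 << (addr_msb - addr_lsb + 1)) - 1))
+    # L2: which port's TLB RAM is hit, and the 32-bit word offset inside it.
+    port = addr // l2single_amap_size - 1
+    return ("L2", port, (addr % l2single_amap_size) // 4)
+
+# _cfg_addr_decode_example(0x00)   -> ("L1", 0)    miss-address FIFO
+# _cfg_addr_decode_example(0x08)   -> ("L1", 1)    miss-metadata FIFO
+# _cfg_addr_decode_example(0x10)   -> ("L1", 2)    RAB configuration register
+# _cfg_addr_decode_example(0x8000) -> ("L2", 1, 0)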
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class axi_rab_top(Elaboratable):
+
+    def __init__(self, N_PORTS=2, AXI_LITE_ADDR_WIDTH=32,
+                 AXI_LITE_DATA_WIDTH=64):
+        # Parameter defaults follow the original SystemVerilog module below.
+ self.Clk_CI = Signal() # input
+ self.NonGatedClk_CI = Signal() # input
+ self.Rst_RBI = Signal() # input
+ self.s_axi4_awid = Signal() # input
+ self.s_axi4_awaddr = Signal() # input
+ self.s_axi4_awvalid = Signal(N_PORTS) # input
+ self.s_axi4_awready = Signal(N_PORTS) # output
+ self.s_axi4_awlen = Signal() # input
+ self.s_axi4_awsize = Signal() # input
+ self.s_axi4_awburst = Signal() # input
+ self.s_axi4_awlock = Signal(N_PORTS) # input
+ self.s_axi4_awprot = Signal() # input
+ self.s_axi4_awcache = Signal() # input
+ self.s_axi4_awregion = Signal() # input
+ self.s_axi4_awqos = Signal() # input
+ self.s_axi4_awuser = Signal() # input
+ self.s_axi4_wdata = Signal() # input
+ self.s_axi4_wvalid = Signal(N_PORTS) # input
+ self.s_axi4_wready = Signal(N_PORTS) # output
+ self.s_axi4_wstrb = Signal() # input
+ self.s_axi4_wlast = Signal(N_PORTS) # input
+ self.s_axi4_wuser = Signal() # input
+ self.s_axi4_bid = Signal() # output
+ self.s_axi4_bresp = Signal() # output
+ self.s_axi4_bvalid = Signal(N_PORTS) # output
+ self.s_axi4_buser = Signal() # output
+ self.s_axi4_bready = Signal(N_PORTS) # input
+ self.s_axi4_arid = Signal() # input
+ self.s_axi4_araddr = Signal() # input
+ self.s_axi4_arvalid = Signal(N_PORTS) # input
+ self.s_axi4_arready = Signal(N_PORTS) # output
+ self.s_axi4_arlen = Signal() # input
+ self.s_axi4_arsize = Signal() # input
+ self.s_axi4_arburst = Signal() # input
+ self.s_axi4_arlock = Signal(N_PORTS) # input
+ self.s_axi4_arprot = Signal() # input
+ self.s_axi4_arcache = Signal() # input
+ self.s_axi4_aruser = Signal() # input
+ self.s_axi4_rid = Signal() # output
+ self.s_axi4_rdata = Signal() # output
+ self.s_axi4_rresp = Signal() # output
+ self.s_axi4_rvalid = Signal(N_PORTS) # output
+ self.s_axi4_rready = Signal(N_PORTS) # input
+ self.s_axi4_rlast = Signal(N_PORTS) # output
+ self.s_axi4_ruser = Signal() # output
+ self.m0_axi4_awid = Signal() # output
+ self.m0_axi4_awaddr = Signal() # output
+ self.m0_axi4_awvalid = Signal(N_PORTS) # output
+ self.m0_axi4_awready = Signal(N_PORTS) # input
+ self.m0_axi4_awlen = Signal() # output
+ self.m0_axi4_awsize = Signal() # output
+ self.m0_axi4_awburst = Signal() # output
+ self.m0_axi4_awlock = Signal(N_PORTS) # output
+ self.m0_axi4_awprot = Signal() # output
+ self.m0_axi4_awcache = Signal() # output
+ self.m0_axi4_awregion = Signal() # output
+ self.m0_axi4_awqos = Signal() # output
+ self.m0_axi4_awuser = Signal() # output
+ self.m0_axi4_wdata = Signal() # output
+ self.m0_axi4_wvalid = Signal(N_PORTS) # output
+ self.m0_axi4_wready = Signal(N_PORTS) # input
+ self.m0_axi4_wstrb = Signal() # output
+ self.m0_axi4_wlast = Signal(N_PORTS) # output
+ self.m0_axi4_wuser = Signal() # output
+ self.m0_axi4_bid = Signal() # input
+ self.m0_axi4_bresp = Signal() # input
+ self.m0_axi4_bvalid = Signal(N_PORTS) # input
+ self.m0_axi4_buser = Signal() # input
+ self.m0_axi4_bready = Signal(N_PORTS) # output
+ self.m0_axi4_arid = Signal() # output
+ self.m0_axi4_araddr = Signal() # output
+ self.m0_axi4_arvalid = Signal(N_PORTS) # output
+ self.m0_axi4_arready = Signal(N_PORTS) # input
+ self.m0_axi4_arlen = Signal() # output
+ self.m0_axi4_arsize = Signal() # output
+ self.m0_axi4_arburst = Signal() # output
+ self.m0_axi4_arlock = Signal(N_PORTS) # output
+ self.m0_axi4_arprot = Signal() # output
+ self.m0_axi4_arcache = Signal() # output
+ self.m0_axi4_aruser = Signal() # output
+ self.m0_axi4_rid = Signal() # input
+ self.m0_axi4_rdata = Signal() # input
+ self.m0_axi4_rresp = Signal() # input
+ self.m0_axi4_rvalid = Signal(N_PORTS) # input
+ self.m0_axi4_rready = Signal(N_PORTS) # output
+ self.m0_axi4_rlast = Signal(N_PORTS) # input
+ self.m0_axi4_ruser = Signal() # input
+ self.m1_axi4_awid = Signal() # output
+ self.m1_axi4_awaddr = Signal() # output
+ self.m1_axi4_awvalid = Signal(N_PORTS) # output
+ self.m1_axi4_awready = Signal(N_PORTS) # input
+ self.m1_axi4_awlen = Signal() # output
+ self.m1_axi4_awsize = Signal() # output
+ self.m1_axi4_awburst = Signal() # output
+ self.m1_axi4_awlock = Signal(N_PORTS) # output
+ self.m1_axi4_awprot = Signal() # output
+ self.m1_axi4_awcache = Signal() # output
+ self.m1_axi4_awregion = Signal() # output
+ self.m1_axi4_awqos = Signal() # output
+ self.m1_axi4_awuser = Signal() # output
+ self.m1_axi4_wdata = Signal() # output
+ self.m1_axi4_wvalid = Signal(N_PORTS) # output
+ self.m1_axi4_wready = Signal(N_PORTS) # input
+ self.m1_axi4_wstrb = Signal() # output
+ self.m1_axi4_wlast = Signal(N_PORTS) # output
+ self.m1_axi4_wuser = Signal() # output
+ self.m1_axi4_bid = Signal() # input
+ self.m1_axi4_bresp = Signal() # input
+ self.m1_axi4_bvalid = Signal(N_PORTS) # input
+ self.m1_axi4_buser = Signal() # input
+ self.m1_axi4_bready = Signal(N_PORTS) # output
+ self.m1_axi4_arid = Signal() # output
+ self.m1_axi4_araddr = Signal() # output
+ self.m1_axi4_arvalid = Signal(N_PORTS) # output
+ self.m1_axi4_arready = Signal(N_PORTS) # input
+ self.m1_axi4_arlen = Signal() # output
+ self.m1_axi4_arsize = Signal() # output
+ self.m1_axi4_arburst = Signal() # output
+ self.m1_axi4_arlock = Signal(N_PORTS) # output
+ self.m1_axi4_arprot = Signal() # output
+ self.m1_axi4_arcache = Signal() # output
+ self.m1_axi4_aruser = Signal() # output
+ self.m1_axi4_rid = Signal() # input
+ self.m1_axi4_rdata = Signal() # input
+ self.m1_axi4_rresp = Signal() # input
+ self.m1_axi4_rvalid = Signal(N_PORTS) # input
+ self.m1_axi4_rready = Signal(N_PORTS) # output
+ self.m1_axi4_rlast = Signal(N_PORTS) # input
+ self.m1_axi4_ruser = Signal() # input
+ self.s_axi4lite_awaddr = Signal(AXI_LITE_ADDR_WIDTH) # input
+ self.s_axi4lite_awvalid = Signal() # input
+ self.s_axi4lite_awready = Signal() # output
+ self.s_axi4lite_wdata = Signal(AXI_LITE_DATA_WIDTH) # input
+ self.s_axi4lite_wvalid = Signal() # input
+ self.s_axi4lite_wready = Signal() # output
+        self.s_axi4lite_wstrb = Signal(AXI_LITE_DATA_WIDTH // 8) # input
+ self.s_axi4lite_bresp = Signal(2) # output
+ self.s_axi4lite_bvalid = Signal() # output
+ self.s_axi4lite_bready = Signal() # input
+ self.s_axi4lite_araddr = Signal(AXI_LITE_ADDR_WIDTH) # input
+ self.s_axi4lite_arvalid = Signal() # input
+ self.s_axi4lite_arready = Signal() # output
+ self.s_axi4lite_rdata = Signal(AXI_LITE_DATA_WIDTH) # output
+ self.s_axi4lite_rresp = Signal(2) # output
+ self.s_axi4lite_rvalid = Signal() # output
+ self.s_axi4lite_rready = Signal() # input
+ self.int_miss = Signal(N_PORTS) # output
+ self.int_multi = Signal(N_PORTS) # output
+ self.int_prot = Signal(N_PORTS) # output
+ self.int_mhf_full = Signal() # output
+
+ def elaborate(self, platform=None):
+ m = Module()
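+        # The generated body is empty: only the port list was translated;
+        # the original SystemVerilog implementation is preserved below as
+        # comments and a docstring.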
+ return m
+
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# // --=========================================================================--
+# //
+# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ████████╗ ██████╗ ██████╗
+# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ╚══██╔══╝██╔═══██╗██╔══██╗
+# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝
+# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔═══╝
+# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ██║ ╚██████╔╝██║
+# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═════╝ ╚═╝
+# //
+# // --=========================================================================--
+# /*
+# * axi_rab_top
+# *
+# * The remapping address block (RAB) performs address translation for AXI
+# * transactions arriving at the input port and forwards them to different
+# * downstream AXI ports.
+# *
+# * The five AXI channels are each buffered on the input side using a FIFO,
+# * described in axi4_XX_buffer. The RAB lookup result is merged into the
+# * AXI transaction via the axi4_XX_sender instances, which manage upstream
+# * error signaling for failed lookups.
+# *
+# * Address translation is performed based on data stored in up to two
+# * translation lookaside buffers (TLBs), which are private to each RAB port
+# * (each of which has two AXI master ports and one AXI slave port). These
+# * TLBs are managed in software through the AXI-Lite interface.
+# *
+# * If ACP is enabled, the `cache_coherent` flag in the TLBs is used to
+# * multiplex between the two ports. If ACP is disabled, only the first master
+# * port is used. In this case, the `cache_coherent` flag is used to set the
+# * AxCACHE signals of the AXI bus accordingly.
+# *
+# * Authors:
+# * Antonio Pullini <pullinia@iis.ee.ethz.ch>
+# * Conrad Burchert <bconrad@ethz.ch>
+# * Maheshwara Sharma <msharma@student.ethz.ch>
+# * Andreas Kurth <akurth@iis.ee.ethz.ch>
+# * Johannes Weinbuch <jweinbuch@student.ethz.ch>
+# * Pirmin Vogel <vogelpi@iis.ee.ethz.ch>
+# */
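+
+# A minimal, illustrative nmigen sketch (not part of the generated class
+# above) of the master-port selection described in the preceding comment:
+# with ACP enabled, the cache_coherent flag from the TLB lookup picks the
+# master port; without ACP, only master 0 is used and the flag drives
+# AxCACHE instead. All names and the en_acp switch are hypothetical, and
+# the burst-dependent AxCACHE values of the original RTL are omitted.
+class MasterSelectSketch(Elaboratable):
+
+    def __init__(self, en_acp=True):
+        self.en_acp = en_acp            # assumed compile-time choice (EN_ACP)
+        self.cache_coherent = Signal()  # from the TLB lookup
+        self.master_select = Signal()   # 0: master 0, 1: master 1 (ACP)
+        self.awcache = Signal(4)        # AxCACHE driven on master 0
+
+    def elaborate(self, platform=None):
+        m = Module()
+        if self.en_acp:
+            # Coherent transactions go to the second (ACP) master port;
+            # master 0 keeps a static, non-coherent AxCACHE value.
+            m.d.comb += [
+                self.master_select.eq(self.cache_coherent),
+                self.awcache.eq(0b0011),
+            ]
+        else:
+            # Only master 0 is used; coherent transactions are marked
+            # via AxCACHE instead of being steered to another port.
+            m.d.comb += self.master_select.eq(0)
+            with m.If(self.cache_coherent):
+                m.d.comb += self.awcache.eq(0b1111)
+            with m.Else():
+                m.d.comb += self.awcache.eq(0b0011)
+        return m
+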
+#
+# //`include "pulp_soc_defines.sv"
+#
+# ////import CfMath::log2;
+#
+# module axi_rab_top
+#
+# // Parameters {{{
+# #(
+# parameter N_PORTS = 2,
+# parameter N_L2_SETS = 32,
+# parameter N_L2_SET_ENTRIES = 32,
+# parameter AXI_DATA_WIDTH = 64,
+# parameter AXI_S_ADDR_WIDTH = 32,
+# parameter AXI_M_ADDR_WIDTH = 40,
+# parameter AXI_LITE_DATA_WIDTH = 64,
+# parameter AXI_LITE_ADDR_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 10,
+# parameter AXI_USER_WIDTH = 6,
+# parameter MH_FIFO_DEPTH = 16
+# )
+# // }}}
+#
+# // Ports {{{
+# (
+#
+# input logic Clk_CI, // This clock may be gated.
+# input logic NonGatedClk_CI,
+# input logic Rst_RBI,
+#
+# // For every slave port there are two master ports. The master
+# // port to use can be set using the master_select flag of the protection
+# // bits of a slice
+#
+# // AXI4 Slave {{{
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_awid,
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_awaddr,
+# input logic [N_PORTS-1:0] s_axi4_awvalid,
+# output logic [N_PORTS-1:0] s_axi4_awready,
+# input logic [N_PORTS-1:0] [7:0] s_axi4_awlen,
+# input logic [N_PORTS-1:0] [2:0] s_axi4_awsize,
+# input logic [N_PORTS-1:0] [1:0] s_axi4_awburst,
+# input logic [N_PORTS-1:0] s_axi4_awlock,
+# input logic [N_PORTS-1:0] [2:0] s_axi4_awprot,
+# input logic [N_PORTS-1:0] [3:0] s_axi4_awcache,
+# input logic [N_PORTS-1:0] [3:0] s_axi4_awregion,
+# input logic [N_PORTS-1:0] [3:0] s_axi4_awqos,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_awuser,
+#
+# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
+# input logic [N_PORTS-1:0] s_axi4_wvalid,
+# output logic [N_PORTS-1:0] s_axi4_wready,
+# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
+# input logic [N_PORTS-1:0] s_axi4_wlast,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_wuser,
+#
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_bid,
+# output logic [N_PORTS-1:0] [1:0] s_axi4_bresp,
+# output logic [N_PORTS-1:0] s_axi4_bvalid,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_buser,
+# input logic [N_PORTS-1:0] s_axi4_bready,
+#
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_arid,
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_araddr,
+# input logic [N_PORTS-1:0] s_axi4_arvalid,
+# output logic [N_PORTS-1:0] s_axi4_arready,
+# input logic [N_PORTS-1:0] [7:0] s_axi4_arlen,
+# input logic [N_PORTS-1:0] [2:0] s_axi4_arsize,
+# input logic [N_PORTS-1:0] [1:0] s_axi4_arburst,
+# input logic [N_PORTS-1:0] s_axi4_arlock,
+# input logic [N_PORTS-1:0] [2:0] s_axi4_arprot,
+# input logic [N_PORTS-1:0] [3:0] s_axi4_arcache,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_aruser,
+#
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_rid,
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
+# output logic [N_PORTS-1:0] [1:0] s_axi4_rresp,
+# output logic [N_PORTS-1:0] s_axi4_rvalid,
+# input logic [N_PORTS-1:0] s_axi4_rready,
+# output logic [N_PORTS-1:0] s_axi4_rlast,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_ruser,
+# // }}}
+#
+# // AXI4 Master 0 {{{
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_awid,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_awaddr,
+# output logic [N_PORTS-1:0] m0_axi4_awvalid,
+# input logic [N_PORTS-1:0] m0_axi4_awready,
+# output logic [N_PORTS-1:0] [7:0] m0_axi4_awlen,
+# output logic [N_PORTS-1:0] [2:0] m0_axi4_awsize,
+# output logic [N_PORTS-1:0] [1:0] m0_axi4_awburst,
+# output logic [N_PORTS-1:0] m0_axi4_awlock,
+# output logic [N_PORTS-1:0] [2:0] m0_axi4_awprot,
+# output logic [N_PORTS-1:0] [3:0] m0_axi4_awcache,
+# output logic [N_PORTS-1:0] [3:0] m0_axi4_awregion,
+# output logic [N_PORTS-1:0] [3:0] m0_axi4_awqos,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_awuser,
+#
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_wdata,
+# output logic [N_PORTS-1:0] m0_axi4_wvalid,
+# input logic [N_PORTS-1:0] m0_axi4_wready,
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m0_axi4_wstrb,
+# output logic [N_PORTS-1:0] m0_axi4_wlast,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_wuser,
+#
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_bid,
+# input logic [N_PORTS-1:0] [1:0] m0_axi4_bresp,
+# input logic [N_PORTS-1:0] m0_axi4_bvalid,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_buser,
+# output logic [N_PORTS-1:0] m0_axi4_bready,
+#
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_arid,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_araddr,
+# output logic [N_PORTS-1:0] m0_axi4_arvalid,
+# input logic [N_PORTS-1:0] m0_axi4_arready,
+# output logic [N_PORTS-1:0] [7:0] m0_axi4_arlen,
+# output logic [N_PORTS-1:0] [2:0] m0_axi4_arsize,
+# output logic [N_PORTS-1:0] [1:0] m0_axi4_arburst,
+# output logic [N_PORTS-1:0] m0_axi4_arlock,
+# output logic [N_PORTS-1:0] [2:0] m0_axi4_arprot,
+# output logic [N_PORTS-1:0] [3:0] m0_axi4_arcache,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_aruser,
+#
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_rid,
+# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_rdata,
+# input logic [N_PORTS-1:0] [1:0] m0_axi4_rresp,
+# input logic [N_PORTS-1:0] m0_axi4_rvalid,
+# output logic [N_PORTS-1:0] m0_axi4_rready,
+# input logic [N_PORTS-1:0] m0_axi4_rlast,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_ruser,
+# // }}}
+#
+# // AXI4 Master 1 {{{
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_awid,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_awaddr,
+# output logic [N_PORTS-1:0] m1_axi4_awvalid,
+# input logic [N_PORTS-1:0] m1_axi4_awready,
+# output logic [N_PORTS-1:0] [7:0] m1_axi4_awlen,
+# output logic [N_PORTS-1:0] [2:0] m1_axi4_awsize,
+# output logic [N_PORTS-1:0] [1:0] m1_axi4_awburst,
+# output logic [N_PORTS-1:0] m1_axi4_awlock,
+# output logic [N_PORTS-1:0] [2:0] m1_axi4_awprot,
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_awcache,
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_awregion,
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_awqos,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_awuser,
+#
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_wdata,
+# output logic [N_PORTS-1:0] m1_axi4_wvalid,
+# input logic [N_PORTS-1:0] m1_axi4_wready,
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m1_axi4_wstrb,
+# output logic [N_PORTS-1:0] m1_axi4_wlast,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_wuser,
+#
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_bid,
+# input logic [N_PORTS-1:0] [1:0] m1_axi4_bresp,
+# input logic [N_PORTS-1:0] m1_axi4_bvalid,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_buser,
+# output logic [N_PORTS-1:0] m1_axi4_bready,
+#
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_arid,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_araddr,
+# output logic [N_PORTS-1:0] m1_axi4_arvalid,
+# input logic [N_PORTS-1:0] m1_axi4_arready,
+# output logic [N_PORTS-1:0] [7:0] m1_axi4_arlen,
+# output logic [N_PORTS-1:0] [2:0] m1_axi4_arsize,
+# output logic [N_PORTS-1:0] [1:0] m1_axi4_arburst,
+# output logic [N_PORTS-1:0] m1_axi4_arlock,
+# output logic [N_PORTS-1:0] [2:0] m1_axi4_arprot,
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_arcache,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_aruser,
+#
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_rid,
+# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_rdata,
+# input logic [N_PORTS-1:0] [1:0] m1_axi4_rresp,
+# input logic [N_PORTS-1:0] m1_axi4_rvalid,
+# output logic [N_PORTS-1:0] m1_axi4_rready,
+# input logic [N_PORTS-1:0] m1_axi4_rlast,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_ruser,
+# // }}}
+#
+# // AXI 4 Lite Slave (Configuration Interface) {{{
+# // AXI4-Lite port to setup the rab slices
+# // use this to program the configuration registers
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_awaddr,
+# input logic s_axi4lite_awvalid,
+# output logic s_axi4lite_awready,
+#
+# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_wdata,
+# input logic s_axi4lite_wvalid,
+# output logic s_axi4lite_wready,
+# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi4lite_wstrb,
+#
+# output logic [1:0] s_axi4lite_bresp,
+# output logic s_axi4lite_bvalid,
+# input logic s_axi4lite_bready,
+#
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_araddr,
+# input logic s_axi4lite_arvalid,
+# output logic s_axi4lite_arready,
+#
+# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_rdata,
+# output logic [1:0] s_axi4lite_rresp,
+# output logic s_axi4lite_rvalid,
+# input logic s_axi4lite_rready,
+# // }}}
+#
+# // BRAMs {{{
+# //`ifdef RAB_AX_LOG_EN
+# // BramPort.Slave ArBram_PS,
+# // BramPort.Slave AwBram_PS,
+# //`endif
+# // }}}
+#
+# // Logger Control {{{
+# //`ifdef RAB_AX_LOG_EN
+# // input logic LogEn_SI,
+# // input logic ArLogClr_SI,
+# // input logic AwLogClr_SI,
+# // output logic ArLogRdy_SO,
+# // output logic AwLogRdy_SO,
+# //`endif
+# // }}}
+#
+# // Interrupt Outputs {{{
+# // Interrupt lines to handle misses, collisions of slices/multiple hits,
+# // protection faults and overflow of the miss handling fifo
+# //`ifdef RAB_AX_LOG_EN
+# // output logic int_ar_log_full,
+# // output logic int_aw_log_full,
+# //`endif
+# output logic [N_PORTS-1:0] int_miss,
+# output logic [N_PORTS-1:0] int_multi,
+# output logic [N_PORTS-1:0] int_prot,
+# output logic int_mhf_full
+# // }}}
+#
+# );
+#
+"""#docstring_begin
+
+ // }}}
+
+ // Signals {{{
+ // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗
+ // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝
+ // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗
+ // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║
+ // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║
+ // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝
+ //
+
+   // Internal AXI4 lines; these connect the buffers on the slave side to the
+   // rab core and to the multiplexers which switch between the two master outputs.
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_awid;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_awaddr;
+ logic [N_PORTS-1:0] int_awvalid;
+ logic [N_PORTS-1:0] int_awready;
+ logic [N_PORTS-1:0] [7:0] int_awlen;
+ logic [N_PORTS-1:0] [2:0] int_awsize;
+ logic [N_PORTS-1:0] [1:0] int_awburst;
+ logic [N_PORTS-1:0] int_awlock;
+ logic [N_PORTS-1:0] [2:0] int_awprot;
+ logic [N_PORTS-1:0] [3:0] int_awcache;
+ logic [N_PORTS-1:0] [3:0] int_awregion;
+ logic [N_PORTS-1:0] [3:0] int_awqos;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_awuser;
+
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_wdata;
+ logic [N_PORTS-1:0] int_wvalid;
+ logic [N_PORTS-1:0] int_wready;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] int_wstrb;
+ logic [N_PORTS-1:0] int_wlast;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_wuser;
+
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_bid;
+ logic [N_PORTS-1:0] [1:0] int_bresp;
+ logic [N_PORTS-1:0] int_bvalid;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_buser;
+ logic [N_PORTS-1:0] int_bready;
+
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_arid;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_araddr;
+ logic [N_PORTS-1:0] int_arvalid;
+ logic [N_PORTS-1:0] int_arready;
+ logic [N_PORTS-1:0] [7:0] int_arlen;
+ logic [N_PORTS-1:0] [2:0] int_arsize;
+ logic [N_PORTS-1:0] [1:0] int_arburst;
+ logic [N_PORTS-1:0] int_arlock;
+ logic [N_PORTS-1:0] [2:0] int_arprot;
+ logic [N_PORTS-1:0] [3:0] int_arcache;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_aruser;
+
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_rid;
+ logic [N_PORTS-1:0] [1:0] int_rresp;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_rdata;
+ logic [N_PORTS-1:0] int_rlast;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_ruser;
+ logic [N_PORTS-1:0] int_rvalid;
+ logic [N_PORTS-1:0] int_rready;
+
+ // rab_core outputs
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_wtrans_addr;
+ logic [N_PORTS-1:0] int_wtrans_accept;
+ logic [N_PORTS-1:0] int_wtrans_drop;
+ logic [N_PORTS-1:0] int_wtrans_miss;
+ logic [N_PORTS-1:0] int_wtrans_sent;
+ logic [N_PORTS-1:0] int_wtrans_cache_coherent;
+ logic [N_PORTS-1:0] int_wmaster_select;
+
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_rtrans_addr;
+ logic [N_PORTS-1:0] int_rtrans_accept;
+ logic [N_PORTS-1:0] int_rtrans_drop;
+ logic [N_PORTS-1:0] int_rtrans_miss;
+ logic [N_PORTS-1:0] int_rtrans_sent;
+ logic [N_PORTS-1:0] int_rtrans_cache_coherent;
+ logic [N_PORTS-1:0] int_rmaster_select;
+
+ logic [N_PORTS-1:0] w_master_select;
+
+ // Internal master0 AXI4 lines. These connect the first master port to the
+ // multiplexers
+   // For the read address, write address and write data channels, the other
+   // lines are ignored while valid is low, so only the valid and ready
+   // signals need to be multiplexed.
+ logic [N_PORTS-1:0] int_m0_awvalid;
+ logic [N_PORTS-1:0] int_m0_awready;
+
+ logic [N_PORTS-1:0] int_m0_wvalid;
+ logic [N_PORTS-1:0] int_m0_wready;
+
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_bid;
+ logic [N_PORTS-1:0] [1:0] int_m0_bresp;
+ logic [N_PORTS-1:0] int_m0_bvalid;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_buser;
+ logic [N_PORTS-1:0] int_m0_bready;
+
+ logic [N_PORTS-1:0] int_m0_arvalid;
+ logic [N_PORTS-1:0] int_m0_arready;
+
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_rid;
+ logic [N_PORTS-1:0] [1:0] int_m0_rresp;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m0_rdata;
+ logic [N_PORTS-1:0] int_m0_rlast;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_ruser;
+ logic [N_PORTS-1:0] int_m0_rready;
+ logic [N_PORTS-1:0] int_m0_rvalid;
+
+ logic [N_PORTS-1:0] l1_m0_ar_accept;
+ logic [N_PORTS-1:0] l1_m0_ar_drop;
+ logic [N_PORTS-1:0] l1_m0_ar_save;
+ logic [N_PORTS-1:0] l1_m0_ar_done;
+ logic [N_PORTS-1:0] l2_m0_ar_accept;
+ logic [N_PORTS-1:0] l2_m0_ar_drop;
+ logic [N_PORTS-1:0] l2_m0_ar_done;
+ logic [N_PORTS-1:0] l2_m0_ar_sending;
+
+ logic [N_PORTS-1:0] l1_m0_aw_accept;
+ logic [N_PORTS-1:0] l1_m0_aw_drop;
+ logic [N_PORTS-1:0] l1_m0_aw_save;
+ logic [N_PORTS-1:0] l1_m0_aw_done;
+ logic [N_PORTS-1:0] l2_m0_aw_accept;
+ logic [N_PORTS-1:0] l2_m0_aw_drop;
+ logic [N_PORTS-1:0] l2_m0_aw_done;
+ logic [N_PORTS-1:0] l2_m0_aw_sending;
+
+ // Internal master1 AXI4 lines. These connect the second master port to the
+ // multiplexers
+   // For the read address, write address and write data channels, the other
+   // lines are ignored while valid is low, so only the valid and ready
+   // signals need to be multiplexed.
+ logic [N_PORTS-1:0] int_m1_awvalid;
+ logic [N_PORTS-1:0] int_m1_awready;
+
+ logic [N_PORTS-1:0] int_m1_wvalid;
+ logic [N_PORTS-1:0] int_m1_wready;
+
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_bid;
+ logic [N_PORTS-1:0] [1:0] int_m1_bresp;
+ logic [N_PORTS-1:0] int_m1_bvalid;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_buser;
+ logic [N_PORTS-1:0] int_m1_bready;
+
+ logic [N_PORTS-1:0] int_m1_arvalid;
+ logic [N_PORTS-1:0] int_m1_arready;
+
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_rid;
+ logic [N_PORTS-1:0] [1:0] int_m1_rresp;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m1_rdata;
+ logic [N_PORTS-1:0] int_m1_rlast;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_ruser;
+ logic [N_PORTS-1:0] int_m1_rvalid;
+ logic [N_PORTS-1:0] int_m1_rready;
+
+ logic [N_PORTS-1:0] l1_m1_ar_accept;
+ logic [N_PORTS-1:0] l1_m1_ar_drop;
+ logic [N_PORTS-1:0] l1_m1_ar_save;
+ logic [N_PORTS-1:0] l1_m1_ar_done;
+ logic [N_PORTS-1:0] l2_m1_ar_accept;
+ logic [N_PORTS-1:0] l2_m1_ar_drop;
+ logic [N_PORTS-1:0] l2_m1_ar_done;
+
+ logic [N_PORTS-1:0] l1_m1_aw_accept;
+ logic [N_PORTS-1:0] l1_m1_aw_drop;
+ logic [N_PORTS-1:0] l1_m1_aw_save;
+ logic [N_PORTS-1:0] l1_m1_aw_done;
+ logic [N_PORTS-1:0] l2_m1_aw_accept;
+ logic [N_PORTS-1:0] l2_m1_aw_drop;
+ logic [N_PORTS-1:0] l2_m1_aw_done;
+
+ // L1 outputs
+ logic [N_PORTS-1:0] rab_miss; // L1 RAB miss
+ logic [N_PORTS-1:0] rab_prot;
+ logic [N_PORTS-1:0] rab_multi;
+ logic [N_PORTS-1:0] rab_prefetch;
+
+ //
+ // Signals used to support L2 TLB
+ //
+ // L2 RAM configuration signals
+ logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] L2CfgWData_D;
+ logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] L2CfgWAddr_D;
+ logic [N_PORTS-1:0] L2CfgWE_S;
+
+ // L1 output and drop Buffer
+ logic [N_PORTS-1:0] L1OutRwType_D, L1DropRwType_DP;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L1OutUser_D, L1DropUser_DP;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L1OutId_D, L1DropId_DP;
+ logic [N_PORTS-1:0] [7:0] L1OutLen_D, L1DropLen_DP;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L1OutAddr_D, L1DropAddr_DP;
+ logic [N_PORTS-1:0] L1OutProt_D, L1DropProt_DP;
+ logic [N_PORTS-1:0] L1OutMulti_D, L1DropMulti_DP;
+ logic [N_PORTS-1:0] L1DropEn_S;
+ logic [N_PORTS-1:0] L1DropPrefetch_S;
+
+ logic [N_PORTS-1:0] L1DropValid_SN, L1DropValid_SP;
+
+ // L2 input Buffer
+ logic [N_PORTS-1:0] L2InRwType_DP;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2InUser_DP;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2InId_DP;
+ logic [N_PORTS-1:0] [7:0] L2InLen_DP;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2InAddr_DP;
+ logic [N_PORTS-1:0] L2InEn_S;
+
+ // L2 output Buffer
+ logic [N_PORTS-1:0] L2OutRwType_DP;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2OutUser_DP;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2OutId_DP;
+ logic [N_PORTS-1:0] [7:0] L2OutLen_DP;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2OutInAddr_DP;
+
+ logic [N_PORTS-1:0] L2OutHit_SN, L2OutHit_SP;
+ logic [N_PORTS-1:0] L2OutMiss_SN, L2OutMiss_SP;
+ logic [N_PORTS-1:0] L2OutProt_SN, L2OutProt_SP;
+ logic [N_PORTS-1:0] L2OutMulti_SN, L2OutMulti_SP;
+ logic [N_PORTS-1:0] L2OutCC_SN, L2OutCC_SP;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] L2OutAddr_DN, L2OutAddr_DP;
+
+ logic [N_PORTS-1:0] L2OutValid_SN, L2OutValid_SP;
+ logic [N_PORTS-1:0] L2OutPrefetch_S;
+ logic [N_PORTS-1:0] L2OutReady_S;
+ logic [N_PORTS-1:0] L2OutEn_S;
+
+ // L2 outputs
+ logic [N_PORTS-1:0] L2Busy_S;
+ logic [N_PORTS-1:0] L2OutValid_S;
+
+ logic [N_PORTS-1:0] L2Miss_S;
+
+ // Signals for interfacing the AXI modules
+ logic [N_PORTS-1:0] l1_ar_accept;
+ logic [N_PORTS-1:0] l1_aw_accept;
+ logic [N_PORTS-1:0] l1_w_accept;
+ logic [N_PORTS-1:0] l1_xw_accept;
+
+ logic [N_PORTS-1:0] l1_ar_drop;
+ logic [N_PORTS-1:0] l1_aw_drop;
+ logic [N_PORTS-1:0] l1_w_drop;
+ logic [N_PORTS-1:0] l1_xw_drop;
+
+ logic [N_PORTS-1:0] l1_ar_save;
+ logic [N_PORTS-1:0] l1_aw_save;
+ logic [N_PORTS-1:0] l1_w_save;
+ logic [N_PORTS-1:0] l1_xw_save;
+
+ logic [N_PORTS-1:0] l1_ar_done;
+ logic [N_PORTS-1:0] l1_r_done;
+ logic [N_PORTS-1:0] l1_r_drop;
+ logic [N_PORTS-1:0] lx_r_drop;
+ logic [N_PORTS-1:0] lx_r_done;
+
+ logic [N_PORTS-1:0] l1_aw_done;
+ logic [N_PORTS-1:0] l1_w_done;
+ logic [N_PORTS-1:0] l1_xw_done;
+ logic [N_PORTS-1:0] l1_aw_done_SP;
+ logic [N_PORTS-1:0] l1_w_done_SP;
+
+ logic [N_PORTS-1:0] l2_ar_accept;
+ logic [N_PORTS-1:0] l2_aw_accept;
+ logic [N_PORTS-1:0] l2_w_accept;
+ logic [N_PORTS-1:0] l2_xw_accept;
+
+ logic [N_PORTS-1:0] l2_ar_drop;
+ logic [N_PORTS-1:0] l2_r_drop;
+ logic [N_PORTS-1:0] l2_xr_drop;
+ logic [N_PORTS-1:0] l2_aw_drop;
+ logic [N_PORTS-1:0] l2_w_drop;
+ logic [N_PORTS-1:0] l2_xw_drop;
+
+ logic [N_PORTS-1:0] l2_aw_done;
+ logic [N_PORTS-1:0] l2_w_done;
+ logic [N_PORTS-1:0] l2_xw_done;
+ logic [N_PORTS-1:0] l2_aw_done_SP;
+ logic [N_PORTS-1:0] l2_w_done_SP;
+
+ logic [N_PORTS-1:0] l2_ar_done;
+ logic [N_PORTS-1:0] l2_r_done;
+ logic [N_PORTS-1:0] l2_xr_done;
+ logic [N_PORTS-1:0] l2_ar_done_SP;
+ logic [N_PORTS-1:0] l2_r_done_SP;
+
+ logic [N_PORTS-1:0] l1_mx_aw_done;
+ logic [N_PORTS-1:0] l1_mx_ar_done;
+ logic [N_PORTS-1:0] l1_m0_aw_done_SP;
+ logic [N_PORTS-1:0] l1_m0_ar_done_SP;
+ logic [N_PORTS-1:0] l1_m1_aw_done_SP;
+ logic [N_PORTS-1:0] l1_m1_ar_done_SP;
+
+ logic [N_PORTS-1:0] l2_mx_aw_done;
+ logic [N_PORTS-1:0] l2_mx_ar_done;
+ logic [N_PORTS-1:0] l2_m0_aw_done_SP;
+ logic [N_PORTS-1:0] l2_m0_ar_done_SP;
+ logic [N_PORTS-1:0] l2_m1_aw_done_SP;
+ logic [N_PORTS-1:0] l2_m1_ar_done_SP;
+
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] l1_id_drop, lx_id_drop, b_id_drop;
+ logic [N_PORTS-1:0] [7:0] l1_len_drop, lx_len_drop;
+ logic [N_PORTS-1:0] l1_prefetch_drop, lx_prefetch_drop, b_prefetch_drop;
+ logic [N_PORTS-1:0] l1_hit_drop, lx_hit_drop, b_hit_drop;
+
+ logic [N_PORTS-1:0] b_drop;
+ logic [N_PORTS-1:0] b_done;
+
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_aw_addr;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_ar_addr;
+
+ logic [N_PORTS-1:0] l2_cache_coherent;
+ logic [N_PORTS-1:0] l2_master_select;
+
+ logic [N_PORTS-1:0] aw_in_stall;
+ logic [N_PORTS-1:0] aw_out_stall;
+
+ genvar i;
+
+ // RRESP FSM
+ typedef enum logic {IDLE, BUSY} r_resp_mux_ctrl_state_t;
+ r_resp_mux_ctrl_state_t [N_PORTS-1:0] RRespMuxCtrl_SN, RRespMuxCtrl_SP;
+ logic [N_PORTS-1:0] RRespSel_SN, RRespSel_SP;
+ logic [N_PORTS-1:0] RRespBurst_S;
+ logic [N_PORTS-1:0] RRespSelIm_S;
+
+ // }}}
+
+ // Local parameters {{{
+
+ // Enable L2 for select ports
+ localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY;
+
+ // L2TLB parameters
+ localparam integer HUM_BUFFER_DEPTH = (N_L2_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS)+13;
+
+ // }}}
+
+ // Derive `master_select` from cache coherency flag. {{{
+ `ifdef EN_ACP
+ assign int_wmaster_select = int_wtrans_cache_coherent;
+ assign int_rmaster_select = int_rtrans_cache_coherent;
+ assign l2_master_select = l2_cache_coherent;
+ `else
+ assign int_wmaster_select = '0;
+ assign int_rmaster_select = '0;
+ assign l2_master_select = '0;
+ `endif
+ // }}}
+
+ // Buf and Send {{{
+ // ██████╗ ██╗ ██╗███████╗ ██╗ ███████╗███████╗███╗ ██╗██████╗
+ // ██╔══██╗██║ ██║██╔════╝ ██║ ██╔════╝██╔════╝████╗ ██║██╔══██╗
+ // ██████╔╝██║ ██║█████╗ ████████╗ ███████╗█████╗ ██╔██╗ ██║██║ ██║
+ // ██╔══██╗██║ ██║██╔══╝ ██╔═██╔═╝ ╚════██║██╔══╝ ██║╚██╗██║██║ ██║
+ // ██████╔╝╚██████╔╝██║ ██████║ ███████║███████╗██║ ╚████║██████╔╝
+ // ╚═════╝ ╚═════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚═════╝
+ //
+ logic[N_PORTS-1:0] m0_write_is_burst, m0_read_is_burst;
+ logic[N_PORTS-1:0] m1_write_is_burst, m1_read_is_burst;
+
+ generate for (i = 0; i < N_PORTS; i++) begin : BUF_AND_SEND
+
+ // Write Address channel (aw) {{{
+ /*
+ * write address channel (aw)
+ *
+ * ██╗ ██╗██████╗ ██╗████████╗███████╗ █████╗ ██████╗ ██████╗ ██████╗
+ * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗
+ * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ███████║██║ ██║██║ ██║██████╔╝
+ * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██║██║ ██║██║ ██║██╔══██╗
+ * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║██████╔╝██████╔╝██║ ██║
+ * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
+ *
+ */
+
+ axi4_aw_buffer
+ #(
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_aw_buffer
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_awid ( s_axi4_awid[i] ),
+ .s_axi4_awaddr ( s_axi4_awaddr[i] ),
+ .s_axi4_awvalid ( s_axi4_awvalid[i] ),
+ .s_axi4_awready ( s_axi4_awready[i] ),
+ .s_axi4_awlen ( s_axi4_awlen[i] ),
+ .s_axi4_awsize ( s_axi4_awsize[i] ),
+ .s_axi4_awburst ( s_axi4_awburst[i] ),
+ .s_axi4_awlock ( s_axi4_awlock[i] ),
+ .s_axi4_awprot ( s_axi4_awprot[i] ),
+ .s_axi4_awcache ( s_axi4_awcache[i] ),
+ .s_axi4_awregion ( s_axi4_awregion[i] ),
+ .s_axi4_awqos ( s_axi4_awqos[i] ),
+ .s_axi4_awuser ( s_axi4_awuser[i] ),
+ .m_axi4_awid ( int_awid[i] ),
+ .m_axi4_awaddr ( int_awaddr[i] ),
+ .m_axi4_awvalid ( int_awvalid[i] ),
+ .m_axi4_awready ( int_awready[i] ),
+ .m_axi4_awlen ( int_awlen[i] ),
+ .m_axi4_awsize ( int_awsize[i] ),
+ .m_axi4_awburst ( int_awburst[i] ),
+ .m_axi4_awlock ( int_awlock[i] ),
+ .m_axi4_awprot ( int_awprot[i] ),
+ .m_axi4_awcache ( int_awcache[i] ),
+ .m_axi4_awregion ( int_awregion[i] ),
+ .m_axi4_awqos ( int_awqos[i] ),
+ .m_axi4_awuser ( int_awuser[i] )
+ );
+
+ axi4_aw_sender
+ #(
+ .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
+ .ENABLE_L2TLB ( ENABLE_L2TLB[i] )
+ )
+ u_aw_sender_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .l1_done_o ( l1_m0_aw_done[i] ),
+ .l1_accept_i ( l1_m0_aw_accept[i] ),
+ .l1_drop_i ( l1_m0_aw_drop[i] ),
+ .l1_save_i ( l1_m0_aw_save[i] ),
+ .l2_done_o ( l2_m0_aw_done[i] ),
+ .l2_accept_i ( l2_m0_aw_accept[i] ),
+ .l2_drop_i ( l2_m0_aw_drop[i] ),
+ .l2_sending_o ( l2_m0_aw_sending[i] ),
+ .l1_awaddr_i ( int_wtrans_addr[i] ),
+ .l2_awaddr_i ( l2_aw_addr[i] ),
+ .s_axi4_awid ( int_awid[i] ),
+ .s_axi4_awvalid ( int_m0_awvalid[i] ),
+ .s_axi4_awready ( int_m0_awready[i] ),
+ .s_axi4_awlen ( int_awlen[i] ),
+ .s_axi4_awsize ( int_awsize[i] ),
+ .s_axi4_awburst ( int_awburst[i] ),
+ .s_axi4_awlock ( int_awlock[i] ),
+ .s_axi4_awprot ( int_awprot[i] ),
+ .s_axi4_awcache ( int_awcache[i] ),
+ .s_axi4_awregion ( int_awregion[i] ),
+ .s_axi4_awqos ( int_awqos[i] ),
+ .s_axi4_awuser ( int_awuser[i] ),
+ .m_axi4_awid ( m0_axi4_awid[i] ),
+ .m_axi4_awaddr ( m0_axi4_awaddr[i] ),
+ .m_axi4_awvalid ( m0_axi4_awvalid[i] ),
+ .m_axi4_awready ( m0_axi4_awready[i] ),
+ .m_axi4_awlen ( m0_axi4_awlen[i] ),
+ .m_axi4_awsize ( m0_axi4_awsize[i] ),
+ .m_axi4_awburst ( m0_axi4_awburst[i] ),
+ .m_axi4_awlock ( m0_axi4_awlock[i] ),
+ .m_axi4_awprot ( m0_axi4_awprot[i] ),
+ .m_axi4_awcache ( ),
+ .m_axi4_awregion ( m0_axi4_awregion[i] ),
+ .m_axi4_awqos ( m0_axi4_awqos[i] ),
+ .m_axi4_awuser ( m0_axi4_awuser[i] )
+ );
+
+ // The AXCACHE signals are set according to burstiness and cache coherence or statically
+ // when not connected to ACP on Zynq (implemented below).
+ assign m0_write_is_burst[i] = (m0_axi4_awlen[i] != {8{1'b0}}) && (m0_axi4_awburst[i] != 2'b00);
+ `ifndef EN_ACP
+ always_comb begin
+ if ( (l2_m0_aw_sending[i] & l2_cache_coherent[i]) | int_wtrans_cache_coherent[i]) begin
+ if (m0_write_is_burst[i]) begin
+ m0_axi4_awcache[i] = 4'b0111;
+ end else begin
+ m0_axi4_awcache[i] = 4'b1111;
+ end
+ end else begin
+ m0_axi4_awcache[i] = 4'b0011;
+ end
+ end
+ `else
+ assign m0_axi4_awcache[i] = 4'b0011;
+ `endif
+
+ axi4_aw_sender
+ #(
+ .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
+ .ENABLE_L2TLB ( ENABLE_L2TLB[i] )
+ )
+ u_aw_sender_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .l1_accept_i ( l1_m1_aw_accept[i] ),
+ .l1_drop_i ( l1_m1_aw_drop[i] ),
+ .l1_save_i ( l1_m1_aw_save[i] ),
+ .l1_done_o ( l1_m1_aw_done[i] ),
+ .l2_accept_i ( l2_m1_aw_accept[i] ),
+ .l2_drop_i ( l2_m1_aw_drop[i] ),
+ .l2_done_o ( l2_m1_aw_done[i] ),
+ .l2_sending_o ( ), // just helps to set axcache
+ .l1_awaddr_i ( int_wtrans_addr[i] ),
+ .l2_awaddr_i ( l2_aw_addr[i] ),
+ .s_axi4_awid ( int_awid[i] ),
+ .s_axi4_awvalid ( int_m1_awvalid[i] ),
+ .s_axi4_awready ( int_m1_awready[i] ),
+ .s_axi4_awlen ( int_awlen[i] ),
+ .s_axi4_awsize ( int_awsize[i] ),
+ .s_axi4_awburst ( int_awburst[i] ),
+ .s_axi4_awlock ( int_awlock[i] ),
+ .s_axi4_awprot ( int_awprot[i] ),
+ .s_axi4_awcache ( int_awcache[i] ),
+ .s_axi4_awregion ( int_awregion[i] ),
+ .s_axi4_awqos ( int_awqos[i] ),
+ .s_axi4_awuser ( int_awuser[i] ),
+ .m_axi4_awid ( m1_axi4_awid[i] ),
+ .m_axi4_awaddr ( m1_axi4_awaddr[i] ),
+ .m_axi4_awvalid ( m1_axi4_awvalid[i] ),
+ .m_axi4_awready ( m1_axi4_awready[i] ),
+ .m_axi4_awlen ( m1_axi4_awlen[i] ),
+ .m_axi4_awsize ( m1_axi4_awsize[i] ),
+ .m_axi4_awburst ( m1_axi4_awburst[i] ),
+ .m_axi4_awlock ( m1_axi4_awlock[i] ),
+ .m_axi4_awprot ( m1_axi4_awprot[i] ),
+ .m_axi4_awcache ( ),
+ .m_axi4_awregion ( m1_axi4_awregion[i] ),
+ .m_axi4_awqos ( m1_axi4_awqos[i] ),
+ .m_axi4_awuser ( m1_axi4_awuser[i] )
+ );
+
+ // The AXCACHE signals are set according to burstiness and cache coherence or statically
+ // when not connected to ACP on Zynq (implemented below).
+ assign m1_write_is_burst[i] = (m1_axi4_awlen[i] != {8{1'b0}}) && (m1_axi4_awburst[i] != 2'b00);
+ `ifdef EN_ACP
+ always_comb begin
+ if (m1_write_is_burst[i]) begin
+ m1_axi4_awcache[i] = 4'b1011;
+ end else begin
+ m1_axi4_awcache[i] = 4'b1111;
+ end
+ end
+ `else
+ assign m1_axi4_awcache[i] = 4'b0011;
+ `endif
+
+ // }}}
+
+ // Write Data channel (w) {{{
+ /*
+ * write data channel (w)
+ *
+ * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ █████╗ ████████╗ █████╗
+ * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗
+ * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██║ ██║███████║ ██║ ███████║
+ * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██║ ██║██╔══██║ ██║ ██╔══██║
+ * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██████╔╝██║ ██║ ██║ ██║ ██║
+ * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝
+ *
+ */
+ axi4_w_buffer
+ #(
+ .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
+ .ENABLE_L2TLB ( ENABLE_L2TLB[i] ),
+ .HUM_BUFFER_DEPTH ( HUM_BUFFER_DEPTH )
+ )
+ u_w_buffer
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+
+ // L1 interface
+ .l1_done_o ( l1_w_done[i] ),
+ .l1_accept_i ( l1_w_accept[i] ),
+ .l1_save_i ( l1_w_save[i] ),
+ .l1_drop_i ( l1_w_drop[i] ),
+ .l1_master_i ( int_wmaster_select[i] ),
+ .l1_id_i ( l1_id_drop[i] ),
+ .l1_len_i ( l1_len_drop[i] ),
+ .l1_prefetch_i ( l1_prefetch_drop[i] ),
+ .l1_hit_i ( l1_hit_drop[i] ),
+
+ // L2 interface
+ .l2_done_o ( l2_w_done[i] ),
+ .l2_accept_i ( l2_w_accept[i] ),
+ .l2_drop_i ( l2_w_drop[i] ),
+ .l2_master_i ( l2_master_select[i] ),
+ .l2_id_i ( lx_id_drop[i] ),
+ .l2_len_i ( lx_len_drop[i] ),
+ .l2_prefetch_i ( lx_prefetch_drop[i] ),
+ .l2_hit_i ( lx_hit_drop[i] ),
+
+ // Top-level control outputs
+ .master_select_o ( w_master_select[i] ),
+ .input_stall_o ( aw_in_stall[i] ), // stall L1 AW input if request buffers full
+ .output_stall_o ( aw_out_stall[i] ), // stall L1 AW hit forwarding if bypass not possible
+
+ // B sender interface
+ .b_drop_o ( b_drop[i] ),
+ .b_done_i ( b_done[i] ),
+ .id_o ( b_id_drop[i] ),
+ .prefetch_o ( b_prefetch_drop[i] ),
+ .hit_o ( b_hit_drop[i] ),
+
+ // AXI W channel interfaces
+ .s_axi4_wdata ( s_axi4_wdata[i] ),
+ .s_axi4_wvalid ( s_axi4_wvalid[i] ),
+ .s_axi4_wready ( s_axi4_wready[i] ),
+ .s_axi4_wstrb ( s_axi4_wstrb[i] ),
+ .s_axi4_wlast ( s_axi4_wlast[i] ),
+ .s_axi4_wuser ( s_axi4_wuser[i] ),
+ .m_axi4_wdata ( int_wdata[i] ),
+ .m_axi4_wvalid ( int_wvalid[i] ),
+ .m_axi4_wready ( int_wready[i] ),
+ .m_axi4_wstrb ( int_wstrb[i] ),
+ .m_axi4_wlast ( int_wlast[i] ),
+ .m_axi4_wuser ( int_wuser[i] )
+ );
+
+ axi4_w_sender
+ #(
+ .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_w_sender_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_wdata ( int_wdata[i] ),
+ .s_axi4_wvalid ( int_m0_wvalid[i] ),
+ .s_axi4_wready ( int_m0_wready[i] ),
+ .s_axi4_wstrb ( int_wstrb[i] ),
+ .s_axi4_wlast ( int_wlast[i] ),
+ .s_axi4_wuser ( int_wuser[i] ),
+ .m_axi4_wdata ( m0_axi4_wdata[i] ),
+ .m_axi4_wvalid ( m0_axi4_wvalid[i] ),
+ .m_axi4_wready ( m0_axi4_wready[i] ),
+ .m_axi4_wstrb ( m0_axi4_wstrb[i] ),
+ .m_axi4_wlast ( m0_axi4_wlast[i] ),
+ .m_axi4_wuser ( m0_axi4_wuser[i] )
+ );
+
+ axi4_w_sender
+ #(
+ .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+
+ )
+ u_w_sender_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_wdata ( int_wdata[i] ),
+ .s_axi4_wvalid ( int_m1_wvalid[i] ),
+ .s_axi4_wready ( int_m1_wready[i] ),
+ .s_axi4_wstrb ( int_wstrb[i] ),
+ .s_axi4_wlast ( int_wlast[i] ),
+ .s_axi4_wuser ( int_wuser[i] ),
+ .m_axi4_wdata ( m1_axi4_wdata[i] ),
+ .m_axi4_wvalid ( m1_axi4_wvalid[i] ),
+ .m_axi4_wready ( m1_axi4_wready[i] ),
+ .m_axi4_wstrb ( m1_axi4_wstrb[i] ),
+ .m_axi4_wlast ( m1_axi4_wlast[i] ),
+ .m_axi4_wuser ( m1_axi4_wuser[i] )
+ );
+
+ /*
+ * Multiplexer to switch between the two output master ports on the write data (w) channel
+ */
+ always_comb begin
+ /* Only one output can be selected at any time */
+ if (w_master_select[i] == 1'b0) begin
+ int_m0_wvalid[i] = int_wvalid[i];
+ int_m1_wvalid[i] = 1'b0;
+ int_wready[i] = int_m0_wready[i];
+ end else begin
+ int_m0_wvalid[i] = 1'b0;
+ int_m1_wvalid[i] = int_wvalid[i];
+ int_wready[i] = int_m1_wready[i];
+ end
+ end
+
+ // }}}
+
+ // Write Response channel (b) {{{
+ /*
+ * write response channel (b)
+ *
+ * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ ███████╗███████╗██████╗
+ * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔════╝██╔════╝██╔══██╗
+ * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██████╔╝█████╗ ███████╗██████╔╝
+ * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██╗██╔══╝ ╚════██║██╔═══╝
+ * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║███████╗███████║██║
+ * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝
+ *
+ */
+ axi4_b_buffer
+ #(
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_b_buffer_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_bid ( int_m0_bid[i] ),
+ .s_axi4_bresp ( int_m0_bresp[i] ),
+ .s_axi4_bvalid ( int_m0_bvalid[i] ),
+ .s_axi4_buser ( int_m0_buser[i] ),
+ .s_axi4_bready ( int_m0_bready[i] ),
+ .m_axi4_bid ( m0_axi4_bid[i] ),
+ .m_axi4_bresp ( m0_axi4_bresp[i] ),
+ .m_axi4_bvalid ( m0_axi4_bvalid[i] ),
+ .m_axi4_buser ( m0_axi4_buser[i] ),
+ .m_axi4_bready ( m0_axi4_bready[i] )
+ );
+
+ axi4_b_buffer
+ #(
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_b_buffer_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_bid ( int_m1_bid[i] ),
+ .s_axi4_bresp ( int_m1_bresp[i] ),
+ .s_axi4_bvalid ( int_m1_bvalid[i] ),
+ .s_axi4_buser ( int_m1_buser[i] ),
+ .s_axi4_bready ( int_m1_bready[i] ),
+ .m_axi4_bid ( m1_axi4_bid[i] ),
+ .m_axi4_bresp ( m1_axi4_bresp[i] ),
+ .m_axi4_bvalid ( m1_axi4_bvalid[i] ),
+ .m_axi4_buser ( m1_axi4_buser[i] ),
+ .m_axi4_bready ( m1_axi4_bready[i] )
+ );
+
+ axi4_b_sender
+ #(
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_b_sender
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .drop_i ( b_drop[i] ),
+ .done_o ( b_done[i] ),
+ .id_i ( b_id_drop[i] ),
+ .prefetch_i ( b_prefetch_drop[i] ),
+ .hit_i ( b_hit_drop[i] ),
+ .s_axi4_bid ( s_axi4_bid[i] ),
+ .s_axi4_bresp ( s_axi4_bresp[i] ),
+ .s_axi4_bvalid ( s_axi4_bvalid[i] ),
+ .s_axi4_buser ( s_axi4_buser[i] ),
+ .s_axi4_bready ( s_axi4_bready[i] ),
+ .m_axi4_bid ( int_bid[i] ),
+ .m_axi4_bresp ( int_bresp[i] ),
+ .m_axi4_bvalid ( int_bvalid[i] ),
+ .m_axi4_buser ( int_buser[i] ),
+ .m_axi4_bready ( int_bready[i] )
+ );
+
+ /*
+ * Multiplexer to switch between the two output master ports on the write response (b) channel
+ */
+ always_comb begin
+    /* Output 1 always gets priority, so if it has something to send, connect
+       it and let output 0 wait by keeping its bready deasserted. */
+ if (int_m1_bvalid[i] == 1'b1) begin
+ int_m0_bready[i] = 1'b0;
+ int_m1_bready[i] = int_bready[i];
+
+ int_bid[i] = int_m1_bid[i];
+ int_bresp[i] = int_m1_bresp[i];
+ int_buser[i] = int_m1_buser[i];
+ int_bvalid[i] = int_m1_bvalid[i];
+ end else begin
+ int_m0_bready[i] = int_bready[i];
+ int_m1_bready[i] = 1'b0;
+
+ int_bid[i] = int_m0_bid[i];
+ int_bresp[i] = int_m0_bresp[i];
+ int_buser[i] = int_m0_buser[i];
+ int_bvalid[i] = int_m0_bvalid[i];
+ end
+ end
+
+ // }}}
+
+ // Read Address channel (ar) {{{
+ /*
+ * read address channel (ar)
+ *
+ * ██████╗ ███████╗ █████╗ ██████╗ █████╗ ██████╗ ██████╗ ██████╗
+ * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗
+ * ██████╔╝█████╗ ███████║██║ ██║ ███████║██║ ██║██║ ██║██████╔╝
+ * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██║██║ ██║██║ ██║██╔══██╗
+ * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║██████╔╝██████╔╝██║ ██║
+ * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
+ *
+ */
+ axi4_ar_buffer
+ #(
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_ar_buffer
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_arid ( s_axi4_arid[i] ),
+ .s_axi4_araddr ( s_axi4_araddr[i] ),
+ .s_axi4_arvalid ( s_axi4_arvalid[i] ),
+ .s_axi4_arready ( s_axi4_arready[i] ),
+ .s_axi4_arlen ( s_axi4_arlen[i] ),
+ .s_axi4_arsize ( s_axi4_arsize[i] ),
+ .s_axi4_arburst ( s_axi4_arburst[i] ),
+ .s_axi4_arlock ( s_axi4_arlock[i] ),
+ .s_axi4_arprot ( s_axi4_arprot[i] ),
+ .s_axi4_arcache ( s_axi4_arcache[i] ),
+ .s_axi4_aruser ( s_axi4_aruser[i] ),
+ .m_axi4_arid ( int_arid[i] ),
+ .m_axi4_araddr ( int_araddr[i] ),
+ .m_axi4_arvalid ( int_arvalid[i] ),
+ .m_axi4_arready ( int_arready[i] ),
+ .m_axi4_arlen ( int_arlen[i] ),
+ .m_axi4_arsize ( int_arsize[i] ),
+ .m_axi4_arburst ( int_arburst[i] ),
+ .m_axi4_arlock ( int_arlock[i] ),
+ .m_axi4_arprot ( int_arprot[i] ),
+ .m_axi4_arcache ( int_arcache[i] ),
+ .m_axi4_aruser ( int_aruser[i] )
+ );
+
+ axi4_ar_sender
+ #(
+ .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
+ .ENABLE_L2TLB ( ENABLE_L2TLB[i] )
+ )
+ u_ar_sender_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .l1_done_o ( l1_m0_ar_done[i] ),
+ .l1_accept_i ( l1_m0_ar_accept[i] ),
+ .l1_drop_i ( l1_m0_ar_drop[i] ),
+ .l1_save_i ( l1_m0_ar_save[i] ),
+ .l2_done_o ( l2_m0_ar_done[i] ),
+ .l2_accept_i ( l2_m0_ar_accept[i] ),
+ .l2_drop_i ( l2_m0_ar_drop[i] ),
+ .l2_sending_o ( l2_m0_ar_sending[i] ),
+ .l1_araddr_i ( int_rtrans_addr[i] ),
+ .l2_araddr_i ( l2_ar_addr[i] ),
+ .s_axi4_arid ( int_arid[i] ),
+ .s_axi4_arvalid ( int_m0_arvalid[i] ),
+ .s_axi4_arready ( int_m0_arready[i] ),
+ .s_axi4_arlen ( int_arlen[i] ),
+ .s_axi4_arsize ( int_arsize[i] ),
+ .s_axi4_arburst ( int_arburst[i] ),
+ .s_axi4_arlock ( int_arlock[i] ),
+ .s_axi4_arprot ( int_arprot[i] ),
+ .s_axi4_arcache ( int_arcache[i] ),
+ .s_axi4_aruser ( int_aruser[i] ),
+ .m_axi4_arid ( m0_axi4_arid[i] ),
+ .m_axi4_araddr ( m0_axi4_araddr[i] ),
+ .m_axi4_arvalid ( m0_axi4_arvalid[i] ),
+ .m_axi4_arready ( m0_axi4_arready[i] ),
+ .m_axi4_arlen ( m0_axi4_arlen[i] ),
+ .m_axi4_arsize ( m0_axi4_arsize[i] ),
+ .m_axi4_arburst ( m0_axi4_arburst[i] ),
+ .m_axi4_arlock ( m0_axi4_arlock[i] ),
+ .m_axi4_arprot ( m0_axi4_arprot[i] ),
+ .m_axi4_arcache ( ),
+ .m_axi4_aruser ( m0_axi4_aruser[i] )
+ );
+
+ // The AXCACHE signals are set according to burstiness and cache coherence or statically
+ // when not connected to ACP on Zynq (implemented below).
+ assign m0_read_is_burst[i] = (m0_axi4_arlen[i] != {8{1'b0}}) && (m0_axi4_arburst[i] != 2'b00);
+ `ifndef EN_ACP
+ always_comb begin
+ if ( (l2_m0_ar_sending[i] & l2_cache_coherent[i]) | int_rtrans_cache_coherent[i]) begin
+ if (m0_read_is_burst[i]) begin
+ m0_axi4_arcache[i] = 4'b1011;
+ end else begin
+ m0_axi4_arcache[i] = 4'b1111;
+ end
+ end else begin
+ m0_axi4_arcache[i] = 4'b0011;
+ end
+ end
+ `else
+ assign m0_axi4_arcache[i] = 4'b0011;
+ `endif
+
+ axi4_ar_sender
+ #(
+ .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
+ .ENABLE_L2TLB ( ENABLE_L2TLB[i] )
+ )
+ u_ar_sender_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .l1_done_o ( l1_m1_ar_done[i] ),
+ .l1_accept_i ( l1_m1_ar_accept[i] ),
+ .l1_drop_i ( l1_m1_ar_drop[i] ),
+ .l1_save_i ( l1_m1_ar_save[i] ),
+ .l2_done_o ( l2_m1_ar_done[i] ),
+ .l2_accept_i ( l2_m1_ar_accept[i] ),
+ .l2_drop_i ( l2_m1_ar_drop[i] ),
+ .l2_sending_o ( ), // just helps to set axcache
+ .l1_araddr_i ( int_rtrans_addr[i] ),
+ .l2_araddr_i ( l2_ar_addr[i] ),
+ .s_axi4_arid ( int_arid[i] ),
+ .s_axi4_arvalid ( int_m1_arvalid[i] ),
+ .s_axi4_arready ( int_m1_arready[i] ),
+ .s_axi4_arlen ( int_arlen[i] ),
+ .s_axi4_arsize ( int_arsize[i] ),
+ .s_axi4_arburst ( int_arburst[i] ),
+ .s_axi4_arlock ( int_arlock[i] ),
+ .s_axi4_arprot ( int_arprot[i] ),
+ .s_axi4_arcache ( int_arcache[i] ),
+ .s_axi4_aruser ( int_aruser[i] ),
+ .m_axi4_arid ( m1_axi4_arid[i] ),
+ .m_axi4_araddr ( m1_axi4_araddr[i] ),
+ .m_axi4_arvalid ( m1_axi4_arvalid[i] ),
+ .m_axi4_arready ( m1_axi4_arready[i] ),
+ .m_axi4_arlen ( m1_axi4_arlen[i] ),
+ .m_axi4_arsize ( m1_axi4_arsize[i] ),
+ .m_axi4_arburst ( m1_axi4_arburst[i] ),
+ .m_axi4_arlock ( m1_axi4_arlock[i] ),
+ .m_axi4_arprot ( m1_axi4_arprot[i] ),
+ .m_axi4_arcache ( ),
+ .m_axi4_aruser ( m1_axi4_aruser[i] )
+ );
+
+ // The AXCACHE signals are set according to burstiness and cache coherence or statically
+ // when not connected to ACP on Zynq (implemented below).
+ assign m1_read_is_burst[i] = (m1_axi4_arlen[i] != {8{1'b0}}) && (m1_axi4_arburst[i] != 2'b00);
+ `ifdef EN_ACP
+ always_comb begin
+ if (m1_read_is_burst[i]) begin
+ m1_axi4_arcache[i] = 4'b1011;
+ end else begin
+ m1_axi4_arcache[i] = 4'b1111;
+ end
+ end
+ `else
+ assign m1_axi4_arcache[i] = 4'b0011;
+ `endif
+
+ // }}}
+
+ // Read Response channel (r) {{{
+ /*
+ * read response channel (r)
+ *
+ * ██████╗ ███████╗ █████╗ ██████╗ ██████╗ ███████╗███████╗██████╗
+ * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔════╝██╔════╝██╔══██╗
+ * ██████╔╝█████╗ ███████║██║ ██║ ██████╔╝█████╗ ███████╗██████╔╝
+ * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██╗██╔══╝ ╚════██║██╔═══╝
+ * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║███████╗███████║██║
+ * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝
+ *
+ */
+ axi4_r_buffer
+ #(
+ .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_r_buffer_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_rid ( int_m0_rid[i] ),
+ .s_axi4_rresp ( int_m0_rresp[i] ),
+ .s_axi4_rdata ( int_m0_rdata[i] ),
+ .s_axi4_rlast ( int_m0_rlast[i] ),
+ .s_axi4_rvalid ( int_m0_rvalid[i] ),
+ .s_axi4_ruser ( int_m0_ruser[i] ),
+ .s_axi4_rready ( int_m0_rready[i] ),
+ .m_axi4_rid ( m0_axi4_rid[i] ),
+ .m_axi4_rresp ( m0_axi4_rresp[i] ),
+ .m_axi4_rdata ( m0_axi4_rdata[i] ),
+ .m_axi4_rlast ( m0_axi4_rlast[i] ),
+ .m_axi4_rvalid ( m0_axi4_rvalid[i] ),
+ .m_axi4_ruser ( m0_axi4_ruser[i] ),
+ .m_axi4_rready ( m0_axi4_rready[i] )
+ );
+
+ axi4_r_buffer
+ #(
+ .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_r_buffer_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_rid ( int_m1_rid[i] ),
+ .s_axi4_rresp ( int_m1_rresp[i] ),
+ .s_axi4_rdata ( int_m1_rdata[i] ),
+ .s_axi4_rlast ( int_m1_rlast[i] ),
+ .s_axi4_rvalid ( int_m1_rvalid[i] ),
+ .s_axi4_ruser ( int_m1_ruser[i] ),
+ .s_axi4_rready ( int_m1_rready[i] ),
+ .m_axi4_rid ( m1_axi4_rid[i] ),
+ .m_axi4_rresp ( m1_axi4_rresp[i] ),
+ .m_axi4_rdata ( m1_axi4_rdata[i] ),
+ .m_axi4_rlast ( m1_axi4_rlast[i] ),
+ .m_axi4_rvalid ( m1_axi4_rvalid[i] ),
+ .m_axi4_ruser ( m1_axi4_ruser[i] ),
+ .m_axi4_rready ( m1_axi4_rready[i] )
+ );
+
+ axi4_r_sender
+ #(
+ .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_r_sender
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .drop_i ( lx_r_drop[i] ),
+ .drop_len_i ( lx_len_drop[i] ),
+ .done_o ( lx_r_done[i] ),
+ .id_i ( lx_id_drop[i] ),
+ .prefetch_i ( lx_prefetch_drop[i] ),
+ .hit_i ( lx_hit_drop[i] ),
+ .s_axi4_rid ( s_axi4_rid[i] ),
+ .s_axi4_rresp ( s_axi4_rresp[i] ),
+ .s_axi4_rdata ( s_axi4_rdata[i] ),
+ .s_axi4_rlast ( s_axi4_rlast[i] ),
+ .s_axi4_rvalid ( s_axi4_rvalid[i] ),
+ .s_axi4_ruser ( s_axi4_ruser[i] ),
+ .s_axi4_rready ( s_axi4_rready[i] ),
+ .m_axi4_rid ( int_rid[i] ),
+ .m_axi4_rresp ( int_rresp[i] ),
+ .m_axi4_rdata ( int_rdata[i] ),
+ .m_axi4_rlast ( int_rlast[i] ),
+ .m_axi4_rvalid ( int_rvalid[i] ),
+ .m_axi4_ruser ( int_ruser[i] ),
+ .m_axi4_rready ( int_rready[i] )
+ );
+
+ /*
+ * Multiplexer to switch between the two output master ports on the read response(r) channel
+ *
+ * Do not perform read burst interleaving as the DMA does not support it. This means we can only
+ * switch between the two masters upon sending rlast or when idle.
+ *
+ * However, if the downstream already performs burst interleaving, this cannot be undone here.
+     * Also, the downstream may interleave a burst response with a single-beat transaction. In this
+     * case, the FSM below falls out of burst mode. To avoid it performing burst interleaving
+     * after such an event, it gives priority to the master which received the last burst in case
+     * both have a burst ready (rvalid).
+ *
+ * Order of priority:
+ * 1. Ongoing burst transaction
+ * 2. Single-beat transaction on Master 1.
+ * 3. Single-beat transaction on Master 0.
+ * 4. Burst transaction on master that received the last burst.
+ */
+ // Select signal
+ always_ff @(posedge Clk_CI) begin
+ if (Rst_RBI == 0) begin
+ RRespSel_SP[i] <= 1'b0;
+ end else begin
+ RRespSel_SP[i] <= RRespSel_SN[i];
+ end
+ end
+
+ // FSM
+ always_comb begin : RRespMuxFsm
+ RRespMuxCtrl_SN[i] = RRespMuxCtrl_SP[i];
+ RRespSel_SN[i] = RRespSel_SP[i];
+
+ RRespBurst_S[i] = 1'b0;
+ RRespSelIm_S[i] = 1'b0;
+
+ unique case (RRespMuxCtrl_SP[i])
+
+ IDLE: begin
+ // immediately forward single-beat transactions
+ if (int_m1_rvalid[i] && int_m1_rlast[i])
+ RRespSelIm_S[i] = 1'b1;
+ else if (int_m0_rvalid[i] && int_m0_rlast[i])
+ RRespSelIm_S[i] = 1'b0;
+
+ // bursts - they also start immediately
+ else if (int_m1_rvalid[i] || int_m0_rvalid[i]) begin
+ RRespMuxCtrl_SN[i] = BUSY;
+
+ // in case both are ready, continue with the master that had the last burst
+ if (int_m1_rvalid[i] && int_m0_rvalid[i]) begin
+ RRespSel_SN[i] = RRespSel_SP[i];
+ RRespSelIm_S[i] = RRespSel_SP[i];
+ end else if (int_m1_rvalid[i]) begin
+ RRespSel_SN[i] = 1'b1;
+ RRespSelIm_S[i] = 1'b1;
+ end else begin
+ RRespSel_SN[i] = 1'b0;
+ RRespSelIm_S[i] = 1'b0;
+ end
+ end
+ end
+
+ BUSY: begin
+ RRespBurst_S[i] = 1'b1;
+ // detect last handshake of currently ongoing transfer
+ if (int_rvalid[i] && int_rready[i] && int_rlast[i])
+ RRespMuxCtrl_SN[i] = IDLE;
+ end
+
+ default: begin
+ RRespMuxCtrl_SN[i] = IDLE;
+ end
+
+ endcase
+ end
+
+ // FSM state
+ always_ff @(posedge Clk_CI) begin
+ if (Rst_RBI == 0) begin
+ RRespMuxCtrl_SP[i] <= IDLE;
+ end else begin
+ RRespMuxCtrl_SP[i] <= RRespMuxCtrl_SN[i];
+ end
+ end
+
+ // Actual multiplexer
+ always_comb begin
+ if ( (RRespBurst_S[i] && RRespSel_SP[i]) || (!RRespBurst_S[i] && RRespSelIm_S[i]) ) begin
+ int_m0_rready[i] = 1'b0;
+ int_m1_rready[i] = int_rready[i];
+
+ int_rid[i] = int_m1_rid[i];
+ int_rresp[i] = int_m1_rresp[i];
+ int_rdata[i] = int_m1_rdata[i];
+ int_rlast[i] = int_m1_rlast[i];
+ int_ruser[i] = int_m1_ruser[i];
+ int_rvalid[i] = int_m1_rvalid[i];
+ end else begin
+ int_m0_rready[i] = int_rready[i];
+ int_m1_rready[i] = 1'b0;
+
+ int_rid[i] = int_m0_rid[i];
+ int_rresp[i] = int_m0_rresp[i];
+ int_rdata[i] = int_m0_rdata[i];
+ int_rlast[i] = int_m0_rlast[i];
+ int_ruser[i] = int_m0_ruser[i];
+ int_rvalid[i] = int_m0_rvalid[i];
+ end
+ end
+
+ end // BUF & SEND
+
+ // }}}
+
+ endgenerate // BUF & SEND }}}
+
+ // Log {{{
+
+`ifdef RAB_AX_LOG_EN
+ AxiBramLogger
+ #(
+ .AXI_ID_BITW ( AXI_ID_WIDTH ),
+ .AXI_ADDR_BITW ( AXI_S_ADDR_WIDTH ),
+ .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES )
+ )
+ u_aw_logger
+ (
+ .Clk_CI ( NonGatedClk_CI ),
+ .TimestampClk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .AxiValid_SI ( s_axi4_awvalid[1] ),
+ .AxiReady_SI ( s_axi4_awready[1] ),
+ .AxiId_DI ( s_axi4_awid[1] ),
+ .AxiAddr_DI ( s_axi4_awaddr[1] ),
+ .AxiLen_DI ( s_axi4_awlen[1] ),
+ .Clear_SI ( AwLogClr_SI ),
+ .LogEn_SI ( LogEn_SI ),
+ .Full_SO ( int_aw_log_full ),
+ .Ready_SO ( AwLogRdy_SO ),
+ .Bram_PS ( AwBram_PS )
+ );
+
+ AxiBramLogger
+ #(
+ .AXI_ID_BITW ( AXI_ID_WIDTH ),
+ .AXI_ADDR_BITW ( AXI_S_ADDR_WIDTH ),
+ .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES )
+ )
+ u_ar_logger
+ (
+ .Clk_CI ( NonGatedClk_CI ),
+ .TimestampClk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .AxiValid_SI ( s_axi4_arvalid[1] ),
+ .AxiReady_SI ( s_axi4_arready[1] ),
+ .AxiId_DI ( s_axi4_arid[1] ),
+ .AxiAddr_DI ( s_axi4_araddr[1] ),
+ .AxiLen_DI ( s_axi4_arlen[1] ),
+ .Clear_SI ( ArLogClr_SI ),
+ .LogEn_SI ( LogEn_SI ),
+ .Full_SO ( int_ar_log_full ),
+ .Ready_SO ( ArLogRdy_SO ),
+ .Bram_PS ( ArBram_PS )
+ );
+`endif
+
+ // }}}
+
+ // RAB Core {{{
+ // ██████╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗ ███████╗
+ // ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔═══██╗██╔══██╗██╔════╝
+ // ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝█████╗
+ // ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔══██╗██╔══╝
+ // ██║ ██║██║ ██║██████╔╝ ╚██████╗╚██████╔╝██║ ██║███████╗
+ // ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝
+ //
+ /*
+ * rab_core
+ *
+ * The rab_core translates addresses. It has two ports, which can be used
+ * independently; however, they compete for the core internally, as lookups
+ * are serialized.
+ *
+ * The type input selects read (0) or write (1) and is used to check the
+ * protection flags. If they do not match, an interrupt is raised on the
+ * int_prot line.
+ */
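+ // Note on the port map below: port1 carries the write (AW) requests
+ // (port1_type tied to all-ones) and port2 carries the read (AR) requests
+ // (port2_type tied to zero).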
+
+ rab_core
+ #(
+ .N_PORTS ( N_PORTS ),
+ .N_L2_SETS ( N_L2_SETS ),
+ .N_L2_SET_ENTRIES ( N_L2_SET_ENTRIES ),
+ .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ),
+ .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ),
+ .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
+ .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ),
+ .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH ),
+ .MH_FIFO_DEPTH ( MH_FIFO_DEPTH )
+ )
+ u_rab_core
+ (
+ .Clk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+
+ // Config IF
+ .s_axi_awaddr ( s_axi4lite_awaddr ),
+ .s_axi_awvalid ( s_axi4lite_awvalid ),
+ .s_axi_awready ( s_axi4lite_awready ),
+ .s_axi_wdata ( s_axi4lite_wdata ),
+ .s_axi_wstrb ( s_axi4lite_wstrb ),
+ .s_axi_wvalid ( s_axi4lite_wvalid ),
+ .s_axi_wready ( s_axi4lite_wready ),
+ .s_axi_bresp ( s_axi4lite_bresp ),
+ .s_axi_bvalid ( s_axi4lite_bvalid ),
+ .s_axi_bready ( s_axi4lite_bready ),
+ .s_axi_araddr ( s_axi4lite_araddr ),
+ .s_axi_arvalid ( s_axi4lite_arvalid ),
+ .s_axi_arready ( s_axi4lite_arready ),
+ .s_axi_rready ( s_axi4lite_rready ),
+ .s_axi_rdata ( s_axi4lite_rdata ),
+ .s_axi_rresp ( s_axi4lite_rresp ),
+ .s_axi_rvalid ( s_axi4lite_rvalid ),
+
+ // L1 miss info outputs -> L2 TLB arbitration
+ .int_miss ( rab_miss ),
+ .int_multi ( rab_multi ),
+ .int_prot ( rab_prot ),
+ .int_prefetch ( rab_prefetch ),
+ .int_mhf_full ( int_mhf_full ),
+
+ // L1 transaction info outputs -> L2 TLB arbitration
+ .int_axaddr_o ( L1OutAddr_D ),
+ .int_axid_o ( L1OutId_D ),
+ .int_axlen_o ( L1OutLen_D ),
+ .int_axuser_o ( L1OutUser_D ),
+
+ // Write Req IF
+ .port1_addr ( int_awaddr ),
+ .port1_id ( int_awid ),
+ .port1_len ( int_awlen ),
+ .port1_size ( int_awsize ),
+ .port1_addr_valid ( int_awvalid & ~aw_in_stall ), // avoid the FSM accepting new AW requests
+ .port1_type ( {N_PORTS{1'b1}} ),
+ .port1_user ( int_awuser ),
+ .port1_sent ( int_wtrans_sent ), // signal done to L1 FSM
+ .port1_out_addr ( int_wtrans_addr ),
+ .port1_cache_coherent ( int_wtrans_cache_coherent ),
+ .port1_accept ( int_wtrans_accept ),
+ .port1_drop ( int_wtrans_drop ),
+ .port1_miss ( int_wtrans_miss ),
+
+ // Read Req IF
+ .port2_addr ( int_araddr ),
+ .port2_id ( int_arid ),
+ .port2_len ( int_arlen ),
+ .port2_size ( int_arsize ),
+ .port2_addr_valid ( int_arvalid ),
+ .port2_type ( {N_PORTS{1'b0}} ),
+ .port2_user ( int_aruser ),
+ .port2_sent ( int_rtrans_sent ), // signal done to L1 FSM
+ .port2_out_addr ( int_rtrans_addr ),
+ .port2_cache_coherent ( int_rtrans_cache_coherent ),
+ .port2_accept ( int_rtrans_accept ),
+ .port2_drop ( int_rtrans_drop ),
+ .port2_miss ( int_rtrans_miss ),
+
+ // L2 miss info inputs -> axi_rab_cfg
+ .miss_l2_i ( L2Miss_S ),
+ .miss_l2_addr_i ( L2OutInAddr_DP ),
+ .miss_l2_id_i ( L2OutId_DP ),
+ .miss_l2_user_i ( L2OutUser_DP ),
+
+ // L2 config outputs
+ .wdata_l2_o ( L2CfgWData_D ),
+ .waddr_l2_o ( L2CfgWAddr_D ),
+ .wren_l2_o ( L2CfgWE_S )
+ );
+
+ // }}}
+
+ // AX SPLITS {{{
+ // █████╗ ██╗ ██╗ ███████╗██████╗ ██╗ ██╗████████╗
+ // ██╔══██╗╚██╗██╔╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝
+ // ███████║ ╚███╔╝ ███████╗██████╔╝██║ ██║ ██║
+ // ██╔══██║ ██╔██╗ ╚════██║██╔═══╝ ██║ ██║ ██║
+ // ██║ ██║██╔╝ ██╗ ███████║██║ ███████╗██║ ██║
+ // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝
+ //
+ /**
+ * Multiplex the two output master ports of the Read Address and Write Address (AR/AW) channels.
+ *
+ * Use the `int_xmaster_select` signal to route the signals to either Master 0 (to memory) or
+ * Master 1 (to ACP). In case of an L1 miss, route the signals to both masters; they are
+ * saved until the L2 outputs are available.
+ */
+ generate for (i = 0; i < N_PORTS; i++) begin : AX_SPLIT
+
+ /*
+ * When accepting L1 transactions, we must do so only on the selected master. Drop requests
+ * must be performed on any one of the two masters. Save requests must be performed by both masters.
+ */
+ always_comb begin : AW_L1_SPLIT
+
+ // TLB handshake
+ l1_m0_aw_accept[i] = 1'b0;
+ l1_m1_aw_accept[i] = 1'b0;
+ l1_m0_aw_drop[i] = 1'b0;
+ l1_m1_aw_drop[i] = 1'b0;
+ l1_m0_aw_save[i] = 1'b0;
+ l1_m1_aw_save[i] = 1'b0;
+
+ l1_mx_aw_done[i] = 1'b0;
+
+ // AXI sender input handshake
+ int_m0_awvalid[i] = 1'b0;
+ int_m1_awvalid[i] = 1'b0;
+ int_awready[i] = 1'b0;
+
+ // accept on selected master only
+ if (l1_aw_accept[i]) begin
+ if (int_wmaster_select[i]) begin
+ l1_m1_aw_accept[i] = 1'b1;
+ l1_mx_aw_done[i] = l1_m1_aw_done[i];
+
+ int_m1_awvalid[i] = int_awvalid[i];
+ int_awready[i] = int_m1_awready[i];
+
+ end else begin
+ l1_m0_aw_accept[i] = 1'b1;
+ l1_mx_aw_done[i] = l1_m0_aw_done[i];
+
+ int_m0_awvalid[i] = int_awvalid[i];
+ int_awready[i] = int_m0_awready[i];
+ end
+
+ // drop on Master 0 only
+ end else if (l1_aw_drop[i]) begin
+ l1_m0_aw_drop[i] = 1'b1;
+ l1_mx_aw_done[i] = l1_m0_aw_done[i];
+
+ int_m0_awvalid[i] = int_awvalid[i];
+ int_awready[i] = l1_m0_aw_done[i];
+
+ // save on both masters
+ end else if (l1_aw_save[i]) begin
+ // split save
+ l1_m0_aw_save[i] = ~l1_m0_aw_done_SP[i];
+ l1_m1_aw_save[i] = ~l1_m1_aw_done_SP[i];
+
+ // combine done
+ l1_mx_aw_done[i] = l1_m0_aw_done_SP[i] & l1_m1_aw_done_SP[i];
+
+ int_m0_awvalid[i] = int_awvalid[i];
+ int_m1_awvalid[i] = int_awvalid[i];
+ int_awready[i] = l1_mx_aw_done[i];
+ end
+ end
+
+ // signal back to handshake splitter
+ assign l1_aw_done[i] = l1_mx_aw_done[i];
+
+ always_ff @(posedge Clk_CI) begin : L1_MX_AW_DONE_REG
+ if (Rst_RBI == 0) begin
+ l1_m0_aw_done_SP[i] <= 1'b0;
+ l1_m1_aw_done_SP[i] <= 1'b0;
+ end else if (l1_mx_aw_done[i]) begin
+ l1_m0_aw_done_SP[i] <= 1'b0;
+ l1_m1_aw_done_SP[i] <= 1'b0;
+ end else begin
+ l1_m0_aw_done_SP[i] <= l1_m0_aw_done_SP[i] | l1_m0_aw_done[i];
+ l1_m1_aw_done_SP[i] <= l1_m1_aw_done_SP[i] | l1_m1_aw_done[i];
+ end
+ end
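+
+ // Note: the *_aw_done_SP flags are sticky. Each master's done pulse is
+ // latched until l1_mx_aw_done is asserted, which then clears both flags.
+ // In the save-on-both-masters case, l1_mx_aw_done only rises once both
+ // masters have completed, so the two handshakes may finish in different
+ // cycles.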
+
+ /*
+ * When accepting L2 transactions, we must drop the corresponding transaction from the other
+ * master to make it available again for save requests from L1_DROP_SAVE.
+ */
+ always_comb begin : AW_L2_SPLIT
+
+ l2_m0_aw_accept[i] = 1'b0;
+ l2_m1_aw_accept[i] = 1'b0;
+ l2_m0_aw_drop[i] = 1'b0;
+ l2_m1_aw_drop[i] = 1'b0;
+
+ // de-assert request signals individually upon handshakes
+ if (l2_aw_accept[i]) begin
+ if (l2_master_select[i]) begin
+ l2_m1_aw_accept[i] = ~l2_m1_aw_done_SP[i];
+ l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i];
+
+ end else begin
+ l2_m0_aw_accept[i] = ~l2_m0_aw_done_SP[i];
+ l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i];
+
+ end
+ end else begin
+ l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0;
+ l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0;
+
+ end
+
+ // combine done
+ l2_mx_aw_done[i] = l2_m0_aw_done_SP[i] & l2_m1_aw_done_SP[i];
+
+ l2_aw_done[i] = l2_mx_aw_done[i];
+ end
+
+ always_ff @(posedge Clk_CI) begin : L2_MX_AW_DONE_REG
+ if (Rst_RBI == 0) begin
+ l2_m0_aw_done_SP[i] <= 1'b0;
+ l2_m1_aw_done_SP[i] <= 1'b0;
+ end else if (l2_mx_aw_done[i]) begin
+ l2_m0_aw_done_SP[i] <= 1'b0;
+ l2_m1_aw_done_SP[i] <= 1'b0;
+ end else begin
+ l2_m0_aw_done_SP[i] <= l2_m0_aw_done_SP[i] | l2_m0_aw_done[i];
+ l2_m1_aw_done_SP[i] <= l2_m1_aw_done_SP[i] | l2_m1_aw_done[i];
+ end
+ end
+
+ /*
+ * When accepting L1 transactions, we must do so only on the selected master. Drop requests
+ * must be performed on any one of the two masters. Save requests must be performed by both masters.
+ */
+ always_comb begin : AR_L1_SPLIT
+
+ // TLB handshake
+ l1_m0_ar_accept[i] = 1'b0;
+ l1_m1_ar_accept[i] = 1'b0;
+ l1_m0_ar_drop[i] = 1'b0;
+ l1_m1_ar_drop[i] = 1'b0;
+ l1_m0_ar_save[i] = 1'b0;
+ l1_m1_ar_save[i] = 1'b0;
+
+ l1_mx_ar_done[i] = 1'b0;
+
+ // AXI sender input handshake
+ int_m0_arvalid[i] = 1'b0;
+ int_m1_arvalid[i] = 1'b0;
+ int_arready[i] = 1'b0;
+
+ // accept on selected master only
+ if (l1_ar_accept[i]) begin
+ if (int_rmaster_select[i]) begin
+ l1_m1_ar_accept[i] = 1'b1;
+ l1_mx_ar_done[i] = l1_m1_ar_done[i];
+
+ int_m1_arvalid[i] = int_arvalid[i];
+ int_arready[i] = int_m1_arready[i];
+
+ end else begin
+ l1_m0_ar_accept[i] = 1'b1;
+ l1_mx_ar_done[i] = l1_m0_ar_done[i];
+
+ int_m0_arvalid[i] = int_arvalid[i];
+ int_arready[i] = int_m0_arready[i];
+ end
+
+ // drop on Master 0 only
+ end else if (l1_ar_drop[i]) begin
+ l1_m0_ar_drop[i] = 1'b1;
+ l1_mx_ar_done[i] = l1_m0_ar_done[i];
+
+ int_m0_arvalid[i] = int_arvalid[i];
+ int_arready[i] = l1_m0_ar_done[i];
+
+ // save on both masters
+ end else if (l1_ar_save[i]) begin
+ // split save
+ l1_m0_ar_save[i] = ~l1_m0_ar_done_SP[i];
+ l1_m1_ar_save[i] = ~l1_m1_ar_done_SP[i];
+
+ // combine done
+ l1_mx_ar_done[i] = l1_m0_ar_done_SP[i] & l1_m1_ar_done_SP[i];
+
+ int_m0_arvalid[i] = int_arvalid[i];
+ int_m1_arvalid[i] = int_arvalid[i];
+ int_arready[i] = l1_mx_ar_done[i];
+ end
+ end
+
+ // signal back to handshake splitter
+ assign l1_ar_done[i] = l1_mx_ar_done[i];
+
+ always_ff @(posedge Clk_CI) begin : L1_MX_AR_DONE_REG
+ if (Rst_RBI == 0) begin
+ l1_m0_ar_done_SP[i] <= 1'b0;
+ l1_m1_ar_done_SP[i] <= 1'b0;
+ end else if (l1_mx_ar_done[i]) begin
+ l1_m0_ar_done_SP[i] <= 1'b0;
+ l1_m1_ar_done_SP[i] <= 1'b0;
+ end else begin
+ l1_m0_ar_done_SP[i] <= l1_m0_ar_done_SP[i] | l1_m0_ar_done[i];
+ l1_m1_ar_done_SP[i] <= l1_m1_ar_done_SP[i] | l1_m1_ar_done[i];
+ end
+ end
+
+ /*
+ * When accepting L2 transactions, we must drop the corresponding transaction from the other
+ * master to make it available again for save requests from L1_DROP_SAVE.
+ */
+ always_comb begin : AR_L2_SPLIT
+
+ l2_m0_ar_accept[i] = 1'b0;
+ l2_m1_ar_accept[i] = 1'b0;
+ l2_m0_ar_drop[i] = 1'b0;
+ l2_m1_ar_drop[i] = 1'b0;
+
+ // de-assert request signals individually upon handshakes
+ if (l2_ar_accept[i]) begin
+ if (l2_master_select[i]) begin
+ l2_m1_ar_accept[i] = ~l2_m1_ar_done_SP[i];
+ l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i];
+
+ end else begin
+ l2_m0_ar_accept[i] = ~l2_m0_ar_done_SP[i];
+ l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i];
+
+ end
+ end else if (l2_ar_drop[i]) begin
+ l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0;
+ l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0;
+
+ end
+
+ // combine done
+ l2_mx_ar_done[i] = l2_m0_ar_done_SP[i] & l2_m1_ar_done_SP[i];
+
+ l2_ar_done[i] = l2_mx_ar_done[i];
+ end
+
+ always_ff @(posedge Clk_CI) begin : L2_MX_AR_DONE_REG
+ if (Rst_RBI == 0) begin
+ l2_m0_ar_done_SP[i] <= 1'b0;
+ l2_m1_ar_done_SP[i] <= 1'b0;
+ end else if (l2_mx_ar_done[i]) begin
+ l2_m0_ar_done_SP[i] <= 1'b0;
+ l2_m1_ar_done_SP[i] <= 1'b0;
+ end else begin
+ l2_m0_ar_done_SP[i] <= l2_m0_ar_done_SP[i] | l2_m0_ar_done[i];
+ l2_m1_ar_done_SP[i] <= l2_m1_ar_done_SP[i] | l2_m1_ar_done[i];
+ end
+ end
+
+ end // AX_SPLIT
+ endgenerate // AX_SPLIT
+
+ // }}}
+
+ // HANDSHAKE SPLITS {{{
+ // ██╗ ██╗███████╗ ███████╗██████╗ ██╗ ██╗████████╗
+ // ██║ ██║██╔════╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝
+ // ███████║███████╗ ███████╗██████╔╝██║ ██║ ██║
+ // ██╔══██║╚════██║ ╚════██║██╔═══╝ ██║ ██║ ██║
+ // ██║ ██║███████║ ███████║██║ ███████╗██║ ██║
+ // ╚═╝ ╚═╝╚══════╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝
+ //
+ /*
+ * We need to perform combined handshakes with multiple AXI modules
+ * upon transactions drops, accepts, saves etc. from two TLBs.
+ */
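+ /*
+ * Pattern used throughout this block: a combined request (e.g. l1_xw_accept)
+ * is split into per-module requests that are suppressed once the respective
+ * module has signalled done (sticky *_done_SP flags), and the combined done
+ * is the AND of the individual sticky flags.
+ */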
+ generate for (i = 0; i < N_PORTS; i++) begin : HANDSHAKE_SPLIT
+
+ assign l1_xw_accept[i] = int_wtrans_accept[i] & ~aw_out_stall[i];
+ assign int_wtrans_sent[i] = l1_xw_done[i];
+
+ assign l1_ar_accept[i] = int_rtrans_accept[i];
+ assign int_rtrans_sent[i] = l1_ar_done[i];
+
+ /*
+ * L1 AW sender + W buffer handshake split
+ */
+ // forward
+ assign l1_aw_accept[i] = l1_xw_accept[i] & ~l1_aw_done_SP[i];
+ assign l1_w_accept[i] = l1_xw_accept[i] & ~l1_w_done_SP[i];
+
+ assign l1_aw_save[i] = l1_xw_save[i] & ~l1_aw_done_SP[i];
+ assign l1_w_save[i] = l1_xw_save[i] & ~l1_w_done_SP[i];
+
+ assign l1_aw_drop[i] = l1_xw_drop[i] & ~l1_aw_done_SP[i];
+ assign l1_w_drop[i] = l1_xw_drop[i] & ~l1_w_done_SP[i];
+
+ // backward
+ assign l1_xw_done[i] = l1_aw_done_SP[i] & l1_w_done_SP[i];
+
+ always_ff @(posedge Clk_CI) begin : L1_XW_HS_SPLIT
+ if (Rst_RBI == 0) begin
+ l1_aw_done_SP[i] <= 1'b0;
+ l1_w_done_SP[i] <= 1'b0;
+ end else if (l1_xw_done[i]) begin
+ l1_aw_done_SP[i] <= 1'b0;
+ l1_w_done_SP[i] <= 1'b0;
+ end else begin
+ l1_aw_done_SP[i] <= l1_aw_done_SP[i] | l1_aw_done[i];
+ l1_w_done_SP[i] <= l1_w_done_SP[i] | l1_w_done[i];
+ end
+ end
+
+ if (ENABLE_L2TLB[i] == 1) begin : L2_HS_SPLIT
+
+ /*
+ * L1 AR sender + R sender handshake split
+ *
+ * AR and R do not need to be strictly in sync. We thus use separate handshakes.
+ * But the handshake signals for the R sender are multiplexed with those for
+ * the L2. However, L2_ACCEPT_DROP_SAVE always has higher priority.
+ */
+ assign lx_r_drop[i] = l2_r_drop[i] | l1_r_drop[i];
+ assign l1_r_done[i] = l2_r_drop[i] ? 1'b0 : lx_r_done[i];
+ assign l2_r_done[i] = l2_r_drop[i] ? lx_r_done[i] : 1'b0;
+
+ /*
+ * L2 AW sender + W buffer handshake split
+ */
+ // forward
+ assign l2_aw_accept[i] = l2_xw_accept[i] & ~l2_aw_done_SP[i];
+ assign l2_w_accept[i] = l2_xw_accept[i] & ~l2_w_done_SP[i];
+
+ assign l2_aw_drop[i] = l2_xw_drop[i] & ~l2_aw_done_SP[i];
+ assign l2_w_drop[i] = l2_xw_drop[i] & ~l2_w_done_SP[i];
+
+ // backward
+ assign l2_xw_done[i] = l2_aw_done_SP[i] & l2_w_done_SP[i];
+
+ always_ff @(posedge Clk_CI) begin : L2_XW_HS_SPLIT
+ if (Rst_RBI == 0) begin
+ l2_aw_done_SP[i] <= 1'b0;
+ l2_w_done_SP[i] <= 1'b0;
+ end else if (l2_xw_done[i]) begin
+ l2_aw_done_SP[i] <= 1'b0;
+ l2_w_done_SP[i] <= 1'b0;
+ end else begin
+ l2_aw_done_SP[i] <= l2_aw_done_SP[i] | l2_aw_done[i];
+ l2_w_done_SP[i] <= l2_w_done_SP[i] | l2_w_done[i];
+ end
+ end
+
+ /*
+ * L2 AR + R sender handshake split
+ */
+ // forward
+ assign l2_ar_drop[i] = l2_xr_drop[i] & ~l2_ar_done_SP[i];
+ assign l2_r_drop[i] = l2_xr_drop[i] & ~l2_r_done_SP[i];
+
+ // backward - make sure to always clear L2_XR_HS_SPLIT
+ always_comb begin
+ if (l2_xr_drop[i]) begin
+ l2_xr_done[i] = l2_ar_done_SP[i] & l2_r_done_SP[i];
+ end else begin
+ l2_xr_done[i] = l2_ar_done_SP[i];
+ end
+ end
+
+ always_ff @(posedge Clk_CI) begin : L2_XR_HS_SPLIT
+ if (Rst_RBI == 0) begin
+ l2_ar_done_SP[i] <= 1'b0;
+ l2_r_done_SP[i] <= 1'b0;
+ end else if (l2_xr_done[i]) begin
+ l2_ar_done_SP[i] <= 1'b0;
+ l2_r_done_SP[i] <= 1'b0;
+ end else begin
+ l2_ar_done_SP[i] <= l2_ar_done_SP[i] | l2_ar_done[i];
+ l2_r_done_SP[i] <= l2_r_done_SP[i] | l2_r_done[i];
+ end
+ end
+
+ end else begin // if (ENABLE_L2TLB[i] == 1)
+
+ assign lx_r_drop[i] = l1_r_drop[i];
+ assign l1_r_done[i] = lx_r_done[i];
+
+ assign l2_aw_accept[i] = 1'b0;
+ assign l2_w_accept[i] = 1'b0;
+ assign l2_aw_drop[i] = 1'b0;
+ assign l2_w_drop[i] = 1'b0;
+ assign l2_xw_done[i] = 1'b0;
+ assign l2_aw_done_SP[i] = 1'b0;
+ assign l2_w_done_SP[i] = 1'b0;
+
+ assign l2_ar_accept[i] = 1'b0;
+ assign l2_ar_drop[i] = 1'b0;
+ assign l2_r_drop[i] = 1'b0;
+ assign l2_xr_done[i] = 1'b0;
+ assign l2_r_done[i] = 1'b0;
+ assign l2_ar_done_SP[i] = 1'b0;
+ assign l2_r_done_SP[i] = 1'b0;
+
+ end // if (ENABLE_L2TLB[i] == 1)
+
+ end // HANDSHAKE_SPLIT
+ endgenerate // HANDSHAKE_SPLIT
+
+ // }}}
+
+ // L2 TLB {{{
+ // ██╗ ██████╗ ████████╗██╗ ██████╗
+ // ██║ ╚════██╗ ╚══██╔══╝██║ ██╔══██╗
+ // ██║ █████╔╝ ██║ ██║ ██████╔╝
+ // ██║ ██╔═══╝ ██║ ██║ ██╔══██╗
+ // ███████╗███████╗ ██║ ███████╗██████╔╝
+ // ╚══════╝╚══════╝ ╚═╝ ╚══════╝╚═════╝
+ //
+ /*
+ * l2_tlb
+ *
+ * The L2 TLB translates addresses upon misses in the L1 TLB (rab_core).
+ *
+ * It supports one ongoing translation at a time. If an L1 miss occurs while the L2 is busy,
+ * the L1 is stalled until the L2 is available again.
+ *
+ */
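+ /*
+ * The stall is implemented in L1_DROP_SAVE below: save requests towards the
+ * AR/AW senders are withheld while L2Busy_S is asserted, which in turn
+ * prevents the L1 handshake from completing.
+ */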
+ generate for (i = 0; i < N_PORTS; i++) begin : L2_TLB
+ if (ENABLE_L2TLB[i] == 1) begin : L2_TLB
+
+ /*
+ * L1 output selector
+ */
+ assign L1OutRwType_D[i] = int_wtrans_drop[i] ? 1'b1 : 1'b0;
+ assign L1OutProt_D[i] = rab_prot[i];
+ assign L1OutMulti_D[i] = rab_multi[i];
+
+ /*
+ * L1 output control + L1_DROP_BUF, L2_IN_BUF management
+ *
+ * Forward the L1 drop request to AR/AW sender modules if
+ * 1. the transaction needs to be dropped (L1 multi, prot, prefetch), or
+ * 2. a lookup in the L2 TLB is required (L1 miss) and the input buffer is not full.
+ *
+ * The AR/AW senders do not support more than 1 outstanding L1 miss. The push back towards
+ * the upstream is realized by not accepting the save request (saving the L1 transaction)
+ * in the senders as long as the L2 TLB is busy or has valid output. This ultimately
+ * blocks the L1 TLB.
+ *
+ * Together with the AW drop/save, we also perform the W drop/save, as AW and W must
+ * absolutely remain in order. In contrast, the R drop is performed separately in
+ * L2_ACCEPT_DROP_SAVE below.
+ */
+ always_comb begin : L1_DROP_SAVE
+
+ l1_ar_drop[i] = 1'b0;
+ l1_ar_save[i] = 1'b0;
+ l1_xw_drop[i] = 1'b0;
+ l1_xw_save[i] = 1'b0;
+
+ l1_id_drop[i] = L1OutId_D[i];
+ l1_len_drop[i] = L1OutLen_D[i];
+ l1_prefetch_drop[i] = rab_prefetch[i];
+ l1_hit_drop[i] = 1'b1; // there are no drops for L1 misses
+
+ L1DropEn_S[i] = 1'b0;
+ L2InEn_S[i] = 1'b0;
+
+ if ( rab_prot[i] | rab_multi[i] | rab_prefetch[i] ) begin
+ // 1. Drop
+ l1_ar_drop[i] = int_rtrans_drop[i] & ~L1DropValid_SP[i];
+ l1_xw_drop[i] = int_wtrans_drop[i] & ~L1DropValid_SP[i];
+
+ // Store to L1_DROP_BUF upon handshake
+ L1DropEn_S[i] = (l1_ar_drop[i] & l1_ar_done[i]) |
+ (l1_xw_drop[i] & l1_xw_done[i]);
+
+ end else if ( rab_miss[i] ) begin
+ // 2. Save - Make sure L2 is really available.
+ l1_ar_save[i] = int_rtrans_drop[i] & ~L2Busy_S[i];
+ l1_xw_save[i] = int_wtrans_drop[i] & ~L2Busy_S[i];
+
+ // Store to L2_IN_BUF upon handshake - triggers the L2 TLB
+ L2InEn_S[i] = (l1_ar_save[i] & l1_ar_done[i]) |
+ (l1_xw_save[i] & l1_xw_done[i]);
+ end
+ end
+
+ /*
+ * L2 output control + L2_OUT_BUF management + R/B sender control + W buffer control
+ *
+ * Perform L1 R transaction drops unless the L2 output buffer holds valid data. The AXI specs
+ * require the B response to be sent only after consuming/discarding the corresponding data
+ * in the W channel. Thus, we only send the L2 drop request to the W buffer here. The drop
+ * request to the B sender is then sent by the W buffer autonomously.
+ *
+ * L1 AW/W drop requests are managed by L1_DROP_SAVE.
+ */
+ always_comb begin : L2_ACCEPT_DROP_SAVE
+
+ l2_ar_addr[i] = 'b0;
+ l2_aw_addr[i] = 'b0;
+ l2_ar_accept[i] = 1'b0;
+ l2_xr_drop[i] = 1'b0;
+ l2_xw_accept[i] = 1'b0;
+ l2_xw_drop[i] = 1'b0;
+
+ l1_r_drop[i] = 1'b0;
+
+ lx_id_drop[i] = 'b0;
+ lx_len_drop[i] = 'b0;
+ lx_prefetch_drop[i] = 1'b0;
+ lx_hit_drop[i] = 1'b0;
+
+ L1DropValid_SN[i] = L1DropValid_SP[i] | L1DropEn_S[i];
+ L2OutValid_SN[i] = L2OutValid_SP[i];
+ L2OutReady_S[i] = 1'b0;
+ L2OutEn_S[i] = 1'b0;
+
+ L2Miss_S[i] = 1'b0;
+ int_multi[i] = 1'b0;
+ int_prot[i] = 1'b0;
+
+ if (L2OutValid_SP[i] == 1'b0) begin
+
+ // Drop L1 from R senders
+ if (L1DropValid_SP[i] == 1'b1) begin
+
+ // Only perform the R sender drop here.
+ if (~L1DropRwType_DP[i]) begin
+
+ l1_r_drop[i] = 1'b1;
+ lx_id_drop[i] = L1DropId_DP[i];
+ lx_len_drop[i] = L1DropLen_DP[i];
+ lx_prefetch_drop[i] = L1DropPrefetch_S[i];
+ lx_hit_drop[i] = 1'b1; // there are no drops for L1 misses
+
+ // Invalidate L1_DROP_BUF upon handshake
+ if ( l1_r_drop[i] & l1_r_done[i] ) begin
+
+ L1DropValid_SN[i] = 1'b0;
+ int_prot[i] = L1DropProt_DP[i];
+ int_multi[i] = L1DropMulti_DP[i];
+ end
+
+ end else begin
+ // Invalidate L1_DROP_BUF
+ L1DropValid_SN[i] = 1'b0;
+ int_prot[i] = L1DropProt_DP[i];
+ int_multi[i] = L1DropMulti_DP[i];
+ end
+ end
+
+ end else begin // L2_OUT_BUF has valid data
+
+ if ( L2OutHit_SP[i] & ~(L2OutPrefetch_S[i] | L2OutProt_SP[i] | L2OutMulti_SP[i]) ) begin
+
+ l2_ar_addr[i] = L2OutAddr_DP[i];
+ l2_aw_addr[i] = L2OutAddr_DP[i];
+
+ l2_ar_accept[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1;
+ l2_xw_accept[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0;
+
+ // Invalidate L2_OUT_BUF upon handshake
+ L2OutValid_SN[i] = ~( (l2_ar_accept[i] & l2_ar_done[i]) |
+ (l2_xw_accept[i] & l2_xw_done[i]) );
+ end else begin
+
+ lx_id_drop[i] = L2OutId_DP[i];
+ lx_len_drop[i] = L2OutLen_DP[i];
+ lx_prefetch_drop[i] = L2OutPrefetch_S[i];
+ lx_hit_drop[i] = L2OutHit_SP[i];
+
+ // The l2_xr_drop will also perform the handshake with the R sender
+ l2_xr_drop[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1;
+ l2_xw_drop[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0;
+
+ // Invalidate L1_DROP_BUF upon handshake
+ if ( (l2_xr_drop[i] & l2_xr_done[i]) | (l2_xw_drop[i] & l2_xw_done[i]) ) begin
+
+ L2OutValid_SN[i] = 1'b0;
+ L2Miss_S[i] = ~L2OutHit_SP[i];
+ int_prot[i] = L2OutProt_SP[i];
+ int_multi[i] = L2OutMulti_SP[i];
+ end
+ end
+ end
+
+ // Only accept new L2 output after ongoing drops have finished.
+ if ( (l2_xr_drop[i] == l2_xr_done[i]) &
+ (l2_xw_drop[i] == l2_xw_done[i]) &
+ (l1_r_drop[i] == l1_r_done[i] ) ) begin
+ // Store to L2_OUT_BUF upon handshake with L2 TLB module
+ if ( (L2OutValid_SP[i] == 1'b0) && (L2OutValid_S[i] == 1'b1) ) begin
+ L2OutValid_SN[i] = 1'b1;
+ L2OutReady_S[i] = 1'b1;
+ L2OutEn_S[i] = 1'b1;
+ end
+ end
+ end
+
+ /*
+ * L1 drop buffer
+ *
+ * Used in case of multi, prot and prefetch hits in the L1 TLB.
+ */
+ always_ff @(posedge Clk_CI) begin : L1_DROP_BUF
+ if (Rst_RBI == 0) begin
+ L1DropProt_DP[i] <= 1'b0;
+ L1DropMulti_DP[i] <= 1'b0;
+ L1DropRwType_DP[i] <= 1'b0;
+ L1DropUser_DP[i] <= 'b0;
+ L1DropId_DP[i] <= 'b0;
+ L1DropLen_DP[i] <= 'b0;
+ L1DropAddr_DP[i] <= 'b0;
+ end else if (L1DropEn_S[i] == 1'b1) begin
+ L1DropProt_DP[i] <= L1OutProt_D[i] ;
+ L1DropMulti_DP[i] <= L1OutMulti_D[i] ;
+ L1DropRwType_DP[i] <= L1OutRwType_D[i];
+ L1DropUser_DP[i] <= L1OutUser_D[i] ;
+ L1DropId_DP[i] <= L1OutId_D[i] ;
+ L1DropLen_DP[i] <= L1OutLen_D[i] ;
+ L1DropAddr_DP[i] <= L1OutAddr_D[i] ;
+ end
+ end // always_ff @ (posedge Clk_CI)
+
+ /*
+ * L2 input buffer
+ *
+ * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB.
+ */
+ always_ff @(posedge Clk_CI) begin : L2_IN_BUF
+ if (Rst_RBI == 0) begin
+ L2InRwType_DP[i] <= 1'b0;
+ L2InUser_DP[i] <= 'b0;
+ L2InId_DP[i] <= 'b0;
+ L2InLen_DP[i] <= 'b0;
+ L2InAddr_DP[i] <= 'b0;
+ end else if (L2InEn_S[i] == 1'b1) begin
+ L2InRwType_DP[i] <= L1OutRwType_D[i];
+ L2InUser_DP[i] <= L1OutUser_D[i] ;
+ L2InId_DP[i] <= L1OutId_D[i] ;
+ L2InLen_DP[i] <= L1OutLen_D[i] ;
+ L2InAddr_DP[i] <= L1OutAddr_D[i] ;
+ end
+ end // always_ff @ (posedge Clk_CI)
+
+ l2_tlb
+ #(
+ .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ),
+ .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
+ .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ),
+ .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ),
+ .N_SETS ( `RAB_L2_N_SETS ),
+ .N_OFFSETS ( `RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS ),
+ .N_PAR_VA_RAMS ( `RAB_L2_N_PAR_VA_RAMS ),
+ .HIT_OFFSET_STORE_WIDTH ( log2(`RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS) )
+ )
+ u_l2_tlb
+ (
+ .clk_i ( Clk_CI ),
+ .rst_ni ( Rst_RBI ),
+
+ // Config inputs
+ .we_i ( L2CfgWE_S[i] ),
+ .waddr_i ( L2CfgWAddr_D[i] ),
+ .wdata_i ( L2CfgWData_D[i] ),
+
+ // Request input
+ .start_i ( L2InEn_S[i] ),
+ .busy_o ( L2Busy_S[i] ),
+ .rw_type_i ( L2InRwType_DP[i] ),
+ .in_addr_i ( L2InAddr_DP[i] ),
+
+ // Response output
+ .out_ready_i ( L2OutReady_S[i] ),
+ .out_valid_o ( L2OutValid_S[i] ),
+ .hit_o ( L2OutHit_SN[i] ),
+ .miss_o ( L2OutMiss_SN[i] ),
+ .prot_o ( L2OutProt_SN[i] ),
+ .multi_o ( L2OutMulti_SN[i] ),
+ .cache_coherent_o ( L2OutCC_SN[i] ),
+ .out_addr_o ( L2OutAddr_DN[i] )
+ );
+
+ /*
+ * L2 output buffer
+ *
+ * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB.
+ */
+ always_ff @(posedge Clk_CI) begin : L2_OUT_BUF
+ if (Rst_RBI == 0) begin
+ L2OutRwType_DP[i] <= 1'b0;
+ L2OutUser_DP[i] <= 'b0;
+ L2OutLen_DP[i] <= 'b0;
+ L2OutId_DP[i] <= 'b0;
+ L2OutInAddr_DP[i] <= 'b0;
+
+ L2OutHit_SP[i] <= 1'b0;
+ L2OutMiss_SP[i] <= 1'b0;
+ L2OutProt_SP[i] <= 1'b0;
+ L2OutMulti_SP[i] <= 1'b0;
+ L2OutCC_SP[i] <= 1'b0;
+ L2OutAddr_DP[i] <= 'b0;
+ end else if (L2OutEn_S[i] == 1'b1) begin
+ L2OutRwType_DP[i] <= L2InRwType_DP[i];
+ L2OutUser_DP[i] <= L2InUser_DP[i] ;
+ L2OutLen_DP[i] <= L2InLen_DP[i] ;
+ L2OutId_DP[i] <= L2InId_DP[i] ;
+ L2OutInAddr_DP[i] <= L2InAddr_DP[i] ;
+
+ L2OutHit_SP[i] <= L2OutHit_SN[i] ;
+ L2OutMiss_SP[i] <= L2OutMiss_SN[i] ;
+ L2OutProt_SP[i] <= L2OutProt_SN[i] ;
+ L2OutMulti_SP[i] <= L2OutMulti_SN[i];
+ L2OutCC_SP[i] <= L2OutCC_SN[i] ;
+ L2OutAddr_DP[i] <= L2OutAddr_DN[i] ;
+ end
+ end // always_ff @ (posedge Clk_CI)
+
+ always_ff @(posedge Clk_CI) begin : BUF_VALID
+ if (Rst_RBI == 0) begin
+ L1DropValid_SP[i] = 1'b0;
+ L2OutValid_SP[i] = 1'b0;
+ end else begin
+ L1DropValid_SP[i] = L1DropValid_SN[i];
+ L2OutValid_SP[i] = L2OutValid_SN[i];
+ end
+ end
+
+ always_comb begin : BUF_TO_PREFETCH
+ // L1 Drop Buf
+ if (L1DropUser_DP[i] == {AXI_USER_WIDTH{1'b1}})
+ L1DropPrefetch_S[i] = 1'b1;
+ else
+ L1DropPrefetch_S[i] = 1'b0;
+
+ // L2 Out Buf
+ if (L2OutUser_DP[i] == {AXI_USER_WIDTH{1'b1}})
+ L2OutPrefetch_S[i] = 1'b1;
+ else
+ L2OutPrefetch_S[i] = 1'b0;
+ end
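+
+ // Prefetch transactions are marked by an all-ones AXI user signal; the
+ // comparisons above recover that marker from the buffered user fields.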
+
+ assign l2_cache_coherent[i] = L2OutCC_SP[i];
+ assign int_miss[i] = L2Miss_S[i];
+
+ end else begin : L2_TLB_STUB // if (ENABLE_L2TLB[i] == 1)
+
+ assign l1_ar_drop[i] = int_rtrans_drop[i];
+ assign l1_r_drop[i] = int_rtrans_drop[i];
+ assign l1_xw_drop[i] = int_wtrans_drop[i];
+
+ assign l1_ar_save[i] = 1'b0;
+ assign l1_xw_save[i] = 1'b0;
+ assign l2_xw_accept[i] = 1'b0;
+ assign l2_xr_drop[i] = 1'b0;
+ assign l2_xw_drop[i] = 1'b0;
+
+ assign l2_ar_addr[i] = 'b0;
+ assign l2_aw_addr[i] = 'b0;
+
+ assign l1_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] :
+ int_rtrans_drop[i] ? int_arid[i] :
+ '0;
+ assign l1_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] :
+ int_rtrans_drop[i] ? int_arlen[i] :
+ '0;
+ assign l1_prefetch_drop[i] = rab_prefetch[i];
+ assign l1_hit_drop[i] = ~rab_miss[i];
+
+ assign lx_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] :
+ int_rtrans_drop[i] ? int_arid[i] :
+ '0;
+ assign lx_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] :
+ int_rtrans_drop[i] ? int_arlen[i] :
+ '0;
+ assign lx_prefetch_drop[i] = rab_prefetch[i];
+ assign lx_hit_drop[i] = ~rab_miss[i];
+
+ assign l2_cache_coherent[i] = 1'b0;
+
+ assign int_miss[i] = rab_miss[i];
+ assign int_prot[i] = rab_prot[i];
+ assign int_multi[i] = rab_multi[i];
+
+ // unused signals
+ assign L2Miss_S[i] = 1'b0;
+
+ assign L1OutRwType_D[i] = 1'b0;
+ assign L1OutProt_D[i] = 1'b0;
+ assign L1OutMulti_D[i] = 1'b0;
+
+ assign L1DropRwType_DP[i] = 1'b0;
+ assign L1DropUser_DP[i] = 'b0;
+ assign L1DropId_DP[i] = 'b0;
+ assign L1DropLen_DP[i] = 'b0;
+ assign L1DropAddr_DP[i] = 'b0;
+ assign L1DropProt_DP[i] = 1'b0;
+ assign L1DropMulti_DP[i] = 1'b0;
+
+ assign L1DropEn_S[i] = 1'b0;
+ assign L1DropPrefetch_S[i] = 1'b0;
+ assign L1DropValid_SN[i] = 1'b0;
+ assign L1DropValid_SP[i] = 1'b0;
+
+ assign L2InRwType_DP[i] = 1'b0;
+ assign L2InUser_DP[i] = 'b0;
+ assign L2InId_DP[i] = 'b0;
+ assign L2InLen_DP[i] = 'b0;
+ assign L2InAddr_DP[i] = 'b0;
+
+ assign L2InEn_S[i] = 1'b0;
+
+ assign L2OutHit_SN[i] = 1'b0;
+ assign L2OutMiss_SN[i] = 1'b0;
+ assign L2OutProt_SN[i] = 1'b0;
+ assign L2OutMulti_SN[i] = 1'b0;
+ assign L2OutCC_SN[i] = 1'b0;
+ assign L2OutAddr_DN[i] = 'b0;
+
+ assign L2OutRwType_DP[i] = 1'b0;
+ assign L2OutUser_DP[i] = 'b0;
+ assign L2OutId_DP[i] = 'b0;
+ assign L2OutLen_DP[i] = 'b0;
+ assign L2OutInAddr_DP[i] = 'b0;
+ assign L2OutHit_SP[i] = 1'b0;
+ assign L2OutMiss_SP[i] = 1'b0;
+ assign L2OutProt_SP[i] = 1'b0;
+ assign L2OutMulti_SP[i] = 1'b0;
+ assign L2OutCC_SP[i] = 1'b0;
+ assign L2OutAddr_DP[i] = 'b0;
+
+ assign L2OutEn_S[i] = 1'b0;
+ assign L2OutPrefetch_S[i] = 1'b0;
+ assign L2Busy_S[i] = 1'b0;
+ assign L2OutValid_S[i] = 1'b0;
+ assign L2OutValid_SN[i] = 1'b0;
+ assign L2OutValid_SP[i] = 1'b0;
+ assign L2OutReady_S[i] = 1'b0;
+
+ end // !`ifdef ENABLE_L2TLB
+ end // for (i = 0; i < N_PORTS; i++)
+ endgenerate
+
+// }}}
+"""
+# endmodule
+#
+#
+# // vim: ts=2 sw=2 sts=2 et nosmartindent autoindent foldmethod=marker
+#
+#
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class check_ram(Elaboratable):
+
+    def __init__(self, ADDR_WIDTH=32, RAM_DATA_WIDTH=32, PAGE_SIZE=4096,
+                 SET_WIDTH=5, OFFSET_WIDTH=4):
+        # Parameter defaults mirror the (commented-out) parameter list of the
+        # original SystemVerilog module reproduced below.
+        self.ADDR_WIDTH = ADDR_WIDTH
+        self.RAM_DATA_WIDTH = RAM_DATA_WIDTH
+        self.PAGE_SIZE = PAGE_SIZE
+        self.SET_WIDTH = SET_WIDTH
+        self.OFFSET_WIDTH = OFFSET_WIDTH
+
+        self.clk_i = Signal()  # input
+        self.rst_ni = Signal()  # input
+        self.in_addr = Signal(ADDR_WIDTH)  # input
+        self.rw_type = Signal()  # input
+        self.ram_we = Signal()  # input
+        self.port0_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1)  # input
+        self.port1_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1)  # input
+        self.ram_wdata = Signal(RAM_DATA_WIDTH)  # input
+        self.output_sent = Signal()  # input
+        self.output_valid = Signal()  # input
+        self.offset_addr_d = Signal(OFFSET_WIDTH)  # input
+        self.hit_addr = Signal(SET_WIDTH+OFFSET_WIDTH+1)  # output
+        self.master = Signal()  # output
+        self.hit = Signal()  # output
+        self.multi_hit = Signal()  # output
+        self.prot = Signal()  # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# //import CfMath::log2;
+#
+# //`define MULTI_HIT_FULL_SET
+#
+# module check_ram
+# //#(
+# // parameter ADDR_WIDTH = 32,
+# // parameter RAM_DATA_WIDTH = 32,
+# // parameter PAGE_SIZE = 4096, // 4kB
+# // parameter SET_WIDTH = 5,
+# // parameter OFFSET_WIDTH = 4
+# // )
+# (
+# input logic clk_i,
+# input logic rst_ni,
+# input logic [ADDR_WIDTH-1:0] in_addr,
+# input logic rw_type, // 1 => write, 0=> read
+# input logic ram_we,
+# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr,
+# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr,
+# input logic [RAM_DATA_WIDTH-1:0] ram_wdata,
+# input logic output_sent,
+# input logic output_valid,
+# input logic [OFFSET_WIDTH-1:0] offset_addr_d,
+# output logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr,
+# output logic master,
+# output logic hit,
+# output logic multi_hit,
+# output logic prot
+# );
+#
+""" #docstring_begin
+
+ localparam IGNORE_LSB = log2(PAGE_SIZE); // 12
+
+ logic [RAM_DATA_WIDTH-1:0] port0_data_o, port1_data_o; // RAM read data outputs
+ logic port0_hit, port1_hit; // Ram output matches in_addr
+
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr_saved, port1_addr_saved;
+
+ // Hit FSM Signals
+ typedef enum logic {SEARCH, HIT} hit_state_t;
+ hit_state_t hit_SP; // Hit FSM state
+ hit_state_t hit_SN; // Hit FSM next state
+
+ // Multi Hit FSM signals
+`ifdef MULTI_HIT_FULL_SET
+ typedef enum logic[1:0] {NO_HITS, ONE_HIT, MULTI_HIT} multi_state_t;
+ multi_state_t multi_SP; // Multi Hit FSM state
+ multi_state_t multi_SN; // Multi Hit FSM next state
+
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_saved;
+ logic master_saved;
+`endif
+
+ //// --------------- Block RAM (Dual Port) -------------- ////
+
+ // The outputs of the BRAMs are only valid if in the previous cycle:
+ // 1. the inputs were valid, and
+ // 2. the BRAM was not written to.
+ // Otherwise, the outputs must be ignored, which is controlled by the output_valid signal.
+ // This signal is driven by the upper-level L2 TLB module.
+ ram_tp_no_change #(
+ .ADDR_WIDTH( SET_WIDTH+OFFSET_WIDTH+1 ),
+ .DATA_WIDTH( RAM_DATA_WIDTH )
+ )
+ ram_tp_no_change_0
+ (
+ .clk ( clk_i ),
+ .we ( ram_we ),
+ .addr0 ( port0_addr ),
+ .addr1 ( port1_addr ),
+ .d_i ( ram_wdata ),
+ .d0_o ( port0_data_o ),
+ .d1_o ( port1_data_o )
+ );
+
+ //// Check Ram Outputs
+ assign port0_hit = (port0_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port0_data_o[RAM_DATA_WIDTH-1:4]);
+ assign port1_hit = (port1_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port1_data_o[RAM_DATA_WIDTH-1:4]);
+ //// ----------------------------------------------------- /////
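+
+ // VA RAM entry layout used by the checks above and by the prot logic below:
+ // bit 0 = valid, bit 1 = read allowed, bit 2 = write allowed,
+ // bit 3 = master/cache-coherent flag, bits [RAM_DATA_WIDTH-1:4] = VPN
+ // compared against in_addr[ADDR_WIDTH-1:IGNORE_LSB].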
+
+ //// ------------------- Check if Hit ------------------------ ////
+ // FSM
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ hit_SP <= SEARCH;
+ end else begin
+ hit_SP <= hit_SN;
+ end
+ end
+
+ always_ff @(posedge clk_i, negedge rst_ni) begin
+ if (!rst_ni) begin
+ port0_addr_saved <= '0;
+ port1_addr_saved <= '0;
+ end else begin
+ port0_addr_saved <= port0_addr;
+ port1_addr_saved <= port1_addr;
+ end
+ end
+
+ always_comb begin
+ hit_SN = hit_SP;
+ hit = 1'b0;
+ hit_addr = 0;
+ master = 1'b0;
+ unique case(hit_SP)
+ SEARCH :
+ if (output_valid)
+ if (port0_hit || port1_hit) begin
+ hit_SN = HIT;
+ hit = 1'b1;
+ hit_addr = port0_hit ? {port0_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} :
+ port1_hit ? {port1_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} :
+ 0;
+ master = port0_hit ? port0_data_o[3] :
+ port1_hit ? port1_data_o[3] :
+ 1'b0;
+ end
+
+ HIT : begin
+`ifdef MULTI_HIT_FULL_SET // Since the search continues after the first hit, it needs to be saved to be accessed later.
+ hit = 1'b1;
+ hit_addr = hit_addr_saved;
+ master = master_saved;
+`endif
+ if (output_sent)
+ hit_SN = SEARCH;
+ end
+
+ default : begin
+ hit_SN = SEARCH;
+ end
+ endcase // case (hit_SP)
+ end // always_comb begin
+
+ //// ------------------------------------------- ////
+
+ assign prot = output_valid && port0_hit ? ((~port0_data_o[2] && rw_type) || (~port0_data_o[1] && ~rw_type)) :
+ output_valid && port1_hit ? ((~port1_data_o[2] && rw_type) || (~port1_data_o[1] && ~rw_type)) :
+ 1'b0;
+
+ //// ------------------- Multi ------------------- ////
+`ifdef MULTI_HIT_FULL_SET
+
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ hit_addr_saved <= 0;
+ master_saved <= 1'b0;
+ end else if (output_valid) begin
+ hit_addr_saved <= hit_addr;
+ master_saved <= master;
+ end
+ end
+
+ // FSM
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ multi_SP <= NO_HITS;
+ end else begin
+ multi_SP <= multi_SN;
+ end
+ end
+
+ always_comb begin
+ multi_SN = multi_SP;
+ multi_hit = 1'b0;
+ unique case(multi_SP)
+ NO_HITS :
+ if(output_valid && (port0_hit && port1_hit)) begin
+ multi_SN = MULTI_HIT;
+ multi_hit = 1'b1;
+ end else if(output_valid && (port0_hit || port1_hit))
+ multi_SN = ONE_HIT;
+
+ ONE_HIT :
+ if(output_valid && (port0_hit || port1_hit)) begin
+ multi_SN = MULTI_HIT;
+ multi_hit = 1'b1;
+ end else if (output_sent)
+ multi_SN = NO_HITS;
+
+ MULTI_HIT : begin
+ multi_hit = 1'b1;
+ if (output_sent)
+ multi_SN = NO_HITS;
+ end
+
+ endcase // case (multi_SP)
+ end // always_comb begin
+
+`else // !`ifdef MULTI_HIT_FULL_SET
+ assign multi_hit = output_valid && port0_hit && port1_hit;
+`endif // !`ifdef MULTI_HIT_FULL_SET
+ //// ------------------------------------------- ////
+"""
+# endmodule
+#
+#
--- /dev/null
+class CoreConfig:
+ def __init__(self):
+ self.N_SLICES = 16
+ self.N_REGS = 4*self.N_SLICES
+ self.ADDR_WIDTH_PHYS = 40
+ self.ADDR_WIDTH_VIRT = 32
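+
+
+# Minimal usage sketch (illustrative only, not part of the generated RTL):
+# instantiate the config and read out the derived constants. N_REGS follows
+# from 4 registers per slice with the default N_SLICES = 16.
+if __name__ == "__main__":
+    cfg = CoreConfig()
+    assert cfg.N_REGS == 4 * cfg.N_SLICES == 64
+    print(cfg.ADDR_WIDTH_PHYS, cfg.ADDR_WIDTH_VIRT)  # 40 32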
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class fsm(Elaboratable):
+
+    def __init__(self, AXI_M_ADDR_WIDTH=40, AXI_S_ADDR_WIDTH=32,
+                 AXI_ID_WIDTH=8, AXI_USER_WIDTH=6):
+        # Parameter defaults mirror the parameter list of the original
+        # SystemVerilog module reproduced below.
+        self.Clk_CI = Signal()  # input
+        self.Rst_RBI = Signal()  # input
+        self.port1_addr_valid_i = Signal()  # input
+        self.port2_addr_valid_i = Signal()  # input
+        self.port1_sent_i = Signal()  # input
+        self.port2_sent_i = Signal()  # input
+        self.select_i = Signal()  # input
+        self.no_hit_i = Signal()  # input
+        self.multi_hit_i = Signal()  # input
+        self.no_prot_i = Signal()  # input
+        self.prefetch_i = Signal()  # input
+        self.out_addr_i = Signal(AXI_M_ADDR_WIDTH)  # input
+        self.cache_coherent_i = Signal()  # input
+        self.port1_accept_o = Signal()  # output
+        self.port1_drop_o = Signal()  # output
+        self.port1_miss_o = Signal()  # output
+        self.port2_accept_o = Signal()  # output
+        self.port2_drop_o = Signal()  # output
+        self.port2_miss_o = Signal()  # output
+        self.out_addr_o = Signal(AXI_M_ADDR_WIDTH)  # output
+        self.cache_coherent_o = Signal()  # output
+        self.miss_o = Signal()  # output
+        self.multi_o = Signal()  # output
+        self.prot_o = Signal()  # output
+        self.prefetch_o = Signal()  # output
+        self.in_addr_i = Signal(AXI_S_ADDR_WIDTH)  # input
+        self.in_id_i = Signal(AXI_ID_WIDTH)  # input
+        self.in_len_i = Signal(8)  # input
+        self.in_user_i = Signal(AXI_USER_WIDTH)  # input
+        self.in_addr_o = Signal(AXI_S_ADDR_WIDTH)  # output
+        self.in_id_o = Signal(AXI_ID_WIDTH)  # output
+        self.in_len_o = Signal(8)  # output
+        self.in_user_o = Signal(AXI_USER_WIDTH)  # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# //`timescale 1ns / 1ps
+#
+# module fsm
+# #(
+# parameter AXI_M_ADDR_WIDTH = 40,
+# parameter AXI_S_ADDR_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 8,
+# parameter AXI_USER_WIDTH = 6
+# )
+# (
+# input logic Clk_CI,
+# input logic Rst_RBI,
+#
+# input logic port1_addr_valid_i,
+# input logic port2_addr_valid_i,
+# input logic port1_sent_i,
+# input logic port2_sent_i,
+# input logic select_i,
+# input logic no_hit_i,
+# input logic multi_hit_i,
+# input logic no_prot_i,
+# input logic prefetch_i,
+# input logic [AXI_M_ADDR_WIDTH-1:0] out_addr_i,
+# input logic cache_coherent_i,
+# output logic port1_accept_o,
+# output logic port1_drop_o,
+# output logic port1_miss_o,
+# output logic port2_accept_o,
+# output logic port2_drop_o,
+# output logic port2_miss_o,
+# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o,
+# output logic cache_coherent_o,
+# output logic miss_o,
+# output logic multi_o,
+# output logic prot_o,
+# output logic prefetch_o,
+# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i,
+# input logic [AXI_ID_WIDTH-1:0] in_id_i,
+# input logic [7:0] in_len_i,
+# input logic [AXI_USER_WIDTH-1:0] in_user_i,
+# output logic [AXI_S_ADDR_WIDTH-1:0] in_addr_o,
+# output logic [AXI_ID_WIDTH-1:0] in_id_o,
+# output logic [7:0] in_len_o,
+# output logic [AXI_USER_WIDTH-1:0] in_user_o
+# );
+#
+""" #docstring_begin
+
+ //-------------Internal Signals----------------------
+
+ typedef enum logic {IDLE, WAIT} state_t;
+ state_t state_SP; // Present state
+ state_t state_SN; // Next State
+
+ logic port1_accept_SN;
+ logic port1_drop_SN;
+ logic port1_miss_SN;
+ logic port2_accept_SN;
+ logic port2_drop_SN;
+ logic port2_miss_SN;
+ logic miss_SN;
+ logic multi_SN;
+ logic prot_SN;
+ logic prefetch_SN;
+ logic cache_coherent_SN;
+ logic [AXI_M_ADDR_WIDTH-1:0] out_addr_DN;
+
+ logic out_reg_en_S;
+
+ //----------FSM comb------------------------------
+
+ always_comb begin: FSM_COMBO
+ state_SN = state_SP;
+
+ port1_accept_SN = 1'b0;
+ port1_drop_SN = 1'b0;
+ port1_miss_SN = 1'b0;
+ port2_accept_SN = 1'b0;
+ port2_drop_SN = 1'b0;
+ port2_miss_SN = 1'b0;
+ miss_SN = 1'b0;
+ multi_SN = 1'b0;
+ prot_SN = 1'b0;
+ prefetch_SN = 1'b0;
+ cache_coherent_SN = 1'b0;
+ out_addr_DN = '0;
+
+ out_reg_en_S = 1'b0; // by default hold register output
+
+ unique case(state_SP)
+ IDLE :
+ if ( (port1_addr_valid_i & select_i) | (port2_addr_valid_i & ~select_i) ) begin
+ out_reg_en_S = 1'b1;
+ state_SN = WAIT;
+
+ // Select inputs for output registers
+ if (port1_addr_valid_i & select_i) begin
+ port1_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
+ port1_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
+ port1_miss_SN = no_hit_i;
+ port2_accept_SN = 1'b0;
+ port2_drop_SN = 1'b0;
+ port2_miss_SN = 1'b0;
+ end else if (port2_addr_valid_i & ~select_i) begin
+ port1_accept_SN = 1'b0;
+ port1_drop_SN = 1'b0;
+ port1_miss_SN = 1'b0;
+ port2_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
+ port2_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
+ port2_miss_SN = no_hit_i;
+ end
+
+ miss_SN = port1_miss_SN | port2_miss_SN;
+ multi_SN = multi_hit_i;
+ prot_SN = ~no_prot_i;
+ prefetch_SN = ~no_hit_i & prefetch_i;
+
+ cache_coherent_SN = cache_coherent_i;
+ out_addr_DN = out_addr_i;
+ end
+
+ WAIT :
+ if ( port1_sent_i | port2_sent_i ) begin
+ out_reg_en_S = 1'b1; // "clear" the register
+ state_SN = IDLE;
+ end
+
+ default : begin
+ state_SN = IDLE;
+ end
+ endcase
+ end
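+
+ // Note: accept and drop are mutually exclusive. A request on the selected
+ // port is accepted iff none of no_hit, multi_hit, a protection violation
+ // (~no_prot) or prefetch is flagged; otherwise it is dropped and the
+ // corresponding miss/multi/prot/prefetch outputs are registered.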
+
+ //----------FSM seq-------------------------------
+
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: FSM_SEQ
+ if (Rst_RBI == 1'b0)
+ state_SP <= IDLE;
+ else
+ state_SP <= state_SN;
+ end
+
+ //----------Output seq--------------------------
+
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: OUTPUT_SEQ
+ if (Rst_RBI == 1'b0) begin
+ port1_accept_o = 1'b0;
+ port1_drop_o = 1'b0;
+ port1_miss_o = 1'b0;
+ port2_accept_o = 1'b0;
+ port2_drop_o = 1'b0;
+ port2_miss_o = 1'b0;
+ miss_o = 1'b0;
+ multi_o = 1'b0;
+ prot_o = 1'b0;
+ prefetch_o = 1'b0;
+ cache_coherent_o = 1'b0;
+ out_addr_o = '0;
+ in_addr_o = '0;
+ in_id_o = '0;
+ in_len_o = '0;
+ in_user_o = '0;
+ end else if (out_reg_en_S == 1'b1) begin
+ port1_accept_o = port1_accept_SN;
+ port1_drop_o = port1_drop_SN;
+ port1_miss_o = port1_miss_SN;
+ port2_accept_o = port2_accept_SN;
+ port2_drop_o = port2_drop_SN;
+ port2_miss_o = port2_miss_SN;
+ miss_o = miss_SN;
+ multi_o = multi_SN;
+ prot_o = prot_SN;
+ prefetch_o = prefetch_SN;
+ cache_coherent_o = cache_coherent_SN;
+ out_addr_o = out_addr_DN;
+ in_addr_o = in_addr_i;
+ in_id_o = in_id_i;
+ in_len_o = in_len_i;
+ in_user_o = in_user_i;
+ end
+ end // block: OUTPUT_SEQ
+"""
+#
+# endmodule
+#
+#
--- /dev/null
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class l2_tlb(Elaboratable):
+
+    def __init__(self, AXI_S_ADDR_WIDTH=32, AXI_M_ADDR_WIDTH=40,
+                 AXI_LITE_DATA_WIDTH=64, AXI_LITE_ADDR_WIDTH=32):
+        # Parameter defaults mirror the (commented-out) parameter list of the
+        # original SystemVerilog module reproduced below.
+        self.clk_i = Signal()  # input
+        self.rst_ni = Signal()  # input
+        self.we_i = Signal()  # input
+        self.waddr_i = Signal(AXI_LITE_ADDR_WIDTH)  # input
+        self.wdata_i = Signal(AXI_LITE_DATA_WIDTH)  # input
+        self.start_i = Signal()  # input
+        self.busy_o = Signal()  # output
+        self.in_addr_i = Signal(AXI_S_ADDR_WIDTH)  # input
+        self.rw_type_i = Signal()  # input
+        self.out_ready_i = Signal()  # input
+        self.out_valid_o = Signal()  # output
+        self.hit_o = Signal()  # output
+        self.miss_o = Signal()  # output
+        self.prot_o = Signal()  # output
+        self.multi_o = Signal()  # output
+        self.cache_coherent_o = Signal()  # output
+        self.out_addr_o = Signal(AXI_M_ADDR_WIDTH)  # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+
+
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# //`include "pulp_soc_defines.sv"
+#
+# ////import CfMath::log2;
+#
+# //`define MULTI_HIT_FULL_SET // Enable full multi hit detection. Always the entire set is searched.
+# //`define MULTI_HIT_CUR_CYCLE // Enable partial multi hit detection. Only multi hits in the same search cycle are detected.
+#
+# //`ifdef MULTI_HIT_FULL_SET
+# // `ifndef MULTI_HIT_CUR_CYCLE
+# // `define MULTI_HIT_CUR_CYCLE
+# // `endif
+# //`endif
+#
+# module l2_tlb
+# //#(
+# // parameter AXI_S_ADDR_WIDTH = 32,
+# // parameter AXI_M_ADDR_WIDTH = 40,
+# // parameter AXI_LITE_DATA_WIDTH = 64,
+# // parameter AXI_LITE_ADDR_WIDTH = 32,
+# // parameter N_SETS = 32,
+# // parameter N_OFFSETS = 4, //per port. There are 2 ports.
+# // parameter PAGE_SIZE = 4096, // 4kB
+# // parameter N_PAR_VA_RAMS = 4,
+# // parameter HIT_OFFSET_STORE_WIDTH = 2 // Num of bits of VA RAM offset stored. This should not be greater than OFFSET_WIDTH
+# // )
+# (
+# input logic clk_i,
+# input logic rst_ni,
+#
+# input logic we_i,
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] waddr_i,
+# input logic [AXI_LITE_DATA_WIDTH-1:0] wdata_i,
+#
+# input logic start_i,
+# output logic busy_o,
+# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i,
+# input logic rw_type_i, //1 => write, 0=> read
+#
+# input logic out_ready_i,
+# output logic out_valid_o,
+# output logic hit_o,
+# output logic miss_o,
+# output logic prot_o,
+# output logic multi_o,
+# output logic cache_coherent_o,
+# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o
+# );
+#
+""" #docstring_begin
+
+ localparam VA_RAM_DEPTH = N_SETS * N_OFFSETS * 2;
+ localparam PA_RAM_DEPTH = VA_RAM_DEPTH * N_PAR_VA_RAMS;
+ localparam VA_RAM_ADDR_WIDTH = log2(VA_RAM_DEPTH);
+ localparam PA_RAM_ADDR_WIDTH = log2(PA_RAM_DEPTH);
+ localparam SET_WIDTH = log2(N_SETS);
+ localparam OFFSET_WIDTH = log2(N_OFFSETS);
+ localparam LL_WIDTH = log2(N_PAR_VA_RAMS);
+ localparam IGNORE_LSB = log2(PAGE_SIZE);
+
+ localparam VA_RAM_DATA_WIDTH = AXI_S_ADDR_WIDTH - IGNORE_LSB + 4;
+ localparam PA_RAM_DATA_WIDTH = AXI_M_ADDR_WIDTH - IGNORE_LSB;
+
+ logic [N_PAR_VA_RAMS-1:0] hit, prot, multi_hit, cache_coherent;
+ logic [N_PAR_VA_RAMS-1:0] ram_we;
+ logic last_search, last_search_next;
+ logic first_search, first_search_next;
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] ram_waddr;
+ logic [N_PAR_VA_RAMS-1:0][SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr;
+ logic pa_ram_we;
+ logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr, pa_port0_waddr; // PA RAM read, Write addr;
+ logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr_reg_SN, pa_port0_raddr_reg_SP; // registered addresses, needed for WAIT_ON_WRITE;
+ logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_addr; // PA RAM addr
+ logic [PA_RAM_DATA_WIDTH-1:0] pa_port0_data, pa_data, pa_port0_data_reg; // PA RAM data
+ logic pa_ram_store_data_SN, pa_ram_store_data_SP;
+ logic hit_top, prot_top, multi_hit_top, first_hit_top;
+ logic output_sent;
+ int hit_block_num;
+
+ logic searching, search_done;
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, port0_raddr; // VA RAM port0 addr
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr; // VA RAM port1 addr
+ logic [OFFSET_WIDTH-1:0] offset_addr, offset_addr_d;
+ logic [OFFSET_WIDTH-1:0] offset_start_addr, offset_end_addr;
+ logic [SET_WIDTH-1:0] set_num;
+
+ logic va_output_valid;
+ logic searching_q;
+
+ genvar z;
+
+ // Search FSM
+ typedef enum logic [1:0] {IDLE, SEARCH, DONE} search_state_t;
+ search_state_t search_SP; // Present state
+ search_state_t search_SN; // Next State
+
+ // Output FSM
+ typedef enum logic [1:0] {OUT_IDLE, SEND_OUTPUT, WAIT_ON_WRITE} out_state_t;
+ out_state_t out_SP; // Present state
+ out_state_t out_SN; // Next State
+
+ logic miss_next;
+ logic hit_next;
+ logic prot_next;
+ logic multi_next;
+ logic cache_coherent_next;
+
+ // Generate the VA Block rams and their surrounding logic
+ generate
+ for (z = 0; z < N_PAR_VA_RAMS; z++) begin : VA_RAMS
+ check_ram
+ #(
+ .ADDR_WIDTH ( AXI_S_ADDR_WIDTH ),
+ .RAM_DATA_WIDTH ( VA_RAM_DATA_WIDTH ),
+ .PAGE_SIZE ( PAGE_SIZE ),
+ .SET_WIDTH ( SET_WIDTH ),
+ .OFFSET_WIDTH ( OFFSET_WIDTH )
+ )
+ u_check_ram
+ (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .in_addr ( in_addr_i ),
+ .rw_type ( rw_type_i ),
+ .ram_we ( ram_we[z] ),
+ .port0_addr ( port0_addr ),
+ .port1_addr ( port1_addr ),
+ .ram_wdata ( wdata_i[VA_RAM_DATA_WIDTH-1:0] ),
+ .output_sent ( output_sent ),
+ .output_valid ( va_output_valid ),
+ .offset_addr_d ( offset_addr_d ),
+ .hit_addr ( hit_addr[z] ),
+ .master ( cache_coherent[z] ),
+ .hit ( hit[z] ),
+ .multi_hit ( multi_hit[z] ),
+ .prot ( prot[z] )
+ );
+ end // for (z = 0; z < N_PORTS; z++)
+ endgenerate
+
+ ////////////////// ---------------- Control and Address --------------- ////////////////////////
+ // FSM
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ search_SP <= IDLE;
+ end else begin
+ search_SP <= search_SN;
+ end
+ end
+
+ always_comb begin : SEARCH_FSM
+ search_SN = search_SP;
+ busy_o = 1'b0;
+ searching = 1'b0;
+ search_done = 1'b0;
+ last_search_next = 1'b0;
+ first_search_next = first_search;
+
+ unique case (search_SP)
+ IDLE : begin
+ if (start_i) begin
+ search_SN = SEARCH;
+ first_search_next = 1'b1;
+ end
+ end
+
+ SEARCH : begin
+ busy_o = 1'b1;
+
+ // detect last search cycle
+ if ( (first_search == 1'b0) && (offset_addr == offset_end_addr) )
+ last_search_next = 1'b1;
+
+ // pause search during VA RAM reconfiguration
+ if (|ram_we) begin
+ searching = 1'b0;
+ end else begin
+ searching = 1'b1;
+ first_search_next = 1'b0;
+ end
+
+ if (va_output_valid) begin
+ // stop search
+`ifdef MULTI_HIT_FULL_SET
+ if (last_search | prot_top | multi_hit_top) begin
+`else
+ if (last_search | prot_top | multi_hit_top | hit_top ) begin
+`endif
+ search_SN = DONE;
+ search_done = 1'b1;
+ end
+ end
+ end
+
+ DONE : begin
+ busy_o = 1'b1;
+ if (out_valid_o & out_ready_i)
+ search_SN = IDLE;
+ end
+
+ default : begin
+ search_SN = IDLE;
+ end
+ endcase // case (prot_SP)
+ end // always_comb begin
+
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ last_search <= 1'b0;
+ first_search <= 1'b0;
+ end else begin
+ last_search <= last_search_next;
+ first_search <= first_search_next;
+ end
+ end
+
+ /*
+ * VA RAM address generation
+ *
+ * The input address and set number, and thus the offset start address, are available in the
+ * cycle after the start signal. The buffered offset_addr becomes available one cycle later.
+ * During the first search cycle, we therefore directly use offset_start_addr for the lookup.
+ */
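+ // Resulting VA RAM address layout (see the assigns below):
+ //   [SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1]  set_num
+ //   [OFFSET_WIDTH]                             port select (0: port 0, 1: port 1)
+ //   [OFFSET_WIDTH-1 : 0]                       offset within the half-set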
+ assign set_num = in_addr_i[SET_WIDTH+IGNORE_LSB -1 : IGNORE_LSB];
+
+ assign port0_raddr[OFFSET_WIDTH] = 1'b0;
+ assign port1_addr [OFFSET_WIDTH] = 1'b1;
+
+ assign port0_raddr[OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr;
+ assign port1_addr [OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr;
+
+ assign port0_raddr[SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num;
+ assign port1_addr [SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num;
+
+ assign port0_addr = ram_we ? ram_waddr : port0_raddr;
+
+ // The outputs of the BRAMs are only valid if in the previous cycle:
+ // 1. the inputs were valid, and
+ // 2. the BRAMs were not written to.
+ // Otherwise, the outputs must be ignored.
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ searching_q <= 1'b0;
+ end else begin
+ searching_q <= searching;
+ end
+ end
+ assign va_output_valid = searching_q;
+
+ // Address offset for looking up the VA RAMs
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ offset_addr <= 0;
+ end else if (first_search) begin
+ offset_addr <= offset_start_addr + 1'b1;
+ end else if (searching) begin
+ offset_addr <= offset_addr + 1'b1;
+ end
+ end
+
+ // Delayed address offset for looking up the PA RAM upon a hit in the VA RAMs
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ offset_addr_d <= 0;
+ end else if (first_search) begin
+ offset_addr_d <= offset_start_addr;
+ end else if (searching) begin
+ offset_addr_d <= offset_addr_d + 1'b1;
+ end
+ end
+
+ // Store the offset addr of the last hit to reduce the latency of the next search.
+ generate
+ if (HIT_OFFSET_STORE_WIDTH > 0) begin : OFFSET_STORE
+`ifndef MULTI_HIT_FULL_SET
+ logic [N_SETS-1:0][HIT_OFFSET_STORE_WIDTH-1:0] hit_offset_addr; // Contains offset addr for previous hit for every SET.
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_reg;
+
+ assign offset_start_addr = { hit_offset_addr[set_num] , {{OFFSET_WIDTH-HIT_OFFSET_STORE_WIDTH}{1'b0}} };
+ assign offset_end_addr = hit_offset_addr[set_num]-1'b1;
+
+ // Register the hit addr
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ hit_addr_reg <= 0;
+ end else if (hit_top) begin
+ hit_addr_reg <= hit_addr[hit_block_num];
+ end
+ end
+
+ // Store hit addr for each set. The next search in the same set will start from the saved addr.
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ hit_offset_addr <= 0;
+ end else if (hit_o) begin
+ hit_offset_addr[set_num][HIT_OFFSET_STORE_WIDTH-1:0] <= hit_addr_reg[OFFSET_WIDTH-1 : (OFFSET_WIDTH - HIT_OFFSET_STORE_WIDTH)];
+ end
+ end
+`else // No need to store offset if full multi hit detection is enabled because the entire SET is searched.
+ assign offset_start_addr = 0;
+ assign offset_end_addr = {OFFSET_WIDTH{1'b1}};
+`endif
+ end else begin // if (HIT_OFFSET_STORE_WIDTH > 0)
+ assign offset_start_addr = 0;
+ assign offset_end_addr = {OFFSET_WIDTH{1'b1}};
+ end
+ endgenerate
+
+ assign prot_top = |prot;
+
+ //////////////////////////////////////////////////////////////////////////////////////
+ // check for hit, multi hit
+ // In case of a multi hit, the hit_block_num indicates the lowest VA RAM with a hit.
+ // In case of a multi hit in the same VA RAM, Port 0 is given priority.
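+ // Example: with hit = 4'b0110, the descending loop leaves hit_block_num = 1
+ // (the lowest VA RAM with a hit); with MULTI_HIT_CUR_CYCLE defined,
+ // multi_hit_top is raised because first_hit_top is already set when RAM 1
+ // is visited.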
+ always_comb begin : HIT_CHECK
+ hit_top = |hit;
+ hit_block_num = 0;
+ first_hit_top = 1'b0;
+ multi_hit_top = 1'b0;
+ for (int i=N_PAR_VA_RAMS-1; i>=0; i--) begin
+ if (hit[i] == 1'b1) begin
+`ifdef MULTI_HIT_CUR_CYCLE
+ if (multi_hit[i] | first_hit_top ) begin
+ multi_hit_top = 1'b1;
+ end
+`endif
+ first_hit_top = 1'b1;
+ hit_block_num = i;
+ end
+ end // for (int i=0; i<N_PAR_VA_RAMS; i++)
+ end // always_comb begin
+
+ ///////////////////// ------------- Outputs ------------ //////////////////////////////////
+ //// FSM
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ out_SP <= OUT_IDLE;
+ pa_ram_store_data_SP <= 1'b0;
+ pa_port0_raddr_reg_SP <= 'b0;
+ end else begin
+ out_SP <= out_SN;
+ pa_ram_store_data_SP <= pa_ram_store_data_SN;
+ pa_port0_raddr_reg_SP <= pa_port0_raddr_reg_SN;
+ end
+ end
+
+ always_comb begin : OUTPUT_FSM
+ out_SN = out_SP;
+
+ miss_next = miss_o;
+ prot_next = prot_o;
+ multi_next = multi_o;
+ hit_next = hit_o;
+ cache_coherent_next = cache_coherent_o;
+ pa_port0_raddr_reg_SN = pa_port0_raddr_reg_SP;
+
+ pa_port0_raddr = 'b0;
+ pa_ram_store_data_SN = 1'b0;
+
+ out_valid_o = 1'b0;
+ output_sent = 1'b0;
+
+ unique case (out_SP)
+ OUT_IDLE : begin
+ hit_next = 1'b0;
+ miss_next = 1'b0;
+ prot_next = 1'b0;
+ multi_next = 1'b0;
+ cache_coherent_next = 1'b0;
+
+ // abort transaction
+ if ((search_done & ~hit_top) | prot_top | multi_hit_top) begin
+ out_SN = SEND_OUTPUT;
+
+ if (search_done & ~hit_top) begin
+ miss_next = 1'b1;
+ end
+ if (prot_top) begin
+ prot_next = 1'b1;
+ hit_next = 1'b1;
+ end
+ if (multi_hit_top) begin
+ multi_next = 1'b1;
+ hit_next = 1'b1;
+ end
+
+ // read PA RAM
+ end else if (search_done & hit_top) begin
+ hit_next = 1'b1;
+ cache_coherent_next = cache_coherent[hit_block_num];
+ pa_port0_raddr = (N_PAR_VA_RAMS * hit_addr[hit_block_num]) + hit_block_num;
+ pa_port0_raddr_reg_SN = pa_port0_raddr;
+
+ // read PA RAM now
+ if (~pa_ram_we) begin
+ out_SN = SEND_OUTPUT;
+ pa_ram_store_data_SN = 1'b1;
+
+ // read PA RAM after PA RAM reconfiguration
+ end else begin // pa_ram_we
+ out_SN = WAIT_ON_WRITE;
+
+ end
+ end
+ end
+
+ WAIT_ON_WRITE : begin
+ if ( ~pa_ram_we ) begin
+ out_SN = SEND_OUTPUT;
+ pa_port0_raddr = pa_port0_raddr_reg_SP;
+ pa_ram_store_data_SN = 1'b1;
+ end
+ end
+
+ SEND_OUTPUT : begin
+ out_valid_o = 1'b1;
+ if (out_ready_i) begin
+ out_SN = OUT_IDLE;
+ output_sent = 1'b1;
+ end
+ end
+
+ default : begin
+ out_SN = OUT_IDLE;
+ end
+
+ endcase // case (out_SP)
+ end // always_comb begin
+
+ //// Output signals
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ miss_o <= 1'b0;
+ prot_o <= 1'b0;
+ multi_o <= 1'b0;
+ hit_o <= 1'b0;
+ cache_coherent_o <= 1'b0;
+ end else begin
+ miss_o <= miss_next;
+ prot_o <= prot_next;
+ multi_o <= multi_next;
+ hit_o <= hit_next;
+ cache_coherent_o <= cache_coherent_next;
+ end
+ end
+
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+ ///////////////////// --------------- Physical Address -------------- ////////////////////////////
+
+ /// PA Block RAM
+ ram_tp_no_change #(
+ .ADDR_WIDTH( PA_RAM_ADDR_WIDTH ),
+ .DATA_WIDTH( PA_RAM_DATA_WIDTH )
+ )
+ pa_ram
+ (
+ .clk ( clk_i ),
+ .we ( pa_ram_we ),
+ .addr0 ( pa_port0_addr ),
+ .addr1 ( '0 ),
+ .d_i ( wdata_i[PA_RAM_DATA_WIDTH-1:0] ),
+ .d0_o ( pa_port0_data ),
+ .d1_o ( )
+ );
+
+ assign out_addr_o[IGNORE_LSB-1:0] = in_addr_i[IGNORE_LSB-1:0];
+ assign out_addr_o[AXI_M_ADDR_WIDTH-1:IGNORE_LSB] = pa_data;
+
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ pa_port0_data_reg <= 0;
+ end else if (pa_ram_store_data_SP) begin
+ pa_port0_data_reg <= pa_port0_data;
+ end
+ end
+
+ assign pa_data = pa_ram_store_data_SP ? pa_port0_data : pa_port0_data_reg;
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+///// Write enable for all block rams
+generate if (LL_WIDTH != 0) begin
+ always_comb begin
+ var reg[LL_WIDTH:0] para;
+ var int para_int;
+ for (para = 0; para < N_PAR_VA_RAMS; para=para+1'b1) begin
+ para_int = int'(para);
+ ram_we[para_int] = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b0) && (waddr_i[LL_WIDTH-1:0] == para);
+ end
+ end
+end else begin
+ assign ram_we[0] = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b0);
+end
+
+endgenerate
+
+// Addresses are word, not byte addresses
+assign pa_ram_we = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b1); //waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] will be 0 for all VA writes and 1 for all PA writes
+assign ram_waddr = waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH-1:LL_WIDTH];
+assign pa_port0_waddr = waddr_i[PA_RAM_ADDR_WIDTH-1:0];
+assign pa_port0_addr = pa_ram_we ? pa_port0_waddr : pa_port0_raddr;
+
+"""
+# endmodule
+#
+# // vim: ts=3 sw=3 sts=3 et nosmartindent autoindent foldmethod=marker tw=100
+#
+#
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+
+# this file has been generated by sv2nmigen
+
+#
+# //`include "pulp_soc_defines.sv"
+#
+# ////import CfMath::log2;
+#
+# //`define MY_ARRAY_SUM(MY_ARRAY,ARRAY_SIZE) ( (ARRAY_SIZE==1) ? MY_ARRAY[0] : (ARRAY_SIZE==2) ? MY_ARRAY[0] + MY_ARRAY[1] : (ARRAY_SIZE==3) ? MY_ARRAY[0] + MY_ARRAY[1] + MY_ARRAY[2] : (ARRAY_SIZE==4) ? MY_ARRAY[0] + MY_ARRAY[1] + MY_ARRAY[2] + MY_ARRAY[3] : 0 )
+#
+
+# module rab_core
+# #(
+# parameter N_PORTS = 3,
+# parameter N_L2_SETS = 32,
+# parameter N_L2_SET_ENTRIES = 32,
+# parameter AXI_DATA_WIDTH = 64,
+# parameter AXI_S_ADDR_WIDTH = 32,
+# parameter AXI_M_ADDR_WIDTH = 40,
+# parameter AXI_LITE_DATA_WIDTH = 64,
+# parameter AXI_LITE_ADDR_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 8,
+# parameter AXI_USER_WIDTH = 6,
+# parameter MH_FIFO_DEPTH = 16
+# )
+# (
+# input logic Clk_CI,
+# input logic Rst_RBI,
+#
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi_awaddr,
+# input logic s_axi_awvalid,
+# output logic s_axi_awready,
+#
+# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi_wdata,
+# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi_wstrb,
+# input logic s_axi_wvalid,
+# output logic s_axi_wready,
+#
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi_araddr,
+# input logic s_axi_arvalid,
+# output logic s_axi_arready,
+#
+# input logic s_axi_rready,
+# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi_rdata,
+# output logic [1:0] s_axi_rresp,
+# output logic s_axi_rvalid,
+#
+# output logic [1:0] s_axi_bresp,
+# output logic s_axi_bvalid,
+# input logic s_axi_bready,
+#
+# output logic [N_PORTS-1:0] int_miss,
+# output logic [N_PORTS-1:0] int_prot,
+# output logic [N_PORTS-1:0] int_multi,
+# output logic [N_PORTS-1:0] int_prefetch,
+# output logic int_mhf_full,
+#
+# output logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_axaddr_o,
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_axid_o,
+# output logic [N_PORTS-1:0] [7:0] int_axlen_o,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_axuser_o,
+#
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] port1_addr,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] port1_id,
+# input logic [N_PORTS-1:0] [7:0] port1_len,
+# input logic [N_PORTS-1:0] [2:0] port1_size,
+# input logic [N_PORTS-1:0] port1_addr_valid,
+# input logic [N_PORTS-1:0] port1_type,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] port1_user,
+# input logic [N_PORTS-1:0] port1_sent,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] port1_out_addr,
+# output logic [N_PORTS-1:0] port1_cache_coherent,
+# output logic [N_PORTS-1:0] port1_accept,
+# output logic [N_PORTS-1:0] port1_drop,
+# output logic [N_PORTS-1:0] port1_miss,
+#
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] port2_addr,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] port2_id,
+# input logic [N_PORTS-1:0] [7:0] port2_len,
+# input logic [N_PORTS-1:0] [2:0] port2_size,
+# input logic [N_PORTS-1:0] port2_addr_valid,
+# input logic [N_PORTS-1:0] port2_type,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] port2_user,
+# input logic [N_PORTS-1:0] port2_sent,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] port2_out_addr,
+# output logic [N_PORTS-1:0] port2_cache_coherent,
+# output logic [N_PORTS-1:0] port2_accept,
+# output logic [N_PORTS-1:0] port2_drop,
+# output logic [N_PORTS-1:0] port2_miss,
+#
+# input logic [N_PORTS-1:0] miss_l2_i,
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] miss_l2_addr_i,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] miss_l2_id_i,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] miss_l2_user_i,
+#
+# output logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] wdata_l2_o,
+# output logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] waddr_l2_o,
+# output logic [N_PORTS-1:0] wren_l2_o
+# );
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+# Default parameter values, taken from the commented-out SystemVerilog
+# parameter list above (the generated class does not take parameters yet).
+N_PORTS = 3
+N_L2_SETS = 32
+N_L2_SET_ENTRIES = 32
+AXI_DATA_WIDTH = 64
+AXI_S_ADDR_WIDTH = 32
+AXI_M_ADDR_WIDTH = 40
+AXI_LITE_DATA_WIDTH = 64
+AXI_LITE_ADDR_WIDTH = 32
+AXI_ID_WIDTH = 8
+AXI_USER_WIDTH = 6
+MH_FIFO_DEPTH = 16
+
+
+class rab_core(Elaboratable):
+
+ def __init__(self):
+ self.s_axi_awaddr = Signal(AXI_LITE_ADDR_WIDTH) # input
+ self.s_axi_awvalid = Signal() # input
+ self.s_axi_awready = Signal() # output
+ self.s_axi_wdata = Signal(AXI_LITE_DATA_WIDTH) # input
+        self.s_axi_wstrb = Signal(AXI_LITE_DATA_WIDTH // 8)  # input
+ self.s_axi_wvalid = Signal() # input
+ self.s_axi_wready = Signal() # output
+ self.s_axi_araddr = Signal(AXI_LITE_ADDR_WIDTH) # input
+ self.s_axi_arvalid = Signal() # input
+ self.s_axi_arready = Signal() # output
+ self.s_axi_rready = Signal() # input
+ self.s_axi_rdata = Signal(AXI_LITE_DATA_WIDTH) # output
+ self.s_axi_rresp = Signal(2) # output
+ self.s_axi_rvalid = Signal() # output
+ self.s_axi_bresp = Signal(2) # output
+ self.s_axi_bvalid = Signal() # output
+ self.s_axi_bready = Signal() # input
+ self.int_miss = Signal(N_PORTS) # output
+ self.int_prot = Signal(N_PORTS) # output
+ self.int_multi = Signal(N_PORTS) # output
+ self.int_prefetch = Signal(N_PORTS) # output
+ self.int_mhf_full = Signal() # output
+ self.int_axaddr_o = Signal() # output
+ self.int_axid_o = Signal() # output
+ self.int_axlen_o = Signal() # output
+ self.int_axuser_o = Signal() # output
+ self.port1_addr = Signal() # input
+ self.port1_id = Signal() # input
+ self.port1_len = Signal() # input
+ self.port1_size = Signal() # input
+ self.port1_addr_valid = Signal(N_PORTS) # input
+ self.port1_type = Signal(N_PORTS) # input
+ self.port1_user = Signal() # input
+ self.port1_sent = Signal(N_PORTS) # input
+ self.port1_out_addr = Signal() # output
+ self.port1_cache_coherent = Signal(N_PORTS) # output
+ self.port1_accept = Signal(N_PORTS) # output
+ self.port1_drop = Signal(N_PORTS) # output
+ self.port1_miss = Signal(N_PORTS) # output
+ self.port2_addr = Signal() # input
+ self.port2_id = Signal() # input
+ self.port2_len = Signal() # input
+ self.port2_size = Signal() # input
+ self.port2_addr_valid = Signal(N_PORTS) # input
+ self.port2_type = Signal(N_PORTS) # input
+ self.port2_user = Signal() # input
+ self.port2_sent = Signal(N_PORTS) # input
+ self.port2_out_addr = Signal() # output
+ self.port2_cache_coherent = Signal(N_PORTS) # output
+ self.port2_accept = Signal(N_PORTS) # output
+ self.port2_drop = Signal(N_PORTS) # output
+ self.port2_miss = Signal(N_PORTS) # output
+ self.miss_l2_i = Signal(N_PORTS) # input
+ self.miss_l2_addr_i = Signal() # input
+ self.miss_l2_id_i = Signal() # input
+ self.miss_l2_user_i = Signal() # input
+ self.wdata_l2_o = Signal() # output
+ self.waddr_l2_o = Signal() # output
+ self.wren_l2_o = Signal(N_PORTS) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+
+
+"""
+
+
+ // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗
+ // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝
+ // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗
+ // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║
+ // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║
+ // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝
+ // signals
+
+ localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY;
+
+ localparam integer N_SLICES[N_PORTS-1:0] = `N_SLICES_ARRAY;
+ localparam N_SLICES_TOT = `MY_ARRAY_SUM(N_SLICES,N_PORTS);
+ localparam N_SLICES_MAX = `N_SLICES_MAX;
+
+ localparam N_REGS = 4*N_SLICES_TOT + 4;
+ localparam AXI_SIZE_WIDTH = log2(AXI_DATA_WIDTH/8);
+
+ localparam PORT_ID_WIDTH = (N_PORTS < 2) ? 1 : log2(N_PORTS);
+ localparam MISS_META_WIDTH = PORT_ID_WIDTH + AXI_USER_WIDTH + AXI_ID_WIDTH;
+
+ logic [N_PORTS-1:0] [15:0] p1_burst_size;
+ logic [N_PORTS-1:0] [15:0] p2_burst_size;
+
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p1_align_addr;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p2_align_addr;
+
+ logic [N_PORTS-1:0] [AXI_SIZE_WIDTH-1:0] p1_mask;
+ logic [N_PORTS-1:0] [AXI_SIZE_WIDTH-1:0] p2_mask;
+
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p1_max_addr;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p2_max_addr;
+
+ logic [N_PORTS-1:0] p1_prefetch;
+ logic [N_PORTS-1:0] p2_prefetch;
+
+ logic [N_PORTS-1:0] int_rw;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_addr_min;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_addr_max;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_id;
+ logic [N_PORTS-1:0] [7:0] int_len;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_user;
+
+ logic [N_PORTS-1:0] hit;
+ logic [N_PORTS-1:0] prot;
+ logic [N_PORTS-1:0] prefetch;
+
+ logic [N_PORTS-1:0] no_hit;
+ logic [N_PORTS-1:0] no_prot;
+
+ logic [N_PORTS-1:0] [N_SLICES_MAX-1:0] hit_slices;
+ logic [N_PORTS-1:0] [N_SLICES_MAX-1:0] prot_slices;
+
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] out_addr;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] out_addr_reg;
+
+ logic [N_PORTS-1:0] cache_coherent;
+ logic [N_PORTS-1:0] cache_coherent_reg;
+
+ logic [N_PORTS-1:0] select;
+ reg [N_PORTS-1:0] curr_priority;
+
+ reg [N_PORTS-1:0] multi_hit;
+
+ logic [N_PORTS-1:0] miss_valid_mhf;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] miss_addr_mhf;
+ logic [N_PORTS-1:0] [MISS_META_WIDTH-1:0] miss_meta_mhf;
+
+ logic [N_REGS-1:0] [63:0] int_cfg_regs;
+ logic [N_PORTS-1:0] [4*N_SLICES_MAX-1:0] [63:0] int_cfg_regs_slices;
+
+ logic L1AllowMultiHit_S;
+
+ genvar z;
+
+ // █████╗ ███████╗███████╗██╗ ██████╗ ███╗ ██╗███╗ ███╗███████╗███╗ ██╗████████╗███████╗
+ // ██╔══██╗██╔════╝██╔════╝██║██╔════╝ ████╗ ██║████╗ ████║██╔════╝████╗ ██║╚══██╔══╝██╔════╝
+ // ███████║███████╗███████╗██║██║ ███╗██╔██╗ ██║██╔████╔██║█████╗ ██╔██╗ ██║ ██║ ███████╗
+ // ██╔══██║╚════██║╚════██║██║██║ ██║██║╚██╗██║██║╚██╔╝██║██╔══╝ ██║╚██╗██║ ██║ ╚════██║
+ // ██║ ██║███████║███████║██║╚██████╔╝██║ ╚████║██║ ╚═╝ ██║███████╗██║ ╚████║ ██║ ███████║
+ // ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝
+ // assignments
+
+ always_comb
+ begin : PORT_SELECT
+ var integer idx;
+
+ for (idx=0; idx<N_PORTS; idx++) begin
+
+ // select = 1 -> port1 active
+ // select = 0 -> port2 active
+ select[idx] = (curr_priority[idx] & port1_addr_valid[idx]) | ~port2_addr_valid[idx];
+
+ p1_burst_size[idx] = (port1_len[idx] + 1) << port1_size[idx];
+ p2_burst_size[idx] = (port2_len[idx] + 1) << port2_size[idx];
+
+ // align min addr for max addr computation to allow for smart AXI bursts around the 4k boundary
+ if (port1_size[idx] == 3'b001)
+ p1_mask[idx] = 3'b110;
+ else if (port1_size[idx] == 3'b010)
+ p1_mask[idx] = 3'b100;
+ else if (port1_size[idx] == 3'b011)
+ p1_mask[idx] = 3'b000;
+ else
+ p1_mask[idx] = 3'b111;
+
+ p1_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port1_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH];
+ p1_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port1_addr[idx][AXI_SIZE_WIDTH-1:0] & p1_mask[idx];
+
+ if (port2_size[idx] == 3'b001)
+ p2_mask[idx] = 3'b110;
+ else if (port2_size[idx] == 3'b010)
+ p2_mask[idx] = 3'b100;
+ else if (port2_size[idx] == 3'b011)
+ p2_mask[idx] = 3'b000;
+ else
+ p2_mask[idx] = 3'b111;
+
+ if (port1_user[idx] == {AXI_USER_WIDTH{1'b1}})
+ p1_prefetch[idx] = 1'b1;
+ else
+ p1_prefetch[idx] = 1'b0;
+
+ if (port2_user[idx] == {AXI_USER_WIDTH{1'b1}})
+ p2_prefetch[idx] = 1'b1;
+ else
+ p2_prefetch[idx] = 1'b0;
+
+ p2_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port2_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH];
+ p2_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port2_addr[idx][AXI_SIZE_WIDTH-1:0] & p2_mask[idx];
+
+ p1_max_addr[idx] = p1_align_addr[idx] + p1_burst_size[idx] - 1;
+ p2_max_addr[idx] = p2_align_addr[idx] + p2_burst_size[idx] - 1;
+
+ int_addr_min[idx] = select[idx] ? port1_addr[idx] : port2_addr[idx];
+ int_addr_max[idx] = select[idx] ? p1_max_addr[idx] : p2_max_addr[idx];
+ int_rw[idx] = select[idx] ? port1_type[idx] : port2_type[idx];
+ int_id[idx] = select[idx] ? port1_id[idx] : port2_id[idx];
+ int_len[idx] = select[idx] ? port1_len[idx] : port2_len[idx];
+ int_user[idx] = select[idx] ? port1_user[idx] : port2_user[idx];
+ prefetch[idx] = select[idx] ? p1_prefetch[idx] : p2_prefetch[idx];
+
+ hit [idx] = | hit_slices [idx];
+ prot[idx] = | prot_slices[idx];
+
+ no_hit [idx] = ~hit [idx];
+ no_prot[idx] = ~prot[idx];
+
+ port1_out_addr[idx] = out_addr_reg[idx];
+ port2_out_addr[idx] = out_addr_reg[idx];
+
+ port1_cache_coherent[idx] = cache_coherent_reg[idx];
+ port2_cache_coherent[idx] = cache_coherent_reg[idx];
+ end
+ end
+
+ always_comb
+ begin
+ var integer idx_port, idx_slice;
+ var integer reg_num;
+ reg_num=0;
+ for ( idx_port = 0; idx_port < N_PORTS; idx_port++ ) begin
+ for ( idx_slice = 0; idx_slice < 4*N_SLICES[idx_port]; idx_slice++ ) begin
+ int_cfg_regs_slices[idx_port][idx_slice] = int_cfg_regs[4+reg_num];
+ reg_num++;
+ end
+ // int_cfg_regs_slices[idx_port][N_SLICES_MAX:N_SLICES[idx_port]] will be dangling
+ // Fix to zero. Synthesis will remove these signals.
+ // int_cfg_regs_slices[idx_port][4*N_SLICES_MAX-1:4*N_SLICES[idx_port]] = 0;
+ end
+ end
+
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin : PORT_PRIORITY
+ var integer idx;
+ if (Rst_RBI == 1'b0)
+ curr_priority = 'h0;
+ else begin
+ for (idx=0; idx<N_PORTS; idx++) begin
+ if (port1_accept[idx] || port1_drop[idx])
+ curr_priority[idx] = 1'b1;
+ else if (port2_accept[idx] || port2_drop[idx])
+ curr_priority[idx] = 1'b0;
+ end
+ end
+ end
+
+ // find port that misses
+ logic [PORT_ID_WIDTH-1:0] PortIdx_D; // index of the first missing port
+ var integer idx_miss;
+ always_comb begin : MHF_PORT_SELECT
+ PortIdx_D = 'b0;
+ for (idx_miss = 0; idx_miss < N_PORTS; idx_miss++) begin
+ if (miss_valid_mhf[idx_miss] == 1'b1) begin
+ PortIdx_D = idx_miss;
+ break;
+ end
+ end
+ end // always_comb begin
+
+ // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗
+ // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝
+ // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗
+ // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║
+ // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝
+ // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝
+ axi_rab_cfg
+ #(
+ .N_PORTS ( N_PORTS ),
+ .N_REGS ( N_REGS ),
+ .N_L2_SETS ( N_L2_SETS ),
+ .N_L2_SET_ENTRIES( N_L2_SET_ENTRIES ),
+ .ADDR_WIDTH_PHYS ( AXI_M_ADDR_WIDTH ),
+ .ADDR_WIDTH_VIRT ( AXI_S_ADDR_WIDTH ),
+ .N_FLAGS ( 4 ),
+ .AXI_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ),
+ .AXI_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ),
+ .MISS_META_WIDTH ( MISS_META_WIDTH ),
+ .MH_FIFO_DEPTH ( MH_FIFO_DEPTH )
+ )
+ u_axi_rab_cfg
+ (
+ .Clk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .s_axi_awaddr ( s_axi_awaddr ),
+ .s_axi_awvalid ( s_axi_awvalid ),
+ .s_axi_wdata ( s_axi_wdata ),
+ .s_axi_wstrb ( s_axi_wstrb ),
+ .s_axi_wvalid ( s_axi_wvalid ),
+ .s_axi_bready ( s_axi_bready ),
+ .s_axi_araddr ( s_axi_araddr ),
+ .s_axi_arvalid ( s_axi_arvalid ),
+ .s_axi_rready ( s_axi_rready ),
+ .s_axi_arready ( s_axi_arready ),
+ .s_axi_rdata ( s_axi_rdata ),
+ .s_axi_rresp ( s_axi_rresp ),
+ .s_axi_rvalid ( s_axi_rvalid ),
+ .s_axi_wready ( s_axi_wready ),
+ .s_axi_bresp ( s_axi_bresp ),
+ .s_axi_bvalid ( s_axi_bvalid ),
+ .s_axi_awready ( s_axi_awready ),
+ .L1Cfg_DO ( int_cfg_regs ),
+ .L1AllowMultiHit_SO ( L1AllowMultiHit_S ),
+ .MissAddr_DI ( miss_addr_mhf[PortIdx_D] ),
+ .MissMeta_DI ( miss_meta_mhf[PortIdx_D] ),
+ .Miss_SI ( miss_valid_mhf[PortIdx_D] ),
+ .MhFifoFull_SO ( int_mhf_full ),
+ .wdata_l2 ( wdata_l2_o ),
+ .waddr_l2 ( waddr_l2_o ),
+ .wren_l2 ( wren_l2_o )
+ );
+
+ generate for (z = 0; z < N_PORTS; z++) begin : MHF_TLB_SELECT
+ if (ENABLE_L2TLB[z] == 1) begin // L2 TLB is enabled
+ assign miss_valid_mhf[z] = miss_l2_i[z];
+ assign miss_addr_mhf[z] = miss_l2_addr_i[z];
+ assign miss_meta_mhf[z] = {miss_l2_user_i[z], PortIdx_D, miss_l2_id_i[z]};
+ end else begin// L2 TLB is disabled
+ assign miss_valid_mhf[z] = int_miss[z];
+ assign miss_addr_mhf[z] = int_addr_min[z];
+ assign miss_meta_mhf[z] = {int_user[z], PortIdx_D, int_id[z]};
+ end
+ end
+ endgenerate
+
+ // ███████╗██╗ ██╗ ██████╗███████╗ ████████╗ ██████╗ ██████╗
+ // ██╔════╝██║ ██║██╔════╝██╔════╝ ╚══██╔══╝██╔═══██╗██╔══██╗
+ // ███████╗██║ ██║██║ █████╗ ██║ ██║ ██║██████╔╝
+ // ╚════██║██║ ██║██║ ██╔══╝ ██║ ██║ ██║██╔═══╝
+ // ███████║███████╗██║╚██████╗███████╗ ██║ ╚██████╔╝██║
+ // ╚══════╝╚══════╝╚═╝ ╚═════╝╚══════╝ ╚═╝ ╚═════╝ ╚═╝
+ generate for (z = 0; z < N_PORTS; z++) begin : SLICE_TOP_GEN
+ slice_top
+ #(
+ .N_SLICES ( N_SLICES[z] ),
+ .N_REGS ( 4*N_SLICES[z] ),
+ .ADDR_WIDTH_PHYS ( AXI_M_ADDR_WIDTH ),
+ .ADDR_WIDTH_VIRT ( AXI_S_ADDR_WIDTH )
+ )
+ u_slice_top
+ (
+ .int_cfg_regs ( int_cfg_regs_slices[z][4*N_SLICES[z]-1:0] ),
+ .int_rw ( int_rw[z] ),
+ .int_addr_min ( int_addr_min[z] ),
+ .int_addr_max ( int_addr_max[z] ),
+ .multi_hit_allow ( L1AllowMultiHit_S ),
+ .multi_hit ( multi_hit[z] ),
+ .prot ( prot_slices[z][N_SLICES[z]-1:0] ),
+ .hit ( hit_slices [z][N_SLICES[z]-1:0] ),
+ .cache_coherent ( cache_coherent[z] ),
+ .out_addr ( out_addr[z] )
+ );
+ // hit_slices [N_SLICES_MAX-1:N_SLICES_MAX-N_SLICES[z]] will be dangling
+ // prot_slices[N_SLICES_MAX-1:N_SLICES_MAX-N_SLICES[z]] will be dangling
+ // Fix to zero. Synthesis will remove these signals.
+ if ( N_SLICES[z] < N_SLICES_MAX ) begin
+ assign hit_slices [z][N_SLICES_MAX-1:N_SLICES[z]] = 0;
+ assign prot_slices[z][N_SLICES_MAX-1:N_SLICES[z]] = 0;
+ end
+ end // for (z = 0; z < N_PORTS; z++)
+ endgenerate
+
+ // ███████╗███████╗███╗ ███╗
+ // ██╔════╝██╔════╝████╗ ████║
+ // █████╗ ███████╗██╔████╔██║
+ // ██╔══╝ ╚════██║██║╚██╔╝██║
+ // ██║ ███████║██║ ╚═╝ ██║
+ // ╚═╝ ╚══════╝╚═╝ ╚═╝
+ //
+ generate for (z = 0; z < N_PORTS; z++) begin : FSM_GEN
+ fsm
+ #(
+ .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ),
+ .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ),
+ .AXI_ID_WIDTH ( AXI_ID_WIDTH ),
+ .AXI_USER_WIDTH ( AXI_USER_WIDTH )
+ )
+ u_fsm
+ (
+ .Clk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .port1_addr_valid_i ( port1_addr_valid[z] ),
+ .port2_addr_valid_i ( port2_addr_valid[z] ),
+ .port1_sent_i ( port1_sent[z] ),
+ .port2_sent_i ( port2_sent[z] ),
+ .select_i ( select[z] ),
+ .no_hit_i ( no_hit[z] ),
+ .multi_hit_i ( multi_hit[z] ),
+ .no_prot_i ( no_prot[z] ),
+ .prefetch_i ( prefetch[z] ),
+ .out_addr_i ( out_addr[z] ),
+ .cache_coherent_i ( cache_coherent[z] ),
+ .port1_accept_o ( port1_accept[z] ),
+ .port1_drop_o ( port1_drop[z] ),
+ .port1_miss_o ( port1_miss[z] ),
+ .port2_accept_o ( port2_accept[z] ),
+ .port2_drop_o ( port2_drop[z] ),
+ .port2_miss_o ( port2_miss[z] ),
+ .out_addr_o ( out_addr_reg[z] ),
+ .cache_coherent_o ( cache_coherent_reg[z] ),
+ .miss_o ( int_miss[z] ),
+ .multi_o ( int_multi[z] ),
+ .prot_o ( int_prot[z] ),
+ .prefetch_o ( int_prefetch[z] ),
+ .in_addr_i ( int_addr_min[z] ),
+ .in_id_i ( int_id[z] ),
+ .in_len_i ( int_len[z] ),
+ .in_user_i ( int_user[z] ),
+ .in_addr_o ( int_axaddr_o[z] ),
+ .in_id_o ( int_axid_o[z] ),
+ .in_len_o ( int_axlen_o[z] ),
+ .in_user_o ( int_axuser_o[z] )
+ );
+ end
+ endgenerate
+
+"""
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# module rab_slice
+# #(
+# parameter ADDR_WIDTH_PHYS = 40,
+# parameter ADDR_WIDTH_VIRT = 32
+# )
+# (
+# input logic [ADDR_WIDTH_VIRT-1:0] cfg_min,
+# input logic [ADDR_WIDTH_VIRT-1:0] cfg_max,
+# input logic [ADDR_WIDTH_PHYS-1:0] cfg_offset,
+# input logic cfg_wen,
+# input logic cfg_ren,
+# input logic cfg_en,
+# input logic in_trans_type,
+# input logic [ADDR_WIDTH_VIRT-1:0] in_addr_min,
+# input logic [ADDR_WIDTH_VIRT-1:0] in_addr_max,
+# output logic out_hit,
+# output logic out_prot,
+# output logic [ADDR_WIDTH_PHYS-1:0] out_addr
+# );
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+
+
+class rab_slice(Elaboratable):
+
+ def __init__(self, params): # pass config object
+ # TODO parameters
+ self.params = params
+ self.cfg_min = Signal(params.ADDR_WIDTH_VIRT) # input
+ self.cfg_max = Signal(params.ADDR_WIDTH_VIRT) # input
+ self.cfg_offset = Signal(params.ADDR_WIDTH_PHYS) # input
+ self.cfg_wen = Signal() # input
+ self.cfg_ren = Signal() # input
+ self.cfg_en = Signal() # input
+ self.in_trans_type = Signal() # input
+ self.in_addr_min = Signal(params.ADDR_WIDTH_VIRT) # input
+ self.in_addr_max = Signal(params.ADDR_WIDTH_VIRT) # input
+ self.out_hit = Signal() # output
+ self.out_prot = Signal() # output
+ self.out_addr = Signal(params.ADDR_WIDTH_PHYS) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+ min_above_min = Signal()
+ min_below_max = Signal()
+ max_below_max = Signal()
+
+ # assign min_above_min = (in_addr_min >= cfg_min) ? 1'b1 : 1'b0;
+ # assign min_below_max = (in_addr_min <= cfg_max) ? 1'b1 : 1'b0;
+ # assign max_below_max = (in_addr_max <= cfg_max) ? 1'b1 : 1'b0;
+ # assign out_hit = cfg_en & min_above_min & min_below_max & max_below_max;
+ # assign out_prot = out_hit & ((in_trans_type & ~cfg_wen) | (~in_trans_type & ~cfg_ren));
+ # assign out_addr = in_addr_min - cfg_min + cfg_offset;
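+        #
+        # Worked example (illustrative values only): with cfg_min=0x1000,
+        # cfg_max=0x1fff, cfg_offset=0x8000_0000 and a transaction covering
+        # 0x1200..0x12ff, all three comparisons hold, out_hit goes high and
+        # out_addr = 0x1200 - 0x1000 + 0x8000_0000 = 0x8000_0200.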
+ m.d.comb += [
+ min_above_min.eq(self.in_addr_min >= self.cfg_min),
+ min_below_max.eq(self.in_addr_min <= self.cfg_max),
+ max_below_max.eq(self.in_addr_max <= self.cfg_max),
+ self.out_hit.eq(self.cfg_en & min_above_min &
+ min_below_max & max_below_max),
+ self.out_prot.eq(self.out_hit & (
+ (self.in_trans_type & ~self.cfg_wen) | (~self.in_trans_type & ~self.cfg_ren))),
+ self.out_addr.eq(self.in_addr_min - self.cfg_min + self.cfg_offset)
+ ]
+
+ return m
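+
+# Usage sketch (hypothetical parameter object; in the actual design a shared
+# core-configuration object is passed in, see slice_top):
+#
+#   from collections import namedtuple
+#   Params = namedtuple("Params", ["ADDR_WIDTH_VIRT", "ADDR_WIDTH_PHYS"])
+#   slice0 = rab_slice(Params(ADDR_WIDTH_VIRT=32, ADDR_WIDTH_PHYS=40))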
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# /*
+# * ram_tp_no_change
+# *
+# * This code implements a parameterizable two-port memory. Port 0 can read and
+# * write while Port 1 can read only. The Xilinx tools will infer a BRAM with
+# * Port 0 in "no change" mode, i.e., during a write, it retains the last read
+# * value on the output. Port 1 (read-only) is in "write first" mode. Still, it
+# * outputs the old data during the write cycle. Note: Port 1 outputs invalid
+# * data in the cycle after the write when reading the same address.
+# *
+# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
+# */
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import Memory
+
+import math
+
+#
+# module ram_tp_no_change
+# #(
+ADDR_WIDTH = 10
+DATA_WIDTH = 36
+# )
+# (
+# input clk,
+# input we,
+# input [ADDR_WIDTH-1:0] addr0,
+# input [ADDR_WIDTH-1:0] addr1,
+# input [DATA_WIDTH-1:0] d_i,
+# output [DATA_WIDTH-1:0] d0_o,
+# output [DATA_WIDTH-1:0] d1_o
+# );
+
+
+class ram_tp_no_change(Elaboratable):
+
+ def __init__(self):
+ self.we = Signal() # input
+ self.addr0 = Signal(ADDR_WIDTH) # input
+ self.addr1 = Signal(ADDR_WIDTH) # input
+ self.d_i = Signal(DATA_WIDTH) # input
+ self.d0_o = Signal(DATA_WIDTH) # output
+ self.d1_o = Signal(DATA_WIDTH) # output
+
+ DEPTH = int(math.pow(2, ADDR_WIDTH))
+ self.ram = Memory(width=DATA_WIDTH, depth=DEPTH)
+ #
+ # localparam DEPTH = 2**ADDR_WIDTH;
+ #
+ # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
+ # reg [DATA_WIDTH-1:0] d0;
+ # reg [DATA_WIDTH-1:0] d1;
+ #
+ # always_ff @(posedge clk) begin
+ # if(we == 1'b1) begin
+ # ram[addr0] <= d_i;
+ # end else begin
+ # only change data if we==false
+ # d0 <= ram[addr0];
+ # end
+ # d1 <= ram[addr1];
+ # end
+ #
+ # assign d0_o = d0;
+ # assign d1_o = d1;
+ #
+
+ def elaborate(self, platform=None):
+ m = Module()
+ m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
+ m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
+ m.submodules.write_ram = write_ram = self.ram.write_port()
+
+ # write port
+ m.d.comb += write_ram.en.eq(self.we)
+ m.d.comb += write_ram.addr.eq(self.addr0)
+ m.d.comb += write_ram.data.eq(self.d_i)
+
+ # read ports
+ m.d.comb += read_ram0.addr.eq(self.addr0)
+ m.d.comb += read_ram1.addr.eq(self.addr1)
+ with m.If(self.we == 0):
+ m.d.sync += self.d0_o.eq(read_ram0.data)
+ m.d.sync += self.d1_o.eq(read_ram1.data)
+
+ return m
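+
+
+# A minimal simulation sketch in the style of the unit tests further down in
+# this series; it is illustrative only and assumes nmigen.compat.sim is
+# available (as those tests do). It writes one word, then deasserts we so the
+# "no change" behaviour of Port 0 can be observed on d0_o in the VCD.
+if __name__ == "__main__":
+    from nmigen.compat.sim import run_simulation
+
+    def tbench(dut):
+        yield dut.we.eq(1)
+        yield dut.addr0.eq(5)
+        yield dut.d_i.eq(0x123)
+        yield   # write cycle: d0_o holds its previous value
+        yield dut.we.eq(0)
+        yield   # ram[5] propagates through the synchronous read port ...
+        yield   # ... and then into d0_o
+
+    dut = ram_tp_no_change()
+    run_simulation(dut, tbench(dut), vcd_name="ram_tp_no_change.vcd")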
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# /*
+# * ram_tp_write_first
+# *
+# * This code implements a parameterizable two-port memory. Port 0 can read and
+# * write while Port 1 can read only. Xilinx Vivado will infer a BRAM in
+# * "write first" mode, i.e., upon a read and write to the same address, the
+# * new value is read. Note: Port 1 outputs invalid data in the cycle after
+# * the write when reading the same address.
+# *
+# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
+# */
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import Memory
+
+import math
+#
+# module ram_tp_write_first
+# #(
+ADDR_WIDTH = 10
+DATA_WIDTH = 36
+# )
+# (
+# input clk,
+# input we,
+# input [ADDR_WIDTH-1:0] addr0,
+# input [ADDR_WIDTH-1:0] addr1,
+# input [DATA_WIDTH-1:0] d_i,
+# output [DATA_WIDTH-1:0] d0_o,
+# output [DATA_WIDTH-1:0] d1_o
+# );
+
+
+class ram_tp_write_first(Elaboratable):
+
+ def __init__(self):
+ self.we = Signal() # input
+ self.addr0 = Signal(ADDR_WIDTH) # input
+ self.addr1 = Signal(ADDR_WIDTH) # input
+ self.d_i = Signal(DATA_WIDTH) # input
+ self.d0_o = Signal(DATA_WIDTH) # output
+ self.d1_o = Signal(DATA_WIDTH) # output
+
+ DEPTH = int(math.pow(2, ADDR_WIDTH))
+ self.ram = Memory(width=DATA_WIDTH, depth=DEPTH)
+
+ #
+ # localparam DEPTH = 2**ADDR_WIDTH;
+ #
+ # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
+ # reg [ADDR_WIDTH-1:0] raddr0;
+ # reg [ADDR_WIDTH-1:0] raddr1;
+ #
+ # always_ff @(posedge clk) begin
+ # if(we == 1'b1) begin
+ # ram[addr0] <= d_i;
+ # end
+ # raddr0 <= addr0;
+ # raddr1 <= addr1;
+ # end
+ #
+ # assign d0_o = ram[raddr0];
+ # assign d1_o = ram[raddr1];
+ #
+
+ def elaborate(self, platform=None):
+ m = Module()
+ m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
+ m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
+ m.submodules.write_ram = write_ram = self.ram.write_port()
+
+ # write port
+ m.d.comb += write_ram.en.eq(self.we)
+ m.d.comb += write_ram.addr.eq(self.addr0)
+ m.d.comb += write_ram.data.eq(self.d_i)
+
+ # read ports
+ m.d.comb += read_ram0.addr.eq(self.addr0)
+ m.d.comb += read_ram1.addr.eq(self.addr1)
+ m.d.sync += self.d0_o.eq(read_ram0.data)
+ m.d.sync += self.d1_o.eq(read_ram1.data)
+
+ return m
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+import rab_slice
+import coreconfig
+
+#
+# module slice_top
+# //#(
+# // parameter N_SLICES = 16,
+# // parameter N_REGS = 4*N_SLICES,
+# // parameter ADDR_WIDTH_PHYS = 40,
+# // parameter ADDR_WIDTH_VIRT = 32
+# // )
+# (
+# input logic [N_REGS-1:0] [63:0] int_cfg_regs,
+# input logic int_rw,
+# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_min,
+# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_max,
+# input logic multi_hit_allow,
+# output logic multi_hit,
+# output logic [N_SLICES-1:0] prot,
+# output logic [N_SLICES-1:0] hit,
+# output logic cache_coherent,
+# output logic [ADDR_WIDTH_PHYS-1:0] out_addr
+# );
+#
+
+
+class slice_top(Elaboratable):
+
+ def __init__(self):
+ # FIXME self.int_cfg_regs = Signal() # input
+ self.params = coreconfig.CoreConfig() # rename ?
+ self.int_rw = Signal() # input
+ self.int_addr_min = Signal(self.params.ADDR_WIDTH_VIRT) # input
+ self.int_addr_max = Signal(self.params.ADDR_WIDTH_VIRT) # input
+ self.multi_hit_allow = Signal() # input
+ self.multi_hit = Signal() # output
+ self.prot = Signal(self.params.N_SLICES) # output
+ self.hit = Signal(self.params.N_SLICES) # output
+ self.cache_coherent = Signal() # output
+ self.out_addr = Signal(self.params.ADDR_WIDTH_PHYS) # output
+
+ def elaborate(self, platform=None):
+ m = Module()
+
+ first_hit = Signal()
+
+ for i in range(self.params.N_SLICES):
+ # TODO pass params / core config here
+ u_slice = rab_slice.rab_slice(self.params)
+ setattr(m.submodules, "u_slice%d" % i, u_slice)
+ # TODO set param and connect ports
+
+ # In case of a multi hit, the lowest slice with a hit is selected.
+ # TODO always_comb begin : HIT_CHECK
+ m.d.comb += [
+ first_hit.eq(0),
+ self.multi_hit.eq(0),
+ self.out_addr.eq(0),
+ self.cache_coherent.eq(0)]
+
+        # NOTE: the blocking-assignment priority chain from the SV reference
+        # below is not fully translated yet; first_hit is both read and driven
+        # combinationally here and still needs restructuring (see TODO above).
+        for j in range(self.params.N_SLICES):
+            with m.If(self.hit[j] == 1):
+                with m.If(first_hit == 1):
+                    with m.If(self.multi_hit_allow == 0):
+                        m.d.comb += self.multi_hit.eq(1)
+                with m.Else():
+                    # only output the first (lowest) slice that was hit
+                    m.d.comb += first_hit.eq(1)
+                    # TODO connect the slice outputs, per the SV reference:
+                    #   out_addr       = slice_out_addr[ADDR_WIDTH_PHYS*j +: ADDR_WIDTH_PHYS]
+                    #   cache_coherent = int_cfg_regs[4*j+3][3]
+ return m
+
+ # TODO translate generate statement
+
+
+"""
+ logic [ADDR_WIDTH_PHYS*N_SLICES-1:0] slice_out_addr;
+
+ generate
+ for ( i=0; i<N_SLICES; i++ )
+ begin
+ rab_slice
+ #(
+ .ADDR_WIDTH_PHYS ( ADDR_WIDTH_PHYS ),
+ .ADDR_WIDTH_VIRT ( ADDR_WIDTH_VIRT )
+ )
+ u_slice
+ (
+ .cfg_min ( int_cfg_regs[4*i] [ADDR_WIDTH_VIRT-1:0] ),
+ .cfg_max ( int_cfg_regs[4*i+1][ADDR_WIDTH_VIRT-1:0] ),
+ .cfg_offset ( int_cfg_regs[4*i+2][ADDR_WIDTH_PHYS-1:0] ),
+ .cfg_wen ( int_cfg_regs[4*i+3][2] ),
+ .cfg_ren ( int_cfg_regs[4*i+3][1] ),
+ .cfg_en ( int_cfg_regs[4*i+3][0] ),
+ .in_trans_type ( int_rw ),
+ .in_addr_min ( int_addr_min ),
+ .in_addr_max ( int_addr_max ),
+ .out_addr ( slice_out_addr[ADDR_WIDTH_PHYS*i+ADDR_WIDTH_PHYS-1:ADDR_WIDTH_PHYS*i] ),
+ .out_prot ( prot[i] ),
+ .out_hit ( hit[i] )
+ );
+ end
+ endgenerate
+
+ // In case of a multi hit, the lowest slice with a hit is selected.
+ always_comb begin : HIT_CHECK
+ first_hit = 0;
+ multi_hit = 0;
+ out_addr = '0;
+ cache_coherent = 0;
+ for (j = 0; j < N_SLICES; j++) begin
+ if (hit[j] == 1'b1) begin
+ if (first_hit == 1'b1) begin
+ if (multi_hit_allow == 1'b0) begin
+ multi_hit = 1'b1;
+ end
+ end else begin
+ first_hit = 1'b1;
+ out_addr = slice_out_addr[ADDR_WIDTH_PHYS*j +: ADDR_WIDTH_PHYS];
+ cache_coherent = int_cfg_regs[4*j+3][3];
+ end
+ end
+ end
+ end
+"""
+
+# sv2nmigen TODO: add translation support for generate statements and for-loops inside always_comb
--- /dev/null
+from nmigen.compat.sim import run_simulation
+import sys
+sys.path.append("../")
+from ram_tp_write_first import ram_tp_write_first
+
+
+def tbench(dut):
+ yield dut.we.eq(1)
+ for i in range(0, 255):
+ yield dut.addr0.eq(i)
+ yield dut.d_i.eq(i)
+ yield
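+    # Illustrative read-back phase (not part of the original generated test):
+    # with we deasserted, d0_o should follow the values just written after the
+    # read-port latency; inspect the VCD to confirm.
+    yield dut.we.eq(0)
+    for i in range(0, 8):
+        yield dut.addr0.eq(i)
+        yield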
+
+
+if __name__ == "__main__":
+ dut = ram_tp_write_first()
+ run_simulation(dut, tbench(dut), vcd_name="ram_tp_write_first.vcd")
+ print("ram_tp_write_first Unit Test Success")
--- /dev/null
+from nmigen.compat.sim import run_simulation
+import sys
+sys.path.append("../")
+# sys.path.append("../../../TestUtil")
+from slice_top import slice_top
+
+def tbench(dut):
+ yield
+
+
+if __name__ == "__main__":
+ dut = slice_top()
+ run_simulation(dut, tbench(dut), vcd_name="test_slice_top.vcd")
+ print("slice_top Unit Test Success")
--- /dev/null
+from soc.decoder.power_enums import (Function, Form, InternalOp,
+ In1Sel, In2Sel, In3Sel, OutSel,
+ RC, LdstLen, CryIn, get_csv,
+ single_bit_flags,
+ get_signal_name, default_values)
+import math
+
+
+class MemorySim:
+ def __init__(self, bytes_per_word=8):
+ self.mem = {}
+ self.bytes_per_word = bytes_per_word
+ self.word_log2 = math.ceil(math.log2(bytes_per_word))
+
+ def _get_shifter_mask(self, width, remainder):
+ shifter = ((self.bytes_per_word - width) - remainder) * \
+ 8 # bits per byte
+ mask = (1 << (width * 8)) - 1
+ return shifter, mask
+
+ # TODO: Implement ld/st of lesser width
+ def ld(self, address, width=8):
+ remainder = address & (self.bytes_per_word - 1)
+ address = address >> self.word_log2
+ assert remainder & (width - 1) == 0, "Unaligned access unsupported!"
+ if address in self.mem:
+ val = self.mem[address]
+ else:
+ val = 0
+
+ if width != self.bytes_per_word:
+ shifter, mask = self._get_shifter_mask(width, remainder)
+ val = val & (mask << shifter)
+ val >>= shifter
+ print("Read {:x} from addr {:x}".format(val, address))
+ return val
+
+ def st(self, address, value, width=8):
+ remainder = address & (self.bytes_per_word - 1)
+ address = address >> self.word_log2
+ assert remainder & (width - 1) == 0, "Unaligned access unsupported!"
+ print("Writing {:x} to addr {:x}".format(value, address))
+ if width != self.bytes_per_word:
+ if address in self.mem:
+ val = self.mem[address]
+ else:
+ val = 0
+ shifter, mask = self._get_shifter_mask(width, remainder)
+ val &= ~(mask << shifter)
+ val |= value << shifter
+ self.mem[address] = val
+ else:
+ self.mem[address] = value
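+
+    # Illustration of the sub-word addressing implemented above (values are
+    # hypothetical, not taken from the original code):
+    #
+    #   mem = MemorySim()                     # 8 bytes per word
+    #   mem.st(0x1000, 0x1122334455667788)    # full 8-byte store
+    #   mem.ld(0x1000)                        # -> 0x1122334455667788
+    #   mem.ld(0x1004, width=4)               # -> 0x55667788
+    #
+    # i.e. bytes are big-endian within each 8-byte word, so the second
+    # 4-byte access returns the low half of the stored value.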
+
+
+class RegFile:
+ def __init__(self):
+ self.regfile = [0] * 32
+ self.sprs = {}
+
+ def write_reg(self, regnum, value):
+ all1s = (1 << 64)-1 # 64 bits worth of 1s
+ value &= all1s
+ print("Writing {:x} to reg r{}".format(value, regnum))
+ self.regfile[regnum] = value
+
+ def read_reg(self, regnum):
+ val = self.regfile[regnum]
+ print("Read {:x} from reg r{}".format(val, regnum))
+ return val
+
+ def assert_gpr(self, gpr, val):
+ reg_val = self.read_reg(gpr)
+ msg = "reg r{} got {:x}, expecting {:x}".format(
+ gpr, reg_val, val)
+ assert reg_val == val, msg
+
+ def assert_gprs(self, gprs):
+ for k, v in list(gprs.items()):
+ self.assert_gpr(k, v)
+
+ def set_xer(self, result, operanda, operandb):
+ xer = 0
+ if result & 1 << 64:
+ xer |= XER.CA
+
+ self.xer = xer
+
+
+class InternalOpSimulator:
+ def __init__(self):
+ self.mem_sim = MemorySim()
+ self.regfile = RegFile()
+
+ def execute_alu_op(self, op1, op2, internal_op, carry=0):
+ print(internal_op)
+ if internal_op == InternalOp.OP_ADD.value:
+ return op1 + op2 + carry
+ elif internal_op == InternalOp.OP_AND.value:
+ return op1 & op2
+ elif internal_op == InternalOp.OP_OR.value:
+ return op1 | op2
+ elif internal_op == InternalOp.OP_MUL_L64.value:
+ return op1 * op2
+ else:
+ assert False, "Not implemented"
+
+    def update_cr0(self, result):
+        # minimal CR0 update, encoded as 3 bits {LT, GT, EQ}; the SO bit is
+        # not modelled here
+        if result == 0:
+            self.cr0 = 0b001
+        elif result >> 63:
+            self.cr0 = 0b100
+        else:
+            self.cr0 = 0b010
+        print("update_cr0", self.cr0)
+
+ def alu_op(self, pdecode2):
+ all1s = (1 << 64)-1 # 64 bits worth of 1s
+ internal_op = yield pdecode2.dec.op.internal_op
+ operand1 = 0
+ operand2 = 0
+ result = 0
+ carry = 0
+ r1_ok = yield pdecode2.e.read_reg1.ok
+ r2_ok = yield pdecode2.e.read_reg2.ok
+ r3_ok = yield pdecode2.e.read_reg3.ok
+ imm_ok = yield pdecode2.e.imm_data.ok
+ if r1_ok:
+ r1_sel = yield pdecode2.e.read_reg1.data
+ operand1 = self.regfile.read_reg(r1_sel)
+ elif r3_ok:
+ r3_sel = yield pdecode2.e.read_reg3.data
+ operand1 = self.regfile.read_reg(r3_sel)
+ if r2_ok:
+ r2_sel = yield pdecode2.e.read_reg2.data
+ operand2 = self.regfile.read_reg(r2_sel)
+ if imm_ok:
+ operand2 = yield pdecode2.e.imm_data.data
+
+ inv_a = yield pdecode2.dec.op.inv_a
+ if inv_a:
+ operand1 = (~operand1) & all1s
+
+ cry_in = yield pdecode2.dec.op.cry_in
+ if cry_in == CryIn.ONE.value:
+ carry = 1
+ elif cry_in == CryIn.CA.value:
+ carry = self.carry_out
+
+ # TODO rc_sel = yield pdecode2.dec.op.rc_sel
+ result = self.execute_alu_op(operand1, operand2, internal_op,
+ carry=carry)
+
+ cry_out = yield pdecode2.dec.op.cry_out
+ rc = yield pdecode2.e.rc.data
+
+ if rc:
+ self.update_cr0(result)
+ if cry_out == 1:
+ self.carry_out = (result >> 64)
+ print("setting carry_out", self.carry_out)
+
+ ro_ok = yield pdecode2.e.write_reg.ok
+ if ro_ok:
+ ro_sel = yield pdecode2.e.write_reg.data
+ self.regfile.write_reg(ro_sel, result)
+
+ def mem_op(self, pdecode2):
+ internal_op = yield pdecode2.dec.op.internal_op
+ addr_reg = yield pdecode2.e.read_reg1.data
+ addr = self.regfile.read_reg(addr_reg)
+
+ imm_ok = yield pdecode2.e.imm_data.ok
+ r2_ok = yield pdecode2.e.read_reg2.ok
+ width = yield pdecode2.e.data_len
+ if imm_ok:
+ imm = yield pdecode2.e.imm_data.data
+ addr += imm
+ elif r2_ok:
+ r2_sel = yield pdecode2.e.read_reg2.data
+ addr += self.regfile.read_reg(r2_sel)
+ if internal_op == InternalOp.OP_STORE.value:
+ val_reg = yield pdecode2.e.read_reg3.data
+ val = self.regfile.read_reg(val_reg)
+ self.mem_sim.st(addr, val, width)
+ elif internal_op == InternalOp.OP_LOAD.value:
+ dest_reg = yield pdecode2.e.write_reg.data
+ val = self.mem_sim.ld(addr, width)
+ self.regfile.write_reg(dest_reg, val)
+
+ def execute_op(self, pdecode2):
+ function = yield pdecode2.dec.op.function_unit
+ if function == Function.ALU.value:
+ yield from self.alu_op(pdecode2)
+ elif function == Function.LDST.value:
+ yield from self.mem_op(pdecode2)