From: Luke Kenneth Casson Leighton Date: Sun, 3 May 2020 09:49:14 +0000 (+0100) Subject: move TLB and iommu to unused directory X-Git-Tag: div_pipeline~1400 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fee9c4d083993a0fb3e8417b6901a868b33ddc1f;p=soc.git move TLB and iommu to unused directory --- diff --git a/src/soc/TLB/.gitignore b/src/soc/TLB/.gitignore deleted file mode 100644 index 3324664b..00000000 --- a/src/soc/TLB/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.wpr -__pycache__ diff --git a/src/soc/TLB/AddressEncoder.py b/src/soc/TLB/AddressEncoder.py deleted file mode 100644 index 49da8197..00000000 --- a/src/soc/TLB/AddressEncoder.py +++ /dev/null @@ -1,77 +0,0 @@ -from nmigen import Module, Signal, Elaboratable -from nmigen.lib.coding import Encoder, PriorityEncoder - - -class AddressEncoder(Elaboratable): - """Address Encoder - - The purpose of this module is to take in a vector and - encode the bits that are one hot into an address. This module - combines both nmigen's Encoder and PriorityEncoder and will state - whether the input line has a single bit hot, multiple bits hot, - or no bits hot. The output line will always have the lowest value - address output. - - Usage: - The output is valid when either single or multiple match is high. - Otherwise output is 0. - """ - - def __init__(self, width): - """ Arguments: - * width: The desired length of the input vector - """ - # Internal - self.encoder = Encoder(width) - self.p_encoder = PriorityEncoder(width) - - # Input - self.i = Signal(width) - - # Output - self.single_match = Signal(1) - self.multiple_match = Signal(1) - self.o = Signal(range(width)) - - def elaborate(self, platform=None): - m = Module() - - # Add internal submodules - m.submodules.encoder = self.encoder - m.submodules.p_encoder = self.p_encoder - - m.d.comb += [ - self.encoder.i.eq(self.i), - self.p_encoder.i.eq(self.i) - ] - - # Steps: - # 1. check if the input vector is non-zero - # 2. if non-zero, check if single match or multiple match - # 3. set output line to be lowest value address output - - # If the priority encoder receives an input of 0 - # If n is 1 then the output is not valid - with m.If(self.p_encoder.n): - m.d.comb += [ - self.single_match.eq(0), - self.multiple_match.eq(0), - self.o.eq(0) - ] - # If the priority encoder receives an input > 0 - with m.Else(): - # Multiple Match if encoder n is invalid - with m.If(self.encoder.n): - m.d.comb += [ - self.single_match.eq(0), - self.multiple_match.eq(1) - ] - # Single Match if encoder n is valid - with m.Else(): - m.d.comb += [ - self.single_match.eq(1), - self.multiple_match.eq(0) - ] - # Always set output based on priority encoder output - m.d.comb += self.o.eq(self.p_encoder.o) - return m diff --git a/src/soc/TLB/Cam.py b/src/soc/TLB/Cam.py deleted file mode 100644 index c5fd0699..00000000 --- a/src/soc/TLB/Cam.py +++ /dev/null @@ -1,126 +0,0 @@ -from nmigen import Array, Cat, Module, Signal, Elaboratable -from nmigen.lib.coding import Decoder -from nmigen.cli import main # , verilog - -from .CamEntry import CamEntry -from .AddressEncoder import AddressEncoder - - -class Cam(Elaboratable): - """ Content Addressable Memory (CAM) - - The purpose of this module is to quickly look up whether an - entry exists given a data key. - This module will search for the given data in all internal entries - and output whether a single or multiple match was found. - If a single entry is found the address will be returned and single_match - is set HIGH.
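The AddressEncoder deleted above reports whether its input vector has exactly one bit set, more than one, or none, and always outputs the index of the lowest set bit. As a reading aid, here is a behavioural sketch of that contract in plain Python (this is not the nmigen module itself; the function name is invented for illustration):

def classify_matches(match_vector, width):
    # return (single_match, multiple_match, address); the address is the
    # lowest set bit index, or 0 when nothing matches
    set_bits = [i for i in range(width) if (match_vector >> i) & 1]
    if not set_bits:
        return (0, 0, 0)
    if len(set_bits) == 1:
        return (1, 0, set_bits[0])
    return (0, 1, set_bits[0])   # on multiple hits the lowest address wins

assert classify_matches(0b0000, 4) == (0, 0, 0)
assert classify_matches(0b0100, 4) == (1, 0, 2)
assert classify_matches(0b0110, 4) == (0, 1, 1)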
If multiple entries are found the lowest address is - returned and multiple_match is set HIGH. If neither single_match nor - multiple_match is HIGH this implies no match was found. To write - to the CAM set the address bus to the desired entry and set write_enable - HIGH. Entry management should be performed one level above this block - as lookup is performed within. - - Notes: - The read and write operations take one clock cycle to complete. - Currently the read_warning line is present for interfacing but - is not necessary for this design. This module is capable of writing - in the first cycle, reading on the second, and outputting the correct - address on the third. - """ - - def __init__(self, data_size, cam_size): - """ Arguments: - * data_size: (bits) The bit size of the data - * cam_size: (number) The number of entries in the CAM - """ - - # Internal - self.cam_size = cam_size - self.encoder = AddressEncoder(cam_size) - self.decoder = Decoder(cam_size) - self.entry_array = Array(CamEntry(data_size) for x in range(cam_size)) - - # Input - self.enable = Signal(1) - self.write_enable = Signal(1) - self.data_in = Signal(data_size) # The data to be written - self.data_mask = Signal(data_size) # mask for ternary writes - # address of CAM Entry to write - self.address_in = Signal(range(cam_size)) - - # Output - self.read_warning = Signal(1) # High when a read interrupts a write - self.single_match = Signal(1) # High when there is only one match - self.multiple_match = Signal(1) # High when there are at least two matches - # The lowest address matched - self.match_address = Signal(range(cam_size)) - - def elaborate(self, platform=None): - m = Module() - # AddressEncoder for match types and output address - m.submodules.AddressEncoder = self.encoder - # Decoder is used to select which entry will be written to - m.submodules.Decoder = self.decoder - # CamEntry Array Submodules - # Note these are added anonymously - entry_array = self.entry_array - m.submodules += entry_array - - # Decoder logic - m.d.comb += [ - self.decoder.i.eq(self.address_in), - self.decoder.n.eq(0) - ] - - encoder_vector = [] - with m.If(self.enable): - # Set the key value for every CamEntry - for index in range(self.cam_size): - - # Write Operation - with m.If(self.write_enable): - with m.If(self.decoder.o[index]): - m.d.comb += entry_array[index].command.eq(2) - with m.Else(): - m.d.comb += entry_array[index].command.eq(0) - - # Read Operation - with m.Else(): - m.d.comb += entry_array[index].command.eq(1) - - # Send data input to all entries - m.d.comb += entry_array[index].data_in.eq(self.data_in) - # Send all entry matches to encoder - ematch = entry_array[index].match - encoder_vector.append(ematch) - - # Give input to and accept output from encoder module - m.d.comb += [ - self.encoder.i.eq(Cat(*encoder_vector)), - self.single_match.eq(self.encoder.single_match), - self.multiple_match.eq(self.encoder.multiple_match), - self.match_address.eq(self.encoder.o) - ] - - # If the CAM is not enabled set all outputs to 0 - with m.Else(): - m.d.comb += [ - self.read_warning.eq(0), - self.single_match.eq(0), - self.multiple_match.eq(0), - self.match_address.eq(0) - ] - - return m - - def ports(self): - return [self.enable, self.write_enable, - self.data_in, self.data_mask, - self.read_warning, self.single_match, - self.multiple_match, self.match_address] - - -if __name__ == '__main__': - cam = Cam(4, 4) - main(cam, ports=cam.ports()) diff --git a/src/soc/TLB/CamEntry.py b/src/soc/TLB/CamEntry.py deleted file mode 100644 index
b1d93082..00000000 --- a/src/soc/TLB/CamEntry.py +++ /dev/null @@ -1,46 +0,0 @@ -from nmigen import Module, Signal, Elaboratable - - -class CamEntry(Elaboratable): - """ Content Addressable Memory (CAM) Entry - - The purpose of this module is to represent an entry within a CAM. - This module when given a read command will compare the given data - and output whether a match was found or not. When given a write - command it will write the given data into internal registers. - """ - - def __init__(self, data_size): - """ Arguments: - * data_size: (bit count) The size of the data - """ - # Input - self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset - self.data_in = Signal(data_size) # Data input when writing - - # Output - self.match = Signal(1) # Result of the internal/input key comparison - self.data = Signal(data_size) - - def elaborate(self, platform=None): - m = Module() - with m.Switch(self.command): - with m.Case("00"): - m.d.sync += self.match.eq(0) - with m.Case("01"): - with m.If(self.data == self.data_in): - m.d.sync += self.match.eq(1) - with m.Else(): - m.d.sync += self.match.eq(0) - with m.Case("10"): - m.d.sync += [ - self.data.eq(self.data_in), - self.match.eq(0) - ] - with m.Case(): - m.d.sync += [ - self.match.eq(0), - self.data.eq(0) - ] - - return m diff --git a/src/soc/TLB/LFSR.py b/src/soc/TLB/LFSR.py deleted file mode 100644 index d8b606ec..00000000 --- a/src/soc/TLB/LFSR.py +++ /dev/null @@ -1,109 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# See Notices.txt for copyright information -from nmigen import Signal, Module, Const, Cat, Elaboratable -from nmigen.cli import verilog, rtlil - - -class LFSRPolynomial(set): - """ implements a polynomial for use in LFSR - """ - def __init__(self, exponents=()): - for e in exponents: - assert isinstance(e, int), TypeError("%s must be an int" % repr(e)) - assert (e >= 0), ValueError("%d must not be negative" % e) - set.__init__(self, set(exponents).union({0})) # must contain zero - - @property - def max_exponent(self): - return max(self) # derived from set, so this returns the max exponent - - @property - def exponents(self): - exponents = list(self) # get elements of set as a list - exponents.sort(reverse=True) - return exponents - - def __str__(self): - expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2) - retval = map(lambda i: expd[min(i,2)].format(i), self.exponents) - return " + ".join(retval) - - def __repr__(self): - return "LFSRPolynomial(%s)" % self.exponents - - -# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa -LFSR_POLY_2 = LFSRPolynomial([2, 1, 0]) -LFSR_POLY_3 = LFSRPolynomial([3, 2, 0]) -LFSR_POLY_4 = LFSRPolynomial([4, 3, 0]) -LFSR_POLY_5 = LFSRPolynomial([5, 3, 0]) -LFSR_POLY_6 = LFSRPolynomial([6, 5, 0]) -LFSR_POLY_7 = LFSRPolynomial([7, 6, 0]) -LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0]) -LFSR_POLY_9 = LFSRPolynomial([9, 5, 0]) -LFSR_POLY_10 = LFSRPolynomial([10, 7, 0]) -LFSR_POLY_11 = LFSRPolynomial([11, 9, 0]) -LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0]) -LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0]) -LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0]) -LFSR_POLY_15 = LFSRPolynomial([15, 14, 0]) -LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0]) -LFSR_POLY_17 = LFSRPolynomial([17, 14, 0]) -LFSR_POLY_18 = LFSRPolynomial([18, 11, 0]) -LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0]) -LFSR_POLY_20 = LFSRPolynomial([20, 17, 0]) 
-LFSR_POLY_21 = LFSRPolynomial([21, 19, 0]) -LFSR_POLY_22 = LFSRPolynomial([22, 21, 0]) -LFSR_POLY_23 = LFSRPolynomial([23, 18, 0]) -LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0]) - - -class LFSR(LFSRPolynomial, Elaboratable): - """ implements a Linear Feedback Shift Register - """ - def __init__(self, polynomial): - """ Inputs: - ------ - :polynomial: the polynomial to feedback on. may be a LFSRPolynomial - instance or an iterable of ints (list/tuple/generator) - :enable: enable (set LO to disable. NOTE: defaults to HI) - - Outputs: - ------- - :state: the LFSR state. bitwidth is taken from the polynomial - maximum exponent. - - Note: if an LFSRPolynomial is passed in as the input, because - LFSRPolynomial is derived from set() it's ok: - LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p) - """ - LFSRPolynomial.__init__(self, polynomial) - self.state = Signal(self.max_exponent, reset=1) - self.enable = Signal(reset=1) - - def elaborate(self, platform): - m = Module() - # do absolutely nothing if the polynomial is empty (always has a zero) - if self.max_exponent <= 1: - return m - - # create XOR-bunch, select bits from state based on exponent - feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain) - for exponent in self: - if exponent > 0: # don't have to skip, saves CPU cycles though - feedback ^= self.state[exponent - 1] - - # if enabled, shift-and-feedback - with m.If(self.enable): - # shift up lower bits by Cat'ing in a new bit zero (feedback) - newstate = Cat(feedback, self.state[:-1]) - m.d.sync += self.state.eq(newstate) - - return m - - -# example: Poly24 -if __name__ == '__main__': - p24 = rtlil.convert(LFSR(LFSR_POLY_24)) - with open("lfsr2_p24.il", "w") as f: - f.write(p24) diff --git a/src/soc/TLB/LFSR.pyi b/src/soc/TLB/LFSR.pyi deleted file mode 100644 index 64eb9115..00000000 --- a/src/soc/TLB/LFSR.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# See Notices.txt for copyright information -from nmigen import Module -from typing import Iterable, Optional, Iterator, Any, Union -from typing_extensions import final - - -@final -class LFSRPolynomial(set): - def __init__(self, exponents: Iterable[int] = ()): - def elements() -> Iterable[int]: ... - @property - def exponents(self) -> list[int]: ... - def __str__(self) -> str: ... - def __repr__(self) -> str: ... - - -@final -class LFSR: - def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ... - @property - def width(self) -> int: ... - def elaborate(self, platform: Any) -> Module: ... diff --git a/src/soc/TLB/Makefile b/src/soc/TLB/Makefile deleted file mode 100644 index 1eb67acc..00000000 --- a/src/soc/TLB/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -verilog: - python3 Cam.py generate -t v > Cam.v diff --git a/src/soc/TLB/MemorySet.py b/src/soc/TLB/MemorySet.py deleted file mode 100644 index 11890edf..00000000 --- a/src/soc/TLB/MemorySet.py +++ /dev/null @@ -1,66 +0,0 @@ -from nmigen import Cat, Memory, Module, Signal, Elaboratable -from nmigen.cli import main -from nmigen.cli import verilog, rtlil - - -class MemorySet(Elaboratable): - def __init__(self, data_size, tag_size, set_count, active): - self.active = active - input_size = tag_size + data_size # Size of the input data - memory_width = input_size + 1 # The width of the cache memory - self.active = active - self.data_size = data_size - self.tag_size = tag_size - - # XXX TODO, use rd-enable and wr-enable? 
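The LFSR class above shifts its state up by one bit on each enabled cycle and feeds the XOR of the tapped bits (exponent e taps state bit e-1) back into bit 0. A plain-Python model of that step (lfsr_step is an invented helper, assuming the same bit ordering as the nmigen code) shows why a maximal polynomial such as LFSR_POLY_4, i.e. x^4 + x^3 + 1, cycles through all 2^4 - 1 non-zero states:

def lfsr_step(state, exponents, width):
    # feedback is the XOR of the tapped bits; exponent e taps bit e-1
    feedback = 0
    for e in exponents:
        if e > 0:
            feedback ^= (state >> (e - 1)) & 1
    # shift up, drop the top bit, insert the feedback at bit 0
    return ((state << 1) & ((1 << width) - 1)) | feedback

seen = set()
state = 1                       # matches the reset=1 of the state Signal above
for _ in range(15):
    seen.add(state)
    state = lfsr_step(state, [4, 3, 0], 4)
assert len(seen) == 15 and state == 1   # period 2**4 - 1, never reaches 0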
- self.mem = Memory(width=memory_width, depth=set_count) - self.r = self.mem.read_port() - self.w = self.mem.write_port() - - # inputs (address) - self.cset = Signal(range(set_count)) # The set to be checked - self.tag = Signal(tag_size) # The tag to find - self.data_i = Signal(data_size) # Incoming data - - # outputs - self.valid = Signal() - self.data_o = Signal(data_size) # Outgoing data (excludes tag) - - def elaborate(self, platform): - m = Module() - m.submodules.mem = self.mem - m.submodules.r = self.r - m.submodules.w = self.w - - # temporaries - active_bit = Signal() - tag_valid = Signal() - data_start = self.active + 1 - data_end = data_start + self.data_size - tag_start = data_end - tag_end = tag_start + self.tag_size - - # connect the read port address to the set/entry - read_port = self.r - m.d.comb += read_port.addr.eq(self.cset) - # Pull out active bit from data - data = read_port.data - m.d.comb += active_bit.eq(data[self.active]) - # Validate given tag vs stored tag - tag = data[tag_start:tag_end] - m.d.comb += tag_valid.eq(self.tag == tag) - # An entry is only valid if the tags match AND - # is marked as a valid entry - m.d.comb += self.valid.eq(tag_valid & active_bit) - - # output data: TODO, check rd-enable? - m.d.comb += self.data_o.eq(data[data_start:data_end]) - - # connect the write port addr to the set/entry (only if write enabled) - # (which is only done on a match, see SAC.write_entry below) - write_port = self.w - with m.If(write_port.en): - m.d.comb += write_port.addr.eq(self.cset) - m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag)) - - return m diff --git a/src/soc/TLB/PermissionValidator.py b/src/soc/TLB/PermissionValidator.py deleted file mode 100644 index 5bc90b2f..00000000 --- a/src/soc/TLB/PermissionValidator.py +++ /dev/null @@ -1,68 +0,0 @@ -from nmigen import Module, Signal, Elaboratable -from nmigen.cli import main - -from soc.TLB.PteEntry import PteEntry - - -class PermissionValidator(Elaboratable): - """ The purpose of this Module is to check the Permissions of a given PTE - against the requested access permissions. 
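Returning to MemorySet above: each memory word packs an active bit at bit 0 (the active argument), the data next, and the tag in the upper bits, which is what the data_start/tag_start slice arithmetic expresses. A plain-Python model of that packing, with invented helper names, for reference:

def pack_line(tag, data, data_size, active_bit=1):
    # bit 0 = active flag, then data, then tag in the upper bits
    return (active_bit & 1) | (data << 1) | (tag << (1 + data_size))

def lookup(word, want_tag, data_size, tag_size):
    active = word & 1
    data = (word >> 1) & ((1 << data_size) - 1)
    tag = (word >> (1 + data_size)) & ((1 << tag_size) - 1)
    return (active == 1 and tag == want_tag), data

word = pack_line(tag=0b1010, data=0xAB, data_size=8)
assert lookup(word, 0b1010, 8, 4) == (True, 0xAB)
assert lookup(word, 0b0001, 8, 4) == (False, 0xAB)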
- - This module will either validate (by setting the valid bit HIGH) - the request or find a permission fault and invalidate (by setting - the valid bit LOW) the request - """ - - def __init__(self, asid_size, pte_size): - """ Arguments: - * asid_size: (bit count) The size of the asid to be processed - * pte_size: (bit count) The size of the pte to be processed - - Return: - * valid HIGH when permissions are correct - """ - # Internal - self.pte_entry = PteEntry(asid_size, pte_size) - - # Input - self.data = Signal(asid_size + pte_size) - self.xwr = Signal(3) # Execute, Write, Read - self.super_mode = Signal(1) # Supervisor Mode - self.super_access = Signal(1) # Supervisor Access - self.asid = Signal(15) # Address Space IDentifier (ASID) - - # Output - self.valid = Signal(1) # Denotes if the permissions are correct - - def elaborate(self, platform=None): - m = Module() - - m.submodules.pte_entry = self.pte_entry - - m.d.comb += self.pte_entry.i.eq(self.data) - - # Check if the entry is valid - with m.If(self.pte_entry.v): - # ASID match or Global Permission - # Note that the MSB bound is exclusive - with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g): - # Check Execute, Write, Read (XWR) Permissions - with m.If(self.pte_entry.xwr == self.xwr): - # Supervisor Logic - with m.If(self.super_mode): - # Valid if entry is not in user mode or supervisor - # has Supervisor User Memory (SUM) access via the - # SUM bit in the sstatus register - m.d.comb += self.valid.eq((~self.pte_entry.u) - | self.super_access) - # User logic - with m.Else(): - # Valid if the entry is in user mode only - m.d.comb += self.valid.eq(self.pte_entry.u) - with m.Else(): - m.d.comb += self.valid.eq(0) - with m.Else(): - m.d.comb += self.valid.eq(0) - with m.Else(): - m.d.comb += self.valid.eq(0) - return m diff --git a/src/soc/TLB/PteEntry.py b/src/soc/TLB/PteEntry.py deleted file mode 100644 index 73ea9220..00000000 --- a/src/soc/TLB/PteEntry.py +++ /dev/null @@ -1,67 +0,0 @@ -from nmigen import Module, Signal, Elaboratable -from nmigen.cli import main - - -class PteEntry(Elaboratable): - """ The purpose of this Module is to centralize the parsing of Page - Table Entries (PTE) into one module to prevent common mistakes - and duplication of code. The control bits are parsed out for - ease of use. - - This module parses according to the standard PTE given by the - Volume II: RISC-V Privileged Architectures V1.10 Pg 60. - The Address Space IDentifier (ASID) is appended to the MSB of the input - and is parsed out as such. - - An valid input Signal would be: - ASID PTE - Bits:[78-64][63-0] - - The output PTE value will include the control bits. 
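The checks performed by PermissionValidator above, and the field split documented here, follow the RISC-V privileged-spec PTE layout (V = bit 0, XWR = bits 1-3, U = 4, G = 5, A = 6, D = 7). A plain-Python sketch of the same decode and permission test, with invented names (super_access plays the SUM-style override role):

def decode_pte(value):
    return {
        "v":   value & 1,
        "xwr": (value >> 1) & 0b111,
        "u":   (value >> 4) & 1,
        "g":   (value >> 5) & 1,
        "a":   (value >> 6) & 1,
        "d":   (value >> 7) & 1,
    }

def permission_ok(pte, pte_asid, req_xwr, req_asid, super_mode, super_access):
    f = decode_pte(pte)
    if not f["v"]:
        return False                      # invalid entry
    if pte_asid != req_asid and not f["g"]:
        return False                      # wrong address space and not global
    if f["xwr"] != req_xwr:
        return False                      # access type not permitted
    if super_mode:
        return (not f["u"]) or bool(super_access)
    return bool(f["u"])                   # user mode needs a user page

# a valid, global, user-accessible read/write page, requested from user mode
pte = 1 | (0b011 << 1) | (1 << 4) | (1 << 5)
assert permission_ok(pte, pte_asid=7, req_xwr=0b011, req_asid=3,
                     super_mode=0, super_access=0)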
- """ - def __init__(self, asid_size, pte_size): - """ Arguments: - * asid_size: (bit count) The size of the asid to be processed - * pte_size: (bit count) The size of the pte to be processed - - Return: - * d The Dirty bit from the PTE portion of i - * a The Accessed bit from the PTE portion of i - * g The Global bit from the PTE portion of i - * u The User Mode bit from the PTE portion of i - * xwr The Execute/Write/Read bit from the PTE portion of i - * v The Valid bit from the PTE portion of i - * asid The asid portion of i - * pte The pte portion of i - """ - # Internal - self.asid_start = pte_size - self.asid_end = pte_size + asid_size - - # Input - self.i = Signal(asid_size + pte_size) - - # Output - self.d = Signal(1) # Dirty bit (From pte) - self.a = Signal(1) # Accessed bit (From pte) - self.g = Signal(1) # Global Access (From pte) - self.u = Signal(1) # User Mode (From pte) - self.xwr = Signal(3) # Execute Read Write (From pte) - self.v = Signal(1) # Valid (From pte) - self.asid = Signal(asid_size) # Associated Address Space IDentifier - self.pte = Signal(pte_size) # Full Page Table Entry - - def elaborate(self, platform=None): - m = Module() - # Pull out all control bites from PTE - m.d.comb += [ - self.d.eq(self.i[7]), - self.a.eq(self.i[6]), - self.g.eq(self.i[5]), - self.u.eq(self.i[4]), - self.xwr.eq(self.i[1:4]), - self.v.eq(self.i[0]) - ] - m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end]) - m.d.comb += self.pte.eq(self.i[0:self.asid_start]) - return m diff --git a/src/soc/TLB/SetAssociativeCache.py b/src/soc/TLB/SetAssociativeCache.py deleted file mode 100644 index 30ad8090..00000000 --- a/src/soc/TLB/SetAssociativeCache.py +++ /dev/null @@ -1,274 +0,0 @@ -""" - -Online simulator of 4-way set-associative cache: -http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html - -Python simulator of a N-way set-associative cache: -https://github.com/vaskevich/CacheSim/blob/master/cachesim.py -""" - -from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable -from nmigen.compat.genlib import fsm -from nmigen.cli import main -from nmigen.cli import verilog, rtlil - -from .AddressEncoder import AddressEncoder -from .MemorySet import MemorySet - -# TODO: use a LFSR that advances continuously and picking the bottom -# few bits from it to select which cache line to replace, instead of PLRU -# http://bugs.libre-riscv.org/show_bug.cgi?id=71 -from .ariane.plru import PLRU -from .LFSR import LFSR, LFSR_POLY_24 - -SA_NA = "00" # no action (none) -SA_RD = "01" # read -SA_WR = "10" # write - - -class SetAssociativeCache(Elaboratable): - """ Set Associative Cache Memory - - The purpose of this module is to generate a memory cache given the - constraints passed in. This will create a n-way set associative cache. - It is expected for the SV TLB that the VMA will provide the set number - while the ASID provides the tag (still to be decided). - - """ - - def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False): - """ Arguments - * tag_size (bits): The bit count of the tag - * data_size (bits): The bit count of the data to be stored - * set_count (number): The number of sets/entries in the cache - * way_count (number): The number of slots a data can be stored - in one set - * lfsr: if set, use an LFSR for (pseudo-randomly) selecting - set/entry to write to. 
otherwise, use a PLRU - """ - # Internals - self.lfsr_mode = lfsr - self.way_count = way_count # The number of slots in one set - self.tag_size = tag_size # The bit count of the tag - self.data_size = data_size # The bit count of the data to be stored - - # set up Memory array - self.mem_array = Array() # memory array - for i in range(way_count): - ms = MemorySet(data_size, tag_size, set_count, active=0) - self.mem_array.append(ms) - - # Finds valid entries - self.encoder = AddressEncoder(way_count) - - # setup PLRU or LFSR - if lfsr: - # LFSR mode - self.lfsr = LFSR(LFSR_POLY_24) - else: - # PLRU mode - # One block to handle plru calculations - self.plru = PLRU(way_count) - self.plru_array = Array() # PLRU data on each set - for i in range(set_count): - name = "plru%d" % i - self.plru_array.append(Signal(self.plru.TLBSZ, name=name)) - - # Input - self.enable = Signal(1) # Whether the cache is enabled - self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX) - self.cset = Signal(range(set_count)) # The set to be checked - self.tag = Signal(tag_size) # The tag to find - self.data_i = Signal(data_size) # The input data - - # Output - self.ready = Signal(1) # 0 => Processing 1 => Ready for commands - self.hit = Signal(1) # Tag matched one way in the given set - # Tag matched many ways in the given set - self.multiple_hit = Signal(1) - self.data_o = Signal(data_size) # The data linked to the matched tag - - def check_tags(self, m): - """ Validate the tags in the selected set. If one and only one - tag matches set its state to zero and increment all others - by one. We only advance to next state if a single hit is found. - """ - # Vector to store way valid results - # A zero denotes a way is invalid - valid_vector = [] - # Loop through memory to prep read/write ports and set valid_vector - for i in range(self.way_count): - valid_vector.append(self.mem_array[i].valid) - - # Pass encoder the valid vector - m.d.comb += self.encoder.i.eq(Cat(*valid_vector)) - - # Only one entry should be marked - # This is due to already verifying the tags - # matched and the valid bit is high - with m.If(self.hit): - m.next = "FINISHED_READ" - # Pull out data from the read port - data = self.mem_array[self.encoder.o].data_o - m.d.comb += self.data_o.eq(data) - if not self.lfsr_mode: - self.access_plru(m) - - # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k - with m.Elif(self.multiple_hit): - # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck - m.d.comb += self.data_o.eq(0) - - # No tag matches means no data - with m.Else(): - # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck - m.d.comb += self.data_o.eq(0) - - def access_plru(self, m): - """ An entry was accessed and the plru tree must now be updated - """ - # Pull out the set's entry being edited - plru_entry = self.plru_array[self.cset] - m.d.comb += [ - # Set the plru data to the current state - self.plru.plru_tree.eq(plru_entry), - # Set that the cache was accessed - self.plru.lu_access_i.eq(1) - ] - - def read(self, m): - """ Go through the read process of the cache. - This takes two cycles to complete. First it checks for a valid tag - and secondly it updates the LRU values. 
- """ - with m.FSM() as fsm_read: - with m.State("READY"): - m.d.comb += self.ready.eq(0) - # check_tags will set the state if the conditions are met - self.check_tags(m) - with m.State("FINISHED_READ"): - m.next = "READY" - m.d.comb += self.ready.eq(1) - if not self.lfsr_mode: - plru_tree_o = self.plru.plru_tree_o - m.d.sync += self.plru_array[self.cset].eq(plru_tree_o) - - def write_entry(self, m): - if not self.lfsr_mode: - m.d.comb += [ # set cset (mem address) into PLRU - self.plru.plru_tree.eq(self.plru_array[self.cset]), - # and connect plru to encoder for write - self.encoder.i.eq(self.plru.replace_en_o) - ] - write_port = self.mem_array[self.encoder.o].w - else: - # use the LFSR to generate a random(ish) one of the mem array - lfsr_output = Signal(range(self.way_count)) - lfsr_random = Signal(range(self.way_count)) - m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits - # address too big, limit to range of array - m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count, - lfsr_output - self.way_count, - lfsr_output)) - write_port = self.mem_array[lfsr_random].w - - # then if there is a match from the encoder, enable the selected write - with m.If(self.encoder.single_match): - m.d.comb += write_port.en.eq(1) - - def write(self, m): - """ Go through the write process of the cache. - This takes two cycles to complete. First it writes the entry, - and secondly it updates the PLRU (in plru mode) - """ - with m.FSM() as fsm_write: - with m.State("READY"): - m.d.comb += self.ready.eq(0) - self.write_entry(m) - m.next = "FINISHED_WRITE" - with m.State("FINISHED_WRITE"): - m.d.comb += self.ready.eq(1) - if not self.lfsr_mode: - plru_entry = self.plru_array[self.cset] - m.d.sync += plru_entry.eq(self.plru.plru_tree_o) - m.next = "READY" - - def elaborate(self, platform=None): - m = Module() - - # ---- - # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array - # ---- - - m.submodules.AddressEncoder = self.encoder - if self.lfsr_mode: - m.submodules.LFSR = self.lfsr - else: - m.submodules.PLRU = self.plru - - for i, mem in enumerate(self.mem_array): - setattr(m.submodules, "mem%d" % i, mem) - - # ---- - # select mode: PLRU connect to encoder, LFSR do... something - # ---- - - if not self.lfsr_mode: - # Set what entry was hit - m.d.comb += self.plru.lu_hit.eq(self.encoder.o) - else: - # enable LFSR - m.d.comb += self.lfsr.enable.eq(self.enable) - - # ---- - # connect hit/multiple hit to encoder output - # ---- - - m.d.comb += [ - self.hit.eq(self.encoder.single_match), - self.multiple_hit.eq(self.encoder.multiple_match), - ] - - # ---- - # connect incoming data/tag/cset(addr) to mem_array - # ---- - - for mem in self.mem_array: - write_port = mem.w - m.d.comb += [mem.cset.eq(self.cset), - mem.tag.eq(self.tag), - mem.data_i.eq(self.data_i), - write_port.en.eq(0), # default: disable write - ] - # ---- - # Commands: READ/WRITE/TODO - # ---- - - with m.If(self.enable): - with m.Switch(self.command): - # Search all sets at a particular tag - with m.Case(SA_RD): - self.read(m) - with m.Case(SA_WR): - self.write(m) - # Maybe catch multiple tags write here? - # TODO - # TODO: invalidate/flush, flush-all? 
- - return m - - def ports(self): - return [self.enable, self.command, self.cset, self.tag, self.data_i, - self.ready, self.hit, self.multiple_hit, self.data_o] - - -if __name__ == '__main__': - sac = SetAssociativeCache(4, 8, 4, 6) - vl = rtlil.convert(sac, ports=sac.ports()) - with open("SetAssociativeCache.il", "w") as f: - f.write(vl) - - sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True) - vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports()) - with open("SetAssociativeCacheLFSR.il", "w") as f: - f.write(vl) diff --git a/src/soc/TLB/TLB.py b/src/soc/TLB/TLB.py deleted file mode 100644 index a3c02247..00000000 --- a/src/soc/TLB/TLB.py +++ /dev/null @@ -1,177 +0,0 @@ -""" TLB Module - - The expected form of the data is: - * Item (Bits) - * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0) -""" - -from nmigen import Memory, Module, Signal, Cat, Elaboratable -from nmigen.cli import main - -from .PermissionValidator import PermissionValidator -from .Cam import Cam - - -class TLB(Elaboratable): - def __init__(self, asid_size, vma_size, pte_size, L1_size): - """ Arguments - * asid_size: Address Space IDentifier (ASID) typically 15 bits - * vma_size: Virtual Memory Address (VMA) typically 36 bits - * pte_size: Page Table Entry (PTE) typically 64 bits - - Notes: - These arguments should represent the largest possible size - defined by the MODE settings. See - Volume II: RISC-V Privileged Architectures V1.10 Page 57 - """ - - # Internal - self.state = 0 - # L1 Cache Modules - self.cam_L1 = Cam(vma_size, L1_size) - self.mem_L1 = Memory(width=asid_size + pte_size, depth=L1_size) - - # Permission Validator - self.perm_validator = PermissionValidator(asid_size, pte_size) - - # Inputs - self.supermode = Signal(1) # Supervisor Mode - self.super_access = Signal(1) # Supervisor Access - # 00=None, 01=Search, 10=Write L1, 11=Write L2 - self.command = Signal(2) - self.xwr = Signal(3) # Execute, Write, Read - self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64 - self.address_L1 = Signal(range(L1_size)) - self.asid = Signal(asid_size) # Address Space IDentifier (ASID) - self.vma = Signal(vma_size) # Virtual Memory Address (VMA) - self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE) - - # Outputs - self.hit = Signal(1) # Denotes if the VMA had a mapped PTE - self.perm_valid = Signal(1) # Denotes if the permissions are correct - self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA - - def search(self, m, read_L1, write_L1): - """ searches the TLB - """ - m.d.comb += [ - write_L1.en.eq(0), - self.cam_L1.write_enable.eq(0), - self.cam_L1.data_in.eq(self.vma) - ] - # Match found in L1 CAM - match_found = Signal(reset_less=True) - m.d.comb += match_found.eq(self.cam_L1.single_match - | self.cam_L1.multiple_match) - with m.If(match_found): - # Memory shortcut variables - mem_address = self.cam_L1.match_address - # Memory Logic - m.d.comb += read_L1.addr.eq(mem_address) - # Permission Validator Logic - m.d.comb += [ - self.hit.eq(1), - # Set permission validator data to the correct - # register file data according to CAM match - # address - self.perm_validator.data.eq(read_L1.data), - # Execute, Read, Write - self.perm_validator.xwr.eq(self.xwr), - # Supervisor Mode - self.perm_validator.super_mode.eq(self.supermode), - # Supverisor Access - self.perm_validator.super_access.eq(self.super_access), - # Address Space IDentifier (ASID) - self.perm_validator.asid.eq(self.asid), - # Output result of permission validation - self.perm_valid.eq(self.perm_validator.valid) - ] - # Only output PTE 
if permissions are valid - with m.If(self.perm_validator.valid): - # XXX TODO - dummy for now - reg_data = Signal.like(self.pte_out) - m.d.comb += [ - self.pte_out.eq(reg_data) - ] - with m.Else(): - m.d.comb += [ - self.pte_out.eq(0) - ] - # Miss Logic - with m.Else(): - m.d.comb += [ - self.hit.eq(0), - self.perm_valid.eq(0), - self.pte_out.eq(0) - ] - - def write_l1(self, m, read_L1, write_L1): - """ writes to the L1 cache - """ - # Memory_L1 Logic - m.d.comb += [ - write_L1.en.eq(1), - write_L1.addr.eq(self.address_L1), - # The Cat places arguments from LSB -> MSB - write_L1.data.eq(Cat(self.pte_in, self.asid)) - ] - # CAM_L1 Logic - m.d.comb += [ - self.cam_L1.write_enable.eq(1), - self.cam_L1.data_in.eq(self.vma), # data_in is sent to all entries - # self.cam_L1.address_in.eq(todo) # a CAM entry needs to be selected - - ] - - def elaborate(self, platform): - m = Module() - # Add submodules - # Submodules for L1 Cache - m.submodules.cam_L1 = self.cam_L1 - m.submodules.read_L1 = read_L1 = self.mem_L1.read_port() - m.submodules.write_L1 = write_L1 = self.mem_L1.write_port() - - # Permission Validator Submodule - m.submodules.perm_valididator = self.perm_validator - - # When MODE specifies translation - # TODO add in different bit length handling ie prefix 0s - tlb_enable = Signal(reset_less=True) - m.d.comb += tlb_enable.eq(self.mode != 0) - - with m.If(tlb_enable): - m.d.comb += [ - self.cam_L1.enable.eq(1) - ] - with m.Switch(self.command): - # Search - with m.Case("01"): - self.search(m, read_L1, write_L1) - - # Write L1 - # Expected that the miss will be handled in software - with m.Case("10"): - self.write_l1(m, read_L1, write_L1) - - # TODO - # with m.Case("11"): - - # When disabled - with m.Else(): - m.d.comb += [ - self.cam_L1.enable.eq(0), - # XXX TODO - self.reg_file.enable.eq(0), - self.hit.eq(0), - self.perm_valid.eq(0), # XXX TODO, check this - self.pte_out.eq(0) - ] - return m - - -if __name__ == '__main__': - tlb = TLB(15, 36, 64, 4) - main(tlb, ports=[tlb.supermode, tlb.super_access, tlb.command, - tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid, - tlb.vma, tlb.pte_in, - tlb.hit, tlb.perm_valid, tlb.pte_out, - ] + tlb.cam_L1.ports()) diff --git a/src/soc/TLB/__init__.py b/src/soc/TLB/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/soc/TLB/ariane/TreePLRU.cpp b/src/soc/TLB/ariane/TreePLRU.cpp deleted file mode 100644 index 2f6aeea5..00000000 --- a/src/soc/TLB/ariane/TreePLRU.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#include -#include -#include - - -#define NWAY 4 -#define NLINE 256 -#define HIT 0 -#define MISS 1 -#define MS 1000 -/* -Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing -Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt -four-way set associative - three bits - each bit represents one branch point in a binary decision tree; let 1 - represent that the left side has been referenced more recently than the - right side, and 0 vice-versa - are all 4 lines valid? - / \ - yes no, use an invalid line - | - | - | - bit_0 == 0? state | replace ref to | next state - / \ ------+-------- -------+----------- - y n 00x | line_0 line_0 | 11_ - / \ 01x | line_1 line_1 | 10_ - bit_1 == 0? bit_2 == 0? 
1x0 | line_2 line_2 | 0_1 - / \ / \ 1x1 | line_3 line_3 | 0_0 - y n y n - / \ / \ ('x' means ('_' means unchanged) - line_0 line_1 line_2 line_3 don't care) - 8-way set associative - 7 = 1+2+4 bits -16-way set associative - 15 = 1+2+4+8 bits -32-way set associative - 31 = 1+2+4+8+16 bits -64-way set associative - 63 = 1+2+4+8+16+32 bits -*/ -using namespace std; -struct AddressField { - uint64_t wd_idx : 2;//Unused - uint64_t offset : 4;//Unused - uint64_t index : 8;//NLINE = 256 = 2^8 - uint64_t tag : 50; -}; - -union Address { - uint32_t* p; - AddressField fields; -}; - -struct Cell { - bool v; - uint64_t tag; - - Cell() : v(false), tag(0) {} - - bool isHit(uint64_t tag) { - return v && (tag == this->tag); - } - - void fetch(uint32_t* address) { - Address addr; - addr.p = address; - addr.fields.offset = 0; - addr.fields.wd_idx = 0; - tag = addr.fields.tag; - v = true; - } -}; - -ostream& operator<<(ostream & out, const Cell& cell) { - out << " v:" << cell.v << " tag:" << hex << cell.tag; - return out; -} - -struct Block { - Cell cell[NWAY]; - uint32_t state; - uint64_t *mask;//Mask the state to get accurate value for specified 1 bit. - uint64_t *value; - uint64_t *next_value; - - Block() : state(0) { - switch (NWAY) { - case 4: - mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101}; - value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101}; - next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000}; - break; - case 8: - mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001, - 0b1010001}; - value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000, - 0b1010001}; - next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000, - 0b0000001, 0b0000000}; - break; - //TODO - more NWAY goes here. - default: - std::cout << "Error definition NWAY = " << NWAY << std::endl; - } - } - - uint32_t *getByTag(uint64_t tag, uint32_t *pway) { - for (int i = 0; i < NWAY; ++i) { - if (cell[i].isHit(tag)) { - *pway = i; - return pway; - } - } - return NULL; - } - - void setLRU(uint32_t *address) { - int way = 0; - uint32_t st = state; - for (int i = 0; i < NWAY; ++i) { - if ((state & mask[i]) == value[i]) { - state ^= mask[i]; - way = i; - break; - } - } - cell[way].fetch(address); - cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl; - } - - uint32_t *get(uint32_t *address, uint32_t *pway) { - Address addr; - addr.p = address; - uint32_t *d = getByTag(addr.fields.tag, pway); - if (d != NULL) { - return &d[addr.fields.offset]; - } - return d; - } - - int set(uint32_t *address) { - uint32_t way = 0; - uint32_t *p = get(address, &way); - if (p != NULL) { - printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state); - state &= ~mask[way]; - printf("%X --> ", state); - state |= next_value[way]; - printf("%X\n", state); - // *p = *address; //skip since address is fake. 
- return HIT; - } else { - setLRU(address); - return MISS; - } - } -}; - -ostream& operator<<(ostream & out, const Block& block) { - out << "state:" << block.state << " "; - for (int i = 0; i cacheline refill) - self.miss_gnt_o = Signal(NR_PORTS) - self.active_serving_o = Signal(NR_PORTS) - - self.critical_word_o = Signal(64) - self.critical_word_valid_o = Signal() - output ariane_axi::req_t axi_data_o, - input ariane_axi::resp_t axi_data_i, - - self.mshr_addr_i = Array(Signal(name="bdata_o", 56) \ - for i in range(NR_PORTS)) - self.mshr_addr_matches_o = Signal(NR_PORTS) - self.mshr_index_matches_o = Signal(NR_PORTS) - - # AMO - self.amo_req_i = AMOReq() - self.amo_resp_o = AMOResp() - # Port to SRAMs, for refill and eviction - self.req_o = Signal(DCACHE_SET_ASSOC) - self.addr_o = Signal(DCACHE_INDEX_WIDTH) # address into cache array - self.data_o = CacheLine() - self.be_o = CLBE() - self.data_i = Array(CacheLine() \ - for i in range(DCACHE_SET_ASSOC)) - self.we_o = Signal() - - def elaborate(self, platform): - # Registers - mshr_t mshr_d, mshr_q; - logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q; - logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q; - # cache line to evict - cache_line_t evict_cl_d, evict_cl_q; - - logic serve_amo_d, serve_amo_q; - # Request from one FSM - miss_req_valid = Signal(self.NR_PORTS) - miss_req_bypass = Signal(self.NR_PORTS) - miss_req_addr = Array(Signal(name="miss_req_addr", 64) \ - for i in range(NR_PORTS)) - miss_req_wdata = Array(Signal(name="miss_req_wdata", 64) \ - for i in range(NR_PORTS)) - miss_req_we = Signal(self.NR_PORTS) - miss_req_be = Array(Signal(name="miss_req_be", 8) \ - for i in range(NR_PORTS)) - miss_req_size = Array(Signal(name="miss_req_size", 2) \ - for i in range(NR_PORTS)) - - # Cache Line Refill <-> AXI - req_fsm_miss_valid = Signal() - req_fsm_miss_addr = Signal(64) - req_fsm_miss_wdata = Signal(DCACHE_LINE_WIDTH) - req_fsm_miss_we = Signal() - req_fsm_miss_be = Signal(DCACHE_LINE_WIDTH//8) - ariane_axi::ad_req_t req_fsm_miss_req; - req_fsm_miss_size = Signal(2) - - gnt_miss_fsm = Signal() - valid_miss_fsm = Signal() - nmiss = DCACHE_LINE_WIDTH//64 - data_miss_fsm = Array(Signal(name="data_miss_fsm", 64) \ - for i in range(nmiss)) - - # Cache Management <-> LFSR - lfsr_enable = Signal() - lfsr_oh = Signal(DCACHE_SET_ASSOC) - lfsr_bin = Signal($clog2(DCACHE_SET_ASSOC-1)) - # AMOs - ariane_pkg::amo_t amo_op; - amo_operand_a = Signal(64) - amo_operand_b = Signal(64) - amo_result_o = Signal(64) - - struct packed { - logic [63:3] address; - logic valid; - } reservation_d, reservation_q; - - # ------------------------------ - # Cache Management - # ------------------------------ - evict_way = Signal(DCACHE_SET_ASSOC) - valid_way = Signal(DCACHE_SET_ASSOC) - - for (i in range(DCACHE_SET_ASSOC): - comb += evict_way[i].eq(data_i[i].valid & data_i[i].dirty) - comb += valid_way[i].eq(data_i[i].valid) - - # ---------------------- - # Default Assignments - # ---------------------- - # to AXI refill - req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ; - req_fsm_miss_size = Const(0b11, 2) - # core - serve_amo_d = serve_amo_q; - # -------------------------------- - # Flush and Miss operation - # -------------------------------- - state_d = state_q; - cnt_d = cnt_q; - evict_way_d = evict_way_q; - evict_cl_d = evict_cl_q; - mshr_d = mshr_q; - # communicate to the requester which unit we are currently serving - active_serving_o[mshr_q.id] = mshr_q.valid; - # AMOs - # silence the unit when not used - amo_op = amo_req_i.amo_op; - - reservation_d = 
reservation_q; - with m.FSM() as state_q: - - with m.Case("IDLE"): - # lowest priority are AMOs, wait until everything else - # is served before going for the AMOs - with m.If (amo_req_i.req & ~busy_i): - # 1. Flush the cache - with m.If(~serve_amo_q): - m.next = "FLUSH_REQ_STATUS" - serve_amo_d.eq(0b1 - cnt_d.eq(0 - # 2. Do the AMO - with m.Else(): - m.next = "AMO_LOAD" - serve_amo_d.eq(0b0 - - # check if we want to flush and can flush - # e.g.: we are not busy anymore - # TODO: Check that the busy flag is indeed needed - with m.If (flush_i & ~busy_i): - m.next = "FLUSH_REQ_STATUS" - cnt_d = 0 - - # check if one of the state machines missed - for i in range(NR_PORTS): - # here comes the refill portion of code - with m.If (miss_req_valid[i] & ~miss_req_bypass[i]): - m.next = "MISS" - # we are taking another request so don't - # take the AMO - serve_amo_d = 0b0; - # save to MSHR - wid = DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH - comb += [ mshr_d.valid.eq(0b1), - mshr_d.we.eq(miss_req_we[i]), - mshr_d.id.eq(i), - mshr_d.addr.eq(miss_req_addr[i][0:wid]), - mshr_d.wdata.eq(miss_req_wdata[i]), - mshr_d.be.eq(miss_req_be[i]), - ] - break - - # ~> we missed on the cache - with m.Case("MISS"): - # 1. Check if there is an empty cache-line - # 2. If not -> evict one - comb += req_o.eq(1) - sync += addr_o.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH] - m.next = "MISS_REPL" - comb += miss_o.eq(1) - - # ~> second miss cycle - with m.Case("MISS_REPL"): - # if all are valid we need to evict one, - # pseudo random from LFSR - with m.If(~(~valid_way).bool()): - comb += lfsr_enable.eq(0b1) - comb += evict_way_d.eq(lfsr_oh) - # do we need to write back the cache line? - with m.If(data_i[lfsr_bin].dirty): - state_d = WB_CACHELINE_MISS; - comb += evict_cl_d.tag.eq(data_i[lfsr_bin].tag) - comb += evict_cl_d.data.eq(data_i[lfsr_bin].data) - comb += cnt_d.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH]) - # no - we can request a cache line now - with m.Else(): - m.next = "REQ_CACHELINE" - # we have at least one free way - with m.Else(): - # get victim cache-line by looking for the - # first non-valid bit - comb += evict_way_d.eq(get_victim_cl(~valid_way) - m.next = "REQ_CACHELINE" - - # ~> we can just load the cache-line, - # the way is store in evict_way_q - with m.Case("REQ_CACHELINE"): - comb += req_fsm_miss_valid .eq(1) - sync += req_fsm_miss_addr .eq(mshr_q.addr) - - with m.If (gnt_miss_fsm): - m.next = "SAVE_CACHELINE" - comb += miss_gnt_o[mshr_q.id].eq(1) - - # ~> replace the cacheline - with m.Case("SAVE_CACHELINE"): - # calculate cacheline offset - automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; - sync += cl_offset.eq(mshr_q.addr[3:DCACHE_BYTE_OFFSET] << 6) - # we've got a valid response from refill unit - with m.If (valid_miss_fsm): - wid = DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH - sync += addr_o .eq(mshr_q.addr[:DCACHE_INDEX_WIDTH]) - sync += req_o .eq(evict_way_q) - comb += we_o .eq(1) - comb += be_o .eq(1) - sync += be_o.vldrty .eq(evict_way_q) - sync += data_o.tag .eq(mshr_q.addr[DCACHE_INDEX_WIDTH:wid] - comb += data_o.data .eq(data_miss_fsm) - comb += data_o.valid.eq(1) - comb += data_o.dirty.eq(0) - - # is this a write? 
- with m.If (mshr_q.we): - # Yes, so safe the updated data now - for i in range(8): - # check if we really want to write - # the corresponding byte - with m.If (mshr_q.be[i]): - sync += data_o.data[(cl_offset + i*8) +: 8].eq(mshr_q.wdata[i]; - # it's immediately dirty if we write - comb += data_o.dirty.eq(1) - - # reset MSHR - comb += mshr_d.valid.eq(0) - # go back to idle - m.next = 'IDLE' - - # ------------------------------ - # Write Back Operation - # ------------------------------ - # ~> evict a cache line from way saved in evict_way_q - with m.Case("WB_CACHELINE_FLUSH"): - with m.Case("WB_CACHELINE_MISS"): - - comb += req_fsm_miss_valid .eq(0b1) - sync += req_fsm_miss_addr .eq({evict_cl_q.tag, cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{0b0}}}; - comb += req_fsm_miss_be .eq(1) - comb += req_fsm_miss_we .eq(0b1) - sync += req_fsm_miss_wdata .eq(evict_cl_q.data; - - # we've got a grant --> this is timing critical, think about it - if (gnt_miss_fsm) begin - # write status array - sync += addr_o .eq(cnt_q) - comb += req_o .eq(0b1) - comb += we_o .eq(0b1) - comb += data_o.valid.eq(INVALIDATE_ON_FLUSH ? 0b0 : 0b1) - # invalidate - sync += be_o.vldrty.eq(evict_way_q) - # go back to handling the miss or flushing, - # depending on where we came from - with m.If(state_q == WB_CACHELINE_MISS): - m.next = "MISS" - with m.Else(): - m.next = "FLUSH_REQ_STATUS" - - # ------------------------------ - # Flushing & Initialization - # ------------------------------ - # ~> make another request to check the same - # cache-line if there are still some valid entries - with m.Case("FLUSH_REQ_STATUS"): - comb += req_o .eq(1) - sync += addr_o .eq(cnt_q) - m.next = "FLUSHING" - - with m.Case("FLUSHING"): - # this has priority - # at least one of the cache lines is dirty - with m.If(~evict_way): - # evict cache line, look for the first - # cache-line which is dirty - comb += evict_way_d.eq(get_victim_cl(evict_way)) - comb += evict_cl_d .eq(data_i[one_hot_to_bin(evict_way)]) - state_d = WB_CACHELINE_FLUSH; - # not dirty ~> increment and continue - with m.Else(): - # increment and re-request - sync += cnt_d.eq(cnt_q + (1 << DCACHE_BYTE_OFFSET)) - m.next = "FLUSH_REQ_STATUS" - sync += addr_o .eq(cnt_q) - comb += req_o .eq(1) - comb += be_o.vldrty.eq(INVALIDATE_ON_FLUSH ? 
1 : 0) - comb += we_o .eq(1) - # finished with flushing operation, go back to idle - with m.If (cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] \ - == DCACHE_NUM_WORDS-1): - # only acknowledge if the flush wasn't - # triggered by an atomic - sync += flush_ack_o.eq(~serve_amo_q) - m.next = "IDLE" - - # ~> only called after reset - with m.Case("INIT"): - # initialize status array - sync += addr_o.eq(cnt_q) - comb += req_o .eq(1) - comb += we_o .eq(1) - # only write the dirty array - comb += be_o.vldrty.eq(1) - sync += cnt_d .eq(cnt_q + (1 << DCACHE_BYTE_OFFSET)) - # finished initialization - with m.If (cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] \ - == DCACHE_NUM_WORDS-1) - m.next = "IDLE" - - # ---------------------- - # AMOs - # ---------------------- - # TODO(zarubaf) Move this closer to memory - # ~> we are here because we need to do the AMO, - # the cache is clean at this point - # start by executing the load - with m.Case("AMO_LOAD"): - comb += req_fsm_miss_valid.eq(1) - # address is in operand a - comb += req_fsm_miss_addr.eq(amo_req_i.operand_a) - comb += req_fsm_miss_req.eq(ariane_axi::SINGLE_REQ) - comb += req_fsm_miss_size.eq(amo_req_i.size) - # the request has been granted - with m.If(gnt_miss_fsm): - m.next = "AMO_SAVE_LOAD" - # save the load value - with m.Case("AMO_SAVE_LOAD"): - with m.If (valid_miss_fsm): - # we are only concerned about the lower 64-bit - comb += mshr_d.wdata.eq(data_miss_fsm[0]) - m.next = "AMO_STORE" - # and do the store - with m.Case("AMO_STORE"): - load_data = Signal(64) - # re-align load data - comb += load_data.eq(data_align(amo_req_i.operand_a[:3], - mshr_q.wdata)) - # Sign-extend for word operation - with m.If (amo_req_i.size == 0b10): - comb += amo_operand_a.eq(sext32(load_data[:32])) - comb += amo_operand_b.eq(sext32(amo_req_i.operand_b[:32])) - with m.Else(): - comb += amo_operand_a.eq(load_data) - comb += amo_operand_b.eq(amo_req_i.operand_b) - - # we do not need a store request for load reserved - # or a failing store conditional - # we can bail-out without making any further requests - with m.If ((amo_req_i.amo_op == AMO_LR) | \ - ((amo_req_i.amo_op == AMO_SC) & \ - ((reservation_q.valid & \ - (reservation_q.address != \ - amo_req_i.operand_a[3:64])) | \ - ~reservation_q.valid))): - comb += req_fsm_miss_valid.eq(0) - m.next = "IDLE" - comb += amo_resp_o.ack.eq(1) - # write-back the result - comb += amo_resp_o.result.eq(amo_operand_a) - # we know that the SC failed - with m.If (amo_req_i.amo_op == AMO_SC): - comb += amo_resp_o.result.eq(1) - # also clear the reservation - comb += reservation_d.valid.eq(0) - with m.Else(): - comb += req_fsm_miss_valid.eq(1) - - comb += req_fsm_miss_we .eq(1) - comb += req_fsm_miss_req .eq(ariane_axi::SINGLE_REQ) - comb += req_fsm_miss_size.eq(amo_req_i.size) - comb += req_fsm_miss_addr.eq(amo_req_i.operand_a) - - comb += req_fsm_miss_wdata.eq( - data_align(amo_req_i.operand_a[0:3], amo_result_o)) - comb += req_fsm_miss_be.eq( - be_gen(amo_req_i.operand_a[0:3], amo_req_i.size)) - - # place a reservation on the memory - with m.If (amo_req_i.amo_op == AMO_LR): - comb += reservation_d.address.eq(amo_req_i.operand_a[3:64]) - comb += reservation_d.valid.eq(1) - - # the request is valid or we didn't need to go for another store - with m.If (valid_miss_fsm): - m.next = "IDLE" - comb += amo_resp_o.ack.eq(1) - # write-back the result - comb += amo_resp_o.result.eq(amo_operand_a; - - if (amo_req_i.amo_op == AMO_SC) begin - comb += amo_resp_o.result.eq(0) - # An SC must fail if there is another SC - # (to any address) between 
the LR and the SC in - # program order (even to the same address). - # in any case destroy the reservation - comb += reservation_d.valid.eq(0) - - # check MSHR for aliasing - - comb += mshr_addr_matches_o .eq(0) - comb += mshr_index_matches_o.eq() - - for i in range(NR_PORTS): - # check mshr for potential matching of other units, - # exclude the unit currently being served - with m.If (mshr_q.valid & \ - (mshr_addr_i[i][DCACHE_BYTE_OFFSET:56] == \ - mshr_q.addr[DCACHE_BYTE_OFFSET:56])): - comb += mshr_addr_matches_o[i].eq(1) - - # same as previous, but checking only the index - with m.If (mshr_q.valid & \ - (mshr_addr_i[i][DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] == \ - mshr_q.addr[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH])): - mshr_index_matches_o[i].eq(1) - - # -------------------- - # Sequential Process - # -------------------- - - """ - #pragma translate_off - `ifndef VERILATOR - # assert that cache only hits on one way - assert property ( - @(posedge clk_i) $onehot0(evict_way_q)) else $warning("Evict-way should be one-hot encoded"); - `endif - #pragma translate_on - """ - - # ---------------------- - # Bypass Arbiter - # ---------------------- - # Connection Arbiter <-> AXI - req_fsm_bypass_valid = Signal() - req_fsm_bypass_addr = Signal(64) - req_fsm_bypass_wdata = Signal(64) - req_fsm_bypass_we = Signal() - req_fsm_bypass_be = Signal(8) - req_fsm_bypass_size = Signal(2) - gnt_bypass_fsm = Signal() - valid_bypass_fsm = Signal() - data_bypass_fsm = Signal(64) - logic [$clog2(NR_PORTS)-1:0] id_fsm_bypass; - logic [3:0] id_bypass_fsm; - logic [3:0] gnt_id_bypass_fsm; - - i_bypass_arbiter = ib = AXIArbiter( NR_PORTS, 64) - comb += [ - # Master Side - ib.data_req_i .eq( miss_req_valid & miss_req_bypass ), - ib.address_i .eq( miss_req_addr ), - ib.data_wdata_i .eq( miss_req_wdata ), - ib.data_we_i .eq( miss_req_we ), - ib.data_be_i .eq( miss_req_be ), - ib.data_size_i .eq( miss_req_size ), - ib.data_gnt_o .eq( bypass_gnt_o ), - ib.data_rvalid_o .eq( bypass_valid_o ), - ib.data_rdata_o .eq( bypass_data_o ), - # Slave Sid - ib.id_i .eq( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), - ib.id_o .eq( id_fsm_bypass ), - ib.gnt_id_i .eq( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), - ib.address_o .eq( req_fsm_bypass_addr ), - ib.data_wdata_o .eq( req_fsm_bypass_wdata ), - ib.data_req_o .eq( req_fsm_bypass_valid ), - ib.data_we_o .eq( req_fsm_bypass_we ), - ib.data_be_o .eq( req_fsm_bypass_be ), - ib.data_size_o .eq( req_fsm_bypass_size ), - ib.data_gnt_i .eq( gnt_bypass_fsm ), - ib.data_rvalid_i .eq( valid_bypass_fsm ), - ib.data_rdata_i .eq( data_bypass_fsm ), - ] - - axi_adapter #( - .DATA_WIDTH ( 64 ), - .AXI_ID_WIDTH ( 4 ), - .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET ) - ) i_bypass_axi_adapter ( - .clk_i, - .rst_ni, - .req_i ( req_fsm_bypass_valid ), - .type_i ( ariane_axi::SINGLE_REQ ), - .gnt_o ( gnt_bypass_fsm ), - .addr_i ( req_fsm_bypass_addr ), - .we_i ( req_fsm_bypass_we ), - .wdata_i ( req_fsm_bypass_wdata ), - .be_i ( req_fsm_bypass_be ), - .size_i ( req_fsm_bypass_size ), - .id_i ( Cat(id_fsm_bypass, 0, 0) ), - .valid_o ( valid_bypass_fsm ), - .rdata_o ( data_bypass_fsm ), - .gnt_id_o ( gnt_id_bypass_fsm ), - .id_o ( id_bypass_fsm ), - .critical_word_o ( ), # not used for single requests - .critical_word_valid_o ( ), # not used for single requests - .axi_req_o ( axi_bypass_o ), - .axi_resp_i ( axi_bypass_i ) - ); - - # ---------------------- - # Cache Line AXI Refill - # ---------------------- - axi_adapter #( - .DATA_WIDTH ( DCACHE_LINE_WIDTH ), - .AXI_ID_WIDTH ( 4 ), - .CACHELINE_BYTE_OFFSET ( 
DCACHE_BYTE_OFFSET ) - ) i_miss_axi_adapter ( - .clk_i, - .rst_ni, - .req_i ( req_fsm_miss_valid ), - .type_i ( req_fsm_miss_req ), - .gnt_o ( gnt_miss_fsm ), - .addr_i ( req_fsm_miss_addr ), - .we_i ( req_fsm_miss_we ), - .wdata_i ( req_fsm_miss_wdata ), - .be_i ( req_fsm_miss_be ), - .size_i ( req_fsm_miss_size ), - .id_i ( Const(0b1100, 4) ), - .gnt_id_o ( ), # open - .valid_o ( valid_miss_fsm ), - .rdata_o ( data_miss_fsm ), - .id_o ( ), - .critical_word_o, - .critical_word_valid_o, - .axi_req_o ( axi_data_o ), - .axi_resp_i ( axi_data_i ) - ); - - # ----------------- - # Replacement LFSR - # ----------------- - lfsr_8bit #(.WIDTH (DCACHE_SET_ASSOC)) i_lfsr ( - .en_i ( lfsr_enable ), - .refill_way_oh ( lfsr_oh ), - .refill_way_bin ( lfsr_bin ), - .* - ); - - # ----------------- - # AMO ALU - # ----------------- - amo_alu i_amo_alu ( - .amo_op_i ( amo_op ), - .amo_operand_a_i ( amo_operand_a ), - .amo_operand_b_i ( amo_operand_b ), - .amo_result_o ( amo_result_o ) - ); - - # ----------------- - # Struct Split - # ----------------- - - for i in range(NR_PORTS): - miss_req = MissReq() - comb += miss_req.eq(miss_req_i[i]); - comb += miss_req_valid [i] .eq(miss_req.valid) - comb += miss_req_bypass [i] .eq(miss_req.bypass) - comb += miss_req_addr [i] .eq(miss_req.addr) - comb += miss_req_wdata [i] .eq(miss_req.wdata) - comb += miss_req_we [i] .eq(miss_req.we) - comb += miss_req_be [i] .eq(miss_req.be) - comb += miss_req_size [i] .eq(miss_req.size) - - # -------------- - # AXI Arbiter - # --------------s - # - # Description: Arbitrates access to AXI refill/bypass - # -class AXIArbiter: - def __init__(self, NR_PORTS = 3, DATA_WIDTH = 64): - self.NR_PORTS = NR_PORTS - self.DATA_WIDTH = DATA_WIDTH - self.pwid = pwid = ceil(log(NR_PORTS) / log(2)) - rst_ni = ResetSignal() # Asynchronous reset active low - # master ports - self.data_req_i = Signal(NR_PORTS) - self.address_i = Array(Signal(name="address_i", 64) \ - for i in range(NR_PORTS)) - self.data_wdata_i = Array(Signal(name="data_wdata_i", 64) \ - for i in range(NR_PORTS)) - self.data_we_i = Signal(NR_PORTS) - self.data_be_i = Array(Signal(name="data_wdata_i", DATA_WIDTH/8) \ - for i in range(NR_PORTS)) - self.data_size_i = Array(Signal(name="data_size_i", 2) \ - for i in range(NR_PORTS)) - self.data_gnt_o = Signal(NR_PORTS) - self.data_rvalid_o = Signal(NR_PORTS) - self.data_rdata_o = Array(Signal(name="data_rdata_o", 64) \ - for i in range(NR_PORTS)) - - # slave port - self.id_i = Signal(pwid) - self.id_o = Signal(pwid) - self.gnt_id_i = Signal(pwid) - self.data_req_o = Signal() - self.address_o = Signal(64) - self.data_wdata_o = Signal(DATA_WIDTH) - self.data_we_o = Signal() - self.data_be_o = Signal(DATA_WIDTH/8) - self.data_size_o = Signal(2) - self.data_gnt_i = Signal() - self.data_rvalid_i = Signal() - self.data_rdata_i = Signal(DATA_WIDTH) - - def elaborate(self, platform): - #enum logic [1:0] { IDLE, REQ, SERVING } state_d, state_q; - - class Packet: - def __init__(self, pwid, DATA_WIDTH): - self.id = Signal(pwid) - self.address = Signal(64) - self.data = Signal(64) - self.size = Signal(2) - self.be = Signal(DATA_WIDTH/8) - self.we = Signal() - - request_index = Signal(self.pwid) - req_q = Packet(self.pwid, self.DATA_WIDTH) - req_d = Packet(self.pwid, self.DATA_WIDTH) - - # request register - sync += req_q.eq(req_d) - - # request port - comb += self.address_o .eq(req_q.address) - comb += self.data_wdata_o .eq(req_q.data) - comb += self.data_be_o .eq(req_q.be) - comb += self.data_size_o .eq(req_q.size) - comb += self.data_we_o 
.eq(req_q.we) - comb += self.id_o .eq(req_q.id) - comb += self.data_gnt_o .eq(0) - # read port - comb += self.data_rvalid_o .eq(0) - comb += self.data_rdata_o .eq(0) - comb += self.data_rdata_o[req_q.id].eq(data_rdata_i) - - m.submodules.pp = pp = PriorityEncoder(self.NR_PORTS) - comb += pp.i.eq(self.data_req_i) # select one request (priority-based) - comb += request_index.eq(pp.o) - - with m.Switch("state") as s: - - with m.Case("IDLE"): - # wait for incoming requests (priority encoder data_req_i) - with m.If(~pp.n): # one output valid from encoder - comb += self.data_req_o .eq(self.data_req_i[i]) - comb += self.data_gnt_o[i].eq(self.data_req_i[i]) - # save the request - comb += req_d.address.eq(self.address_i[i]) - comb += req_d.id.eq(request_index) - comb += req_d.data.eq(self.data_wdata_i[i]) - comb += req_d.size.eq(self.data_size_i[i]) - comb += req_d.be.eq(self.data_be_i[i]) - comb += req_d.we.eq(self.data_we_i[i]) - m.next = "SERVING" - - comb += self.address_o .eq(self.address_i[request_index]) - comb += self.data_wdata_o .eq(self.data_wdata_i[request_index]) - comb += self.data_be_o .eq(self.data_be_i[request_index]) - comb += self.data_size_o .eq(self.data_size_i[request_index]) - comb += self.data_we_o .eq(self.data_we_i[request_index]) - comb += self.id_o .eq(request_index) - - with m.Case("SERVING"): - comb += self.data_req_o.eq(1) - with m.If (self.data_rvalid_i): - comb += self.data_rvalid_o[req_q.id].eq(1) - m.next = "IDLE" - - # ------------ - # Assertions - # ------------ - - """ -#pragma translate_off -`ifndef VERILATOR -# make sure that we eventually get an rvalid after we received a grant -assert property (@(posedge clk_i) data_gnt_i |-> ##[1:$] data_rvalid_i ) - else begin $error("There was a grant without a rvalid"); $stop(); end -# assert that there is no grant without a request -assert property (@(negedge clk_i) data_gnt_i |-> data_req_o) - else begin $error("There was a grant without a request."); $stop(); end -# assert that the address does not contain X when request is sent -assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_o)) ) - else begin $error("address contains X when request is set"); $stop(); end - -`endif -#pragma translate_on - """ - diff --git a/src/soc/TLB/ariane/mmu.py b/src/soc/TLB/ariane/mmu.py deleted file mode 100644 index a14862cd..00000000 --- a/src/soc/TLB/ariane/mmu.py +++ /dev/null @@ -1,474 +0,0 @@ -""" -# Copyright 2018 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. -# -# Author: Florian Zaruba, ETH Zurich -# Date: 19/04/2017 -# Description: Memory Management Unit for Ariane, contains TLB and -# address translation unit. 
SV48 as defined in -# Volume II: RISC-V Privileged Architectures V1.10 Page 63 - -import ariane_pkg::*; -""" - -from nmigen import Const, Signal, Cat, Module, Mux -from nmigen.cli import verilog, rtlil - -from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW -from tlb import TLB -from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT, - LOAD_PAGE_FAULT, STORE_PAGE_FAULT) - -PRIV_LVL_M = Const(0b11, 2) -PRIV_LVL_S = Const(0b01, 2) -PRIV_LVL_U = Const(0b00, 2) - - -class RVException: - def __init__(self): - self.cause = Signal(64) # cause of exception - self.tval = Signal(64) # more info of causing exception - # (e.g.: instruction causing it), - # address of LD/ST fault - self.valid = Signal() - - def eq(self, inp): - res = [] - for (o, i) in zip(self.ports(), inp.ports()): - res.append(o.eq(i)) - return res - - def __iter__(self): - yield self.cause - yield self.tval - yield self.valid - - def ports(self): - return list(self) - - -class ICacheReqI: - def __init__(self): - self.fetch_valid = Signal() # address translation valid - self.fetch_paddr = Signal(64) # physical address in - self.fetch_exception = RVException() # exception occurred during fetch - - def __iter__(self): - yield self.fetch_valid - yield self.fetch_paddr - yield from self.fetch_exception - - def ports(self): - return list(self) - - -class ICacheReqO: - def __init__(self): - self.fetch_req = Signal() # address translation request - self.fetch_vaddr = Signal(64) # virtual address out - - def __iter__(self): - yield self.fetch_req - yield self.fetch_vaddr - - def ports(self): - return list(self) - - -class MMU: - def __init__(self, instr_tlb_entries = 4, - data_tlb_entries = 4, - asid_width = 1): - self.instr_tlb_entries = instr_tlb_entries - self.data_tlb_entries = data_tlb_entries - self.asid_width = asid_width - - self.flush_i = Signal() - self.enable_translation_i = Signal() - self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST - # IF interface - self.icache_areq_i = ICacheReqO() - self.icache_areq_o = ICacheReqI() - # LSU interface - # this is a more minimalistic interface because the actual addressing - # logic is handled in the LSU as we distinguish load and stores, - # what we do here is simple address translation - self.misaligned_ex_i = RVException() - self.lsu_req_i = Signal() # request address translation - self.lsu_vaddr_i = Signal(64) # virtual address in - self.lsu_is_store_i = Signal() # the translation is requested by a store - # if we need to walk the page table we can't grant in the same cycle - - # Cycle 0 - self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request - # if translation hits in the DTLB - # Cycle 1 - self.lsu_valid_o = Signal() # translation is valid - self.lsu_paddr_o = Signal(64) # translated address - self.lsu_exception_o = RVException() # addr translate threw exception - - # General control signals - self.priv_lvl_i = Signal(2) - self.ld_st_priv_lvl_i = Signal(2) - self.sum_i = Signal() - self.mxr_i = Signal() - # input logic flag_mprv_i, - self.satp_ppn_i = Signal(44) - self.asid_i = Signal(self.asid_width) - self.flush_tlb_i = Signal() - # Performance counters - self.itlb_miss_o = Signal() - self.dtlb_miss_o = Signal() - # PTW memory interface - self.req_port_i = DCacheReqO() - self.req_port_o = DCacheReqI() - - def elaborate(self, platform): - m = Module() - - iaccess_err = Signal() # insufficient priv to access instr page - daccess_err = Signal() # insufficient priv to access data page - ptw_active = Signal() # PTW is currently walking 
a page table - walking_instr = Signal() # PTW is walking because of an ITLB miss - ptw_error = Signal() # PTW threw an exception - - update_vaddr = Signal(48) # guessed - uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros - update_ptw_itlb = TLBUpdate(self.asid_width) - update_ptw_dtlb = TLBUpdate(self.asid_width) - - itlb_lu_access = Signal() - itlb_content = PTE() - itlb_is_2M = Signal() - itlb_is_1G = Signal() - itlb_is_512G = Signal() - itlb_lu_hit = Signal() - - dtlb_lu_access = Signal() - dtlb_content = PTE() - dtlb_is_2M = Signal() - dtlb_is_1G = Signal() - dtlb_is_512G = Signal() - dtlb_lu_hit = Signal() - - # Assignments - m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req), - dtlb_lu_access.eq(self.lsu_req_i) - ] - - # ITLB - m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries, - self.asid_width) - m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i), - i_tlb.update_i.eq(update_ptw_itlb), - i_tlb.lu_access_i.eq(itlb_lu_access), - i_tlb.lu_asid_i.eq(self.asid_i), - i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr), - itlb_content.eq(i_tlb.lu_content_o), - itlb_is_2M.eq(i_tlb.lu_is_2M_o), - itlb_is_1G.eq(i_tlb.lu_is_1G_o), - itlb_is_512G.eq(i_tlb.lu_is_512G_o), - itlb_lu_hit.eq(i_tlb.lu_hit_o), - ] - - # DTLB - m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries, - self.asid_width) - m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i), - d_tlb.update_i.eq(update_ptw_dtlb), - d_tlb.lu_access_i.eq(dtlb_lu_access), - d_tlb.lu_asid_i.eq(self.asid_i), - d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i), - dtlb_content.eq(d_tlb.lu_content_o), - dtlb_is_2M.eq(d_tlb.lu_is_2M_o), - dtlb_is_1G.eq(d_tlb.lu_is_1G_o), - dtlb_is_512G.eq(d_tlb.lu_is_512G_o), - dtlb_lu_hit.eq(d_tlb.lu_hit_o), - ] - - # PTW - m.submodules.ptw = ptw = PTW(self.asid_width) - m.d.comb += [ptw_active.eq(ptw.ptw_active_o), - walking_instr.eq(ptw.walking_instr_o), - ptw_error.eq(ptw.ptw_error_o), - ptw.enable_translation_i.eq(self.enable_translation_i), - - update_vaddr.eq(ptw.update_vaddr_o), - update_ptw_itlb.eq(ptw.itlb_update_o), - update_ptw_dtlb.eq(ptw.dtlb_update_o), - - ptw.itlb_access_i.eq(itlb_lu_access), - ptw.itlb_hit_i.eq(itlb_lu_hit), - ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr), - - ptw.dtlb_access_i.eq(dtlb_lu_access), - ptw.dtlb_hit_i.eq(dtlb_lu_hit), - ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i), - - ptw.req_port_i.eq(self.req_port_i), - self.req_port_o.eq(ptw.req_port_o), - ] - - # ila_1 i_ila_1 ( - # .clk(clk_i), # input wire clk - # .probe0({req_port_o.address_tag, req_port_o.address_index}), - # .probe1(req_port_o.data_req), # input wire [63:0] probe1 - # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2 - # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3 - # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4 - # .probe5(ptw_error), # input wire [1:0] probe5 - # .probe6(update_vaddr), # input wire [0:0] probe6 - # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7 - # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8 - # .probe9(dtlb_lu_access), # input wire [0:0] probe9 - # .probe10(lsu_vaddr_i), # input wire [0:0] probe10 - # .probe11(dtlb_lu_hit), # input wire [0:0] probe11 - # .probe12(itlb_lu_access), # input wire [0:0] probe12 - # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13 - # .probe14(itlb_lu_hit) # input wire [0:0] probe13 - # ); - - #----------------------- - # Instruction Interface - #----------------------- - # The instruction interface is a simple request response interface - - # MMU disabled: just pass 
through - m.d.comb += [self.icache_areq_o.fetch_valid.eq( - self.icache_areq_i.fetch_req), - # play through in case we disabled address translation - self.icache_areq_o.fetch_paddr.eq( - self.icache_areq_i.fetch_vaddr) - ] - # two potential exception sources: - # 1. HPTW threw an exception -> signal with a page fault exception - # 2. We got an access error because of insufficient permissions -> - # throw an access exception - m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0) - # Check whether we are allowed to access this memory region - # from a fetch perspective - - # PLATEN TODO: use PermissionValidator instead [we like modules] - m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \ - (((self.priv_lvl_i == PRIV_LVL_U) & \ - ~itlb_content.u) | \ - ((self.priv_lvl_i == PRIV_LVL_S) & \ - itlb_content.u))) - - # MMU enabled: address from TLB, request delayed until hit. - # Error when TLB hit and no access right or TLB hit and - # translated address not valid (e.g. AXI decode error), - # or when PTW performs walk due to ITLB miss and raises - # an error. - with m.If (self.enable_translation_i): - # we work with SV48, so if VM is enabled, check that - # all bits [47:38] are equal - with m.If (self.icache_areq_i.fetch_req & \ - ~(((~self.icache_areq_i.fetch_vaddr[47:64]) == 0) | \ - (self.icache_areq_i.fetch_vaddr[47:64]) == 0)): - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), - fe.tval.eq(self.icache_areq_i.fetch_vaddr), - fe.valid.eq(1) - ] - - m.d.comb += self.icache_areq_o.fetch_valid.eq(0) - - # 4K page - paddr = Signal.like(self.icache_areq_o.fetch_paddr) - paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12], - itlb_content.ppn) - m.d.comb += paddr.eq(paddr4k) - # Mega page - with m.If(itlb_is_2M): - m.d.comb += paddr[12:21].eq( - self.icache_areq_i.fetch_vaddr[12:21]) - # Giga page - with m.If(itlb_is_1G): - m.d.comb += paddr[12:30].eq( - self.icache_areq_i.fetch_vaddr[12:30]) - m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr) - # Tera page - with m.If(itlb_is_512G): - m.d.comb += paddr[12:39].eq( - self.icache_areq_i.fetch_vaddr[12:39]) - m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr) - - # --------- - # ITLB Hit - # -------- - # if we hit the ITLB output the request signal immediately - with m.If(itlb_lu_hit): - m.d.comb += self.icache_areq_o.fetch_valid.eq( - self.icache_areq_i.fetch_req) - # we got an access error - with m.If (iaccess_err): - # throw a page fault - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), - fe.tval.eq(self.icache_areq_i.fetch_vaddr), - fe.valid.eq(1) - ] - # --------- - # ITLB Miss - # --------- - # watch out for exceptions happening during walking the page table - with m.Elif(ptw_active & walking_instr): - m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error) - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT), - fe.tval.eq(uaddr64), - fe.valid.eq(1) - ] - - #----------------------- - # Data Interface - #----------------------- - - lsu_vaddr = Signal(64) - dtlb_pte = PTE() - misaligned_ex = RVException() - lsu_req = Signal() - lsu_is_store = Signal() - dtlb_hit = Signal() - #dtlb_is_2M = Signal() - #dtlb_is_1G = Signal() - #dtlb_is_512 = Signal() - - # check if we need to do translation or if we are always - # ready (e.g.: we are not translating anything) - m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i, - dtlb_lu_hit, 1)) - - # The data interface is simpler and only consists of a - # 
request/response interface - m.d.comb += [ - # save request and DTLB response - lsu_vaddr.eq(self.lsu_vaddr_i), - lsu_req.eq(self.lsu_req_i), - misaligned_ex.eq(self.misaligned_ex_i), - dtlb_pte.eq(dtlb_content), - dtlb_hit.eq(dtlb_lu_hit), - lsu_is_store.eq(self.lsu_is_store_i), - #dtlb_is_2M.eq(dtlb_is_2M), - #dtlb_is_1G.eq(dtlb_is_1G), - ##dtlb_is_512.eq(self.dtlb_is_512G) #???? - ] - m.d.sync += [ - self.lsu_paddr_o.eq(lsu_vaddr), - self.lsu_valid_o.eq(lsu_req), - self.lsu_exception_o.eq(misaligned_ex), - ] - - sverr = Signal() - usrerr = Signal() - - m.d.comb += [ - # mute misaligned exceptions if there is no request - # otherwise they will throw accidental exceptions - misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i), - - # SUM is not set and we are trying to access a user - # page in supervisor mode - sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \ - dtlb_pte.u), - # this is not a user page but we are in user mode and - # trying to access it - usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u), - - # Check if the User flag is set, then we may only - # access it in supervisor mode if SUM is enabled - daccess_err.eq(sverr | usrerr), - ] - - # translation is enabled and no misaligned exception occurred - with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid): - m.d.comb += lsu_req.eq(0) - # 4K page - paddr = Signal.like(lsu_vaddr) - paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn) - m.d.comb += paddr.eq(paddr4k) - # Mega page - with m.If(dtlb_is_2M): - m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21]) - # Giga page - with m.If(dtlb_is_1G): - m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30]) - m.d.sync += self.lsu_paddr_o.eq(paddr) - # TODO platen tera_page - - # --------- - # DTLB Hit - # -------- - with m.If(dtlb_hit & lsu_req): - m.d.comb += lsu_req.eq(1) - # this is a store - with m.If (lsu_is_store): - # check if the page is write-able and - # we are not violating privileges - # also check if the dirty flag is set - with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), - le.tval.eq(lsu_vaddr), - le.valid.eq(1) - ] - - # this is a load, check for sufficient access - # privileges - throw a page fault if necessary - with m.Elif(daccess_err): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), - le.tval.eq(lsu_vaddr), - le.valid.eq(1) - ] - # --------- - # DTLB Miss - # --------- - # watch out for exceptions - with m.Elif (ptw_active & ~walking_instr): - # page table walker threw an exception - with m.If (ptw_error): - # an error makes the translation valid - m.d.comb += lsu_req.eq(1) - # the page table walker can only throw page faults - with m.If (lsu_is_store): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), - le.tval.eq(uaddr64), - le.valid.eq(1) - ] - with m.Else(): - m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), - le.tval.eq(uaddr64), - le.valid.eq(1) - ] - - return m - - def ports(self): - return [self.flush_i, self.enable_translation_i, - self.en_ld_st_translation_i, - self.lsu_req_i, - self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o, - self.lsu_valid_o, self.lsu_paddr_o, - self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i, - self.satp_ppn_i, self.asid_i, self.flush_tlb_i, - self.itlb_miss_o, self.dtlb_miss_o] + \ - self.icache_areq_i.ports() + self.icache_areq_o.ports() + \ - self.req_port_i.ports() + self.req_port_o.ports() + \ - self.misaligned_ex_i.ports() + self.lsu_exception_o.ports() - -if 
__name__ == '__main__': - mmu = MMU() - vl = rtlil.convert(mmu, ports=mmu.ports()) - with open("test_mmu.il", "w") as f: - f.write(vl) - diff --git a/src/soc/TLB/ariane/p_lru.txt b/src/soc/TLB/ariane/p_lru.txt deleted file mode 100644 index 4bac7680..00000000 --- a/src/soc/TLB/ariane/p_lru.txt +++ /dev/null @@ -1,51 +0,0 @@ -pseudo-LRU - -two-way set associative - one bit - - indicates which line of the two has been reference more recently - - -four-way set associative - three bits - - each bit represents one branch point in a binary decision tree; let 1 - represent that the left side has been referenced more recently than the - right side, and 0 vice-versa - - are all 4 lines valid? - / \ - yes no, use an invalid line - | - | - | - bit_0 == 0? state | replace ref to | next state - / \ ------+-------- -------+----------- - y n 00x | line_0 line_0 | 11_ - / \ 01x | line_1 line_1 | 10_ - bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1 - / \ / \ 1x1 | line_3 line_3 | 0_0 - y n y n - / \ / \ ('x' means ('_' means unchanged) - line_0 line_1 line_2 line_3 don't care) - - (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev. - Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm) - - -note that there is a 6-bit encoding for true LRU for four-way set associative - - bit 0: bank[1] more recently used than bank[0] - bit 1: bank[2] more recently used than bank[0] - bit 2: bank[2] more recently used than bank[1] - bit 3: bank[3] more recently used than bank[0] - bit 4: bank[3] more recently used than bank[1] - bit 5: bank[3] more recently used than bank[2] - - this results in 24 valid bit patterns within the 64 possible bit patterns - (4! possible valid traces for bank references) - - e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111 - - you can implement a state machine with a 256x6 ROM (6-bit state encoding - appended with a 2-bit bank reference input will yield a new 6-bit state), - and you can implement an LRU bank indicator with a 64x2 ROM - diff --git a/src/soc/TLB/ariane/plru.py b/src/soc/TLB/ariane/plru.py deleted file mode 100644 index a8db5c27..00000000 --- a/src/soc/TLB/ariane/plru.py +++ /dev/null @@ -1,105 +0,0 @@ -from nmigen import Signal, Module, Cat, Const -from nmigen.hdl.ir import Elaboratable -from math import log2 - - -class PLRU(Elaboratable): - """ PLRU - Pseudo Least Recently Used Replacement - - PLRU-tree indexing: - lvl0 0 - / \ - / \ - lvl1 1 2 - / \ / \ - lvl2 3 4 5 6 - / \ /\/\ /\ - ... ... ... ... - """ - def __init__(self, entries): - self.entries = entries - self.lu_hit = Signal(entries) - self.replace_en_o = Signal(entries) - self.lu_access_i = Signal() - # Tree (bit per entry) - self.TLBSZ = 2*(self.entries-1) - self.plru_tree = Signal(self.TLBSZ) - self.plru_tree_o = Signal(self.TLBSZ) - - def elaborate(self, platform=None): - m = Module() - - # Just predefine which nodes will be set/cleared - # E.g. 
for a TLB with 8 entries, the for-loop is semantically - # equivalent to the following pseudo-code: - # unique case (1'b1) - # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1}; - # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0}; - # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1}; - # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0}; - # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1}; - # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0}; - # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1}; - # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0}; - # default: begin /* No hit */ end - # endcase - LOG_TLB = int(log2(self.entries)) - print(LOG_TLB) - for i in range(self.entries): - # we got a hit so update the pointer as it was least recently used - hit = Signal(reset_less=True) - m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i) - with m.If(hit): - # Set the nodes to the values we would expect - for lvl in range(LOG_TLB): - idx_base = (1< MSB, lvl1 <=> MSB-1, ... - shift = LOG_TLB - lvl; - new_idx = Const(~((i >> (shift-1)) & 1), (1, False)) - plru_idx = idx_base + (i >> shift) - print ("plru", i, lvl, hex(idx_base), - plru_idx, shift, new_idx) - m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx) - - # Decode tree to write enable signals - # Next for-loop basically creates the following logic for e.g. - # an 8 entry TLB (note: pseudo-code obviously): - # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1} - # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0} - # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1} - # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0} - # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1} - # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0} - # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1} - # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0} - # For each entry traverse the tree. If every tree-node matches - # the corresponding bit of the entry's index, this is - # the next entry to replace. - replace = [] - for i in range(self.entries): - en = [] - for lvl in range(LOG_TLB): - idx_base = (1< MSB, lvl1 <=> MSB-1, ... - shift = LOG_TLB - lvl; - new_idx = (i >> (shift-1)) & 1; - plru_idx = idx_base + (i>>shift) - plru = Signal(reset_less=True, - name="plru-%d-%d-%d" % (i, lvl, plru_idx)) - m.d.comb += plru.eq(self.plru_tree[plru_idx]) - # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx; - if new_idx: - en.append(~plru) # yes inverted (using bool()) - else: - en.append(plru) # yes inverted (using bool()) - print ("plru", i, en) - # boolean logic manipulation: - # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2) - replace.append(~Cat(*en).bool()) - m.d.comb += self.replace_en_o.eq(Cat(*replace)) - - return m - - def ports(self): - return [self.entries, self.lu_hit, self.replace_en_o, - self.lu_access_i, self.plru_tree, self.plru_tree_o] diff --git a/src/soc/TLB/ariane/ptw.py b/src/soc/TLB/ariane/ptw.py deleted file mode 100644 index 4046c711..00000000 --- a/src/soc/TLB/ariane/ptw.py +++ /dev/null @@ -1,556 +0,0 @@ -""" -# Copyright 2018 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http:#solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. -# -# Author: David Schaffenrath, TU Graz -# Author: Florian Zaruba, ETH Zurich -# Date: 24.4.2017 -# Description: Hardware-PTW - -/* verilator lint_off WIDTH */ -import ariane_pkg::*; - -see linux kernel source: - -* "arch/riscv/include/asm/page.h" -* "arch/riscv/include/asm/mmu_context.h" -* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET) - -""" - -from nmigen import Const, Signal, Cat, Module, Elaboratable -from nmigen.hdl.ast import ArrayProxy -from nmigen.cli import verilog, rtlil -from math import log2 - - -DCACHE_SET_ASSOC = 8 -CONFIG_L1D_SIZE = 32*1024 -DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC)) -DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH - -ASID_WIDTH = 8 - - -class DCacheReqI: - def __init__(self): - self.address_index = Signal(DCACHE_INDEX_WIDTH) - self.address_tag = Signal(DCACHE_TAG_WIDTH) - self.data_wdata = Signal(64) - self.data_req = Signal() - self.data_we = Signal() - self.data_be = Signal(8) - self.data_size = Signal(2) - self.kill_req = Signal() - self.tag_valid = Signal() - - def eq(self, inp): - res = [] - for (o, i) in zip(self.ports(), inp.ports()): - res.append(o.eq(i)) - return res - - def ports(self): - return [self.address_index, self.address_tag, - self.data_wdata, self.data_req, - self.data_we, self.data_be, self.data_size, - self.kill_req, self.tag_valid, - ] - -class DCacheReqO: - def __init__(self): - self.data_gnt = Signal() - self.data_rvalid = Signal() - self.data_rdata = Signal(64) # actually in PTE object format - - def eq(self, inp): - res = [] - for (o, i) in zip(self.ports(), inp.ports()): - res.append(o.eq(i)) - return res - - def ports(self): - return [self.data_gnt, self.data_rvalid, self.data_rdata] - - -class PTE: #(RecordObject): - def __init__(self): - self.v = Signal() - self.r = Signal() - self.w = Signal() - self.x = Signal() - self.u = Signal() - self.g = Signal() - self.a = Signal() - self.d = Signal() - self.rsw = Signal(2) - self.ppn = Signal(44) - self.reserved = Signal(10) - - def flatten(self): - return Cat(*self.ports()) - - def eq(self, x): - if isinstance(x, ArrayProxy): - res = [] - for o in self.ports(): - i = getattr(x, o.name) - res.append(i) - x = Cat(*res) - else: - x = x.flatten() - return self.flatten().eq(x) - - def __iter__(self): - """ order is critical so that flatten creates LSB to MSB - """ - yield self.v - yield self.r - yield self.w - yield self.x - yield self.u - yield self.g - yield self.a - yield self.d - yield self.rsw - yield self.ppn - yield self.reserved - - def ports(self): - return list(self) - - -class TLBUpdate: - def __init__(self, asid_width): - self.valid = Signal() # valid flag - self.is_2M = Signal() - self.is_1G = Signal() - self.is_512G = Signal() - self.vpn = Signal(36) - self.asid = Signal(asid_width) - self.content = PTE() - - def flatten(self): - return Cat(*self.ports()) - - def eq(self, x): - return self.flatten().eq(x.flatten()) - - def ports(self): - return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \ - self.content.ports() - - -# SV48 defines four levels of page tables -LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1 -LVL2 = Const(1, 2) -LVL3 = Const(2, 2) -LVL4 = Const(3, 2) - - -class 
PTW(Elaboratable): - def __init__(self, asid_width=8): - self.asid_width = asid_width - - self.flush_i = Signal() # flush everything, we need to do this because - # actually everything we do is speculative at this stage - # e.g.: there could be a CSR instruction that changes everything - self.ptw_active_o = Signal(reset=1) # active if not IDLE - self.walking_instr_o = Signal() # set when walking for TLB - self.ptw_error_o = Signal() # set when an error occurred - self.enable_translation_i = Signal() # CSRs indicate to enable SV48 - self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st - - self.lsu_is_store_i = Signal() # translation triggered by store - # PTW memory interface - self.req_port_i = DCacheReqO() - self.req_port_o = DCacheReqI() - - # to TLBs, update logic - self.itlb_update_o = TLBUpdate(asid_width) - self.dtlb_update_o = TLBUpdate(asid_width) - - self.update_vaddr_o = Signal(48) - - self.asid_i = Signal(self.asid_width) - # from TLBs - # did we miss? - self.itlb_access_i = Signal() - self.itlb_hit_i = Signal() - self.itlb_vaddr_i = Signal(64) - - self.dtlb_access_i = Signal() - self.dtlb_hit_i = Signal() - self.dtlb_vaddr_i = Signal(64) - # from CSR file - self.satp_ppn_i = Signal(44) # ppn from satp - self.mxr_i = Signal() - # Performance counters - self.itlb_miss_o = Signal() - self.dtlb_miss_o = Signal() - - def ports(self): - return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o, - ] - return [ - self.enable_translation_i, self.en_ld_st_translation_i, - self.lsu_is_store_i, self.req_port_i, self.req_port_o, - self.update_vaddr_o, - self.asid_i, - self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i, - self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i, - self.satp_ppn_i, self.mxr_i, - self.itlb_miss_o, self.dtlb_miss_o - ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports() - - def elaborate(self, platform): - m = Module() - - # input registers - data_rvalid = Signal() - data_rdata = Signal(64) - - # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata - # is spec'd in 64-bit binary-format: better to spec as Record? - pte = PTE() - m.d.comb += pte.flatten().eq(data_rdata) - - # SV48 defines four levels of page tables - ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above) - ptw_lvl1 = Signal() - ptw_lvl2 = Signal() - ptw_lvl3 = Signal() - ptw_lvl4 = Signal() - m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1), - ptw_lvl2.eq(ptw_lvl == LVL2), - ptw_lvl3.eq(ptw_lvl == LVL3), - ptw_lvl4.eq(ptw_lvl == LVL4) - ] - - # is this an instruction page table walk? - is_instr_ptw = Signal() - global_mapping = Signal() - # latched tag signal - tag_valid = Signal() - # register the ASID - tlb_update_asid = Signal(self.asid_width) - # register VPN we need to walk, SV48 defines a 48 bit virtual addr - vaddr = Signal(64) - # 4 byte aligned physical pointer - ptw_pptr = Signal(56) - - end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH - m.d.sync += [ - # Assignments - self.update_vaddr_o.eq(vaddr), - - self.walking_instr_o.eq(is_instr_ptw), - # directly output the correct physical address - self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]), - self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]), - # we are never going to kill this request - self.req_port_o.kill_req.eq(0), # XXX assign comb? - # we are never going to write with the HPTW - self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb? 
- # ----------- - # TLB Update - # ----------- - self.itlb_update_o.vpn.eq(vaddr[12:48]), - self.dtlb_update_o.vpn.eq(vaddr[12:48]), - # update the correct page table level - self.itlb_update_o.is_2M.eq(ptw_lvl3), - self.itlb_update_o.is_1G.eq(ptw_lvl2), - self.itlb_update_o.is_512G.eq(ptw_lvl1), - self.dtlb_update_o.is_2M.eq(ptw_lvl3), - self.dtlb_update_o.is_1G.eq(ptw_lvl2), - self.dtlb_update_o.is_512G.eq(ptw_lvl1), - - # output the correct ASID - self.itlb_update_o.asid.eq(tlb_update_asid), - self.dtlb_update_o.asid.eq(tlb_update_asid), - # set the global mapping bit - self.itlb_update_o.content.eq(pte), - self.itlb_update_o.content.g.eq(global_mapping), - self.dtlb_update_o.content.eq(pte), - self.dtlb_update_o.content.g.eq(global_mapping), - - self.req_port_o.tag_valid.eq(tag_valid), - ] - - #------------------- - # Page table walker #needs update - #------------------- - # A virtual address va is translated into a physical address pa as - # follows: - # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv48, - # PAGESIZE=2^12 and LEVELS=4.) - # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. - # (For Sv32, PTESIZE=4.) - # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an - # access exception. - # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to - # step 5. Otherwise, this PTE is a pointer to the next level of - # the page table. - # Let i=i-1. If i < 0, stop and raise an access exception. - # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2. - # 5. A leaf PTE has been found. Determine if the requested memory - # access is allowed by the pte.r, pte.w, and pte.x bits. If not, - # stop and raise an access exception. Otherwise, the translation is - # successful. Set pte.a to 1, and, if the memory access is a - # store, set pte.d to 1. - # The translated physical address is given as follows: - # - pa.pgoff = va.pgoff. - # - If i > 0, then this is a superpage translation and - # pa.ppn[i-1:0] = va.vpn[i-1:0]. - # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. - # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned - # superpage stop and raise a page-fault exception. 
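# A minimal, non-hardware reference model of steps 1-6 above may help when
# reading the FSM that follows. This is an illustrative sketch only: it
# assumes the SV48 parameters PAGESIZE=2**12, LEVELS=4, PTESIZE=8, uses
# hypothetical helpers `read_pte()` and `PageFault`, and leaves out the
# permission/A/D handling of step 5 (the FSM below raises a page fault
# instead and lets software manage those bits).
#
#   def sv48_walk(satp_ppn, vaddr, read_pte):
#       PAGESIZE, LEVELS, PTESIZE = 1 << 12, 4, 8
#       vpn = [(vaddr >> (12 + 9*i)) & 0x1ff for i in range(LEVELS)]
#       a = satp_ppn * PAGESIZE                        # step 1
#       for i in reversed(range(LEVELS)):              # i = 3, 2, 1, 0
#           pte = read_pte(a + vpn[i] * PTESIZE)       # step 2
#           if not pte.v or (not pte.r and pte.w):     # step 3
#               raise PageFault()
#           if pte.r or pte.x:                         # step 5: leaf PTE
#               if i > 0 and pte.ppn % (1 << (9*i)):   # step 6: misaligned superpage
#                   raise PageFault()
#               # low 12+9*i bits come from the virtual address (superpage offset)
#               return (pte.ppn << 12) | (vaddr & ((1 << (12 + 9*i)) - 1))
#           a = pte.ppn * PAGESIZE                     # step 4: descend one level
#       raise PageFault()                              # ran out of levels (i < 0)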
- - m.d.sync += tag_valid.eq(0) - - # default assignments - m.d.comb += [ - # PTW memory interface - self.req_port_o.data_req.eq(0), - self.req_port_o.data_be.eq(Const(0xFF, 8)), - self.req_port_o.data_size.eq(Const(0b11, 2)), - self.req_port_o.data_we.eq(0), - self.ptw_error_o.eq(0), - self.itlb_update_o.valid.eq(0), - self.dtlb_update_o.valid.eq(0), - - self.itlb_miss_o.eq(0), - self.dtlb_miss_o.eq(0), - ] - - # ------------ - # State Machine - # ------------ - - with m.FSM() as fsm: - - with m.State("IDLE"): - self.idle(m, is_instr_ptw, ptw_lvl, global_mapping, - ptw_pptr, vaddr, tlb_update_asid) - - with m.State("WAIT_GRANT"): - self.grant(m, tag_valid, data_rvalid) - - with m.State("PTE_LOOKUP"): - # we wait for the valid signal - with m.If(data_rvalid): - self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4, - data_rvalid, global_mapping, - is_instr_ptw, ptw_pptr) - - # Propagate error to MMU/LSU - with m.State("PROPAGATE_ERROR"): - m.next = "IDLE" - m.d.comb += self.ptw_error_o.eq(1) - - # wait for the rvalid before going back to IDLE - with m.State("WAIT_RVALID"): - with m.If(data_rvalid): - m.next = "IDLE" - - m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata), - data_rvalid.eq(self.req_port_i.data_rvalid) - ] - - return m - - def set_grant_state(self, m): - # should we have flushed before we got an rvalid, - # wait for it until going back to IDLE - with m.If(self.flush_i): - with m.If (self.req_port_i.data_gnt): - m.next = "WAIT_RVALID" - with m.Else(): - m.next = "IDLE" - with m.Else(): - m.next = "WAIT_GRANT" - - def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping, - ptw_pptr, vaddr, tlb_update_asid): - # by default we start with the top-most page table - m.d.sync += [is_instr_ptw.eq(0), - ptw_lvl.eq(LVL1), - global_mapping.eq(0), - self.ptw_active_o.eq(0), # deactive (IDLE) - ] - # work out itlb/dtlb miss - m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \ - self.itlb_access_i & \ - ~self.itlb_hit_i & \ - ~self.dtlb_access_i) - m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \ - self.dtlb_access_i & \ - ~self.dtlb_hit_i) - # we got an ITLB miss? - with m.If(self.itlb_miss_o): - pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:48], - self.satp_ppn_i) - m.d.sync += [ptw_pptr.eq(pptr), - is_instr_ptw.eq(1), - vaddr.eq(self.itlb_vaddr_i), - tlb_update_asid.eq(self.asid_i), - ] - self.set_grant_state(m) - - # we got a DTLB miss? 
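# (same latch sequence as the ITLB-miss branch above, except is_instr_ptw
# is left at 0, so walking_instr_o stays low and lookup() routes the
# eventual TLB update to the DTLB)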
- with m.Elif(self.dtlb_miss_o): - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:48], - self.satp_ppn_i) - m.d.sync += [ptw_pptr.eq(pptr), - vaddr.eq(self.dtlb_vaddr_i), - tlb_update_asid.eq(self.asid_i), - ] - self.set_grant_state(m) - - def grant(self, m, tag_valid, data_rvalid): - # we've got a data WAIT_GRANT so tell the - # cache that the tag is valid - - # send a request out - m.d.comb += self.req_port_o.data_req.eq(1) - # wait for the WAIT_GRANT - with m.If(self.req_port_i.data_gnt): - # send the tag valid signal one cycle later - m.d.sync += tag_valid.eq(1) - # should we have flushed before we got an rvalid, - # wait for it until going back to IDLE - with m.If(self.flush_i): - with m.If (~data_rvalid): - m.next = "WAIT_RVALID" - with m.Else(): - m.next = "IDLE" - with m.Else(): - m.next = "PTE_LOOKUP" - - def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4, - data_rvalid, global_mapping, - is_instr_ptw, ptw_pptr): - # temporaries - pte_rx = Signal(reset_less=True) - pte_exe = Signal(reset_less=True) - pte_inv = Signal(reset_less=True) - pte_a = Signal(reset_less=True) - st_wd = Signal(reset_less=True) - m.d.comb += [pte_rx.eq(pte.r | pte.x), - pte_exe.eq(~pte.x | ~pte.a), - pte_inv.eq(~pte.v | (~pte.r & pte.w)), - pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))), - st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))] - - l1err = Signal(reset_less=True) - l2err = Signal(reset_less=True) - l3err = Signal(reset_less=True) - m.d.comb += [l3err.eq((ptw_lvl3) & pte.ppn[0:9] != Const(0,0)), - l2err.eq((ptw_lvl2) & pte.ppn[0:18] != Const(0, 18)), - l1err.eq((ptw_lvl1) & pte.ppn[0:27] != Const(0, 27))] - - # check if the global mapping bit is set - with m.If (pte.g): - m.d.sync += global_mapping.eq(1) - - m.next = "IDLE" - - # ------------- - # Invalid PTE - # ------------- - # If pte.v = 0, or if pte.r = 0 and pte.w = 1, - # stop and raise a page-fault exception. - with m.If (pte_inv): - m.next = "PROPAGATE_ERROR" - - # ----------- - # Valid PTE - # ----------- - - # it is a valid PTE - # if pte.r = 1 or pte.x = 1 it is a valid PTE - with m.Elif (pte_rx): - # Valid translation found (either 1G, 2M or 4K) - with m.If(is_instr_ptw): - # ------------ - # Update ITLB - # ------------ - # If page not executable, we can directly raise error. - # This doesn't put a useless entry into the TLB. - # The same idea applies to the access flag since we let - # the access flag be managed by SW. - with m.If (pte_exe): - m.next = "IDLE" - with m.Else(): - m.d.comb += self.itlb_update_o.valid.eq(1) - - with m.Else(): - # ------------ - # Update DTLB - # ------------ - # Check if the access flag has been set, otherwise - # throw page-fault and let software handle those bits. - # If page not readable (there are no write-only pages) - # directly raise an error. This doesn't put a useless - # entry into the TLB. 
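# (pte_a, computed at the top of lookup(), is pte.a & (pte.r | (pte.x & mxr_i)):
# the access flag must already be set, and the page must be readable or,
# with MXR set, executable, before the DTLB update is allowed)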
- with m.If(pte_a): - m.d.comb += self.dtlb_update_o.valid.eq(1) - with m.Else(): - m.next = "PROPAGATE_ERROR" - # Request is a store: perform additional checks - # If the request was a store and the page not - # write-able, raise an error - # the same applies if the dirty flag is not set - with m.If (st_wd): - m.d.comb += self.dtlb_update_o.valid.eq(0) - m.next = "PROPAGATE_ERROR" - - # check if the ppn is correctly aligned: Case (6) - with m.If(l1err | l2err | l3err): - m.next = "PROPAGATE_ERROR" - m.d.comb += [self.dtlb_update_o.valid.eq(0), - self.itlb_update_o.valid.eq(0)] - - # this is a pointer to the next TLB level - with m.Else(): - # pointer to next level of page table - with m.If (ptw_lvl1): - # we are in the second level now - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39], pte.ppn) - m.d.sync += [ptw_pptr.eq(pptr), - ptw_lvl.eq(LVL2) - ] - with m.If(ptw_lvl2): - # here we received a pointer to the third level - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn) - m.d.sync += [ptw_pptr.eq(pptr), - ptw_lvl.eq(LVL3) - ] - with m.If(ptw_lvl3): #guess: shift page levels by one - # here we received a pointer to the fourth level - # the last one is near the page offset - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn) - m.d.sync += [ptw_pptr.eq(pptr), - ptw_lvl.eq(LVL4) - ] - self.set_grant_state(m) - - with m.If (ptw_lvl4): - # Should already be the last level - # page table => Error - m.d.sync += ptw_lvl.eq(LVL4) - m.next = "PROPAGATE_ERROR" - - -if __name__ == '__main__': - ptw = PTW() - vl = rtlil.convert(ptw, ports=ptw.ports()) - with open("test_ptw.il", "w") as f: - f.write(vl) diff --git a/src/soc/TLB/ariane/test/test_plru.py b/src/soc/TLB/ariane/test/test_plru.py deleted file mode 100644 index 9222d796..00000000 --- a/src/soc/TLB/ariane/test/test_plru.py +++ /dev/null @@ -1,13 +0,0 @@ -import sys -from soc.TLB.ariane.plru import PLRU -from nmigen.compat.sim import run_simulation - - -def tbench(dut): - yield - - -if __name__ == "__main__": - dut = PLRU(4) - run_simulation(dut, tbench(dut), vcd_name="test_plru.vcd") - print("PLRU Unit Test Success") diff --git a/src/soc/TLB/ariane/test/test_ptw.py b/src/soc/TLB/ariane/test/test_ptw.py deleted file mode 100644 index 39697566..00000000 --- a/src/soc/TLB/ariane/test/test_ptw.py +++ /dev/null @@ -1,127 +0,0 @@ -from nmigen.compat.sim import run_simulation -from soc.TLB.ariane.ptw import PTW, PTE - -# unit was changed, test needs to be changed - - -def tbench(dut): - - addr = 0x8000000 - - #pte = PTE() - # yield pte.v.eq(1) - # yield pte.r.eq(1) - - yield dut.req_port_i.data_gnt.eq(1) - yield dut.req_port_i.data_rvalid.eq(1) - yield dut.req_port_i.data_rdata.eq(0x43) # pte.flatten()) - - # data lookup - yield dut.en_ld_st_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000000) - - yield - yield - yield - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x200000) - - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - - # data lookup, PTW levels 1-2-3 - addr = 0x4000000 - yield dut.dtlb_vaddr_i.eq(addr) - yield dut.mxr_i.eq(0x1) - yield dut.req_port_i.data_gnt.eq(1) - yield dut.req_port_i.data_rvalid.eq(1) - # pte.flatten()) - yield dut.req_port_i.data_rdata.eq(0x41 | (addr >> 12) << 10) - - yield 
dut.en_ld_st_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(addr) - - yield - yield - yield - yield - yield - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - yield - yield - - # instruction lookup - yield dut.en_ld_st_translation_i.eq(0) - yield dut.enable_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x800000) - - yield - yield - yield - - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x200000) - - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x800011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - - yield - - -def test_ptw(): - dut = PTW() - run_simulation(dut, tbench(dut), vcd_name="test_ptw.vcd") - print("PTW Unit Test Success") - - -if __name__ == "__main__": - test_ptw() diff --git a/src/soc/TLB/ariane/test/test_tlb.py b/src/soc/TLB/ariane/test/test_tlb.py deleted file mode 100644 index e1b17b8b..00000000 --- a/src/soc/TLB/ariane/test/test_tlb.py +++ /dev/null @@ -1,67 +0,0 @@ -from nmigen.compat.sim import run_simulation - -from soc.TLB.ariane.tlb import TLB - - -def set_vaddr(addr): - yield dut.lu_vaddr_i.eq(addr) - yield dut.update_i.vpn.eq(addr >> 12) - - -def tbench(dut): - yield dut.lu_access_i.eq(1) - yield dut.lu_asid_i.eq(1) - yield dut.update_i.valid.eq(1) - yield dut.update_i.is_1G.eq(0) - yield dut.update_i.is_2M.eq(0) - yield dut.update_i.asid.eq(1) - yield dut.update_i.content.ppn.eq(0) - yield dut.update_i.content.rsw.eq(0) - yield dut.update_i.content.r.eq(1) - - yield - - addr = 0x80000 - yield from set_vaddr(addr) - yield - - addr = 0x90001 - yield from set_vaddr(addr) - yield - - addr = 0x28000000 - yield from set_vaddr(addr) - yield - - addr = 0x28000001 - yield from set_vaddr(addr) - - addr = 0x28000001 - yield from set_vaddr(addr) - yield - - addr = 0x1000040000 - yield from set_vaddr(addr) - yield - - addr = 0x1000040001 - yield from set_vaddr(addr) - yield - - yield dut.update_i.is_1G.eq(1) - addr = 0x2040000 - yield from set_vaddr(addr) - yield - - yield dut.update_i.is_1G.eq(1) - addr = 0x2040001 - yield from set_vaddr(addr) - yield - - yield - - -if __name__ == "__main__": - dut = TLB() - run_simulation(dut, tbench(dut), vcd_name="test_tlb.vcd") - print("TLB Unit Test Success") diff --git a/src/soc/TLB/ariane/test/test_tlb_content.py b/src/soc/TLB/ariane/test/test_tlb_content.py deleted file mode 100644 index 1bc60d88..00000000 --- a/src/soc/TLB/ariane/test/test_tlb_content.py +++ /dev/null @@ -1,63 +0,0 @@ -from nmigen.compat.sim import run_simulation - -from soc.TLB.ariane.tlb_content import TLBContent -from soc.TestUtil.test_helper import assert_op, assert_eq - - -def update(dut, a, t, g, m): - yield dut.replace_en_i.eq(1) - yield dut.update_i.valid.eq(1) - yield dut.update_i.is_512G.eq(t) - yield dut.update_i.is_1G.eq(g) - yield dut.update_i.is_2M.eq(m) - yield dut.update_i.vpn.eq(a) - yield - yield - - -def check_hit(dut, hit, pagesize): - hit_d = yield dut.lu_hit_o - assert_eq("hit", hit_d, hit) - - if(hit): - if(pagesize == "t"): - hitp = yield dut.lu_is_512G_o - assert_eq("lu_is_512G_o", hitp, 1) - elif(pagesize == "g"): - hitp = 
yield dut.lu_is_1G_o - assert_eq("lu_is_1G_o", hitp, 1) - elif(pagesize == "m"): - hitp = yield dut.lu_is_2M_o - assert_eq("lu_is_2M_o", hitp, 1) - - -def addr(a, b, c, d): - return a | b << 9 | c << 18 | d << 27 - - -def tbench(dut): - yield dut.vpn0.eq(0x0A) - yield dut.vpn1.eq(0x0B) - yield dut.vpn2.eq(0x0C) - yield dut.vpn3.eq(0x0D) - yield from update(dut, addr(0xFF, 0xFF, 0xFF, 0x0D), 1, 0, 0) - yield from check_hit(dut, 1, "t") - - yield from update(dut, addr(0xFF, 0xFF, 0x0C, 0x0D), 0, 1, 0) - yield from check_hit(dut, 1, "g") - - yield from update(dut, addr(0xFF, 0x0B, 0x0C, 0x0D), 0, 0, 1) - yield from check_hit(dut, 1, "m") - - yield from update(dut, addr(0x0A, 0x0B, 0x0C, 0x0D), 0, 0, 0) - yield from check_hit(dut, 1, "") - - yield from update(dut, addr(0xAA, 0xBB, 0xCC, 0xDD), 0, 0, 0) - yield from check_hit(dut, 0, "miss") - - -if __name__ == "__main__": - dut = TLBContent(4, 4) - # - run_simulation(dut, tbench(dut), vcd_name="test_tlb_content.vcd") - print("TLBContent Unit Test Success") diff --git a/src/soc/TLB/ariane/tlb.py b/src/soc/TLB/ariane/tlb.py deleted file mode 100644 index 72b67a2d..00000000 --- a/src/soc/TLB/ariane/tlb.py +++ /dev/null @@ -1,176 +0,0 @@ -""" -# Copyright 2018 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
-# -# Author: David Schaffenrath, TU Graz -# Author: Florian Zaruba, ETH Zurich -# Date: 21.4.2017 -# Description: Translation Lookaside Buffer, SV48 -# fully set-associative - -Implementation in c++: -https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp - -Text description: -https://people.cs.clemson.edu/~mark/464/p_lru.txt - -Online simulator: -http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html -""" -from math import log2 -from nmigen import Signal, Module, Cat, Const, Array, Elaboratable -from nmigen.cli import verilog, rtlil -from nmigen.lib.coding import Encoder - -from soc.TLB.ariane.ptw import TLBUpdate, PTE, ASID_WIDTH -from soc.TLB.ariane.plru import PLRU -from soc.TLB.ariane.tlb_content import TLBContent - -TLB_ENTRIES = 8 - - -class TLB(Elaboratable): - def __init__(self, tlb_entries=8, asid_width=8): - self.tlb_entries = tlb_entries - self.asid_width = asid_width - - self.flush_i = Signal() # Flush signal - # Lookup signals - self.lu_access_i = Signal() - self.lu_asid_i = Signal(self.asid_width) - self.lu_vaddr_i = Signal(64) - self.lu_content_o = PTE() - self.lu_is_2M_o = Signal() - self.lu_is_1G_o = Signal() - self.lu_is_512G_o = Signal() - self.lu_hit_o = Signal() - # Update TLB - self.pte_width = len(self.lu_content_o.flatten()) - self.update_i = TLBUpdate(asid_width) - - def elaborate(self, platform): - m = Module() - - vpn3 = Signal(9) # FIXME unused signal - vpn2 = Signal(9) - vpn1 = Signal(9) - vpn0 = Signal(9) - - # ------------- - # Translation - # ------------- - - # SV48 defines four levels of page tables - m.d.comb += [vpn0.eq(self.lu_vaddr_i[12:21]), - vpn1.eq(self.lu_vaddr_i[21:30]), - vpn2.eq(self.lu_vaddr_i[30:39]), - vpn3.eq(self.lu_vaddr_i[39:48]), # FIXME - ] - - tc = [] - for i in range(self.tlb_entries): - tlc = TLBContent(self.pte_width, self.asid_width) - setattr(m.submodules, "tc%d" % i, tlc) - tc.append(tlc) - # connect inputs - tlc.update_i = self.update_i # saves a lot of graphviz links - m.d.comb += [tlc.vpn0.eq(vpn0), - tlc.vpn1.eq(vpn1), - tlc.vpn2.eq(vpn2), - # TODO 4th - tlc.flush_i.eq(self.flush_i), - # tlc.update_i.eq(self.update_i), - tlc.lu_asid_i.eq(self.lu_asid_i)] - tc = Array(tc) - - # -------------- - # Select hit - # -------------- - - # use Encoder to select hit index - # XXX TODO: assert that there's only one valid entry (one lu_hit) - hitsel = Encoder(self.tlb_entries) - m.submodules.hitsel = hitsel - - hits = [] - for i in range(self.tlb_entries): - hits.append(tc[i].lu_hit_o) - m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well) - idx = hitsel.o - - active = Signal(reset_less=True) - m.d.comb += active.eq(~hitsel.n) - with m.If(active): - # active hit, send selected as output - m.d.comb += [self.lu_is_512G_o.eq(tc[idx].lu_is_512G_o), - self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o), - self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o), - self.lu_hit_o.eq(1), - self.lu_content_o.flatten().eq(tc[idx].lu_content_o), - ] - - # -------------- - # PLRU. 
- # -------------- - - p = PLRU(self.tlb_entries) - plru_tree = Signal(p.TLBSZ) - m.submodules.plru = p - - # connect PLRU inputs/outputs - # XXX TODO: assert that there's only one valid entry (one replace_en) - en = [] - for i in range(self.tlb_entries): - en.append(tc[i].replace_en_i) - m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags - p.lu_hit.eq(hitsel.i), - p.lu_access_i.eq(self.lu_access_i), - p.plru_tree.eq(plru_tree)] - m.d.sync += plru_tree.eq(p.plru_tree_o) - - # -------------- - # Sanity checks - # -------------- - - assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \ - "TLB size must be a multiple of 2 and greater than 1" - assert (self.asid_width >= 1), \ - "ASID width must be at least 1" - - return m - - """ - # Just for checking - function int countSetBits(logic[self.tlb_entries-1:0] vector); - automatic int count = 0; - foreach (vector[idx]) begin - count += vector[idx]; - end - return count; - endfunction - - assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1)) - else $error("More then one hit in TLB!"); $stop(); end - assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1)) - else $error("More then one TLB entry selected for next replace!"); - """ - - def ports(self): - return [self.flush_i, self.lu_access_i, - self.lu_asid_i, self.lu_vaddr_i, - self.lu_is_2M_o, self.lu_1G_o, self.lu_is_512G_o, self.lu_hit_o - ] + self.lu_content_o.ports() + self.update_i.ports() - - -if __name__ == '__main__': - tlb = TLB() - vl = rtlil.convert(tlb, ports=tlb.ports()) - with open("test_tlb.il", "w") as f: - f.write(vl) diff --git a/src/soc/TLB/ariane/tlb_content.py b/src/soc/TLB/ariane/tlb_content.py deleted file mode 100644 index bfd17c13..00000000 --- a/src/soc/TLB/ariane/tlb_content.py +++ /dev/null @@ -1,143 +0,0 @@ -from nmigen import Signal, Module, Cat, Const, Elaboratable - -from soc.TLB.ariane.ptw import TLBUpdate, PTE - - -class TLBEntry: - def __init__(self, asid_width): - self.asid = Signal(asid_width, name="ent_asid") - # SV48 defines four levels of page tables - self.vpn0 = Signal(9, name="ent_vpn0") - self.vpn1 = Signal(9, name="ent_vpn1") - self.vpn2 = Signal(9, name="ent_vpn2") - self.vpn3 = Signal(9, name="ent_vpn3") - self.is_2M = Signal(name="ent_is_2M") - self.is_1G = Signal(name="ent_is_1G") - self.is_512G = Signal(name="ent_is_512G") - self.valid = Signal(name="ent_valid") - - def flatten(self): - return Cat(*self.ports()) - - def eq(self, x): - return self.flatten().eq(x.flatten()) - - def ports(self): - return [self.asid, self.vpn0, self.vpn1, self.vpn2, - self.is_2M, self.is_1G, self.valid] - - -class TLBContent(Elaboratable): - def __init__(self, pte_width, asid_width): - self.asid_width = asid_width - self.pte_width = pte_width - self.flush_i = Signal() # Flush signal - # Update TLB - self.update_i = TLBUpdate(asid_width) - self.vpn3 = Signal(9) - self.vpn2 = Signal(9) - self.vpn1 = Signal(9) - self.vpn0 = Signal(9) - self.replace_en_i = Signal() # replace the following entry, - # set by replacement strategy - # Lookup signals - self.lu_asid_i = Signal(asid_width) - self.lu_content_o = Signal(pte_width) - self.lu_is_512G_o = Signal() - self.lu_is_2M_o = Signal() - self.lu_is_1G_o = Signal() - self.lu_hit_o = Signal() - - def elaborate(self, platform): - m = Module() - - tags = TLBEntry(self.asid_width) - - content = Signal(self.pte_width) - - m.d.comb += [self.lu_hit_o.eq(0), - self.lu_is_512G_o.eq(0), - self.lu_is_2M_o.eq(0), - self.lu_is_1G_o.eq(0)] - - # temporaries for lookup - asid_ok = 
Signal(reset_less=True) - # tags_ok = Signal(reset_less=True) - - vpn3_ok = Signal(reset_less=True) - vpn2_ok = Signal(reset_less=True) - vpn1_ok = Signal(reset_less=True) - vpn0_ok = Signal(reset_less=True) - - #tags_2M = Signal(reset_less=True) - vpn0_or_2M = Signal(reset_less=True) - - m.d.comb += [ - # compare asid and vpn* - asid_ok.eq(tags.asid == self.lu_asid_i), - vpn3_ok.eq(tags.vpn3 == self.vpn3), - vpn2_ok.eq(tags.vpn2 == self.vpn2), - vpn1_ok.eq(tags.vpn1 == self.vpn1), - vpn0_ok.eq(tags.vpn0 == self.vpn0), - vpn0_or_2M.eq(tags.is_2M | vpn0_ok) - ] - - with m.If(asid_ok & tags.valid): - # first level, only vpn3 needs to match - with m.If(tags.is_512G & vpn3_ok): - m.d.comb += [self.lu_content_o.eq(content), - self.lu_is_512G_o.eq(1), - self.lu_hit_o.eq(1), - ] - # second level , second level vpn2 and vpn3 need to match - with m.Elif(tags.is_1G & vpn2_ok & vpn3_ok): - m.d.comb += [self.lu_content_o.eq(content), - self.lu_is_1G_o.eq(1), - self.lu_hit_o.eq(1), - ] - # not a giga page hit nor a tera page hit so check further - with m.Elif(vpn1_ok): - # this could be a 2 mega page hit or a 4 kB hit - # output accordingly - with m.If(vpn0_or_2M): - m.d.comb += [self.lu_content_o.eq(content), - self.lu_is_2M_o.eq(tags.is_2M), - self.lu_hit_o.eq(1), - ] - # ------------------ - # Update or Flush - # ------------------ - - # temporaries - replace_valid = Signal(reset_less=True) - m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i) - - # flush - with m.If(self.flush_i): - # invalidate (flush) conditions: all if zero or just this ASID - with m.If(self.lu_asid_i == Const(0, self.asid_width) | - (self.lu_asid_i == tags.asid)): - m.d.sync += tags.valid.eq(0) - - # normal replacement - with m.Elif(replace_valid): - m.d.sync += [ # update tag array - tags.asid.eq(self.update_i.asid), - tags.vpn3.eq(self.update_i.vpn[27:36]), - tags.vpn2.eq(self.update_i.vpn[18:27]), - tags.vpn1.eq(self.update_i.vpn[9:18]), - tags.vpn0.eq(self.update_i.vpn[0:9]), - tags.is_512G.eq(self.update_i.is_512G), - tags.is_1G.eq(self.update_i.is_1G), - tags.is_2M.eq(self.update_i.is_2M), - tags.valid.eq(1), - # and content as well - content.eq(self.update_i.content.flatten()) - ] - return m - - def ports(self): - return [self.flush_i, - self.lu_asid_i, - self.lu_is_2M_o, self.lu_is_1G_o, self.lu_is_512G_o, self.lu_hit_o, - ] + self.update_i.content.ports() + self.update_i.ports() diff --git a/src/soc/TLB/test/__init__.py b/src/soc/TLB/test/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/soc/TLB/test/test_LFSR2.py b/src/soc/TLB/test/test_LFSR2.py deleted file mode 100644 index 33208f83..00000000 --- a/src/soc/TLB/test/test_LFSR2.py +++ /dev/null @@ -1,69 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# See Notices.txt for copyright information -from soc.TLB.LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3 - -from nmigen.back.pysim import Simulator, Delay, Tick -import unittest - - -class TestLFSR(unittest.TestCase): - def test_poly(self): - v = LFSRPolynomial() - self.assertEqual(repr(v), "LFSRPolynomial([0])") - self.assertEqual(str(v), "1") - v = LFSRPolynomial([1]) - self.assertEqual(repr(v), "LFSRPolynomial([1, 0])") - self.assertEqual(str(v), "x + 1") - v = LFSRPolynomial([0, 1]) - self.assertEqual(repr(v), "LFSRPolynomial([1, 0])") - self.assertEqual(str(v), "x + 1") - v = LFSRPolynomial([1, 2]) - self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])") - self.assertEqual(str(v), "x^2 + x + 1") - v = LFSRPolynomial([2]) - self.assertEqual(repr(v), 
"LFSRPolynomial([2, 0])") - self.assertEqual(str(v), "x^2 + 1") - self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1") - - def test_lfsr_3(self): - module = LFSR(LFSR_POLY_3) - traces = [module.state, module.enable] - with Simulator(module, - vcd_file=open("Waveforms/test_LFSR2.vcd", "w"), - gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"), - traces=traces) as sim: - sim.add_clock(1e-6, phase=0.25e-6) - delay = Delay(1e-7) - - def async_process(): - yield module.enable.eq(0) - yield Tick() - self.assertEqual((yield module.state), 0x1) - yield Tick() - self.assertEqual((yield module.state), 0x1) - yield module.enable.eq(1) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x2) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x5) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x3) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x7) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x6) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x4) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x1) - yield Tick() - - sim.add_process(async_process) - sim.run() diff --git a/src/soc/TLB/test/test_address_encoder.py b/src/soc/TLB/test/test_address_encoder.py deleted file mode 100644 index 70d435d6..00000000 --- a/src/soc/TLB/test/test_address_encoder.py +++ /dev/null @@ -1,116 +0,0 @@ -from nmigen.compat.sim import run_simulation -from soc.TLB.AddressEncoder import AddressEncoder -from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op - - -# This function allows for the easy setting of values to the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# i (Input): The array of single bits to be written -def set_encoder(dut, i): - yield dut.i.eq(i) - yield - -# Checks the single match of the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# sm (Single Match): The expected match result -# op (Operation): (0 => ==), (1 => !=) - - -def check_single_match(dut, sm, op): - out_sm = yield dut.single_match - assert_op("Single Match", out_sm, sm, op) - -# Checks the multiple match of the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# mm (Multiple Match): The expected match result -# op (Operation): (0 => ==), (1 => !=) - - -def check_multiple_match(dut, mm, op): - out_mm = yield dut.multiple_match - assert_op("Multiple Match", out_mm, mm, op) - -# Checks the output of the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# o (Output): The expected output -# op (Operation): (0 => ==), (1 => !=) - - -def check_output(dut, o, op): - out_o = yield dut.o - assert_op("Output", out_o, o, op) - -# Checks the state of the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# sm (Single Match): The expected match result -# mm (Multiple Match): The expected match result -# o (Output): The expected output -# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -# o_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) - - -def check_all(dut, sm, mm, o, sm_op, mm_op, o_op): - yield from check_single_match(dut, sm, sm_op) - yield from check_multiple_match(dut, mm, mm_op) - yield from check_output(dut, o, o_op) - - -def tbench(dut): - # Check invalid input - in_val = 0b000 - single_match = 0 - multiple_match = 0 - output = 0 - yield from set_encoder(dut, 
in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - # Check single bit - in_val = 0b001 - single_match = 1 - multiple_match = 0 - output = 0 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - # Check another single bit - in_val = 0b100 - single_match = 1 - multiple_match = 0 - output = 2 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - # Check multiple match - # We expected the lowest bit to be returned which is address 0 - in_val = 0b101 - single_match = 0 - multiple_match = 1 - output = 0 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - # Check another multiple match - # We expected the lowest bit to be returned which is address 1 - in_val = 0b110 - single_match = 0 - multiple_match = 1 - output = 1 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - -def test_addr(): - dut = AddressEncoder(4) - run_simulation(dut, tbench(dut), - vcd_name="Waveforms/test_address_encoder.vcd") - print("AddressEncoder Unit Test Success") - - -if __name__ == "__main__": - test_addr() diff --git a/src/soc/TLB/test/test_cam.py b/src/soc/TLB/test/test_cam.py deleted file mode 100644 index d11cd974..00000000 --- a/src/soc/TLB/test/test_cam.py +++ /dev/null @@ -1,218 +0,0 @@ -from nmigen.compat.sim import run_simulation - -from soc.TLB.Cam import Cam - -from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op - -# This function allows for the easy setting of values to the Cam -# Arguments: -# dut: The Cam being tested -# e (Enable): Whether the block is going to be enabled -# we (Write Enable): Whether the Cam will write on the next cycle -# a (Address): Where the data will be written if write enable is high -# d (Data): Either what we are looking for or will write to the address - - -def set_cam(dut, e, we, a, d): - yield dut.enable.eq(e) - yield dut.write_enable.eq(we) - yield dut.address_in.eq(a) - yield dut.data_in.eq(d) - yield - -# Checks the multiple match of the Cam -# Arguments: -# dut: The Cam being tested -# mm (Multiple Match): The expected match result -# op (Operation): (0 => ==), (1 => !=) - - -def check_multiple_match(dut, mm, op): - out_mm = yield dut.multiple_match - assert_op("Multiple Match", out_mm, mm, op) - -# Checks the single match of the Cam -# Arguments: -# dut: The Cam being tested -# sm (Single Match): The expected match result -# op (Operation): (0 => ==), (1 => !=) - - -def check_single_match(dut, sm, op): - out_sm = yield dut.single_match - assert_op("Single Match", out_sm, sm, op) - -# Checks the address output of the Cam -# Arguments: -# dut: The Cam being tested -# ma (Match Address): The expected match result -# op (Operation): (0 => ==), (1 => !=) - - -def check_match_address(dut, ma, op): - out_ma = yield dut.match_address - assert_op("Match Address", out_ma, ma, op) - -# Checks the state of the Cam -# Arguments: -# dut: The Cam being tested -# sm (Single Match): The expected match result -# mm (Multiple Match): The expected match result -# ma: (Match Address): The expected address output -# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -# ma_op (Operation): Operation for the address assertion (0 => ==), (1 => !=) - - -def check_all(dut, mm, sm, ma, mm_op, sm_op, 
ma_op): - yield from check_multiple_match(dut, mm, mm_op) - yield from check_single_match(dut, sm, sm_op) - yield from check_match_address(dut, ma, ma_op) - - -def tbench(dut): - # NA - enable = 0 - write_enable = 0 - address = 0 - data = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Read Miss Multiple - # Note that the default starting entry data bits are all 0 - enable = 1 - write_enable = 0 - address = 0 - data = 0 - multiple_match = 1 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_multiple_match(dut, multiple_match, 0) - - # Read Miss - # Note that the default starting entry data bits are all 0 - enable = 1 - write_enable = 0 - address = 0 - data = 1 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Write Entry 0 - enable = 1 - write_enable = 1 - address = 0 - data = 4 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Read Hit Entry 0 - enable = 1 - write_enable = 0 - address = 0 - data = 4 - multiple_match = 0 - single_match = 1 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) - - # Search Hit - enable = 1 - write_enable = 0 - address = 0 - data = 4 - multiple_match = 0 - single_match = 1 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) - - # Search Miss - enable = 1 - write_enable = 0 - address = 0 - data = 5 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Multiple Match test - # Write Entry 1 - enable = 1 - write_enable = 1 - address = 1 - data = 5 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Write Entry 2 - # Same data as Entry 1 - enable = 1 - write_enable = 1 - address = 2 - data = 5 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Read Hit Data 5 - enable = 1 - write_enable = 0 - address = 1 - data = 5 - multiple_match = 1 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) - - # Verify read_warning is not caused - # Write Entry 0 - enable = 1 - write_enable = 1 - address = 0 - data = 7 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - # Note there is no yield we immediately attempt to read in the next cycle - - # Read Hit Data 7 - enable = 1 - write_enable = 0 - address = 0 - data = 7 - multiple_match = 0 - single_match = 1 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - yield - - -def test_cam(): - dut = Cam(4, 4) - run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam.vcd") - print("Cam Unit Test Success") - - -if __name__ == "__main__": - test_cam() diff --git 
a/src/soc/TLB/test/test_cam_entry.py b/src/soc/TLB/test/test_cam_entry.py deleted file mode 100644 index 961445b6..00000000 --- a/src/soc/TLB/test/test_cam_entry.py +++ /dev/null @@ -1,119 +0,0 @@ -from nmigen.compat.sim import run_simulation - -from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op -from soc.TLB.CamEntry import CamEntry - -# This function allows for the easy setting of values to the Cam Entry -# Arguments: -# dut: The CamEntry being tested -# c (command): NA (0), Read (1), Write (2), Reserve (3) -# d (data): The data to be set - - -def set_cam_entry(dut, c, d): - # Write desired values - yield dut.command.eq(c) - yield dut.data_in.eq(d) - yield - # Reset all lines - yield dut.command.eq(0) - yield dut.data_in.eq(0) - yield - -# Checks the data state of the CAM entry -# Arguments: -# dut: The CamEntry being tested -# d (Data): The expected data -# op (Operation): (0 => ==), (1 => !=) - - -def check_data(dut, d, op): - out_d = yield dut.data - assert_op("Data", out_d, d, op) - -# Checks the match state of the CAM entry -# Arguments: -# dut: The CamEntry being tested -# m (Match): The expected match -# op (Operation): (0 => ==), (1 => !=) - - -def check_match(dut, m, op): - out_m = yield dut.match - assert_op("Match", out_m, m, op) - -# Checks the state of the CAM entry -# Arguments: -# dut: The CamEntry being tested -# d (data): The expected data -# m (match): The expected match -# d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=) -# m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) - - -def check_all(dut, d, m, d_op, m_op): - yield from check_data(dut, d, d_op) - yield from check_match(dut, m, m_op) - -# This tbench goes through the paces of testing the CamEntry module -# It is done by writing and then reading various combinations of key/data pairs -# and reading the results with varying keys to verify the resulting stored -# data is correct. 
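The comment above describes the general pattern: drive the entry's inputs, step the clock, then read the outputs back and assert on them. As a minimal, self-contained sketch of that pattern (assuming only nmigen and its compat simulator; the Latch module is a hypothetical stand-in for CamEntry, not code from this repository):

from nmigen import Elaboratable, Module, Signal
from nmigen.compat.sim import run_simulation


class Latch(Elaboratable):
    """Hypothetical 1-deep register, used only to illustrate the test style."""
    def __init__(self, width):
        self.data_in = Signal(width)
        self.data = Signal(width)

    def elaborate(self, platform=None):
        m = Module()
        m.d.sync += self.data.eq(self.data_in)  # capture the input each clock
        return m


def latch_tbench(dut):
    yield dut.data_in.eq(5)   # drive the input
    yield                     # clock edge: the value is captured
    yield                     # one more cycle so the read sees the update
    assert (yield dut.data) == 5


if __name__ == "__main__":
    dut = Latch(4)
    run_simulation(dut, latch_tbench(dut), vcd_name="latch_example.vcd")

The tbench below follows the same shape, with set_cam_entry() and check_all() wrapping the drive and read steps.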
- - -def tbench(dut): - # Check write - command = 2 - data = 1 - match = 0 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 0, 0) - - # Check read miss - command = 1 - data = 2 - match = 0 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 1, 0) - - # Check read hit - command = 1 - data = 1 - match = 1 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 0, 0) - - # Check overwrite - command = 2 - data = 5 - match = 0 - yield from set_cam_entry(dut, command, data) - yield - yield from check_all(dut, data, match, 0, 0) - - # Check read hit - command = 1 - data = 5 - match = 1 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 0, 0) - - # Check reset - command = 3 - data = 0 - match = 0 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 0, 0) - - # Extra clock cycle for waveform - yield - - -def test_camentry(): - dut = CamEntry(4) - run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam_entry.vcd") - print("CamEntry Unit Test Success") - - -if __name__ == "__main__": - test_camentry() diff --git a/src/soc/TLB/test/test_permission_validator.py b/src/soc/TLB/test/test_permission_validator.py deleted file mode 100644 index b52b5459..00000000 --- a/src/soc/TLB/test/test_permission_validator.py +++ /dev/null @@ -1,150 +0,0 @@ -from nmigen.compat.sim import run_simulation - -from soc.TLB.PermissionValidator import PermissionValidator - -from soc.TestUtil.test_helper import assert_op - - -def set_validator(dut, d, xwr, sm, sa, asid): - yield dut.data.eq(d) - yield dut.xwr.eq(xwr) - yield dut.super_mode.eq(sm) - yield dut.super_access.eq(sa) - yield dut.asid.eq(asid) - yield - - -def check_valid(dut, v, op): - out_v = yield dut.valid - assert_op("Valid", out_v, v, op) - - -def tbench(dut): - # 80 bits represented. Ignore the MSB as it will be truncated - # ASID is bits first 4 hex values (bits 64 - 78) - - # Test user mode entry valid - # Global Bit matching ASID - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000031 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test user mode entry valid - # Global Bit nonmatching ASID - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000031 - # Ignore MSB it will be truncated - asid = 0x7FF6 - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test user mode entry invalid - # Global Bit nonmatching ASID - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000021 - # Ignore MSB it will be truncated - asid = 0x7FF6 - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 0 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test user mode entry valid - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000011 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test user mode entry invalid - # Ensure that user mode and valid is enabled! 
- data = 0x7FFF0000000000000011 - # Ignore MSB it will be truncated - asid = 0x7FF6 - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 0 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test supervisor mode entry valid - # The entry is NOT in user mode - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000001 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 1 - super_access = 0 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test supervisor mode entry invalid - # The entry is in user mode - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000011 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 1 - super_access = 0 - xwr = 0 - valid = 0 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test supervisor mode entry valid - # The entry is NOT in user mode with access - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000001 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 1 - super_access = 1 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test supervisor mode entry valid - # The entry is in user mode with access - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000011 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 1 - super_access = 1 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - -def test_permv(): - dut = PermissionValidator(15, 64) - run_simulation(dut, tbench( - dut), vcd_name="Waveforms/test_permission_validator.vcd") - print("PermissionValidator Unit Test Success") - - -if __name__ == "__main__": - test_permv() diff --git a/src/soc/TLB/test/test_pte_entry.py b/src/soc/TLB/test/test_pte_entry.py deleted file mode 100644 index 51b3dcf0..00000000 --- a/src/soc/TLB/test/test_pte_entry.py +++ /dev/null @@ -1,114 +0,0 @@ -from nmigen.compat.sim import run_simulation - -from soc.TLB.PteEntry import PteEntry - -from soc.TestUtil.test_helper import assert_op - - -def set_entry(dut, i): - yield dut.i.eq(i) - yield - - -def check_dirty(dut, d, op): - out_d = yield dut.d - assert_op("Dirty", out_d, d, op) - - -def check_accessed(dut, a, op): - out_a = yield dut.a - assert_op("Accessed", out_a, a, op) - - -def check_global(dut, o, op): - out = yield dut.g - assert_op("Global", out, o, op) - - -def check_user(dut, o, op): - out = yield dut.u - assert_op("User Mode", out, o, op) - - -def check_xwr(dut, o, op): - out = yield dut.xwr - assert_op("XWR", out, o, op) - - -def check_asid(dut, o, op): - out = yield dut.asid - assert_op("ASID", out, o, op) - - -def check_pte(dut, o, op): - out = yield dut.pte - assert_op("ASID", out, o, op) - - -def check_valid(dut, v, op): - out_v = yield dut.v - assert_op("Valid", out_v, v, op) - - -def check_all(dut, d, a, g, u, xwr, v, asid, pte): - yield from check_dirty(dut, d, 0) - yield from check_accessed(dut, a, 0) - yield from check_global(dut, g, 0) - yield from check_user(dut, u, 0) - yield from check_xwr(dut, xwr, 0) - yield from check_asid(dut, asid, 0) - yield from check_pte(dut, pte, 0) - yield from check_valid(dut, v, 0) - - -def tbench(dut): - # 80 bits represented. 
Ignore the MSB as it will be truncated - # ASID is bits first 4 hex values (bits 64 - 78) - - i = 0x7FFF0000000000000031 - dirty = 0 - access = 0 - glob = 1 - user = 1 - xwr = 0 - valid = 1 - asid = 0x7FFF - pte = 0x0000000000000031 - yield from set_entry(dut, i) - yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) - - i = 0x0FFF00000000000000FF - dirty = 1 - access = 1 - glob = 1 - user = 1 - xwr = 7 - valid = 1 - asid = 0x0FFF - pte = 0x00000000000000FF - yield from set_entry(dut, i) - yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) - - i = 0x0721000000001100001F - dirty = 0 - access = 0 - glob = 0 - user = 1 - xwr = 7 - valid = 1 - asid = 0x0721 - pte = 0x000000001100001F - yield from set_entry(dut, i) - yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) - - yield - - -def test_pteentry(): - dut = PteEntry(15, 64) - run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_pte_entry.vcd") - print("PteEntry Unit Test Success") - - -if __name__ == "__main__": - test_pteentry() diff --git a/src/soc/TLB/test/test_set_associative_cache.py b/src/soc/TLB/test/test_set_associative_cache.py deleted file mode 100644 index edec055b..00000000 --- a/src/soc/TLB/test/test_set_associative_cache.py +++ /dev/null @@ -1,43 +0,0 @@ -from nmigen.compat.sim import run_simulation - -from soc.TLB.SetAssociativeCache import SetAssociativeCache - -from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op - - -def set_sac(dut, e, c, s, t, d): - yield dut.enable.eq(e) - yield dut.command.eq(c) - yield dut.cset.eq(s) - yield dut.tag.eq(t) - yield dut.data_i.eq(d) - yield - - -def tbench(dut): - enable = 1 - command = 2 - cset = 1 - tag = 2 - data = 3 - yield from set_sac(dut, enable, command, cset, tag, data) - yield - - enable = 1 - command = 2 - cset = 1 - tag = 5 - data = 8 - yield from set_sac(dut, enable, command, cset, tag, data) - yield - - -def test_assoc_cache(): - dut = SetAssociativeCache(4, 4, 4, 4) - run_simulation(dut, tbench( - dut), vcd_name="Waveforms/test_set_associative_cache.vcd") - print("Set Associative Cache Unit Test Success") - - -if __name__ == "__main__": - test_assoc_cache() diff --git a/src/soc/TLB/test/test_tlb.py b/src/soc/TLB/test/test_tlb.py deleted file mode 100644 index 38656623..00000000 --- a/src/soc/TLB/test/test_tlb.py +++ /dev/null @@ -1,86 +0,0 @@ -#import tracemalloc -# tracemalloc.start() - -from nmigen.compat.sim import run_simulation - -from soc.TLB.TLB import TLB - -from soc.TestUtil.test_helper import assert_op, assert_eq - -# self.supermode = Signal(1) # Supervisor Mode -# self.super_access = Signal(1) # Supervisor Access -# self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2 -# self.xwr = Signal(3) # Execute, Write, Read -# self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64 -#self.address_L1 = Signal(range(L1_size)) -# self.asid = Signal(asid_size) # Address Space IDentifier (ASID) -# self.vma = Signal(vma_size) # Virtual Memory Address (VMA) -# self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE) -# -# self.hit = Signal(1) # Denotes if the VMA had a mapped PTE -# self.perm_valid = Signal(1) # Denotes if the permissions are correct -# self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA - -COMMAND_READ = 1 -COMMAND_WRITE_L1 = 2 - -# Checks the data state of the CAM entry -# Arguments: -# dut: The CamEntry being tested -# d (Data): The expected data -# op (Operation): (0 => ==), (1 => !=) - - -def check_hit(dut, d): - hit_d 
= yield dut.hit - #assert_eq("hit", hit_d, d) - - -def tst_command(dut, cmd, xwr, cycles): - yield dut.command.eq(cmd) - yield dut.xwr.eq(xwr) - for i in range(0, cycles): - yield - - -def tst_write_L1(dut, vma, address_L1, asid, pte_in): - yield dut.address_L1.eq(address_L1) - yield dut.asid.eq(asid) - yield dut.vma.eq(vma) - yield dut.pte_in.eq(pte_in) - yield from tst_command(dut, COMMAND_WRITE_L1, 7, 2) - - -def tst_search(dut, vma, found): - yield dut.vma.eq(vma) - yield from tst_command(dut, COMMAND_READ, 7, 1) - yield from check_hit(dut, found) - - -def zero(dut): - yield dut.supermode.eq(0) - yield dut.super_access.eq(0) - yield dut.mode.eq(0) - yield dut.address_L1.eq(0) - yield dut.asid.eq(0) - yield dut.vma.eq(0) - yield dut.pte_in.eq(0) - - -def tbench(dut): - yield from zero(dut) - yield dut.mode.eq(0xF) # enable TLB - # test hit - yield from tst_write_L1(dut, 0xFEEDFACE, 0, 0xFFFF, 0xF0F0) - yield from tst_search(dut, 0xFEEDFACE, 1) - yield from tst_search(dut, 0xFACEFEED, 0) - - -def test_tlb(): - dut = TLB(15, 36, 64, 8) - run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_tlb.vcd") - print("TLB Unit Test Success") - - -if __name__ == "__main__": - test_tlb() diff --git a/src/soc/iommu/axi_rab/axi4_ar_buffer.py b/src/soc/iommu/axi_rab/axi4_ar_buffer.py deleted file mode 100644 index 1f3a5ff3..00000000 --- a/src/soc/iommu/axi_rab/axi4_ar_buffer.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2018 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
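The generated module below keeps its datapath as SystemVerilog comments: the AR-channel fields are packed into one word, pushed through an axi_buffer_rab FIFO, and unpacked on the master side. As a rough, hypothetical sketch of how that packing could be written in nmigen with Cat (field order and widths taken from the data_in assigns in the comments; the class name and constructor are assumptions, not repository code):

from nmigen import Cat, Elaboratable, Module, Signal


class ARPackSketch(Elaboratable):
    def __init__(self, id_width=4, user_width=4):
        self.arcache = Signal(4)
        self.arprot = Signal(3)
        self.arlock = Signal()
        self.arburst = Signal(2)
        self.arsize = Signal(3)
        self.arlen = Signal(8)
        self.araddr = Signal(32)
        self.arid = Signal(id_width)
        self.aruser = Signal(user_width)
        # 4+3+1+2+3+8+32 = 53 fixed bits, plus the id and user fields
        self.packed = Signal(53 + id_width + user_width)

    def elaborate(self, platform=None):
        m = Module()
        # same field order as data_in[...] in the SystemVerilog comments
        m.d.comb += self.packed.eq(Cat(self.arcache, self.arprot, self.arlock,
                                       self.arburst, self.arsize, self.arlen,
                                       self.araddr, self.arid, self.aruser))
        return m

Unpacking on the far side would slice self.packed in the same order, which is what the m_axi4_* assigns in the comments do.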
- -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - -# module axi4_ar_buffer -# #( -# parameter AXI_ID_WIDTH = 4, -# parameter AXI_USER_WIDTH = 4 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid, -# input logic [31:0] s_axi4_araddr, -# input logic s_axi4_arvalid, -# output logic s_axi4_arready, -# input logic [7:0] s_axi4_arlen, -# input logic [2:0] s_axi4_arsize, -# input logic [1:0] s_axi4_arburst, -# input logic s_axi4_arlock, -# input logic [2:0] s_axi4_arprot, -# input logic [3:0] s_axi4_arcache, -# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser, -# -# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid, -# output logic [31:0] m_axi4_araddr, -# output logic m_axi4_arvalid, -# input logic m_axi4_arready, -# output logic [7:0] m_axi4_arlen, -# output logic [2:0] m_axi4_arsize, -# output logic [1:0] m_axi4_arburst, -# output logic m_axi4_arlock, -# output logic [2:0] m_axi4_arprot, -# output logic [3:0] m_axi4_arcache, -# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser -# ); - - -class axi4_ar_buffer(Elaboratable): - - def __init__(self): - # self.axi4_aclk = Signal() # input - # self.axi4_arstn = Signal() # input - self.s_axi4_arid = Signal(AXI_ID_WIDTH) # input - self.s_axi4_araddr = Signal(32) # input - self.s_axi4_arvalid = Signal() # input - self.s_axi4_arready = Signal() # output - self.s_axi4_arlen = Signal(8) # input - self.s_axi4_arsize = Signal(3) # input - self.s_axi4_arburst = Signal(2) # input - self.s_axi4_arlock = Signal() # input - self.s_axi4_arprot = Signal(3) # input - self.s_axi4_arcache = Signal(4) # input - self.s_axi4_aruser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_arid = Signal(AXI_ID_WIDTH) # output - self.m_axi4_araddr = Signal(32) # output - self.m_axi4_arvalid = Signal() # output - self.m_axi4_arready = Signal() # input - self.m_axi4_arlen = Signal(8) # output - self.m_axi4_arsize = Signal(3) # output - self.m_axi4_arburst = Signal(2) # output - self.m_axi4_arlock = Signal() # output - self.m_axi4_arprot = Signal(3) # output - self.m_axi4_arcache = Signal(4) # output - self.m_axi4_aruser = Signal(AXI_USER_WIDTH) # output - - def elaborate(self, platform=None): - m = Module() - # #TODO use record types here - # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_in; - # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_out; - - # assign data_in [3:0] = s_axi4_arcache; - # assign data_in [6:4] = s_axi4_arprot; - # assign data_in [7] = s_axi4_arlock; - # assign data_in [9:8] = s_axi4_arburst; - # assign data_in [12:10] = s_axi4_arsize; - # assign data_in [20:13] = s_axi4_arlen; - # assign data_in [52:21] = s_axi4_araddr; - # assign data_in [52+AXI_ID_WIDTH:53] = s_axi4_arid; - # assign data_in[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH] = s_axi4_aruser; - # - # assign m_axi4_arcache = data_out[3:0]; - # assign m_axi4_arprot = data_out[6:4]; - # assign m_axi4_arlock = data_out[7]; - # assign m_axi4_arburst = data_out[9:8]; - # assign m_axi4_arsize = data_out[12:10]; - # assign m_axi4_arlen = data_out[20:13]; - # assign m_axi4_araddr = data_out[52:21]; - # assign m_axi4_arid = data_out[52+AXI_ID_WIDTH:53]; - # assign m_axi4_aruser = data_out[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH]; - - # m.d.comb += self.m_axi4_arcache.eq(..) - # m.d.comb += self.m_axi4_arprot.eq(..) - # m.d.comb += self.m_axi4_arlock.eq(..) - # m.d.comb += self.m_axi4_arburst.eq(..) - # m.d.comb += self.m_axi4_arsize.eq(..) - # m.d.comb += self.m_axi4_arlen.eq(..) 
- # m.d.comb += self.m_axi4_araddr.eq(..) - # m.d.comb += self.m_axi4_arid.eq(..) - # m.d.comb += self.m_axi4_aruser.eq(..) - return m - -# TODO convert axi_buffer_rab.sv -# -# axi_buffer_rab -# #( -# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+53 ), -# .BUFFER_DEPTH ( 4 ) -# ) -# u_buffer -# ( -# .clk ( axi4_aclk ), -# .rstn ( axi4_arstn ), -# .valid_out ( m_axi4_arvalid ), -# .data_out ( data_out ), -# .ready_in ( m_axi4_arready ), -# .valid_in ( s_axi4_arvalid ), -# .data_in ( data_in ), -# .ready_out ( s_axi4_arready ) -# ); -# - -# endmodule diff --git a/src/soc/iommu/axi_rab/axi4_ar_sender.py b/src/soc/iommu/axi_rab/axi4_ar_sender.py deleted file mode 100644 index 4cbd97d5..00000000 --- a/src/soc/iommu/axi_rab/axi4_ar_sender.py +++ /dev/null @@ -1,232 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_ar_sender(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.l1_done_o = Signal() # output - self.l1_accept_i = Signal() # input - self.l1_drop_i = Signal() # input - self.l1_save_i = Signal() # input - self.l2_done_o = Signal() # output - self.l2_accept_i = Signal() # input - self.l2_drop_i = Signal() # input - self.l2_sending_o = Signal() # output - self.l1_araddr_i = Signal(AXI_ADDR_WIDTH) # input - self.l2_araddr_i = Signal(AXI_ADDR_WIDTH) # input - self.s_axi4_arid = Signal(AXI_ID_WIDTH) # input - self.s_axi4_arvalid = Signal() # input - self.s_axi4_arready = Signal() # output - self.s_axi4_arlen = Signal(8) # input - self.s_axi4_arsize = Signal(3) # input - self.s_axi4_arburst = Signal(2) # input - self.s_axi4_arlock = Signal() # input - self.s_axi4_arprot = Signal(3) # input - self.s_axi4_arcache = Signal(4) # input - self.s_axi4_aruser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_arid = Signal(AXI_ID_WIDTH) # output - self.m_axi4_araddr = Signal(AXI_ADDR_WIDTH) # output - self.m_axi4_arvalid = Signal() # output - self.m_axi4_arready = Signal() # input - self.m_axi4_arlen = Signal(8) # output - self.m_axi4_arsize = Signal(3) # output - self.m_axi4_arburst = Signal(2) # output - self.m_axi4_arlock = Signal() # output - self.m_axi4_arprot = Signal(3) # output - self.m_axi4_arcache = Signal(4) # output - self.m_axi4_aruser = Signal(AXI_USER_WIDTH) # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.l1_save.eq(self.None) - m.d.comb += self.l1_done_o.eq(self.None) - m.d.comb += self.m_axi4_arvalid.eq(self.None) - m.d.comb += self.s_axi4_arready.eq(self.None) - m.d.comb += self.m_axi4_aruser.eq(self.None) - m.d.comb += self.m_axi4_arcache.eq(self.None) - m.d.comb += self.m_axi4_arprot.eq(self.None) - m.d.comb += self.m_axi4_arlock.eq(self.None) - m.d.comb += self.m_axi4_arburst.eq(self.None) - m.d.comb += self.m_axi4_arsize.eq(self.None) - m.d.comb += self.m_axi4_arlen.eq(self.None) - m.d.comb += self.m_axi4_araddr.eq(self.None) - m.d.comb += self.m_axi4_arid.eq(self.None) - m.d.comb += self.l2_sending_o.eq(self.None) - m.d.comb += self.l2_sent.eq(self.None) - m.d.comb += self.l2_done_o.eq(self.None) - m.d.comb += self.m_axi4_aruser.eq(self.s_axi4_aruser) - m.d.comb += self.m_axi4_arcache.eq(self.s_axi4_arcache) - m.d.comb += self.m_axi4_arprot.eq(self.s_axi4_arprot) - m.d.comb += self.m_axi4_arlock.eq(self.s_axi4_arlock) - m.d.comb += self.m_axi4_arburst.eq(self.s_axi4_arburst) - m.d.comb += self.m_axi4_arsize.eq(self.s_axi4_arsize) - m.d.comb += self.m_axi4_arlen.eq(self.s_axi4_arlen) - m.d.comb += 
self.m_axi4_araddr.eq(self.l1_araddr_i) - m.d.comb += self.m_axi4_arid.eq(self.s_axi4_arid) - m.d.comb += self.l2_sending_o.eq(self.1: 'b0) - m.d.comb += self.l2_available_q.eq(self.1: 'b0) - m.d.comb += self.l2_done_o.eq(self.1: 'b0) - return m - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# module axi4_ar_sender -# #( -# parameter AXI_ADDR_WIDTH = 40, -# parameter AXI_ID_WIDTH = 4, -# parameter AXI_USER_WIDTH = 4, -# parameter ENABLE_L2TLB = 0 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# output logic l1_done_o, -# input logic l1_accept_i, -# input logic l1_drop_i, -# input logic l1_save_i, -# -# output logic l2_done_o, -# input logic l2_accept_i, -# input logic l2_drop_i, -# output logic l2_sending_o, -# -# input logic [AXI_ADDR_WIDTH-1:0] l1_araddr_i, -# input logic [AXI_ADDR_WIDTH-1:0] l2_araddr_i, -# -# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid, -# input logic s_axi4_arvalid, -# output logic s_axi4_arready, -# input logic [7:0] s_axi4_arlen, -# input logic [2:0] s_axi4_arsize, -# input logic [1:0] s_axi4_arburst, -# input logic s_axi4_arlock, -# input logic [2:0] s_axi4_arprot, -# input logic [3:0] s_axi4_arcache, -# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser, -# -# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid, -# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_araddr, -# output logic m_axi4_arvalid, -# input logic m_axi4_arready, -# output logic [7:0] m_axi4_arlen, -# output logic [2:0] m_axi4_arsize, -# output logic [1:0] m_axi4_arburst, -# output logic m_axi4_arlock, -# output logic [2:0] m_axi4_arprot, -# output logic [3:0] m_axi4_arcache, -# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser -# ); -# -# logic l1_save; -# -# logic l2_sent; -# logic l2_available_q; -# -# assign l1_save = l1_save_i & l2_available_q; -# -# assign l1_done_o = s_axi4_arvalid & s_axi4_arready ; -# -# // if 1: accept and forward a transaction translated by L1 -# // 2: drop or save request (if L2 slot not occupied already) -# assign m_axi4_arvalid = (s_axi4_arvalid & l1_accept_i) | -# l2_sending_o; -# assign s_axi4_arready = (m_axi4_arvalid & m_axi4_arready & ~l2_sending_o) | -# (s_axi4_arvalid & (l1_drop_i | l1_save)); -# -# generate -# if (ENABLE_L2TLB == 1) begin -# logic [AXI_USER_WIDTH-1:0] l2_axi4_aruser ; -# logic [3:0] l2_axi4_arcache ; -# logic [3:0] l2_axi4_arregion; -# logic [3:0] l2_axi4_arqos ; -# logic [2:0] l2_axi4_arprot ; -# logic l2_axi4_arlock ; -# logic [1:0] l2_axi4_arburst ; -# logic [2:0] l2_axi4_arsize ; -# logic [7:0] l2_axi4_arlen ; -# logic [AXI_ID_WIDTH-1:0] l2_axi4_arid ; -# -# assign m_axi4_aruser = l2_sending_o ? l2_axi4_aruser : s_axi4_aruser; -# assign m_axi4_arcache = l2_sending_o ? l2_axi4_arcache : s_axi4_arcache; -# assign m_axi4_arprot = l2_sending_o ? l2_axi4_arprot : s_axi4_arprot; -# assign m_axi4_arlock = l2_sending_o ? l2_axi4_arlock : s_axi4_arlock; -# assign m_axi4_arburst = l2_sending_o ? 
l2_axi4_arburst : s_axi4_arburst; -# assign m_axi4_arsize = l2_sending_o ? l2_axi4_arsize : s_axi4_arsize; -# assign m_axi4_arlen = l2_sending_o ? l2_axi4_arlen : s_axi4_arlen; -# assign m_axi4_araddr = l2_sending_o ? l2_araddr_i : l1_araddr_i; -# assign m_axi4_arid = l2_sending_o ? l2_axi4_arid : s_axi4_arid; -# -# // Buffer AXI signals in case of L1 miss -# always @(posedge axi4_aclk or negedge axi4_arstn) begin -# if (axi4_arstn == 1'b0) begin -# l2_axi4_aruser <= 'b0; -# l2_axi4_arcache <= 'b0; -# l2_axi4_arprot <= 'b0; -# l2_axi4_arlock <= 1'b0; -# l2_axi4_arburst <= 'b0; -# l2_axi4_arsize <= 'b0; -# l2_axi4_arlen <= 'b0; -# l2_axi4_arid <= 'b0; -# end else if (l1_save) begin -# l2_axi4_aruser <= s_axi4_aruser; -# l2_axi4_arcache <= s_axi4_arcache; -# l2_axi4_arprot <= s_axi4_arprot; -# l2_axi4_arlock <= s_axi4_arlock; -# l2_axi4_arburst <= s_axi4_arburst; -# l2_axi4_arsize <= s_axi4_arsize; -# l2_axi4_arlen <= s_axi4_arlen; -# l2_axi4_arid <= s_axi4_arid; -# end -# end -# -# // signal that an l1_save_i can be accepted -# always @(posedge axi4_aclk or negedge axi4_arstn) begin -# if (axi4_arstn == 1'b0) begin -# l2_available_q <= 1'b1; -# end else if (l2_sent | l2_drop_i) begin -# l2_available_q <= 1'b1; -# end else if (l1_save) begin -# l2_available_q <= 1'b0; -# end -# end -# -# assign l2_sending_o = l2_accept_i & ~l2_available_q; -# assign l2_sent = l2_sending_o & m_axi4_arvalid & m_axi4_arready; -# -# // if 1: having sent out a transaction translated by L2 -# // 2: drop request (L2 slot is available again) -# assign l2_done_o = l2_sent | l2_drop_i; -# -# end else begin // !`ifdef ENABLE_L2TLB -# assign m_axi4_aruser = s_axi4_aruser; -# assign m_axi4_arcache = s_axi4_arcache; -# assign m_axi4_arprot = s_axi4_arprot; -# assign m_axi4_arlock = s_axi4_arlock; -# assign m_axi4_arburst = s_axi4_arburst; -# assign m_axi4_arsize = s_axi4_arsize; -# assign m_axi4_arlen = s_axi4_arlen; -# assign m_axi4_araddr = l1_araddr_i; -# assign m_axi4_arid = s_axi4_arid; -# -# assign l2_sending_o = 1'b0; -# assign l2_available_q = 1'b0; -# assign l2_done_o = 1'b0; -# end // else: !if(ENABLE_L2TLB == 1) -# endgenerate -# -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi4_aw_buffer.py b/src/soc/iommu/axi_rab/axi4_aw_buffer.py deleted file mode 100644 index f5ca37d1..00000000 --- a/src/soc/iommu/axi_rab/axi4_aw_buffer.py +++ /dev/null @@ -1,157 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_aw_buffer(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input - self.s_axi4_awaddr = Signal(32) # input - self.s_axi4_awvalid = Signal() # input - self.s_axi4_awready = Signal() # output - self.s_axi4_awlen = Signal(8) # input - self.s_axi4_awsize = Signal(3) # input - self.s_axi4_awburst = Signal(2) # input - self.s_axi4_awlock = Signal() # input - self.s_axi4_awprot = Signal(3) # input - self.s_axi4_awcache = Signal(4) # input - self.s_axi4_awregion = Signal(4) # input - self.s_axi4_awqos = Signal(4) # input - self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output - self.m_axi4_awaddr = Signal(32) # output - self.m_axi4_awvalid = Signal() # output - self.m_axi4_awready = Signal() # input - self.m_axi4_awlen = Signal(8) # output - self.m_axi4_awsize = Signal(3) # output - self.m_axi4_awburst = Signal(2) # output - self.m_axi4_awlock = Signal() # output - 
self.m_axi4_awprot = Signal(3) # output - self.m_axi4_awcache = Signal(4) # output - self.m_axi4_awregion = Signal(4) # output - self.m_axi4_awqos = Signal(4) # output - self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.None.eq(self.s_axi4_awcache) - m.d.comb += self.None.eq(self.s_axi4_awprot) - m.d.comb += self.None.eq(self.s_axi4_awlock) - m.d.comb += self.None.eq(self.s_axi4_awburst) - m.d.comb += self.None.eq(self.s_axi4_awsize) - m.d.comb += self.None.eq(self.s_axi4_awlen) - m.d.comb += self.None.eq(self.s_axi4_awaddr) - m.d.comb += self.None.eq(self.s_axi4_awregion) - m.d.comb += self.None.eq(self.s_axi4_awqos) - m.d.comb += self.None.eq(self.s_axi4_awid) - m.d.comb += self.None.eq(self.s_axi4_awuser) - m.d.comb += self.m_axi4_awcache.eq(self.None) - m.d.comb += self.m_axi4_awprot.eq(self.None) - m.d.comb += self.m_axi4_awlock.eq(self.None) - m.d.comb += self.m_axi4_awburst.eq(self.None) - m.d.comb += self.m_axi4_awsize.eq(self.None) - m.d.comb += self.m_axi4_awlen.eq(self.None) - m.d.comb += self.m_axi4_awaddr.eq(self.None) - m.d.comb += self.m_axi4_awregion.eq(self.None) - m.d.comb += self.m_axi4_awqos.eq(self.None) - m.d.comb += self.m_axi4_awid.eq(self.None) - m.d.comb += self.m_axi4_awuser.eq(self.None) - return m - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. 
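The buffer modules in this directory all instantiate the same axi_buffer_rab elastic buffer (DATA_WIDTH wide, BUFFER_DEPTH of 4). One plausible nmigen equivalent, sketched here on top of nmigen.lib.fifo.SyncFIFO purely as an assumption about how the conversion could be completed (class and port names are hypothetical):

from nmigen import Elaboratable, Module, Signal
from nmigen.lib.fifo import SyncFIFO


class BufferRabSketch(Elaboratable):
    def __init__(self, data_width, depth=4):
        # slave (push) side
        self.valid_in = Signal()
        self.data_in = Signal(data_width)
        self.ready_out = Signal()
        # master (pop) side
        self.valid_out = Signal()
        self.data_out = Signal(data_width)
        self.ready_in = Signal()
        self.fifo = SyncFIFO(width=data_width, depth=depth)

    def elaborate(self, platform=None):
        m = Module()
        m.submodules.fifo = fifo = self.fifo
        m.d.comb += [
            fifo.w_en.eq(self.valid_in),        # push when the slave is valid
            fifo.w_data.eq(self.data_in),
            self.ready_out.eq(fifo.w_rdy),      # backpressure to the slave
            self.valid_out.eq(fifo.r_rdy),      # data available to the master
            self.data_out.eq(fifo.r_data),
            fifo.r_en.eq(self.ready_in & fifo.r_rdy),  # pop on handshake
        ]
        return m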
-# -# module axi4_aw_buffer -# #( -# parameter AXI_ID_WIDTH = 4, -# parameter AXI_USER_WIDTH = 4 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid, -# input logic [31:0] s_axi4_awaddr, -# input logic s_axi4_awvalid, -# output logic s_axi4_awready, -# input logic [7:0] s_axi4_awlen, -# input logic [2:0] s_axi4_awsize, -# input logic [1:0] s_axi4_awburst, -# input logic s_axi4_awlock, -# input logic [2:0] s_axi4_awprot, -# input logic [3:0] s_axi4_awcache, -# input logic [3:0] s_axi4_awregion, -# input logic [3:0] s_axi4_awqos, -# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser, -# -# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid, -# output logic [31:0] m_axi4_awaddr, -# output logic m_axi4_awvalid, -# input logic m_axi4_awready, -# output logic [7:0] m_axi4_awlen, -# output logic [2:0] m_axi4_awsize, -# output logic [1:0] m_axi4_awburst, -# output logic m_axi4_awlock, -# output logic [2:0] m_axi4_awprot, -# output logic [3:0] m_axi4_awcache, -# output logic [3:0] m_axi4_awregion, -# output logic [3:0] m_axi4_awqos, -# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser -# ); -# -# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_in; -# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_out; -# -# assign data_in [3:0] = s_axi4_awcache; -# assign data_in [6:4] = s_axi4_awprot; -# assign data_in [7] = s_axi4_awlock; -# assign data_in [9:8] = s_axi4_awburst; -# assign data_in [12:10] = s_axi4_awsize; -# assign data_in [20:13] = s_axi4_awlen; -# assign data_in [52:21] = s_axi4_awaddr; -# assign data_in [56:53] = s_axi4_awregion; -# assign data_in [60:57] = s_axi4_awqos; -# assign data_in [60+AXI_ID_WIDTH:61] = s_axi4_awid; -# assign data_in [60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH] = s_axi4_awuser; -# -# assign m_axi4_awcache = data_out[3:0]; -# assign m_axi4_awprot = data_out[6:4]; -# assign m_axi4_awlock = data_out[7]; -# assign m_axi4_awburst = data_out[9:8]; -# assign m_axi4_awsize = data_out[12:10]; -# assign m_axi4_awlen = data_out[20:13]; -# assign m_axi4_awaddr = data_out[52:21]; -# assign m_axi4_awregion = data_out[56:53]; -# assign m_axi4_awqos = data_out[60:57]; -# assign m_axi4_awid = data_out[60+AXI_ID_WIDTH:61]; -# assign m_axi4_awuser = data_out[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH]; -# -# axi_buffer_rab -# #( -# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+61 ), -# .BUFFER_DEPTH ( 4 ) -# ) -# u_buffer -# ( -# .clk ( axi4_aclk ), -# .rstn ( axi4_arstn ), -# .valid_out ( m_axi4_awvalid ), -# .data_out ( data_out ), -# .ready_in ( m_axi4_awready ), -# .valid_in ( s_axi4_awvalid ), -# .data_in ( data_in ), -# .ready_out ( s_axi4_awready ) -# ); -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi4_aw_sender.py b/src/soc/iommu/axi_rab/axi4_aw_sender.py deleted file mode 100644 index fbc917df..00000000 --- a/src/soc/iommu/axi_rab/axi4_aw_sender.py +++ /dev/null @@ -1,252 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_aw_sender(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.l1_done_o = Signal() # output - self.l1_accept_i = Signal() # input - self.l1_drop_i = Signal() # input - self.l1_save_i = Signal() # input - self.l2_done_o = Signal() # output - self.l2_accept_i = Signal() # input - self.l2_drop_i = Signal() # input - self.l2_sending_o = Signal() # output - self.l1_awaddr_i = Signal(AXI_ADDR_WIDTH) # input - self.l2_awaddr_i = Signal(AXI_ADDR_WIDTH) # input - 
self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input - self.s_axi4_awvalid = Signal() # input - self.s_axi4_awready = Signal() # output - self.s_axi4_awlen = Signal(8) # input - self.s_axi4_awsize = Signal(3) # input - self.s_axi4_awburst = Signal(2) # input - self.s_axi4_awlock = Signal() # input - self.s_axi4_awprot = Signal(3) # input - self.s_axi4_awcache = Signal(4) # input - self.s_axi4_awregion = Signal(4) # input - self.s_axi4_awqos = Signal(4) # input - self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output - self.m_axi4_awaddr = Signal(AXI_ADDR_WIDTH) # output - self.m_axi4_awvalid = Signal() # output - self.m_axi4_awready = Signal() # input - self.m_axi4_awlen = Signal(8) # output - self.m_axi4_awsize = Signal(3) # output - self.m_axi4_awburst = Signal(2) # output - self.m_axi4_awlock = Signal() # output - self.m_axi4_awprot = Signal(3) # output - self.m_axi4_awcache = Signal(4) # output - self.m_axi4_awregion = Signal(4) # output - self.m_axi4_awqos = Signal(4) # output - self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.l1_save.eq(self.None) - m.d.comb += self.l1_done_o.eq(self.None) - m.d.comb += self.m_axi4_awvalid.eq(self.None) - m.d.comb += self.s_axi4_awready.eq(self.None) - m.d.comb += self.m_axi4_awuser.eq(self.None) - m.d.comb += self.m_axi4_awcache.eq(self.None) - m.d.comb += self.m_axi4_awregion.eq(self.None) - m.d.comb += self.m_axi4_awqos.eq(self.None) - m.d.comb += self.m_axi4_awprot.eq(self.None) - m.d.comb += self.m_axi4_awlock.eq(self.None) - m.d.comb += self.m_axi4_awburst.eq(self.None) - m.d.comb += self.m_axi4_awsize.eq(self.None) - m.d.comb += self.m_axi4_awlen.eq(self.None) - m.d.comb += self.m_axi4_awaddr.eq(self.None) - m.d.comb += self.m_axi4_awid.eq(self.None) - m.d.comb += self.l2_sending_o.eq(self.None) - m.d.comb += self.l2_sent.eq(self.None) - m.d.comb += self.l2_done_o.eq(self.None) - m.d.comb += self.m_axi4_awuser.eq(self.s_axi4_awuser) - m.d.comb += self.m_axi4_awcache.eq(self.s_axi4_awcache) - m.d.comb += self.m_axi4_awregion.eq(self.s_axi4_awregion) - m.d.comb += self.m_axi4_awqos.eq(self.s_axi4_awqos) - m.d.comb += self.m_axi4_awprot.eq(self.s_axi4_awprot) - m.d.comb += self.m_axi4_awlock.eq(self.s_axi4_awlock) - m.d.comb += self.m_axi4_awburst.eq(self.s_axi4_awburst) - m.d.comb += self.m_axi4_awsize.eq(self.s_axi4_awsize) - m.d.comb += self.m_axi4_awlen.eq(self.s_axi4_awlen) - m.d.comb += self.m_axi4_awaddr.eq(self.l1_awaddr_i) - m.d.comb += self.m_axi4_awid.eq(self.s_axi4_awid) - m.d.comb += self.l2_sending_o.eq(self.1: 'b0) - m.d.comb += self.l2_available_q.eq(self.1: 'b0) - m.d.comb += self.l2_done_o.eq(self.1: 'b0) - return m - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. 
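The L1/L2 handshake of the AW sender is fully described by the assign statements in the comments below; the generated elaborate() above only has placeholders for it. A hedged sketch of how those four equations could look in nmigen (signal names follow the SystemVerilog, the wrapper class is an assumption):

from nmigen import Elaboratable, Module, Signal


class AwSenderHandshakeSketch(Elaboratable):
    def __init__(self):
        self.l1_accept_i = Signal()
        self.l1_drop_i = Signal()
        self.l1_save_i = Signal()
        self.l2_available_q = Signal()
        self.l2_sending_o = Signal()
        self.s_axi4_awvalid = Signal()
        self.s_axi4_awready = Signal()
        self.m_axi4_awvalid = Signal()
        self.m_axi4_awready = Signal()
        self.l1_save = Signal()
        self.l1_done_o = Signal()

    def elaborate(self, platform=None):
        m = Module()
        m.d.comb += [
            # only save towards L2 if the single L2 slot is free
            self.l1_save.eq(self.l1_save_i & self.l2_available_q),
            # an L1 request is done once the slave-side AW handshake completes
            self.l1_done_o.eq(self.s_axi4_awvalid & self.s_axi4_awready),
            # forward an L1-translated transaction, or replay the saved one
            self.m_axi4_awvalid.eq((self.s_axi4_awvalid & self.l1_accept_i) |
                                   self.l2_sending_o),
            # accept from the slave when forwarded, or when dropped/saved
            self.s_axi4_awready.eq((self.m_axi4_awvalid & self.m_axi4_awready &
                                    ~self.l2_sending_o) |
                                   (self.s_axi4_awvalid &
                                    (self.l1_drop_i | self.l1_save))),
        ]
        return m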
-# -# module axi4_aw_sender -# #( -# parameter AXI_ADDR_WIDTH = 40, -# parameter AXI_ID_WIDTH = 4, -# parameter AXI_USER_WIDTH = 4, -# parameter ENABLE_L2TLB = 0 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# output logic l1_done_o, -# input logic l1_accept_i, -# input logic l1_drop_i, -# input logic l1_save_i, -# -# output logic l2_done_o, -# input logic l2_accept_i, -# input logic l2_drop_i, -# output logic l2_sending_o, -# -# input logic [AXI_ADDR_WIDTH-1:0] l1_awaddr_i, -# input logic [AXI_ADDR_WIDTH-1:0] l2_awaddr_i, -# -# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid, -# input logic s_axi4_awvalid, -# output logic s_axi4_awready, -# input logic [7:0] s_axi4_awlen, -# input logic [2:0] s_axi4_awsize, -# input logic [1:0] s_axi4_awburst, -# input logic s_axi4_awlock, -# input logic [2:0] s_axi4_awprot, -# input logic [3:0] s_axi4_awcache, -# input logic [3:0] s_axi4_awregion, -# input logic [3:0] s_axi4_awqos, -# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser, -# -# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid, -# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_awaddr, -# output logic m_axi4_awvalid, -# input logic m_axi4_awready, -# output logic [7:0] m_axi4_awlen, -# output logic [2:0] m_axi4_awsize, -# output logic [1:0] m_axi4_awburst, -# output logic m_axi4_awlock, -# output logic [2:0] m_axi4_awprot, -# output logic [3:0] m_axi4_awcache, -# output logic [3:0] m_axi4_awregion, -# output logic [3:0] m_axi4_awqos, -# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser -# ); -# -# logic l1_save; -# -# logic l2_sent; -# logic l2_available_q; -# -# assign l1_save = l1_save_i & l2_available_q; -# -# assign l1_done_o = s_axi4_awvalid & s_axi4_awready ; -# -# // if 1: accept and forward a transaction translated by L1 -# // 2: drop or save request (if L2 slot not occupied already) -# assign m_axi4_awvalid = (s_axi4_awvalid & l1_accept_i) | -# l2_sending_o; -# assign s_axi4_awready = (m_axi4_awvalid & m_axi4_awready & ~l2_sending_o) | -# (s_axi4_awvalid & (l1_drop_i | l1_save)); -# -# generate -# if (ENABLE_L2TLB == 1) begin -# logic [AXI_USER_WIDTH-1:0] l2_axi4_awuser ; -# logic [3:0] l2_axi4_awcache ; -# logic [3:0] l2_axi4_awregion; -# logic [3:0] l2_axi4_awqos ; -# logic [2:0] l2_axi4_awprot ; -# logic l2_axi4_awlock ; -# logic [1:0] l2_axi4_awburst ; -# logic [2:0] l2_axi4_awsize ; -# logic [7:0] l2_axi4_awlen ; -# logic [AXI_ID_WIDTH-1:0] l2_axi4_awid ; -# -# assign m_axi4_awuser = l2_sending_o ? l2_axi4_awuser : s_axi4_awuser; -# assign m_axi4_awcache = l2_sending_o ? l2_axi4_awcache : s_axi4_awcache; -# assign m_axi4_awregion = l2_sending_o ? l2_axi4_awregion : s_axi4_awregion; -# assign m_axi4_awqos = l2_sending_o ? l2_axi4_awqos : s_axi4_awqos; -# assign m_axi4_awprot = l2_sending_o ? l2_axi4_awprot : s_axi4_awprot; -# assign m_axi4_awlock = l2_sending_o ? l2_axi4_awlock : s_axi4_awlock; -# assign m_axi4_awburst = l2_sending_o ? l2_axi4_awburst : s_axi4_awburst; -# assign m_axi4_awsize = l2_sending_o ? l2_axi4_awsize : s_axi4_awsize; -# assign m_axi4_awlen = l2_sending_o ? l2_axi4_awlen : s_axi4_awlen; -# assign m_axi4_awaddr = l2_sending_o ? l2_awaddr_i : l1_awaddr_i; -# assign m_axi4_awid = l2_sending_o ? 
l2_axi4_awid : s_axi4_awid; -# -# // buffer AXI signals in case of L1 miss -# always @(posedge axi4_aclk or negedge axi4_arstn) begin -# if (axi4_arstn == 1'b0) begin -# l2_axi4_awuser <= 'b0; -# l2_axi4_awcache <= 'b0; -# l2_axi4_awregion <= 'b0; -# l2_axi4_awqos <= 'b0; -# l2_axi4_awprot <= 'b0; -# l2_axi4_awlock <= 1'b0; -# l2_axi4_awburst <= 'b0; -# l2_axi4_awsize <= 'b0; -# l2_axi4_awlen <= 'b0; -# l2_axi4_awid <= 'b0; -# end else if (l1_save) begin -# l2_axi4_awuser <= s_axi4_awuser; -# l2_axi4_awcache <= s_axi4_awcache; -# l2_axi4_awregion <= s_axi4_awregion; -# l2_axi4_awqos <= s_axi4_awqos; -# l2_axi4_awprot <= s_axi4_awprot; -# l2_axi4_awlock <= s_axi4_awlock; -# l2_axi4_awburst <= s_axi4_awburst; -# l2_axi4_awsize <= s_axi4_awsize; -# l2_axi4_awlen <= s_axi4_awlen; -# l2_axi4_awid <= s_axi4_awid; -# end -# end -# -# // signal that an l1_save_i can be accepted -# always @(posedge axi4_aclk or negedge axi4_arstn) begin -# if (axi4_arstn == 1'b0) begin -# l2_available_q <= 1'b1; -# end else if (l2_sent | l2_drop_i) begin -# l2_available_q <= 1'b1; -# end else if (l1_save) begin -# l2_available_q <= 1'b0; -# end -# end -# -# assign l2_sending_o = l2_accept_i & ~l2_available_q; -# assign l2_sent = l2_sending_o & m_axi4_awvalid & m_axi4_awready; -# -# // if 1: having sent out a transaction translated by L2 -# // 2: drop request (L2 slot is available again) -# assign l2_done_o = l2_sent | l2_drop_i; -# -# end else begin // !`ifdef ENABLE_L2TLB -# assign m_axi4_awuser = s_axi4_awuser; -# assign m_axi4_awcache = s_axi4_awcache; -# assign m_axi4_awregion = s_axi4_awregion; -# assign m_axi4_awqos = s_axi4_awqos; -# assign m_axi4_awprot = s_axi4_awprot; -# assign m_axi4_awlock = s_axi4_awlock; -# assign m_axi4_awburst = s_axi4_awburst; -# assign m_axi4_awsize = s_axi4_awsize; -# assign m_axi4_awlen = s_axi4_awlen; -# assign m_axi4_awaddr = l1_awaddr_i; -# assign m_axi4_awid = s_axi4_awid; -# -# assign l2_sending_o = 1'b0; -# assign l2_available_q = 1'b0; -# assign l2_done_o = 1'b0; -# end // !`ifdef ENABLE_L2TLB -# endgenerate -# -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi4_b_buffer.py b/src/soc/iommu/axi_rab/axi4_b_buffer.py deleted file mode 100644 index 42fce1ad..00000000 --- a/src/soc/iommu/axi_rab/axi4_b_buffer.py +++ /dev/null @@ -1,94 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_b_buffer(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output - self.s_axi4_bresp = Signal(2) # output - self.s_axi4_bvalid = Signal() # output - self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output - self.s_axi4_bready = Signal() # input - self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input - self.m_axi4_bresp = Signal(2) # input - self.m_axi4_bvalid = Signal() # input - self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_bready = Signal() # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.None.eq(self.m_axi4_bresp) - m.d.comb += self.None.eq(self.m_axi4_bid) - m.d.comb += self.None.eq(self.m_axi4_buser) - m.d.comb += self.s_axi4_buser.eq(self.None) - m.d.comb += self.s_axi4_bid.eq(self.None) - m.d.comb += self.s_axi4_bresp.eq(self.None) - return m - -# // Copyright 2018 ETH Zurich and University of Bologna. 
-# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# module axi4_b_buffer -# #( -# parameter AXI_ID_WIDTH = 4, -# parameter AXI_USER_WIDTH = 4 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid, -# output logic [1:0] s_axi4_bresp, -# output logic s_axi4_bvalid, -# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser, -# input logic s_axi4_bready, -# -# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid, -# input logic [1:0] m_axi4_bresp, -# input logic m_axi4_bvalid, -# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser, -# output logic m_axi4_bready -# ); -# -# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_in; -# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_out; -# -# assign data_in [1:0] = m_axi4_bresp; -# assign data_in [AXI_ID_WIDTH+1:2] = m_axi4_bid; -# assign data_in[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2] = m_axi4_buser; -# -# assign s_axi4_buser = data_out[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2]; -# assign s_axi4_bid = data_out[AXI_ID_WIDTH+1:2]; -# assign s_axi4_bresp = data_out[1:0]; -# -# axi_buffer_rab -# #( -# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+2 ), -# .BUFFER_DEPTH ( 4 ) -# ) -# u_buffer -# ( -# .clk ( axi4_aclk ), -# .rstn ( axi4_arstn ), -# .valid_out( s_axi4_bvalid ), -# .data_out ( data_out ), -# .ready_in ( s_axi4_bready ), -# .valid_in ( m_axi4_bvalid ), -# .data_in ( data_in ), -# .ready_out( m_axi4_bready ) -# ); -# -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi4_b_sender.py b/src/soc/iommu/axi_rab/axi4_b_sender.py deleted file mode 100644 index 1c61a2a5..00000000 --- a/src/soc/iommu/axi_rab/axi4_b_sender.py +++ /dev/null @@ -1,136 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_b_sender(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.drop_i = Signal() # input - self.done_o = Signal() # output - self.id_i = Signal(AXI_ID_WIDTH) # input - self.prefetch_i = Signal() # input - self.hit_i = Signal() # input - self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output - self.s_axi4_bresp = Signal(2) # output - self.s_axi4_bvalid = Signal() # output - self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output - self.s_axi4_bready = Signal() # input - self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input - self.m_axi4_bresp = Signal(2) # input - self.m_axi4_bvalid = Signal() # input - self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_bready = Signal() # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.fifo_push.eq(self.None) - m.d.comb += self.done_o.eq(self.fifo_push) - m.d.comb += self.fifo_pop.eq(self.None) - m.d.comb += self.s_axi4_buser.eq(self.None) - m.d.comb += self.s_axi4_bid.eq(self.None) - m.d.comb += self.s_axi4_bresp.eq(self.None) - m.d.comb += self.s_axi4_bvalid.eq(self.None) - m.d.comb += self.m_axi4_bready.eq(self.None) - return m - -# // 
Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# module axi4_b_sender -# #( -# parameter AXI_ID_WIDTH = 10, -# parameter AXI_USER_WIDTH = 4 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# input logic drop_i, -# output logic done_o, -# input logic [AXI_ID_WIDTH-1:0] id_i, -# input logic prefetch_i, -# input logic hit_i, -# -# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid, -# output logic [1:0] s_axi4_bresp, -# output logic s_axi4_bvalid, -# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser, -# input logic s_axi4_bready, -# -# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid, -# input logic [1:0] m_axi4_bresp, -# input logic m_axi4_bvalid, -# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser, -# output logic m_axi4_bready -# ); -# -# logic fifo_valid; -# logic fifo_pop; -# logic fifo_push; -# logic fifo_ready; -# logic [AXI_ID_WIDTH-1:0] id; -# logic prefetch; -# logic hit; -# -# logic dropping; -# -# axi_buffer_rab -# #( -# .DATA_WIDTH ( 2+AXI_ID_WIDTH ), -# .BUFFER_DEPTH ( 4 ) -# ) -# u_fifo -# ( -# .clk ( axi4_aclk ), -# .rstn ( axi4_arstn ), -# // Pop -# .data_out ( {prefetch, hit, id} ), -# .valid_out ( fifo_valid ), -# .ready_in ( fifo_pop ), -# // Push -# .valid_in ( fifo_push ), -# .data_in ( {prefetch_i, hit_i, id_i} ), -# .ready_out ( fifo_ready ) -# ); -# -# assign fifo_push = drop_i & fifo_ready; -# assign done_o = fifo_push; -# -# assign fifo_pop = dropping & s_axi4_bready; -# -# always @ (posedge axi4_aclk or negedge axi4_arstn) begin -# if (axi4_arstn == 1'b0) begin -# dropping <= 1'b0; -# end else begin -# if (fifo_valid && ~dropping) -# dropping <= 1'b1; -# else if (fifo_pop) -# dropping <= 1'b0; -# end -# end -# -# assign s_axi4_buser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_buser; -# assign s_axi4_bid = dropping ? id : m_axi4_bid; -# -# assign s_axi4_bresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, mutli, prot -# (dropping & prefetch ) ? 2'b10 : // prefetch miss -# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot -# (dropping ) ? 
2'b10 : // non-prefetch miss -# m_axi4_bresp; -# -# assign s_axi4_bvalid = dropping | m_axi4_bvalid; -# assign m_axi4_bready = ~dropping & s_axi4_bready; -# -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi4_r_buffer.py b/src/soc/iommu/axi_rab/axi4_r_buffer.py deleted file mode 100644 index 91bdf0a5..00000000 --- a/src/soc/iommu/axi_rab/axi4_r_buffer.py +++ /dev/null @@ -1,120 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_r_buffer(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output - self.s_axi4_rresp = Signal(2) # output - self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output - self.s_axi4_rlast = Signal() # output - self.s_axi4_rvalid = Signal() # output - self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output - self.s_axi4_rready = Signal() # input - self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input - self.m_axi4_rresp = Signal(2) # input - self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input - self.m_axi4_rlast = Signal() # input - self.m_axi4_rvalid = Signal() # input - self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_rready = Signal() # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.None.eq(self.m_axi4_rresp) - m.d.comb += self.None.eq(self.m_axi4_rlast) - m.d.comb += self.None.eq(self.m_axi4_rid) - m.d.comb += self.None.eq(self.m_axi4_rdata) - m.d.comb += self.None.eq(self.m_axi4_ruser) - m.d.comb += self.s_axi4_rresp.eq(self.None) - m.d.comb += self.s_axi4_rlast.eq(self.None) - m.d.comb += self.s_axi4_rid.eq(self.None) - m.d.comb += self.s_axi4_rdata.eq(self.None) - m.d.comb += self.s_axi4_ruser.eq(self.None) - return m - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. 
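
(Editor's note, illustration only.) The generated axi4_r_buffer class above lost its buffer wiring to `self.None` placeholders. As a rough sketch of what a hand-written nmigen equivalent looks like, the snippet below packs the R-channel fields into one word, runs them through nmigen's SyncFIFO (used here purely as a stand-in for axi_buffer_rab, whose depth and timing differ), and unpacks them on the slave side. All class and signal names and the default widths are illustrative, not part of the original code.

from nmigen import Module, Signal, Cat, Elaboratable
from nmigen.lib.fifo import SyncFIFO


class RBufferSketch(Elaboratable):
    def __init__(self, data_width=32, id_width=4, user_width=4, depth=4):
        self.data_width, self.id_width, self.user_width = data_width, id_width, user_width
        self.depth = depth
        # slave (downstream) side
        self.s_rid = Signal(id_width)
        self.s_rresp = Signal(2)
        self.s_rdata = Signal(data_width)
        self.s_rlast = Signal()
        self.s_rvalid = Signal()
        self.s_ruser = Signal(user_width)
        self.s_rready = Signal()
        # master (upstream) side
        self.m_rid = Signal(id_width)
        self.m_rresp = Signal(2)
        self.m_rdata = Signal(data_width)
        self.m_rlast = Signal()
        self.m_rvalid = Signal()
        self.m_ruser = Signal(user_width)
        self.m_rready = Signal()

    def elaborate(self, platform=None):
        m = Module()
        width = self.data_width + self.id_width + self.user_width + 3
        m.submodules.fifo = fifo = SyncFIFO(width=width, depth=self.depth)
        m.d.comb += [
            # pack {ruser, rdata, rid, rlast, rresp}, lowest field first,
            # matching the bit layout of the SystemVerilog localparams below
            fifo.w_data.eq(Cat(self.m_rresp, self.m_rlast, self.m_rid,
                               self.m_rdata, self.m_ruser)),
            fifo.w_en.eq(self.m_rvalid),
            self.m_rready.eq(fifo.w_rdy),
            # unpack in the same order on the slave side
            Cat(self.s_rresp, self.s_rlast, self.s_rid,
                self.s_rdata, self.s_ruser).eq(fifo.r_data),
            self.s_rvalid.eq(fifo.r_rdy),
            fifo.r_en.eq(self.s_rready),
        ]
        return m

The B- and W-channel buffers in this directory follow the same pattern with different field sets.
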
-# -# module axi4_r_buffer -# #( -# parameter AXI_DATA_WIDTH = 32, -# parameter AXI_ID_WIDTH = 4, -# parameter AXI_USER_WIDTH = 4 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid, -# output logic [1:0] s_axi4_rresp, -# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata, -# output logic s_axi4_rlast, -# output logic s_axi4_rvalid, -# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser, -# input logic s_axi4_rready, -# -# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid, -# input logic [1:0] m_axi4_rresp, -# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata, -# input logic m_axi4_rlast, -# input logic m_axi4_rvalid, -# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser, -# output logic m_axi4_rready -# ); -# -# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_in; -# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_out; -# -# localparam ID_START = 3; -# localparam ID_END = AXI_ID_WIDTH-1 + ID_START; -# localparam DATA_START = ID_END + 1; -# localparam DATA_END = AXI_DATA_WIDTH-1 + DATA_START; -# localparam USER_START = DATA_END + 1; -# localparam USER_END = AXI_USER_WIDTH-1 + USER_START; -# -# assign data_in [1:0] = m_axi4_rresp; -# assign data_in [2] = m_axi4_rlast; -# assign data_in [ID_END:ID_START] = m_axi4_rid; -# assign data_in[DATA_END:DATA_START] = m_axi4_rdata; -# assign data_in[USER_END:USER_START] = m_axi4_ruser; -# -# assign s_axi4_rresp = data_out [1:0]; -# assign s_axi4_rlast = data_out [2]; -# assign s_axi4_rid = data_out [ID_END:ID_START]; -# assign s_axi4_rdata = data_out[DATA_END:DATA_START]; -# assign s_axi4_ruser = data_out[USER_END:USER_START]; -# -# axi_buffer_rab -# #( -# .DATA_WIDTH ( AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3 ), -# .BUFFER_DEPTH ( 4 ) -# ) -# u_buffer -# ( -# .clk ( axi4_aclk ), -# .rstn ( axi4_arstn ), -# // Pop -# .valid_out ( s_axi4_rvalid ), -# .data_out ( data_out ), -# .ready_in ( s_axi4_rready ), -# // Push -# .valid_in ( m_axi4_rvalid ), -# .data_in ( data_in ), -# .ready_out ( m_axi4_rready ) -# ); -# -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi4_r_sender.py b/src/soc/iommu/axi_rab/axi4_r_sender.py deleted file mode 100644 index d4e22bb2..00000000 --- a/src/soc/iommu/axi_rab/axi4_r_sender.py +++ /dev/null @@ -1,206 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_r_sender(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.drop_i = Signal() # input - self.drop_len_i = Signal(8) # input - self.done_o = Signal() # output - self.id_i = Signal(AXI_ID_WIDTH) # input - self.prefetch_i = Signal() # input - self.hit_i = Signal() # input - self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output - self.s_axi4_rresp = Signal(2) # output - self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output - self.s_axi4_rlast = Signal() # output - self.s_axi4_rvalid = Signal() # output - self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output - self.s_axi4_rready = Signal() # input - self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input - self.m_axi4_rresp = Signal(2) # input - self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input - self.m_axi4_rlast = Signal() # input - self.m_axi4_rvalid = Signal() # input - self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_rready = Signal() # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.fifo_push.eq(self.None) - m.d.comb += self.done_o.eq(self.fifo_push) - m.d.comb += 
self.s_axi4_rdata.eq(self.m_axi4_rdata) - m.d.comb += self.s_axi4_ruser.eq(self.None) - m.d.comb += self.s_axi4_rid.eq(self.None) - m.d.comb += self.s_axi4_rresp.eq(self.None) - m.d.comb += self.s_axi4_rvalid.eq(self.None) - m.d.comb += self.m_axi4_rready.eq(self.None) - return m - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# //import CfMath::log2; -# -# module axi4_r_sender -# #( -# parameter AXI_DATA_WIDTH = 32, -# parameter AXI_ID_WIDTH = 4, -# parameter AXI_USER_WIDTH = 4 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# input logic drop_i, -# input logic [7:0] drop_len_i, -# output logic done_o, -# input logic [AXI_ID_WIDTH-1:0] id_i, -# input logic prefetch_i, -# input logic hit_i, -# -# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid, -# output logic [1:0] s_axi4_rresp, -# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata, -# output logic s_axi4_rlast, -# output logic s_axi4_rvalid, -# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser, -# input logic s_axi4_rready, -# -# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid, -# input logic [1:0] m_axi4_rresp, -# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata, -# input logic m_axi4_rlast, -# input logic m_axi4_rvalid, -# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser, -# output logic m_axi4_rready -# ); -# -# localparam BUFFER_DEPTH = 16; -# -# logic fifo_valid; -# logic fifo_pop; -# logic fifo_push; -# logic fifo_ready; -# logic [AXI_ID_WIDTH-1:0] id; -# logic [7:0] len; -# logic prefetch; -# logic hit; -# -# logic dropping; -# -# enum logic [1:0] { FORWARDING, DROPPING } -# state_d, state_q; -# logic burst_ongoing_d, burst_ongoing_q; -# logic [7:0] drop_cnt_d, drop_cnt_q; -# -# axi_buffer_rab -# #( -# .DATA_WIDTH ( 2+AXI_ID_WIDTH+8 ), -# .BUFFER_DEPTH ( BUFFER_DEPTH ) -# ) -# u_fifo -# ( -# .clk ( axi4_aclk ), -# .rstn ( axi4_arstn ), -# // Pop -# .data_out ( {prefetch, hit, id, len} ), -# .valid_out ( fifo_valid ), -# .ready_in ( fifo_pop ), -# // Push -# .valid_in ( fifo_push ), -# .data_in ( {prefetch_i, hit_i, id_i, drop_len_i} ), -# .ready_out ( fifo_ready ) -# ); -# -# assign fifo_push = drop_i & fifo_ready; -# assign done_o = fifo_push; -# -# always_comb begin -# burst_ongoing_d = burst_ongoing_q; -# drop_cnt_d = drop_cnt_q; -# dropping = 1'b0; -# s_axi4_rlast = 1'b0; -# fifo_pop = 1'b0; -# state_d = state_q; -# -# case (state_q) -# FORWARDING: begin -# s_axi4_rlast = m_axi4_rlast; -# // Remember whether there is currently a burst ongoing. -# if (m_axi4_rvalid && m_axi4_rready) begin -# if (m_axi4_rlast) begin -# burst_ongoing_d = 1'b0; -# end else begin -# burst_ongoing_d = 1'b1; -# end -# end -# // If there is no burst ongoing and the FIFO has a drop request ready, process it. 
-# if (!burst_ongoing_d && fifo_valid) begin -# drop_cnt_d = len; -# state_d = DROPPING; -# end -# end -# -# DROPPING: begin -# dropping = 1'b1; -# s_axi4_rlast = (drop_cnt_q == '0); -# // Handshake on slave interface -# if (s_axi4_rready) begin -# drop_cnt_d -= 1; -# if (drop_cnt_q == '0) begin -# drop_cnt_d = '0; -# fifo_pop = 1'b1; -# state_d = FORWARDING; -# end -# end -# end -# -# default: begin -# state_d = FORWARDING; -# end -# endcase -# end -# -# assign s_axi4_rdata = m_axi4_rdata; -# -# assign s_axi4_ruser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_ruser; -# assign s_axi4_rid = dropping ? id : m_axi4_rid; -# -# assign s_axi4_rresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, mutli, prot -# (dropping & prefetch ) ? 2'b10 : // prefetch miss -# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot -# (dropping ) ? 2'b10 : // non-prefetch miss -# m_axi4_rresp; -# -# assign s_axi4_rvalid = dropping | m_axi4_rvalid; -# assign m_axi4_rready = ~dropping & s_axi4_rready; -# -# always_ff @(posedge axi4_aclk, negedge axi4_arstn) begin -# if (axi4_arstn == 1'b0) begin -# burst_ongoing_q <= 1'b0; -# drop_cnt_q <= 'b0; -# state_q <= FORWARDING; -# end else begin -# burst_ongoing_q <= burst_ongoing_d; -# drop_cnt_q <= drop_cnt_d; -# state_q <= state_d; -# end -# end -# -# endmodule -# -# -# -# diff --git a/src/soc/iommu/axi_rab/axi4_w_buffer.py b/src/soc/iommu/axi_rab/axi4_w_buffer.py deleted file mode 100644 index aa06dc22..00000000 --- a/src/soc/iommu/axi_rab/axi4_w_buffer.py +++ /dev/null @@ -1,777 +0,0 @@ -# this file has been generated by sv2nmigen -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. 
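
(Editor's note, illustration only.) The FORWARDING/DROPPING counter of the axi4_r_sender above maps directly onto an nmigen FSM. The sketch below covers only the drop counter: the forwarding path, the rresp/rid/ruser muxes and the metadata FIFO are omitted, and every name is hypothetical.

from nmigen import Module, Signal, Elaboratable


class RDropSketch(Elaboratable):
    def __init__(self):
        self.drop_valid = Signal()  # a drop request is pending (metadata FIFO non-empty)
        self.drop_len = Signal(8)   # AXI-style length: 0 means fake one beat
        self.rready = Signal()      # downstream ready
        self.rvalid = Signal()      # faked beat presented downstream
        self.rlast = Signal()       # asserted on the final faked beat
        self.done = Signal()        # pulse to pop the metadata FIFO

    def elaborate(self, platform=None):
        m = Module()
        cnt = Signal(8)
        with m.FSM():
            with m.State("FORWARD"):
                # normal forwarding happens here; once a drop request is seen
                # between bursts, latch the length and start dropping
                with m.If(self.drop_valid):
                    m.d.sync += cnt.eq(self.drop_len)
                    m.next = "DROP"
            with m.State("DROP"):
                m.d.comb += [
                    self.rvalid.eq(1),
                    self.rlast.eq(cnt == 0),
                ]
                with m.If(self.rready):
                    with m.If(cnt == 0):
                        m.d.comb += self.done.eq(1)
                        m.next = "FORWARD"
                    with m.Else():
                        m.d.sync += cnt.eq(cnt - 1)
        return m

The real module additionally forwards m_axi4_r* beats while in FORWARDING and selects between buffered and dropped responses, as the SystemVerilog above shows.
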
-from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_w_buffer(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.l1_done_o = Signal() # output - self.l1_accept_i = Signal() # input - self.l1_save_i = Signal() # input - self.l1_drop_i = Signal() # input - self.l1_master_i = Signal() # input - self.l1_id_i = Signal(AXI_ID_WIDTH) # input - self.l1_len_i = Signal(8) # input - self.l1_prefetch_i = Signal() # input - self.l1_hit_i = Signal() # input - self.l2_done_o = Signal() # output - self.l2_accept_i = Signal() # input - self.l2_drop_i = Signal() # input - self.l2_master_i = Signal() # input - self.l2_id_i = Signal(AXI_ID_WIDTH) # input - self.l2_len_i = Signal(8) # input - self.l2_prefetch_i = Signal() # input - self.l2_hit_i = Signal() # input - self.master_select_o = Signal() # output - self.input_stall_o = Signal() # output - self.output_stall_o = Signal() # output - self.b_drop_o = Signal() # output - self.b_done_i = Signal() # input - self.id_o = Signal(AXI_ID_WIDTH) # output - self.prefetch_o = Signal() # output - self.hit_o = Signal() # output - self.s_axi4_wdata = Signal(AXI_DATA_WIDTH) # input - self.s_axi4_wvalid = Signal() # input - self.s_axi4_wready = Signal() # output - self.s_axi4_wstrb = Signal(1+ERROR p_expression_25) # input - self.s_axi4_wlast = Signal() # input - self.s_axi4_wuser = Signal(AXI_USER_WIDTH) # input - self.m_axi4_wdata = Signal(AXI_DATA_WIDTH) # output - self.m_axi4_wvalid = Signal() # output - self.m_axi4_wready = Signal() # input - self.m_axi4_wstrb = Signal(1+ERROR p_expression_25) # output - self.m_axi4_wlast = Signal() # output - self.m_axi4_wuser = Signal(AXI_USER_WIDTH) # output - - def elaborate(self, platform=None): - m = Module() - return m - - -# -# //import CfMath::log2; -# -# module axi4_w_buffer -# #( -# parameter AXI_DATA_WIDTH = 32, -# parameter AXI_ID_WIDTH = 4, -# parameter AXI_USER_WIDTH = 4, -# parameter ENABLE_L2TLB = 0, -# parameter HUM_BUFFER_DEPTH = 16 -# ) -# ( -# input logic axi4_aclk, -# input logic axi4_arstn, -# -# // L1 & L2 interfaces -# output logic l1_done_o, -# input logic l1_accept_i, -# input logic l1_save_i, -# input logic l1_drop_i, -# input logic l1_master_i, -# input logic [AXI_ID_WIDTH-1:0] l1_id_i, -# input logic [7:0] l1_len_i, -# input logic l1_prefetch_i, -# input logic l1_hit_i, -# -# output logic l2_done_o, -# input logic l2_accept_i, -# input logic l2_drop_i, -# input logic l2_master_i, -# input logic [AXI_ID_WIDTH-1:0] l2_id_i, -# input logic [7:0] l2_len_i, -# input logic l2_prefetch_i, -# input logic l2_hit_i, -# -# output logic master_select_o, -# output logic input_stall_o, -# output logic output_stall_o, -# -# // B sender interface -# output logic b_drop_o, -# input logic b_done_i, -# output logic [AXI_ID_WIDTH-1:0] id_o, -# output logic prefetch_o, -# output logic hit_o, -# -# // AXI W channel interfaces -# input logic [AXI_DATA_WIDTH-1:0] s_axi4_wdata, -# input logic s_axi4_wvalid, -# output logic s_axi4_wready, -# input logic [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb, -# input logic s_axi4_wlast, -# input logic [AXI_USER_WIDTH-1:0] s_axi4_wuser, -# -# output logic [AXI_DATA_WIDTH-1:0] m_axi4_wdata, -# output logic m_axi4_wvalid, -# input logic m_axi4_wready, -# output logic [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb, -# output logic m_axi4_wlast, -# output logic [AXI_USER_WIDTH-1:0] m_axi4_wuser -# ); -# -""" - - localparam BUFFER_WIDTH = AXI_DATA_WIDTH+AXI_USER_WIDTH+AXI_DATA_WIDTH/8+1; - - localparam INPUT_BUFFER_DEPTH = 
4; - localparam L1_FIFO_DEPTH = 8; - localparam L2_FIFO_DEPTH = 4; - - logic [AXI_DATA_WIDTH-1:0] axi4_wdata; - logic axi4_wvalid; - logic axi4_wready; - logic [AXI_DATA_WIDTH/8-1:0] axi4_wstrb; - logic axi4_wlast; - logic [AXI_USER_WIDTH-1:0] axi4_wuser; - - logic l1_fifo_valid_out; - logic l1_fifo_ready_in; - logic l1_fifo_valid_in; - logic l1_fifo_ready_out; - - logic l1_req; - logic l1_accept_cur, l1_save_cur, l1_drop_cur; - logic l1_master_cur; - logic [AXI_ID_WIDTH-1:0] l1_id_cur; - logic [7:0] l1_len_cur; - logic l1_hit_cur, l1_prefetch_cur; - logic l1_save_in, l1_save_out; - logic [log2(L1_FIFO_DEPTH)-1:0] n_l1_save_SP; - - logic l2_fifo_valid_out; - logic l2_fifo_ready_in; - logic l2_fifo_valid_in; - logic l2_fifo_ready_out; - - logic l2_req; - logic l2_accept_cur, l2_drop_cur; - logic l2_master_cur; - logic [AXI_ID_WIDTH-1:0] l2_id_cur; - logic [7:0] l2_len_cur; - logic l2_hit_cur, l2_prefetch_cur; - - logic fifo_select, fifo_select_SN, fifo_select_SP; - logic w_done; - logic b_drop_set; - - // HUM buffer signals - logic hum_buf_ready_out; - logic hum_buf_valid_in; - logic hum_buf_ready_in; - logic hum_buf_valid_out; - logic hum_buf_underfull; - - logic [AXI_DATA_WIDTH-1:0] hum_buf_wdata; - logic [AXI_DATA_WIDTH/8-1:0] hum_buf_wstrb; - logic hum_buf_wlast; - logic [AXI_USER_WIDTH-1:0] hum_buf_wuser; - - logic hum_buf_drop_req_SN, hum_buf_drop_req_SP; - logic [7:0] hum_buf_drop_len_SN, hum_buf_drop_len_SP; - logic hum_buf_almost_full; - - logic stop_store; - logic wlast_in, wlast_out; - logic signed [3:0] n_wlast_SN, n_wlast_SP; - logic block_forwarding; - - // Search FSM - typedef enum logic [3:0] {STORE, BYPASS, - WAIT_L1_BYPASS_YES, WAIT_L2_BYPASS_YES, - WAIT_L1_BYPASS_NO, WAIT_L2_BYPASS_NO, - FLUSH, DISCARD, - DISCARD_FINISH} - hum_buf_state_t; - hum_buf_state_t hum_buf_SP; // Present state - hum_buf_state_tbg hum_buf_SN; // Next State - - axi_buffer_rab - #( - .DATA_WIDTH ( BUFFER_WIDTH ), - .BUFFER_DEPTH ( INPUT_BUFFER_DEPTH ) - ) - u_input_buf - ( - .clk ( axi4_aclk ), - .rstn ( axi4_arstn ), - // Push - .data_in ( {s_axi4_wuser, s_axi4_wstrb, s_axi4_wdata, s_axi4_wlast} ), - .valid_in ( s_axi4_wvalid ), - .ready_out ( s_axi4_wready ), - // Pop - .data_out ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ), - .valid_out ( axi4_wvalid ), - .ready_in ( axi4_wready ) - ); - - axi_buffer_rab - #( - .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+4 ), - .BUFFER_DEPTH ( L1_FIFO_DEPTH ) - ) - u_l1_fifo - ( - .clk ( axi4_aclk ), - .rstn ( axi4_arstn ), - // Push - .data_in ( {l1_prefetch_i, l1_hit_i, l1_id_i, l1_len_i, l1_master_i, l1_accept_i, l1_save_i, l1_drop_i} ), - .valid_in ( l1_fifo_valid_in ), - .ready_out ( l1_fifo_ready_out ), - // Pop - .data_out ( {l1_prefetch_cur, l1_hit_cur, l1_id_cur, l1_len_cur, l1_master_cur, l1_accept_cur, l1_save_cur, l1_drop_cur} ), - .valid_out ( l1_fifo_valid_out ), - .ready_in ( l1_fifo_ready_in ) - ); - - // Push upon receiving new requests from the TLB. 
- assign l1_req = l1_accept_i | l1_save_i | l1_drop_i; - assign l1_fifo_valid_in = l1_req & l1_fifo_ready_out; - - // Signal handshake - assign l1_done_o = l1_fifo_valid_in; - assign l2_done_o = l2_fifo_valid_in; - - // Stall AW input of L1 TLB - assign input_stall_o = ~(l1_fifo_ready_out & l2_fifo_ready_out); - - // Interface b_drop signals + handshake - always_comb begin - if (fifo_select == 1'b0) begin - prefetch_o = l1_prefetch_cur; - hit_o = l1_hit_cur; - id_o = l1_id_cur; - - l1_fifo_ready_in = w_done | b_done_i; - l2_fifo_ready_in = 1'b0; - end else begin - prefetch_o = l2_prefetch_cur; - hit_o = l2_hit_cur; - id_o = l2_id_cur; - - l1_fifo_ready_in = 1'b0; - l2_fifo_ready_in = w_done | b_done_i; - end - end - - // Detect when an L1 transaction save request enters or exits the L1 FIFO. - assign l1_save_in = l1_fifo_valid_in & l1_save_i; - assign l1_save_out = l1_fifo_ready_in & l1_save_cur; - - // Count the number of L1 transaction to save in the L1 FIFO. - always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin - if (axi4_arstn == 0) begin - n_l1_save_SP <= '0; - end else if (l1_save_in ^ l1_save_out) begin - if (l1_save_in) begin - n_l1_save_SP <= n_l1_save_SP + 1'b1; - end else if (l1_save_out) begin - n_l1_save_SP <= n_l1_save_SP - 1'b1; - end - end - end - - // Stall forwarding of AW L1 hits if: - // 1. The HUM buffer does not allow to be bypassed. - // 2. There are multiple L1 save requests in the FIFO, i.e., multiple L2 outputs pending. - assign output_stall_o = (n_l1_save_SP > 1) || (block_forwarding == 1'b1); - - generate - if (ENABLE_L2TLB == 1) begin : HUM_BUFFER - - axi_buffer_rab_bram - #( - .DATA_WIDTH ( BUFFER_WIDTH ), - .BUFFER_DEPTH ( HUM_BUFFER_DEPTH ) - ) - u_hum_buf - ( - .clk ( axi4_aclk ), - .rstn ( axi4_arstn ), - // Push - .data_in ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ), - .valid_in ( hum_buf_valid_in ), - .ready_out ( hum_buf_ready_out ), - // Pop - .data_out ( {hum_buf_wuser, hum_buf_wstrb, hum_buf_wdata, hum_buf_wlast} ), - .valid_out ( hum_buf_valid_out ), - .ready_in ( hum_buf_ready_in ), - // Clear - .almost_full ( hum_buf_almost_full ), - .underfull ( hum_buf_underfull ), - .drop_req ( hum_buf_drop_req_SP ), - .drop_len ( hum_buf_drop_len_SP ) - ); - - axi_buffer_rab - #( - .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+3 ), - .BUFFER_DEPTH ( L2_FIFO_DEPTH ) - ) - u_l2_fifo - ( - .clk ( axi4_aclk ), - .rstn ( axi4_arstn ), - // Push - .data_in ( {l2_prefetch_i, l2_hit_i, l2_id_i, l2_len_i, l2_master_i, l2_accept_i, l2_drop_i} ), - .valid_in ( l2_fifo_valid_in ), - .ready_out ( l2_fifo_ready_out ), - // Pop - .data_out ( {l2_prefetch_cur, l2_hit_cur, l2_id_cur, l2_len_cur, l2_master_cur, l2_accept_cur, l2_drop_cur} ), - .valid_out ( l2_fifo_valid_out ), - .ready_in ( l2_fifo_ready_in ) - ); - - // Push upon receiving new result from TLB. 
- assign l2_req = l2_accept_i | l2_drop_i; - assign l2_fifo_valid_in = l2_req & l2_fifo_ready_out; - - assign wlast_in = axi4_wlast & hum_buf_valid_in & hum_buf_ready_out; - assign wlast_out = hum_buf_wlast & hum_buf_valid_out & hum_buf_ready_in; - - always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin - if (axi4_arstn == 0) begin - fifo_select_SP <= 1'b0; - hum_buf_drop_len_SP <= 'b0; - hum_buf_drop_req_SP <= 1'b0; - hum_buf_SP <= STORE; - n_wlast_SP <= 'b0; - end else begin - fifo_select_SP <= fifo_select_SN; - hum_buf_drop_len_SP <= hum_buf_drop_len_SN; - hum_buf_drop_req_SP <= hum_buf_drop_req_SN; - hum_buf_SP <= hum_buf_SN; - n_wlast_SP <= n_wlast_SN; - end - end - - always_comb begin - n_wlast_SN = n_wlast_SP; - if (hum_buf_drop_req_SP) begin // Happens exactly once per burst to be dropped. - n_wlast_SN -= 1; - end - if (wlast_in) begin - n_wlast_SN += 1; - end - if (wlast_out) begin - n_wlast_SN -= 1; - end - end - - always_comb begin : HUM_BUFFER_FSM - hum_buf_SN = hum_buf_SP; - - m_axi4_wlast = 1'b0; - m_axi4_wdata = 'b0; - m_axi4_wstrb = 'b0; - m_axi4_wuser = 'b0; - - m_axi4_wvalid = 1'b0; - axi4_wready = 1'b0; - - hum_buf_valid_in = 1'b0; - hum_buf_ready_in = 1'b0; - - hum_buf_drop_req_SN = hum_buf_drop_req_SP; - hum_buf_drop_len_SN = hum_buf_drop_len_SP; - master_select_o = 1'b0; - - w_done = 1'b0; // read from FIFO without handshake with B sender - b_drop_o = 1'b0; // send data from FIFO to B sender (with handshake) - fifo_select = 1'b0; - - fifo_select_SN = fifo_select_SP; - stop_store = 1'b0; - - block_forwarding = 1'b0; - - unique case (hum_buf_SP) - - STORE : begin - // Simply store the data in the buffer. - hum_buf_valid_in = axi4_wvalid & hum_buf_ready_out; - axi4_wready = hum_buf_ready_out; - - // We have got a full burst in the HUM buffer, thus stop storing. - if (wlast_in & !hum_buf_underfull | (n_wlast_SP > $signed(0))) begin - hum_buf_SN = WAIT_L1_BYPASS_YES; - - // The buffer is full, thus wait for decision. - end else if (~hum_buf_ready_out) begin - hum_buf_SN = WAIT_L1_BYPASS_NO; - end - - // Avoid the forwarding of L1 hits until we know whether we can bypass. - if (l1_fifo_valid_out & l1_save_cur) begin - block_forwarding = 1'b1; - end - end - - WAIT_L1_BYPASS_YES : begin - // Wait for orders from L1 TLB. - if (l1_fifo_valid_out) begin - - // L1 hit - forward data from buffer - if (l1_accept_cur) begin - m_axi4_wlast = hum_buf_wlast; - m_axi4_wdata = hum_buf_wdata; - m_axi4_wstrb = hum_buf_wstrb; - m_axi4_wuser = hum_buf_wuser; - - m_axi4_wvalid = hum_buf_valid_out; - hum_buf_ready_in = m_axi4_wready; - - master_select_o = l1_master_cur; - - // Detect last data beat. - if (wlast_out) begin - fifo_select = 1'b0; - w_done = 1'b1; - hum_buf_SN = STORE; - end - - // L1 miss - wait for L2 - end else if (l1_save_cur) begin - fifo_select = 1'b0; - w_done = 1'b1; - hum_buf_SN = WAIT_L2_BYPASS_YES; - - // L1 prefetch, prot, multi - drop data - end else if (l1_drop_cur) begin - fifo_select_SN = 1'b0; // L1 - hum_buf_drop_req_SN = 1'b1; - hum_buf_drop_len_SN = l1_len_cur; - hum_buf_SN = FLUSH; - end - end - end - - WAIT_L2_BYPASS_YES : begin - // Wait for orders from L2 TLB. - if (l2_fifo_valid_out) begin - - // L2 hit - forward data from buffer - if (l2_accept_cur) begin - m_axi4_wlast = hum_buf_wlast; - m_axi4_wdata = hum_buf_wdata; - m_axi4_wstrb = hum_buf_wstrb; - m_axi4_wuser = hum_buf_wuser; - - m_axi4_wvalid = hum_buf_valid_out; - hum_buf_ready_in = m_axi4_wready; - - master_select_o = l2_master_cur; - - // Detect last data beat. 
- if (wlast_out) begin - fifo_select = 1'b1; - w_done = 1'b1; - hum_buf_SN = STORE; - end - - // L2 miss/prefetch hit - end else if (l2_drop_cur) begin - fifo_select_SN = 1'b1; // L2 - hum_buf_drop_req_SN = 1'b1; - hum_buf_drop_len_SN = l2_len_cur; - hum_buf_SN = FLUSH; - end - - // While we wait for orders from L2 TLB, we can still drop and accept L1 transactions. - end else if (l1_fifo_valid_out) begin - - // L1 hit - if (l1_accept_cur) begin - hum_buf_SN = BYPASS; - - // L1 prefetch/prot/multi - end else if (l1_drop_cur) begin - hum_buf_SN = DISCARD; - end - end - end - - FLUSH : begin - // Clear HUM buffer flush request. - hum_buf_drop_req_SN = 1'b0; - - // perform handshake with B sender - fifo_select = fifo_select_SP; - b_drop_o = 1'b1; - if (b_done_i) begin - hum_buf_SN = STORE; - end - end - - BYPASS : begin - // Forward one full transaction from input buffer. - m_axi4_wlast = axi4_wlast; - m_axi4_wdata = axi4_wdata; - m_axi4_wstrb = axi4_wstrb; - m_axi4_wuser = axi4_wuser; - - m_axi4_wvalid = axi4_wvalid; - axi4_wready = m_axi4_wready; - - master_select_o = l1_master_cur; - - // We have got a full transaction. - if (axi4_wlast & axi4_wready & axi4_wvalid) begin - fifo_select = 1'b0; - w_done = 1'b1; - hum_buf_SN = WAIT_L2_BYPASS_YES; - end - end - - DISCARD : begin - // Discard one full transaction from input buffer. - axi4_wready = 1'b1; - - // We have got a full transaction. - if (axi4_wlast & axi4_wready & axi4_wvalid) begin - // Try to perform handshake with B sender. - fifo_select = 1'b0; - b_drop_o = 1'b1; - // We cannot wait here due to axi4_wready. - if (b_done_i) begin - hum_buf_SN = WAIT_L2_BYPASS_YES; - end else begin - hum_buf_SN = DISCARD_FINISH; - end - end - end - - DISCARD_FINISH : begin - // Perform handshake with B sender. - fifo_select = 1'b0; - b_drop_o = 1'b1; - if (b_done_i) begin - hum_buf_SN = WAIT_L2_BYPASS_YES; - end - end - - WAIT_L1_BYPASS_NO : begin - // Do not allow the forwarding of L1 hits. - block_forwarding = 1'b1; - - // Wait for orders from L1 TLB. - if (l1_fifo_valid_out) begin - - // L1 hit - forward data from/through HUM buffer and refill the buffer - if (l1_accept_cur) begin - // Forward data from HUM buffer. - m_axi4_wlast = hum_buf_wlast; - m_axi4_wdata = hum_buf_wdata; - m_axi4_wstrb = hum_buf_wstrb; - m_axi4_wuser = hum_buf_wuser; - - m_axi4_wvalid = hum_buf_valid_out; - hum_buf_ready_in = m_axi4_wready; - - master_select_o = l1_master_cur; - - // Refill the HUM buffer. Stop when buffer full. - stop_store = ~hum_buf_ready_out; - hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ; - axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out; - - // Detect last data beat. - if (wlast_out) begin - fifo_select = 1'b0; - w_done = 1'b1; - if (~hum_buf_ready_out | hum_buf_almost_full) begin - hum_buf_SN = WAIT_L1_BYPASS_NO; - end else begin - hum_buf_SN = STORE; - end - end - - // Allow the forwarding of L1 hits. - block_forwarding = 1'b0; - - // L1 miss - wait for L2 - end else if (l1_save_cur) begin - fifo_select = 1'b0; - w_done = 1'b1; - hum_buf_SN = WAIT_L2_BYPASS_NO; - - // L1 prefetch, prot, multi - drop data - end else if (l1_drop_cur) begin - fifo_select_SN = 1'b0; // L1 - hum_buf_drop_req_SN = 1'b1; - hum_buf_drop_len_SN = l1_len_cur; - hum_buf_SN = FLUSH; - - // Allow the forwarding of L1 hits. - block_forwarding = 1'b0; - end - end - end - - WAIT_L2_BYPASS_NO : begin - // Do not allow the forwarding of L1 hits. - block_forwarding = 1'b1; - - // Wait for orders from L2 TLB. 
- if (l2_fifo_valid_out) begin - - // L2 hit - forward first part from HUM buffer, rest from input buffer - if (l2_accept_cur) begin - // Forward data from HUM buffer. - m_axi4_wlast = hum_buf_wlast; - m_axi4_wdata = hum_buf_wdata; - m_axi4_wstrb = hum_buf_wstrb; - m_axi4_wuser = hum_buf_wuser; - - m_axi4_wvalid = hum_buf_valid_out; - hum_buf_ready_in = m_axi4_wready; - - master_select_o = l2_master_cur; - - // Refill the HUM buffer. Stop when buffer full. - stop_store = ~hum_buf_ready_out; - hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ; - axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out; - - // Detect last data beat. - if (wlast_out) begin - fifo_select = 1'b1; - w_done = 1'b1; - if (~hum_buf_ready_out | hum_buf_almost_full) begin - hum_buf_SN = WAIT_L1_BYPASS_NO; - end else begin - hum_buf_SN = STORE; - end - end - - // Allow the forwarding of L1 hits. - block_forwarding = 1'b0; - - // L2 miss/prefetch hit - drop data - end else if (l2_drop_cur) begin - fifo_select_SN = 1'b1; // L2 - hum_buf_drop_req_SN = 1'b1; - hum_buf_drop_len_SN = l2_len_cur; - hum_buf_SN = FLUSH; - - // Allow the forwarding of L1 hits. - block_forwarding = 1'b0; - end - end - end - - - default: begin - hum_buf_SN = STORE; - end - - endcase // hum_buf_SP - end // HUM_BUFFER_FSM - - assign b_drop_set = 1'b0; - - end else begin // HUM_BUFFER - - // register to perform the handshake with B sender - always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin - if (axi4_arstn == 0) begin - b_drop_o <= 1'b0; - end else if (b_done_i) begin - b_drop_o <= 1'b0; - end else if (b_drop_set) begin - b_drop_o <= 1'b1;; - end - end - - always_comb begin : OUTPUT_CTRL - - fifo_select = 1'b0; - w_done = 1'b0; - b_drop_set = 1'b0; - - m_axi4_wlast = 1'b0; - m_axi4_wdata = 'b0; - m_axi4_wstrb = 'b0; - m_axi4_wuser = 'b0; - - m_axi4_wvalid = 1'b0; - axi4_wready = 1'b0; - - if (l1_fifo_valid_out) begin - // forward data - if (l1_accept_cur) begin - m_axi4_wlast = axi4_wlast; - m_axi4_wdata = axi4_wdata; - m_axi4_wstrb = axi4_wstrb; - m_axi4_wuser = axi4_wuser; - - m_axi4_wvalid = axi4_wvalid; - axi4_wready = m_axi4_wready; - - // Simply pop from FIFO upon last data beat. - w_done = axi4_wlast & axi4_wvalid & axi4_wready; - - // discard entire burst - end else if (b_drop_o == 1'b0) begin - axi4_wready = 1'b1; - - // Simply pop from FIFO upon last data beat. Perform handshake with B sender. 
- if (axi4_wlast & axi4_wvalid & axi4_wready) - b_drop_set = 1'b1; - end - end - - end // OUTPUT_CTRL - - assign master_select_o = l1_master_cur; - assign l2_fifo_ready_out = 1'b1; - assign block_forwarding = 1'b0; - - // unused signals - assign hum_buf_ready_out = 1'b0; - assign hum_buf_valid_in = 1'b0; - assign hum_buf_ready_in = 1'b0; - assign hum_buf_valid_out = 1'b0; - assign hum_buf_wdata = 'b0; - assign hum_buf_wstrb = 'b0; - assign hum_buf_wlast = 1'b0; - assign hum_buf_wuser = 'b0; - assign hum_buf_drop_len_SN = 'b0; - assign hum_buf_drop_req_SN = 1'b0; - assign hum_buf_almost_full = 1'b0; - - assign l2_fifo_valid_in = 1'b0; - assign l2_fifo_valid_out = 1'b0; - assign l2_prefetch_cur = 1'b0; - assign l2_hit_cur = 1'b0; - assign l2_id_cur = 'b0; - assign l2_len_cur = 'b0; - assign l2_master_cur = 1'b0; - assign l2_accept_cur = 1'b0; - assign l2_drop_cur = 1'b0; - - assign l2_req = 1'b0; - - assign fifo_select_SN = 1'b0; - assign fifo_select_SP = 1'b0; - - assign stop_store = 1'b0; - assign n_wlast_SP = 'b0; - assign wlast_in = 1'b0; - assign wlast_out = 1'b0; - - end // HUM_BUFFER - - endgenerate -""" diff --git a/src/soc/iommu/axi_rab/axi4_w_sender.py b/src/soc/iommu/axi_rab/axi4_w_sender.py deleted file mode 100644 index 9916334f..00000000 --- a/src/soc/iommu/axi_rab/axi4_w_sender.py +++ /dev/null @@ -1,78 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi4_w_sender(Elaboratable): - - def __init__(self): - self.axi4_aclk = Signal() # input - self.axi4_arstn = Signal() # input - self.s_axi4_wdata = Signal() # input - self.s_axi4_wvalid = Signal() # input - self.s_axi4_wready = Signal() # output - self.s_axi4_wstrb = Signal() # input - self.s_axi4_wlast = Signal() # input - self.s_axi4_wuser = Signal() # input - self.m_axi4_wdata = Signal() # output - self.m_axi4_wvalid = Signal() # output - self.m_axi4_wready = Signal() # input - self.m_axi4_wstrb = Signal() # output - self.m_axi4_wlast = Signal() # output - self.m_axi4_wuser = Signal() # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.m_axi4_wdata.eq(self.s_axi4_wdata) - m.d.comb += self.m_axi4_wstrb.eq(self.s_axi4_wstrb) - m.d.comb += self.m_axi4_wlast.eq(self.s_axi4_wlast) - m.d.comb += self.m_axi4_wuser.eq(self.s_axi4_wuser) - m.d.comb += self.m_axi4_wvalid.eq(self.s_axi4_wvalid) - m.d.comb += self.s_axi4_wready.eq(self.m_axi4_wready) - return m - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. 
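
(Editor's note, illustration only.) The generated axi4_w_sender class above is functionally a straight pass-through, but it loses the parameterisation: every port collapses to a 1-bit Signal. A parameterised sketch, assuming the usual AXI convention of one wstrb bit per data byte, might look like this (names and defaults are illustrative):

from nmigen import Module, Signal, Elaboratable


class WSenderSketch(Elaboratable):
    def __init__(self, data_width=32, user_width=2):
        self.s_wdata = Signal(data_width)
        self.s_wstrb = Signal(data_width // 8)
        self.s_wlast = Signal()
        self.s_wuser = Signal(user_width)
        self.s_wvalid = Signal()
        self.s_wready = Signal()
        self.m_wdata = Signal(data_width)
        self.m_wstrb = Signal(data_width // 8)
        self.m_wlast = Signal()
        self.m_wuser = Signal(user_width)
        self.m_wvalid = Signal()
        self.m_wready = Signal()

    def elaborate(self, platform=None):
        m = Module()
        # pure wiring: the W channel goes straight through, ready flows back
        m.d.comb += [
            self.m_wdata.eq(self.s_wdata),
            self.m_wstrb.eq(self.s_wstrb),
            self.m_wlast.eq(self.s_wlast),
            self.m_wuser.eq(self.s_wuser),
            self.m_wvalid.eq(self.s_wvalid),
            self.s_wready.eq(self.m_wready),
        ]
        return m

The SystemVerilog below carries these widths as module parameters (AXI_DATA_WIDTH, AXI_USER_WIDTH).
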
-# -# module axi4_w_sender -# #( -# parameter AXI_DATA_WIDTH = 32, -# parameter AXI_USER_WIDTH = 2 -# ) -# ( -# input axi4_aclk, -# input axi4_arstn, -# -# input [AXI_DATA_WIDTH-1:0] s_axi4_wdata, -# input s_axi4_wvalid, -# output s_axi4_wready, -# input [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb, -# input s_axi4_wlast, -# input [AXI_USER_WIDTH-1:0] s_axi4_wuser, -# -# output [AXI_DATA_WIDTH-1:0] m_axi4_wdata, -# output m_axi4_wvalid, -# input m_axi4_wready, -# output [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb, -# output m_axi4_wlast, -# output [AXI_USER_WIDTH-1:0] m_axi4_wuser -# ); -# -# assign m_axi4_wdata = s_axi4_wdata; -# assign m_axi4_wstrb = s_axi4_wstrb; -# assign m_axi4_wlast = s_axi4_wlast; -# assign m_axi4_wuser = s_axi4_wuser; -# -# assign m_axi4_wvalid = s_axi4_wvalid; -# assign s_axi4_wready = m_axi4_wready; -# -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi_buffer_rab.py b/src/soc/iommu/axi_rab/axi_buffer_rab.py deleted file mode 100644 index b4d99299..00000000 --- a/src/soc/iommu/axi_rab/axi_buffer_rab.py +++ /dev/null @@ -1,151 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi_buffer_rab(Elaboratable): - - def __init__(self): - self.clk = Signal() # input - self.rstn = Signal() # input - self.data_out = Signal(DATA_WIDTH) # output - self.valid_out = Signal() # output - self.ready_in = Signal() # input - self.valid_in = Signal() # input - self.data_in = Signal(DATA_WIDTH) # input - self.ready_out = Signal() # output - - def elaborate(self, platform=None): - m = Module() - m.d.comb += self.full.eq(self.None) - m.d.comb += self.data_out.eq(self.None) - m.d.comb += self.valid_out.eq(self.None) - m.d.comb += self.ready_out.eq(self.None) - return m - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# //import CfMath::log2; -# -# module axi_buffer_rab -# //#( -# // parameter DATA_WIDTH, -# // parameter BUFFER_DEPTH -# //) -# ( -# input logic clk, -# input logic rstn, -# -# // Downstream port -# output logic [DATA_WIDTH-1:0] data_out, -# output logic valid_out, -# input logic ready_in, -# -# // Upstream port -# input logic valid_in, -# input logic [DATA_WIDTH-1:0] data_in, -# output logic ready_out -# ); -# -# localparam integer LOG_BUFFER_DEPTH = log2(BUFFER_DEPTH); -# -# // Internal data structures -# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_in; // location to which we last wrote -# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_out; // location from which we last sent -# reg [LOG_BUFFER_DEPTH : 0] elements; // number of elements in the buffer -# reg [DATA_WIDTH - 1 : 0] buffer [BUFFER_DEPTH - 1 : 0]; -# -# wire full; -# -# integer loop1; -# -# assign full = (elements == BUFFER_DEPTH); -# -# always @(posedge clk or negedge rstn) -# begin: elements_sequential -# if (rstn == 1'b0) -# elements <= 0; -# else -# begin -# // ------------------ -# // Are we filling up? 
-# // ------------------ -# // One out, none in -# if (ready_in && valid_out && (!valid_in || full)) -# elements <= elements - 1; -# // None out, one in -# else if ((!valid_out || !ready_in) && valid_in && !full) -# elements <= elements + 1; -# // Else, either one out and one in, or none out and none in - stays unchanged -# end -# end -# -# always @(posedge clk or negedge rstn) -# begin: buffers_sequential -# if (rstn == 1'b0) -# begin -# for (loop1 = 0 ; loop1 < BUFFER_DEPTH ; loop1 = loop1 + 1) -# buffer[loop1] <= 0; -# end -# else -# begin -# // Update the memory -# if (valid_in && !full) -# buffer[pointer_in] <= data_in; -# end -# end -# -# always @(posedge clk or negedge rstn) -# begin: sequential -# if (rstn == 1'b0) -# begin -# pointer_out <= 0; -# pointer_in <= 0; -# end -# else -# begin -# // ------------------------------------ -# // Check what to do with the input side -# // ------------------------------------ -# // We have some input, increase by 1 the input pointer -# if (valid_in && !full) -# begin -# if (pointer_in == $unsigned(BUFFER_DEPTH - 1)) -# pointer_in <= 0; -# else -# pointer_in <= pointer_in + 1; -# end -# // Else we don't have any input, the input pointer stays the same -# -# // ------------------------------------- -# // Check what to do with the output side -# // ------------------------------------- -# // We had pushed one flit out, we can try to go for the next one -# if (ready_in && valid_out) -# begin -# if (pointer_out == $unsigned(BUFFER_DEPTH - 1)) -# pointer_out <= 0; -# else -# pointer_out <= pointer_out + 1; -# end -# // Else stay on the same output location -# end -# end -# -# // Update output ports -# assign data_out = buffer[pointer_out]; -# assign valid_out = (elements != 0); -# -# assign ready_out = ~full; -# -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi_buffer_rab_bram.py b/src/soc/iommu/axi_rab/axi_buffer_rab_bram.py deleted file mode 100644 index 349b314e..00000000 --- a/src/soc/iommu/axi_rab/axi_buffer_rab_bram.py +++ /dev/null @@ -1,209 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi_buffer_rab_bram(Elaboratable): - - def __init__(self): - self.clk = Signal() # input - self.rstn = Signal() # input - self.data_out = Signal(DATA_WIDTH) # output - self.valid_out = Signal() # output - self.ready_in = Signal() # input - self.valid_in = Signal() # input - self.data_in = Signal(DATA_WIDTH) # input - self.ready_out = Signal() # output - self.almost_full = Signal() # output - self.underfull = Signal() # output - self.drop_req = Signal() # input - self.drop_len = Signal(8) # input - - def elaborate(self, platform=None): - m = Module() - return m - - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. 
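
(Editor's note, illustration only.) The axi_buffer_rab module above is a plain circular buffer: an element counter, wrapping read/write pointers, full when elements equals BUFFER_DEPTH, valid_out when non-empty, ready_out when not full. A compact nmigen sketch of the same idea (illustrative names, no drop support) is:

from nmigen import Module, Signal, Memory, Mux, Elaboratable


class BufferSketch(Elaboratable):
    def __init__(self, width=8, depth=4):
        self.width, self.depth = width, depth
        self.data_in = Signal(width)
        self.valid_in = Signal()
        self.ready_out = Signal()
        self.data_out = Signal(width)
        self.valid_out = Signal()
        self.ready_in = Signal()

    def elaborate(self, platform=None):
        m = Module()
        mem = Memory(width=self.width, depth=self.depth)
        m.submodules.wp = wp = mem.write_port()
        m.submodules.rp = rp = mem.read_port(domain="comb")
        ptr_in = Signal(range(self.depth))    # location last written
        ptr_out = Signal(range(self.depth))   # location currently presented
        elems = Signal(range(self.depth + 1))
        full = Signal()

        m.d.comb += [
            full.eq(elems == self.depth),
            self.ready_out.eq(~full),
            self.valid_out.eq(elems != 0),
            rp.addr.eq(ptr_out),
            self.data_out.eq(rp.data),
            wp.addr.eq(ptr_in),
            wp.data.eq(self.data_in),
            wp.en.eq(self.valid_in & ~full),
        ]

        push = self.valid_in & ~full
        pop = self.valid_out & self.ready_in
        # one in and none out: count up; one out and none in: count down
        with m.If(push & ~pop):
            m.d.sync += elems.eq(elems + 1)
        with m.If(pop & ~push):
            m.d.sync += elems.eq(elems - 1)
        # wrap the pointers at BUFFER_DEPTH - 1
        with m.If(push):
            m.d.sync += ptr_in.eq(Mux(ptr_in == self.depth - 1, 0, ptr_in + 1))
        with m.If(pop):
            m.d.sync += ptr_out.eq(Mux(ptr_out == self.depth - 1, 0, ptr_out + 1))
        return m

The BRAM variant that follows (axi_buffer_rab_bram) adds drop support by jumping the read pointer by drop_len + 1 entries in one cycle; that is left out of this sketch for brevity.
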
-# -# ////import CfMath::log2; -# -# module axi_buffer_rab_bram -# //#( -# // parameter DATA_WIDTH, -# // parameter BUFFER_DEPTH -# // ) -# ( -# input logic clk, -# input logic rstn, -# -# // Downstream port -# output logic [DATA_WIDTH-1:0] data_out, -# output logic valid_out, -# input logic ready_in, -# -# // Upstream port -# input logic valid_in, -# input logic [DATA_WIDTH-1:0] data_in, -# output logic ready_out, -# -# // Status and drop control -# output logic almost_full, -# output logic underfull, -# input logic drop_req, -# // Number of items to drop. As for AXI lengths, counting starts at zero, i.e., `drop_len == 0` -# // and `drop_req` means drop one item. -# input logic [7:0] drop_len -# ); -# -""" #docstring_begin - // The BRAM needs to be in "write-first" mode for first-word fall-through FIFO behavior. - // To still push and pop simultaneously if the buffer is full, we internally increase the - // buffer depth by 1. - localparam ACT_BUFFER_DEPTH = BUFFER_DEPTH+1; - localparam ACT_LOG_BUFFER_DEPTH = log2(ACT_BUFFER_DEPTH+1); - - /** - * Internal data structures - */ - // Location to which we last wrote - logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_in_d, ptr_in_q; - // Location from which we last sent - logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_d, ptr_out_q; - // Required for fall-through behavior on the first word - logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_bram; - // Number of elements in the buffer. Can be negative if elements that have been dropped have not - // yet been written. - logic signed [ACT_LOG_BUFFER_DEPTH:0] n_elems_d, n_elems_q; - - logic [DATA_WIDTH-1:0] data_out_bram, data_out_q; - logic valid_out_q; - - logic full; - - assign almost_full = (n_elems_q == BUFFER_DEPTH-1); - assign full = (n_elems_q == BUFFER_DEPTH); - - always_ff @(posedge clk, negedge rstn) begin - if (~rstn) begin - n_elems_q <= '0; - ptr_in_q <= '0; - ptr_out_q <= '0; - end else begin - n_elems_q <= n_elems_d; - ptr_in_q <= ptr_in_d; - ptr_out_q <= ptr_out_d; - end - end - - // Update the number of elements. - always_comb begin - n_elems_d = n_elems_q; - if (drop_req) begin - n_elems_d -= (drop_len + 1); - end - if (valid_in && ready_out) begin - n_elems_d += 1; - end - if (valid_out && ready_in) begin - n_elems_d -= 1; - end - end - - // Update the output pointer. - always_comb begin - ptr_out_d = ptr_out_q; - if (drop_req) begin - if ((ptr_out_q + drop_len + 1) > (ACT_BUFFER_DEPTH - 1)) begin - ptr_out_d = drop_len + 1 - (ACT_BUFFER_DEPTH - ptr_out_q); - end else begin - ptr_out_d += (drop_len + 1); - end - end - if (valid_out && ready_in) begin - if (ptr_out_d == (ACT_BUFFER_DEPTH - 1)) begin - ptr_out_d = '0; - end else begin - ptr_out_d += 1; - end - end - end - - // The BRAM has a read latency of one cycle, so apply the new address one cycle earlier for - // first-word fall-through FIFO behavior. - //assign ptr_out_bram = (ptr_out_q == (ACT_BUFFER_DEPTH-1)) ? '0 : (ptr_out_q + 1); - assign ptr_out_bram = ptr_out_d; - - // Update the input pointer. - always_comb begin - ptr_in_d = ptr_in_q; - if (valid_in && ready_out) begin - if (ptr_in_d == (ACT_BUFFER_DEPTH - 1)) begin - ptr_in_d = '0; - end else begin - ptr_in_d += 1; - end - end - end - - // Update output ports. 
- assign valid_out = (n_elems_q > $signed(0)); - assign underfull = (n_elems_q < $signed(0)); - assign ready_out = ~full; - - ram_tp_write_first #( - .ADDR_WIDTH ( ACT_LOG_BUFFER_DEPTH ), - .DATA_WIDTH ( DATA_WIDTH ) - ) - ram_tp_write_first_0 - ( - .clk ( clk ), - .we ( valid_in & ~full ), - .addr0 ( ptr_in_q ), - .addr1 ( ptr_out_bram ), - .d_i ( data_in ), - .d0_o ( ), - .d1_o ( data_out_bram ) - ); - - // When reading from/writing two the same address on both ports ("Write-Read Collision"), - // the data on the read port is invalid (during the write cycle). In this implementation, - // this can happen only when the buffer is empty. Thus, we forward the data from an - // register in this case. - always @(posedge clk) begin - if (rstn == 1'b0) begin - data_out_q <= 'b0; - end else if ( (ptr_out_bram == ptr_in_q) && (valid_in && !full) ) begin - data_out_q <= data_in; - end - end - - always @(posedge clk) begin - if (rstn == 1'b0) begin - valid_out_q <= 'b0; - end else begin - valid_out_q <= valid_out; - end - end - - // Drive output data - always_comb begin - if (valid_out && !valid_out_q) begin // We have just written to an empty FIFO - data_out = data_out_q; - end else begin - data_out = data_out_bram; - end - end - -""" -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/axi_rab_cfg.py b/src/soc/iommu/axi_rab/axi_rab_cfg.py deleted file mode 100644 index 43843b95..00000000 --- a/src/soc/iommu/axi_rab/axi_rab_cfg.py +++ /dev/null @@ -1,707 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class axi_rab_cfg(Elaboratable): - - def __init__(self): - self.Clk_CI = Signal() # input - self.Rst_RBI = Signal() # input - self.s_axi_awaddr = Signal(AXI_ADDR_WIDTH) # input - self.s_axi_awvalid = Signal() # input - self.s_axi_awready = Signal() # output - self.s_axi_wdata = Signal() # input - self.s_axi_wstrb = Signal(1+ERROR p_expression_25) # input - self.s_axi_wvalid = Signal() # input - self.s_axi_wready = Signal() # output - self.s_axi_bresp = Signal(2) # output - self.s_axi_bvalid = Signal() # output - self.s_axi_bready = Signal() # input - self.s_axi_araddr = Signal(AXI_ADDR_WIDTH) # input - self.s_axi_arvalid = Signal() # input - self.s_axi_arready = Signal() # output - self.s_axi_rdata = Signal(AXI_DATA_WIDTH) # output - self.s_axi_rresp = Signal(2) # output - self.s_axi_rvalid = Signal() # output - self.s_axi_rready = Signal() # input - self.L1Cfg_DO = Signal() # output - self.L1AllowMultiHit_SO = Signal() # output - self.MissAddr_DI = Signal(ADDR_WIDTH_VIRT) # input - self.MissMeta_DI = Signal(MISS_META_WIDTH) # input - self.Miss_SI = Signal() # input - self.MhFifoFull_SO = Signal() # output - self.wdata_l2 = Signal() # output - self.waddr_l2 = Signal() # output - self.wren_l2 = Signal(N_PORTS) # output - - def elaborate(self, platform=None): - m = Module() - return m - - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. 
See the License for the -# // specific language governing permissions and limitations under the License. -# -# // --=========================================================================-- -# // -# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗ -# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝ -# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗ -# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║ -# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝ -# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝ -# // -# // -# // Author: Pirmin Vogel - vogelpi@iis.ee.ethz.ch -# // -# // Purpose : AXI4-Lite configuration and miss handling interface for RAB -# // -# // --=========================================================================-- -# -# //import CfMath::log2; -# -# module axi_rab_cfg -# #( -# parameter N_PORTS = 3, -# parameter N_REGS = 196, -# parameter N_L2_SETS = 32, -# parameter N_L2_SET_ENTRIES= 32, -# parameter ADDR_WIDTH_PHYS = 40, -# parameter ADDR_WIDTH_VIRT = 32, -# parameter N_FLAGS = 4, -# parameter AXI_DATA_WIDTH = 64, -# parameter AXI_ADDR_WIDTH = 32, -# parameter MISS_META_WIDTH = 10, // <= FIFO_WIDTH -# parameter MH_FIFO_DEPTH = 16 -# ) -# ( -# input logic Clk_CI, -# input logic Rst_RBI, -# -# // AXI Lite interface -# input logic [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, -# input logic s_axi_awvalid, -# output logic s_axi_awready, -# input logic [AXI_DATA_WIDTH/8-1:0][7:0] s_axi_wdata, -# input logic [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb, -# input logic s_axi_wvalid, -# output logic s_axi_wready, -# output logic [1:0] s_axi_bresp, -# output logic s_axi_bvalid, -# input logic s_axi_bready, -# input logic [AXI_ADDR_WIDTH-1:0] s_axi_araddr, -# input logic s_axi_arvalid, -# output logic s_axi_arready, -# output logic [AXI_DATA_WIDTH-1:0] s_axi_rdata, -# output logic [1:0] s_axi_rresp, -# output logic s_axi_rvalid, -# input logic s_axi_rready, -# -# // Slice configuration -# output logic [N_REGS-1:0][63:0] L1Cfg_DO, -# output logic L1AllowMultiHit_SO, -# -# // Miss handling -# input logic [ADDR_WIDTH_VIRT-1:0] MissAddr_DI, -# input logic [MISS_META_WIDTH-1:0] MissMeta_DI, -# input logic Miss_SI, -# output logic MhFifoFull_SO, -# -# // L2 TLB -# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] wdata_l2, -# output logic [N_PORTS-1:0] [AXI_ADDR_WIDTH-1:0] waddr_l2, -# output logic [N_PORTS-1:0] wren_l2 -# ); -# -""" #docstring_begin - - localparam ADDR_LSB = log2(64/8); // 64 even if the AXI Lite interface is 32, - // because RAB slices are 64 bit wide. 
- localparam ADDR_MSB = log2(N_REGS)+ADDR_LSB-1; - - localparam L2SINGLE_AMAP_SIZE = 16'h4000; // Maximum 2048 TLB entries in L2 - - localparam integer N_L2_ENTRIES = N_L2_SETS * N_L2_SET_ENTRIES; - - localparam logic [AXI_ADDR_WIDTH-1:0] L2_VA_MAX_ADDR = (N_L2_ENTRIES-1) << 2; - - logic [AXI_DATA_WIDTH/8-1:0][7:0] L1Cfg_DP[N_REGS]; // [Byte][Bit] - genvar j; - - // █████╗ ██╗ ██╗██╗██╗ ██╗ ██╗ ██╗████████╗███████╗ - // ██╔══██╗╚██╗██╔╝██║██║ ██║ ██║ ██║╚══██╔══╝██╔════╝ - // ███████║ ╚███╔╝ ██║███████║█████╗██║ ██║ ██║ █████╗ - // ██╔══██║ ██╔██╗ ██║╚════██║╚════╝██║ ██║ ██║ ██╔══╝ - // ██║ ██║██╔╝ ██╗██║ ██║ ███████╗██║ ██║ ███████╗ - // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚══════╝ - // - logic [AXI_ADDR_WIDTH-1:0] awaddr_reg; - logic awaddr_done_rise; - logic awaddr_done_reg; - logic awaddr_done_reg_dly; - - logic [AXI_DATA_WIDTH/8-1:0][7:0] wdata_reg; - logic [AXI_DATA_WIDTH/8-1:0] wstrb_reg; - logic wdata_done_rise; - logic wdata_done_reg; - logic wdata_done_reg_dly; - - logic wresp_done_reg; - logic wresp_running_reg; - - logic [AXI_ADDR_WIDTH-1:0] araddr_reg; - logic araddr_done_reg; - - logic [AXI_DATA_WIDTH-1:0] rdata_reg; - logic rresp_done_reg; - logic rresp_running_reg; - - logic awready; - logic wready; - logic bvalid; - - logic arready; - logic rvalid; - - logic wren; - logic wren_l1; - - assign wren = ( wdata_done_rise & awaddr_done_reg ) | ( awaddr_done_rise & wdata_done_reg ); - assign wdata_done_rise = wdata_done_reg & ~wdata_done_reg_dly; - assign awaddr_done_rise = awaddr_done_reg & ~awaddr_done_reg_dly; - - // reg_dly - always @(posedge Clk_CI or negedge Rst_RBI) - begin - if (!Rst_RBI) - begin - wdata_done_reg_dly <= 1'b0; - awaddr_done_reg_dly <= 1'b0; - end - else - begin - wdata_done_reg_dly <= wdata_done_reg; - awaddr_done_reg_dly <= awaddr_done_reg; - end - end - - // AW Channel - always @(posedge Clk_CI or negedge Rst_RBI) - begin - if (!Rst_RBI) - begin - awaddr_done_reg <= 1'b0; - awaddr_reg <= '0; - awready <= 1'b1; - end - else - begin - if (awready && s_axi_awvalid) - begin - awready <= 1'b0; - awaddr_done_reg <= 1'b1; - awaddr_reg <= s_axi_awaddr; - end - else if (awaddr_done_reg && wresp_done_reg) - begin - awready <= 1'b1; - awaddr_done_reg <= 1'b0; - end - end - end - - // W Channel - always @(posedge Clk_CI or negedge Rst_RBI) - begin - if (!Rst_RBI) - begin - wdata_done_reg <= 1'b0; - wready <= 1'b1; - wdata_reg <= '0; - wstrb_reg <= '0; - end - else - begin - if (wready && s_axi_wvalid) - begin - wready <= 1'b0; - wdata_done_reg <= 1'b1; - wdata_reg <= s_axi_wdata; - wstrb_reg <= s_axi_wstrb; - end - else if (wdata_done_reg && wresp_done_reg) - begin - wready <= 1'b1; - wdata_done_reg <= 1'b0; - end - end - end - - // B Channel - always @(posedge Clk_CI or negedge Rst_RBI) - begin - if (!Rst_RBI) - begin - bvalid <= 1'b0; - wresp_done_reg <= 1'b0; - wresp_running_reg <= 1'b0; - end - else - begin - if (awaddr_done_reg && wdata_done_reg && !wresp_done_reg) - begin - if (!wresp_running_reg) - begin - bvalid <= 1'b1; - wresp_running_reg <= 1'b1; - end - else if (s_axi_bready) - begin - bvalid <= 1'b0; - wresp_done_reg <= 1'b1; - wresp_running_reg <= 1'b0; - end - end - else - begin - bvalid <= 1'b0; - wresp_done_reg <= 1'b0; - wresp_running_reg <= 1'b0; - end - end - end - - // AR Channel - always @(posedge Clk_CI or negedge Rst_RBI) - begin - if (!Rst_RBI) - begin - araddr_done_reg <= 1'b0; - arready <= 1'b1; - araddr_reg <= '0; - end - else - begin - if (arready && s_axi_arvalid) - begin - arready <= 1'b0; - araddr_done_reg <= 1'b1; - araddr_reg <= 
s_axi_araddr; - end - else if (araddr_done_reg && rresp_done_reg) - begin - arready <= 1'b1; - araddr_done_reg <= 1'b0; - end - end - end - - // R Channel - always @(posedge Clk_CI or negedge Rst_RBI) - begin - if (!Rst_RBI) - begin - rresp_done_reg <= 1'b0; - rvalid <= 1'b0; - rresp_running_reg <= 1'b0; - end - else - begin - if (araddr_done_reg && !rresp_done_reg) - begin - if (!rresp_running_reg) - begin - rvalid <= 1'b1; - rresp_running_reg <= 1'b1; - end - else if (s_axi_rready) - begin - rvalid <= 1'b0; - rresp_done_reg <= 1'b1; - rresp_running_reg <= 1'b0; - end - end - else - begin - rvalid <= 1'b0; - rresp_done_reg <= 1'b0; - rresp_running_reg <= 1'b0; - end - end - end - - // ██╗ ██╗ ██████╗███████╗ ██████╗ ██████╗ ███████╗ ██████╗ - // ██║ ███║ ██╔════╝██╔════╝██╔════╝ ██╔══██╗██╔════╝██╔════╝ - // ██║ ╚██║ ██║ █████╗ ██║ ███╗ ██████╔╝█████╗ ██║ ███╗ - // ██║ ██║ ██║ ██╔══╝ ██║ ██║ ██╔══██╗██╔══╝ ██║ ██║ - // ███████╗██║ ╚██████╗██║ ╚██████╔╝ ██║ ██║███████╗╚██████╔╝ - // ╚══════╝╚═╝ ╚═════╝╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ ╚═════╝ - // - assign wren_l1 = wren && (awaddr_reg < L2SINGLE_AMAP_SIZE); - - always @( posedge Clk_CI or negedge Rst_RBI ) - begin - var integer idx_reg, idx_byte; - if ( Rst_RBI == 1'b0 ) - begin - for ( idx_reg = 0; idx_reg < N_REGS; idx_reg++ ) - L1Cfg_DP[idx_reg] <= '0; - end - else if ( wren_l1 ) - begin - if ( awaddr_reg[ADDR_LSB+1] == 1'b0 ) begin // VIRT_ADDR - for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin - if ( (idx_byte < ADDR_WIDTH_VIRT/8) ) begin - if ( wstrb_reg[idx_byte] ) begin - L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte]; - end - end - else begin // Let synthesizer optimize away unused registers. - L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0; - end - end - end - else if ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b10 ) begin // PHYS_ADDR - for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin - if ( (idx_byte < ADDR_WIDTH_PHYS/8) ) begin - if ( wstrb_reg[idx_byte] ) begin - L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte]; - end - end - else begin // Let synthesizer optimize away unused registers. - L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0; - end - end - end - else begin // ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b11 ) // FLAGS - for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin - if ( (idx_byte < 1) ) begin - if ( wstrb_reg[idx_byte] ) begin - L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte] & { {{8-N_FLAGS}{1'b0}}, {{N_FLAGS}{1'b1}} }; - end - end - else begin // Let synthesizer optimize away unused registers. 
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0; - end - end - end - end - end // always @ ( posedge Clk_CI or negedge Rst_RBI ) - - generate - // Mask unused bits -> Synthesizer should optimize away unused registers - for( j=0; j= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg[log2(L2SINGLE_AMAP_SIZE)-1:0] <= L2_VA_MAX_ADDR); - assign upper_word_is_written[j] = (wstrb_reg[7:4] != 4'b0000); - assign lower_word_is_written[j] = (wstrb_reg[3:0] != 4'b0000); - end else begin - assign l2_addr_is_in_va_rams[j] = 1'b0; - assign upper_word_is_written[j] = 1'b0; - assign lower_word_is_written[j] = 1'b0; - end - - always @( posedge Clk_CI or negedge Rst_RBI ) begin - var integer idx_byte, off_byte; - if ( Rst_RBI == 1'b0 ) - begin - wren_l2[j] <= 1'b0; - wdata_l2[j] <= '0; - end - else if (wren) - begin - if ( (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg < (j+2)*L2SINGLE_AMAP_SIZE) && (|wstrb_reg) ) - wren_l2[j] <= 1'b1; - if (AXI_DATA_WIDTH == 32) begin - for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) - wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte] & {8{wstrb_reg[idx_byte]}}; - end - else if (AXI_DATA_WIDTH == 64) begin - if (lower_word_is_written[j] == 1'b1) - off_byte = 0; - else - off_byte = 4; - // always put the payload in the lower word and set upper word to 0 - for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8/2; idx_byte++ ) - wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte+off_byte] & {8{wstrb_reg[idx_byte+off_byte]}}; - wdata_l2[j][AXI_DATA_WIDTH-1:AXI_DATA_WIDTH/2] <= 'b0; - end - // pragma translate_off - else - $fatal(1, "Unsupported AXI_DATA_WIDTH!"); - // pragma translate_on - end - else - wren_l2[j] <= '0; - end // always @ ( posedge Clk_CI or negedge Rst_RBI ) - - // Properly align the 32-bit word address when writing from 64-bit interface: - // Depending on the system, the incoming address is (non-)aligned to the 64-bit - // word when writing the upper 32-bit word. - always_comb begin - waddr_l2[j] = (awaddr_reg -(j+1)*L2SINGLE_AMAP_SIZE)/4; - if (wren_l2[j]) begin - if (AXI_DATA_WIDTH == 64) begin - if (upper_word_is_written[j] == 1'b1) begin - // address must be non-aligned - waddr_l2[j][0] = 1'b1; - end - end - // pragma translate_off - else if (AXI_DATA_WIDTH != 32) begin - $fatal(1, "Unsupported AXI_DATA_WIDTH!"); - end - // pragma translate_on - end - end - - // Assert that only one 32-bit word is ever written at a time to VA RAMs on 64-bit data - // systems. 
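      // Worked example (illustrative; assumes AXI_DATA_WIDTH == 64 and the
      // L2SINGLE_AMAP_SIZE of 16'h4000 defined above): a write to byte address 'h4008 on
      // port j = 0 with wstrb_reg == 8'hF0 touches only the upper 32-bit half of the beat,
      // so the word address ('h4008 - 'h4000)/4 = 2 gets its LSB set and becomes 3, and the
      // payload is taken from bytes 7:4.  With wstrb_reg == 8'h0F the word address stays 2
      // and bytes 3:0 are used.  A strobe such as 8'hFF (both halves written) is exactly the
      // case the assertion below rules out.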
- // pragma translate_off - always_ff @ (posedge Clk_CI) begin - if (AXI_DATA_WIDTH == 64) begin - if (l2_addr_is_in_va_rams[j]) begin - if (upper_word_is_written[j]) begin - assert (!lower_word_is_written[j]) - else $error("Unsupported write across two 32-bit words to VA RAMs!"); - end - else if (lower_word_is_written[j]) begin - assert (!upper_word_is_written[j]) - else $error("Unsupported write across two 32-bit words to VA RAMs!"); - end - end - end - end - // pragma translate_on - - end // for (j=0; j< N_PORTS; j++) - endgenerate - - // ███╗ ███╗██╗ ██╗ ███████╗██╗███████╗ ██████╗ ███████╗ - // ████╗ ████║██║ ██║ ██╔════╝██║██╔════╝██╔═══██╗██╔════╝ - // ██╔████╔██║███████║ █████╗ ██║█████╗ ██║ ██║███████╗ - // ██║╚██╔╝██║██╔══██║ ██╔══╝ ██║██╔══╝ ██║ ██║╚════██║ - // ██║ ╚═╝ ██║██║ ██║ ██║ ██║██║ ╚██████╔╝███████║ - // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═════╝ ╚══════╝ - // - logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDin_D; - logic AddrFifoWen_S; - logic AddrFifoRen_S; - logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDout_D; - logic AddrFifoFull_S; - logic AddrFifoEmpty_S; - logic AddrFifoEmpty_SB; - logic AddrFifoFull_SB; - - logic [MISS_META_WIDTH-1:0] MetaFifoDin_D; - logic MetaFifoWen_S; - logic MetaFifoRen_S; - logic [MISS_META_WIDTH-1:0] MetaFifoDout_D; - logic MetaFifoFull_S; - logic MetaFifoEmpty_S; - logic MetaFifoEmpty_SB; - logic MetaFifoFull_SB; - - logic FifosDisabled_S; - logic ConfRegWen_S; - logic [1:0] ConfReg_DN; - logic [1:0] ConfReg_DP; - - logic [AXI_DATA_WIDTH-1:0] wdata_reg_vec; - - assign FifosDisabled_S = ConfReg_DP[0]; - assign L1AllowMultiHit_SO = ConfReg_DP[1]; - - assign AddrFifoEmpty_S = ~AddrFifoEmpty_SB; - assign MetaFifoEmpty_S = ~MetaFifoEmpty_SB; - - assign AddrFifoFull_S = ~AddrFifoFull_SB; - assign MetaFifoFull_S = ~MetaFifoFull_SB; - - assign MhFifoFull_SO = (AddrFifoWen_S & AddrFifoFull_S) | (MetaFifoWen_S & MetaFifoFull_S); - - generate - for ( j=0; j -# * Conrad Burchert -# * Maheshwara Sharma -# * Andreas Kurth -# * Johannes Weinbuch -# * Pirmin Vogel -# */ -# -# //`include "pulp_soc_defines.sv" -# -# ////import CfMath::log2; -# -# module axi_rab_top -# -# // Parameters {{{ -# #( -# parameter N_PORTS = 2, -# parameter N_L2_SETS = 32, -# parameter N_L2_SET_ENTRIES = 32, -# parameter AXI_DATA_WIDTH = 64, -# parameter AXI_S_ADDR_WIDTH = 32, -# parameter AXI_M_ADDR_WIDTH = 40, -# parameter AXI_LITE_DATA_WIDTH = 64, -# parameter AXI_LITE_ADDR_WIDTH = 32, -# parameter AXI_ID_WIDTH = 10, -# parameter AXI_USER_WIDTH = 6, -# parameter MH_FIFO_DEPTH = 16 -# ) -# // }}} -# -# // Ports {{{ -# ( -# -# input logic Clk_CI, // This clock may be gated. -# input logic NonGatedClk_CI, -# input logic Rst_RBI, -# -# // For every slave port there are two master ports. 
The master -# // port to use can be set using the master_select flag of the protection -# // bits of a slice -# -# // AXI4 Slave {{{ -# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_awid, -# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_awaddr, -# input logic [N_PORTS-1:0] s_axi4_awvalid, -# output logic [N_PORTS-1:0] s_axi4_awready, -# input logic [N_PORTS-1:0] [7:0] s_axi4_awlen, -# input logic [N_PORTS-1:0] [2:0] s_axi4_awsize, -# input logic [N_PORTS-1:0] [1:0] s_axi4_awburst, -# input logic [N_PORTS-1:0] s_axi4_awlock, -# input logic [N_PORTS-1:0] [2:0] s_axi4_awprot, -# input logic [N_PORTS-1:0] [3:0] s_axi4_awcache, -# input logic [N_PORTS-1:0] [3:0] s_axi4_awregion, -# input logic [N_PORTS-1:0] [3:0] s_axi4_awqos, -# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_awuser, -# -# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_wdata, -# input logic [N_PORTS-1:0] s_axi4_wvalid, -# output logic [N_PORTS-1:0] s_axi4_wready, -# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb, -# input logic [N_PORTS-1:0] s_axi4_wlast, -# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_wuser, -# -# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_bid, -# output logic [N_PORTS-1:0] [1:0] s_axi4_bresp, -# output logic [N_PORTS-1:0] s_axi4_bvalid, -# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_buser, -# input logic [N_PORTS-1:0] s_axi4_bready, -# -# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_arid, -# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_araddr, -# input logic [N_PORTS-1:0] s_axi4_arvalid, -# output logic [N_PORTS-1:0] s_axi4_arready, -# input logic [N_PORTS-1:0] [7:0] s_axi4_arlen, -# input logic [N_PORTS-1:0] [2:0] s_axi4_arsize, -# input logic [N_PORTS-1:0] [1:0] s_axi4_arburst, -# input logic [N_PORTS-1:0] s_axi4_arlock, -# input logic [N_PORTS-1:0] [2:0] s_axi4_arprot, -# input logic [N_PORTS-1:0] [3:0] s_axi4_arcache, -# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_aruser, -# -# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_rid, -# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_rdata, -# output logic [N_PORTS-1:0] [1:0] s_axi4_rresp, -# output logic [N_PORTS-1:0] s_axi4_rvalid, -# input logic [N_PORTS-1:0] s_axi4_rready, -# output logic [N_PORTS-1:0] s_axi4_rlast, -# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_ruser, -# // }}} -# -# // AXI4 Master 0 {{{ -# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_awid, -# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_awaddr, -# output logic [N_PORTS-1:0] m0_axi4_awvalid, -# input logic [N_PORTS-1:0] m0_axi4_awready, -# output logic [N_PORTS-1:0] [7:0] m0_axi4_awlen, -# output logic [N_PORTS-1:0] [2:0] m0_axi4_awsize, -# output logic [N_PORTS-1:0] [1:0] m0_axi4_awburst, -# output logic [N_PORTS-1:0] m0_axi4_awlock, -# output logic [N_PORTS-1:0] [2:0] m0_axi4_awprot, -# output logic [N_PORTS-1:0] [3:0] m0_axi4_awcache, -# output logic [N_PORTS-1:0] [3:0] m0_axi4_awregion, -# output logic [N_PORTS-1:0] [3:0] m0_axi4_awqos, -# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_awuser, -# -# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_wdata, -# output logic [N_PORTS-1:0] m0_axi4_wvalid, -# input logic [N_PORTS-1:0] m0_axi4_wready, -# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m0_axi4_wstrb, -# output logic [N_PORTS-1:0] m0_axi4_wlast, -# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_wuser, -# -# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_bid, -# input logic [N_PORTS-1:0] [1:0] 
m0_axi4_bresp, -# input logic [N_PORTS-1:0] m0_axi4_bvalid, -# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_buser, -# output logic [N_PORTS-1:0] m0_axi4_bready, -# -# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_arid, -# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_araddr, -# output logic [N_PORTS-1:0] m0_axi4_arvalid, -# input logic [N_PORTS-1:0] m0_axi4_arready, -# output logic [N_PORTS-1:0] [7:0] m0_axi4_arlen, -# output logic [N_PORTS-1:0] [2:0] m0_axi4_arsize, -# output logic [N_PORTS-1:0] [1:0] m0_axi4_arburst, -# output logic [N_PORTS-1:0] m0_axi4_arlock, -# output logic [N_PORTS-1:0] [2:0] m0_axi4_arprot, -# output logic [N_PORTS-1:0] [3:0] m0_axi4_arcache, -# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_aruser, -# -# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_rid, -# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_rdata, -# input logic [N_PORTS-1:0] [1:0] m0_axi4_rresp, -# input logic [N_PORTS-1:0] m0_axi4_rvalid, -# output logic [N_PORTS-1:0] m0_axi4_rready, -# input logic [N_PORTS-1:0] m0_axi4_rlast, -# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_ruser, -# // }}} -# -# // AXI4 Master 1 {{{ -# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_awid, -# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_awaddr, -# output logic [N_PORTS-1:0] m1_axi4_awvalid, -# input logic [N_PORTS-1:0] m1_axi4_awready, -# output logic [N_PORTS-1:0] [7:0] m1_axi4_awlen, -# output logic [N_PORTS-1:0] [2:0] m1_axi4_awsize, -# output logic [N_PORTS-1:0] [1:0] m1_axi4_awburst, -# output logic [N_PORTS-1:0] m1_axi4_awlock, -# output logic [N_PORTS-1:0] [2:0] m1_axi4_awprot, -# output logic [N_PORTS-1:0] [3:0] m1_axi4_awcache, -# output logic [N_PORTS-1:0] [3:0] m1_axi4_awregion, -# output logic [N_PORTS-1:0] [3:0] m1_axi4_awqos, -# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_awuser, -# -# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_wdata, -# output logic [N_PORTS-1:0] m1_axi4_wvalid, -# input logic [N_PORTS-1:0] m1_axi4_wready, -# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m1_axi4_wstrb, -# output logic [N_PORTS-1:0] m1_axi4_wlast, -# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_wuser, -# -# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_bid, -# input logic [N_PORTS-1:0] [1:0] m1_axi4_bresp, -# input logic [N_PORTS-1:0] m1_axi4_bvalid, -# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_buser, -# output logic [N_PORTS-1:0] m1_axi4_bready, -# -# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_arid, -# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_araddr, -# output logic [N_PORTS-1:0] m1_axi4_arvalid, -# input logic [N_PORTS-1:0] m1_axi4_arready, -# output logic [N_PORTS-1:0] [7:0] m1_axi4_arlen, -# output logic [N_PORTS-1:0] [2:0] m1_axi4_arsize, -# output logic [N_PORTS-1:0] [1:0] m1_axi4_arburst, -# output logic [N_PORTS-1:0] m1_axi4_arlock, -# output logic [N_PORTS-1:0] [2:0] m1_axi4_arprot, -# output logic [N_PORTS-1:0] [3:0] m1_axi4_arcache, -# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_aruser, -# -# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_rid, -# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_rdata, -# input logic [N_PORTS-1:0] [1:0] m1_axi4_rresp, -# input logic [N_PORTS-1:0] m1_axi4_rvalid, -# output logic [N_PORTS-1:0] m1_axi4_rready, -# input logic [N_PORTS-1:0] m1_axi4_rlast, -# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_ruser, -# // }}} -# -# // AXI 4 Lite Slave (Configuration Interface) {{{ -# // 
AXI4-Lite port to setup the rab slices -# // use this to program the configuration registers -# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_awaddr, -# input logic s_axi4lite_awvalid, -# output logic s_axi4lite_awready, -# -# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_wdata, -# input logic s_axi4lite_wvalid, -# output logic s_axi4lite_wready, -# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi4lite_wstrb, -# -# output logic [1:0] s_axi4lite_bresp, -# output logic s_axi4lite_bvalid, -# input logic s_axi4lite_bready, -# -# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_araddr, -# input logic s_axi4lite_arvalid, -# output logic s_axi4lite_arready, -# -# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_rdata, -# output logic [1:0] s_axi4lite_rresp, -# output logic s_axi4lite_rvalid, -# input logic s_axi4lite_rready, -# // }}} -# -# // BRAMs {{{ -# //`ifdef RAB_AX_LOG_EN -# // BramPort.Slave ArBram_PS, -# // BramPort.Slave AwBram_PS, -# //`endif -# // }}} -# -# // Logger Control {{{ -# //`ifdef RAB_AX_LOG_EN -# // input logic LogEn_SI, -# // input logic ArLogClr_SI, -# // input logic AwLogClr_SI, -# // output logic ArLogRdy_SO, -# // output logic AwLogRdy_SO, -# //`endif -# // }}} -# -# // Interrupt Outputs {{{ -# // Interrupt lines to handle misses, collisions of slices/multiple hits, -# // protection faults and overflow of the miss handling fifo -# //`ifdef RAB_AX_LOG_EN -# // output logic int_ar_log_full, -# // output logic int_aw_log_full, -# //`endif -# output logic [N_PORTS-1:0] int_miss, -# output logic [N_PORTS-1:0] int_multi, -# output logic [N_PORTS-1:0] int_prot, -# output logic int_mhf_full -# // }}} -# -# ); -# -"""#docstring_begin - - // }}} - - // Signals {{{ - // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗ - // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝ - // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗ - // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║ - // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║ - // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝ - // - - // Internal AXI4 lines, these connect buffers on the slave side to the rab core and - // multiplexers which switch between the two master outputs - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_awid; - logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_awaddr; - logic [N_PORTS-1:0] int_awvalid; - logic [N_PORTS-1:0] int_awready; - logic [N_PORTS-1:0] [7:0] int_awlen; - logic [N_PORTS-1:0] [2:0] int_awsize; - logic [N_PORTS-1:0] [1:0] int_awburst; - logic [N_PORTS-1:0] int_awlock; - logic [N_PORTS-1:0] [2:0] int_awprot; - logic [N_PORTS-1:0] [3:0] int_awcache; - logic [N_PORTS-1:0] [3:0] int_awregion; - logic [N_PORTS-1:0] [3:0] int_awqos; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_awuser; - - logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_wdata; - logic [N_PORTS-1:0] int_wvalid; - logic [N_PORTS-1:0] int_wready; - logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] int_wstrb; - logic [N_PORTS-1:0] int_wlast; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_wuser; - - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_bid; - logic [N_PORTS-1:0] [1:0] int_bresp; - logic [N_PORTS-1:0] int_bvalid; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_buser; - logic [N_PORTS-1:0] int_bready; - - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_arid; - logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_araddr; - logic [N_PORTS-1:0] int_arvalid; - logic [N_PORTS-1:0] int_arready; - logic [N_PORTS-1:0] [7:0] int_arlen; - logic [N_PORTS-1:0] [2:0] int_arsize; - logic [N_PORTS-1:0] [1:0] int_arburst; - logic [N_PORTS-1:0] 
int_arlock; - logic [N_PORTS-1:0] [2:0] int_arprot; - logic [N_PORTS-1:0] [3:0] int_arcache; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_aruser; - - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_rid; - logic [N_PORTS-1:0] [1:0] int_rresp; - logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_rdata; - logic [N_PORTS-1:0] int_rlast; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_ruser; - logic [N_PORTS-1:0] int_rvalid; - logic [N_PORTS-1:0] int_rready; - - // rab_core outputs - logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_wtrans_addr; - logic [N_PORTS-1:0] int_wtrans_accept; - logic [N_PORTS-1:0] int_wtrans_drop; - logic [N_PORTS-1:0] int_wtrans_miss; - logic [N_PORTS-1:0] int_wtrans_sent; - logic [N_PORTS-1:0] int_wtrans_cache_coherent; - logic [N_PORTS-1:0] int_wmaster_select; - - logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_rtrans_addr; - logic [N_PORTS-1:0] int_rtrans_accept; - logic [N_PORTS-1:0] int_rtrans_drop; - logic [N_PORTS-1:0] int_rtrans_miss; - logic [N_PORTS-1:0] int_rtrans_sent; - logic [N_PORTS-1:0] int_rtrans_cache_coherent; - logic [N_PORTS-1:0] int_rmaster_select; - - logic [N_PORTS-1:0] w_master_select; - - // Internal master0 AXI4 lines. These connect the first master port to the - // multiplexers - // For channels read address, write address and write data the other lines - // are ignored if valid is not set, therefore we only need to multiplex those - logic [N_PORTS-1:0] int_m0_awvalid; - logic [N_PORTS-1:0] int_m0_awready; - - logic [N_PORTS-1:0] int_m0_wvalid; - logic [N_PORTS-1:0] int_m0_wready; - - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_bid; - logic [N_PORTS-1:0] [1:0] int_m0_bresp; - logic [N_PORTS-1:0] int_m0_bvalid; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_buser; - logic [N_PORTS-1:0] int_m0_bready; - - logic [N_PORTS-1:0] int_m0_arvalid; - logic [N_PORTS-1:0] int_m0_arready; - - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_rid; - logic [N_PORTS-1:0] [1:0] int_m0_rresp; - logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m0_rdata; - logic [N_PORTS-1:0] int_m0_rlast; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_ruser; - logic [N_PORTS-1:0] int_m0_rready; - logic [N_PORTS-1:0] int_m0_rvalid; - - logic [N_PORTS-1:0] l1_m0_ar_accept; - logic [N_PORTS-1:0] l1_m0_ar_drop; - logic [N_PORTS-1:0] l1_m0_ar_save; - logic [N_PORTS-1:0] l1_m0_ar_done; - logic [N_PORTS-1:0] l2_m0_ar_accept; - logic [N_PORTS-1:0] l2_m0_ar_drop; - logic [N_PORTS-1:0] l2_m0_ar_done; - logic [N_PORTS-1:0] l2_m0_ar_sending; - - logic [N_PORTS-1:0] l1_m0_aw_accept; - logic [N_PORTS-1:0] l1_m0_aw_drop; - logic [N_PORTS-1:0] l1_m0_aw_save; - logic [N_PORTS-1:0] l1_m0_aw_done; - logic [N_PORTS-1:0] l2_m0_aw_accept; - logic [N_PORTS-1:0] l2_m0_aw_drop; - logic [N_PORTS-1:0] l2_m0_aw_done; - logic [N_PORTS-1:0] l2_m0_aw_sending; - - // Internal master1 AXI4 lines. 
These connect the second master port to the - // multiplexers - // For channels read address, write address and write data the other lines - // are ignored if valid is not set, therefore we only need to multiplex those - logic [N_PORTS-1:0] int_m1_awvalid; - logic [N_PORTS-1:0] int_m1_awready; - - logic [N_PORTS-1:0] int_m1_wvalid; - logic [N_PORTS-1:0] int_m1_wready; - - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_bid; - logic [N_PORTS-1:0] [1:0] int_m1_bresp; - logic [N_PORTS-1:0] int_m1_bvalid; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_buser; - logic [N_PORTS-1:0] int_m1_bready; - - logic [N_PORTS-1:0] int_m1_arvalid; - logic [N_PORTS-1:0] int_m1_arready; - - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_rid; - logic [N_PORTS-1:0] [1:0] int_m1_rresp; - logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m1_rdata; - logic [N_PORTS-1:0] int_m1_rlast; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_ruser; - logic [N_PORTS-1:0] int_m1_rvalid; - logic [N_PORTS-1:0] int_m1_rready; - - logic [N_PORTS-1:0] l1_m1_ar_accept; - logic [N_PORTS-1:0] l1_m1_ar_drop; - logic [N_PORTS-1:0] l1_m1_ar_save; - logic [N_PORTS-1:0] l1_m1_ar_done; - logic [N_PORTS-1:0] l2_m1_ar_accept; - logic [N_PORTS-1:0] l2_m1_ar_drop; - logic [N_PORTS-1:0] l2_m1_ar_done; - - logic [N_PORTS-1:0] l1_m1_aw_accept; - logic [N_PORTS-1:0] l1_m1_aw_drop; - logic [N_PORTS-1:0] l1_m1_aw_save; - logic [N_PORTS-1:0] l1_m1_aw_done; - logic [N_PORTS-1:0] l2_m1_aw_accept; - logic [N_PORTS-1:0] l2_m1_aw_drop; - logic [N_PORTS-1:0] l2_m1_aw_done; - - // L1 outputs - logic [N_PORTS-1:0] rab_miss; // L1 RAB miss - logic [N_PORTS-1:0] rab_prot; - logic [N_PORTS-1:0] rab_multi; - logic [N_PORTS-1:0] rab_prefetch; - - // - // Signals used to support L2 TLB - // - // L2 RAM configuration signals - logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] L2CfgWData_D; - logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] L2CfgWAddr_D; - logic [N_PORTS-1:0] L2CfgWE_S; - - // L1 output and drop Buffer - logic [N_PORTS-1:0] L1OutRwType_D, L1DropRwType_DP; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L1OutUser_D, L1DropUser_DP; - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L1OutId_D, L1DropId_DP; - logic [N_PORTS-1:0] [7:0] L1OutLen_D, L1DropLen_DP; - logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L1OutAddr_D, L1DropAddr_DP; - logic [N_PORTS-1:0] L1OutProt_D, L1DropProt_DP; - logic [N_PORTS-1:0] L1OutMulti_D, L1DropMulti_DP; - logic [N_PORTS-1:0] L1DropEn_S; - logic [N_PORTS-1:0] L1DropPrefetch_S; - - logic [N_PORTS-1:0] L1DropValid_SN, L1DropValid_SP; - - // L2 input Buffer - logic [N_PORTS-1:0] L2InRwType_DP; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2InUser_DP; - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2InId_DP; - logic [N_PORTS-1:0] [7:0] L2InLen_DP; - logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2InAddr_DP; - logic [N_PORTS-1:0] L2InEn_S; - - // L2 output Buffer - logic [N_PORTS-1:0] L2OutRwType_DP; - logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2OutUser_DP; - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2OutId_DP; - logic [N_PORTS-1:0] [7:0] L2OutLen_DP; - logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2OutInAddr_DP; - - logic [N_PORTS-1:0] L2OutHit_SN, L2OutHit_SP; - logic [N_PORTS-1:0] L2OutMiss_SN, L2OutMiss_SP; - logic [N_PORTS-1:0] L2OutProt_SN, L2OutProt_SP; - logic [N_PORTS-1:0] L2OutMulti_SN, L2OutMulti_SP; - logic [N_PORTS-1:0] L2OutCC_SN, L2OutCC_SP; - logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] L2OutAddr_DN, L2OutAddr_DP; - - logic [N_PORTS-1:0] L2OutValid_SN, L2OutValid_SP; - logic [N_PORTS-1:0] L2OutPrefetch_S; - logic [N_PORTS-1:0] L2OutReady_S; - logic 
[N_PORTS-1:0] L2OutEn_S; - - // L2 outputs - logic [N_PORTS-1:0] L2Busy_S; - logic [N_PORTS-1:0] L2OutValid_S; - - logic [N_PORTS-1:0] L2Miss_S; - - // Signals for interfacing the AXI modules - logic [N_PORTS-1:0] l1_ar_accept; - logic [N_PORTS-1:0] l1_aw_accept; - logic [N_PORTS-1:0] l1_w_accept; - logic [N_PORTS-1:0] l1_xw_accept; - - logic [N_PORTS-1:0] l1_ar_drop; - logic [N_PORTS-1:0] l1_aw_drop; - logic [N_PORTS-1:0] l1_w_drop; - logic [N_PORTS-1:0] l1_xw_drop; - - logic [N_PORTS-1:0] l1_ar_save; - logic [N_PORTS-1:0] l1_aw_save; - logic [N_PORTS-1:0] l1_w_save; - logic [N_PORTS-1:0] l1_xw_save; - - logic [N_PORTS-1:0] l1_ar_done; - logic [N_PORTS-1:0] l1_r_done; - logic [N_PORTS-1:0] l1_r_drop; - logic [N_PORTS-1:0] lx_r_drop; - logic [N_PORTS-1:0] lx_r_done; - - logic [N_PORTS-1:0] l1_aw_done; - logic [N_PORTS-1:0] l1_w_done; - logic [N_PORTS-1:0] l1_xw_done; - logic [N_PORTS-1:0] l1_aw_done_SP; - logic [N_PORTS-1:0] l1_w_done_SP; - - logic [N_PORTS-1:0] l2_ar_accept; - logic [N_PORTS-1:0] l2_aw_accept; - logic [N_PORTS-1:0] l2_w_accept; - logic [N_PORTS-1:0] l2_xw_accept; - - logic [N_PORTS-1:0] l2_ar_drop; - logic [N_PORTS-1:0] l2_r_drop; - logic [N_PORTS-1:0] l2_xr_drop; - logic [N_PORTS-1:0] l2_aw_drop; - logic [N_PORTS-1:0] l2_w_drop; - logic [N_PORTS-1:0] l2_xw_drop; - - logic [N_PORTS-1:0] l2_aw_done; - logic [N_PORTS-1:0] l2_w_done; - logic [N_PORTS-1:0] l2_xw_done; - logic [N_PORTS-1:0] l2_aw_done_SP; - logic [N_PORTS-1:0] l2_w_done_SP; - - logic [N_PORTS-1:0] l2_ar_done; - logic [N_PORTS-1:0] l2_r_done; - logic [N_PORTS-1:0] l2_xr_done; - logic [N_PORTS-1:0] l2_ar_done_SP; - logic [N_PORTS-1:0] l2_r_done_SP; - - logic [N_PORTS-1:0] l1_mx_aw_done; - logic [N_PORTS-1:0] l1_mx_ar_done; - logic [N_PORTS-1:0] l1_m0_aw_done_SP; - logic [N_PORTS-1:0] l1_m0_ar_done_SP; - logic [N_PORTS-1:0] l1_m1_aw_done_SP; - logic [N_PORTS-1:0] l1_m1_ar_done_SP; - - logic [N_PORTS-1:0] l2_mx_aw_done; - logic [N_PORTS-1:0] l2_mx_ar_done; - logic [N_PORTS-1:0] l2_m0_aw_done_SP; - logic [N_PORTS-1:0] l2_m0_ar_done_SP; - logic [N_PORTS-1:0] l2_m1_aw_done_SP; - logic [N_PORTS-1:0] l2_m1_ar_done_SP; - - logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] l1_id_drop, lx_id_drop, b_id_drop; - logic [N_PORTS-1:0] [7:0] l1_len_drop, lx_len_drop; - logic [N_PORTS-1:0] l1_prefetch_drop, lx_prefetch_drop, b_prefetch_drop; - logic [N_PORTS-1:0] l1_hit_drop, lx_hit_drop, b_hit_drop; - - logic [N_PORTS-1:0] b_drop; - logic [N_PORTS-1:0] b_done; - - logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_aw_addr; - logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_ar_addr; - - logic [N_PORTS-1:0] l2_cache_coherent; - logic [N_PORTS-1:0] l2_master_select; - - logic [N_PORTS-1:0] aw_in_stall; - logic [N_PORTS-1:0] aw_out_stall; - - genvar i; - - // RRESP FSM - typedef enum logic {IDLE, BUSY} r_resp_mux_ctrl_state_t; - r_resp_mux_ctrl_state_t [N_PORTS-1:0] RRespMuxCtrl_SN, RRespMuxCtrl_SP; - logic [N_PORTS-1:0] RRespSel_SN, RRespSel_SP; - logic [N_PORTS-1:0] RRespBurst_S; - logic [N_PORTS-1:0] RRespSelIm_S; - - // }}} - - // Local parameters {{{ - - // Enable L2 for select ports - localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY; - - // L2TLB parameters - localparam integer HUM_BUFFER_DEPTH = (N_L2_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS)+13; - - // }}} - - // Derive `master_select` from cache coherency flag. 
{{{ - `ifdef EN_ACP - assign int_wmaster_select = int_wtrans_cache_coherent; - assign int_rmaster_select = int_rtrans_cache_coherent; - assign l2_master_select = l2_cache_coherent; - `else - assign int_wmaster_select = '0; - assign int_rmaster_select = '0; - assign l2_master_select = '0; - `endif - // }}} - - // Buf and Send {{{ - // ██████╗ ██╗ ██╗███████╗ ██╗ ███████╗███████╗███╗ ██╗██████╗ - // ██╔══██╗██║ ██║██╔════╝ ██║ ██╔════╝██╔════╝████╗ ██║██╔══██╗ - // ██████╔╝██║ ██║█████╗ ████████╗ ███████╗█████╗ ██╔██╗ ██║██║ ██║ - // ██╔══██╗██║ ██║██╔══╝ ██╔═██╔═╝ ╚════██║██╔══╝ ██║╚██╗██║██║ ██║ - // ██████╔╝╚██████╔╝██║ ██████║ ███████║███████╗██║ ╚████║██████╔╝ - // ╚═════╝ ╚═════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚═════╝ - // - logic[N_PORTS-1:0] m0_write_is_burst, m0_read_is_burst; - logic[N_PORTS-1:0] m1_write_is_burst, m1_read_is_burst; - - generate for (i = 0; i < N_PORTS; i++) begin : BUF_AND_SEND - - // Write Address channel (aw) {{{ - /* - * write address channel (aw) - * - * ██╗ ██╗██████╗ ██╗████████╗███████╗ █████╗ ██████╗ ██████╗ ██████╗ - * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗ - * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ███████║██║ ██║██║ ██║██████╔╝ - * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██║██║ ██║██║ ██║██╔══██╗ - * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║██████╔╝██████╔╝██║ ██║ - * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝ - * - */ - - axi4_aw_buffer - #( - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_aw_buffer - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .s_axi4_awid ( s_axi4_awid[i] ), - .s_axi4_awaddr ( s_axi4_awaddr[i] ), - .s_axi4_awvalid ( s_axi4_awvalid[i] ), - .s_axi4_awready ( s_axi4_awready[i] ), - .s_axi4_awlen ( s_axi4_awlen[i] ), - .s_axi4_awsize ( s_axi4_awsize[i] ), - .s_axi4_awburst ( s_axi4_awburst[i] ), - .s_axi4_awlock ( s_axi4_awlock[i] ), - .s_axi4_awprot ( s_axi4_awprot[i] ), - .s_axi4_awcache ( s_axi4_awcache[i] ), - .s_axi4_awregion ( s_axi4_awregion[i] ), - .s_axi4_awqos ( s_axi4_awqos[i] ), - .s_axi4_awuser ( s_axi4_awuser[i] ), - .m_axi4_awid ( int_awid[i] ), - .m_axi4_awaddr ( int_awaddr[i] ), - .m_axi4_awvalid ( int_awvalid[i] ), - .m_axi4_awready ( int_awready[i] ), - .m_axi4_awlen ( int_awlen[i] ), - .m_axi4_awsize ( int_awsize[i] ), - .m_axi4_awburst ( int_awburst[i] ), - .m_axi4_awlock ( int_awlock[i] ), - .m_axi4_awprot ( int_awprot[i] ), - .m_axi4_awcache ( int_awcache[i] ), - .m_axi4_awregion ( int_awregion[i] ), - .m_axi4_awqos ( int_awqos[i] ), - .m_axi4_awuser ( int_awuser[i] ) - ); - - axi4_aw_sender - #( - .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ), - .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) - ) - u_aw_sender_m0 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .l1_done_o ( l1_m0_aw_done[i] ), - .l1_accept_i ( l1_m0_aw_accept[i] ), - .l1_drop_i ( l1_m0_aw_drop[i] ), - .l1_save_i ( l1_m0_aw_save[i] ), - .l2_done_o ( l2_m0_aw_done[i] ), - .l2_accept_i ( l2_m0_aw_accept[i] ), - .l2_drop_i ( l2_m0_aw_drop[i] ), - .l2_sending_o ( l2_m0_aw_sending[i] ), - .l1_awaddr_i ( int_wtrans_addr[i] ), - .l2_awaddr_i ( l2_aw_addr[i] ), - .s_axi4_awid ( int_awid[i] ), - .s_axi4_awvalid ( int_m0_awvalid[i] ), - .s_axi4_awready ( int_m0_awready[i] ), - .s_axi4_awlen ( int_awlen[i] ), - .s_axi4_awsize ( int_awsize[i] ), - .s_axi4_awburst ( int_awburst[i] ), - .s_axi4_awlock ( int_awlock[i] ), - .s_axi4_awprot ( int_awprot[i] ), - .s_axi4_awcache ( int_awcache[i] ), - .s_axi4_awregion ( 
int_awregion[i] ), - .s_axi4_awqos ( int_awqos[i] ), - .s_axi4_awuser ( int_awuser[i] ), - .m_axi4_awid ( m0_axi4_awid[i] ), - .m_axi4_awaddr ( m0_axi4_awaddr[i] ), - .m_axi4_awvalid ( m0_axi4_awvalid[i] ), - .m_axi4_awready ( m0_axi4_awready[i] ), - .m_axi4_awlen ( m0_axi4_awlen[i] ), - .m_axi4_awsize ( m0_axi4_awsize[i] ), - .m_axi4_awburst ( m0_axi4_awburst[i] ), - .m_axi4_awlock ( m0_axi4_awlock[i] ), - .m_axi4_awprot ( m0_axi4_awprot[i] ), - .m_axi4_awcache ( ), - .m_axi4_awregion ( m0_axi4_awregion[i] ), - .m_axi4_awqos ( m0_axi4_awqos[i] ), - .m_axi4_awuser ( m0_axi4_awuser[i] ) - ); - - // The AXCACHE signals are set according to burstiness and cache coherence or statically - // when not connected to ACP on Zynq (implemented below). - assign m0_write_is_burst[i] = (m0_axi4_awlen[i] != {8{1'b0}}) && (m0_axi4_awburst[i] != 2'b00); - `ifndef EN_ACP - always_comb begin - if ( (l2_m0_aw_sending[i] & l2_cache_coherent[i]) | int_wtrans_cache_coherent[i]) begin - if (m0_write_is_burst[i]) begin - m0_axi4_awcache[i] = 4'b0111; - end else begin - m0_axi4_awcache[i] = 4'b1111; - end - end else begin - m0_axi4_awcache[i] = 4'b0011; - end - end - `else - assign m0_axi4_awcache[i] = 4'b0011; - `endif - - axi4_aw_sender - #( - .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ), - .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) - ) - u_aw_sender_m1 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .l1_accept_i ( l1_m1_aw_accept[i] ), - .l1_drop_i ( l1_m1_aw_drop[i] ), - .l1_save_i ( l1_m1_aw_save[i] ), - .l1_done_o ( l1_m1_aw_done[i] ), - .l2_accept_i ( l2_m1_aw_accept[i] ), - .l2_drop_i ( l2_m1_aw_drop[i] ), - .l2_done_o ( l2_m1_aw_done[i] ), - .l2_sending_o ( ), // just helps to set axcache - .l1_awaddr_i ( int_wtrans_addr[i] ), - .l2_awaddr_i ( l2_aw_addr[i] ), - .s_axi4_awid ( int_awid[i] ), - .s_axi4_awvalid ( int_m1_awvalid[i] ), - .s_axi4_awready ( int_m1_awready[i] ), - .s_axi4_awlen ( int_awlen[i] ), - .s_axi4_awsize ( int_awsize[i] ), - .s_axi4_awburst ( int_awburst[i] ), - .s_axi4_awlock ( int_awlock[i] ), - .s_axi4_awprot ( int_awprot[i] ), - .s_axi4_awcache ( int_awcache[i] ), - .s_axi4_awregion ( int_awregion[i] ), - .s_axi4_awqos ( int_awqos[i] ), - .s_axi4_awuser ( int_awuser[i] ), - .m_axi4_awid ( m1_axi4_awid[i] ), - .m_axi4_awaddr ( m1_axi4_awaddr[i] ), - .m_axi4_awvalid ( m1_axi4_awvalid[i] ), - .m_axi4_awready ( m1_axi4_awready[i] ), - .m_axi4_awlen ( m1_axi4_awlen[i] ), - .m_axi4_awsize ( m1_axi4_awsize[i] ), - .m_axi4_awburst ( m1_axi4_awburst[i] ), - .m_axi4_awlock ( m1_axi4_awlock[i] ), - .m_axi4_awprot ( m1_axi4_awprot[i] ), - .m_axi4_awcache ( ), - .m_axi4_awregion ( m1_axi4_awregion[i] ), - .m_axi4_awqos ( m1_axi4_awqos[i] ), - .m_axi4_awuser ( m1_axi4_awuser[i] ) - ); - - // The AXCACHE signals are set according to burstiness and cache coherence or statically - // when not connected to ACP on Zynq (implemented below). 
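    // As a compact reference for the AxCACHE muxing in this section (a sketch only, not
    // instantiated anywhere): without EN_ACP, master 0 modulates AxCACHE per transaction
    // while master 1 stays at the static non-cacheable 4'b0011; the function below simply
    // collapses the master-0 always_comb blocks of this section into one expression.  The
    // values follow the usual AXI4 memory-type encodings (4'b0011 normal non-cacheable
    // bufferable, 4'b1111 write-back read/write-allocate, 4'b0111 / 4'b1011 write-back
    // no-allocate for writes / reads).
    function automatic logic [3:0] m0_axcache_sketch(input logic is_read,
                                                     input logic cache_coherent,
                                                     input logic is_burst);
      if (!cache_coherent)
        return 4'b0011;                      // non-coherent default
      else if (!is_burst)
        return 4'b1111;                      // coherent single beat
      else
        return is_read ? 4'b1011 : 4'b0111;  // coherent burst
    endfunction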
- assign m1_write_is_burst[i] = (m1_axi4_awlen[i] != {8{1'b0}}) && (m1_axi4_awburst[i] != 2'b00); - `ifdef EN_ACP - always_comb begin - if (m1_write_is_burst[i]) begin - m1_axi4_awcache[i] = 4'b1011; - end else begin - m1_axi4_awcache[i] = 4'b1111; - end - end - `else - assign m1_axi4_awcache[i] = 4'b0011; - `endif - - // }}} - - // Write Data channel (w) {{{ - /* - * write data channel (w) - * - * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ █████╗ ████████╗ █████╗ - * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗ - * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██║ ██║███████║ ██║ ███████║ - * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██║ ██║██╔══██║ ██║ ██╔══██║ - * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██████╔╝██║ ██║ ██║ ██║ ██║ - * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ - * - */ - axi4_w_buffer - #( - .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ), - .ENABLE_L2TLB ( ENABLE_L2TLB[i] ), - .HUM_BUFFER_DEPTH ( HUM_BUFFER_DEPTH ) - ) - u_w_buffer - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - - // L1 interface - .l1_done_o ( l1_w_done[i] ), - .l1_accept_i ( l1_w_accept[i] ), - .l1_save_i ( l1_w_save[i] ), - .l1_drop_i ( l1_w_drop[i] ), - .l1_master_i ( int_wmaster_select[i] ), - .l1_id_i ( l1_id_drop[i] ), - .l1_len_i ( l1_len_drop[i] ), - .l1_prefetch_i ( l1_prefetch_drop[i] ), - .l1_hit_i ( l1_hit_drop[i] ), - - // L2 interface - .l2_done_o ( l2_w_done[i] ), - .l2_accept_i ( l2_w_accept[i] ), - .l2_drop_i ( l2_w_drop[i] ), - .l2_master_i ( l2_master_select[i] ), - .l2_id_i ( lx_id_drop[i] ), - .l2_len_i ( lx_len_drop[i] ), - .l2_prefetch_i ( lx_prefetch_drop[i] ), - .l2_hit_i ( lx_hit_drop[i] ), - - // Top-level control outputs - .master_select_o ( w_master_select[i] ), - .input_stall_o ( aw_in_stall[i] ), // stall L1 AW input if request buffers full - .output_stall_o ( aw_out_stall[i] ), // stall L1 AW hit forwarding if bypass not possible - - // B sender interface - .b_drop_o ( b_drop[i] ), - .b_done_i ( b_done[i] ), - .id_o ( b_id_drop[i] ), - .prefetch_o ( b_prefetch_drop[i] ), - .hit_o ( b_hit_drop[i] ), - - // AXI W channel interfaces - .s_axi4_wdata ( s_axi4_wdata[i] ), - .s_axi4_wvalid ( s_axi4_wvalid[i] ), - .s_axi4_wready ( s_axi4_wready[i] ), - .s_axi4_wstrb ( s_axi4_wstrb[i] ), - .s_axi4_wlast ( s_axi4_wlast[i] ), - .s_axi4_wuser ( s_axi4_wuser[i] ), - .m_axi4_wdata ( int_wdata[i] ), - .m_axi4_wvalid ( int_wvalid[i] ), - .m_axi4_wready ( int_wready[i] ), - .m_axi4_wstrb ( int_wstrb[i] ), - .m_axi4_wlast ( int_wlast[i] ), - .m_axi4_wuser ( int_wuser[i] ) - ); - - axi4_w_sender - #( - .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_w_sender_m0 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .s_axi4_wdata ( int_wdata[i] ), - .s_axi4_wvalid ( int_m0_wvalid[i] ), - .s_axi4_wready ( int_m0_wready[i] ), - .s_axi4_wstrb ( int_wstrb[i] ), - .s_axi4_wlast ( int_wlast[i] ), - .s_axi4_wuser ( int_wuser[i] ), - .m_axi4_wdata ( m0_axi4_wdata[i] ), - .m_axi4_wvalid ( m0_axi4_wvalid[i] ), - .m_axi4_wready ( m0_axi4_wready[i] ), - .m_axi4_wstrb ( m0_axi4_wstrb[i] ), - .m_axi4_wlast ( m0_axi4_wlast[i] ), - .m_axi4_wuser ( m0_axi4_wuser[i] ) - ); - - axi4_w_sender - #( - .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - - ) - u_w_sender_m1 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .s_axi4_wdata ( int_wdata[i] ), - .s_axi4_wvalid ( int_m1_wvalid[i] ), - .s_axi4_wready ( int_m1_wready[i] ), - .s_axi4_wstrb ( int_wstrb[i] ), 
- .s_axi4_wlast ( int_wlast[i] ), - .s_axi4_wuser ( int_wuser[i] ), - .m_axi4_wdata ( m1_axi4_wdata[i] ), - .m_axi4_wvalid ( m1_axi4_wvalid[i] ), - .m_axi4_wready ( m1_axi4_wready[i] ), - .m_axi4_wstrb ( m1_axi4_wstrb[i] ), - .m_axi4_wlast ( m1_axi4_wlast[i] ), - .m_axi4_wuser ( m1_axi4_wuser[i] ) - ); - - /* - * Multiplexer to switch between the two output master ports on the write data (w) channel - */ - always_comb begin - /* Only one output can be selected at any time */ - if (w_master_select[i] == 1'b0) begin - int_m0_wvalid[i] = int_wvalid[i]; - int_m1_wvalid[i] = 1'b0; - int_wready[i] = int_m0_wready[i]; - end else begin - int_m0_wvalid[i] = 1'b0; - int_m1_wvalid[i] = int_wvalid[i]; - int_wready[i] = int_m1_wready[i]; - end - end - - // }}} - - // Write Response channel (b) {{{ - /* - * write response channel (b) - * - * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ ███████╗███████╗██████╗ - * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔════╝██╔════╝██╔══██╗ - * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██████╔╝█████╗ ███████╗██████╔╝ - * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██╗██╔══╝ ╚════██║██╔═══╝ - * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║███████╗███████║██║ - * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ - * - */ - axi4_b_buffer - #( - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_b_buffer_m0 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .s_axi4_bid ( int_m0_bid[i] ), - .s_axi4_bresp ( int_m0_bresp[i] ), - .s_axi4_bvalid ( int_m0_bvalid[i] ), - .s_axi4_buser ( int_m0_buser[i] ), - .s_axi4_bready ( int_m0_bready[i] ), - .m_axi4_bid ( m0_axi4_bid[i] ), - .m_axi4_bresp ( m0_axi4_bresp[i] ), - .m_axi4_bvalid ( m0_axi4_bvalid[i] ), - .m_axi4_buser ( m0_axi4_buser[i] ), - .m_axi4_bready ( m0_axi4_bready[i] ) - ); - - axi4_b_buffer - #( - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_b_buffer_m1 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .s_axi4_bid ( int_m1_bid[i] ), - .s_axi4_bresp ( int_m1_bresp[i] ), - .s_axi4_bvalid ( int_m1_bvalid[i] ), - .s_axi4_buser ( int_m1_buser[i] ), - .s_axi4_bready ( int_m1_bready[i] ), - .m_axi4_bid ( m1_axi4_bid[i] ), - .m_axi4_bresp ( m1_axi4_bresp[i] ), - .m_axi4_bvalid ( m1_axi4_bvalid[i] ), - .m_axi4_buser ( m1_axi4_buser[i] ), - .m_axi4_bready ( m1_axi4_bready[i] ) - ); - - axi4_b_sender - #( - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_b_sender - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .drop_i ( b_drop[i] ), - .done_o ( b_done[i] ), - .id_i ( b_id_drop[i] ), - .prefetch_i ( b_prefetch_drop[i] ), - .hit_i ( b_hit_drop[i] ), - .s_axi4_bid ( s_axi4_bid[i] ), - .s_axi4_bresp ( s_axi4_bresp[i] ), - .s_axi4_bvalid ( s_axi4_bvalid[i] ), - .s_axi4_buser ( s_axi4_buser[i] ), - .s_axi4_bready ( s_axi4_bready[i] ), - .m_axi4_bid ( int_bid[i] ), - .m_axi4_bresp ( int_bresp[i] ), - .m_axi4_bvalid ( int_bvalid[i] ), - .m_axi4_buser ( int_buser[i] ), - .m_axi4_bready ( int_bready[i] ) - ); - - /* - * Multiplexer to switch between the two output master ports on the write response (b) channel - */ - always_comb begin - /* Output 1 always gets priority, so if it has something to send connect - it and let output 0 wait using rready = 0 */ - if (int_m1_bvalid[i] == 1'b1) begin - int_m0_bready[i] = 1'b0; - int_m1_bready[i] = int_bready[i]; - - int_bid[i] = int_m1_bid[i]; - int_bresp[i] = int_m1_bresp[i]; - int_buser[i] = int_m1_buser[i]; - int_bvalid[i] = int_m1_bvalid[i]; - end else begin - int_m0_bready[i] = 
int_bready[i]; - int_m1_bready[i] = 1'b0; - - int_bid[i] = int_m0_bid[i]; - int_bresp[i] = int_m0_bresp[i]; - int_buser[i] = int_m0_buser[i]; - int_bvalid[i] = int_m0_bvalid[i]; - end - end - - // }}} - - // Read Address channel (ar) {{{ - /* - * read address channel (ar) - * - * ██████╗ ███████╗ █████╗ ██████╗ █████╗ ██████╗ ██████╗ ██████╗ - * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗ - * ██████╔╝█████╗ ███████║██║ ██║ ███████║██║ ██║██║ ██║██████╔╝ - * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██║██║ ██║██║ ██║██╔══██╗ - * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║██████╔╝██████╔╝██║ ██║ - * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝ - * - */ - axi4_ar_buffer - #( - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_ar_buffer - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .s_axi4_arid ( s_axi4_arid[i] ), - .s_axi4_araddr ( s_axi4_araddr[i] ), - .s_axi4_arvalid ( s_axi4_arvalid[i] ), - .s_axi4_arready ( s_axi4_arready[i] ), - .s_axi4_arlen ( s_axi4_arlen[i] ), - .s_axi4_arsize ( s_axi4_arsize[i] ), - .s_axi4_arburst ( s_axi4_arburst[i] ), - .s_axi4_arlock ( s_axi4_arlock[i] ), - .s_axi4_arprot ( s_axi4_arprot[i] ), - .s_axi4_arcache ( s_axi4_arcache[i] ), - .s_axi4_aruser ( s_axi4_aruser[i] ), - .m_axi4_arid ( int_arid[i] ), - .m_axi4_araddr ( int_araddr[i] ), - .m_axi4_arvalid ( int_arvalid[i] ), - .m_axi4_arready ( int_arready[i] ), - .m_axi4_arlen ( int_arlen[i] ), - .m_axi4_arsize ( int_arsize[i] ), - .m_axi4_arburst ( int_arburst[i] ), - .m_axi4_arlock ( int_arlock[i] ), - .m_axi4_arprot ( int_arprot[i] ), - .m_axi4_arcache ( int_arcache[i] ), - .m_axi4_aruser ( int_aruser[i] ) - ); - - axi4_ar_sender - #( - .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ), - .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) - ) - u_ar_sender_m0 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .l1_done_o ( l1_m0_ar_done[i] ), - .l1_accept_i ( l1_m0_ar_accept[i] ), - .l1_drop_i ( l1_m0_ar_drop[i] ), - .l1_save_i ( l1_m0_ar_save[i] ), - .l2_done_o ( l2_m0_ar_done[i] ), - .l2_accept_i ( l2_m0_ar_accept[i] ), - .l2_drop_i ( l2_m0_ar_drop[i] ), - .l2_sending_o ( l2_m0_ar_sending[i] ), - .l1_araddr_i ( int_rtrans_addr[i] ), - .l2_araddr_i ( l2_ar_addr[i] ), - .s_axi4_arid ( int_arid[i] ), - .s_axi4_arvalid ( int_m0_arvalid[i] ), - .s_axi4_arready ( int_m0_arready[i] ), - .s_axi4_arlen ( int_arlen[i] ), - .s_axi4_arsize ( int_arsize[i] ), - .s_axi4_arburst ( int_arburst[i] ), - .s_axi4_arlock ( int_arlock[i] ), - .s_axi4_arprot ( int_arprot[i] ), - .s_axi4_arcache ( int_arcache[i] ), - .s_axi4_aruser ( int_aruser[i] ), - .m_axi4_arid ( m0_axi4_arid[i] ), - .m_axi4_araddr ( m0_axi4_araddr[i] ), - .m_axi4_arvalid ( m0_axi4_arvalid[i] ), - .m_axi4_arready ( m0_axi4_arready[i] ), - .m_axi4_arlen ( m0_axi4_arlen[i] ), - .m_axi4_arsize ( m0_axi4_arsize[i] ), - .m_axi4_arburst ( m0_axi4_arburst[i] ), - .m_axi4_arlock ( m0_axi4_arlock[i] ), - .m_axi4_arprot ( m0_axi4_arprot[i] ), - .m_axi4_arcache ( ), - .m_axi4_aruser ( m0_axi4_aruser[i] ) - ); - - // The AXCACHE signals are set according to burstiness and cache coherence or statically - // when not connected to ACP on Zynq (implemented below). 
- assign m0_read_is_burst[i] = (m0_axi4_arlen[i] != {8{1'b0}}) && (m0_axi4_arburst[i] != 2'b00); - `ifndef EN_ACP - always_comb begin - if ( (l2_m0_ar_sending[i] & l2_cache_coherent[i]) | int_rtrans_cache_coherent[i]) begin - if (m0_read_is_burst[i]) begin - m0_axi4_arcache[i] = 4'b1011; - end else begin - m0_axi4_arcache[i] = 4'b1111; - end - end else begin - m0_axi4_arcache[i] = 4'b0011; - end - end - `else - assign m0_axi4_arcache[i] = 4'b0011; - `endif - - axi4_ar_sender - #( - .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ), - .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) - ) - u_ar_sender_m1 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .l1_done_o ( l1_m1_ar_done[i] ), - .l1_accept_i ( l1_m1_ar_accept[i] ), - .l1_drop_i ( l1_m1_ar_drop[i] ), - .l1_save_i ( l1_m1_ar_save[i] ), - .l2_done_o ( l2_m1_ar_done[i] ), - .l2_accept_i ( l2_m1_ar_accept[i] ), - .l2_drop_i ( l2_m1_ar_drop[i] ), - .l2_sending_o ( ), // just helps to set axcache - .l1_araddr_i ( int_rtrans_addr[i] ), - .l2_araddr_i ( l2_ar_addr[i] ), - .s_axi4_arid ( int_arid[i] ), - .s_axi4_arvalid ( int_m1_arvalid[i] ), - .s_axi4_arready ( int_m1_arready[i] ), - .s_axi4_arlen ( int_arlen[i] ), - .s_axi4_arsize ( int_arsize[i] ), - .s_axi4_arburst ( int_arburst[i] ), - .s_axi4_arlock ( int_arlock[i] ), - .s_axi4_arprot ( int_arprot[i] ), - .s_axi4_arcache ( int_arcache[i] ), - .s_axi4_aruser ( int_aruser[i] ), - .m_axi4_arid ( m1_axi4_arid[i] ), - .m_axi4_araddr ( m1_axi4_araddr[i] ), - .m_axi4_arvalid ( m1_axi4_arvalid[i] ), - .m_axi4_arready ( m1_axi4_arready[i] ), - .m_axi4_arlen ( m1_axi4_arlen[i] ), - .m_axi4_arsize ( m1_axi4_arsize[i] ), - .m_axi4_arburst ( m1_axi4_arburst[i] ), - .m_axi4_arlock ( m1_axi4_arlock[i] ), - .m_axi4_arprot ( m1_axi4_arprot[i] ), - .m_axi4_arcache ( ), - .m_axi4_aruser ( m1_axi4_aruser[i] ) - ); - - // The AXCACHE signals are set according to burstiness and cache coherence or statically - // when not connected to ACP on Zynq (implemented below). 
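    // Note on the EN_ACP build: master_select is then derived from the cache-coherence flag
    // (see above), so coherent traffic is steered to master 1 (the ACP port), whose AxCACHE
    // is chosen from burstiness alone (below for AR, and for AW earlier in this section),
    // while master 0 is pinned to the non-cacheable 4'b0011.  Without EN_ACP the roles flip:
    // master 1 is pinned to 4'b0011 and master 0 modulates AxCACHE per transaction.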
- assign m1_read_is_burst[i] = (m1_axi4_arlen[i] != {8{1'b0}}) && (m1_axi4_arburst[i] != 2'b00); - `ifdef EN_ACP - always_comb begin - if (m1_read_is_burst[i]) begin - m1_axi4_arcache[i] = 4'b1011; - end else begin - m1_axi4_arcache[i] = 4'b1111; - end - end - `else - assign m1_axi4_arcache[i] = 4'b0011; - `endif - - // }}} - - // Read Response channel (r) {{{ - /* - * read response channel (r) - * - * ██████╗ ███████╗ █████╗ ██████╗ ██████╗ ███████╗███████╗██████╗ - * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔════╝██╔════╝██╔══██╗ - * ██████╔╝█████╗ ███████║██║ ██║ ██████╔╝█████╗ ███████╗██████╔╝ - * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██╗██╔══╝ ╚════██║██╔═══╝ - * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║███████╗███████║██║ - * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ - * - */ - axi4_r_buffer - #( - .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_r_buffer_m0 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .s_axi4_rid ( int_m0_rid[i] ), - .s_axi4_rresp ( int_m0_rresp[i] ), - .s_axi4_rdata ( int_m0_rdata[i] ), - .s_axi4_rlast ( int_m0_rlast[i] ), - .s_axi4_rvalid ( int_m0_rvalid[i] ), - .s_axi4_ruser ( int_m0_ruser[i] ), - .s_axi4_rready ( int_m0_rready[i] ), - .m_axi4_rid ( m0_axi4_rid[i] ), - .m_axi4_rresp ( m0_axi4_rresp[i] ), - .m_axi4_rdata ( m0_axi4_rdata[i] ), - .m_axi4_rlast ( m0_axi4_rlast[i] ), - .m_axi4_rvalid ( m0_axi4_rvalid[i] ), - .m_axi4_ruser ( m0_axi4_ruser[i] ), - .m_axi4_rready ( m0_axi4_rready[i] ) - ); - - axi4_r_buffer - #( - .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_r_buffer_m1 - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .s_axi4_rid ( int_m1_rid[i] ), - .s_axi4_rresp ( int_m1_rresp[i] ), - .s_axi4_rdata ( int_m1_rdata[i] ), - .s_axi4_rlast ( int_m1_rlast[i] ), - .s_axi4_rvalid ( int_m1_rvalid[i] ), - .s_axi4_ruser ( int_m1_ruser[i] ), - .s_axi4_rready ( int_m1_rready[i] ), - .m_axi4_rid ( m1_axi4_rid[i] ), - .m_axi4_rresp ( m1_axi4_rresp[i] ), - .m_axi4_rdata ( m1_axi4_rdata[i] ), - .m_axi4_rlast ( m1_axi4_rlast[i] ), - .m_axi4_rvalid ( m1_axi4_rvalid[i] ), - .m_axi4_ruser ( m1_axi4_ruser[i] ), - .m_axi4_rready ( m1_axi4_rready[i] ) - ); - - axi4_r_sender - #( - .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ) - ) - u_r_sender - ( - .axi4_aclk ( Clk_CI ), - .axi4_arstn ( Rst_RBI ), - .drop_i ( lx_r_drop[i] ), - .drop_len_i ( lx_len_drop[i] ), - .done_o ( lx_r_done[i] ), - .id_i ( lx_id_drop[i] ), - .prefetch_i ( lx_prefetch_drop[i] ), - .hit_i ( lx_hit_drop[i] ), - .s_axi4_rid ( s_axi4_rid[i] ), - .s_axi4_rresp ( s_axi4_rresp[i] ), - .s_axi4_rdata ( s_axi4_rdata[i] ), - .s_axi4_rlast ( s_axi4_rlast[i] ), - .s_axi4_rvalid ( s_axi4_rvalid[i] ), - .s_axi4_ruser ( s_axi4_ruser[i] ), - .s_axi4_rready ( s_axi4_rready[i] ), - .m_axi4_rid ( int_rid[i] ), - .m_axi4_rresp ( int_rresp[i] ), - .m_axi4_rdata ( int_rdata[i] ), - .m_axi4_rlast ( int_rlast[i] ), - .m_axi4_rvalid ( int_rvalid[i] ), - .m_axi4_ruser ( int_ruser[i] ), - .m_axi4_rready ( int_rready[i] ) - ); - - /* - * Multiplexer to switch between the two output master ports on the read response(r) channel - * - * Do not perform read burst interleaving as the DMA does not support it. This means we can only - * switch between the two masters upon sending rlast or when idle. - * - * However, if the downstream already performs burst interleaving, this cannot be undone here. 
-     * Also, the downstream may interleave a burst response with a single-beat transaction. In this
-     * case, the FSM below falls out of burst mode. To avoid performing burst interleaving after
-     * such an event, it gives priority to the master which received the last burst in case both
-     * have a burst ready (rvalid).
-     *
-     * Order of priority:
-     * 1. Ongoing burst transaction
-     * 2. Single-beat transaction on Master 1.
-     * 3. Single-beat transaction on Master 0.
-     * 4. Burst transaction on master that received the last burst.
-     */
-    // Select signal
-    always_ff @(posedge Clk_CI) begin
-      if (Rst_RBI == 0) begin
-        RRespSel_SP[i] <= 1'b0;
-      end else begin
-        RRespSel_SP[i] <= RRespSel_SN[i];
-      end
-    end
-
-    // FSM
-    always_comb begin : RRespMuxFsm
-      RRespMuxCtrl_SN[i] = RRespMuxCtrl_SP[i];
-      RRespSel_SN[i]     = RRespSel_SP[i];
-
-      RRespBurst_S[i]    = 1'b0;
-      RRespSelIm_S[i]    = 1'b0;
-
-      unique case (RRespMuxCtrl_SP[i])
-
-        IDLE: begin
-          // immediately forward single-beat transactions
-          if (int_m1_rvalid[i] && int_m1_rlast[i])
-            RRespSelIm_S[i] = 1'b1;
-          else if (int_m0_rvalid[i] && int_m0_rlast[i])
-            RRespSelIm_S[i] = 1'b0;
-
-          // bursts - they also start immediately
-          else if (int_m1_rvalid[i] || int_m0_rvalid[i]) begin
-            RRespMuxCtrl_SN[i] = BUSY;
-
-            // in case both are ready, continue with the master that had the last burst
-            if (int_m1_rvalid[i] && int_m0_rvalid[i]) begin
-              RRespSel_SN[i]  = RRespSel_SP[i];
-              RRespSelIm_S[i] = RRespSel_SP[i];
-            end else if (int_m1_rvalid[i]) begin
-              RRespSel_SN[i]  = 1'b1;
-              RRespSelIm_S[i] = 1'b1;
-            end else begin
-              RRespSel_SN[i]  = 1'b0;
-              RRespSelIm_S[i] = 1'b0;
-            end
-          end
-        end
-
-        BUSY: begin
-          RRespBurst_S[i] = 1'b1;
-          // detect last handshake of currently ongoing transfer
-          if (int_rvalid[i] && int_rready[i] && int_rlast[i])
-            RRespMuxCtrl_SN[i] = IDLE;
-        end
-
-        default: begin
-          RRespMuxCtrl_SN[i] = IDLE;
-        end
-
-      endcase
-    end
-
-    // FSM state
-    always_ff @(posedge Clk_CI) begin
-      if (Rst_RBI == 0) begin
-        RRespMuxCtrl_SP[i] <= IDLE;
-      end else begin
-        RRespMuxCtrl_SP[i] <= RRespMuxCtrl_SN[i];
-      end
-    end
-
-    // Actual multiplexer
-    always_comb begin
-      if ( (RRespBurst_S[i] && RRespSel_SP[i]) || (!RRespBurst_S[i] && RRespSelIm_S[i]) ) begin
-        int_m0_rready[i] = 1'b0;
-        int_m1_rready[i] = int_rready[i];
-
-        int_rid[i]    = int_m1_rid[i];
-        int_rresp[i]  = int_m1_rresp[i];
-        int_rdata[i]  = int_m1_rdata[i];
-        int_rlast[i]  = int_m1_rlast[i];
-        int_ruser[i]  = int_m1_ruser[i];
-        int_rvalid[i] = int_m1_rvalid[i];
-      end else begin
-        int_m0_rready[i] = int_rready[i];
-        int_m1_rready[i] = 1'b0;
-
-        int_rid[i]    = int_m0_rid[i];
-        int_rresp[i]  = int_m0_rresp[i];
-        int_rdata[i]  = int_m0_rdata[i];
-        int_rlast[i]  = int_m0_rlast[i];
-        int_ruser[i]  = int_m0_ruser[i];
-        int_rvalid[i] = int_m0_rvalid[i];
-      end
-    end
-
-  end // BUF & SEND
-
-  // }}}
-
-  endgenerate // BUF & SEND }}}
-
-  // Log {{{
-
-`ifdef RAB_AX_LOG_EN
-  AxiBramLogger
-    #(
-      .AXI_ID_BITW     ( AXI_ID_WIDTH        ),
-      .AXI_ADDR_BITW   ( AXI_S_ADDR_WIDTH    ),
-      .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES )
-    )
-    u_aw_logger
-    (
-      .Clk_CI          ( NonGatedClk_CI      ),
-      .TimestampClk_CI ( Clk_CI              ),
-      .Rst_RBI         ( Rst_RBI             ),
-      .AxiValid_SI     ( s_axi4_awvalid[1]   ),
-      .AxiReady_SI     ( s_axi4_awready[1]   ),
-      .AxiId_DI        ( s_axi4_awid[1]      ),
-      .AxiAddr_DI      ( s_axi4_awaddr[1]    ),
-      .AxiLen_DI       ( s_axi4_awlen[1]     ),
-      .Clear_SI        ( AwLogClr_SI         ),
-      .LogEn_SI        ( LogEn_SI            ),
-      .Full_SO         ( int_aw_log_full     ),
-      .Ready_SO        ( AwLogRdy_SO         ),
-      .Bram_PS         ( AwBram_PS           )
-    );
-
-  AxiBramLogger
-    #(
-      .AXI_ID_BITW     ( AXI_ID_WIDTH        ),
-      .AXI_ADDR_BITW   ( 
AXI_S_ADDR_WIDTH ), - .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES ) - ) - u_ar_logger - ( - .Clk_CI ( NonGatedClk_CI ), - .TimestampClk_CI ( Clk_CI ), - .Rst_RBI ( Rst_RBI ), - .AxiValid_SI ( s_axi4_arvalid[1] ), - .AxiReady_SI ( s_axi4_arready[1] ), - .AxiId_DI ( s_axi4_arid[1] ), - .AxiAddr_DI ( s_axi4_araddr[1] ), - .AxiLen_DI ( s_axi4_arlen[1] ), - .Clear_SI ( ArLogClr_SI ), - .LogEn_SI ( LogEn_SI ), - .Full_SO ( int_ar_log_full ), - .Ready_SO ( ArLogRdy_SO ), - .Bram_PS ( ArBram_PS ) - ); -`endif - - // }}} - - // RAB Core {{{ - // ██████╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗ ███████╗ - // ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔═══██╗██╔══██╗██╔════╝ - // ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝█████╗ - // ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔══██╗██╔══╝ - // ██║ ██║██║ ██║██████╔╝ ╚██████╗╚██████╔╝██║ ██║███████╗ - // ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ - // - /* - * rab_core - * - * The rab core translates addresses. It has two ports, which can be used - * independently, however they will compete for time internally, as lookups - * are serialized. - * - * type is the read(0) or write(1) used to check the protection flags. If they - * don't match an interrupt is created on the int_prot line. - */ - - rab_core - #( - .N_PORTS ( N_PORTS ), - .N_L2_SETS ( N_L2_SETS ), - .N_L2_SET_ENTRIES ( N_L2_SET_ENTRIES ), - .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), - .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), - .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), - .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ), - .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ), - .AXI_ID_WIDTH ( AXI_ID_WIDTH ), - .AXI_USER_WIDTH ( AXI_USER_WIDTH ), - .MH_FIFO_DEPTH ( MH_FIFO_DEPTH ) - ) - u_rab_core - ( - .Clk_CI ( Clk_CI ), - .Rst_RBI ( Rst_RBI ), - - // Config IF - .s_axi_awaddr ( s_axi4lite_awaddr ), - .s_axi_awvalid ( s_axi4lite_awvalid ), - .s_axi_awready ( s_axi4lite_awready ), - .s_axi_wdata ( s_axi4lite_wdata ), - .s_axi_wstrb ( s_axi4lite_wstrb ), - .s_axi_wvalid ( s_axi4lite_wvalid ), - .s_axi_wready ( s_axi4lite_wready ), - .s_axi_bresp ( s_axi4lite_bresp ), - .s_axi_bvalid ( s_axi4lite_bvalid ), - .s_axi_bready ( s_axi4lite_bready ), - .s_axi_araddr ( s_axi4lite_araddr ), - .s_axi_arvalid ( s_axi4lite_arvalid ), - .s_axi_arready ( s_axi4lite_arready ), - .s_axi_rready ( s_axi4lite_rready ), - .s_axi_rdata ( s_axi4lite_rdata ), - .s_axi_rresp ( s_axi4lite_rresp ), - .s_axi_rvalid ( s_axi4lite_rvalid ), - - // L1 miss info outputs -> L2 TLB arbitration - .int_miss ( rab_miss ), - .int_multi ( rab_multi ), - .int_prot ( rab_prot ), - .int_prefetch ( rab_prefetch ), - .int_mhf_full ( int_mhf_full ), - - // L1 transaction info outputs -> L2 TLB arbitration - .int_axaddr_o ( L1OutAddr_D ), - .int_axid_o ( L1OutId_D ), - .int_axlen_o ( L1OutLen_D ), - .int_axuser_o ( L1OutUser_D ), - - // Write Req IF - .port1_addr ( int_awaddr ), - .port1_id ( int_awid ), - .port1_len ( int_awlen ), - .port1_size ( int_awsize ), - .port1_addr_valid ( int_awvalid & ~aw_in_stall ), // avoid the FSM accepting new AW requests - .port1_type ( {N_PORTS{1'b1}} ), - .port1_user ( int_awuser ), - .port1_sent ( int_wtrans_sent ), // signal done to L1 FSM - .port1_out_addr ( int_wtrans_addr ), - .port1_cache_coherent ( int_wtrans_cache_coherent ), - .port1_accept ( int_wtrans_accept ), - .port1_drop ( int_wtrans_drop ), - .port1_miss ( int_wtrans_miss ), - - // Read Req IF - .port2_addr ( int_araddr ), - .port2_id ( int_arid ), - .port2_len ( int_arlen ), - .port2_size ( int_arsize ), - .port2_addr_valid ( int_arvalid ), - .port2_type ( 
{N_PORTS{1'b0}} ), - .port2_user ( int_aruser ), - .port2_sent ( int_rtrans_sent ), // signal done to L1 FSM - .port2_out_addr ( int_rtrans_addr ), - .port2_cache_coherent ( int_rtrans_cache_coherent ), - .port2_accept ( int_rtrans_accept ), - .port2_drop ( int_rtrans_drop ), - .port2_miss ( int_rtrans_miss ), - - // L2 miss info inputs -> axi_rab_cfg - .miss_l2_i ( L2Miss_S ), - .miss_l2_addr_i ( L2OutInAddr_DP ), - .miss_l2_id_i ( L2OutId_DP ), - .miss_l2_user_i ( L2OutUser_DP ), - - // L2 config outputs - .wdata_l2_o ( L2CfgWData_D ), - .waddr_l2_o ( L2CfgWAddr_D ), - .wren_l2_o ( L2CfgWE_S ) - ); - - // }}} - - // AX SPLITS {{{ - // █████╗ ██╗ ██╗ ███████╗██████╗ ██╗ ██╗████████╗ - // ██╔══██╗╚██╗██╔╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝ - // ███████║ ╚███╔╝ ███████╗██████╔╝██║ ██║ ██║ - // ██╔══██║ ██╔██╗ ╚════██║██╔═══╝ ██║ ██║ ██║ - // ██║ ██║██╔╝ ██╗ ███████║██║ ███████╗██║ ██║ - // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝ - // - /** - * Multiplex the two output master ports of the Read Address and Write Address (AR/AW) channels. - * - * Use the `int_xmaster_select` signal to route the signals to either Master 0 (to memory) or - * Master 1 (to ACP). In case of an L1 miss: Route the signals to both masters. They shall be - * saved until the L2 outputs are available. - */ - generate for (i = 0; i < N_PORTS; i++) begin : AX_SPLIT - - /* - * When accepting L1 transactions, we must just do so on the selected master. Drop requests must - * be performed on any one of the two masters. Save requests must be performed by both masters. - */ - always_comb begin : AW_L1_SPLIT - - // TLB handshake - l1_m0_aw_accept[i] = 1'b0; - l1_m1_aw_accept[i] = 1'b0; - l1_m0_aw_drop[i] = 1'b0; - l1_m1_aw_drop[i] = 1'b0; - l1_m0_aw_save[i] = 1'b0; - l1_m1_aw_save[i] = 1'b0; - - l1_mx_aw_done[i] = 1'b0; - - // AXI sender input handshake - int_m0_awvalid[i] = 1'b0; - int_m1_awvalid[i] = 1'b0; - int_awready[i] = 1'b0; - - // accept on selected master only - if (l1_aw_accept[i]) begin - if (int_wmaster_select[i]) begin - l1_m1_aw_accept[i] = 1'b1; - l1_mx_aw_done[i] = l1_m1_aw_done[i]; - - int_m1_awvalid[i] = int_awvalid[i]; - int_awready[i] = int_m1_awready[i]; - - end else begin - l1_m0_aw_accept[i] = 1'b1; - l1_mx_aw_done[i] = l1_m0_aw_done[i]; - - int_m0_awvalid[i] = int_awvalid[i]; - int_awready[i] = int_m0_awready[i]; - end - - // drop on Master 0 only - end else if (l1_aw_drop[i]) begin - l1_m0_aw_drop[i] = 1'b1; - l1_mx_aw_done[i] = l1_m0_aw_done[i]; - - int_m0_awvalid[i] = int_awvalid[i]; - int_awready[i] = l1_m0_aw_done[i]; - - // save on both masters - end else if (l1_aw_save[i]) begin - // split save - l1_m0_aw_save[i] = ~l1_m0_aw_done_SP[i]; - l1_m1_aw_save[i] = ~l1_m1_aw_done_SP[i]; - - // combine done - l1_mx_aw_done[i] = l1_m0_aw_done_SP[i] & l1_m1_aw_done_SP[i]; - - int_m0_awvalid[i] = int_awvalid[i]; - int_m1_awvalid[i] = int_awvalid[i]; - int_awready[i] = l1_mx_aw_done[i]; - end - end - - // signal back to handshake splitter - assign l1_aw_done[i] = l1_mx_aw_done[i]; - - always_ff @(posedge Clk_CI) begin : L1_MX_AW_DONE_REG - if (Rst_RBI == 0) begin - l1_m0_aw_done_SP[i] <= 1'b0; - l1_m1_aw_done_SP[i] <= 1'b0; - end else if (l1_mx_aw_done[i]) begin - l1_m0_aw_done_SP[i] <= 1'b0; - l1_m1_aw_done_SP[i] <= 1'b0; - end else begin - l1_m0_aw_done_SP[i] <= l1_m0_aw_done_SP[i] | l1_m0_aw_done[i]; - l1_m1_aw_done_SP[i] <= l1_m1_aw_done_SP[i] | l1_m1_aw_done[i]; - end - end - - /* - * When accepting L2 transactions, we must drop the corresponding transaction from the other - * master to make it available 
again for save requests from L1_DROP_SAVE. - */ - always_comb begin : AW_L2_SPLIT - - l2_m0_aw_accept[i] = 1'b0; - l2_m1_aw_accept[i] = 1'b0; - l2_m0_aw_drop[i] = 1'b0; - l2_m1_aw_drop[i] = 1'b0; - - // de-assert request signals individually upon handshakes - if (l2_aw_accept[i]) begin - if (l2_master_select[i]) begin - l2_m1_aw_accept[i] = ~l2_m1_aw_done_SP[i]; - l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i]; - - end else begin - l2_m0_aw_accept[i] = ~l2_m0_aw_done_SP[i]; - l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i]; - - end - end else begin - l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0; - l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0; - - end - - // combine done - l2_mx_aw_done[i] = l2_m0_aw_done_SP[i] & l2_m1_aw_done_SP[i]; - - l2_aw_done[i] = l2_mx_aw_done[i]; - end - - always_ff @(posedge Clk_CI) begin : L2_MX_AW_DONE_REG - if (Rst_RBI == 0) begin - l2_m0_aw_done_SP[i] <= 1'b0; - l2_m1_aw_done_SP[i] <= 1'b0; - end else if (l2_mx_aw_done[i]) begin - l2_m0_aw_done_SP[i] <= 1'b0; - l2_m1_aw_done_SP[i] <= 1'b0; - end else begin - l2_m0_aw_done_SP[i] <= l2_m0_aw_done_SP[i] | l2_m0_aw_done[i]; - l2_m1_aw_done_SP[i] <= l2_m1_aw_done_SP[i] | l2_m1_aw_done[i]; - end - end - - /* - * When accepting L1 transactions, we must just do so on the selected master. Drop requests must - * be performed on any one of the two masters. Save requests must be performed by both masters. - */ - always_comb begin : AR_L1_SPLIT - - // TLB handshake - l1_m0_ar_accept[i] = 1'b0; - l1_m1_ar_accept[i] = 1'b0; - l1_m0_ar_drop[i] = 1'b0; - l1_m1_ar_drop[i] = 1'b0; - l1_m0_ar_save[i] = 1'b0; - l1_m1_ar_save[i] = 1'b0; - - l1_mx_ar_done[i] = 1'b0; - - // AXI sender input handshake - int_m0_arvalid[i] = 1'b0; - int_m1_arvalid[i] = 1'b0; - int_arready[i] = 1'b0; - - // accept on selected master only - if (l1_ar_accept[i]) begin - if (int_rmaster_select[i]) begin - l1_m1_ar_accept[i] = 1'b1; - l1_mx_ar_done[i] = l1_m1_ar_done[i]; - - int_m1_arvalid[i] = int_arvalid[i]; - int_arready[i] = int_m1_arready[i]; - - end else begin - l1_m0_ar_accept[i] = 1'b1; - l1_mx_ar_done[i] = l1_m0_ar_done[i]; - - int_m0_arvalid[i] = int_arvalid[i]; - int_arready[i] = int_m0_arready[i]; - end - - // drop on Master 0 only - end else if (l1_ar_drop[i]) begin - l1_m0_ar_drop[i] = 1'b1; - l1_mx_ar_done[i] = l1_m0_ar_done[i]; - - int_m0_arvalid[i] = int_arvalid[i]; - int_arready[i] = l1_m0_ar_done[i]; - - // save on both masters - end else if (l1_ar_save[i]) begin - // split save - l1_m0_ar_save[i] = ~l1_m0_ar_done_SP[i]; - l1_m1_ar_save[i] = ~l1_m1_ar_done_SP[i]; - - // combine done - l1_mx_ar_done[i] = l1_m0_ar_done_SP[i] & l1_m1_ar_done_SP[i]; - - int_m0_arvalid[i] = int_arvalid[i]; - int_m1_arvalid[i] = int_arvalid[i]; - int_arready[i] = l1_mx_ar_done[i]; - end - end - - // signal back to handshake splitter - assign l1_ar_done[i] = l1_mx_ar_done[i]; - - always_ff @(posedge Clk_CI) begin : L1_MX_AR_DONE_REG - if (Rst_RBI == 0) begin - l1_m0_ar_done_SP[i] <= 1'b0; - l1_m1_ar_done_SP[i] <= 1'b0; - end else if (l1_mx_ar_done[i]) begin - l1_m0_ar_done_SP[i] <= 1'b0; - l1_m1_ar_done_SP[i] <= 1'b0; - end else begin - l1_m0_ar_done_SP[i] <= l1_m0_ar_done_SP[i] | l1_m0_ar_done[i]; - l1_m1_ar_done_SP[i] <= l1_m1_ar_done_SP[i] | l1_m1_ar_done[i]; - end - end - - /* - * When accepting L2 transactions, we must drop the corresponding transaction from the other - * master to make it available again for save requests from L1_DROP_SAVE. 
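Both the "save on both masters" case in the L1 split and the per-master accept/drop in the L2 split above rely on one sticky done bit per master (the *_done_SP registers), so each master can finish independently and the pair is cleared once the combined done fires. A minimal nmigen sketch of that bookkeeping, with purely illustrative class and signal names, might look like this:

from nmigen import Elaboratable, Module, Signal


class DoneCombiner(Elaboratable):
    """Sticky per-master done bits, cleared once both masters have finished."""

    def __init__(self):
        self.m0_done = Signal()  # single-cycle done pulse from master 0
        self.m1_done = Signal()  # single-cycle done pulse from master 1
        self.mx_done = Signal()  # combined done back to the handshake splitter

    def elaborate(self, platform=None):
        m = Module()
        m0_done_sp = Signal()    # sticky copy of m0_done
        m1_done_sp = Signal()    # sticky copy of m1_done
        # combined done once both masters have completed
        m.d.comb += self.mx_done.eq(m0_done_sp & m1_done_sp)
        with m.If(self.mx_done):
            # clear both sticky bits for the next transaction
            m.d.sync += [m0_done_sp.eq(0), m1_done_sp.eq(0)]
        with m.Else():
            # accumulate done pulses until both have arrived
            m.d.sync += [m0_done_sp.eq(m0_done_sp | self.m0_done),
                         m1_done_sp.eq(m1_done_sp | self.m1_done)]
        return m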
- */ - always_comb begin : AR_L2_SPLIT - - l2_m0_ar_accept[i] = 1'b0; - l2_m1_ar_accept[i] = 1'b0; - l2_m0_ar_drop[i] = 1'b0; - l2_m1_ar_drop[i] = 1'b0; - - // de-assert request signals individually upon handshakes - if (l2_ar_accept[i]) begin - if (l2_master_select[i]) begin - l2_m1_ar_accept[i] = ~l2_m1_ar_done_SP[i]; - l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i]; - - end else begin - l2_m0_ar_accept[i] = ~l2_m0_ar_done_SP[i]; - l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i]; - - end - end else if (l2_ar_drop[i]) begin - l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0; - l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0; - - end - - // combine done - l2_mx_ar_done[i] = l2_m0_ar_done_SP[i] & l2_m1_ar_done_SP[i]; - - l2_ar_done[i] = l2_mx_ar_done[i]; - end - - always_ff @(posedge Clk_CI) begin : L2_MX_AR_DONE_REG - if (Rst_RBI == 0) begin - l2_m0_ar_done_SP[i] <= 1'b0; - l2_m1_ar_done_SP[i] <= 1'b0; - end else if (l2_mx_ar_done[i]) begin - l2_m0_ar_done_SP[i] <= 1'b0; - l2_m1_ar_done_SP[i] <= 1'b0; - end else begin - l2_m0_ar_done_SP[i] <= l2_m0_ar_done_SP[i] | l2_m0_ar_done[i]; - l2_m1_ar_done_SP[i] <= l2_m1_ar_done_SP[i] | l2_m1_ar_done[i]; - end - end - - end // AX_SPLIT - endgenerate // AX_SPLIT - - // }}} - - // HANDSHAKE SPLITS {{{ - // ██╗ ██╗███████╗ ███████╗██████╗ ██╗ ██╗████████╗ - // ██║ ██║██╔════╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝ - // ███████║███████╗ ███████╗██████╔╝██║ ██║ ██║ - // ██╔══██║╚════██║ ╚════██║██╔═══╝ ██║ ██║ ██║ - // ██║ ██║███████║ ███████║██║ ███████╗██║ ██║ - // ╚═╝ ╚═╝╚══════╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝ - // - /* - * We need to perform combined handshakes with multiple AXI modules - * upon transactions drops, accepts, saves etc. from two TLBs. - */ - generate for (i = 0; i < N_PORTS; i++) begin : HANDSHAKE_SPLIT - - assign l1_xw_accept[i] = int_wtrans_accept[i] & ~aw_out_stall[i]; - assign int_wtrans_sent[i] = l1_xw_done[i]; - - assign l1_ar_accept[i] = int_rtrans_accept[i]; - assign int_rtrans_sent[i] = l1_ar_done[i]; - - /* - * L1 AW sender + W buffer handshake split - */ - // forward - assign l1_aw_accept[i] = l1_xw_accept[i] & ~l1_aw_done_SP[i]; - assign l1_w_accept[i] = l1_xw_accept[i] & ~l1_w_done_SP[i]; - - assign l1_aw_save[i] = l1_xw_save[i] & ~l1_aw_done_SP[i]; - assign l1_w_save[i] = l1_xw_save[i] & ~l1_w_done_SP[i]; - - assign l1_aw_drop[i] = l1_xw_drop[i] & ~l1_aw_done_SP[i]; - assign l1_w_drop[i] = l1_xw_drop[i] & ~l1_w_done_SP[i]; - - // backward - assign l1_xw_done[i] = l1_aw_done_SP[i] & l1_w_done_SP[i]; - - always_ff @(posedge Clk_CI) begin : L1_XW_HS_SPLIT - if (Rst_RBI == 0) begin - l1_aw_done_SP[i] <= 1'b0; - l1_w_done_SP[i] <= 1'b0; - end else if (l1_xw_done[i]) begin - l1_aw_done_SP[i] <= 1'b0; - l1_w_done_SP[i] <= 1'b0; - end else begin - l1_aw_done_SP[i] <= l1_aw_done_SP[i] | l1_aw_done[i]; - l1_w_done_SP[i] <= l1_w_done_SP[i] | l1_w_done[i]; - end - end - - if (ENABLE_L2TLB[i] == 1) begin : L2_HS_SPLIT - - /* - * L1 AR sender + R sender handshake split - * - * AR and R do not need to be strictly in sync. We thus use separate handshakes. - * But the handshake signals for the R sender are multiplexed with the those for - * the L2. However, L2_ACCEPT_DROP_SAVE has always higher priority. - */ - assign lx_r_drop[i] = l2_r_drop[i] | l1_r_drop[i]; - assign l1_r_done[i] = l2_r_drop[i] ? 1'b0 : lx_r_done[i]; - assign l2_r_done[i] = l2_r_drop[i] ? 
lx_r_done[i] : 1'b0; - - /* - * L2 AW sender + W buffer handshake split - */ - // forward - assign l2_aw_accept[i] = l2_xw_accept[i] & ~l2_aw_done_SP[i]; - assign l2_w_accept[i] = l2_xw_accept[i] & ~l2_w_done_SP[i]; - - assign l2_aw_drop[i] = l2_xw_drop[i] & ~l2_aw_done_SP[i]; - assign l2_w_drop[i] = l2_xw_drop[i] & ~l2_w_done_SP[i]; - - // backward - assign l2_xw_done[i] = l2_aw_done_SP[i] & l2_w_done_SP[i]; - - always_ff @(posedge Clk_CI) begin : L2_XW_HS_SPLIT - if (Rst_RBI == 0) begin - l2_aw_done_SP[i] <= 1'b0; - l2_w_done_SP[i] <= 1'b0; - end else if (l2_xw_done[i]) begin - l2_aw_done_SP[i] <= 1'b0; - l2_w_done_SP[i] <= 1'b0; - end else begin - l2_aw_done_SP[i] <= l2_aw_done_SP[i] | l2_aw_done[i]; - l2_w_done_SP[i] <= l2_w_done_SP[i] | l2_w_done[i]; - end - end - - /* - * L2 AR + R sender handshake split - */ - // forward - assign l2_ar_drop[i] = l2_xr_drop[i] & ~l2_ar_done_SP[i]; - assign l2_r_drop[i] = l2_xr_drop[i] & ~l2_r_done_SP[i]; - - // backward - make sure to always clear L2_XR_HS_SPLIT - always_comb begin - if (l2_xr_drop[i]) begin - l2_xr_done[i] = l2_ar_done_SP[i] & l2_r_done_SP[i]; - end else begin - l2_xr_done[i] = l2_ar_done_SP[i]; - end - end - - always_ff @(posedge Clk_CI) begin : L2_XR_HS_SPLIT - if (Rst_RBI == 0) begin - l2_ar_done_SP[i] <= 1'b0; - l2_r_done_SP[i] <= 1'b0; - end else if (l2_xr_done[i]) begin - l2_ar_done_SP[i] <= 1'b0; - l2_r_done_SP[i] <= 1'b0; - end else begin - l2_ar_done_SP[i] <= l2_ar_done_SP[i] | l2_ar_done[i]; - l2_r_done_SP[i] <= l2_r_done_SP[i] | l2_r_done[i]; - end - end - - end else begin // if (ENABLE_L2TLB[i] == 1) - - assign lx_r_drop[i] = l1_r_drop[i]; - assign l1_r_done[i] = lx_r_done[i]; - - assign l2_aw_accept[i] = 1'b0; - assign l2_w_accept[i] = 1'b0; - assign l2_aw_drop[i] = 1'b0; - assign l2_w_drop[i] = 1'b0; - assign l2_xw_done[i] = 1'b0; - assign l2_aw_done_SP[i] = 1'b0; - assign l2_w_done_SP[i] = 1'b0; - - assign l2_ar_accept[i] = 1'b0; - assign l2_ar_drop[i] = 1'b0; - assign l2_r_drop[i] = 1'b0; - assign l2_xr_done[i] = 1'b0; - assign l2_r_done[i] = 1'b0; - assign l2_ar_done_SP[i] = 1'b0; - assign l2_r_done_SP[i] = 1'b0; - - end // if (ENABLE_L2TLB[i] == 1) - - end // HANDSHAKE_SPLIT - endgenerate // HANDSHAKE_SPLIT - - // }}} - - // L2 TLB {{{ - // ██╗ ██████╗ ████████╗██╗ ██████╗ - // ██║ ╚════██╗ ╚══██╔══╝██║ ██╔══██╗ - // ██║ █████╔╝ ██║ ██║ ██████╔╝ - // ██║ ██╔═══╝ ██║ ██║ ██╔══██╗ - // ███████╗███████╗ ██║ ███████╗██████╔╝ - // ╚══════╝╚══════╝ ╚═╝ ╚══════╝╚═════╝ - // - /* - * l2_tlb - * - * The L2 TLB translates addresses upon misses in the L1 TLB (rab_core). - * - * It supports one ongoing translation at a time. If an L1 miss occurs while the L2 is busy, - * the L1 is stalled untill the L2 is available again. - * - */ - generate for (i = 0; i < N_PORTS; i++) begin : L2_TLB - if (ENABLE_L2TLB[i] == 1) begin : L2_TLB - - /* - * L1 output selector - */ - assign L1OutRwType_D[i] = int_wtrans_drop[i] ? 1'b1 : 1'b0; - assign L1OutProt_D[i] = rab_prot[i]; - assign L1OutMulti_D[i] = rab_multi[i]; - - /* - * L1 output control + L1_DROP_BUF, L2_IN_BUF management - * - * Forward the L1 drop request to AR/AW sender modules if - * 1. the transactions needs to be dropped (L1 multi, prot, prefetch), or - * 2. if a lookup in the L2 TLB is required (L1 miss) and the input buffer is not full. - * - * The AR/AW senders do not support more than 1 oustanding L1 miss. 
The push back towards - * the upstream is realized by not accepting the save request (saving the L1 transaction) - * in the senders as long as the L2 TLB is busy or has valid output. This ultimately - * blocks the L1 TLB. - * - * Together with the AW drop/save, we also perform the W drop/save as AW and W need to - * absolutely remain in order. In contrast, the R drop is performed - */ - always_comb begin : L1_DROP_SAVE - - l1_ar_drop[i] = 1'b0; - l1_ar_save[i] = 1'b0; - l1_xw_drop[i] = 1'b0; - l1_xw_save[i] = 1'b0; - - l1_id_drop[i] = L1OutId_D[i]; - l1_len_drop[i] = L1OutLen_D[i]; - l1_prefetch_drop[i] = rab_prefetch[i]; - l1_hit_drop[i] = 1'b1; // there are no drops for L1 misses - - L1DropEn_S[i] = 1'b0; - L2InEn_S[i] = 1'b0; - - if ( rab_prot[i] | rab_multi[i] | rab_prefetch[i] ) begin - // 1. Drop - l1_ar_drop[i] = int_rtrans_drop[i] & ~L1DropValid_SP[i]; - l1_xw_drop[i] = int_wtrans_drop[i] & ~L1DropValid_SP[i]; - - // Store to L1_DROP_BUF upon handshake - L1DropEn_S[i] = (l1_ar_drop[i] & l1_ar_done[i]) | - (l1_xw_drop[i] & l1_xw_done[i]); - - end else if ( rab_miss[i] ) begin - // 2. Save - Make sure L2 is really available. - l1_ar_save[i] = int_rtrans_drop[i] & ~L2Busy_S[i]; - l1_xw_save[i] = int_wtrans_drop[i] & ~L2Busy_S[i]; - - // Store to L2_IN_BUF upon handshake - triggers the L2 TLB - L2InEn_S[i] = (l1_ar_save[i] & l1_ar_done[i]) | - (l1_xw_save[i] & l1_xw_done[i]); - end - end - - /* - * L2 output control + L2_OUT_BUF management + R/B sender control + W buffer control - * - * Perform L1 R transaction drops unless the L2 output buffer holds valid data. The AXI specs - * require the B response to be sent only after consuming/discarding the corresponding data - * in the W channel. Thus, we only send L2 drop request to the W buffer here. The drop - * request to the B sender is then sent by the W buffer autonomously. - * - * L1 AW/W drop requests are managed by L1_DROP_SAVE. - */ - always_comb begin : L2_ACCEPT_DROP_SAVE - - l2_ar_addr[i] = 'b0; - l2_aw_addr[i] = 'b0; - l2_ar_accept[i] = 1'b0; - l2_xr_drop[i] = 1'b0; - l2_xw_accept[i] = 1'b0; - l2_xw_drop[i] = 1'b0; - - l1_r_drop[i] = 1'b0; - - lx_id_drop[i] = 'b0; - lx_len_drop[i] = 'b0; - lx_prefetch_drop[i] = 1'b0; - lx_hit_drop[i] = 1'b0; - - L1DropValid_SN[i] = L1DropValid_SP[i] | L1DropEn_S[i]; - L2OutValid_SN[i] = L2OutValid_SP[i]; - L2OutReady_S[i] = 1'b0; - L2OutEn_S[i] = 1'b0; - - L2Miss_S[i] = 1'b0; - int_multi[i] = 1'b0; - int_prot[i] = 1'b0; - - if (L2OutValid_SP[i] == 1'b0) begin - - // Drop L1 from R senders - if (L1DropValid_SP[i] == 1'b1) begin - - // Only perform the R sender drop here. - if (~L1DropRwType_DP[i]) begin - - l1_r_drop[i] = 1'b1; - lx_id_drop[i] = L1DropId_DP[i]; - lx_len_drop[i] = L1DropLen_DP[i]; - lx_prefetch_drop[i] = L1DropPrefetch_S[i]; - lx_hit_drop[i] = 1'b1; // there are no drops for L1 misses - - // Invalidate L1_DROP_BUF upon handshake - if ( l1_r_drop[i] & l1_r_done[i] ) begin - - L1DropValid_SN[i] = 1'b0; - int_prot[i] = L1DropProt_DP[i]; - int_multi[i] = L1DropMulti_DP[i]; - end - - end else begin - // Invalidate L1_DROP_BUF - L1DropValid_SN[i] = 1'b0; - int_prot[i] = L1DropProt_DP[i]; - int_multi[i] = L1DropMulti_DP[i]; - end - end - - end else begin // L2_OUT_BUF has valid data - - if ( L2OutHit_SP[i] & ~(L2OutPrefetch_S[i] | L2OutProt_SP[i] | L2OutMulti_SP[i]) ) begin - - l2_ar_addr[i] = L2OutAddr_DP[i]; - l2_aw_addr[i] = L2OutAddr_DP[i]; - - l2_ar_accept[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1; - l2_xw_accept[i] = L2OutRwType_DP[i] ? 
1'b1 : 1'b0; - - // Invalidate L2_OUT_BUF upon handshake - L2OutValid_SN[i] = ~( (l2_ar_accept[i] & l2_ar_done[i]) | - (l2_xw_accept[i] & l2_xw_done[i]) ); - end else begin - - lx_id_drop[i] = L2OutId_DP[i]; - lx_len_drop[i] = L2OutLen_DP[i]; - lx_prefetch_drop[i] = L2OutPrefetch_S[i]; - lx_hit_drop[i] = L2OutHit_SP[i]; - - // The l2_xr_drop will also perform the handshake with the R sender - l2_xr_drop[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1; - l2_xw_drop[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0; - - // Invalidate L1_DROP_BUF upon handshake - if ( (l2_xr_drop[i] & l2_xr_done[i]) | (l2_xw_drop[i] & l2_xw_done[i]) ) begin - - L2OutValid_SN[i] = 1'b0; - L2Miss_S[i] = ~L2OutHit_SP[i]; - int_prot[i] = L2OutProt_SP[i]; - int_multi[i] = L2OutMulti_SP[i]; - end - end - end - - // Only accept new L2 output after ongoing drops have finished. - if ( (l2_xr_drop[i] == l2_xr_done[i]) & - (l2_xw_drop[i] == l2_xw_done[i]) & - (l1_r_drop[i] == l1_r_done[i] ) ) begin - // Store to L2_OUT_BUF upon handshake with L2 TLB module - if ( (L2OutValid_SP[i] == 1'b0) && (L2OutValid_S[i] == 1'b1) ) begin - L2OutValid_SN[i] = 1'b1; - L2OutReady_S[i] = 1'b1; - L2OutEn_S[i] = 1'b1; - end - end - end - - /* - * L1 drop buffer - * - * Used in case of multi, prot and prefetch hits in the L1 TLB. - */ - always_ff @(posedge Clk_CI) begin : L1_DROP_BUF - if (Rst_RBI == 0) begin - L1DropProt_DP[i] <= 1'b0; - L1DropMulti_DP[i] <= 1'b0; - L1DropRwType_DP[i] <= 1'b0; - L1DropUser_DP[i] <= 'b0; - L1DropId_DP[i] <= 'b0; - L1DropLen_DP[i] <= 'b0; - L1DropAddr_DP[i] <= 'b0; - end else if (L1DropEn_S[i] == 1'b1) begin - L1DropProt_DP[i] <= L1OutProt_D[i] ; - L1DropMulti_DP[i] <= L1OutMulti_D[i] ; - L1DropRwType_DP[i] <= L1OutRwType_D[i]; - L1DropUser_DP[i] <= L1OutUser_D[i] ; - L1DropId_DP[i] <= L1OutId_D[i] ; - L1DropLen_DP[i] <= L1OutLen_D[i] ; - L1DropAddr_DP[i] <= L1OutAddr_D[i] ; - end - end // always_ff @ (posedge Clk_CI) - - /* - * L2 input buffer - * - * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB. 
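The L2_IN_BUF register that follows simply captures the L1 request fields on L2InEn_S, so the L2 TLB never sees a combinational path from the L1 side. A stand-alone nmigen sketch of that capture register is shown here; the field widths and names are illustrative assumptions, not the module's actual interface:

from nmigen import Elaboratable, Module, Signal


class L2InBuf(Elaboratable):
    """Capture the L1 request so the L2 TLB only ever sees registered inputs."""

    def __init__(self, addr_width=32, id_width=8, len_width=8, user_width=6):
        self.en = Signal()              # capture enable (cf. L2InEn_S)
        self.rw_i = Signal()            # 1 = write, 0 = read
        self.addr_i = Signal(addr_width)
        self.id_i = Signal(id_width)
        self.len_i = Signal(len_width)
        self.user_i = Signal(user_width)
        self.rw_o = Signal()
        self.addr_o = Signal(addr_width)
        self.id_o = Signal(id_width)
        self.len_o = Signal(len_width)
        self.user_o = Signal(user_width)

    def elaborate(self, platform=None):
        m = Module()
        with m.If(self.en):
            # registered copy of every request field
            m.d.sync += [self.rw_o.eq(self.rw_i),
                         self.addr_o.eq(self.addr_i),
                         self.id_o.eq(self.id_i),
                         self.len_o.eq(self.len_i),
                         self.user_o.eq(self.user_i)]
        return m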
- */ - always_ff @(posedge Clk_CI) begin : L2_IN_BUF - if (Rst_RBI == 0) begin - L2InRwType_DP[i] <= 1'b0; - L2InUser_DP[i] <= 'b0; - L2InId_DP[i] <= 'b0; - L2InLen_DP[i] <= 'b0; - L2InAddr_DP[i] <= 'b0; - end else if (L2InEn_S[i] == 1'b1) begin - L2InRwType_DP[i] <= L1OutRwType_D[i]; - L2InUser_DP[i] <= L1OutUser_D[i] ; - L2InId_DP[i] <= L1OutId_D[i] ; - L2InLen_DP[i] <= L1OutLen_D[i] ; - L2InAddr_DP[i] <= L1OutAddr_D[i] ; - end - end // always_ff @ (posedge Clk_CI) - - l2_tlb - #( - .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), - .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), - .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ), - .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ), - .N_SETS ( `RAB_L2_N_SETS ), - .N_OFFSETS ( `RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS ), - .N_PAR_VA_RAMS ( `RAB_L2_N_PAR_VA_RAMS ), - .HIT_OFFSET_STORE_WIDTH ( log2(`RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS) ) - ) - u_l2_tlb - ( - .clk_i ( Clk_CI ), - .rst_ni ( Rst_RBI ), - - // Config inputs - .we_i ( L2CfgWE_S[i] ), - .waddr_i ( L2CfgWAddr_D[i] ), - .wdata_i ( L2CfgWData_D[i] ), - - // Request input - .start_i ( L2InEn_S[i] ), - .busy_o ( L2Busy_S[i] ), - .rw_type_i ( L2InRwType_DP[i] ), - .in_addr_i ( L2InAddr_DP[i] ), - - // Response output - .out_ready_i ( L2OutReady_S[i] ), - .out_valid_o ( L2OutValid_S[i] ), - .hit_o ( L2OutHit_SN[i] ), - .miss_o ( L2OutMiss_SN[i] ), - .prot_o ( L2OutProt_SN[i] ), - .multi_o ( L2OutMulti_SN[i] ), - .cache_coherent_o ( L2OutCC_SN[i] ), - .out_addr_o ( L2OutAddr_DN[i] ) - ); - - /* - * L2 output buffer - * - * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB. - */ - always_ff @(posedge Clk_CI) begin : L2_OUT_BUF - if (Rst_RBI == 0) begin - L2OutRwType_DP[i] <= 1'b0; - L2OutUser_DP[i] <= 'b0; - L2OutLen_DP[i] <= 'b0; - L2OutId_DP[i] <= 'b0; - L2OutInAddr_DP[i] <= 'b0; - - L2OutHit_SP[i] <= 1'b0; - L2OutMiss_SP[i] <= 1'b0; - L2OutProt_SP[i] <= 1'b0; - L2OutMulti_SP[i] <= 1'b0; - L2OutCC_SP[i] <= 1'b0; - L2OutAddr_DP[i] <= 'b0; - end else if (L2OutEn_S[i] == 1'b1) begin - L2OutRwType_DP[i] <= L2InRwType_DP[i]; - L2OutUser_DP[i] <= L2InUser_DP[i] ; - L2OutLen_DP[i] <= L2InLen_DP[i] ; - L2OutId_DP[i] <= L2InId_DP[i] ; - L2OutInAddr_DP[i] <= L2InAddr_DP[i] ; - - L2OutHit_SP[i] <= L2OutHit_SN[i] ; - L2OutMiss_SP[i] <= L2OutMiss_SN[i] ; - L2OutProt_SP[i] <= L2OutProt_SN[i] ; - L2OutMulti_SP[i] <= L2OutMulti_SN[i]; - L2OutCC_SP[i] <= L2OutCC_SN[i] ; - L2OutAddr_DP[i] <= L2OutAddr_DN[i] ; - end - end // always_ff @ (posedge Clk_CI) - - always_ff @(posedge Clk_CI) begin : BUF_VALID - if (Rst_RBI == 0) begin - L1DropValid_SP[i] = 1'b0; - L2OutValid_SP[i] = 1'b0; - end else begin - L1DropValid_SP[i] = L1DropValid_SN[i]; - L2OutValid_SP[i] = L2OutValid_SN[i]; - end - end - - always_comb begin : BUF_TO_PREFETCH - // L1 Drop Buf - if (L1DropUser_DP[i] == {AXI_USER_WIDTH{1'b1}}) - L1DropPrefetch_S[i] = 1'b1; - else - L1DropPrefetch_S[i] = 1'b0; - - // L2 Out Buf - if (L2OutUser_DP[i] == {AXI_USER_WIDTH{1'b1}}) - L2OutPrefetch_S[i] = 1'b1; - else - L2OutPrefetch_S[i] = 1'b0; - end - - assign l2_cache_coherent[i] = L2OutCC_SP[i]; - assign int_miss[i] = L2Miss_S[i]; - - end else begin : L2_TLB_STUB // if (ENABLE_L2TLB[i] == 1) - - assign l1_ar_drop[i] = int_rtrans_drop[i]; - assign l1_r_drop[i] = int_rtrans_drop[i]; - assign l1_xw_drop[i] = int_wtrans_drop[i]; - - assign l1_ar_save[i] = 1'b0; - assign l1_xw_save[i] = 1'b0; - assign l2_xw_accept[i] = 1'b0; - assign l2_xr_drop[i] = 1'b0; - assign l2_xw_drop[i] = 1'b0; - - assign l2_ar_addr[i] = 'b0; - assign l2_aw_addr[i] = 'b0; - 
- assign l1_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] : - int_rtrans_drop[i] ? int_arid[i] : - '0; - assign l1_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] : - int_rtrans_drop[i] ? int_arlen[i] : - '0; - assign l1_prefetch_drop[i] = rab_prefetch[i]; - assign l1_hit_drop[i] = ~rab_miss[i]; - - assign lx_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] : - int_rtrans_drop[i] ? int_arid[i] : - '0; - assign lx_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] : - int_rtrans_drop[i] ? int_arlen[i] : - '0; - assign lx_prefetch_drop[i] = rab_prefetch[i]; - assign lx_hit_drop[i] = ~rab_miss[i]; - - assign l2_cache_coherent[i] = 1'b0; - - assign int_miss[i] = rab_miss[i]; - assign int_prot[i] = rab_prot[i]; - assign int_multi[i] = rab_multi[i]; - - // unused signals - assign L2Miss_S[i] = 1'b0; - - assign L1OutRwType_D[i] = 1'b0; - assign L1OutProt_D[i] = 1'b0; - assign L1OutMulti_D[i] = 1'b0; - - assign L1DropRwType_DP[i] = 1'b0; - assign L1DropUser_DP[i] = 'b0; - assign L1DropId_DP[i] = 'b0; - assign L1DropLen_DP[i] = 'b0; - assign L1DropAddr_DP[i] = 'b0; - assign L1DropProt_DP[i] = 1'b0; - assign L1DropMulti_DP[i] = 1'b0; - - assign L1DropEn_S[i] = 1'b0; - assign L1DropPrefetch_S[i] = 1'b0; - assign L1DropValid_SN[i] = 1'b0; - assign L1DropValid_SP[i] = 1'b0; - - assign L2InRwType_DP[i] = 1'b0; - assign L2InUser_DP[i] = 'b0; - assign L2InId_DP[i] = 'b0; - assign L2InLen_DP[i] = 'b0; - assign L2InAddr_DP[i] = 'b0; - - assign L2InEn_S[i] = 1'b0; - - assign L2OutHit_SN[i] = 1'b0; - assign L2OutMiss_SN[i] = 1'b0; - assign L2OutProt_SN[i] = 1'b0; - assign L2OutMulti_SN[i] = 1'b0; - assign L2OutCC_SN[i] = 1'b0; - assign L2OutAddr_DN[i] = 'b0; - - assign L2OutRwType_DP[i] = 1'b0; - assign L2OutUser_DP[i] = 'b0; - assign L2OutId_DP[i] = 'b0; - assign L2OutLen_DP[i] = 'b0; - assign L2OutInAddr_DP[i] = 'b0; - assign L2OutHit_SP[i] = 1'b0; - assign L2OutMiss_SP[i] = 1'b0; - assign L2OutProt_SP[i] = 1'b0; - assign L2OutMulti_SP[i] = 1'b0; - assign L2OutCC_SP[i] = 1'b0; - assign L2OutAddr_DP[i] = 'b0; - - assign L2OutEn_S[i] = 1'b0; - assign L2OutPrefetch_S[i] = 1'b0; - assign L2Busy_S[i] = 1'b0; - assign L2OutValid_S[i] = 1'b0; - assign L2OutValid_SN[i] = 1'b0; - assign L2OutValid_SP[i] = 1'b0; - assign L2OutReady_S[i] = 1'b0; - - end // !`ifdef ENABLE_L2TLB - end // for (i = 0; i < N_PORTS; i++) - endgenerate - -// }}} -""" -# endmodule -# -# -# // vim: ts=2 sw=2 sts=2 et nosmartindent autoindent foldmethod=marker -# -# diff --git a/src/soc/iommu/axi_rab/check_ram.py b/src/soc/iommu/axi_rab/check_ram.py deleted file mode 100644 index 31bf32ea..00000000 --- a/src/soc/iommu/axi_rab/check_ram.py +++ /dev/null @@ -1,240 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class check_ram(Elaboratable): - - def __init__(self): - self.clk_i = Signal() # input - self.rst_ni = Signal() # input - self.in_addr = Signal(ADDR_WIDTH) # input - self.rw_type = Signal() # input - self.ram_we = Signal() # input - self.port0_addr = Signal(1+ERROR p_expression_25) # input - self.port1_addr = Signal(1+ERROR p_expression_25) # input - self.ram_wdata = Signal(RAM_DATA_WIDTH) # input - self.output_sent = Signal() # input - self.output_valid = Signal() # input - self.offset_addr_d = Signal(OFFSET_WIDTH) # input - self.hit_addr = Signal(1+ERROR p_expression_25) # output - self.master = Signal() # output - self.hit = Signal() # output - self.multi_hit = Signal() # output - self.prot = Signal() # output - - def elaborate(self, platform=None): - m = Module() - 
return m - - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# //import CfMath::log2; -# -# //`define MULTI_HIT_FULL_SET -# -# module check_ram -# //#( -# // parameter ADDR_WIDTH = 32, -# // parameter RAM_DATA_WIDTH = 32, -# // parameter PAGE_SIZE = 4096, // 4kB -# // parameter SET_WIDTH = 5, -# // parameter OFFSET_WIDTH = 4 -# // ) -# ( -# input logic clk_i, -# input logic rst_ni, -# input logic [ADDR_WIDTH-1:0] in_addr, -# input logic rw_type, // 1 => write, 0=> read -# input logic ram_we, -# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, -# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr, -# input logic [RAM_DATA_WIDTH-1:0] ram_wdata, -# input logic output_sent, -# input logic output_valid, -# input logic [OFFSET_WIDTH-1:0] offset_addr_d, -# output logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr, -# output logic master, -# output logic hit, -# output logic multi_hit, -# output logic prot -# ); -# -""" #docstring_begin - - localparam IGNORE_LSB = log2(PAGE_SIZE); // 12 - - logic [RAM_DATA_WIDTH-1:0] port0_data_o, port1_data_o; // RAM read data outputs - logic port0_hit, port1_hit; // Ram output matches in_addr - - logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr_saved, port1_addr_saved; - - // Hit FSM Signals - typedef enum logic {SEARCH, HIT} hit_state_t; - hit_state_t hit_SP; // Hit FSM state - hit_state_t hit_SN; // Hit FSM next state - - // Multi Hit FSM signals -`ifdef MULTI_HIT_FULL_SET - typedef enum logic[1:0] {NO_HITS, ONE_HIT, MULTI_HIT} multi_state_t; - multi_state_t multi_SP; // Multi Hit FSM state - multi_state_t multi_SN; // Multi Hit FSM next state - - logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_saved; - logic master_saved; -`endif - - //// --------------- Block RAM (Dual Port) -------------- //// - - // The outputs of the BRAMs are only valid if in the previous cycle: - // 1. the inputs were valid, and - // 2. the BRAM was not written to. - // Otherwise, the outputs must be ignored which is controlled by the output_valid signal. - // This signal is driven by the uppler level L2 TLB module. 
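The generated check_ram stub above still carries sv2nmigen error placeholders ("1+ERROR p_expression_25") where the SystemVerilog width expression SET_WIDTH+OFFSET_WIDTH+1 could not be evaluated. Going by the commented-out parameter defaults (ADDR_WIDTH=32, RAM_DATA_WIDTH=32, SET_WIDTH=5, OFFSET_WIDTH=4), a repaired constructor might look roughly as follows; this is only a sketch of the port list, the module body itself remains untranslated:

from nmigen import Signal, Module, Elaboratable


class check_ram(Elaboratable):

    def __init__(self, ADDR_WIDTH=32, RAM_DATA_WIDTH=32,
                 SET_WIDTH=5, OFFSET_WIDTH=4):
        port_addr_width = SET_WIDTH + OFFSET_WIDTH + 1  # matches the SV port widths
        self.clk_i = Signal()
        self.rst_ni = Signal()
        self.in_addr = Signal(ADDR_WIDTH)
        self.rw_type = Signal()                         # 1 = write, 0 = read
        self.ram_we = Signal()
        self.port0_addr = Signal(port_addr_width)
        self.port1_addr = Signal(port_addr_width)
        self.ram_wdata = Signal(RAM_DATA_WIDTH)
        self.output_sent = Signal()
        self.output_valid = Signal()
        self.offset_addr_d = Signal(OFFSET_WIDTH)
        self.hit_addr = Signal(port_addr_width)
        self.master = Signal()
        self.hit = Signal()
        self.multi_hit = Signal()
        self.prot = Signal()

    def elaborate(self, platform=None):
        m = Module()
        # TODO: body not yet translated from the SystemVerilog below
        return m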
- ram_tp_no_change #( - .ADDR_WIDTH( SET_WIDTH+OFFSET_WIDTH+1 ), - .DATA_WIDTH( RAM_DATA_WIDTH ) - ) - ram_tp_no_change_0 - ( - .clk ( clk_i ), - .we ( ram_we ), - .addr0 ( port0_addr ), - .addr1 ( port1_addr ), - .d_i ( ram_wdata ), - .d0_o ( port0_data_o ), - .d1_o ( port1_data_o ) - ); - - //// Check Ram Outputs - assign port0_hit = (port0_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port0_data_o[RAM_DATA_WIDTH-1:4]); - assign port1_hit = (port1_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port1_data_o[RAM_DATA_WIDTH-1:4]); - //// ----------------------------------------------------- ///// - - //// ------------------- Check if Hit ------------------------ //// - // FSM - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - hit_SP <= SEARCH; - end else begin - hit_SP <= hit_SN; - end - end - - always_ff @(posedge clk_i, negedge rst_ni) begin - if (!rst_ni) begin - port0_addr_saved <= '0; - port1_addr_saved <= '0; - end else begin - port0_addr_saved <= port0_addr; - port1_addr_saved <= port1_addr; - end - end - - always_comb begin - hit_SN = hit_SP; - hit = 1'b0; - hit_addr = 0; - master = 1'b0; - unique case(hit_SP) - SEARCH : - if (output_valid) - if (port0_hit || port1_hit) begin - hit_SN = HIT; - hit = 1'b1; - hit_addr = port0_hit ? {port0_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} : - port1_hit ? {port1_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} : - 0; - master = port0_hit ? port0_data_o[3] : - port1_hit ? port1_data_o[3] : - 1'b0; - end - - HIT : begin -`ifdef MULTI_HIT_FULL_SET // Since the search continues after the first hit, it needs to be saved to be accessed later. - hit = 1'b1; - hit_addr = hit_addr_saved; - master = master_saved; -`endif - if (output_sent) - hit_SN = SEARCH; - end - - default : begin - hit_SN = SEARCH; - end - endcase // case (hit_SP) - end // always_comb begin - - //// ------------------------------------------- //// - - assign prot = output_valid && port0_hit ? ((~port0_data_o[2] && rw_type) || (~port0_data_o[1] && ~rw_type)) : - output_valid && port1_hit ? 
((~port1_data_o[2] && rw_type) || (~port1_data_o[1] && ~rw_type)) : - 1'b0; - - //// ------------------- Multi ------------------- //// -`ifdef MULTI_HIT_FULL_SET - - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - hit_addr_saved <= 0; - master_saved <= 1'b0; - end else if (output_valid) begin - hit_addr_saved <= hit_addr; - master_saved <= master; - end - end - - // FSM - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - multi_SP <= NO_HITS; - end else begin - multi_SP <= multi_SN; - end - end - - always_comb begin - multi_SN = multi_SP; - multi_hit = 1'b0; - unique case(multi_SP) - NO_HITS : - if(output_valid && (port0_hit && port1_hit)) begin - multi_SN = MULTI_HIT; - multi_hit = 1'b1; - end else if(output_valid && (port0_hit || port1_hit)) - multi_SN = ONE_HIT; - - ONE_HIT : - if(output_valid && (port0_hit || port1_hit)) begin - multi_SN = MULTI_HIT; - multi_hit = 1'b1; - end else if (output_sent) - multi_SN = NO_HITS; - - MULTI_HIT : begin - multi_hit = 1'b1; - if (output_sent) - multi_SN = NO_HITS; - end - - endcase // case (multi_SP) - end // always_comb begin - -`else // !`ifdef MULTI_HIT_FULL_SET - assign multi_hit = output_valid && port0_hit && port1_hit; -`endif // !`ifdef MULTI_HIT_FULL_SET - //// ------------------------------------------- //// -""" -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/coreconfig.py b/src/soc/iommu/axi_rab/coreconfig.py deleted file mode 100644 index 247d0ce3..00000000 --- a/src/soc/iommu/axi_rab/coreconfig.py +++ /dev/null @@ -1,6 +0,0 @@ -class CoreConfig: - def __init__(self): - self.N_SLICES = 16 - self.N_REGS = 4*self.N_SLICES - self.ADDR_WIDTH_PHYS = 40 - self.ADDR_WIDTH_VIRT = 32 diff --git a/src/soc/iommu/axi_rab/fsm.py b/src/soc/iommu/axi_rab/fsm.py deleted file mode 100644 index d64b1cb4..00000000 --- a/src/soc/iommu/axi_rab/fsm.py +++ /dev/null @@ -1,243 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class fsm(Elaboratable): - - def __init__(self): - self.Clk_CI = Signal() # input - self.Rst_RBI = Signal() # input - self.port1_addr_valid_i = Signal() # input - self.port2_addr_valid_i = Signal() # input - self.port1_sent_i = Signal() # input - self.port2_sent_i = Signal() # input - self.select_i = Signal() # input - self.no_hit_i = Signal() # input - self.multi_hit_i = Signal() # input - self.no_prot_i = Signal() # input - self.prefetch_i = Signal() # input - self.out_addr_i = Signal(AXI_M_ADDR_WIDTH) # input - self.cache_coherent_i = Signal() # input - self.port1_accept_o = Signal() # output - self.port1_drop_o = Signal() # output - self.port1_miss_o = Signal() # output - self.port2_accept_o = Signal() # output - self.port2_drop_o = Signal() # output - self.port2_miss_o = Signal() # output - self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output - self.cache_coherent_o = Signal() # output - self.miss_o = Signal() # output - self.multi_o = Signal() # output - self.prot_o = Signal() # output - self.prefetch_o = Signal() # output - self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input - self.in_id_i = Signal(AXI_ID_WIDTH) # input - self.in_len_i = Signal(8) # input - self.in_user_i = Signal(AXI_USER_WIDTH) # input - self.in_addr_o = Signal(AXI_S_ADDR_WIDTH) # output - self.in_id_o = Signal(AXI_ID_WIDTH) # output - self.in_len_o = Signal(8) # output - self.in_user_o = Signal(AXI_USER_WIDTH) # output - - def elaborate(self, platform=None): - m = Module() - return m - - -# // Copyright 2018 ETH Zurich and University of Bologna. 
-# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# //`timescale 1ns / 1ps -# -# module fsm -# #( -# parameter AXI_M_ADDR_WIDTH = 40, -# parameter AXI_S_ADDR_WIDTH = 32, -# parameter AXI_ID_WIDTH = 8, -# parameter AXI_USER_WIDTH = 6 -# ) -# ( -# input logic Clk_CI, -# input logic Rst_RBI, -# -# input logic port1_addr_valid_i, -# input logic port2_addr_valid_i, -# input logic port1_sent_i, -# input logic port2_sent_i, -# input logic select_i, -# input logic no_hit_i, -# input logic multi_hit_i, -# input logic no_prot_i, -# input logic prefetch_i, -# input logic [AXI_M_ADDR_WIDTH-1:0] out_addr_i, -# input logic cache_coherent_i, -# output logic port1_accept_o, -# output logic port1_drop_o, -# output logic port1_miss_o, -# output logic port2_accept_o, -# output logic port2_drop_o, -# output logic port2_miss_o, -# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o, -# output logic cache_coherent_o, -# output logic miss_o, -# output logic multi_o, -# output logic prot_o, -# output logic prefetch_o, -# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i, -# input logic [AXI_ID_WIDTH-1:0] in_id_i, -# input logic [7:0] in_len_i, -# input logic [AXI_USER_WIDTH-1:0] in_user_i, -# output logic [AXI_S_ADDR_WIDTH-1:0] in_addr_o, -# output logic [AXI_ID_WIDTH-1:0] in_id_o, -# output logic [7:0] in_len_o, -# output logic [AXI_USER_WIDTH-1:0] in_user_o -# ); -# -""" #docstring_begin - - //-------------Internal Signals---------------------- - - typedef enum logic {IDLE, WAIT} state_t; - logic state_SP; // Present state - logic state_SN; // Next State - - logic port1_accept_SN; - logic port1_drop_SN; - logic port1_miss_SN; - logic port2_accept_SN; - logic port2_drop_SN; - logic port2_miss_SN; - logic miss_SN; - logic multi_SN; - logic prot_SN; - logic prefetch_SN; - logic cache_coherent_SN; - logic [AXI_M_ADDR_WIDTH-1:0] out_addr_DN; - - logic out_reg_en_S; - - //----------FSM comb------------------------------ - - always_comb begin: FSM_COMBO - state_SN = state_SP; - - port1_accept_SN = 1'b0; - port1_drop_SN = 1'b0; - port1_miss_SN = 1'b0; - port2_accept_SN = 1'b0; - port2_drop_SN = 1'b0; - port2_miss_SN = 1'b0; - miss_SN = 1'b0; - multi_SN = 1'b0; - prot_SN = 1'b0; - prefetch_SN = 1'b0; - cache_coherent_SN = 1'b0; - out_addr_DN = '0; - - out_reg_en_S = 1'b0; // by default hold register output - - unique case(state_SP) - IDLE : - if ( (port1_addr_valid_i & select_i) | (port2_addr_valid_i & ~select_i) ) begin - out_reg_en_S = 1'b1; - state_SN = WAIT; - - // Select inputs for output registers - if (port1_addr_valid_i & select_i) begin - port1_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); - port1_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); - port1_miss_SN = no_hit_i; - port2_accept_SN = 1'b0; - port2_drop_SN = 1'b0; - port2_miss_SN = 1'b0; - end else if (port2_addr_valid_i & ~select_i) begin - port1_accept_SN = 1'b0; - port1_drop_SN = 1'b0; - port1_miss_SN = 1'b0; - port2_accept_SN = ~(no_hit_i | 
multi_hit_i | ~no_prot_i | prefetch_i); - port2_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); - port2_miss_SN = no_hit_i; - end - - miss_SN = port1_miss_SN | port2_miss_SN; - multi_SN = multi_hit_i; - prot_SN = ~no_prot_i; - prefetch_SN = ~no_hit_i & prefetch_i; - - cache_coherent_SN = cache_coherent_i; - out_addr_DN = out_addr_i; - end - - WAIT : - if ( port1_sent_i | port2_sent_i ) begin - out_reg_en_S = 1'b1; // "clear" the register - state_SN = IDLE; - end - - default : begin - state_SN = IDLE; - end - endcase - end - - //----------FSM seq------------------------------- - - always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: FSM_SEQ - if (Rst_RBI == 1'b0) - state_SP <= IDLE; - else - state_SP <= state_SN; - end - - //----------Output seq-------------------------- - - always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: OUTPUT_SEQ - if (Rst_RBI == 1'b0) begin - port1_accept_o = 1'b0; - port1_drop_o = 1'b0; - port1_miss_o = 1'b0; - port2_accept_o = 1'b0; - port2_drop_o = 1'b0; - port2_miss_o = 1'b0; - miss_o = 1'b0; - multi_o = 1'b0; - prot_o = 1'b0; - prefetch_o = 1'b0; - cache_coherent_o = 1'b0; - out_addr_o = '0; - in_addr_o = '0; - in_id_o = '0; - in_len_o = '0; - in_user_o = '0; - end else if (out_reg_en_S == 1'b1) begin - port1_accept_o = port1_accept_SN; - port1_drop_o = port1_drop_SN; - port1_miss_o = port1_miss_SN; - port2_accept_o = port2_accept_SN; - port2_drop_o = port2_drop_SN; - port2_miss_o = port2_miss_SN; - miss_o = miss_SN; - multi_o = multi_SN; - prot_o = prot_SN; - prefetch_o = prefetch_SN; - cache_coherent_o = cache_coherent_SN; - out_addr_o = out_addr_DN; - in_addr_o = in_addr_i; - in_id_o = in_id_i; - in_len_o = in_len_i; - in_user_o = in_user_i; - end - end // block: OUTPUT_SEQ -""" -# -# endmodule -# -# diff --git a/src/soc/iommu/axi_rab/l2_tlb.py b/src/soc/iommu/axi_rab/l2_tlb.py deleted file mode 100644 index 11983f64..00000000 --- a/src/soc/iommu/axi_rab/l2_tlb.py +++ /dev/null @@ -1,550 +0,0 @@ -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable - - -class l2_tlb(Elaboratable): - - def __init__(self): - self.clk_i = Signal() # input - self.rst_ni = Signal() # input - self.we_i = Signal() # input - self.waddr_i = Signal(AXI_LITE_ADDR_WIDTH) # input - self.wdata_i = Signal(AXI_LITE_DATA_WIDTH) # input - self.start_i = Signal() # input - self.busy_o = Signal() # output - self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input - self.rw_type_i = Signal() # input - self.out_ready_i = Signal() # input - self.out_valid_o = Signal() # output - self.hit_o = Signal() # output - self.miss_o = Signal() # output - self.prot_o = Signal() # output - self.multi_o = Signal() # output - self.cache_coherent_o = Signal() # output - self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output - - def elaborate(self, platform=None): - m = Module() - return m - - -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. 
See the License for the -# // specific language governing permissions and limitations under the License. -# -# //`include "pulp_soc_defines.sv" -# -# ////import CfMath::log2; -# -# //`define MULTI_HIT_FULL_SET // Enable full multi hit detection. Always the entire set is searched. -# //`define MULTI_HIT_CUR_CYCLE // Enable partial multi hit detection. Only multi hits in the same search cycle are detected. -# -# //`ifdef MULTI_HIT_FULL_SET -# // `ifndef MULTI_HIT_CUR_CYCLE -# // `define MULTI_HIT_CUR_CYCLE -# // `endif -# //`endif -# -# module l2_tlb -# //#( -# // parameter AXI_S_ADDR_WIDTH = 32, -# // parameter AXI_M_ADDR_WIDTH = 40, -# // parameter AXI_LITE_DATA_WIDTH = 64, -# // parameter AXI_LITE_ADDR_WIDTH = 32, -# // parameter N_SETS = 32, -# // parameter N_OFFSETS = 4, //per port. There are 2 ports. -# // parameter PAGE_SIZE = 4096, // 4kB -# // parameter N_PAR_VA_RAMS = 4, -# // parameter HIT_OFFSET_STORE_WIDTH = 2 // Num of bits of VA RAM offset stored. This should not be greater than OFFSET_WIDTH -# // ) -# ( -# input logic clk_i, -# input logic rst_ni, -# -# input logic we_i, -# input logic [AXI_LITE_ADDR_WIDTH-1:0] waddr_i, -# input logic [AXI_LITE_DATA_WIDTH-1:0] wdata_i, -# -# input logic start_i, -# output logic busy_o, -# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i, -# input logic rw_type_i, //1 => write, 0=> read -# -# input logic out_ready_i, -# output logic out_valid_o, -# output logic hit_o, -# output logic miss_o, -# output logic prot_o, -# output logic multi_o, -# output logic cache_coherent_o, -# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o -# ); -# -""" #docstring_begin - - localparam VA_RAM_DEPTH = N_SETS * N_OFFSETS * 2; - localparam PA_RAM_DEPTH = VA_RAM_DEPTH * N_PAR_VA_RAMS; - localparam VA_RAM_ADDR_WIDTH = log2(VA_RAM_DEPTH); - localparam PA_RAM_ADDR_WIDTH = log2(PA_RAM_DEPTH); - localparam SET_WIDTH = log2(N_SETS); - localparam OFFSET_WIDTH = log2(N_OFFSETS); - localparam LL_WIDTH = log2(N_PAR_VA_RAMS); - localparam IGNORE_LSB = log2(PAGE_SIZE); - - localparam VA_RAM_DATA_WIDTH = AXI_S_ADDR_WIDTH - IGNORE_LSB + 4; - localparam PA_RAM_DATA_WIDTH = AXI_M_ADDR_WIDTH - IGNORE_LSB; - - logic [N_PAR_VA_RAMS-1:0] hit, prot, multi_hit, cache_coherent; - logic [N_PAR_VA_RAMS-1:0] ram_we; - logic last_search, last_search_next; - logic first_search, first_search_next; - logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] ram_waddr; - logic [N_PAR_VA_RAMS-1:0][SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr; - logic pa_ram_we; - logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr, pa_port0_waddr; // PA RAM read, Write addr; - logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr_reg_SN, pa_port0_raddr_reg_SP; // registered addresses, needed for WAIT_ON_WRITE; - logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_addr; // PA RAM addr - logic [PA_RAM_DATA_WIDTH-1:0] pa_port0_data, pa_data, pa_port0_data_reg; // PA RAM data - logic pa_ram_store_data_SN, pa_ram_store_data_SP; - logic hit_top, prot_top, multi_hit_top, first_hit_top; - logic output_sent; - int hit_block_num; - - logic searching, search_done; - logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, port0_raddr; // VA RAM port0 addr - logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr; // VA RAM port1 addr - logic [OFFSET_WIDTH-1:0] offset_addr, offset_addr_d; - logic [OFFSET_WIDTH-1:0] offset_start_addr, offset_end_addr; - logic [SET_WIDTH-1:0] set_num; - - logic va_output_valid; - logic searching_q; - - genvar z; - - // Search FSM - typedef enum logic [1:0] {IDLE, SEARCH, DONE} search_state_t; - search_state_t search_SP; // Present state - search_state_t search_SN; // 
Next State - - // Output FSM - typedef enum logic [1:0] {OUT_IDLE, SEND_OUTPUT, WAIT_ON_WRITE} out_state_t; - out_state_t out_SP; // Present state - out_state_t out_SN; // Next State - - logic miss_next; - logic hit_next; - logic prot_next; - logic multi_next; - logic cache_coherent_next; - - // Generate the VA Block rams and their surrounding logic - generate - for (z = 0; z < N_PAR_VA_RAMS; z++) begin : VA_RAMS - check_ram - #( - .ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), - .RAM_DATA_WIDTH ( VA_RAM_DATA_WIDTH ), - .PAGE_SIZE ( PAGE_SIZE ), - .SET_WIDTH ( SET_WIDTH ), - .OFFSET_WIDTH ( OFFSET_WIDTH ) - ) - u_check_ram - ( - .clk_i ( clk_i ), - .rst_ni ( rst_ni ), - .in_addr ( in_addr_i ), - .rw_type ( rw_type_i ), - .ram_we ( ram_we[z] ), - .port0_addr ( port0_addr ), - .port1_addr ( port1_addr ), - .ram_wdata ( wdata_i[VA_RAM_DATA_WIDTH-1:0] ), - .output_sent ( output_sent ), - .output_valid ( va_output_valid ), - .offset_addr_d ( offset_addr_d ), - .hit_addr ( hit_addr[z] ), - .master ( cache_coherent[z] ), - .hit ( hit[z] ), - .multi_hit ( multi_hit[z] ), - .prot ( prot[z] ) - ); - end // for (z = 0; z < N_PORTS; z++) - endgenerate - - ////////////////// ---------------- Control and Address --------------- //////////////////////// - // FSM - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - search_SP <= IDLE; - end else begin - search_SP <= search_SN; - end - end - - always_comb begin : SEARCH_FSM - search_SN = search_SP; - busy_o = 1'b0; - searching = 1'b0; - search_done = 1'b0; - last_search_next = 1'b0; - first_search_next = first_search; - - unique case (search_SP) - IDLE : begin - if (start_i) begin - search_SN = SEARCH; - first_search_next = 1'b1; - end - end - - SEARCH : begin - busy_o = 1'b1; - - // detect last search cycle - if ( (first_search == 1'b0) && (offset_addr == offset_end_addr) ) - last_search_next = 1'b1; - - // pause search during VA RAM reconfigration - if (|ram_we) begin - searching = 1'b0; - end else begin - searching = 1'b1; - first_search_next = 1'b0; - end - - if (va_output_valid) begin - // stop search -`ifdef MULTI_HIT_FULL_SET - if (last_search | prot_top | multi_hit_top) begin -`else - if (last_search | prot_top | multi_hit_top | hit_top ) begin -`endif - search_SN = DONE; - search_done = 1'b1; - end - end - end - - DONE : begin - busy_o = 1'b1; - if (out_valid_o & out_ready_i) - search_SN = IDLE; - end - - default : begin - search_SN = IDLE; - end - endcase // case (prot_SP) - end // always_comb begin - - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - last_search <= 1'b0; - first_search <= 1'b0; - end else begin - last_search <= last_search_next; - first_search <= first_search_next; - end - end - - /* - * VA RAM address generation - * - * The input address and set number, and thus the offset start address, are available in the - * cycle after the start signal. The buffered offset_addr becomes available one cycle later. - * During the first search cycle, we therefore directly use offset_addr_start for the lookup. - */ - assign set_num = in_addr_i[SET_WIDTH+IGNORE_LSB -1 : IGNORE_LSB]; - - assign port0_raddr[OFFSET_WIDTH] = 1'b0; - assign port1_addr [OFFSET_WIDTH] = 1'b1; - - assign port0_raddr[OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr; - assign port1_addr [OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr; - - assign port0_raddr[SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num; - assign port1_addr [SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num; - - assign port0_addr = ram_we ? 
ram_waddr : port0_raddr; - - // The outputs of the BRAMs are only valid if in the previous cycle: - // 1. the inputs were valid, and - // 2. the BRAMs were not written to. - // Otherwise, the outputs must be ignored. - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - searching_q <= 1'b0; - end else begin - searching_q <= searching; - end - end - assign va_output_valid = searching_q; - - // Address offset for looking up the VA RAMs - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - offset_addr <= 0; - end else if (first_search) begin - offset_addr <= offset_start_addr + 1'b1; - end else if (searching) begin - offset_addr <= offset_addr + 1'b1; - end - end - - // Delayed address offest for looking up the PA RAM upon a hit in the VA RAMs - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - offset_addr_d <= 0; - end else if (first_search) begin - offset_addr_d <= offset_start_addr; - end else if (searching) begin - offset_addr_d <= offset_addr_d + 1'b1; - end - end - - // Store the offset addr for hit to reduce latency for next search. - generate - if (HIT_OFFSET_STORE_WIDTH > 0) begin : OFFSET_STORE -`ifndef MULTI_HIT_FULL_SET - logic [N_SETS-1:0][HIT_OFFSET_STORE_WIDTH-1:0] hit_offset_addr; // Contains offset addr for previous hit for every SET. - logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_reg; - - assign offset_start_addr = { hit_offset_addr[set_num] , {{OFFSET_WIDTH-HIT_OFFSET_STORE_WIDTH}{1'b0}} }; - assign offset_end_addr = hit_offset_addr[set_num]-1'b1; - - // Register the hit addr - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - hit_addr_reg <= 0; - end else if (hit_top) begin - hit_addr_reg <= hit_addr[hit_block_num]; - end - end - - // Store hit addr for each set. The next search in the same set will start from the saved addr. - always_ff @(posedge clk_i) begin - if (rst_ni == 0) begin - hit_offset_addr <= 0; - end else if (hit_o) begin - hit_offset_addr[set_num][HIT_OFFSET_STORE_WIDTH-1:0] <= hit_addr_reg[OFFSET_WIDTH-1 : (OFFSET_WIDTH - HIT_OFFSET_STORE_WIDTH)]; - end - end -`else // No need to store offset if full multi hit detection is enabled because the entire SET is searched. - assign offset_start_addr = 0; - assign offset_end_addr = {OFFSET_WIDTH{1'b1}}; -`endif - end else begin // if (HIT_OFFSET_STORE_WIDTH > 0) - assign offset_start_addr = 0; - assign offset_end_addr = {OFFSET_WIDTH{1'b1}}; - end - endgenerate - - assign prot_top = |prot; - - ////////////////////////////////////////////////////////////////////////////////////// - // check for hit, multi hit - // In case of a multi hit, the hit_block_num indicates the lowest VA RAM with a hit. - // In case of a multi hit in the same VA RAM, Port 0 is given priority. 
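The HIT_CHECK block that follows implements exactly this priority: walk the parallel VA RAMs from the highest index down so that the lowest-numbered hit is the one kept, and raise the multi-hit flag if a second hit is seen along the way. The same selection, written as plain Python purely for illustration:

def select_hit(hit, multi_hit):
    """Pick the lowest-indexed hit and flag multi hits (cf. HIT_CHECK below)."""
    hit_top = any(hit)
    hit_block_num = 0
    first_hit = False
    multi_hit_top = False
    # walk from the highest index down so the lowest hit index is kept last
    for i in reversed(range(len(hit))):
        if hit[i]:
            if multi_hit[i] or first_hit:
                multi_hit_top = True
            first_hit = True
            hit_block_num = i
    return hit_top, hit_block_num, multi_hit_top


# example: RAMs 1 and 3 both hit -> the lowest index (1) wins, multi hit is raised
print(select_hit([0, 1, 0, 1], [0, 0, 0, 0]))   # (True, 1, True)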
- always_comb begin : HIT_CHECK - hit_top = |hit; - hit_block_num = 0; - first_hit_top = 1'b0; - multi_hit_top = 1'b0; - for (int i=N_PAR_VA_RAMS-1; i>=0; i--) begin - if (hit[i] == 1'b1) begin -`ifdef MULTI_HIT_CUR_CYCLE - if (multi_hit[i] | first_hit_top ) begin - multi_hit_top = 1'b1; - end -`endif - first_hit_top = 1'b1; - hit_block_num = i; - end - end // for (int i=0; i port1 active - // select = 0 -> port2 active - select[idx] = (curr_priority[idx] & port1_addr_valid[idx]) | ~port2_addr_valid[idx]; - - p1_burst_size[idx] = (port1_len[idx] + 1) << port1_size[idx]; - p2_burst_size[idx] = (port2_len[idx] + 1) << port2_size[idx]; - - // align min addr for max addr computation to allow for smart AXI bursts around the 4k boundary - if (port1_size[idx] == 3'b001) - p1_mask[idx] = 3'b110; - else if (port1_size[idx] == 3'b010) - p1_mask[idx] = 3'b100; - else if (port1_size[idx] == 3'b011) - p1_mask[idx] = 3'b000; - else - p1_mask[idx] = 3'b111; - - p1_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port1_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH]; - p1_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port1_addr[idx][AXI_SIZE_WIDTH-1:0] & p1_mask[idx]; - - if (port2_size[idx] == 3'b001) - p2_mask[idx] = 3'b110; - else if (port2_size[idx] == 3'b010) - p2_mask[idx] = 3'b100; - else if (port2_size[idx] == 3'b011) - p2_mask[idx] = 3'b000; - else - p2_mask[idx] = 3'b111; - - if (port1_user[idx] == {AXI_USER_WIDTH{1'b1}}) - p1_prefetch[idx] = 1'b1; - else - p1_prefetch[idx] = 1'b0; - - if (port2_user[idx] == {AXI_USER_WIDTH{1'b1}}) - p2_prefetch[idx] = 1'b1; - else - p2_prefetch[idx] = 1'b0; - - p2_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port2_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH]; - p2_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port2_addr[idx][AXI_SIZE_WIDTH-1:0] & p2_mask[idx]; - - p1_max_addr[idx] = p1_align_addr[idx] + p1_burst_size[idx] - 1; - p2_max_addr[idx] = p2_align_addr[idx] + p2_burst_size[idx] - 1; - - int_addr_min[idx] = select[idx] ? port1_addr[idx] : port2_addr[idx]; - int_addr_max[idx] = select[idx] ? p1_max_addr[idx] : p2_max_addr[idx]; - int_rw[idx] = select[idx] ? port1_type[idx] : port2_type[idx]; - int_id[idx] = select[idx] ? port1_id[idx] : port2_id[idx]; - int_len[idx] = select[idx] ? port1_len[idx] : port2_len[idx]; - int_user[idx] = select[idx] ? port1_user[idx] : port2_user[idx]; - prefetch[idx] = select[idx] ? p1_prefetch[idx] : p2_prefetch[idx]; - - hit [idx] = | hit_slices [idx]; - prot[idx] = | prot_slices[idx]; - - no_hit [idx] = ~hit [idx]; - no_prot[idx] = ~prot[idx]; - - port1_out_addr[idx] = out_addr_reg[idx]; - port2_out_addr[idx] = out_addr_reg[idx]; - - port1_cache_coherent[idx] = cache_coherent_reg[idx]; - port2_cache_coherent[idx] = cache_coherent_reg[idx]; - end - end - - always_comb - begin - var integer idx_port, idx_slice; - var integer reg_num; - reg_num=0; - for ( idx_port = 0; idx_port < N_PORTS; idx_port++ ) begin - for ( idx_slice = 0; idx_slice < 4*N_SLICES[idx_port]; idx_slice++ ) begin - int_cfg_regs_slices[idx_port][idx_slice] = int_cfg_regs[4+reg_num]; - reg_num++; - end - // int_cfg_regs_slices[idx_port][N_SLICES_MAX:N_SLICES[idx_port]] will be dangling - // Fix to zero. Synthesis will remove these signals. - // int_cfg_regs_slices[idx_port][4*N_SLICES_MAX-1:4*N_SLICES[idx_port]] = 0; - end - end - - always @(posedge Clk_CI or negedge Rst_RBI) - begin : PORT_PRIORITY - var integer idx; - if (Rst_RBI == 1'b0) - curr_priority = 'h0; - else begin - for (idx=0; idx= cfg_min) ? 
1'b1 : 1'b0; - # assign min_below_max = (in_addr_min <= cfg_max) ? 1'b1 : 1'b0; - # assign max_below_max = (in_addr_max <= cfg_max) ? 1'b1 : 1'b0; - # assign out_hit = cfg_en & min_above_min & min_below_max & max_below_max; - # assign out_prot = out_hit & ((in_trans_type & ~cfg_wen) | (~in_trans_type & ~cfg_ren)); - # assign out_addr = in_addr_min - cfg_min + cfg_offset; - m.d.comb += [ - min_above_min.eq(self.in_addr_min >= self.cfg_min), - min_below_max.eq(self.in_addr_min <= self.cfg_max), - max_below_max.eq(self.in_addr_max <= self.cfg_max), - self.out_hit.eq(self.cfg_en & min_above_min & - min_below_max & max_below_max), - self.out_prot.eq(self.out_hit & ( - (self.in_trans_type & ~self.cfg_wen) | (~self.in_trans_type & ~self.cfg_ren))), - self.out_addr.eq(self.in_addr_min - self.cfg_min + self.cfg_offset) - ] - - return m diff --git a/src/soc/iommu/axi_rab/ram_tp_no_change.py b/src/soc/iommu/axi_rab/ram_tp_no_change.py deleted file mode 100644 index d0104735..00000000 --- a/src/soc/iommu/axi_rab/ram_tp_no_change.py +++ /dev/null @@ -1,97 +0,0 @@ -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# /* -# * ram_tp_no_change -# * -# * This code implements a parameterizable two-port memory. Port 0 can read and -# * write while Port 1 can read only. The Xilinx tools will infer a BRAM with -# * Port 0 in "no change" mode, i.e., during a write, it retains the last read -# * value on the output. Port 1 (read-only) is in "write first" mode. Still, it -# * outputs the old data during the write cycle. Note: Port 1 outputs invalid -# * data in the cycle after the write when reading the same address. -# * -# * For more information, see Xilinx PG058 Block Memory Generator Product Guide. 
-# */ - -from nmigen import Signal, Module, Const, Cat, Elaboratable -from nmigen import Memory - -import math - -# -# module ram_tp_no_change -# #( -ADDR_WIDTH = 10 -DATA_WIDTH = 36 -# ) -# ( -# input clk, -# input we, -# input [ADDR_WIDTH-1:0] addr0, -# input [ADDR_WIDTH-1:0] addr1, -# input [DATA_WIDTH-1:0] d_i, -# output [DATA_WIDTH-1:0] d0_o, -# output [DATA_WIDTH-1:0] d1_o -# ); - - -class ram_tp_no_change(Elaboratable): - - def __init__(self): - self.we = Signal() # input - self.addr0 = Signal(ADDR_WIDTH) # input - self.addr1 = Signal(ADDR_WIDTH) # input - self.d_i = Signal(DATA_WIDTH) # input - self.d0_o = Signal(DATA_WIDTH) # output - self.d1_o = Signal(DATA_WIDTH) # output - - DEPTH = int(math.pow(2, ADDR_WIDTH)) - self.ram = Memory(width=DATA_WIDTH, depth=DEPTH) - # - # localparam DEPTH = 2**ADDR_WIDTH; - # - # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH]; - # reg [DATA_WIDTH-1:0] d0; - # reg [DATA_WIDTH-1:0] d1; - # - # always_ff @(posedge clk) begin - # if(we == 1'b1) begin - # ram[addr0] <= d_i; - # end else begin - # only change data if we==false - # d0 <= ram[addr0]; - # end - # d1 <= ram[addr1]; - # end - # - # assign d0_o = d0; - # assign d1_o = d1; - # - - def elaborate(self, platform=None): - m = Module() - m.submodules.read_ram0 = read_ram0 = self.ram.read_port() - m.submodules.read_ram1 = read_ram1 = self.ram.read_port() - m.submodules.write_ram = write_ram = self.ram.write_port() - - # write port - m.d.comb += write_ram.en.eq(self.we) - m.d.comb += write_ram.addr.eq(self.addr0) - m.d.comb += write_ram.data.eq(self.d_i) - - # read ports - m.d.comb += read_ram0.addr.eq(self.addr0) - m.d.comb += read_ram1.addr.eq(self.addr1) - with m.If(self.we == 0): - m.d.sync += self.d0_o.eq(read_ram0.data) - m.d.sync += self.d1_o.eq(read_ram1.data) - - return m diff --git a/src/soc/iommu/axi_rab/ram_tp_write_first.py b/src/soc/iommu/axi_rab/ram_tp_write_first.py deleted file mode 100644 index 8fd2abb7..00000000 --- a/src/soc/iommu/axi_rab/ram_tp_write_first.py +++ /dev/null @@ -1,93 +0,0 @@ -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. -# -# /* -# * ram_tp_write_first -# * -# * This code implements a parameterizable two-port memory. Port 0 can read and -# * write while Port 1 can read only. Xilinx Vivado will infer a BRAM in -# * "write first" mode, i.e., upon a read and write to the same address, the -# * new value is read. Note: Port 1 outputs invalid data in the cycle after -# * the write when reading the same address. -# * -# * For more information, see Xilinx PG058 Block Memory Generator Product Guide. 
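The "write first" behaviour described here maps fairly directly onto a transparent nmigen read port, whereas the "no change" variant above additionally holds its registered outputs while a write is in progress. A compact, self-contained sketch of the write-first flavour, with placeholder parameter values and an invented class name:

from nmigen import Elaboratable, Memory, Module, Signal


class WriteFirstRam(Elaboratable):
    """Two-port RAM: port 0 read/write, port 1 read-only; read-during-write returns new data."""

    def __init__(self, addr_width=10, data_width=36):
        self.we = Signal()
        self.addr0 = Signal(addr_width)
        self.addr1 = Signal(addr_width)
        self.d_i = Signal(data_width)
        self.d0_o = Signal(data_width)
        self.d1_o = Signal(data_width)
        self.mem = Memory(width=data_width, depth=2 ** addr_width)

    def elaborate(self, platform=None):
        m = Module()
        # transparent=True forwards a simultaneous write to the read data ("write first")
        m.submodules.rd0 = rd0 = self.mem.read_port(transparent=True)
        m.submodules.rd1 = rd1 = self.mem.read_port(transparent=True)
        m.submodules.wr = wr = self.mem.write_port()
        m.d.comb += [wr.en.eq(self.we),
                     wr.addr.eq(self.addr0),
                     wr.data.eq(self.d_i),
                     rd0.addr.eq(self.addr0),
                     rd1.addr.eq(self.addr1),
                     self.d0_o.eq(rd0.data),
                     self.d1_o.eq(rd1.data)]
        return m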
-# */ - -from nmigen import Signal, Module, Const, Cat, Elaboratable -from nmigen import Memory - -import math -# -# module ram_tp_write_first -# #( -ADDR_WIDTH = 10 -DATA_WIDTH = 36 -# ) -# ( -# input clk, -# input we, -# input [ADDR_WIDTH-1:0] addr0, -# input [ADDR_WIDTH-1:0] addr1, -# input [DATA_WIDTH-1:0] d_i, -# output [DATA_WIDTH-1:0] d0_o, -# output [DATA_WIDTH-1:0] d1_o -# ); - - -class ram_tp_write_first(Elaboratable): - - def __init__(self): - self.we = Signal() # input - self.addr0 = Signal(ADDR_WIDTH) # input - self.addr1 = Signal(ADDR_WIDTH) # input - self.d_i = Signal(DATA_WIDTH) # input - self.d0_o = Signal(DATA_WIDTH) # output - self.d1_o = Signal(DATA_WIDTH) # output - - DEPTH = int(math.pow(2, ADDR_WIDTH)) - self.ram = Memory(width=DATA_WIDTH, depth=DEPTH) - - # - # localparam DEPTH = 2**ADDR_WIDTH; - # - # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH]; - # reg [ADDR_WIDTH-1:0] raddr0; - # reg [ADDR_WIDTH-1:0] raddr1; - # - # always_ff @(posedge clk) begin - # if(we == 1'b1) begin - # ram[addr0] <= d_i; - # end - # raddr0 <= addr0; - # raddr1 <= addr1; - # end - # - # assign d0_o = ram[raddr0]; - # assign d1_o = ram[raddr1]; - # - - def elaborate(self, platform=None): - m = Module() - m.submodules.read_ram0 = read_ram0 = self.ram.read_port() - m.submodules.read_ram1 = read_ram1 = self.ram.read_port() - m.submodules.write_ram = write_ram = self.ram.write_port() - - # write port - m.d.comb += write_ram.en.eq(self.we) - m.d.comb += write_ram.addr.eq(self.addr0) - m.d.comb += write_ram.data.eq(self.d_i) - - # read ports - m.d.comb += read_ram0.addr.eq(self.addr0) - m.d.comb += read_ram1.addr.eq(self.addr1) - m.d.sync += self.d0_o.eq(read_ram0.data) - m.d.sync += self.d1_o.eq(read_ram1.data) - - return m diff --git a/src/soc/iommu/axi_rab/slice_top.py b/src/soc/iommu/axi_rab/slice_top.py deleted file mode 100644 index 6eedb1cd..00000000 --- a/src/soc/iommu/axi_rab/slice_top.py +++ /dev/null @@ -1,141 +0,0 @@ -# // Copyright 2018 ETH Zurich and University of Bologna. -# // Copyright and related rights are licensed under the Solderpad Hardware -# // License, Version 0.51 (the "License"); you may not use this file except in -# // compliance with the License. You may obtain a copy of the License at -# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# // or agreed to in writing, software, hardware and materials distributed under -# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# // CONDITIONS OF ANY KIND, either express or implied. See the License for the -# // specific language governing permissions and limitations under the License. - -# this file has been generated by sv2nmigen - -from nmigen import Signal, Module, Const, Cat, Elaboratable -import rab_slice -import coreconfig - -# -# module slice_top -# //#( -# // parameter N_SLICES = 16, -# // parameter N_REGS = 4*N_SLICES, -# // parameter ADDR_WIDTH_PHYS = 40, -# // parameter ADDR_WIDTH_VIRT = 32 -# // ) -# ( -# input logic [N_REGS-1:0] [63:0] int_cfg_regs, -# input logic int_rw, -# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_min, -# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_max, -# input logic multi_hit_allow, -# output logic multi_hit, -# output logic [N_SLICES-1:0] prot, -# output logic [N_SLICES-1:0] hit, -# output logic cache_coherent, -# output logic [ADDR_WIDTH_PHYS-1:0] out_addr -# ); -# - - -class slice_top(Elaboratable): - - def __init__(self): - # FIXME self.int_cfg_regs = Signal() # input - self.params = coreconfig.CoreConfig() # rename ? 
- self.int_rw = Signal() # input - self.int_addr_min = Signal(self.params.ADDR_WIDTH_VIRT) # input - self.int_addr_max = Signal(self.params.ADDR_WIDTH_VIRT) # input - self.multi_hit_allow = Signal() # input - self.multi_hit = Signal() # output - self.prot = Signal(self.params.N_SLICES) # output - self.hit = Signal(self.params.N_SLICES) # output - self.cache_coherent = Signal() # output - self.out_addr = Signal(self.params.ADDR_WIDTH_PHYS) # output - - def elaborate(self, platform=None): - m = Module() - - first_hit = Signal() - - for i in range(self.params.N_SLICES): - # TODO pass params / core config here - u_slice = rab_slice.rab_slice(self.params) - setattr(m.submodules, "u_slice%d" % i, u_slice) - # TODO set param and connect ports - - # In case of a multi hit, the lowest slice with a hit is selected. - # TODO always_comb begin : HIT_CHECK - m.d.comb += [ - first_hit.eq(0), - self.multi_hit.eq(0), - self.out_addr.eq(0), - self.cache_coherent.eq(0)] - - for j in range(self.params.N_SLICES): - with m.If(self.hit[j] == 1): - with m.If(first_hit == 1): - with m.If(self.multi_hit_allow == 0): - m.d.comb += [self.multi_hit.eq(1)] - with m.Elif(first_hit == 1): - m.d.comb += [first_hit.eq(1) - # only output first slice that was hit - # SV self.out_addr.eq(slice_out_addr[ADDR_WIDTH_PHYS*j + : ADDR_WIDTH_PHYS]), - # SV self.cache_coherent.eq(int_cfg_regs[4*j+3][3]), - ] - return m - - # TODO translate generate statement - - -""" - logic [ADDR_WIDTH_PHYS*N_SLICES-1:0] slice_out_addr; - - generate - for ( i=0; i 0 + with m.Else(): + # Multiple Match if encoder n is invalid + with m.If(self.encoder.n): + m.d.comb += [ + self.single_match.eq(0), + self.multiple_match.eq(1) + ] + # Single Match if encoder n is valid + with m.Else(): + m.d.comb += [ + self.single_match.eq(1), + self.multiple_match.eq(0) + ] + # Always set output based on priority encoder output + m.d.comb += self.o.eq(self.p_encoder.o) + return m diff --git a/src/soc/unused/TLB/Cam.py b/src/soc/unused/TLB/Cam.py new file mode 100644 index 00000000..c5fd0699 --- /dev/null +++ b/src/soc/unused/TLB/Cam.py @@ -0,0 +1,126 @@ +from nmigen import Array, Cat, Module, Signal, Elaboratable +from nmigen.lib.coding import Decoder +from nmigen.cli import main # , verilog + +from .CamEntry import CamEntry +from .AddressEncoder import AddressEncoder + + +class Cam(Elaboratable): + """ Content Addressable Memory (CAM) + + The purpose of this module is to quickly look up whether an + entry exists given a data key. + This module will search for the given data in all internal entries + and output whether a single or multiple match was found. + If an single entry is found the address be returned and single_match + is set HIGH. If multiple entries are found the lowest address is + returned and multiple_match is set HIGH. If neither single_match or + multiple_match are HIGH this implies no match was found. To write + to the CAM set the address bus to the desired entry and set write_enable + HIGH. Entry managment should be performed one level above this block + as lookup is performed within. + + Notes: + The read and write operations take one clock cycle to complete. + Currently the read_warning line is present for interfacing but + is not necessary for this design. This module is capable of writing + in the first cycle, reading on the second, and output the correct + address on the third. 
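The single-match / multiple-match semantics described in this docstring can be written as a small plain-Python model (illustrative only; in the RTL the same job is done by the CamEntry array plus AddressEncoder, and the function name here is made up):

    def cam_lookup(entries, key):
        # returns (single_match, multiple_match, match_address)
        hits = [i for i, data in enumerate(entries) if data == key]
        if len(hits) == 1:
            return (1, 0, hits[0])
        if len(hits) > 1:
            return (0, 1, hits[0])   # lowest matching address is reported
        return (0, 0, 0)             # no match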
+ """ + + def __init__(self, data_size, cam_size): + """ Arguments: + * data_size: (bits) The bit size of the data + * cam_size: (number) The number of entries in the CAM + """ + + # Internal + self.cam_size = cam_size + self.encoder = AddressEncoder(cam_size) + self.decoder = Decoder(cam_size) + self.entry_array = Array(CamEntry(data_size) for x in range(cam_size)) + + # Input + self.enable = Signal(1) + self.write_enable = Signal(1) + self.data_in = Signal(data_size) # The data to be written + self.data_mask = Signal(data_size) # mask for ternary writes + # address of CAM Entry to write + self.address_in = Signal(range(cam_size)) + + # Output + self.read_warning = Signal(1) # High when a read interrupts a write + self.single_match = Signal(1) # High when there is only one match + self.multiple_match = Signal(1) # High when there at least two matches + # The lowest address matched + self.match_address = Signal(range(cam_size)) + + def elaborate(self, platform=None): + m = Module() + # AddressEncoder for match types and output address + m.submodules.AddressEncoder = self.encoder + # Decoder is used to select which entry will be written to + m.submodules.Decoder = self.decoder + # CamEntry Array Submodules + # Note these area added anonymously + entry_array = self.entry_array + m.submodules += entry_array + + # Decoder logic + m.d.comb += [ + self.decoder.i.eq(self.address_in), + self.decoder.n.eq(0) + ] + + encoder_vector = [] + with m.If(self.enable): + # Set the key value for every CamEntry + for index in range(self.cam_size): + + # Write Operation + with m.If(self.write_enable): + with m.If(self.decoder.o[index]): + m.d.comb += entry_array[index].command.eq(2) + with m.Else(): + m.d.comb += entry_array[index].command.eq(0) + + # Read Operation + with m.Else(): + m.d.comb += entry_array[index].command.eq(1) + + # Send data input to all entries + m.d.comb += entry_array[index].data_in.eq(self.data_in) + # Send all entry matches to encoder + ematch = entry_array[index].match + encoder_vector.append(ematch) + + # Give input to and accept output from encoder module + m.d.comb += [ + self.encoder.i.eq(Cat(*encoder_vector)), + self.single_match.eq(self.encoder.single_match), + self.multiple_match.eq(self.encoder.multiple_match), + self.match_address.eq(self.encoder.o) + ] + + # If the CAM is not enabled set all outputs to 0 + with m.Else(): + m.d.comb += [ + self.read_warning.eq(0), + self.single_match.eq(0), + self.multiple_match.eq(0), + self.match_address.eq(0) + ] + + return m + + def ports(self): + return [self.enable, self.write_enable, + self.data_in, self.data_mask, + self.read_warning, self.single_match, + self.multiple_match, self.match_address] + + +if __name__ == '__main__': + cam = Cam(4, 4) + main(cam, ports=cam.ports()) diff --git a/src/soc/unused/TLB/CamEntry.py b/src/soc/unused/TLB/CamEntry.py new file mode 100644 index 00000000..b1d93082 --- /dev/null +++ b/src/soc/unused/TLB/CamEntry.py @@ -0,0 +1,46 @@ +from nmigen import Module, Signal, Elaboratable + + +class CamEntry(Elaboratable): + """ Content Addressable Memory (CAM) Entry + + The purpose of this module is to represent an entry within a CAM. + This module when given a read command will compare the given data + and output whether a match was found or not. When given a write + command it will write the given data into internal registers. 
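The 2-bit command encoding that CamEntry uses (documented inline just below) could be given hypothetical named constants; these names are illustrative only and are not defined anywhere in the code:

    CMD_NONE  = 0b00   # no action
    CMD_READ  = 0b01   # compare data_in against the stored key, drive match
    CMD_WRITE = 0b10   # store data_in as the new key
    CMD_RESET = 0b11   # clear both key and match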
+ """ + + def __init__(self, data_size): + """ Arguments: + * data_size: (bit count) The size of the data + """ + # Input + self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset + self.data_in = Signal(data_size) # Data input when writing + + # Output + self.match = Signal(1) # Result of the internal/input key comparison + self.data = Signal(data_size) + + def elaborate(self, platform=None): + m = Module() + with m.Switch(self.command): + with m.Case("00"): + m.d.sync += self.match.eq(0) + with m.Case("01"): + with m.If(self.data == self.data_in): + m.d.sync += self.match.eq(1) + with m.Else(): + m.d.sync += self.match.eq(0) + with m.Case("10"): + m.d.sync += [ + self.data.eq(self.data_in), + self.match.eq(0) + ] + with m.Case(): + m.d.sync += [ + self.match.eq(0), + self.data.eq(0) + ] + + return m diff --git a/src/soc/unused/TLB/LFSR.py b/src/soc/unused/TLB/LFSR.py new file mode 100644 index 00000000..d8b606ec --- /dev/null +++ b/src/soc/unused/TLB/LFSR.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information +from nmigen import Signal, Module, Const, Cat, Elaboratable +from nmigen.cli import verilog, rtlil + + +class LFSRPolynomial(set): + """ implements a polynomial for use in LFSR + """ + def __init__(self, exponents=()): + for e in exponents: + assert isinstance(e, int), TypeError("%s must be an int" % repr(e)) + assert (e >= 0), ValueError("%d must not be negative" % e) + set.__init__(self, set(exponents).union({0})) # must contain zero + + @property + def max_exponent(self): + return max(self) # derived from set, so this returns the max exponent + + @property + def exponents(self): + exponents = list(self) # get elements of set as a list + exponents.sort(reverse=True) + return exponents + + def __str__(self): + expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2) + retval = map(lambda i: expd[min(i,2)].format(i), self.exponents) + return " + ".join(retval) + + def __repr__(self): + return "LFSRPolynomial(%s)" % self.exponents + + +# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa +LFSR_POLY_2 = LFSRPolynomial([2, 1, 0]) +LFSR_POLY_3 = LFSRPolynomial([3, 2, 0]) +LFSR_POLY_4 = LFSRPolynomial([4, 3, 0]) +LFSR_POLY_5 = LFSRPolynomial([5, 3, 0]) +LFSR_POLY_6 = LFSRPolynomial([6, 5, 0]) +LFSR_POLY_7 = LFSRPolynomial([7, 6, 0]) +LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0]) +LFSR_POLY_9 = LFSRPolynomial([9, 5, 0]) +LFSR_POLY_10 = LFSRPolynomial([10, 7, 0]) +LFSR_POLY_11 = LFSRPolynomial([11, 9, 0]) +LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0]) +LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0]) +LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0]) +LFSR_POLY_15 = LFSRPolynomial([15, 14, 0]) +LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0]) +LFSR_POLY_17 = LFSRPolynomial([17, 14, 0]) +LFSR_POLY_18 = LFSRPolynomial([18, 11, 0]) +LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0]) +LFSR_POLY_20 = LFSRPolynomial([20, 17, 0]) +LFSR_POLY_21 = LFSRPolynomial([21, 19, 0]) +LFSR_POLY_22 = LFSRPolynomial([22, 21, 0]) +LFSR_POLY_23 = LFSRPolynomial([23, 18, 0]) +LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0]) + + +class LFSR(LFSRPolynomial, Elaboratable): + """ implements a Linear Feedback Shift Register + """ + def __init__(self, polynomial): + """ Inputs: + ------ + :polynomial: the polynomial to feedback on. 
may be a LFSRPolynomial + instance or an iterable of ints (list/tuple/generator) + :enable: enable (set LO to disable. NOTE: defaults to HI) + + Outputs: + ------- + :state: the LFSR state. bitwidth is taken from the polynomial + maximum exponent. + + Note: if an LFSRPolynomial is passed in as the input, because + LFSRPolynomial is derived from set() it's ok: + LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p) + """ + LFSRPolynomial.__init__(self, polynomial) + self.state = Signal(self.max_exponent, reset=1) + self.enable = Signal(reset=1) + + def elaborate(self, platform): + m = Module() + # do absolutely nothing if the polynomial is empty (always has a zero) + if self.max_exponent <= 1: + return m + + # create XOR-bunch, select bits from state based on exponent + feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain) + for exponent in self: + if exponent > 0: # don't have to skip, saves CPU cycles though + feedback ^= self.state[exponent - 1] + + # if enabled, shift-and-feedback + with m.If(self.enable): + # shift up lower bits by Cat'ing in a new bit zero (feedback) + newstate = Cat(feedback, self.state[:-1]) + m.d.sync += self.state.eq(newstate) + + return m + + +# example: Poly24 +if __name__ == '__main__': + p24 = rtlil.convert(LFSR(LFSR_POLY_24)) + with open("lfsr2_p24.il", "w") as f: + f.write(p24) diff --git a/src/soc/unused/TLB/LFSR.pyi b/src/soc/unused/TLB/LFSR.pyi new file mode 100644 index 00000000..64eb9115 --- /dev/null +++ b/src/soc/unused/TLB/LFSR.pyi @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information +from nmigen import Module +from typing import Iterable, Optional, Iterator, Any, Union +from typing_extensions import final + + +@final +class LFSRPolynomial(set): + def __init__(self, exponents: Iterable[int] = ()): + def elements() -> Iterable[int]: ... + @property + def exponents(self) -> list[int]: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + + +@final +class LFSR: + def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ... + @property + def width(self) -> int: ... + def elaborate(self, platform: Any) -> Module: ... diff --git a/src/soc/unused/TLB/Makefile b/src/soc/unused/TLB/Makefile new file mode 100644 index 00000000..1eb67acc --- /dev/null +++ b/src/soc/unused/TLB/Makefile @@ -0,0 +1,2 @@ +verilog: + python3 Cam.py generate -t v > Cam.v diff --git a/src/soc/unused/TLB/MemorySet.py b/src/soc/unused/TLB/MemorySet.py new file mode 100644 index 00000000..11890edf --- /dev/null +++ b/src/soc/unused/TLB/MemorySet.py @@ -0,0 +1,66 @@ +from nmigen import Cat, Memory, Module, Signal, Elaboratable +from nmigen.cli import main +from nmigen.cli import verilog, rtlil + + +class MemorySet(Elaboratable): + def __init__(self, data_size, tag_size, set_count, active): + self.active = active + input_size = tag_size + data_size # Size of the input data + memory_width = input_size + 1 # The width of the cache memory + self.active = active + self.data_size = data_size + self.tag_size = tag_size + + # XXX TODO, use rd-enable and wr-enable? 
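        # Stored word layout (assuming the active=0 value that
        # SetAssociativeCache passes in): packed LSB-first as
        #   [ active (1 bit) | data (data_size bits) | tag (tag_size bits) ]
        # which matches Cat(1, self.data_i, self.tag) in the write path and
        # the data_start/tag_start slices computed in elaborate() below.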
+ self.mem = Memory(width=memory_width, depth=set_count) + self.r = self.mem.read_port() + self.w = self.mem.write_port() + + # inputs (address) + self.cset = Signal(range(set_count)) # The set to be checked + self.tag = Signal(tag_size) # The tag to find + self.data_i = Signal(data_size) # Incoming data + + # outputs + self.valid = Signal() + self.data_o = Signal(data_size) # Outgoing data (excludes tag) + + def elaborate(self, platform): + m = Module() + m.submodules.mem = self.mem + m.submodules.r = self.r + m.submodules.w = self.w + + # temporaries + active_bit = Signal() + tag_valid = Signal() + data_start = self.active + 1 + data_end = data_start + self.data_size + tag_start = data_end + tag_end = tag_start + self.tag_size + + # connect the read port address to the set/entry + read_port = self.r + m.d.comb += read_port.addr.eq(self.cset) + # Pull out active bit from data + data = read_port.data + m.d.comb += active_bit.eq(data[self.active]) + # Validate given tag vs stored tag + tag = data[tag_start:tag_end] + m.d.comb += tag_valid.eq(self.tag == tag) + # An entry is only valid if the tags match AND + # is marked as a valid entry + m.d.comb += self.valid.eq(tag_valid & active_bit) + + # output data: TODO, check rd-enable? + m.d.comb += self.data_o.eq(data[data_start:data_end]) + + # connect the write port addr to the set/entry (only if write enabled) + # (which is only done on a match, see SAC.write_entry below) + write_port = self.w + with m.If(write_port.en): + m.d.comb += write_port.addr.eq(self.cset) + m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag)) + + return m diff --git a/src/soc/unused/TLB/PermissionValidator.py b/src/soc/unused/TLB/PermissionValidator.py new file mode 100644 index 00000000..5bc90b2f --- /dev/null +++ b/src/soc/unused/TLB/PermissionValidator.py @@ -0,0 +1,68 @@ +from nmigen import Module, Signal, Elaboratable +from nmigen.cli import main + +from soc.TLB.PteEntry import PteEntry + + +class PermissionValidator(Elaboratable): + """ The purpose of this Module is to check the Permissions of a given PTE + against the requested access permissions. 
+ + This module will either validate (by setting the valid bit HIGH) + the request or find a permission fault and invalidate (by setting + the valid bit LOW) the request + """ + + def __init__(self, asid_size, pte_size): + """ Arguments: + * asid_size: (bit count) The size of the asid to be processed + * pte_size: (bit count) The size of the pte to be processed + + Return: + * valid HIGH when permissions are correct + """ + # Internal + self.pte_entry = PteEntry(asid_size, pte_size) + + # Input + self.data = Signal(asid_size + pte_size) + self.xwr = Signal(3) # Execute, Write, Read + self.super_mode = Signal(1) # Supervisor Mode + self.super_access = Signal(1) # Supervisor Access + self.asid = Signal(15) # Address Space IDentifier (ASID) + + # Output + self.valid = Signal(1) # Denotes if the permissions are correct + + def elaborate(self, platform=None): + m = Module() + + m.submodules.pte_entry = self.pte_entry + + m.d.comb += self.pte_entry.i.eq(self.data) + + # Check if the entry is valid + with m.If(self.pte_entry.v): + # ASID match or Global Permission + # Note that the MSB bound is exclusive + with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g): + # Check Execute, Write, Read (XWR) Permissions + with m.If(self.pte_entry.xwr == self.xwr): + # Supervisor Logic + with m.If(self.super_mode): + # Valid if entry is not in user mode or supervisor + # has Supervisor User Memory (SUM) access via the + # SUM bit in the sstatus register + m.d.comb += self.valid.eq((~self.pte_entry.u) + | self.super_access) + # User logic + with m.Else(): + # Valid if the entry is in user mode only + m.d.comb += self.valid.eq(self.pte_entry.u) + with m.Else(): + m.d.comb += self.valid.eq(0) + with m.Else(): + m.d.comb += self.valid.eq(0) + with m.Else(): + m.d.comb += self.valid.eq(0) + return m diff --git a/src/soc/unused/TLB/PteEntry.py b/src/soc/unused/TLB/PteEntry.py new file mode 100644 index 00000000..73ea9220 --- /dev/null +++ b/src/soc/unused/TLB/PteEntry.py @@ -0,0 +1,67 @@ +from nmigen import Module, Signal, Elaboratable +from nmigen.cli import main + + +class PteEntry(Elaboratable): + """ The purpose of this Module is to centralize the parsing of Page + Table Entries (PTE) into one module to prevent common mistakes + and duplication of code. The control bits are parsed out for + ease of use. + + This module parses according to the standard PTE given by the + Volume II: RISC-V Privileged Architectures V1.10 Pg 60. + The Address Space IDentifier (ASID) is appended to the MSB of the input + and is parsed out as such. + + An valid input Signal would be: + ASID PTE + Bits:[78-64][63-0] + + The output PTE value will include the control bits. 
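The field extraction performed in elaborate() below can also be expressed as a small plain-Python helper (illustrative only; the function name is made up, and the bit positions mirror the decode in this module):

    def decode_pte(pte):
        return {
            "v":   (pte >> 0) & 1,      # valid
            "xwr": (pte >> 1) & 0b111,  # permission bits, pte[3:1] (R is LSB)
            "u":   (pte >> 4) & 1,      # user-mode page
            "g":   (pte >> 5) & 1,      # global mapping
            "a":   (pte >> 6) & 1,      # accessed
            "d":   (pte >> 7) & 1,      # dirty
        }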
+ """ + def __init__(self, asid_size, pte_size): + """ Arguments: + * asid_size: (bit count) The size of the asid to be processed + * pte_size: (bit count) The size of the pte to be processed + + Return: + * d The Dirty bit from the PTE portion of i + * a The Accessed bit from the PTE portion of i + * g The Global bit from the PTE portion of i + * u The User Mode bit from the PTE portion of i + * xwr The Execute/Write/Read bit from the PTE portion of i + * v The Valid bit from the PTE portion of i + * asid The asid portion of i + * pte The pte portion of i + """ + # Internal + self.asid_start = pte_size + self.asid_end = pte_size + asid_size + + # Input + self.i = Signal(asid_size + pte_size) + + # Output + self.d = Signal(1) # Dirty bit (From pte) + self.a = Signal(1) # Accessed bit (From pte) + self.g = Signal(1) # Global Access (From pte) + self.u = Signal(1) # User Mode (From pte) + self.xwr = Signal(3) # Execute Read Write (From pte) + self.v = Signal(1) # Valid (From pte) + self.asid = Signal(asid_size) # Associated Address Space IDentifier + self.pte = Signal(pte_size) # Full Page Table Entry + + def elaborate(self, platform=None): + m = Module() + # Pull out all control bites from PTE + m.d.comb += [ + self.d.eq(self.i[7]), + self.a.eq(self.i[6]), + self.g.eq(self.i[5]), + self.u.eq(self.i[4]), + self.xwr.eq(self.i[1:4]), + self.v.eq(self.i[0]) + ] + m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end]) + m.d.comb += self.pte.eq(self.i[0:self.asid_start]) + return m diff --git a/src/soc/unused/TLB/SetAssociativeCache.py b/src/soc/unused/TLB/SetAssociativeCache.py new file mode 100644 index 00000000..30ad8090 --- /dev/null +++ b/src/soc/unused/TLB/SetAssociativeCache.py @@ -0,0 +1,274 @@ +""" + +Online simulator of 4-way set-associative cache: +http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html + +Python simulator of a N-way set-associative cache: +https://github.com/vaskevich/CacheSim/blob/master/cachesim.py +""" + +from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable +from nmigen.compat.genlib import fsm +from nmigen.cli import main +from nmigen.cli import verilog, rtlil + +from .AddressEncoder import AddressEncoder +from .MemorySet import MemorySet + +# TODO: use a LFSR that advances continuously and picking the bottom +# few bits from it to select which cache line to replace, instead of PLRU +# http://bugs.libre-riscv.org/show_bug.cgi?id=71 +from .ariane.plru import PLRU +from .LFSR import LFSR, LFSR_POLY_24 + +SA_NA = "00" # no action (none) +SA_RD = "01" # read +SA_WR = "10" # write + + +class SetAssociativeCache(Elaboratable): + """ Set Associative Cache Memory + + The purpose of this module is to generate a memory cache given the + constraints passed in. This will create a n-way set associative cache. + It is expected for the SV TLB that the VMA will provide the set number + while the ASID provides the tag (still to be decided). + + """ + + def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False): + """ Arguments + * tag_size (bits): The bit count of the tag + * data_size (bits): The bit count of the data to be stored + * set_count (number): The number of sets/entries in the cache + * way_count (number): The number of slots a data can be stored + in one set + * lfsr: if set, use an LFSR for (pseudo-randomly) selecting + set/entry to write to. 
otherwise, use a PLRU + """ + # Internals + self.lfsr_mode = lfsr + self.way_count = way_count # The number of slots in one set + self.tag_size = tag_size # The bit count of the tag + self.data_size = data_size # The bit count of the data to be stored + + # set up Memory array + self.mem_array = Array() # memory array + for i in range(way_count): + ms = MemorySet(data_size, tag_size, set_count, active=0) + self.mem_array.append(ms) + + # Finds valid entries + self.encoder = AddressEncoder(way_count) + + # setup PLRU or LFSR + if lfsr: + # LFSR mode + self.lfsr = LFSR(LFSR_POLY_24) + else: + # PLRU mode + # One block to handle plru calculations + self.plru = PLRU(way_count) + self.plru_array = Array() # PLRU data on each set + for i in range(set_count): + name = "plru%d" % i + self.plru_array.append(Signal(self.plru.TLBSZ, name=name)) + + # Input + self.enable = Signal(1) # Whether the cache is enabled + self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX) + self.cset = Signal(range(set_count)) # The set to be checked + self.tag = Signal(tag_size) # The tag to find + self.data_i = Signal(data_size) # The input data + + # Output + self.ready = Signal(1) # 0 => Processing 1 => Ready for commands + self.hit = Signal(1) # Tag matched one way in the given set + # Tag matched many ways in the given set + self.multiple_hit = Signal(1) + self.data_o = Signal(data_size) # The data linked to the matched tag + + def check_tags(self, m): + """ Validate the tags in the selected set. If one and only one + tag matches set its state to zero and increment all others + by one. We only advance to next state if a single hit is found. + """ + # Vector to store way valid results + # A zero denotes a way is invalid + valid_vector = [] + # Loop through memory to prep read/write ports and set valid_vector + for i in range(self.way_count): + valid_vector.append(self.mem_array[i].valid) + + # Pass encoder the valid vector + m.d.comb += self.encoder.i.eq(Cat(*valid_vector)) + + # Only one entry should be marked + # This is due to already verifying the tags + # matched and the valid bit is high + with m.If(self.hit): + m.next = "FINISHED_READ" + # Pull out data from the read port + data = self.mem_array[self.encoder.o].data_o + m.d.comb += self.data_o.eq(data) + if not self.lfsr_mode: + self.access_plru(m) + + # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k + with m.Elif(self.multiple_hit): + # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck + m.d.comb += self.data_o.eq(0) + + # No tag matches means no data + with m.Else(): + # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck + m.d.comb += self.data_o.eq(0) + + def access_plru(self, m): + """ An entry was accessed and the plru tree must now be updated + """ + # Pull out the set's entry being edited + plru_entry = self.plru_array[self.cset] + m.d.comb += [ + # Set the plru data to the current state + self.plru.plru_tree.eq(plru_entry), + # Set that the cache was accessed + self.plru.lu_access_i.eq(1) + ] + + def read(self, m): + """ Go through the read process of the cache. + This takes two cycles to complete. First it checks for a valid tag + and secondly it updates the LRU values. 
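        (In the FSM below this means: the READY state holds ready low while
        check_tags() examines the selected set, and FINISHED_READ raises
        ready and, in PLRU mode, writes the updated tree back for that set.)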
+ """ + with m.FSM() as fsm_read: + with m.State("READY"): + m.d.comb += self.ready.eq(0) + # check_tags will set the state if the conditions are met + self.check_tags(m) + with m.State("FINISHED_READ"): + m.next = "READY" + m.d.comb += self.ready.eq(1) + if not self.lfsr_mode: + plru_tree_o = self.plru.plru_tree_o + m.d.sync += self.plru_array[self.cset].eq(plru_tree_o) + + def write_entry(self, m): + if not self.lfsr_mode: + m.d.comb += [ # set cset (mem address) into PLRU + self.plru.plru_tree.eq(self.plru_array[self.cset]), + # and connect plru to encoder for write + self.encoder.i.eq(self.plru.replace_en_o) + ] + write_port = self.mem_array[self.encoder.o].w + else: + # use the LFSR to generate a random(ish) one of the mem array + lfsr_output = Signal(range(self.way_count)) + lfsr_random = Signal(range(self.way_count)) + m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits + # address too big, limit to range of array + m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count, + lfsr_output - self.way_count, + lfsr_output)) + write_port = self.mem_array[lfsr_random].w + + # then if there is a match from the encoder, enable the selected write + with m.If(self.encoder.single_match): + m.d.comb += write_port.en.eq(1) + + def write(self, m): + """ Go through the write process of the cache. + This takes two cycles to complete. First it writes the entry, + and secondly it updates the PLRU (in plru mode) + """ + with m.FSM() as fsm_write: + with m.State("READY"): + m.d.comb += self.ready.eq(0) + self.write_entry(m) + m.next = "FINISHED_WRITE" + with m.State("FINISHED_WRITE"): + m.d.comb += self.ready.eq(1) + if not self.lfsr_mode: + plru_entry = self.plru_array[self.cset] + m.d.sync += plru_entry.eq(self.plru.plru_tree_o) + m.next = "READY" + + def elaborate(self, platform=None): + m = Module() + + # ---- + # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array + # ---- + + m.submodules.AddressEncoder = self.encoder + if self.lfsr_mode: + m.submodules.LFSR = self.lfsr + else: + m.submodules.PLRU = self.plru + + for i, mem in enumerate(self.mem_array): + setattr(m.submodules, "mem%d" % i, mem) + + # ---- + # select mode: PLRU connect to encoder, LFSR do... something + # ---- + + if not self.lfsr_mode: + # Set what entry was hit + m.d.comb += self.plru.lu_hit.eq(self.encoder.o) + else: + # enable LFSR + m.d.comb += self.lfsr.enable.eq(self.enable) + + # ---- + # connect hit/multiple hit to encoder output + # ---- + + m.d.comb += [ + self.hit.eq(self.encoder.single_match), + self.multiple_hit.eq(self.encoder.multiple_match), + ] + + # ---- + # connect incoming data/tag/cset(addr) to mem_array + # ---- + + for mem in self.mem_array: + write_port = mem.w + m.d.comb += [mem.cset.eq(self.cset), + mem.tag.eq(self.tag), + mem.data_i.eq(self.data_i), + write_port.en.eq(0), # default: disable write + ] + # ---- + # Commands: READ/WRITE/TODO + # ---- + + with m.If(self.enable): + with m.Switch(self.command): + # Search all sets at a particular tag + with m.Case(SA_RD): + self.read(m) + with m.Case(SA_WR): + self.write(m) + # Maybe catch multiple tags write here? + # TODO + # TODO: invalidate/flush, flush-all? 
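                # Note: any other command value (e.g. SA_NA) simply falls
                # through this Switch with no action in the current code.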
+ + return m + + def ports(self): + return [self.enable, self.command, self.cset, self.tag, self.data_i, + self.ready, self.hit, self.multiple_hit, self.data_o] + + +if __name__ == '__main__': + sac = SetAssociativeCache(4, 8, 4, 6) + vl = rtlil.convert(sac, ports=sac.ports()) + with open("SetAssociativeCache.il", "w") as f: + f.write(vl) + + sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True) + vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports()) + with open("SetAssociativeCacheLFSR.il", "w") as f: + f.write(vl) diff --git a/src/soc/unused/TLB/TLB.py b/src/soc/unused/TLB/TLB.py new file mode 100644 index 00000000..a3c02247 --- /dev/null +++ b/src/soc/unused/TLB/TLB.py @@ -0,0 +1,177 @@ +""" TLB Module + + The expected form of the data is: + * Item (Bits) + * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0) +""" + +from nmigen import Memory, Module, Signal, Cat, Elaboratable +from nmigen.cli import main + +from .PermissionValidator import PermissionValidator +from .Cam import Cam + + +class TLB(Elaboratable): + def __init__(self, asid_size, vma_size, pte_size, L1_size): + """ Arguments + * asid_size: Address Space IDentifier (ASID) typically 15 bits + * vma_size: Virtual Memory Address (VMA) typically 36 bits + * pte_size: Page Table Entry (PTE) typically 64 bits + + Notes: + These arguments should represent the largest possible size + defined by the MODE settings. See + Volume II: RISC-V Privileged Architectures V1.10 Page 57 + """ + + # Internal + self.state = 0 + # L1 Cache Modules + self.cam_L1 = Cam(vma_size, L1_size) + self.mem_L1 = Memory(width=asid_size + pte_size, depth=L1_size) + + # Permission Validator + self.perm_validator = PermissionValidator(asid_size, pte_size) + + # Inputs + self.supermode = Signal(1) # Supervisor Mode + self.super_access = Signal(1) # Supervisor Access + # 00=None, 01=Search, 10=Write L1, 11=Write L2 + self.command = Signal(2) + self.xwr = Signal(3) # Execute, Write, Read + self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64 + self.address_L1 = Signal(range(L1_size)) + self.asid = Signal(asid_size) # Address Space IDentifier (ASID) + self.vma = Signal(vma_size) # Virtual Memory Address (VMA) + self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE) + + # Outputs + self.hit = Signal(1) # Denotes if the VMA had a mapped PTE + self.perm_valid = Signal(1) # Denotes if the permissions are correct + self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA + + def search(self, m, read_L1, write_L1): + """ searches the TLB + """ + m.d.comb += [ + write_L1.en.eq(0), + self.cam_L1.write_enable.eq(0), + self.cam_L1.data_in.eq(self.vma) + ] + # Match found in L1 CAM + match_found = Signal(reset_less=True) + m.d.comb += match_found.eq(self.cam_L1.single_match + | self.cam_L1.multiple_match) + with m.If(match_found): + # Memory shortcut variables + mem_address = self.cam_L1.match_address + # Memory Logic + m.d.comb += read_L1.addr.eq(mem_address) + # Permission Validator Logic + m.d.comb += [ + self.hit.eq(1), + # Set permission validator data to the correct + # register file data according to CAM match + # address + self.perm_validator.data.eq(read_L1.data), + # Execute, Read, Write + self.perm_validator.xwr.eq(self.xwr), + # Supervisor Mode + self.perm_validator.super_mode.eq(self.supermode), + # Supverisor Access + self.perm_validator.super_access.eq(self.super_access), + # Address Space IDentifier (ASID) + self.perm_validator.asid.eq(self.asid), + # Output result of permission validation + self.perm_valid.eq(self.perm_validator.valid) + ] + # 
Only output PTE if permissions are valid + with m.If(self.perm_validator.valid): + # XXX TODO - dummy for now + reg_data = Signal.like(self.pte_out) + m.d.comb += [ + self.pte_out.eq(reg_data) + ] + with m.Else(): + m.d.comb += [ + self.pte_out.eq(0) + ] + # Miss Logic + with m.Else(): + m.d.comb += [ + self.hit.eq(0), + self.perm_valid.eq(0), + self.pte_out.eq(0) + ] + + def write_l1(self, m, read_L1, write_L1): + """ writes to the L1 cache + """ + # Memory_L1 Logic + m.d.comb += [ + write_L1.en.eq(1), + write_L1.addr.eq(self.address_L1), + # The Cat places arguments from LSB -> MSB + write_L1.data.eq(Cat(self.pte_in, self.asid)) + ] + # CAM_L1 Logic + m.d.comb += [ + self.cam_L1.write_enable.eq(1), + self.cam_L1.data_in.eq(self.vma), # data_in is sent to all entries + # self.cam_L1.address_in.eq(todo) # a CAM entry needs to be selected + + ] + + def elaborate(self, platform): + m = Module() + # Add submodules + # Submodules for L1 Cache + m.submodules.cam_L1 = self.cam_L1 + m.submodules.read_L1 = read_L1 = self.mem_L1.read_port() + m.submodules.write_L1 = write_L1 = self.mem_L1.write_port() + + # Permission Validator Submodule + m.submodules.perm_valididator = self.perm_validator + + # When MODE specifies translation + # TODO add in different bit length handling ie prefix 0s + tlb_enable = Signal(reset_less=True) + m.d.comb += tlb_enable.eq(self.mode != 0) + + with m.If(tlb_enable): + m.d.comb += [ + self.cam_L1.enable.eq(1) + ] + with m.Switch(self.command): + # Search + with m.Case("01"): + self.search(m, read_L1, write_L1) + + # Write L1 + # Expected that the miss will be handled in software + with m.Case("10"): + self.write_l1(m, read_L1, write_L1) + + # TODO + # with m.Case("11"): + + # When disabled + with m.Else(): + m.d.comb += [ + self.cam_L1.enable.eq(0), + # XXX TODO - self.reg_file.enable.eq(0), + self.hit.eq(0), + self.perm_valid.eq(0), # XXX TODO, check this + self.pte_out.eq(0) + ] + return m + + +if __name__ == '__main__': + tlb = TLB(15, 36, 64, 4) + main(tlb, ports=[tlb.supermode, tlb.super_access, tlb.command, + tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid, + tlb.vma, tlb.pte_in, + tlb.hit, tlb.perm_valid, tlb.pte_out, + ] + tlb.cam_L1.ports()) diff --git a/src/soc/unused/TLB/__init__.py b/src/soc/unused/TLB/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/soc/unused/TLB/ariane/TreePLRU.cpp b/src/soc/unused/TLB/ariane/TreePLRU.cpp new file mode 100644 index 00000000..2f6aeea5 --- /dev/null +++ b/src/soc/unused/TLB/ariane/TreePLRU.cpp @@ -0,0 +1,211 @@ +#include +#include +#include + + +#define NWAY 4 +#define NLINE 256 +#define HIT 0 +#define MISS 1 +#define MS 1000 +/* +Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing +Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt +four-way set associative - three bits + each bit represents one branch point in a binary decision tree; let 1 + represent that the left side has been referenced more recently than the + right side, and 0 vice-versa + are all 4 lines valid? + / \ + yes no, use an invalid line + | + | + | + bit_0 == 0? state | replace ref to | next state + / \ ------+-------- -------+----------- + y n 00x | line_0 line_0 | 11_ + / \ 01x | line_1 line_1 | 10_ + bit_1 == 0? bit_2 == 0? 
1x0 | line_2 line_2 | 0_1 + / \ / \ 1x1 | line_3 line_3 | 0_0 + y n y n + / \ / \ ('x' means ('_' means unchanged) + line_0 line_1 line_2 line_3 don't care) + 8-way set associative - 7 = 1+2+4 bits +16-way set associative - 15 = 1+2+4+8 bits +32-way set associative - 31 = 1+2+4+8+16 bits +64-way set associative - 63 = 1+2+4+8+16+32 bits +*/ +using namespace std; +struct AddressField { + uint64_t wd_idx : 2;//Unused + uint64_t offset : 4;//Unused + uint64_t index : 8;//NLINE = 256 = 2^8 + uint64_t tag : 50; +}; + +union Address { + uint32_t* p; + AddressField fields; +}; + +struct Cell { + bool v; + uint64_t tag; + + Cell() : v(false), tag(0) {} + + bool isHit(uint64_t tag) { + return v && (tag == this->tag); + } + + void fetch(uint32_t* address) { + Address addr; + addr.p = address; + addr.fields.offset = 0; + addr.fields.wd_idx = 0; + tag = addr.fields.tag; + v = true; + } +}; + +ostream& operator<<(ostream & out, const Cell& cell) { + out << " v:" << cell.v << " tag:" << hex << cell.tag; + return out; +} + +struct Block { + Cell cell[NWAY]; + uint32_t state; + uint64_t *mask;//Mask the state to get accurate value for specified 1 bit. + uint64_t *value; + uint64_t *next_value; + + Block() : state(0) { + switch (NWAY) { + case 4: + mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101}; + value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101}; + next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000}; + break; + case 8: + mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001, + 0b1010001}; + value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000, + 0b1010001}; + next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000, + 0b0000001, 0b0000000}; + break; + //TODO - more NWAY goes here. + default: + std::cout << "Error definition NWAY = " << NWAY << std::endl; + } + } + + uint32_t *getByTag(uint64_t tag, uint32_t *pway) { + for (int i = 0; i < NWAY; ++i) { + if (cell[i].isHit(tag)) { + *pway = i; + return pway; + } + } + return NULL; + } + + void setLRU(uint32_t *address) { + int way = 0; + uint32_t st = state; + for (int i = 0; i < NWAY; ++i) { + if ((state & mask[i]) == value[i]) { + state ^= mask[i]; + way = i; + break; + } + } + cell[way].fetch(address); + cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl; + } + + uint32_t *get(uint32_t *address, uint32_t *pway) { + Address addr; + addr.p = address; + uint32_t *d = getByTag(addr.fields.tag, pway); + if (d != NULL) { + return &d[addr.fields.offset]; + } + return d; + } + + int set(uint32_t *address) { + uint32_t way = 0; + uint32_t *p = get(address, &way); + if (p != NULL) { + printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state); + state &= ~mask[way]; + printf("%X --> ", state); + state |= next_value[way]; + printf("%X\n", state); + // *p = *address; //skip since address is fake. 
+ return HIT; + } else { + setLRU(address); + return MISS; + } + } +}; + +ostream& operator<<(ostream & out, const Block& block) { + out << "state:" << block.state << " "; + for (int i = 0; i cacheline refill) + self.miss_gnt_o = Signal(NR_PORTS) + self.active_serving_o = Signal(NR_PORTS) + + self.critical_word_o = Signal(64) + self.critical_word_valid_o = Signal() + output ariane_axi::req_t axi_data_o, + input ariane_axi::resp_t axi_data_i, + + self.mshr_addr_i = Array(Signal(name="bdata_o", 56) \ + for i in range(NR_PORTS)) + self.mshr_addr_matches_o = Signal(NR_PORTS) + self.mshr_index_matches_o = Signal(NR_PORTS) + + # AMO + self.amo_req_i = AMOReq() + self.amo_resp_o = AMOResp() + # Port to SRAMs, for refill and eviction + self.req_o = Signal(DCACHE_SET_ASSOC) + self.addr_o = Signal(DCACHE_INDEX_WIDTH) # address into cache array + self.data_o = CacheLine() + self.be_o = CLBE() + self.data_i = Array(CacheLine() \ + for i in range(DCACHE_SET_ASSOC)) + self.we_o = Signal() + + def elaborate(self, platform): + # Registers + mshr_t mshr_d, mshr_q; + logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q; + logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q; + # cache line to evict + cache_line_t evict_cl_d, evict_cl_q; + + logic serve_amo_d, serve_amo_q; + # Request from one FSM + miss_req_valid = Signal(self.NR_PORTS) + miss_req_bypass = Signal(self.NR_PORTS) + miss_req_addr = Array(Signal(name="miss_req_addr", 64) \ + for i in range(NR_PORTS)) + miss_req_wdata = Array(Signal(name="miss_req_wdata", 64) \ + for i in range(NR_PORTS)) + miss_req_we = Signal(self.NR_PORTS) + miss_req_be = Array(Signal(name="miss_req_be", 8) \ + for i in range(NR_PORTS)) + miss_req_size = Array(Signal(name="miss_req_size", 2) \ + for i in range(NR_PORTS)) + + # Cache Line Refill <-> AXI + req_fsm_miss_valid = Signal() + req_fsm_miss_addr = Signal(64) + req_fsm_miss_wdata = Signal(DCACHE_LINE_WIDTH) + req_fsm_miss_we = Signal() + req_fsm_miss_be = Signal(DCACHE_LINE_WIDTH//8) + ariane_axi::ad_req_t req_fsm_miss_req; + req_fsm_miss_size = Signal(2) + + gnt_miss_fsm = Signal() + valid_miss_fsm = Signal() + nmiss = DCACHE_LINE_WIDTH//64 + data_miss_fsm = Array(Signal(name="data_miss_fsm", 64) \ + for i in range(nmiss)) + + # Cache Management <-> LFSR + lfsr_enable = Signal() + lfsr_oh = Signal(DCACHE_SET_ASSOC) + lfsr_bin = Signal($clog2(DCACHE_SET_ASSOC-1)) + # AMOs + ariane_pkg::amo_t amo_op; + amo_operand_a = Signal(64) + amo_operand_b = Signal(64) + amo_result_o = Signal(64) + + struct packed { + logic [63:3] address; + logic valid; + } reservation_d, reservation_q; + + # ------------------------------ + # Cache Management + # ------------------------------ + evict_way = Signal(DCACHE_SET_ASSOC) + valid_way = Signal(DCACHE_SET_ASSOC) + + for (i in range(DCACHE_SET_ASSOC): + comb += evict_way[i].eq(data_i[i].valid & data_i[i].dirty) + comb += valid_way[i].eq(data_i[i].valid) + + # ---------------------- + # Default Assignments + # ---------------------- + # to AXI refill + req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ; + req_fsm_miss_size = Const(0b11, 2) + # core + serve_amo_d = serve_amo_q; + # -------------------------------- + # Flush and Miss operation + # -------------------------------- + state_d = state_q; + cnt_d = cnt_q; + evict_way_d = evict_way_q; + evict_cl_d = evict_cl_q; + mshr_d = mshr_q; + # communicate to the requester which unit we are currently serving + active_serving_o[mshr_q.id] = mshr_q.valid; + # AMOs + # silence the unit when not used + amo_op = amo_req_i.amo_op; + + reservation_d = 
reservation_q; + with m.FSM() as state_q: + + with m.Case("IDLE"): + # lowest priority are AMOs, wait until everything else + # is served before going for the AMOs + with m.If (amo_req_i.req & ~busy_i): + # 1. Flush the cache + with m.If(~serve_amo_q): + m.next = "FLUSH_REQ_STATUS" + serve_amo_d.eq(0b1 + cnt_d.eq(0 + # 2. Do the AMO + with m.Else(): + m.next = "AMO_LOAD" + serve_amo_d.eq(0b0 + + # check if we want to flush and can flush + # e.g.: we are not busy anymore + # TODO: Check that the busy flag is indeed needed + with m.If (flush_i & ~busy_i): + m.next = "FLUSH_REQ_STATUS" + cnt_d = 0 + + # check if one of the state machines missed + for i in range(NR_PORTS): + # here comes the refill portion of code + with m.If (miss_req_valid[i] & ~miss_req_bypass[i]): + m.next = "MISS" + # we are taking another request so don't + # take the AMO + serve_amo_d = 0b0; + # save to MSHR + wid = DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH + comb += [ mshr_d.valid.eq(0b1), + mshr_d.we.eq(miss_req_we[i]), + mshr_d.id.eq(i), + mshr_d.addr.eq(miss_req_addr[i][0:wid]), + mshr_d.wdata.eq(miss_req_wdata[i]), + mshr_d.be.eq(miss_req_be[i]), + ] + break + + # ~> we missed on the cache + with m.Case("MISS"): + # 1. Check if there is an empty cache-line + # 2. If not -> evict one + comb += req_o.eq(1) + sync += addr_o.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH] + m.next = "MISS_REPL" + comb += miss_o.eq(1) + + # ~> second miss cycle + with m.Case("MISS_REPL"): + # if all are valid we need to evict one, + # pseudo random from LFSR + with m.If(~(~valid_way).bool()): + comb += lfsr_enable.eq(0b1) + comb += evict_way_d.eq(lfsr_oh) + # do we need to write back the cache line? + with m.If(data_i[lfsr_bin].dirty): + state_d = WB_CACHELINE_MISS; + comb += evict_cl_d.tag.eq(data_i[lfsr_bin].tag) + comb += evict_cl_d.data.eq(data_i[lfsr_bin].data) + comb += cnt_d.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH]) + # no - we can request a cache line now + with m.Else(): + m.next = "REQ_CACHELINE" + # we have at least one free way + with m.Else(): + # get victim cache-line by looking for the + # first non-valid bit + comb += evict_way_d.eq(get_victim_cl(~valid_way) + m.next = "REQ_CACHELINE" + + # ~> we can just load the cache-line, + # the way is store in evict_way_q + with m.Case("REQ_CACHELINE"): + comb += req_fsm_miss_valid .eq(1) + sync += req_fsm_miss_addr .eq(mshr_q.addr) + + with m.If (gnt_miss_fsm): + m.next = "SAVE_CACHELINE" + comb += miss_gnt_o[mshr_q.id].eq(1) + + # ~> replace the cacheline + with m.Case("SAVE_CACHELINE"): + # calculate cacheline offset + automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; + sync += cl_offset.eq(mshr_q.addr[3:DCACHE_BYTE_OFFSET] << 6) + # we've got a valid response from refill unit + with m.If (valid_miss_fsm): + wid = DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH + sync += addr_o .eq(mshr_q.addr[:DCACHE_INDEX_WIDTH]) + sync += req_o .eq(evict_way_q) + comb += we_o .eq(1) + comb += be_o .eq(1) + sync += be_o.vldrty .eq(evict_way_q) + sync += data_o.tag .eq(mshr_q.addr[DCACHE_INDEX_WIDTH:wid] + comb += data_o.data .eq(data_miss_fsm) + comb += data_o.valid.eq(1) + comb += data_o.dirty.eq(0) + + # is this a write? 
+ with m.If (mshr_q.we): + # Yes, so safe the updated data now + for i in range(8): + # check if we really want to write + # the corresponding byte + with m.If (mshr_q.be[i]): + sync += data_o.data[(cl_offset + i*8) +: 8].eq(mshr_q.wdata[i]; + # it's immediately dirty if we write + comb += data_o.dirty.eq(1) + + # reset MSHR + comb += mshr_d.valid.eq(0) + # go back to idle + m.next = 'IDLE' + + # ------------------------------ + # Write Back Operation + # ------------------------------ + # ~> evict a cache line from way saved in evict_way_q + with m.Case("WB_CACHELINE_FLUSH"): + with m.Case("WB_CACHELINE_MISS"): + + comb += req_fsm_miss_valid .eq(0b1) + sync += req_fsm_miss_addr .eq({evict_cl_q.tag, cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{0b0}}}; + comb += req_fsm_miss_be .eq(1) + comb += req_fsm_miss_we .eq(0b1) + sync += req_fsm_miss_wdata .eq(evict_cl_q.data; + + # we've got a grant --> this is timing critical, think about it + if (gnt_miss_fsm) begin + # write status array + sync += addr_o .eq(cnt_q) + comb += req_o .eq(0b1) + comb += we_o .eq(0b1) + comb += data_o.valid.eq(INVALIDATE_ON_FLUSH ? 0b0 : 0b1) + # invalidate + sync += be_o.vldrty.eq(evict_way_q) + # go back to handling the miss or flushing, + # depending on where we came from + with m.If(state_q == WB_CACHELINE_MISS): + m.next = "MISS" + with m.Else(): + m.next = "FLUSH_REQ_STATUS" + + # ------------------------------ + # Flushing & Initialization + # ------------------------------ + # ~> make another request to check the same + # cache-line if there are still some valid entries + with m.Case("FLUSH_REQ_STATUS"): + comb += req_o .eq(1) + sync += addr_o .eq(cnt_q) + m.next = "FLUSHING" + + with m.Case("FLUSHING"): + # this has priority + # at least one of the cache lines is dirty + with m.If(~evict_way): + # evict cache line, look for the first + # cache-line which is dirty + comb += evict_way_d.eq(get_victim_cl(evict_way)) + comb += evict_cl_d .eq(data_i[one_hot_to_bin(evict_way)]) + state_d = WB_CACHELINE_FLUSH; + # not dirty ~> increment and continue + with m.Else(): + # increment and re-request + sync += cnt_d.eq(cnt_q + (1 << DCACHE_BYTE_OFFSET)) + m.next = "FLUSH_REQ_STATUS" + sync += addr_o .eq(cnt_q) + comb += req_o .eq(1) + comb += be_o.vldrty.eq(INVALIDATE_ON_FLUSH ? 
1 : 0) + comb += we_o .eq(1) + # finished with flushing operation, go back to idle + with m.If (cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] \ + == DCACHE_NUM_WORDS-1): + # only acknowledge if the flush wasn't + # triggered by an atomic + sync += flush_ack_o.eq(~serve_amo_q) + m.next = "IDLE" + + # ~> only called after reset + with m.Case("INIT"): + # initialize status array + sync += addr_o.eq(cnt_q) + comb += req_o .eq(1) + comb += we_o .eq(1) + # only write the dirty array + comb += be_o.vldrty.eq(1) + sync += cnt_d .eq(cnt_q + (1 << DCACHE_BYTE_OFFSET)) + # finished initialization + with m.If (cnt_q[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] \ + == DCACHE_NUM_WORDS-1) + m.next = "IDLE" + + # ---------------------- + # AMOs + # ---------------------- + # TODO(zarubaf) Move this closer to memory + # ~> we are here because we need to do the AMO, + # the cache is clean at this point + # start by executing the load + with m.Case("AMO_LOAD"): + comb += req_fsm_miss_valid.eq(1) + # address is in operand a + comb += req_fsm_miss_addr.eq(amo_req_i.operand_a) + comb += req_fsm_miss_req.eq(ariane_axi::SINGLE_REQ) + comb += req_fsm_miss_size.eq(amo_req_i.size) + # the request has been granted + with m.If(gnt_miss_fsm): + m.next = "AMO_SAVE_LOAD" + # save the load value + with m.Case("AMO_SAVE_LOAD"): + with m.If (valid_miss_fsm): + # we are only concerned about the lower 64-bit + comb += mshr_d.wdata.eq(data_miss_fsm[0]) + m.next = "AMO_STORE" + # and do the store + with m.Case("AMO_STORE"): + load_data = Signal(64) + # re-align load data + comb += load_data.eq(data_align(amo_req_i.operand_a[:3], + mshr_q.wdata)) + # Sign-extend for word operation + with m.If (amo_req_i.size == 0b10): + comb += amo_operand_a.eq(sext32(load_data[:32])) + comb += amo_operand_b.eq(sext32(amo_req_i.operand_b[:32])) + with m.Else(): + comb += amo_operand_a.eq(load_data) + comb += amo_operand_b.eq(amo_req_i.operand_b) + + # we do not need a store request for load reserved + # or a failing store conditional + # we can bail-out without making any further requests + with m.If ((amo_req_i.amo_op == AMO_LR) | \ + ((amo_req_i.amo_op == AMO_SC) & \ + ((reservation_q.valid & \ + (reservation_q.address != \ + amo_req_i.operand_a[3:64])) | \ + ~reservation_q.valid))): + comb += req_fsm_miss_valid.eq(0) + m.next = "IDLE" + comb += amo_resp_o.ack.eq(1) + # write-back the result + comb += amo_resp_o.result.eq(amo_operand_a) + # we know that the SC failed + with m.If (amo_req_i.amo_op == AMO_SC): + comb += amo_resp_o.result.eq(1) + # also clear the reservation + comb += reservation_d.valid.eq(0) + with m.Else(): + comb += req_fsm_miss_valid.eq(1) + + comb += req_fsm_miss_we .eq(1) + comb += req_fsm_miss_req .eq(ariane_axi::SINGLE_REQ) + comb += req_fsm_miss_size.eq(amo_req_i.size) + comb += req_fsm_miss_addr.eq(amo_req_i.operand_a) + + comb += req_fsm_miss_wdata.eq( + data_align(amo_req_i.operand_a[0:3], amo_result_o)) + comb += req_fsm_miss_be.eq( + be_gen(amo_req_i.operand_a[0:3], amo_req_i.size)) + + # place a reservation on the memory + with m.If (amo_req_i.amo_op == AMO_LR): + comb += reservation_d.address.eq(amo_req_i.operand_a[3:64]) + comb += reservation_d.valid.eq(1) + + # the request is valid or we didn't need to go for another store + with m.If (valid_miss_fsm): + m.next = "IDLE" + comb += amo_resp_o.ack.eq(1) + # write-back the result + comb += amo_resp_o.result.eq(amo_operand_a; + + if (amo_req_i.amo_op == AMO_SC) begin + comb += amo_resp_o.result.eq(0) + # An SC must fail if there is another SC + # (to any address) between 
the LR and the SC in + # program order (even to the same address). + # in any case destroy the reservation + comb += reservation_d.valid.eq(0) + + # check MSHR for aliasing + + comb += mshr_addr_matches_o .eq(0) + comb += mshr_index_matches_o.eq() + + for i in range(NR_PORTS): + # check mshr for potential matching of other units, + # exclude the unit currently being served + with m.If (mshr_q.valid & \ + (mshr_addr_i[i][DCACHE_BYTE_OFFSET:56] == \ + mshr_q.addr[DCACHE_BYTE_OFFSET:56])): + comb += mshr_addr_matches_o[i].eq(1) + + # same as previous, but checking only the index + with m.If (mshr_q.valid & \ + (mshr_addr_i[i][DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH] == \ + mshr_q.addr[DCACHE_BYTE_OFFSET:DCACHE_INDEX_WIDTH])): + mshr_index_matches_o[i].eq(1) + + # -------------------- + # Sequential Process + # -------------------- + + """ + #pragma translate_off + `ifndef VERILATOR + # assert that cache only hits on one way + assert property ( + @(posedge clk_i) $onehot0(evict_way_q)) else $warning("Evict-way should be one-hot encoded"); + `endif + #pragma translate_on + """ + + # ---------------------- + # Bypass Arbiter + # ---------------------- + # Connection Arbiter <-> AXI + req_fsm_bypass_valid = Signal() + req_fsm_bypass_addr = Signal(64) + req_fsm_bypass_wdata = Signal(64) + req_fsm_bypass_we = Signal() + req_fsm_bypass_be = Signal(8) + req_fsm_bypass_size = Signal(2) + gnt_bypass_fsm = Signal() + valid_bypass_fsm = Signal() + data_bypass_fsm = Signal(64) + logic [$clog2(NR_PORTS)-1:0] id_fsm_bypass; + logic [3:0] id_bypass_fsm; + logic [3:0] gnt_id_bypass_fsm; + + i_bypass_arbiter = ib = AXIArbiter( NR_PORTS, 64) + comb += [ + # Master Side + ib.data_req_i .eq( miss_req_valid & miss_req_bypass ), + ib.address_i .eq( miss_req_addr ), + ib.data_wdata_i .eq( miss_req_wdata ), + ib.data_we_i .eq( miss_req_we ), + ib.data_be_i .eq( miss_req_be ), + ib.data_size_i .eq( miss_req_size ), + ib.data_gnt_o .eq( bypass_gnt_o ), + ib.data_rvalid_o .eq( bypass_valid_o ), + ib.data_rdata_o .eq( bypass_data_o ), + # Slave Sid + ib.id_i .eq( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), + ib.id_o .eq( id_fsm_bypass ), + ib.gnt_id_i .eq( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ), + ib.address_o .eq( req_fsm_bypass_addr ), + ib.data_wdata_o .eq( req_fsm_bypass_wdata ), + ib.data_req_o .eq( req_fsm_bypass_valid ), + ib.data_we_o .eq( req_fsm_bypass_we ), + ib.data_be_o .eq( req_fsm_bypass_be ), + ib.data_size_o .eq( req_fsm_bypass_size ), + ib.data_gnt_i .eq( gnt_bypass_fsm ), + ib.data_rvalid_i .eq( valid_bypass_fsm ), + ib.data_rdata_i .eq( data_bypass_fsm ), + ] + + axi_adapter #( + .DATA_WIDTH ( 64 ), + .AXI_ID_WIDTH ( 4 ), + .CACHELINE_BYTE_OFFSET ( DCACHE_BYTE_OFFSET ) + ) i_bypass_axi_adapter ( + .clk_i, + .rst_ni, + .req_i ( req_fsm_bypass_valid ), + .type_i ( ariane_axi::SINGLE_REQ ), + .gnt_o ( gnt_bypass_fsm ), + .addr_i ( req_fsm_bypass_addr ), + .we_i ( req_fsm_bypass_we ), + .wdata_i ( req_fsm_bypass_wdata ), + .be_i ( req_fsm_bypass_be ), + .size_i ( req_fsm_bypass_size ), + .id_i ( Cat(id_fsm_bypass, 0, 0) ), + .valid_o ( valid_bypass_fsm ), + .rdata_o ( data_bypass_fsm ), + .gnt_id_o ( gnt_id_bypass_fsm ), + .id_o ( id_bypass_fsm ), + .critical_word_o ( ), # not used for single requests + .critical_word_valid_o ( ), # not used for single requests + .axi_req_o ( axi_bypass_o ), + .axi_resp_i ( axi_bypass_i ) + ); + + # ---------------------- + # Cache Line AXI Refill + # ---------------------- + axi_adapter #( + .DATA_WIDTH ( DCACHE_LINE_WIDTH ), + .AXI_ID_WIDTH ( 4 ), + .CACHELINE_BYTE_OFFSET ( 
DCACHE_BYTE_OFFSET ) + ) i_miss_axi_adapter ( + .clk_i, + .rst_ni, + .req_i ( req_fsm_miss_valid ), + .type_i ( req_fsm_miss_req ), + .gnt_o ( gnt_miss_fsm ), + .addr_i ( req_fsm_miss_addr ), + .we_i ( req_fsm_miss_we ), + .wdata_i ( req_fsm_miss_wdata ), + .be_i ( req_fsm_miss_be ), + .size_i ( req_fsm_miss_size ), + .id_i ( Const(0b1100, 4) ), + .gnt_id_o ( ), # open + .valid_o ( valid_miss_fsm ), + .rdata_o ( data_miss_fsm ), + .id_o ( ), + .critical_word_o, + .critical_word_valid_o, + .axi_req_o ( axi_data_o ), + .axi_resp_i ( axi_data_i ) + ); + + # ----------------- + # Replacement LFSR + # ----------------- + lfsr_8bit #(.WIDTH (DCACHE_SET_ASSOC)) i_lfsr ( + .en_i ( lfsr_enable ), + .refill_way_oh ( lfsr_oh ), + .refill_way_bin ( lfsr_bin ), + .* + ); + + # ----------------- + # AMO ALU + # ----------------- + amo_alu i_amo_alu ( + .amo_op_i ( amo_op ), + .amo_operand_a_i ( amo_operand_a ), + .amo_operand_b_i ( amo_operand_b ), + .amo_result_o ( amo_result_o ) + ); + + # ----------------- + # Struct Split + # ----------------- + + for i in range(NR_PORTS): + miss_req = MissReq() + comb += miss_req.eq(miss_req_i[i]); + comb += miss_req_valid [i] .eq(miss_req.valid) + comb += miss_req_bypass [i] .eq(miss_req.bypass) + comb += miss_req_addr [i] .eq(miss_req.addr) + comb += miss_req_wdata [i] .eq(miss_req.wdata) + comb += miss_req_we [i] .eq(miss_req.we) + comb += miss_req_be [i] .eq(miss_req.be) + comb += miss_req_size [i] .eq(miss_req.size) + + # -------------- + # AXI Arbiter + # --------------s + # + # Description: Arbitrates access to AXI refill/bypass + # +class AXIArbiter: + def __init__(self, NR_PORTS = 3, DATA_WIDTH = 64): + self.NR_PORTS = NR_PORTS + self.DATA_WIDTH = DATA_WIDTH + self.pwid = pwid = ceil(log(NR_PORTS) / log(2)) + rst_ni = ResetSignal() # Asynchronous reset active low + # master ports + self.data_req_i = Signal(NR_PORTS) + self.address_i = Array(Signal(name="address_i", 64) \ + for i in range(NR_PORTS)) + self.data_wdata_i = Array(Signal(name="data_wdata_i", 64) \ + for i in range(NR_PORTS)) + self.data_we_i = Signal(NR_PORTS) + self.data_be_i = Array(Signal(name="data_wdata_i", DATA_WIDTH/8) \ + for i in range(NR_PORTS)) + self.data_size_i = Array(Signal(name="data_size_i", 2) \ + for i in range(NR_PORTS)) + self.data_gnt_o = Signal(NR_PORTS) + self.data_rvalid_o = Signal(NR_PORTS) + self.data_rdata_o = Array(Signal(name="data_rdata_o", 64) \ + for i in range(NR_PORTS)) + + # slave port + self.id_i = Signal(pwid) + self.id_o = Signal(pwid) + self.gnt_id_i = Signal(pwid) + self.data_req_o = Signal() + self.address_o = Signal(64) + self.data_wdata_o = Signal(DATA_WIDTH) + self.data_we_o = Signal() + self.data_be_o = Signal(DATA_WIDTH/8) + self.data_size_o = Signal(2) + self.data_gnt_i = Signal() + self.data_rvalid_i = Signal() + self.data_rdata_i = Signal(DATA_WIDTH) + + def elaborate(self, platform): + #enum logic [1:0] { IDLE, REQ, SERVING } state_d, state_q; + + class Packet: + def __init__(self, pwid, DATA_WIDTH): + self.id = Signal(pwid) + self.address = Signal(64) + self.data = Signal(64) + self.size = Signal(2) + self.be = Signal(DATA_WIDTH/8) + self.we = Signal() + + request_index = Signal(self.pwid) + req_q = Packet(self.pwid, self.DATA_WIDTH) + req_d = Packet(self.pwid, self.DATA_WIDTH) + + # request register + sync += req_q.eq(req_d) + + # request port + comb += self.address_o .eq(req_q.address) + comb += self.data_wdata_o .eq(req_q.data) + comb += self.data_be_o .eq(req_q.be) + comb += self.data_size_o .eq(req_q.size) + comb += self.data_we_o 
.eq(req_q.we) + comb += self.id_o .eq(req_q.id) + comb += self.data_gnt_o .eq(0) + # read port + comb += self.data_rvalid_o .eq(0) + comb += self.data_rdata_o .eq(0) + comb += self.data_rdata_o[req_q.id].eq(data_rdata_i) + + m.submodules.pp = pp = PriorityEncoder(self.NR_PORTS) + comb += pp.i.eq(self.data_req_i) # select one request (priority-based) + comb += request_index.eq(pp.o) + + with m.Switch("state") as s: + + with m.Case("IDLE"): + # wait for incoming requests (priority encoder data_req_i) + with m.If(~pp.n): # one output valid from encoder + comb += self.data_req_o .eq(self.data_req_i[i]) + comb += self.data_gnt_o[i].eq(self.data_req_i[i]) + # save the request + comb += req_d.address.eq(self.address_i[i]) + comb += req_d.id.eq(request_index) + comb += req_d.data.eq(self.data_wdata_i[i]) + comb += req_d.size.eq(self.data_size_i[i]) + comb += req_d.be.eq(self.data_be_i[i]) + comb += req_d.we.eq(self.data_we_i[i]) + m.next = "SERVING" + + comb += self.address_o .eq(self.address_i[request_index]) + comb += self.data_wdata_o .eq(self.data_wdata_i[request_index]) + comb += self.data_be_o .eq(self.data_be_i[request_index]) + comb += self.data_size_o .eq(self.data_size_i[request_index]) + comb += self.data_we_o .eq(self.data_we_i[request_index]) + comb += self.id_o .eq(request_index) + + with m.Case("SERVING"): + comb += self.data_req_o.eq(1) + with m.If (self.data_rvalid_i): + comb += self.data_rvalid_o[req_q.id].eq(1) + m.next = "IDLE" + + # ------------ + # Assertions + # ------------ + + """ +#pragma translate_off +`ifndef VERILATOR +# make sure that we eventually get an rvalid after we received a grant +assert property (@(posedge clk_i) data_gnt_i |-> ##[1:$] data_rvalid_i ) + else begin $error("There was a grant without a rvalid"); $stop(); end +# assert that there is no grant without a request +assert property (@(negedge clk_i) data_gnt_i |-> data_req_o) + else begin $error("There was a grant without a request."); $stop(); end +# assert that the address does not contain X when request is sent +assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_o)) ) + else begin $error("address contains X when request is set"); $stop(); end + +`endif +#pragma translate_on + """ + diff --git a/src/soc/unused/TLB/ariane/mmu.py b/src/soc/unused/TLB/ariane/mmu.py new file mode 100644 index 00000000..a14862cd --- /dev/null +++ b/src/soc/unused/TLB/ariane/mmu.py @@ -0,0 +1,474 @@ +""" +# Copyright 2018 ETH Zurich and University of Bologna. +# Copyright and related rights are licensed under the Solderpad Hardware +# License, Version 0.51 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# or agreed to in writing, software, hardware and materials distributed under +# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Author: Florian Zaruba, ETH Zurich +# Date: 19/04/2017 +# Description: Memory Management Unit for Ariane, contains TLB and +# address translation unit. 
SV48 as defined in +# Volume II: RISC-V Privileged Architectures V1.10 Page 63 + +import ariane_pkg::*; +""" + +from nmigen import Const, Signal, Cat, Module, Mux +from nmigen.cli import verilog, rtlil + +from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW +from tlb import TLB +from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT, + LOAD_PAGE_FAULT, STORE_PAGE_FAULT) + +PRIV_LVL_M = Const(0b11, 2) +PRIV_LVL_S = Const(0b01, 2) +PRIV_LVL_U = Const(0b00, 2) + + +class RVException: + def __init__(self): + self.cause = Signal(64) # cause of exception + self.tval = Signal(64) # more info of causing exception + # (e.g.: instruction causing it), + # address of LD/ST fault + self.valid = Signal() + + def eq(self, inp): + res = [] + for (o, i) in zip(self.ports(), inp.ports()): + res.append(o.eq(i)) + return res + + def __iter__(self): + yield self.cause + yield self.tval + yield self.valid + + def ports(self): + return list(self) + + +class ICacheReqI: + def __init__(self): + self.fetch_valid = Signal() # address translation valid + self.fetch_paddr = Signal(64) # physical address in + self.fetch_exception = RVException() # exception occurred during fetch + + def __iter__(self): + yield self.fetch_valid + yield self.fetch_paddr + yield from self.fetch_exception + + def ports(self): + return list(self) + + +class ICacheReqO: + def __init__(self): + self.fetch_req = Signal() # address translation request + self.fetch_vaddr = Signal(64) # virtual address out + + def __iter__(self): + yield self.fetch_req + yield self.fetch_vaddr + + def ports(self): + return list(self) + + +class MMU: + def __init__(self, instr_tlb_entries = 4, + data_tlb_entries = 4, + asid_width = 1): + self.instr_tlb_entries = instr_tlb_entries + self.data_tlb_entries = data_tlb_entries + self.asid_width = asid_width + + self.flush_i = Signal() + self.enable_translation_i = Signal() + self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST + # IF interface + self.icache_areq_i = ICacheReqO() + self.icache_areq_o = ICacheReqI() + # LSU interface + # this is a more minimalistic interface because the actual addressing + # logic is handled in the LSU as we distinguish load and stores, + # what we do here is simple address translation + self.misaligned_ex_i = RVException() + self.lsu_req_i = Signal() # request address translation + self.lsu_vaddr_i = Signal(64) # virtual address in + self.lsu_is_store_i = Signal() # the translation is requested by a store + # if we need to walk the page table we can't grant in the same cycle + + # Cycle 0 + self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request + # if translation hits in the DTLB + # Cycle 1 + self.lsu_valid_o = Signal() # translation is valid + self.lsu_paddr_o = Signal(64) # translated address + self.lsu_exception_o = RVException() # addr translate threw exception + + # General control signals + self.priv_lvl_i = Signal(2) + self.ld_st_priv_lvl_i = Signal(2) + self.sum_i = Signal() + self.mxr_i = Signal() + # input logic flag_mprv_i, + self.satp_ppn_i = Signal(44) + self.asid_i = Signal(self.asid_width) + self.flush_tlb_i = Signal() + # Performance counters + self.itlb_miss_o = Signal() + self.dtlb_miss_o = Signal() + # PTW memory interface + self.req_port_i = DCacheReqO() + self.req_port_o = DCacheReqI() + + def elaborate(self, platform): + m = Module() + + iaccess_err = Signal() # insufficient priv to access instr page + daccess_err = Signal() # insufficient priv to access data page + ptw_active = Signal() # PTW is currently walking 
a page table + walking_instr = Signal() # PTW is walking because of an ITLB miss + ptw_error = Signal() # PTW threw an exception + + update_vaddr = Signal(48) # guessed + uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros + update_ptw_itlb = TLBUpdate(self.asid_width) + update_ptw_dtlb = TLBUpdate(self.asid_width) + + itlb_lu_access = Signal() + itlb_content = PTE() + itlb_is_2M = Signal() + itlb_is_1G = Signal() + itlb_is_512G = Signal() + itlb_lu_hit = Signal() + + dtlb_lu_access = Signal() + dtlb_content = PTE() + dtlb_is_2M = Signal() + dtlb_is_1G = Signal() + dtlb_is_512G = Signal() + dtlb_lu_hit = Signal() + + # Assignments + m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req), + dtlb_lu_access.eq(self.lsu_req_i) + ] + + # ITLB + m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries, + self.asid_width) + m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i), + i_tlb.update_i.eq(update_ptw_itlb), + i_tlb.lu_access_i.eq(itlb_lu_access), + i_tlb.lu_asid_i.eq(self.asid_i), + i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr), + itlb_content.eq(i_tlb.lu_content_o), + itlb_is_2M.eq(i_tlb.lu_is_2M_o), + itlb_is_1G.eq(i_tlb.lu_is_1G_o), + itlb_is_512G.eq(i_tlb.lu_is_512G_o), + itlb_lu_hit.eq(i_tlb.lu_hit_o), + ] + + # DTLB + m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries, + self.asid_width) + m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i), + d_tlb.update_i.eq(update_ptw_dtlb), + d_tlb.lu_access_i.eq(dtlb_lu_access), + d_tlb.lu_asid_i.eq(self.asid_i), + d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i), + dtlb_content.eq(d_tlb.lu_content_o), + dtlb_is_2M.eq(d_tlb.lu_is_2M_o), + dtlb_is_1G.eq(d_tlb.lu_is_1G_o), + dtlb_is_512G.eq(d_tlb.lu_is_512G_o), + dtlb_lu_hit.eq(d_tlb.lu_hit_o), + ] + + # PTW + m.submodules.ptw = ptw = PTW(self.asid_width) + m.d.comb += [ptw_active.eq(ptw.ptw_active_o), + walking_instr.eq(ptw.walking_instr_o), + ptw_error.eq(ptw.ptw_error_o), + ptw.enable_translation_i.eq(self.enable_translation_i), + + update_vaddr.eq(ptw.update_vaddr_o), + update_ptw_itlb.eq(ptw.itlb_update_o), + update_ptw_dtlb.eq(ptw.dtlb_update_o), + + ptw.itlb_access_i.eq(itlb_lu_access), + ptw.itlb_hit_i.eq(itlb_lu_hit), + ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr), + + ptw.dtlb_access_i.eq(dtlb_lu_access), + ptw.dtlb_hit_i.eq(dtlb_lu_hit), + ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i), + + ptw.req_port_i.eq(self.req_port_i), + self.req_port_o.eq(ptw.req_port_o), + ] + + # ila_1 i_ila_1 ( + # .clk(clk_i), # input wire clk + # .probe0({req_port_o.address_tag, req_port_o.address_index}), + # .probe1(req_port_o.data_req), # input wire [63:0] probe1 + # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2 + # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3 + # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4 + # .probe5(ptw_error), # input wire [1:0] probe5 + # .probe6(update_vaddr), # input wire [0:0] probe6 + # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7 + # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8 + # .probe9(dtlb_lu_access), # input wire [0:0] probe9 + # .probe10(lsu_vaddr_i), # input wire [0:0] probe10 + # .probe11(dtlb_lu_hit), # input wire [0:0] probe11 + # .probe12(itlb_lu_access), # input wire [0:0] probe12 + # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13 + # .probe14(itlb_lu_hit) # input wire [0:0] probe13 + # ); + + #----------------------- + # Instruction Interface + #----------------------- + # The instruction interface is a simple request response interface + + # MMU disabled: just pass 
through + m.d.comb += [self.icache_areq_o.fetch_valid.eq( + self.icache_areq_i.fetch_req), + # play through in case we disabled address translation + self.icache_areq_o.fetch_paddr.eq( + self.icache_areq_i.fetch_vaddr) + ] + # two potential exception sources: + # 1. HPTW threw an exception -> signal with a page fault exception + # 2. We got an access error because of insufficient permissions -> + # throw an access exception + m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0) + # Check whether we are allowed to access this memory region + # from a fetch perspective + + # PLATEN TODO: use PermissionValidator instead [we like modules] + m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \ + (((self.priv_lvl_i == PRIV_LVL_U) & \ + ~itlb_content.u) | \ + ((self.priv_lvl_i == PRIV_LVL_S) & \ + itlb_content.u))) + + # MMU enabled: address from TLB, request delayed until hit. + # Error when TLB hit and no access right or TLB hit and + # translated address not valid (e.g. AXI decode error), + # or when PTW performs walk due to ITLB miss and raises + # an error. + with m.If (self.enable_translation_i): + # we work with SV48, so if VM is enabled, check that + # all bits [47:38] are equal + with m.If (self.icache_areq_i.fetch_req & \ + ~(((~self.icache_areq_i.fetch_vaddr[47:64]) == 0) | \ + (self.icache_areq_i.fetch_vaddr[47:64]) == 0)): + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), + fe.tval.eq(self.icache_areq_i.fetch_vaddr), + fe.valid.eq(1) + ] + + m.d.comb += self.icache_areq_o.fetch_valid.eq(0) + + # 4K page + paddr = Signal.like(self.icache_areq_o.fetch_paddr) + paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12], + itlb_content.ppn) + m.d.comb += paddr.eq(paddr4k) + # Mega page + with m.If(itlb_is_2M): + m.d.comb += paddr[12:21].eq( + self.icache_areq_i.fetch_vaddr[12:21]) + # Giga page + with m.If(itlb_is_1G): + m.d.comb += paddr[12:30].eq( + self.icache_areq_i.fetch_vaddr[12:30]) + m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr) + # Tera page + with m.If(itlb_is_512G): + m.d.comb += paddr[12:39].eq( + self.icache_areq_i.fetch_vaddr[12:39]) + m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr) + + # --------- + # ITLB Hit + # -------- + # if we hit the ITLB output the request signal immediately + with m.If(itlb_lu_hit): + m.d.comb += self.icache_areq_o.fetch_valid.eq( + self.icache_areq_i.fetch_req) + # we got an access error + with m.If (iaccess_err): + # throw a page fault + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), + fe.tval.eq(self.icache_areq_i.fetch_vaddr), + fe.valid.eq(1) + ] + # --------- + # ITLB Miss + # --------- + # watch out for exceptions happening during walking the page table + with m.Elif(ptw_active & walking_instr): + m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error) + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT), + fe.tval.eq(uaddr64), + fe.valid.eq(1) + ] + + #----------------------- + # Data Interface + #----------------------- + + lsu_vaddr = Signal(64) + dtlb_pte = PTE() + misaligned_ex = RVException() + lsu_req = Signal() + lsu_is_store = Signal() + dtlb_hit = Signal() + #dtlb_is_2M = Signal() + #dtlb_is_1G = Signal() + #dtlb_is_512 = Signal() + + # check if we need to do translation or if we are always + # ready (e.g.: we are not translating anything) + m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i, + dtlb_lu_hit, 1)) + + # The data interface is simpler and only consists of a + # 
request/response interface + m.d.comb += [ + # save request and DTLB response + lsu_vaddr.eq(self.lsu_vaddr_i), + lsu_req.eq(self.lsu_req_i), + misaligned_ex.eq(self.misaligned_ex_i), + dtlb_pte.eq(dtlb_content), + dtlb_hit.eq(dtlb_lu_hit), + lsu_is_store.eq(self.lsu_is_store_i), + #dtlb_is_2M.eq(dtlb_is_2M), + #dtlb_is_1G.eq(dtlb_is_1G), + ##dtlb_is_512.eq(self.dtlb_is_512G) #???? + ] + m.d.sync += [ + self.lsu_paddr_o.eq(lsu_vaddr), + self.lsu_valid_o.eq(lsu_req), + self.lsu_exception_o.eq(misaligned_ex), + ] + + sverr = Signal() + usrerr = Signal() + + m.d.comb += [ + # mute misaligned exceptions if there is no request + # otherwise they will throw accidental exceptions + misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i), + + # SUM is not set and we are trying to access a user + # page in supervisor mode + sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \ + dtlb_pte.u), + # this is not a user page but we are in user mode and + # trying to access it + usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u), + + # Check if the User flag is set, then we may only + # access it in supervisor mode if SUM is enabled + daccess_err.eq(sverr | usrerr), + ] + + # translation is enabled and no misaligned exception occurred + with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid): + m.d.comb += lsu_req.eq(0) + # 4K page + paddr = Signal.like(lsu_vaddr) + paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn) + m.d.comb += paddr.eq(paddr4k) + # Mega page + with m.If(dtlb_is_2M): + m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21]) + # Giga page + with m.If(dtlb_is_1G): + m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30]) + m.d.sync += self.lsu_paddr_o.eq(paddr) + # TODO platen tera_page + + # --------- + # DTLB Hit + # -------- + with m.If(dtlb_hit & lsu_req): + m.d.comb += lsu_req.eq(1) + # this is a store + with m.If (lsu_is_store): + # check if the page is write-able and + # we are not violating privileges + # also check if the dirty flag is set + with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), + le.tval.eq(lsu_vaddr), + le.valid.eq(1) + ] + + # this is a load, check for sufficient access + # privileges - throw a page fault if necessary + with m.Elif(daccess_err): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), + le.tval.eq(lsu_vaddr), + le.valid.eq(1) + ] + # --------- + # DTLB Miss + # --------- + # watch out for exceptions + with m.Elif (ptw_active & ~walking_instr): + # page table walker threw an exception + with m.If (ptw_error): + # an error makes the translation valid + m.d.comb += lsu_req.eq(1) + # the page table walker can only throw page faults + with m.If (lsu_is_store): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), + le.tval.eq(uaddr64), + le.valid.eq(1) + ] + with m.Else(): + m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), + le.tval.eq(uaddr64), + le.valid.eq(1) + ] + + return m + + def ports(self): + return [self.flush_i, self.enable_translation_i, + self.en_ld_st_translation_i, + self.lsu_req_i, + self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o, + self.lsu_valid_o, self.lsu_paddr_o, + self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i, + self.satp_ppn_i, self.asid_i, self.flush_tlb_i, + self.itlb_miss_o, self.dtlb_miss_o] + \ + self.icache_areq_i.ports() + self.icache_areq_o.ports() + \ + self.req_port_i.ports() + self.req_port_o.ports() + \ + self.misaligned_ex_i.ports() + self.lsu_exception_o.ports() + +if 
__name__ == '__main__': + mmu = MMU() + vl = rtlil.convert(mmu, ports=mmu.ports()) + with open("test_mmu.il", "w") as f: + f.write(vl) + diff --git a/src/soc/unused/TLB/ariane/p_lru.txt b/src/soc/unused/TLB/ariane/p_lru.txt new file mode 100644 index 00000000..4bac7680 --- /dev/null +++ b/src/soc/unused/TLB/ariane/p_lru.txt @@ -0,0 +1,51 @@ +pseudo-LRU + +two-way set associative - one bit + + indicates which line of the two has been reference more recently + + +four-way set associative - three bits + + each bit represents one branch point in a binary decision tree; let 1 + represent that the left side has been referenced more recently than the + right side, and 0 vice-versa + + are all 4 lines valid? + / \ + yes no, use an invalid line + | + | + | + bit_0 == 0? state | replace ref to | next state + / \ ------+-------- -------+----------- + y n 00x | line_0 line_0 | 11_ + / \ 01x | line_1 line_1 | 10_ + bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1 + / \ / \ 1x1 | line_3 line_3 | 0_0 + y n y n + / \ / \ ('x' means ('_' means unchanged) + line_0 line_1 line_2 line_3 don't care) + + (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev. + Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm) + + +note that there is a 6-bit encoding for true LRU for four-way set associative + + bit 0: bank[1] more recently used than bank[0] + bit 1: bank[2] more recently used than bank[0] + bit 2: bank[2] more recently used than bank[1] + bit 3: bank[3] more recently used than bank[0] + bit 4: bank[3] more recently used than bank[1] + bit 5: bank[3] more recently used than bank[2] + + this results in 24 valid bit patterns within the 64 possible bit patterns + (4! possible valid traces for bank references) + + e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111 + + you can implement a state machine with a 256x6 ROM (6-bit state encoding + appended with a 2-bit bank reference input will yield a new 6-bit state), + and you can implement an LRU bank indicator with a 64x2 ROM + diff --git a/src/soc/unused/TLB/ariane/plru.py b/src/soc/unused/TLB/ariane/plru.py new file mode 100644 index 00000000..a8db5c27 --- /dev/null +++ b/src/soc/unused/TLB/ariane/plru.py @@ -0,0 +1,105 @@ +from nmigen import Signal, Module, Cat, Const +from nmigen.hdl.ir import Elaboratable +from math import log2 + + +class PLRU(Elaboratable): + """ PLRU - Pseudo Least Recently Used Replacement + + PLRU-tree indexing: + lvl0 0 + / \ + / \ + lvl1 1 2 + / \ / \ + lvl2 3 4 5 6 + / \ /\/\ /\ + ... ... ... ... + """ + def __init__(self, entries): + self.entries = entries + self.lu_hit = Signal(entries) + self.replace_en_o = Signal(entries) + self.lu_access_i = Signal() + # Tree (bit per entry) + self.TLBSZ = 2*(self.entries-1) + self.plru_tree = Signal(self.TLBSZ) + self.plru_tree_o = Signal(self.TLBSZ) + + def elaborate(self, platform=None): + m = Module() + + # Just predefine which nodes will be set/cleared + # E.g. 
for a TLB with 8 entries, the for-loop is semantically
+        # equivalent to the following pseudo-code:
+        # unique case (1'b1)
+        # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
+        # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
+        # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
+        # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
+        # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
+        # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
+        # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
+        # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
+        # default: begin /* No hit */ end
+        # endcase
+        LOG_TLB = int(log2(self.entries))
+        print(LOG_TLB)
+        for i in range(self.entries):
+            # we got a hit so update the pointer as it was least recently used
+            hit = Signal(reset_less=True)
+            m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
+            with m.If(hit):
+                # Set the nodes to the values we would expect
+                for lvl in range(LOG_TLB):
+                    idx_base = (1<<lvl)-1
+                    # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                    shift = LOG_TLB - lvl
+                    new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
+                    plru_idx = idx_base + (i >> shift)
+                    print ("plru", i, lvl, hex(idx_base),
+                           plru_idx, shift, new_idx)
+                    m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)
+
+        # Decode tree to write enable signals
+        # Next for-loop basically creates the following logic for e.g.
+        # an 8 entry TLB (note: pseudo-code obviously):
+        # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
+        # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
+        # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
+        # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
+        # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
+        # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
+        # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
+        # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
+        # For each entry traverse the tree. If every tree-node matches
+        # the corresponding bit of the entry's index, this is
+        # the next entry to replace.
+        replace = []
+        for i in range(self.entries):
+            en = []
+            for lvl in range(LOG_TLB):
+                idx_base = (1<<lvl)-1
+                # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+                shift = LOG_TLB - lvl
+                new_idx = (i >> (shift-1)) & 1
+                plru_idx = idx_base + (i>>shift)
+                plru = Signal(reset_less=True,
+                              name="plru-%d-%d-%d" % (i, lvl, plru_idx))
+                m.d.comb += plru.eq(self.plru_tree[plru_idx])
+                # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
+                if new_idx:
+                    en.append(~plru)  # yes inverted (using bool())
+                else:
+                    en.append(plru)  # yes inverted (using bool())
+            print ("plru", i, en)
+            # boolean logic manipulation:
+            # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
+            replace.append(~Cat(*en).bool())
+        m.d.comb += self.replace_en_o.eq(Cat(*replace))
+
+        return m
+
+    def ports(self):
+        return [self.entries, self.lu_hit, self.replace_en_o,
+                self.lu_access_i, self.plru_tree, self.plru_tree_o]
diff --git a/src/soc/unused/TLB/ariane/ptw.py b/src/soc/unused/TLB/ariane/ptw.py
new file mode 100644
index 00000000..4046c711
--- /dev/null
+++ b/src/soc/unused/TLB/ariane/ptw.py
@@ -0,0 +1,556 @@
+"""
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http:#solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +# or agreed to in writing, software, hardware and materials distributed under +# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Author: David Schaffenrath, TU Graz +# Author: Florian Zaruba, ETH Zurich +# Date: 24.4.2017 +# Description: Hardware-PTW + +/* verilator lint_off WIDTH */ +import ariane_pkg::*; + +see linux kernel source: + +* "arch/riscv/include/asm/page.h" +* "arch/riscv/include/asm/mmu_context.h" +* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET) + +""" + +from nmigen import Const, Signal, Cat, Module, Elaboratable +from nmigen.hdl.ast import ArrayProxy +from nmigen.cli import verilog, rtlil +from math import log2 + + +DCACHE_SET_ASSOC = 8 +CONFIG_L1D_SIZE = 32*1024 +DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC)) +DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH + +ASID_WIDTH = 8 + + +class DCacheReqI: + def __init__(self): + self.address_index = Signal(DCACHE_INDEX_WIDTH) + self.address_tag = Signal(DCACHE_TAG_WIDTH) + self.data_wdata = Signal(64) + self.data_req = Signal() + self.data_we = Signal() + self.data_be = Signal(8) + self.data_size = Signal(2) + self.kill_req = Signal() + self.tag_valid = Signal() + + def eq(self, inp): + res = [] + for (o, i) in zip(self.ports(), inp.ports()): + res.append(o.eq(i)) + return res + + def ports(self): + return [self.address_index, self.address_tag, + self.data_wdata, self.data_req, + self.data_we, self.data_be, self.data_size, + self.kill_req, self.tag_valid, + ] + +class DCacheReqO: + def __init__(self): + self.data_gnt = Signal() + self.data_rvalid = Signal() + self.data_rdata = Signal(64) # actually in PTE object format + + def eq(self, inp): + res = [] + for (o, i) in zip(self.ports(), inp.ports()): + res.append(o.eq(i)) + return res + + def ports(self): + return [self.data_gnt, self.data_rvalid, self.data_rdata] + + +class PTE: #(RecordObject): + def __init__(self): + self.v = Signal() + self.r = Signal() + self.w = Signal() + self.x = Signal() + self.u = Signal() + self.g = Signal() + self.a = Signal() + self.d = Signal() + self.rsw = Signal(2) + self.ppn = Signal(44) + self.reserved = Signal(10) + + def flatten(self): + return Cat(*self.ports()) + + def eq(self, x): + if isinstance(x, ArrayProxy): + res = [] + for o in self.ports(): + i = getattr(x, o.name) + res.append(i) + x = Cat(*res) + else: + x = x.flatten() + return self.flatten().eq(x) + + def __iter__(self): + """ order is critical so that flatten creates LSB to MSB + """ + yield self.v + yield self.r + yield self.w + yield self.x + yield self.u + yield self.g + yield self.a + yield self.d + yield self.rsw + yield self.ppn + yield self.reserved + + def ports(self): + return list(self) + + +class TLBUpdate: + def __init__(self, asid_width): + self.valid = Signal() # valid flag + self.is_2M = Signal() + self.is_1G = Signal() + self.is_512G = Signal() + self.vpn = Signal(36) + self.asid = Signal(asid_width) + self.content = PTE() + + def flatten(self): + return Cat(*self.ports()) + + def eq(self, x): + return self.flatten().eq(x.flatten()) + + def ports(self): + return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \ + self.content.ports() + + +# SV48 defines four levels of page tables +LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1 +LVL2 = Const(1, 2) +LVL3 = Const(2, 2) +LVL4 = Const(3, 2) + + +class 
PTW(Elaboratable): + def __init__(self, asid_width=8): + self.asid_width = asid_width + + self.flush_i = Signal() # flush everything, we need to do this because + # actually everything we do is speculative at this stage + # e.g.: there could be a CSR instruction that changes everything + self.ptw_active_o = Signal(reset=1) # active if not IDLE + self.walking_instr_o = Signal() # set when walking for TLB + self.ptw_error_o = Signal() # set when an error occurred + self.enable_translation_i = Signal() # CSRs indicate to enable SV48 + self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st + + self.lsu_is_store_i = Signal() # translation triggered by store + # PTW memory interface + self.req_port_i = DCacheReqO() + self.req_port_o = DCacheReqI() + + # to TLBs, update logic + self.itlb_update_o = TLBUpdate(asid_width) + self.dtlb_update_o = TLBUpdate(asid_width) + + self.update_vaddr_o = Signal(48) + + self.asid_i = Signal(self.asid_width) + # from TLBs + # did we miss? + self.itlb_access_i = Signal() + self.itlb_hit_i = Signal() + self.itlb_vaddr_i = Signal(64) + + self.dtlb_access_i = Signal() + self.dtlb_hit_i = Signal() + self.dtlb_vaddr_i = Signal(64) + # from CSR file + self.satp_ppn_i = Signal(44) # ppn from satp + self.mxr_i = Signal() + # Performance counters + self.itlb_miss_o = Signal() + self.dtlb_miss_o = Signal() + + def ports(self): + return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o, + ] + return [ + self.enable_translation_i, self.en_ld_st_translation_i, + self.lsu_is_store_i, self.req_port_i, self.req_port_o, + self.update_vaddr_o, + self.asid_i, + self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i, + self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i, + self.satp_ppn_i, self.mxr_i, + self.itlb_miss_o, self.dtlb_miss_o + ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports() + + def elaborate(self, platform): + m = Module() + + # input registers + data_rvalid = Signal() + data_rdata = Signal(64) + + # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata + # is spec'd in 64-bit binary-format: better to spec as Record? + pte = PTE() + m.d.comb += pte.flatten().eq(data_rdata) + + # SV48 defines four levels of page tables + ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above) + ptw_lvl1 = Signal() + ptw_lvl2 = Signal() + ptw_lvl3 = Signal() + ptw_lvl4 = Signal() + m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1), + ptw_lvl2.eq(ptw_lvl == LVL2), + ptw_lvl3.eq(ptw_lvl == LVL3), + ptw_lvl4.eq(ptw_lvl == LVL4) + ] + + # is this an instruction page table walk? + is_instr_ptw = Signal() + global_mapping = Signal() + # latched tag signal + tag_valid = Signal() + # register the ASID + tlb_update_asid = Signal(self.asid_width) + # register VPN we need to walk, SV48 defines a 48 bit virtual addr + vaddr = Signal(64) + # 4 byte aligned physical pointer + ptw_pptr = Signal(56) + + end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH + m.d.sync += [ + # Assignments + self.update_vaddr_o.eq(vaddr), + + self.walking_instr_o.eq(is_instr_ptw), + # directly output the correct physical address + self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]), + self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]), + # we are never going to kill this request + self.req_port_o.kill_req.eq(0), # XXX assign comb? + # we are never going to write with the HPTW + self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb? 
+ # ----------- + # TLB Update + # ----------- + self.itlb_update_o.vpn.eq(vaddr[12:48]), + self.dtlb_update_o.vpn.eq(vaddr[12:48]), + # update the correct page table level + self.itlb_update_o.is_2M.eq(ptw_lvl3), + self.itlb_update_o.is_1G.eq(ptw_lvl2), + self.itlb_update_o.is_512G.eq(ptw_lvl1), + self.dtlb_update_o.is_2M.eq(ptw_lvl3), + self.dtlb_update_o.is_1G.eq(ptw_lvl2), + self.dtlb_update_o.is_512G.eq(ptw_lvl1), + + # output the correct ASID + self.itlb_update_o.asid.eq(tlb_update_asid), + self.dtlb_update_o.asid.eq(tlb_update_asid), + # set the global mapping bit + self.itlb_update_o.content.eq(pte), + self.itlb_update_o.content.g.eq(global_mapping), + self.dtlb_update_o.content.eq(pte), + self.dtlb_update_o.content.g.eq(global_mapping), + + self.req_port_o.tag_valid.eq(tag_valid), + ] + + #------------------- + # Page table walker #needs update + #------------------- + # A virtual address va is translated into a physical address pa as + # follows: + # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv48, + # PAGESIZE=2^12 and LEVELS=4.) + # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. + # (For Sv32, PTESIZE=4.) + # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an + # access exception. + # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to + # step 5. Otherwise, this PTE is a pointer to the next level of + # the page table. + # Let i=i-1. If i < 0, stop and raise an access exception. + # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2. + # 5. A leaf PTE has been found. Determine if the requested memory + # access is allowed by the pte.r, pte.w, and pte.x bits. If not, + # stop and raise an access exception. Otherwise, the translation is + # successful. Set pte.a to 1, and, if the memory access is a + # store, set pte.d to 1. + # The translated physical address is given as follows: + # - pa.pgoff = va.pgoff. + # - If i > 0, then this is a superpage translation and + # pa.ppn[i-1:0] = va.vpn[i-1:0]. + # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. + # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned + # superpage stop and raise a page-fault exception. 
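+
+        # Illustrative sketch only (not part of the generated hardware): a
+        # pure-python model of the address formation in step 2 above,
+        # assuming PAGESIZE=2^12, PTESIZE=8 and LEVELS=4 as stated.  The
+        # helper name is hypothetical and is kept purely as documentation;
+        # it mirrors the Cat(Const(0, 3), vpn_slice, ppn) form used for the
+        # next-level pointers further down, because
+        # a + va.vpn[i]*PTESIZE == (ppn << 12) | (vpn[i] << 3).
+        def _walk_step_addr(ppn, vaddr, i):
+            # vpn[i] is the i-th 9-bit slice of the virtual page number
+            vpn_i = (vaddr >> (12 + 9 * i)) & 0x1ff
+            return (ppn << 12) + (vpn_i << 3)
+        # e.g. _walk_step_addr(satp_ppn, va, 3) (3 == LEVELS-1) would give
+        # the first (LVL1) PTE address of an Sv48 walk; the elaborated
+        # logic never calls it.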
+ + m.d.sync += tag_valid.eq(0) + + # default assignments + m.d.comb += [ + # PTW memory interface + self.req_port_o.data_req.eq(0), + self.req_port_o.data_be.eq(Const(0xFF, 8)), + self.req_port_o.data_size.eq(Const(0b11, 2)), + self.req_port_o.data_we.eq(0), + self.ptw_error_o.eq(0), + self.itlb_update_o.valid.eq(0), + self.dtlb_update_o.valid.eq(0), + + self.itlb_miss_o.eq(0), + self.dtlb_miss_o.eq(0), + ] + + # ------------ + # State Machine + # ------------ + + with m.FSM() as fsm: + + with m.State("IDLE"): + self.idle(m, is_instr_ptw, ptw_lvl, global_mapping, + ptw_pptr, vaddr, tlb_update_asid) + + with m.State("WAIT_GRANT"): + self.grant(m, tag_valid, data_rvalid) + + with m.State("PTE_LOOKUP"): + # we wait for the valid signal + with m.If(data_rvalid): + self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4, + data_rvalid, global_mapping, + is_instr_ptw, ptw_pptr) + + # Propagate error to MMU/LSU + with m.State("PROPAGATE_ERROR"): + m.next = "IDLE" + m.d.comb += self.ptw_error_o.eq(1) + + # wait for the rvalid before going back to IDLE + with m.State("WAIT_RVALID"): + with m.If(data_rvalid): + m.next = "IDLE" + + m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata), + data_rvalid.eq(self.req_port_i.data_rvalid) + ] + + return m + + def set_grant_state(self, m): + # should we have flushed before we got an rvalid, + # wait for it until going back to IDLE + with m.If(self.flush_i): + with m.If (self.req_port_i.data_gnt): + m.next = "WAIT_RVALID" + with m.Else(): + m.next = "IDLE" + with m.Else(): + m.next = "WAIT_GRANT" + + def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping, + ptw_pptr, vaddr, tlb_update_asid): + # by default we start with the top-most page table + m.d.sync += [is_instr_ptw.eq(0), + ptw_lvl.eq(LVL1), + global_mapping.eq(0), + self.ptw_active_o.eq(0), # deactive (IDLE) + ] + # work out itlb/dtlb miss + m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \ + self.itlb_access_i & \ + ~self.itlb_hit_i & \ + ~self.dtlb_access_i) + m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \ + self.dtlb_access_i & \ + ~self.dtlb_hit_i) + # we got an ITLB miss? + with m.If(self.itlb_miss_o): + pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:48], + self.satp_ppn_i) + m.d.sync += [ptw_pptr.eq(pptr), + is_instr_ptw.eq(1), + vaddr.eq(self.itlb_vaddr_i), + tlb_update_asid.eq(self.asid_i), + ] + self.set_grant_state(m) + + # we got a DTLB miss? 
+        with m.Elif(self.dtlb_miss_o):
+            pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:48],
+                       self.satp_ppn_i)
+            m.d.sync += [ptw_pptr.eq(pptr),
+                         vaddr.eq(self.dtlb_vaddr_i),
+                         tlb_update_asid.eq(self.asid_i),
+                        ]
+            self.set_grant_state(m)
+
+    def grant(self, m, tag_valid, data_rvalid):
+        # we've got a data WAIT_GRANT so tell the
+        # cache that the tag is valid
+
+        # send a request out
+        m.d.comb += self.req_port_o.data_req.eq(1)
+        # wait for the WAIT_GRANT
+        with m.If(self.req_port_i.data_gnt):
+            # send the tag valid signal one cycle later
+            m.d.sync += tag_valid.eq(1)
+            # should we have flushed before we got an rvalid,
+            # wait for it until going back to IDLE
+            with m.If(self.flush_i):
+                with m.If (~data_rvalid):
+                    m.next = "WAIT_RVALID"
+                with m.Else():
+                    m.next = "IDLE"
+            with m.Else():
+                m.next = "PTE_LOOKUP"
+
+    def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4,
+               data_rvalid, global_mapping,
+               is_instr_ptw, ptw_pptr):
+        # temporaries
+        pte_rx = Signal(reset_less=True)
+        pte_exe = Signal(reset_less=True)
+        pte_inv = Signal(reset_less=True)
+        pte_a = Signal(reset_less=True)
+        st_wd = Signal(reset_less=True)
+        m.d.comb += [pte_rx.eq(pte.r | pte.x),
+                     pte_exe.eq(~pte.x | ~pte.a),
+                     pte_inv.eq(~pte.v | (~pte.r & pte.w)),
+                     pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
+                     st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]
+
+        # misaligned-superpage checks (case 6 below): the low ppn bits must
+        # be zero.  note the explicit parentheses: "&" binds more tightly
+        # than "!=" in python, and the comparison width matches each slice
+        l1err = Signal(reset_less=True)
+        l2err = Signal(reset_less=True)
+        l3err = Signal(reset_less=True)
+        m.d.comb += [l3err.eq(ptw_lvl3 & (pte.ppn[0:9] != Const(0, 9))),
+                     l2err.eq(ptw_lvl2 & (pte.ppn[0:18] != Const(0, 18))),
+                     l1err.eq(ptw_lvl1 & (pte.ppn[0:27] != Const(0, 27)))]
+
+        # check if the global mapping bit is set
+        with m.If (pte.g):
+            m.d.sync += global_mapping.eq(1)
+
+        m.next = "IDLE"
+
+        # -------------
+        # Invalid PTE
+        # -------------
+        # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
+        # stop and raise a page-fault exception.
+        with m.If (pte_inv):
+            m.next = "PROPAGATE_ERROR"
+
+        # -----------
+        # Valid PTE
+        # -----------
+
+        # it is a valid PTE
+        # if pte.r = 1 or pte.x = 1 it is a valid PTE
+        with m.Elif (pte_rx):
+            # Valid translation found (either 1G, 2M or 4K)
+            with m.If(is_instr_ptw):
+                # ------------
+                # Update ITLB
+                # ------------
+                # If page not executable, we can directly raise error.
+                # This doesn't put a useless entry into the TLB.
+                # The same idea applies to the access flag since we let
+                # the access flag be managed by SW.
+                with m.If (pte_exe):
+                    m.next = "IDLE"
+                with m.Else():
+                    m.d.comb += self.itlb_update_o.valid.eq(1)
+
+            with m.Else():
+                # ------------
+                # Update DTLB
+                # ------------
+                # Check if the access flag has been set, otherwise
+                # throw page-fault and let software handle those bits.
+                # If page not readable (there are no write-only pages)
+                # directly raise an error. This doesn't put a useless
+                # entry into the TLB. 
+ with m.If(pte_a): + m.d.comb += self.dtlb_update_o.valid.eq(1) + with m.Else(): + m.next = "PROPAGATE_ERROR" + # Request is a store: perform additional checks + # If the request was a store and the page not + # write-able, raise an error + # the same applies if the dirty flag is not set + with m.If (st_wd): + m.d.comb += self.dtlb_update_o.valid.eq(0) + m.next = "PROPAGATE_ERROR" + + # check if the ppn is correctly aligned: Case (6) + with m.If(l1err | l2err | l3err): + m.next = "PROPAGATE_ERROR" + m.d.comb += [self.dtlb_update_o.valid.eq(0), + self.itlb_update_o.valid.eq(0)] + + # this is a pointer to the next TLB level + with m.Else(): + # pointer to next level of page table + with m.If (ptw_lvl1): + # we are in the second level now + pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39], pte.ppn) + m.d.sync += [ptw_pptr.eq(pptr), + ptw_lvl.eq(LVL2) + ] + with m.If(ptw_lvl2): + # here we received a pointer to the third level + pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn) + m.d.sync += [ptw_pptr.eq(pptr), + ptw_lvl.eq(LVL3) + ] + with m.If(ptw_lvl3): #guess: shift page levels by one + # here we received a pointer to the fourth level + # the last one is near the page offset + pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn) + m.d.sync += [ptw_pptr.eq(pptr), + ptw_lvl.eq(LVL4) + ] + self.set_grant_state(m) + + with m.If (ptw_lvl4): + # Should already be the last level + # page table => Error + m.d.sync += ptw_lvl.eq(LVL4) + m.next = "PROPAGATE_ERROR" + + +if __name__ == '__main__': + ptw = PTW() + vl = rtlil.convert(ptw, ports=ptw.ports()) + with open("test_ptw.il", "w") as f: + f.write(vl) diff --git a/src/soc/unused/TLB/ariane/test/test_plru.py b/src/soc/unused/TLB/ariane/test/test_plru.py new file mode 100644 index 00000000..9222d796 --- /dev/null +++ b/src/soc/unused/TLB/ariane/test/test_plru.py @@ -0,0 +1,13 @@ +import sys +from soc.TLB.ariane.plru import PLRU +from nmigen.compat.sim import run_simulation + + +def tbench(dut): + yield + + +if __name__ == "__main__": + dut = PLRU(4) + run_simulation(dut, tbench(dut), vcd_name="test_plru.vcd") + print("PLRU Unit Test Success") diff --git a/src/soc/unused/TLB/ariane/test/test_ptw.py b/src/soc/unused/TLB/ariane/test/test_ptw.py new file mode 100644 index 00000000..39697566 --- /dev/null +++ b/src/soc/unused/TLB/ariane/test/test_ptw.py @@ -0,0 +1,127 @@ +from nmigen.compat.sim import run_simulation +from soc.TLB.ariane.ptw import PTW, PTE + +# unit was changed, test needs to be changed + + +def tbench(dut): + + addr = 0x8000000 + + #pte = PTE() + # yield pte.v.eq(1) + # yield pte.r.eq(1) + + yield dut.req_port_i.data_gnt.eq(1) + yield dut.req_port_i.data_rvalid.eq(1) + yield dut.req_port_i.data_rdata.eq(0x43) # pte.flatten()) + + # data lookup + yield dut.en_ld_st_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000000) + + yield + yield + yield + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x200000) + + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + + # data lookup, PTW levels 1-2-3 + addr = 0x4000000 + yield dut.dtlb_vaddr_i.eq(addr) + yield dut.mxr_i.eq(0x1) + yield dut.req_port_i.data_gnt.eq(1) + yield dut.req_port_i.data_rvalid.eq(1) + # pte.flatten()) + yield dut.req_port_i.data_rdata.eq(0x41 | (addr 
>> 12) << 10) + + yield dut.en_ld_st_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(addr) + + yield + yield + yield + yield + yield + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + yield + yield + + # instruction lookup + yield dut.en_ld_st_translation_i.eq(0) + yield dut.enable_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x800000) + + yield + yield + yield + + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x200000) + + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x800011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + + yield + + +def test_ptw(): + dut = PTW() + run_simulation(dut, tbench(dut), vcd_name="test_ptw.vcd") + print("PTW Unit Test Success") + + +if __name__ == "__main__": + test_ptw() diff --git a/src/soc/unused/TLB/ariane/test/test_tlb.py b/src/soc/unused/TLB/ariane/test/test_tlb.py new file mode 100644 index 00000000..e1b17b8b --- /dev/null +++ b/src/soc/unused/TLB/ariane/test/test_tlb.py @@ -0,0 +1,67 @@ +from nmigen.compat.sim import run_simulation + +from soc.TLB.ariane.tlb import TLB + + +def set_vaddr(addr): + yield dut.lu_vaddr_i.eq(addr) + yield dut.update_i.vpn.eq(addr >> 12) + + +def tbench(dut): + yield dut.lu_access_i.eq(1) + yield dut.lu_asid_i.eq(1) + yield dut.update_i.valid.eq(1) + yield dut.update_i.is_1G.eq(0) + yield dut.update_i.is_2M.eq(0) + yield dut.update_i.asid.eq(1) + yield dut.update_i.content.ppn.eq(0) + yield dut.update_i.content.rsw.eq(0) + yield dut.update_i.content.r.eq(1) + + yield + + addr = 0x80000 + yield from set_vaddr(addr) + yield + + addr = 0x90001 + yield from set_vaddr(addr) + yield + + addr = 0x28000000 + yield from set_vaddr(addr) + yield + + addr = 0x28000001 + yield from set_vaddr(addr) + + addr = 0x28000001 + yield from set_vaddr(addr) + yield + + addr = 0x1000040000 + yield from set_vaddr(addr) + yield + + addr = 0x1000040001 + yield from set_vaddr(addr) + yield + + yield dut.update_i.is_1G.eq(1) + addr = 0x2040000 + yield from set_vaddr(addr) + yield + + yield dut.update_i.is_1G.eq(1) + addr = 0x2040001 + yield from set_vaddr(addr) + yield + + yield + + +if __name__ == "__main__": + dut = TLB() + run_simulation(dut, tbench(dut), vcd_name="test_tlb.vcd") + print("TLB Unit Test Success") diff --git a/src/soc/unused/TLB/ariane/test/test_tlb_content.py b/src/soc/unused/TLB/ariane/test/test_tlb_content.py new file mode 100644 index 00000000..1bc60d88 --- /dev/null +++ b/src/soc/unused/TLB/ariane/test/test_tlb_content.py @@ -0,0 +1,63 @@ +from nmigen.compat.sim import run_simulation + +from soc.TLB.ariane.tlb_content import TLBContent +from soc.TestUtil.test_helper import assert_op, assert_eq + + +def update(dut, a, t, g, m): + yield dut.replace_en_i.eq(1) + yield dut.update_i.valid.eq(1) + yield dut.update_i.is_512G.eq(t) + yield dut.update_i.is_1G.eq(g) + yield dut.update_i.is_2M.eq(m) + yield dut.update_i.vpn.eq(a) + yield + yield + + +def check_hit(dut, hit, pagesize): + hit_d = yield dut.lu_hit_o + assert_eq("hit", hit_d, hit) + + if(hit): + if(pagesize == "t"): + hitp = yield dut.lu_is_512G_o + 
assert_eq("lu_is_512G_o", hitp, 1) + elif(pagesize == "g"): + hitp = yield dut.lu_is_1G_o + assert_eq("lu_is_1G_o", hitp, 1) + elif(pagesize == "m"): + hitp = yield dut.lu_is_2M_o + assert_eq("lu_is_2M_o", hitp, 1) + + +def addr(a, b, c, d): + return a | b << 9 | c << 18 | d << 27 + + +def tbench(dut): + yield dut.vpn0.eq(0x0A) + yield dut.vpn1.eq(0x0B) + yield dut.vpn2.eq(0x0C) + yield dut.vpn3.eq(0x0D) + yield from update(dut, addr(0xFF, 0xFF, 0xFF, 0x0D), 1, 0, 0) + yield from check_hit(dut, 1, "t") + + yield from update(dut, addr(0xFF, 0xFF, 0x0C, 0x0D), 0, 1, 0) + yield from check_hit(dut, 1, "g") + + yield from update(dut, addr(0xFF, 0x0B, 0x0C, 0x0D), 0, 0, 1) + yield from check_hit(dut, 1, "m") + + yield from update(dut, addr(0x0A, 0x0B, 0x0C, 0x0D), 0, 0, 0) + yield from check_hit(dut, 1, "") + + yield from update(dut, addr(0xAA, 0xBB, 0xCC, 0xDD), 0, 0, 0) + yield from check_hit(dut, 0, "miss") + + +if __name__ == "__main__": + dut = TLBContent(4, 4) + # + run_simulation(dut, tbench(dut), vcd_name="test_tlb_content.vcd") + print("TLBContent Unit Test Success") diff --git a/src/soc/unused/TLB/ariane/tlb.py b/src/soc/unused/TLB/ariane/tlb.py new file mode 100644 index 00000000..72b67a2d --- /dev/null +++ b/src/soc/unused/TLB/ariane/tlb.py @@ -0,0 +1,176 @@ +""" +# Copyright 2018 ETH Zurich and University of Bologna. +# Copyright and related rights are licensed under the Solderpad Hardware +# License, Version 0.51 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# or agreed to in writing, software, hardware and materials distributed under +# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. 
+# +# Author: David Schaffenrath, TU Graz +# Author: Florian Zaruba, ETH Zurich +# Date: 21.4.2017 +# Description: Translation Lookaside Buffer, SV48 +# fully set-associative + +Implementation in c++: +https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp + +Text description: +https://people.cs.clemson.edu/~mark/464/p_lru.txt + +Online simulator: +http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html +""" +from math import log2 +from nmigen import Signal, Module, Cat, Const, Array, Elaboratable +from nmigen.cli import verilog, rtlil +from nmigen.lib.coding import Encoder + +from soc.TLB.ariane.ptw import TLBUpdate, PTE, ASID_WIDTH +from soc.TLB.ariane.plru import PLRU +from soc.TLB.ariane.tlb_content import TLBContent + +TLB_ENTRIES = 8 + + +class TLB(Elaboratable): + def __init__(self, tlb_entries=8, asid_width=8): + self.tlb_entries = tlb_entries + self.asid_width = asid_width + + self.flush_i = Signal() # Flush signal + # Lookup signals + self.lu_access_i = Signal() + self.lu_asid_i = Signal(self.asid_width) + self.lu_vaddr_i = Signal(64) + self.lu_content_o = PTE() + self.lu_is_2M_o = Signal() + self.lu_is_1G_o = Signal() + self.lu_is_512G_o = Signal() + self.lu_hit_o = Signal() + # Update TLB + self.pte_width = len(self.lu_content_o.flatten()) + self.update_i = TLBUpdate(asid_width) + + def elaborate(self, platform): + m = Module() + + vpn3 = Signal(9) # FIXME unused signal + vpn2 = Signal(9) + vpn1 = Signal(9) + vpn0 = Signal(9) + + # ------------- + # Translation + # ------------- + + # SV48 defines four levels of page tables + m.d.comb += [vpn0.eq(self.lu_vaddr_i[12:21]), + vpn1.eq(self.lu_vaddr_i[21:30]), + vpn2.eq(self.lu_vaddr_i[30:39]), + vpn3.eq(self.lu_vaddr_i[39:48]), # FIXME + ] + + tc = [] + for i in range(self.tlb_entries): + tlc = TLBContent(self.pte_width, self.asid_width) + setattr(m.submodules, "tc%d" % i, tlc) + tc.append(tlc) + # connect inputs + tlc.update_i = self.update_i # saves a lot of graphviz links + m.d.comb += [tlc.vpn0.eq(vpn0), + tlc.vpn1.eq(vpn1), + tlc.vpn2.eq(vpn2), + # TODO 4th + tlc.flush_i.eq(self.flush_i), + # tlc.update_i.eq(self.update_i), + tlc.lu_asid_i.eq(self.lu_asid_i)] + tc = Array(tc) + + # -------------- + # Select hit + # -------------- + + # use Encoder to select hit index + # XXX TODO: assert that there's only one valid entry (one lu_hit) + hitsel = Encoder(self.tlb_entries) + m.submodules.hitsel = hitsel + + hits = [] + for i in range(self.tlb_entries): + hits.append(tc[i].lu_hit_o) + m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well) + idx = hitsel.o + + active = Signal(reset_less=True) + m.d.comb += active.eq(~hitsel.n) + with m.If(active): + # active hit, send selected as output + m.d.comb += [self.lu_is_512G_o.eq(tc[idx].lu_is_512G_o), + self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o), + self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o), + self.lu_hit_o.eq(1), + self.lu_content_o.flatten().eq(tc[idx].lu_content_o), + ] + + # -------------- + # PLRU. 
+ # -------------- + + p = PLRU(self.tlb_entries) + plru_tree = Signal(p.TLBSZ) + m.submodules.plru = p + + # connect PLRU inputs/outputs + # XXX TODO: assert that there's only one valid entry (one replace_en) + en = [] + for i in range(self.tlb_entries): + en.append(tc[i].replace_en_i) + m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags + p.lu_hit.eq(hitsel.i), + p.lu_access_i.eq(self.lu_access_i), + p.plru_tree.eq(plru_tree)] + m.d.sync += plru_tree.eq(p.plru_tree_o) + + # -------------- + # Sanity checks + # -------------- + + assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \ + "TLB size must be a multiple of 2 and greater than 1" + assert (self.asid_width >= 1), \ + "ASID width must be at least 1" + + return m + + """ + # Just for checking + function int countSetBits(logic[self.tlb_entries-1:0] vector); + automatic int count = 0; + foreach (vector[idx]) begin + count += vector[idx]; + end + return count; + endfunction + + assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1)) + else $error("More then one hit in TLB!"); $stop(); end + assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1)) + else $error("More then one TLB entry selected for next replace!"); + """ + + def ports(self): + return [self.flush_i, self.lu_access_i, + self.lu_asid_i, self.lu_vaddr_i, + self.lu_is_2M_o, self.lu_1G_o, self.lu_is_512G_o, self.lu_hit_o + ] + self.lu_content_o.ports() + self.update_i.ports() + + +if __name__ == '__main__': + tlb = TLB() + vl = rtlil.convert(tlb, ports=tlb.ports()) + with open("test_tlb.il", "w") as f: + f.write(vl) diff --git a/src/soc/unused/TLB/ariane/tlb_content.py b/src/soc/unused/TLB/ariane/tlb_content.py new file mode 100644 index 00000000..bfd17c13 --- /dev/null +++ b/src/soc/unused/TLB/ariane/tlb_content.py @@ -0,0 +1,143 @@ +from nmigen import Signal, Module, Cat, Const, Elaboratable + +from soc.TLB.ariane.ptw import TLBUpdate, PTE + + +class TLBEntry: + def __init__(self, asid_width): + self.asid = Signal(asid_width, name="ent_asid") + # SV48 defines four levels of page tables + self.vpn0 = Signal(9, name="ent_vpn0") + self.vpn1 = Signal(9, name="ent_vpn1") + self.vpn2 = Signal(9, name="ent_vpn2") + self.vpn3 = Signal(9, name="ent_vpn3") + self.is_2M = Signal(name="ent_is_2M") + self.is_1G = Signal(name="ent_is_1G") + self.is_512G = Signal(name="ent_is_512G") + self.valid = Signal(name="ent_valid") + + def flatten(self): + return Cat(*self.ports()) + + def eq(self, x): + return self.flatten().eq(x.flatten()) + + def ports(self): + return [self.asid, self.vpn0, self.vpn1, self.vpn2, + self.is_2M, self.is_1G, self.valid] + + +class TLBContent(Elaboratable): + def __init__(self, pte_width, asid_width): + self.asid_width = asid_width + self.pte_width = pte_width + self.flush_i = Signal() # Flush signal + # Update TLB + self.update_i = TLBUpdate(asid_width) + self.vpn3 = Signal(9) + self.vpn2 = Signal(9) + self.vpn1 = Signal(9) + self.vpn0 = Signal(9) + self.replace_en_i = Signal() # replace the following entry, + # set by replacement strategy + # Lookup signals + self.lu_asid_i = Signal(asid_width) + self.lu_content_o = Signal(pte_width) + self.lu_is_512G_o = Signal() + self.lu_is_2M_o = Signal() + self.lu_is_1G_o = Signal() + self.lu_hit_o = Signal() + + def elaborate(self, platform): + m = Module() + + tags = TLBEntry(self.asid_width) + + content = Signal(self.pte_width) + + m.d.comb += [self.lu_hit_o.eq(0), + self.lu_is_512G_o.eq(0), + self.lu_is_2M_o.eq(0), + self.lu_is_1G_o.eq(0)] + + # temporaries for lookup + asid_ok 
= Signal(reset_less=True) + # tags_ok = Signal(reset_less=True) + + vpn3_ok = Signal(reset_less=True) + vpn2_ok = Signal(reset_less=True) + vpn1_ok = Signal(reset_less=True) + vpn0_ok = Signal(reset_less=True) + + #tags_2M = Signal(reset_less=True) + vpn0_or_2M = Signal(reset_less=True) + + m.d.comb += [ + # compare asid and vpn* + asid_ok.eq(tags.asid == self.lu_asid_i), + vpn3_ok.eq(tags.vpn3 == self.vpn3), + vpn2_ok.eq(tags.vpn2 == self.vpn2), + vpn1_ok.eq(tags.vpn1 == self.vpn1), + vpn0_ok.eq(tags.vpn0 == self.vpn0), + vpn0_or_2M.eq(tags.is_2M | vpn0_ok) + ] + + with m.If(asid_ok & tags.valid): + # first level, only vpn3 needs to match + with m.If(tags.is_512G & vpn3_ok): + m.d.comb += [self.lu_content_o.eq(content), + self.lu_is_512G_o.eq(1), + self.lu_hit_o.eq(1), + ] + # second level , second level vpn2 and vpn3 need to match + with m.Elif(tags.is_1G & vpn2_ok & vpn3_ok): + m.d.comb += [self.lu_content_o.eq(content), + self.lu_is_1G_o.eq(1), + self.lu_hit_o.eq(1), + ] + # not a giga page hit nor a tera page hit so check further + with m.Elif(vpn1_ok): + # this could be a 2 mega page hit or a 4 kB hit + # output accordingly + with m.If(vpn0_or_2M): + m.d.comb += [self.lu_content_o.eq(content), + self.lu_is_2M_o.eq(tags.is_2M), + self.lu_hit_o.eq(1), + ] + # ------------------ + # Update or Flush + # ------------------ + + # temporaries + replace_valid = Signal(reset_less=True) + m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i) + + # flush + with m.If(self.flush_i): + # invalidate (flush) conditions: all if zero or just this ASID + with m.If(self.lu_asid_i == Const(0, self.asid_width) | + (self.lu_asid_i == tags.asid)): + m.d.sync += tags.valid.eq(0) + + # normal replacement + with m.Elif(replace_valid): + m.d.sync += [ # update tag array + tags.asid.eq(self.update_i.asid), + tags.vpn3.eq(self.update_i.vpn[27:36]), + tags.vpn2.eq(self.update_i.vpn[18:27]), + tags.vpn1.eq(self.update_i.vpn[9:18]), + tags.vpn0.eq(self.update_i.vpn[0:9]), + tags.is_512G.eq(self.update_i.is_512G), + tags.is_1G.eq(self.update_i.is_1G), + tags.is_2M.eq(self.update_i.is_2M), + tags.valid.eq(1), + # and content as well + content.eq(self.update_i.content.flatten()) + ] + return m + + def ports(self): + return [self.flush_i, + self.lu_asid_i, + self.lu_is_2M_o, self.lu_is_1G_o, self.lu_is_512G_o, self.lu_hit_o, + ] + self.update_i.content.ports() + self.update_i.ports() diff --git a/src/soc/unused/TLB/test/__init__.py b/src/soc/unused/TLB/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/soc/unused/TLB/test/test_LFSR2.py b/src/soc/unused/TLB/test/test_LFSR2.py new file mode 100644 index 00000000..33208f83 --- /dev/null +++ b/src/soc/unused/TLB/test/test_LFSR2.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information +from soc.TLB.LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3 + +from nmigen.back.pysim import Simulator, Delay, Tick +import unittest + + +class TestLFSR(unittest.TestCase): + def test_poly(self): + v = LFSRPolynomial() + self.assertEqual(repr(v), "LFSRPolynomial([0])") + self.assertEqual(str(v), "1") + v = LFSRPolynomial([1]) + self.assertEqual(repr(v), "LFSRPolynomial([1, 0])") + self.assertEqual(str(v), "x + 1") + v = LFSRPolynomial([0, 1]) + self.assertEqual(repr(v), "LFSRPolynomial([1, 0])") + self.assertEqual(str(v), "x + 1") + v = LFSRPolynomial([1, 2]) + self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])") + self.assertEqual(str(v), "x^2 + x + 1") + v = LFSRPolynomial([2]) + 
self.assertEqual(repr(v), "LFSRPolynomial([2, 0])") + self.assertEqual(str(v), "x^2 + 1") + self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1") + + def test_lfsr_3(self): + module = LFSR(LFSR_POLY_3) + traces = [module.state, module.enable] + with Simulator(module, + vcd_file=open("Waveforms/test_LFSR2.vcd", "w"), + gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"), + traces=traces) as sim: + sim.add_clock(1e-6, phase=0.25e-6) + delay = Delay(1e-7) + + def async_process(): + yield module.enable.eq(0) + yield Tick() + self.assertEqual((yield module.state), 0x1) + yield Tick() + self.assertEqual((yield module.state), 0x1) + yield module.enable.eq(1) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x2) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x5) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x3) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x7) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x6) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x4) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x1) + yield Tick() + + sim.add_process(async_process) + sim.run() diff --git a/src/soc/unused/TLB/test/test_address_encoder.py b/src/soc/unused/TLB/test/test_address_encoder.py new file mode 100644 index 00000000..70d435d6 --- /dev/null +++ b/src/soc/unused/TLB/test/test_address_encoder.py @@ -0,0 +1,116 @@ +from nmigen.compat.sim import run_simulation +from soc.TLB.AddressEncoder import AddressEncoder +from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op + + +# This function allows for the easy setting of values to the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# i (Input): The array of single bits to be written +def set_encoder(dut, i): + yield dut.i.eq(i) + yield + +# Checks the single match of the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# sm (Single Match): The expected match result +# op (Operation): (0 => ==), (1 => !=) + + +def check_single_match(dut, sm, op): + out_sm = yield dut.single_match + assert_op("Single Match", out_sm, sm, op) + +# Checks the multiple match of the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# mm (Multiple Match): The expected match result +# op (Operation): (0 => ==), (1 => !=) + + +def check_multiple_match(dut, mm, op): + out_mm = yield dut.multiple_match + assert_op("Multiple Match", out_mm, mm, op) + +# Checks the output of the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# o (Output): The expected output +# op (Operation): (0 => ==), (1 => !=) + + +def check_output(dut, o, op): + out_o = yield dut.o + assert_op("Output", out_o, o, op) + +# Checks the state of the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# sm (Single Match): The expected match result +# mm (Multiple Match): The expected match result +# o (Output): The expected output +# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +# o_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) + + +def check_all(dut, sm, mm, o, sm_op, mm_op, o_op): + yield from check_single_match(dut, sm, sm_op) + yield from check_multiple_match(dut, mm, mm_op) + yield from check_output(dut, o, o_op) + + +def tbench(dut): + # Check invalid input + in_val = 0b000 + single_match = 0 + multiple_match = 0 
+ output = 0 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + # Check single bit + in_val = 0b001 + single_match = 1 + multiple_match = 0 + output = 0 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + # Check another single bit + in_val = 0b100 + single_match = 1 + multiple_match = 0 + output = 2 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + # Check multiple match + # We expected the lowest bit to be returned which is address 0 + in_val = 0b101 + single_match = 0 + multiple_match = 1 + output = 0 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + # Check another multiple match + # We expected the lowest bit to be returned which is address 1 + in_val = 0b110 + single_match = 0 + multiple_match = 1 + output = 1 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + +def test_addr(): + dut = AddressEncoder(4) + run_simulation(dut, tbench(dut), + vcd_name="Waveforms/test_address_encoder.vcd") + print("AddressEncoder Unit Test Success") + + +if __name__ == "__main__": + test_addr() diff --git a/src/soc/unused/TLB/test/test_cam.py b/src/soc/unused/TLB/test/test_cam.py new file mode 100644 index 00000000..d11cd974 --- /dev/null +++ b/src/soc/unused/TLB/test/test_cam.py @@ -0,0 +1,218 @@ +from nmigen.compat.sim import run_simulation + +from soc.TLB.Cam import Cam + +from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op + +# This function allows for the easy setting of values to the Cam +# Arguments: +# dut: The Cam being tested +# e (Enable): Whether the block is going to be enabled +# we (Write Enable): Whether the Cam will write on the next cycle +# a (Address): Where the data will be written if write enable is high +# d (Data): Either what we are looking for or will write to the address + + +def set_cam(dut, e, we, a, d): + yield dut.enable.eq(e) + yield dut.write_enable.eq(we) + yield dut.address_in.eq(a) + yield dut.data_in.eq(d) + yield + +# Checks the multiple match of the Cam +# Arguments: +# dut: The Cam being tested +# mm (Multiple Match): The expected match result +# op (Operation): (0 => ==), (1 => !=) + + +def check_multiple_match(dut, mm, op): + out_mm = yield dut.multiple_match + assert_op("Multiple Match", out_mm, mm, op) + +# Checks the single match of the Cam +# Arguments: +# dut: The Cam being tested +# sm (Single Match): The expected match result +# op (Operation): (0 => ==), (1 => !=) + + +def check_single_match(dut, sm, op): + out_sm = yield dut.single_match + assert_op("Single Match", out_sm, sm, op) + +# Checks the address output of the Cam +# Arguments: +# dut: The Cam being tested +# ma (Match Address): The expected match result +# op (Operation): (0 => ==), (1 => !=) + + +def check_match_address(dut, ma, op): + out_ma = yield dut.match_address + assert_op("Match Address", out_ma, ma, op) + +# Checks the state of the Cam +# Arguments: +# dut: The Cam being tested +# sm (Single Match): The expected match result +# mm (Multiple Match): The expected match result +# ma: (Match Address): The expected address output +# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +# ma_op (Operation): Operation for the address assertion (0 => ==), 
(1 => !=) + + +def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op): + yield from check_multiple_match(dut, mm, mm_op) + yield from check_single_match(dut, sm, sm_op) + yield from check_match_address(dut, ma, ma_op) + + +def tbench(dut): + # NA + enable = 0 + write_enable = 0 + address = 0 + data = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Read Miss Multiple + # Note that the default starting entry data bits are all 0 + enable = 1 + write_enable = 0 + address = 0 + data = 0 + multiple_match = 1 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_multiple_match(dut, multiple_match, 0) + + # Read Miss + # Note that the default starting entry data bits are all 0 + enable = 1 + write_enable = 0 + address = 0 + data = 1 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Write Entry 0 + enable = 1 + write_enable = 1 + address = 0 + data = 4 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Read Hit Entry 0 + enable = 1 + write_enable = 0 + address = 0 + data = 4 + multiple_match = 0 + single_match = 1 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) + + # Search Hit + enable = 1 + write_enable = 0 + address = 0 + data = 4 + multiple_match = 0 + single_match = 1 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) + + # Search Miss + enable = 1 + write_enable = 0 + address = 0 + data = 5 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Multiple Match test + # Write Entry 1 + enable = 1 + write_enable = 1 + address = 1 + data = 5 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Write Entry 2 + # Same data as Entry 1 + enable = 1 + write_enable = 1 + address = 2 + data = 5 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Read Hit Data 5 + enable = 1 + write_enable = 0 + address = 1 + data = 5 + multiple_match = 1 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) + + # Verify read_warning is not caused + # Write Entry 0 + enable = 1 + write_enable = 1 + address = 0 + data = 7 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + # Note there is no yield we immediately attempt to read in the next cycle + + # Read Hit Data 7 + enable = 1 + write_enable = 0 + address = 0 + data = 7 + multiple_match = 0 + single_match = 1 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + yield + + +def test_cam(): + dut = Cam(4, 4) + run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam.vcd") + print("Cam Unit Test Success") + + +if __name__ == 
"__main__": + test_cam() diff --git a/src/soc/unused/TLB/test/test_cam_entry.py b/src/soc/unused/TLB/test/test_cam_entry.py new file mode 100644 index 00000000..961445b6 --- /dev/null +++ b/src/soc/unused/TLB/test/test_cam_entry.py @@ -0,0 +1,119 @@ +from nmigen.compat.sim import run_simulation + +from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op +from soc.TLB.CamEntry import CamEntry + +# This function allows for the easy setting of values to the Cam Entry +# Arguments: +# dut: The CamEntry being tested +# c (command): NA (0), Read (1), Write (2), Reserve (3) +# d (data): The data to be set + + +def set_cam_entry(dut, c, d): + # Write desired values + yield dut.command.eq(c) + yield dut.data_in.eq(d) + yield + # Reset all lines + yield dut.command.eq(0) + yield dut.data_in.eq(0) + yield + +# Checks the data state of the CAM entry +# Arguments: +# dut: The CamEntry being tested +# d (Data): The expected data +# op (Operation): (0 => ==), (1 => !=) + + +def check_data(dut, d, op): + out_d = yield dut.data + assert_op("Data", out_d, d, op) + +# Checks the match state of the CAM entry +# Arguments: +# dut: The CamEntry being tested +# m (Match): The expected match +# op (Operation): (0 => ==), (1 => !=) + + +def check_match(dut, m, op): + out_m = yield dut.match + assert_op("Match", out_m, m, op) + +# Checks the state of the CAM entry +# Arguments: +# dut: The CamEntry being tested +# d (data): The expected data +# m (match): The expected match +# d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=) +# m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) + + +def check_all(dut, d, m, d_op, m_op): + yield from check_data(dut, d, d_op) + yield from check_match(dut, m, m_op) + +# This tbench goes through the paces of testing the CamEntry module +# It is done by writing and then reading various combinations of key/data pairs +# and reading the results with varying keys to verify the resulting stored +# data is correct. 
+ + +def tbench(dut): + # Check write + command = 2 + data = 1 + match = 0 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 0, 0) + + # Check read miss + command = 1 + data = 2 + match = 0 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 1, 0) + + # Check read hit + command = 1 + data = 1 + match = 1 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 0, 0) + + # Check overwrite + command = 2 + data = 5 + match = 0 + yield from set_cam_entry(dut, command, data) + yield + yield from check_all(dut, data, match, 0, 0) + + # Check read hit + command = 1 + data = 5 + match = 1 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 0, 0) + + # Check reset + command = 3 + data = 0 + match = 0 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 0, 0) + + # Extra clock cycle for waveform + yield + + +def test_camentry(): + dut = CamEntry(4) + run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam_entry.vcd") + print("CamEntry Unit Test Success") + + +if __name__ == "__main__": + test_camentry() diff --git a/src/soc/unused/TLB/test/test_permission_validator.py b/src/soc/unused/TLB/test/test_permission_validator.py new file mode 100644 index 00000000..b52b5459 --- /dev/null +++ b/src/soc/unused/TLB/test/test_permission_validator.py @@ -0,0 +1,150 @@ +from nmigen.compat.sim import run_simulation + +from soc.TLB.PermissionValidator import PermissionValidator + +from soc.TestUtil.test_helper import assert_op + + +def set_validator(dut, d, xwr, sm, sa, asid): + yield dut.data.eq(d) + yield dut.xwr.eq(xwr) + yield dut.super_mode.eq(sm) + yield dut.super_access.eq(sa) + yield dut.asid.eq(asid) + yield + + +def check_valid(dut, v, op): + out_v = yield dut.valid + assert_op("Valid", out_v, v, op) + + +def tbench(dut): + # 80 bits represented. Ignore the MSB as it will be truncated + # ASID is bits first 4 hex values (bits 64 - 78) + + # Test user mode entry valid + # Global Bit matching ASID + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000031 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test user mode entry valid + # Global Bit nonmatching ASID + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000031 + # Ignore MSB it will be truncated + asid = 0x7FF6 + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test user mode entry invalid + # Global Bit nonmatching ASID + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000021 + # Ignore MSB it will be truncated + asid = 0x7FF6 + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 0 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test user mode entry valid + # Ensure that user mode and valid is enabled! 
+ data = 0x7FFF0000000000000011 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test user mode entry invalid + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000011 + # Ignore MSB it will be truncated + asid = 0x7FF6 + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 0 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test supervisor mode entry valid + # The entry is NOT in user mode + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000001 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 1 + super_access = 0 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test supervisor mode entry invalid + # The entry is in user mode + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000011 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 1 + super_access = 0 + xwr = 0 + valid = 0 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test supervisor mode entry valid + # The entry is NOT in user mode with access + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000001 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 1 + super_access = 1 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test supervisor mode entry valid + # The entry is in user mode with access + # Ensure that user mode and valid is enabled! 
+ data = 0x7FFF0000000000000011 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 1 + super_access = 1 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + +def test_permv(): + dut = PermissionValidator(15, 64) + run_simulation(dut, tbench( + dut), vcd_name="Waveforms/test_permission_validator.vcd") + print("PermissionValidator Unit Test Success") + + +if __name__ == "__main__": + test_permv() diff --git a/src/soc/unused/TLB/test/test_pte_entry.py b/src/soc/unused/TLB/test/test_pte_entry.py new file mode 100644 index 00000000..51b3dcf0 --- /dev/null +++ b/src/soc/unused/TLB/test/test_pte_entry.py @@ -0,0 +1,114 @@ +from nmigen.compat.sim import run_simulation + +from soc.TLB.PteEntry import PteEntry + +from soc.TestUtil.test_helper import assert_op + + +def set_entry(dut, i): + yield dut.i.eq(i) + yield + + +def check_dirty(dut, d, op): + out_d = yield dut.d + assert_op("Dirty", out_d, d, op) + + +def check_accessed(dut, a, op): + out_a = yield dut.a + assert_op("Accessed", out_a, a, op) + + +def check_global(dut, o, op): + out = yield dut.g + assert_op("Global", out, o, op) + + +def check_user(dut, o, op): + out = yield dut.u + assert_op("User Mode", out, o, op) + + +def check_xwr(dut, o, op): + out = yield dut.xwr + assert_op("XWR", out, o, op) + + +def check_asid(dut, o, op): + out = yield dut.asid + assert_op("ASID", out, o, op) + + +def check_pte(dut, o, op): + out = yield dut.pte + assert_op("ASID", out, o, op) + + +def check_valid(dut, v, op): + out_v = yield dut.v + assert_op("Valid", out_v, v, op) + + +def check_all(dut, d, a, g, u, xwr, v, asid, pte): + yield from check_dirty(dut, d, 0) + yield from check_accessed(dut, a, 0) + yield from check_global(dut, g, 0) + yield from check_user(dut, u, 0) + yield from check_xwr(dut, xwr, 0) + yield from check_asid(dut, asid, 0) + yield from check_pte(dut, pte, 0) + yield from check_valid(dut, v, 0) + + +def tbench(dut): + # 80 bits represented. 
Ignore the MSB as it will be truncated + # ASID is bits first 4 hex values (bits 64 - 78) + + i = 0x7FFF0000000000000031 + dirty = 0 + access = 0 + glob = 1 + user = 1 + xwr = 0 + valid = 1 + asid = 0x7FFF + pte = 0x0000000000000031 + yield from set_entry(dut, i) + yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) + + i = 0x0FFF00000000000000FF + dirty = 1 + access = 1 + glob = 1 + user = 1 + xwr = 7 + valid = 1 + asid = 0x0FFF + pte = 0x00000000000000FF + yield from set_entry(dut, i) + yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) + + i = 0x0721000000001100001F + dirty = 0 + access = 0 + glob = 0 + user = 1 + xwr = 7 + valid = 1 + asid = 0x0721 + pte = 0x000000001100001F + yield from set_entry(dut, i) + yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) + + yield + + +def test_pteentry(): + dut = PteEntry(15, 64) + run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_pte_entry.vcd") + print("PteEntry Unit Test Success") + + +if __name__ == "__main__": + test_pteentry() diff --git a/src/soc/unused/TLB/test/test_set_associative_cache.py b/src/soc/unused/TLB/test/test_set_associative_cache.py new file mode 100644 index 00000000..edec055b --- /dev/null +++ b/src/soc/unused/TLB/test/test_set_associative_cache.py @@ -0,0 +1,43 @@ +from nmigen.compat.sim import run_simulation + +from soc.TLB.SetAssociativeCache import SetAssociativeCache + +from soc.TestUtil.test_helper import assert_eq, assert_ne, assert_op + + +def set_sac(dut, e, c, s, t, d): + yield dut.enable.eq(e) + yield dut.command.eq(c) + yield dut.cset.eq(s) + yield dut.tag.eq(t) + yield dut.data_i.eq(d) + yield + + +def tbench(dut): + enable = 1 + command = 2 + cset = 1 + tag = 2 + data = 3 + yield from set_sac(dut, enable, command, cset, tag, data) + yield + + enable = 1 + command = 2 + cset = 1 + tag = 5 + data = 8 + yield from set_sac(dut, enable, command, cset, tag, data) + yield + + +def test_assoc_cache(): + dut = SetAssociativeCache(4, 4, 4, 4) + run_simulation(dut, tbench( + dut), vcd_name="Waveforms/test_set_associative_cache.vcd") + print("Set Associative Cache Unit Test Success") + + +if __name__ == "__main__": + test_assoc_cache() diff --git a/src/soc/unused/TLB/test/test_tlb.py b/src/soc/unused/TLB/test/test_tlb.py new file mode 100644 index 00000000..38656623 --- /dev/null +++ b/src/soc/unused/TLB/test/test_tlb.py @@ -0,0 +1,86 @@ +#import tracemalloc +# tracemalloc.start() + +from nmigen.compat.sim import run_simulation + +from soc.TLB.TLB import TLB + +from soc.TestUtil.test_helper import assert_op, assert_eq + +# self.supermode = Signal(1) # Supervisor Mode +# self.super_access = Signal(1) # Supervisor Access +# self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2 +# self.xwr = Signal(3) # Execute, Write, Read +# self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64 +#self.address_L1 = Signal(range(L1_size)) +# self.asid = Signal(asid_size) # Address Space IDentifier (ASID) +# self.vma = Signal(vma_size) # Virtual Memory Address (VMA) +# self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE) +# +# self.hit = Signal(1) # Denotes if the VMA had a mapped PTE +# self.perm_valid = Signal(1) # Denotes if the permissions are correct +# self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA + +COMMAND_READ = 1 +COMMAND_WRITE_L1 = 2 + +# Checks the data state of the CAM entry +# Arguments: +# dut: The CamEntry being tested +# d (Data): The expected data +# op (Operation): (0 => ==), (1 => !=) + 
+ +def check_hit(dut, d): + hit_d = yield dut.hit + #assert_eq("hit", hit_d, d) + + +def tst_command(dut, cmd, xwr, cycles): + yield dut.command.eq(cmd) + yield dut.xwr.eq(xwr) + for i in range(0, cycles): + yield + + +def tst_write_L1(dut, vma, address_L1, asid, pte_in): + yield dut.address_L1.eq(address_L1) + yield dut.asid.eq(asid) + yield dut.vma.eq(vma) + yield dut.pte_in.eq(pte_in) + yield from tst_command(dut, COMMAND_WRITE_L1, 7, 2) + + +def tst_search(dut, vma, found): + yield dut.vma.eq(vma) + yield from tst_command(dut, COMMAND_READ, 7, 1) + yield from check_hit(dut, found) + + +def zero(dut): + yield dut.supermode.eq(0) + yield dut.super_access.eq(0) + yield dut.mode.eq(0) + yield dut.address_L1.eq(0) + yield dut.asid.eq(0) + yield dut.vma.eq(0) + yield dut.pte_in.eq(0) + + +def tbench(dut): + yield from zero(dut) + yield dut.mode.eq(0xF) # enable TLB + # test hit + yield from tst_write_L1(dut, 0xFEEDFACE, 0, 0xFFFF, 0xF0F0) + yield from tst_search(dut, 0xFEEDFACE, 1) + yield from tst_search(dut, 0xFACEFEED, 0) + + +def test_tlb(): + dut = TLB(15, 36, 64, 8) + run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_tlb.vcd") + print("TLB Unit Test Success") + + +if __name__ == "__main__": + test_tlb() diff --git a/src/soc/unused/iommu/axi_rab/axi4_ar_buffer.py b/src/soc/unused/iommu/axi_rab/axi4_ar_buffer.py new file mode 100644 index 00000000..1f3a5ff3 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_ar_buffer.py @@ -0,0 +1,135 @@ +# Copyright 2018 ETH Zurich and University of Bologna. +# Copyright and related rights are licensed under the Solderpad Hardware +# License, Version 0.51 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# or agreed to in writing, software, hardware and materials distributed under +# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. 
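+# NOTE: the generated class below refers to AXI_ID_WIDTH and AXI_USER_WIDTH,
+# which are parameters of the original SystemVerilog module (see the
+# commented-out header further down) but are never defined in this Python
+# file, so instantiating it as written would raise a NameError.  A minimal
+# fix -- an assumption on top of the raw sv2nmigen output, not part of it --
+# is to pass the widths in as constructor arguments, mirroring the SV
+# parameters:
+#
+#     def __init__(self, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
+#         self.s_axi4_arid = Signal(AXI_ID_WIDTH)      # input
+#         self.s_axi4_aruser = Signal(AXI_USER_WIDTH)  # input
+#         ...
+#
+# The same applies to the other sv2nmigen-generated axi4_* modules below.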
+ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + +# module axi4_ar_buffer +# #( +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid, +# input logic [31:0] s_axi4_araddr, +# input logic s_axi4_arvalid, +# output logic s_axi4_arready, +# input logic [7:0] s_axi4_arlen, +# input logic [2:0] s_axi4_arsize, +# input logic [1:0] s_axi4_arburst, +# input logic s_axi4_arlock, +# input logic [2:0] s_axi4_arprot, +# input logic [3:0] s_axi4_arcache, +# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser, +# +# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid, +# output logic [31:0] m_axi4_araddr, +# output logic m_axi4_arvalid, +# input logic m_axi4_arready, +# output logic [7:0] m_axi4_arlen, +# output logic [2:0] m_axi4_arsize, +# output logic [1:0] m_axi4_arburst, +# output logic m_axi4_arlock, +# output logic [2:0] m_axi4_arprot, +# output logic [3:0] m_axi4_arcache, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser +# ); + + +class axi4_ar_buffer(Elaboratable): + + def __init__(self): + # self.axi4_aclk = Signal() # input + # self.axi4_arstn = Signal() # input + self.s_axi4_arid = Signal(AXI_ID_WIDTH) # input + self.s_axi4_araddr = Signal(32) # input + self.s_axi4_arvalid = Signal() # input + self.s_axi4_arready = Signal() # output + self.s_axi4_arlen = Signal(8) # input + self.s_axi4_arsize = Signal(3) # input + self.s_axi4_arburst = Signal(2) # input + self.s_axi4_arlock = Signal() # input + self.s_axi4_arprot = Signal(3) # input + self.s_axi4_arcache = Signal(4) # input + self.s_axi4_aruser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_arid = Signal(AXI_ID_WIDTH) # output + self.m_axi4_araddr = Signal(32) # output + self.m_axi4_arvalid = Signal() # output + self.m_axi4_arready = Signal() # input + self.m_axi4_arlen = Signal(8) # output + self.m_axi4_arsize = Signal(3) # output + self.m_axi4_arburst = Signal(2) # output + self.m_axi4_arlock = Signal() # output + self.m_axi4_arprot = Signal(3) # output + self.m_axi4_arcache = Signal(4) # output + self.m_axi4_aruser = Signal(AXI_USER_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + # #TODO use record types here + # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_in; + # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_out; + + # assign data_in [3:0] = s_axi4_arcache; + # assign data_in [6:4] = s_axi4_arprot; + # assign data_in [7] = s_axi4_arlock; + # assign data_in [9:8] = s_axi4_arburst; + # assign data_in [12:10] = s_axi4_arsize; + # assign data_in [20:13] = s_axi4_arlen; + # assign data_in [52:21] = s_axi4_araddr; + # assign data_in [52+AXI_ID_WIDTH:53] = s_axi4_arid; + # assign data_in[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH] = s_axi4_aruser; + # + # assign m_axi4_arcache = data_out[3:0]; + # assign m_axi4_arprot = data_out[6:4]; + # assign m_axi4_arlock = data_out[7]; + # assign m_axi4_arburst = data_out[9:8]; + # assign m_axi4_arsize = data_out[12:10]; + # assign m_axi4_arlen = data_out[20:13]; + # assign m_axi4_araddr = data_out[52:21]; + # assign m_axi4_arid = data_out[52+AXI_ID_WIDTH:53]; + # assign m_axi4_aruser = data_out[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH]; + + # m.d.comb += self.m_axi4_arcache.eq(..) + # m.d.comb += self.m_axi4_arprot.eq(..) + # m.d.comb += self.m_axi4_arlock.eq(..) + # m.d.comb += self.m_axi4_arburst.eq(..) + # m.d.comb += self.m_axi4_arsize.eq(..) + # m.d.comb += self.m_axi4_arlen.eq(..) 
+ # m.d.comb += self.m_axi4_araddr.eq(..) + # m.d.comb += self.m_axi4_arid.eq(..) + # m.d.comb += self.m_axi4_aruser.eq(..) + return m + +# TODO convert axi_buffer_rab.sv +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+53 ), +# .BUFFER_DEPTH ( 4 ) +# ) +# u_buffer +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# .valid_out ( m_axi4_arvalid ), +# .data_out ( data_out ), +# .ready_in ( m_axi4_arready ), +# .valid_in ( s_axi4_arvalid ), +# .data_in ( data_in ), +# .ready_out ( s_axi4_arready ) +# ); +# + +# endmodule diff --git a/src/soc/unused/iommu/axi_rab/axi4_ar_sender.py b/src/soc/unused/iommu/axi_rab/axi4_ar_sender.py new file mode 100644 index 00000000..4cbd97d5 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_ar_sender.py @@ -0,0 +1,232 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_ar_sender(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.l1_done_o = Signal() # output + self.l1_accept_i = Signal() # input + self.l1_drop_i = Signal() # input + self.l1_save_i = Signal() # input + self.l2_done_o = Signal() # output + self.l2_accept_i = Signal() # input + self.l2_drop_i = Signal() # input + self.l2_sending_o = Signal() # output + self.l1_araddr_i = Signal(AXI_ADDR_WIDTH) # input + self.l2_araddr_i = Signal(AXI_ADDR_WIDTH) # input + self.s_axi4_arid = Signal(AXI_ID_WIDTH) # input + self.s_axi4_arvalid = Signal() # input + self.s_axi4_arready = Signal() # output + self.s_axi4_arlen = Signal(8) # input + self.s_axi4_arsize = Signal(3) # input + self.s_axi4_arburst = Signal(2) # input + self.s_axi4_arlock = Signal() # input + self.s_axi4_arprot = Signal(3) # input + self.s_axi4_arcache = Signal(4) # input + self.s_axi4_aruser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_arid = Signal(AXI_ID_WIDTH) # output + self.m_axi4_araddr = Signal(AXI_ADDR_WIDTH) # output + self.m_axi4_arvalid = Signal() # output + self.m_axi4_arready = Signal() # input + self.m_axi4_arlen = Signal(8) # output + self.m_axi4_arsize = Signal(3) # output + self.m_axi4_arburst = Signal(2) # output + self.m_axi4_arlock = Signal() # output + self.m_axi4_arprot = Signal(3) # output + self.m_axi4_arcache = Signal(4) # output + self.m_axi4_aruser = Signal(AXI_USER_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.l1_save.eq(self.None) + m.d.comb += self.l1_done_o.eq(self.None) + m.d.comb += self.m_axi4_arvalid.eq(self.None) + m.d.comb += self.s_axi4_arready.eq(self.None) + m.d.comb += self.m_axi4_aruser.eq(self.None) + m.d.comb += self.m_axi4_arcache.eq(self.None) + m.d.comb += self.m_axi4_arprot.eq(self.None) + m.d.comb += self.m_axi4_arlock.eq(self.None) + m.d.comb += self.m_axi4_arburst.eq(self.None) + m.d.comb += self.m_axi4_arsize.eq(self.None) + m.d.comb += self.m_axi4_arlen.eq(self.None) + m.d.comb += self.m_axi4_araddr.eq(self.None) + m.d.comb += self.m_axi4_arid.eq(self.None) + m.d.comb += self.l2_sending_o.eq(self.None) + m.d.comb += self.l2_sent.eq(self.None) + m.d.comb += self.l2_done_o.eq(self.None) + m.d.comb += self.m_axi4_aruser.eq(self.s_axi4_aruser) + m.d.comb += self.m_axi4_arcache.eq(self.s_axi4_arcache) + m.d.comb += self.m_axi4_arprot.eq(self.s_axi4_arprot) + m.d.comb += self.m_axi4_arlock.eq(self.s_axi4_arlock) + m.d.comb += self.m_axi4_arburst.eq(self.s_axi4_arburst) + m.d.comb += self.m_axi4_arsize.eq(self.s_axi4_arsize) + m.d.comb += self.m_axi4_arlen.eq(self.s_axi4_arlen) + 
m.d.comb += self.m_axi4_araddr.eq(self.l1_araddr_i) + m.d.comb += self.m_axi4_arid.eq(self.s_axi4_arid) + m.d.comb += self.l2_sending_o.eq(self.1: 'b0) + m.d.comb += self.l2_available_q.eq(self.1: 'b0) + m.d.comb += self.l2_done_o.eq(self.1: 'b0) + return m + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# module axi4_ar_sender +# #( +# parameter AXI_ADDR_WIDTH = 40, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4, +# parameter ENABLE_L2TLB = 0 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# output logic l1_done_o, +# input logic l1_accept_i, +# input logic l1_drop_i, +# input logic l1_save_i, +# +# output logic l2_done_o, +# input logic l2_accept_i, +# input logic l2_drop_i, +# output logic l2_sending_o, +# +# input logic [AXI_ADDR_WIDTH-1:0] l1_araddr_i, +# input logic [AXI_ADDR_WIDTH-1:0] l2_araddr_i, +# +# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid, +# input logic s_axi4_arvalid, +# output logic s_axi4_arready, +# input logic [7:0] s_axi4_arlen, +# input logic [2:0] s_axi4_arsize, +# input logic [1:0] s_axi4_arburst, +# input logic s_axi4_arlock, +# input logic [2:0] s_axi4_arprot, +# input logic [3:0] s_axi4_arcache, +# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser, +# +# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid, +# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_araddr, +# output logic m_axi4_arvalid, +# input logic m_axi4_arready, +# output logic [7:0] m_axi4_arlen, +# output logic [2:0] m_axi4_arsize, +# output logic [1:0] m_axi4_arburst, +# output logic m_axi4_arlock, +# output logic [2:0] m_axi4_arprot, +# output logic [3:0] m_axi4_arcache, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser +# ); +# +# logic l1_save; +# +# logic l2_sent; +# logic l2_available_q; +# +# assign l1_save = l1_save_i & l2_available_q; +# +# assign l1_done_o = s_axi4_arvalid & s_axi4_arready ; +# +# // if 1: accept and forward a transaction translated by L1 +# // 2: drop or save request (if L2 slot not occupied already) +# assign m_axi4_arvalid = (s_axi4_arvalid & l1_accept_i) | +# l2_sending_o; +# assign s_axi4_arready = (m_axi4_arvalid & m_axi4_arready & ~l2_sending_o) | +# (s_axi4_arvalid & (l1_drop_i | l1_save)); +# +# generate +# if (ENABLE_L2TLB == 1) begin +# logic [AXI_USER_WIDTH-1:0] l2_axi4_aruser ; +# logic [3:0] l2_axi4_arcache ; +# logic [3:0] l2_axi4_arregion; +# logic [3:0] l2_axi4_arqos ; +# logic [2:0] l2_axi4_arprot ; +# logic l2_axi4_arlock ; +# logic [1:0] l2_axi4_arburst ; +# logic [2:0] l2_axi4_arsize ; +# logic [7:0] l2_axi4_arlen ; +# logic [AXI_ID_WIDTH-1:0] l2_axi4_arid ; +# +# assign m_axi4_aruser = l2_sending_o ? l2_axi4_aruser : s_axi4_aruser; +# assign m_axi4_arcache = l2_sending_o ? l2_axi4_arcache : s_axi4_arcache; +# assign m_axi4_arprot = l2_sending_o ? l2_axi4_arprot : s_axi4_arprot; +# assign m_axi4_arlock = l2_sending_o ? l2_axi4_arlock : s_axi4_arlock; +# assign m_axi4_arburst = l2_sending_o ? 
l2_axi4_arburst : s_axi4_arburst; +# assign m_axi4_arsize = l2_sending_o ? l2_axi4_arsize : s_axi4_arsize; +# assign m_axi4_arlen = l2_sending_o ? l2_axi4_arlen : s_axi4_arlen; +# assign m_axi4_araddr = l2_sending_o ? l2_araddr_i : l1_araddr_i; +# assign m_axi4_arid = l2_sending_o ? l2_axi4_arid : s_axi4_arid; +# +# // Buffer AXI signals in case of L1 miss +# always @(posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# l2_axi4_aruser <= 'b0; +# l2_axi4_arcache <= 'b0; +# l2_axi4_arprot <= 'b0; +# l2_axi4_arlock <= 1'b0; +# l2_axi4_arburst <= 'b0; +# l2_axi4_arsize <= 'b0; +# l2_axi4_arlen <= 'b0; +# l2_axi4_arid <= 'b0; +# end else if (l1_save) begin +# l2_axi4_aruser <= s_axi4_aruser; +# l2_axi4_arcache <= s_axi4_arcache; +# l2_axi4_arprot <= s_axi4_arprot; +# l2_axi4_arlock <= s_axi4_arlock; +# l2_axi4_arburst <= s_axi4_arburst; +# l2_axi4_arsize <= s_axi4_arsize; +# l2_axi4_arlen <= s_axi4_arlen; +# l2_axi4_arid <= s_axi4_arid; +# end +# end +# +# // signal that an l1_save_i can be accepted +# always @(posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# l2_available_q <= 1'b1; +# end else if (l2_sent | l2_drop_i) begin +# l2_available_q <= 1'b1; +# end else if (l1_save) begin +# l2_available_q <= 1'b0; +# end +# end +# +# assign l2_sending_o = l2_accept_i & ~l2_available_q; +# assign l2_sent = l2_sending_o & m_axi4_arvalid & m_axi4_arready; +# +# // if 1: having sent out a transaction translated by L2 +# // 2: drop request (L2 slot is available again) +# assign l2_done_o = l2_sent | l2_drop_i; +# +# end else begin // !`ifdef ENABLE_L2TLB +# assign m_axi4_aruser = s_axi4_aruser; +# assign m_axi4_arcache = s_axi4_arcache; +# assign m_axi4_arprot = s_axi4_arprot; +# assign m_axi4_arlock = s_axi4_arlock; +# assign m_axi4_arburst = s_axi4_arburst; +# assign m_axi4_arsize = s_axi4_arsize; +# assign m_axi4_arlen = s_axi4_arlen; +# assign m_axi4_araddr = l1_araddr_i; +# assign m_axi4_arid = s_axi4_arid; +# +# assign l2_sending_o = 1'b0; +# assign l2_available_q = 1'b0; +# assign l2_done_o = 1'b0; +# end // else: !if(ENABLE_L2TLB == 1) +# endgenerate +# +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi4_aw_buffer.py b/src/soc/unused/iommu/axi_rab/axi4_aw_buffer.py new file mode 100644 index 00000000..f5ca37d1 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_aw_buffer.py @@ -0,0 +1,157 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_aw_buffer(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input + self.s_axi4_awaddr = Signal(32) # input + self.s_axi4_awvalid = Signal() # input + self.s_axi4_awready = Signal() # output + self.s_axi4_awlen = Signal(8) # input + self.s_axi4_awsize = Signal(3) # input + self.s_axi4_awburst = Signal(2) # input + self.s_axi4_awlock = Signal() # input + self.s_axi4_awprot = Signal(3) # input + self.s_axi4_awcache = Signal(4) # input + self.s_axi4_awregion = Signal(4) # input + self.s_axi4_awqos = Signal(4) # input + self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output + self.m_axi4_awaddr = Signal(32) # output + self.m_axi4_awvalid = Signal() # output + self.m_axi4_awready = Signal() # input + self.m_axi4_awlen = Signal(8) # output + self.m_axi4_awsize = Signal(3) # output + self.m_axi4_awburst = Signal(2) # output + self.m_axi4_awlock = Signal() # output + 
self.m_axi4_awprot = Signal(3) # output + self.m_axi4_awcache = Signal(4) # output + self.m_axi4_awregion = Signal(4) # output + self.m_axi4_awqos = Signal(4) # output + self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.None.eq(self.s_axi4_awcache) + m.d.comb += self.None.eq(self.s_axi4_awprot) + m.d.comb += self.None.eq(self.s_axi4_awlock) + m.d.comb += self.None.eq(self.s_axi4_awburst) + m.d.comb += self.None.eq(self.s_axi4_awsize) + m.d.comb += self.None.eq(self.s_axi4_awlen) + m.d.comb += self.None.eq(self.s_axi4_awaddr) + m.d.comb += self.None.eq(self.s_axi4_awregion) + m.d.comb += self.None.eq(self.s_axi4_awqos) + m.d.comb += self.None.eq(self.s_axi4_awid) + m.d.comb += self.None.eq(self.s_axi4_awuser) + m.d.comb += self.m_axi4_awcache.eq(self.None) + m.d.comb += self.m_axi4_awprot.eq(self.None) + m.d.comb += self.m_axi4_awlock.eq(self.None) + m.d.comb += self.m_axi4_awburst.eq(self.None) + m.d.comb += self.m_axi4_awsize.eq(self.None) + m.d.comb += self.m_axi4_awlen.eq(self.None) + m.d.comb += self.m_axi4_awaddr.eq(self.None) + m.d.comb += self.m_axi4_awregion.eq(self.None) + m.d.comb += self.m_axi4_awqos.eq(self.None) + m.d.comb += self.m_axi4_awid.eq(self.None) + m.d.comb += self.m_axi4_awuser.eq(self.None) + return m + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. 
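+#
+# NOTE: in the elaborate() above, the lines of the form
+# "m.d.comb += self.None.eq(...)" and "m.d.comb += self.m_axi4_*.eq(self.None)"
+# are placeholders left by sv2nmigen for the data_in/data_out packing of the
+# axi_buffer_rab FIFO shown in the original SystemVerilog below.  A sketch of
+# that packing in nmigen -- an assumption based on the commented assigns, not
+# verified against the original RTL -- concatenates the AW-channel fields in
+# the same bit order:
+#
+#     data_in = Cat(self.s_axi4_awcache,    # bits [3:0]
+#                   self.s_axi4_awprot,     # bits [6:4]
+#                   self.s_axi4_awlock,     # bit  [7]
+#                   self.s_axi4_awburst,    # bits [9:8]
+#                   self.s_axi4_awsize,     # bits [12:10]
+#                   self.s_axi4_awlen,      # bits [20:13]
+#                   self.s_axi4_awaddr,     # bits [52:21]
+#                   self.s_axi4_awregion,   # bits [56:53]
+#                   self.s_axi4_awqos,      # bits [60:57]
+#                   self.s_axi4_awid,       # id bits
+#                   self.s_axi4_awuser)     # user bits
+#
+# with data_out unpacked into the m_axi4_* outputs in the same order.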
+# +# module axi4_aw_buffer +# #( +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid, +# input logic [31:0] s_axi4_awaddr, +# input logic s_axi4_awvalid, +# output logic s_axi4_awready, +# input logic [7:0] s_axi4_awlen, +# input logic [2:0] s_axi4_awsize, +# input logic [1:0] s_axi4_awburst, +# input logic s_axi4_awlock, +# input logic [2:0] s_axi4_awprot, +# input logic [3:0] s_axi4_awcache, +# input logic [3:0] s_axi4_awregion, +# input logic [3:0] s_axi4_awqos, +# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser, +# +# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid, +# output logic [31:0] m_axi4_awaddr, +# output logic m_axi4_awvalid, +# input logic m_axi4_awready, +# output logic [7:0] m_axi4_awlen, +# output logic [2:0] m_axi4_awsize, +# output logic [1:0] m_axi4_awburst, +# output logic m_axi4_awlock, +# output logic [2:0] m_axi4_awprot, +# output logic [3:0] m_axi4_awcache, +# output logic [3:0] m_axi4_awregion, +# output logic [3:0] m_axi4_awqos, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser +# ); +# +# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_in; +# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_out; +# +# assign data_in [3:0] = s_axi4_awcache; +# assign data_in [6:4] = s_axi4_awprot; +# assign data_in [7] = s_axi4_awlock; +# assign data_in [9:8] = s_axi4_awburst; +# assign data_in [12:10] = s_axi4_awsize; +# assign data_in [20:13] = s_axi4_awlen; +# assign data_in [52:21] = s_axi4_awaddr; +# assign data_in [56:53] = s_axi4_awregion; +# assign data_in [60:57] = s_axi4_awqos; +# assign data_in [60+AXI_ID_WIDTH:61] = s_axi4_awid; +# assign data_in [60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH] = s_axi4_awuser; +# +# assign m_axi4_awcache = data_out[3:0]; +# assign m_axi4_awprot = data_out[6:4]; +# assign m_axi4_awlock = data_out[7]; +# assign m_axi4_awburst = data_out[9:8]; +# assign m_axi4_awsize = data_out[12:10]; +# assign m_axi4_awlen = data_out[20:13]; +# assign m_axi4_awaddr = data_out[52:21]; +# assign m_axi4_awregion = data_out[56:53]; +# assign m_axi4_awqos = data_out[60:57]; +# assign m_axi4_awid = data_out[60+AXI_ID_WIDTH:61]; +# assign m_axi4_awuser = data_out[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH]; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+61 ), +# .BUFFER_DEPTH ( 4 ) +# ) +# u_buffer +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# .valid_out ( m_axi4_awvalid ), +# .data_out ( data_out ), +# .ready_in ( m_axi4_awready ), +# .valid_in ( s_axi4_awvalid ), +# .data_in ( data_in ), +# .ready_out ( s_axi4_awready ) +# ); +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi4_aw_sender.py b/src/soc/unused/iommu/axi_rab/axi4_aw_sender.py new file mode 100644 index 00000000..fbc917df --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_aw_sender.py @@ -0,0 +1,252 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_aw_sender(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.l1_done_o = Signal() # output + self.l1_accept_i = Signal() # input + self.l1_drop_i = Signal() # input + self.l1_save_i = Signal() # input + self.l2_done_o = Signal() # output + self.l2_accept_i = Signal() # input + self.l2_drop_i = Signal() # input + self.l2_sending_o = Signal() # output + self.l1_awaddr_i = Signal(AXI_ADDR_WIDTH) # input + self.l2_awaddr_i = Signal(AXI_ADDR_WIDTH) 
# input + self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input + self.s_axi4_awvalid = Signal() # input + self.s_axi4_awready = Signal() # output + self.s_axi4_awlen = Signal(8) # input + self.s_axi4_awsize = Signal(3) # input + self.s_axi4_awburst = Signal(2) # input + self.s_axi4_awlock = Signal() # input + self.s_axi4_awprot = Signal(3) # input + self.s_axi4_awcache = Signal(4) # input + self.s_axi4_awregion = Signal(4) # input + self.s_axi4_awqos = Signal(4) # input + self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output + self.m_axi4_awaddr = Signal(AXI_ADDR_WIDTH) # output + self.m_axi4_awvalid = Signal() # output + self.m_axi4_awready = Signal() # input + self.m_axi4_awlen = Signal(8) # output + self.m_axi4_awsize = Signal(3) # output + self.m_axi4_awburst = Signal(2) # output + self.m_axi4_awlock = Signal() # output + self.m_axi4_awprot = Signal(3) # output + self.m_axi4_awcache = Signal(4) # output + self.m_axi4_awregion = Signal(4) # output + self.m_axi4_awqos = Signal(4) # output + self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.l1_save.eq(self.None) + m.d.comb += self.l1_done_o.eq(self.None) + m.d.comb += self.m_axi4_awvalid.eq(self.None) + m.d.comb += self.s_axi4_awready.eq(self.None) + m.d.comb += self.m_axi4_awuser.eq(self.None) + m.d.comb += self.m_axi4_awcache.eq(self.None) + m.d.comb += self.m_axi4_awregion.eq(self.None) + m.d.comb += self.m_axi4_awqos.eq(self.None) + m.d.comb += self.m_axi4_awprot.eq(self.None) + m.d.comb += self.m_axi4_awlock.eq(self.None) + m.d.comb += self.m_axi4_awburst.eq(self.None) + m.d.comb += self.m_axi4_awsize.eq(self.None) + m.d.comb += self.m_axi4_awlen.eq(self.None) + m.d.comb += self.m_axi4_awaddr.eq(self.None) + m.d.comb += self.m_axi4_awid.eq(self.None) + m.d.comb += self.l2_sending_o.eq(self.None) + m.d.comb += self.l2_sent.eq(self.None) + m.d.comb += self.l2_done_o.eq(self.None) + m.d.comb += self.m_axi4_awuser.eq(self.s_axi4_awuser) + m.d.comb += self.m_axi4_awcache.eq(self.s_axi4_awcache) + m.d.comb += self.m_axi4_awregion.eq(self.s_axi4_awregion) + m.d.comb += self.m_axi4_awqos.eq(self.s_axi4_awqos) + m.d.comb += self.m_axi4_awprot.eq(self.s_axi4_awprot) + m.d.comb += self.m_axi4_awlock.eq(self.s_axi4_awlock) + m.d.comb += self.m_axi4_awburst.eq(self.s_axi4_awburst) + m.d.comb += self.m_axi4_awsize.eq(self.s_axi4_awsize) + m.d.comb += self.m_axi4_awlen.eq(self.s_axi4_awlen) + m.d.comb += self.m_axi4_awaddr.eq(self.l1_awaddr_i) + m.d.comb += self.m_axi4_awid.eq(self.s_axi4_awid) + m.d.comb += self.l2_sending_o.eq(self.1: 'b0) + m.d.comb += self.l2_available_q.eq(self.1: 'b0) + m.d.comb += self.l2_done_o.eq(self.1: 'b0) + return m + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. 
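+#
+# NOTE: in the elaborate() above (and likewise in axi4_ar_sender.elaborate()
+# earlier), lines such as "m.d.comb += self.l2_sending_o.eq(self.1: 'b0)" are
+# not valid Python; they are artifacts of sv2nmigen translating the
+# ENABLE_L2TLB == 0 branch of the original SystemVerilog (shown below), where
+# these signals are simply tied to constant zero.  A sketch of that branch in
+# nmigen -- an assumption, with l2_available_q as a local Signal rather than
+# a port -- would be:
+#
+#     l2_available_q = Signal()
+#     m.d.comb += [self.l2_sending_o.eq(0),
+#                  l2_available_q.eq(0),
+#                  self.l2_done_o.eq(0)]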
+# +# module axi4_aw_sender +# #( +# parameter AXI_ADDR_WIDTH = 40, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4, +# parameter ENABLE_L2TLB = 0 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# output logic l1_done_o, +# input logic l1_accept_i, +# input logic l1_drop_i, +# input logic l1_save_i, +# +# output logic l2_done_o, +# input logic l2_accept_i, +# input logic l2_drop_i, +# output logic l2_sending_o, +# +# input logic [AXI_ADDR_WIDTH-1:0] l1_awaddr_i, +# input logic [AXI_ADDR_WIDTH-1:0] l2_awaddr_i, +# +# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid, +# input logic s_axi4_awvalid, +# output logic s_axi4_awready, +# input logic [7:0] s_axi4_awlen, +# input logic [2:0] s_axi4_awsize, +# input logic [1:0] s_axi4_awburst, +# input logic s_axi4_awlock, +# input logic [2:0] s_axi4_awprot, +# input logic [3:0] s_axi4_awcache, +# input logic [3:0] s_axi4_awregion, +# input logic [3:0] s_axi4_awqos, +# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser, +# +# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid, +# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_awaddr, +# output logic m_axi4_awvalid, +# input logic m_axi4_awready, +# output logic [7:0] m_axi4_awlen, +# output logic [2:0] m_axi4_awsize, +# output logic [1:0] m_axi4_awburst, +# output logic m_axi4_awlock, +# output logic [2:0] m_axi4_awprot, +# output logic [3:0] m_axi4_awcache, +# output logic [3:0] m_axi4_awregion, +# output logic [3:0] m_axi4_awqos, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser +# ); +# +# logic l1_save; +# +# logic l2_sent; +# logic l2_available_q; +# +# assign l1_save = l1_save_i & l2_available_q; +# +# assign l1_done_o = s_axi4_awvalid & s_axi4_awready ; +# +# // if 1: accept and forward a transaction translated by L1 +# // 2: drop or save request (if L2 slot not occupied already) +# assign m_axi4_awvalid = (s_axi4_awvalid & l1_accept_i) | +# l2_sending_o; +# assign s_axi4_awready = (m_axi4_awvalid & m_axi4_awready & ~l2_sending_o) | +# (s_axi4_awvalid & (l1_drop_i | l1_save)); +# +# generate +# if (ENABLE_L2TLB == 1) begin +# logic [AXI_USER_WIDTH-1:0] l2_axi4_awuser ; +# logic [3:0] l2_axi4_awcache ; +# logic [3:0] l2_axi4_awregion; +# logic [3:0] l2_axi4_awqos ; +# logic [2:0] l2_axi4_awprot ; +# logic l2_axi4_awlock ; +# logic [1:0] l2_axi4_awburst ; +# logic [2:0] l2_axi4_awsize ; +# logic [7:0] l2_axi4_awlen ; +# logic [AXI_ID_WIDTH-1:0] l2_axi4_awid ; +# +# assign m_axi4_awuser = l2_sending_o ? l2_axi4_awuser : s_axi4_awuser; +# assign m_axi4_awcache = l2_sending_o ? l2_axi4_awcache : s_axi4_awcache; +# assign m_axi4_awregion = l2_sending_o ? l2_axi4_awregion : s_axi4_awregion; +# assign m_axi4_awqos = l2_sending_o ? l2_axi4_awqos : s_axi4_awqos; +# assign m_axi4_awprot = l2_sending_o ? l2_axi4_awprot : s_axi4_awprot; +# assign m_axi4_awlock = l2_sending_o ? l2_axi4_awlock : s_axi4_awlock; +# assign m_axi4_awburst = l2_sending_o ? l2_axi4_awburst : s_axi4_awburst; +# assign m_axi4_awsize = l2_sending_o ? l2_axi4_awsize : s_axi4_awsize; +# assign m_axi4_awlen = l2_sending_o ? l2_axi4_awlen : s_axi4_awlen; +# assign m_axi4_awaddr = l2_sending_o ? l2_awaddr_i : l1_awaddr_i; +# assign m_axi4_awid = l2_sending_o ? 
l2_axi4_awid : s_axi4_awid; +# +# // buffer AXI signals in case of L1 miss +# always @(posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# l2_axi4_awuser <= 'b0; +# l2_axi4_awcache <= 'b0; +# l2_axi4_awregion <= 'b0; +# l2_axi4_awqos <= 'b0; +# l2_axi4_awprot <= 'b0; +# l2_axi4_awlock <= 1'b0; +# l2_axi4_awburst <= 'b0; +# l2_axi4_awsize <= 'b0; +# l2_axi4_awlen <= 'b0; +# l2_axi4_awid <= 'b0; +# end else if (l1_save) begin +# l2_axi4_awuser <= s_axi4_awuser; +# l2_axi4_awcache <= s_axi4_awcache; +# l2_axi4_awregion <= s_axi4_awregion; +# l2_axi4_awqos <= s_axi4_awqos; +# l2_axi4_awprot <= s_axi4_awprot; +# l2_axi4_awlock <= s_axi4_awlock; +# l2_axi4_awburst <= s_axi4_awburst; +# l2_axi4_awsize <= s_axi4_awsize; +# l2_axi4_awlen <= s_axi4_awlen; +# l2_axi4_awid <= s_axi4_awid; +# end +# end +# +# // signal that an l1_save_i can be accepted +# always @(posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# l2_available_q <= 1'b1; +# end else if (l2_sent | l2_drop_i) begin +# l2_available_q <= 1'b1; +# end else if (l1_save) begin +# l2_available_q <= 1'b0; +# end +# end +# +# assign l2_sending_o = l2_accept_i & ~l2_available_q; +# assign l2_sent = l2_sending_o & m_axi4_awvalid & m_axi4_awready; +# +# // if 1: having sent out a transaction translated by L2 +# // 2: drop request (L2 slot is available again) +# assign l2_done_o = l2_sent | l2_drop_i; +# +# end else begin // !`ifdef ENABLE_L2TLB +# assign m_axi4_awuser = s_axi4_awuser; +# assign m_axi4_awcache = s_axi4_awcache; +# assign m_axi4_awregion = s_axi4_awregion; +# assign m_axi4_awqos = s_axi4_awqos; +# assign m_axi4_awprot = s_axi4_awprot; +# assign m_axi4_awlock = s_axi4_awlock; +# assign m_axi4_awburst = s_axi4_awburst; +# assign m_axi4_awsize = s_axi4_awsize; +# assign m_axi4_awlen = s_axi4_awlen; +# assign m_axi4_awaddr = l1_awaddr_i; +# assign m_axi4_awid = s_axi4_awid; +# +# assign l2_sending_o = 1'b0; +# assign l2_available_q = 1'b0; +# assign l2_done_o = 1'b0; +# end // !`ifdef ENABLE_L2TLB +# endgenerate +# +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi4_b_buffer.py b/src/soc/unused/iommu/axi_rab/axi4_b_buffer.py new file mode 100644 index 00000000..42fce1ad --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_b_buffer.py @@ -0,0 +1,94 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_b_buffer(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output + self.s_axi4_bresp = Signal(2) # output + self.s_axi4_bvalid = Signal() # output + self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output + self.s_axi4_bready = Signal() # input + self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input + self.m_axi4_bresp = Signal(2) # input + self.m_axi4_bvalid = Signal() # input + self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_bready = Signal() # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.None.eq(self.m_axi4_bresp) + m.d.comb += self.None.eq(self.m_axi4_bid) + m.d.comb += self.None.eq(self.m_axi4_buser) + m.d.comb += self.s_axi4_buser.eq(self.None) + m.d.comb += self.s_axi4_bid.eq(self.None) + m.d.comb += self.s_axi4_bresp.eq(self.None) + return m + +# // Copyright 2018 ETH Zurich and University of Bologna. 
+# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# module axi4_b_buffer +# #( +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid, +# output logic [1:0] s_axi4_bresp, +# output logic s_axi4_bvalid, +# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser, +# input logic s_axi4_bready, +# +# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid, +# input logic [1:0] m_axi4_bresp, +# input logic m_axi4_bvalid, +# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser, +# output logic m_axi4_bready +# ); +# +# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_in; +# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_out; +# +# assign data_in [1:0] = m_axi4_bresp; +# assign data_in [AXI_ID_WIDTH+1:2] = m_axi4_bid; +# assign data_in[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2] = m_axi4_buser; +# +# assign s_axi4_buser = data_out[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2]; +# assign s_axi4_bid = data_out[AXI_ID_WIDTH+1:2]; +# assign s_axi4_bresp = data_out[1:0]; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( AXI_ID_WIDTH+AXI_USER_WIDTH+2 ), +# .BUFFER_DEPTH ( 4 ) +# ) +# u_buffer +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# .valid_out( s_axi4_bvalid ), +# .data_out ( data_out ), +# .ready_in ( s_axi4_bready ), +# .valid_in ( m_axi4_bvalid ), +# .data_in ( data_in ), +# .ready_out( m_axi4_bready ) +# ); +# +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi4_b_sender.py b/src/soc/unused/iommu/axi_rab/axi4_b_sender.py new file mode 100644 index 00000000..1c61a2a5 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_b_sender.py @@ -0,0 +1,136 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_b_sender(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.drop_i = Signal() # input + self.done_o = Signal() # output + self.id_i = Signal(AXI_ID_WIDTH) # input + self.prefetch_i = Signal() # input + self.hit_i = Signal() # input + self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output + self.s_axi4_bresp = Signal(2) # output + self.s_axi4_bvalid = Signal() # output + self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output + self.s_axi4_bready = Signal() # input + self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input + self.m_axi4_bresp = Signal(2) # input + self.m_axi4_bvalid = Signal() # input + self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_bready = Signal() # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.fifo_push.eq(self.None) + m.d.comb += self.done_o.eq(self.fifo_push) + m.d.comb += self.fifo_pop.eq(self.None) + m.d.comb += self.s_axi4_buser.eq(self.None) + m.d.comb += self.s_axi4_bid.eq(self.None) + m.d.comb += self.s_axi4_bresp.eq(self.None) + m.d.comb += self.s_axi4_bvalid.eq(self.None) + m.d.comb += self.m_axi4_bready.eq(self.None) + 
return m + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# module axi4_b_sender +# #( +# parameter AXI_ID_WIDTH = 10, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# input logic drop_i, +# output logic done_o, +# input logic [AXI_ID_WIDTH-1:0] id_i, +# input logic prefetch_i, +# input logic hit_i, +# +# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid, +# output logic [1:0] s_axi4_bresp, +# output logic s_axi4_bvalid, +# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser, +# input logic s_axi4_bready, +# +# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid, +# input logic [1:0] m_axi4_bresp, +# input logic m_axi4_bvalid, +# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser, +# output logic m_axi4_bready +# ); +# +# logic fifo_valid; +# logic fifo_pop; +# logic fifo_push; +# logic fifo_ready; +# logic [AXI_ID_WIDTH-1:0] id; +# logic prefetch; +# logic hit; +# +# logic dropping; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( 2+AXI_ID_WIDTH ), +# .BUFFER_DEPTH ( 4 ) +# ) +# u_fifo +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# // Pop +# .data_out ( {prefetch, hit, id} ), +# .valid_out ( fifo_valid ), +# .ready_in ( fifo_pop ), +# // Push +# .valid_in ( fifo_push ), +# .data_in ( {prefetch_i, hit_i, id_i} ), +# .ready_out ( fifo_ready ) +# ); +# +# assign fifo_push = drop_i & fifo_ready; +# assign done_o = fifo_push; +# +# assign fifo_pop = dropping & s_axi4_bready; +# +# always @ (posedge axi4_aclk or negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# dropping <= 1'b0; +# end else begin +# if (fifo_valid && ~dropping) +# dropping <= 1'b1; +# else if (fifo_pop) +# dropping <= 1'b0; +# end +# end +# +# assign s_axi4_buser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_buser; +# assign s_axi4_bid = dropping ? id : m_axi4_bid; +# +# assign s_axi4_bresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, mutli, prot +# (dropping & prefetch ) ? 2'b10 : // prefetch miss +# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot +# (dropping ) ? 
2'b10 : // non-prefetch miss +# m_axi4_bresp; +# +# assign s_axi4_bvalid = dropping | m_axi4_bvalid; +# assign m_axi4_bready = ~dropping & s_axi4_bready; +# +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi4_r_buffer.py b/src/soc/unused/iommu/axi_rab/axi4_r_buffer.py new file mode 100644 index 00000000..91bdf0a5 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_r_buffer.py @@ -0,0 +1,120 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_r_buffer(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output + self.s_axi4_rresp = Signal(2) # output + self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output + self.s_axi4_rlast = Signal() # output + self.s_axi4_rvalid = Signal() # output + self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output + self.s_axi4_rready = Signal() # input + self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input + self.m_axi4_rresp = Signal(2) # input + self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input + self.m_axi4_rlast = Signal() # input + self.m_axi4_rvalid = Signal() # input + self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_rready = Signal() # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.None.eq(self.m_axi4_rresp) + m.d.comb += self.None.eq(self.m_axi4_rlast) + m.d.comb += self.None.eq(self.m_axi4_rid) + m.d.comb += self.None.eq(self.m_axi4_rdata) + m.d.comb += self.None.eq(self.m_axi4_ruser) + m.d.comb += self.s_axi4_rresp.eq(self.None) + m.d.comb += self.s_axi4_rlast.eq(self.None) + m.d.comb += self.s_axi4_rid.eq(self.None) + m.d.comb += self.s_axi4_rdata.eq(self.None) + m.d.comb += self.s_axi4_ruser.eq(self.None) + return m + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. 
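# --- editor's sketch (illustrative, not part of the generated file) ---------
# The "self.None" assignments in the elaborate() above are sv2nmigen
# conversion errors.  The SystemVerilog reproduced below simply packs the
# R-channel fields into one word, pushes it through axi_buffer_rab, and
# unpacks it on the other side.  Assuming a generic valid/ready FIFO such as
# nmigen.lib.fifo.SyncFIFO as a stand-in for axi_buffer_rab, the same wiring
# could be written roughly as follows (class and signal names are
# illustrative; clk/rstn become nmigen's implicit "sync" domain):

from nmigen import Cat, Elaboratable, Module, Signal
from nmigen.lib.fifo import SyncFIFO


class Axi4RBufferSketch(Elaboratable):
    def __init__(self, data_width=32, id_width=4, user_width=4):
        # slave-side R channel (towards the core)
        self.s_rresp = Signal(2)
        self.s_rlast = Signal()
        self.s_rid = Signal(id_width)
        self.s_rdata = Signal(data_width)
        self.s_ruser = Signal(user_width)
        self.s_rvalid = Signal()
        self.s_rready = Signal()
        # master-side R channel (towards memory)
        self.m_rresp = Signal(2)
        self.m_rlast = Signal()
        self.m_rid = Signal(id_width)
        self.m_rdata = Signal(data_width)
        self.m_ruser = Signal(user_width)
        self.m_rvalid = Signal()
        self.m_rready = Signal()
        # one buffer word holds all payload fields, as in the Verilog below
        self.fifo = SyncFIFO(width=data_width + id_width + user_width + 3,
                             depth=4)

    def elaborate(self, platform=None):
        m = Module()
        m.submodules.fifo = fifo = self.fifo
        m.d.comb += [
            # push: pack {ruser, rdata, rid, rlast, rresp}, LSB first
            fifo.w_data.eq(Cat(self.m_rresp, self.m_rlast, self.m_rid,
                               self.m_rdata, self.m_ruser)),
            fifo.w_en.eq(self.m_rvalid),
            self.m_rready.eq(fifo.w_rdy),
            # pop: unpack in the same field order
            Cat(self.s_rresp, self.s_rlast, self.s_rid,
                self.s_rdata, self.s_ruser).eq(fifo.r_data),
            self.s_rvalid.eq(fifo.r_rdy),
            fifo.r_en.eq(self.s_rready),
        ]
        return m
# -----------------------------------------------------------------------------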
+# +# module axi4_r_buffer +# #( +# parameter AXI_DATA_WIDTH = 32, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid, +# output logic [1:0] s_axi4_rresp, +# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata, +# output logic s_axi4_rlast, +# output logic s_axi4_rvalid, +# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser, +# input logic s_axi4_rready, +# +# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid, +# input logic [1:0] m_axi4_rresp, +# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata, +# input logic m_axi4_rlast, +# input logic m_axi4_rvalid, +# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser, +# output logic m_axi4_rready +# ); +# +# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_in; +# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_out; +# +# localparam ID_START = 3; +# localparam ID_END = AXI_ID_WIDTH-1 + ID_START; +# localparam DATA_START = ID_END + 1; +# localparam DATA_END = AXI_DATA_WIDTH-1 + DATA_START; +# localparam USER_START = DATA_END + 1; +# localparam USER_END = AXI_USER_WIDTH-1 + USER_START; +# +# assign data_in [1:0] = m_axi4_rresp; +# assign data_in [2] = m_axi4_rlast; +# assign data_in [ID_END:ID_START] = m_axi4_rid; +# assign data_in[DATA_END:DATA_START] = m_axi4_rdata; +# assign data_in[USER_END:USER_START] = m_axi4_ruser; +# +# assign s_axi4_rresp = data_out [1:0]; +# assign s_axi4_rlast = data_out [2]; +# assign s_axi4_rid = data_out [ID_END:ID_START]; +# assign s_axi4_rdata = data_out[DATA_END:DATA_START]; +# assign s_axi4_ruser = data_out[USER_END:USER_START]; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3 ), +# .BUFFER_DEPTH ( 4 ) +# ) +# u_buffer +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# // Pop +# .valid_out ( s_axi4_rvalid ), +# .data_out ( data_out ), +# .ready_in ( s_axi4_rready ), +# // Push +# .valid_in ( m_axi4_rvalid ), +# .data_in ( data_in ), +# .ready_out ( m_axi4_rready ) +# ); +# +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi4_r_sender.py b/src/soc/unused/iommu/axi_rab/axi4_r_sender.py new file mode 100644 index 00000000..d4e22bb2 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_r_sender.py @@ -0,0 +1,206 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_r_sender(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.drop_i = Signal() # input + self.drop_len_i = Signal(8) # input + self.done_o = Signal() # output + self.id_i = Signal(AXI_ID_WIDTH) # input + self.prefetch_i = Signal() # input + self.hit_i = Signal() # input + self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output + self.s_axi4_rresp = Signal(2) # output + self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output + self.s_axi4_rlast = Signal() # output + self.s_axi4_rvalid = Signal() # output + self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output + self.s_axi4_rready = Signal() # input + self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input + self.m_axi4_rresp = Signal(2) # input + self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input + self.m_axi4_rlast = Signal() # input + self.m_axi4_rvalid = Signal() # input + self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_rready = Signal() # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.fifo_push.eq(self.None) + m.d.comb += self.done_o.eq(self.fifo_push) + m.d.comb 
+= self.s_axi4_rdata.eq(self.m_axi4_rdata) + m.d.comb += self.s_axi4_ruser.eq(self.None) + m.d.comb += self.s_axi4_rid.eq(self.None) + m.d.comb += self.s_axi4_rresp.eq(self.None) + m.d.comb += self.s_axi4_rvalid.eq(self.None) + m.d.comb += self.m_axi4_rready.eq(self.None) + return m + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# //import CfMath::log2; +# +# module axi4_r_sender +# #( +# parameter AXI_DATA_WIDTH = 32, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# input logic drop_i, +# input logic [7:0] drop_len_i, +# output logic done_o, +# input logic [AXI_ID_WIDTH-1:0] id_i, +# input logic prefetch_i, +# input logic hit_i, +# +# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid, +# output logic [1:0] s_axi4_rresp, +# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata, +# output logic s_axi4_rlast, +# output logic s_axi4_rvalid, +# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser, +# input logic s_axi4_rready, +# +# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid, +# input logic [1:0] m_axi4_rresp, +# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata, +# input logic m_axi4_rlast, +# input logic m_axi4_rvalid, +# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser, +# output logic m_axi4_rready +# ); +# +# localparam BUFFER_DEPTH = 16; +# +# logic fifo_valid; +# logic fifo_pop; +# logic fifo_push; +# logic fifo_ready; +# logic [AXI_ID_WIDTH-1:0] id; +# logic [7:0] len; +# logic prefetch; +# logic hit; +# +# logic dropping; +# +# enum logic [1:0] { FORWARDING, DROPPING } +# state_d, state_q; +# logic burst_ongoing_d, burst_ongoing_q; +# logic [7:0] drop_cnt_d, drop_cnt_q; +# +# axi_buffer_rab +# #( +# .DATA_WIDTH ( 2+AXI_ID_WIDTH+8 ), +# .BUFFER_DEPTH ( BUFFER_DEPTH ) +# ) +# u_fifo +# ( +# .clk ( axi4_aclk ), +# .rstn ( axi4_arstn ), +# // Pop +# .data_out ( {prefetch, hit, id, len} ), +# .valid_out ( fifo_valid ), +# .ready_in ( fifo_pop ), +# // Push +# .valid_in ( fifo_push ), +# .data_in ( {prefetch_i, hit_i, id_i, drop_len_i} ), +# .ready_out ( fifo_ready ) +# ); +# +# assign fifo_push = drop_i & fifo_ready; +# assign done_o = fifo_push; +# +# always_comb begin +# burst_ongoing_d = burst_ongoing_q; +# drop_cnt_d = drop_cnt_q; +# dropping = 1'b0; +# s_axi4_rlast = 1'b0; +# fifo_pop = 1'b0; +# state_d = state_q; +# +# case (state_q) +# FORWARDING: begin +# s_axi4_rlast = m_axi4_rlast; +# // Remember whether there is currently a burst ongoing. +# if (m_axi4_rvalid && m_axi4_rready) begin +# if (m_axi4_rlast) begin +# burst_ongoing_d = 1'b0; +# end else begin +# burst_ongoing_d = 1'b1; +# end +# end +# // If there is no burst ongoing and the FIFO has a drop request ready, process it. 
+# if (!burst_ongoing_d && fifo_valid) begin +# drop_cnt_d = len; +# state_d = DROPPING; +# end +# end +# +# DROPPING: begin +# dropping = 1'b1; +# s_axi4_rlast = (drop_cnt_q == '0); +# // Handshake on slave interface +# if (s_axi4_rready) begin +# drop_cnt_d -= 1; +# if (drop_cnt_q == '0) begin +# drop_cnt_d = '0; +# fifo_pop = 1'b1; +# state_d = FORWARDING; +# end +# end +# end +# +# default: begin +# state_d = FORWARDING; +# end +# endcase +# end +# +# assign s_axi4_rdata = m_axi4_rdata; +# +# assign s_axi4_ruser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_ruser; +# assign s_axi4_rid = dropping ? id : m_axi4_rid; +# +# assign s_axi4_rresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, mutli, prot +# (dropping & prefetch ) ? 2'b10 : // prefetch miss +# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot +# (dropping ) ? 2'b10 : // non-prefetch miss +# m_axi4_rresp; +# +# assign s_axi4_rvalid = dropping | m_axi4_rvalid; +# assign m_axi4_rready = ~dropping & s_axi4_rready; +# +# always_ff @(posedge axi4_aclk, negedge axi4_arstn) begin +# if (axi4_arstn == 1'b0) begin +# burst_ongoing_q <= 1'b0; +# drop_cnt_q <= 'b0; +# state_q <= FORWARDING; +# end else begin +# burst_ongoing_q <= burst_ongoing_d; +# drop_cnt_q <= drop_cnt_d; +# state_q <= state_d; +# end +# end +# +# endmodule +# +# +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi4_w_buffer.py b/src/soc/unused/iommu/axi_rab/axi4_w_buffer.py new file mode 100644 index 00000000..aa06dc22 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_w_buffer.py @@ -0,0 +1,777 @@ +# this file has been generated by sv2nmigen +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. 
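# --- editor's note (illustrative sketch, not part of the generated file) -----
# sv2nmigen failed to translate the strobe-width expression AXI_DATA_WIDTH/8:
# the "Signal(1+ERROR p_expression_25)" placeholders in the class below mark
# where that conversion broke.  With the parameters passed in explicitly, the
# intended W-channel signal widths would look roughly like this (names and
# defaults are assumptions, not taken from the RAB sources):

from nmigen import Signal


def w_channel_signals(axi_data_width=32, axi_user_width=4):
    """Sketch: W-channel payload with one write-strobe bit per data byte."""
    wdata = Signal(axi_data_width)       # write data
    wstrb = Signal(axi_data_width // 8)  # byte-enable strobes
    wlast = Signal()                     # last beat of the burst
    wuser = Signal(axi_user_width)       # sideband user bits
    return wdata, wstrb, wlast, wuser
# ------------------------------------------------------------------------------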
+from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_w_buffer(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.l1_done_o = Signal() # output + self.l1_accept_i = Signal() # input + self.l1_save_i = Signal() # input + self.l1_drop_i = Signal() # input + self.l1_master_i = Signal() # input + self.l1_id_i = Signal(AXI_ID_WIDTH) # input + self.l1_len_i = Signal(8) # input + self.l1_prefetch_i = Signal() # input + self.l1_hit_i = Signal() # input + self.l2_done_o = Signal() # output + self.l2_accept_i = Signal() # input + self.l2_drop_i = Signal() # input + self.l2_master_i = Signal() # input + self.l2_id_i = Signal(AXI_ID_WIDTH) # input + self.l2_len_i = Signal(8) # input + self.l2_prefetch_i = Signal() # input + self.l2_hit_i = Signal() # input + self.master_select_o = Signal() # output + self.input_stall_o = Signal() # output + self.output_stall_o = Signal() # output + self.b_drop_o = Signal() # output + self.b_done_i = Signal() # input + self.id_o = Signal(AXI_ID_WIDTH) # output + self.prefetch_o = Signal() # output + self.hit_o = Signal() # output + self.s_axi4_wdata = Signal(AXI_DATA_WIDTH) # input + self.s_axi4_wvalid = Signal() # input + self.s_axi4_wready = Signal() # output + self.s_axi4_wstrb = Signal(1+ERROR p_expression_25) # input + self.s_axi4_wlast = Signal() # input + self.s_axi4_wuser = Signal(AXI_USER_WIDTH) # input + self.m_axi4_wdata = Signal(AXI_DATA_WIDTH) # output + self.m_axi4_wvalid = Signal() # output + self.m_axi4_wready = Signal() # input + self.m_axi4_wstrb = Signal(1+ERROR p_expression_25) # output + self.m_axi4_wlast = Signal() # output + self.m_axi4_wuser = Signal(AXI_USER_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + return m + + +# +# //import CfMath::log2; +# +# module axi4_w_buffer +# #( +# parameter AXI_DATA_WIDTH = 32, +# parameter AXI_ID_WIDTH = 4, +# parameter AXI_USER_WIDTH = 4, +# parameter ENABLE_L2TLB = 0, +# parameter HUM_BUFFER_DEPTH = 16 +# ) +# ( +# input logic axi4_aclk, +# input logic axi4_arstn, +# +# // L1 & L2 interfaces +# output logic l1_done_o, +# input logic l1_accept_i, +# input logic l1_save_i, +# input logic l1_drop_i, +# input logic l1_master_i, +# input logic [AXI_ID_WIDTH-1:0] l1_id_i, +# input logic [7:0] l1_len_i, +# input logic l1_prefetch_i, +# input logic l1_hit_i, +# +# output logic l2_done_o, +# input logic l2_accept_i, +# input logic l2_drop_i, +# input logic l2_master_i, +# input logic [AXI_ID_WIDTH-1:0] l2_id_i, +# input logic [7:0] l2_len_i, +# input logic l2_prefetch_i, +# input logic l2_hit_i, +# +# output logic master_select_o, +# output logic input_stall_o, +# output logic output_stall_o, +# +# // B sender interface +# output logic b_drop_o, +# input logic b_done_i, +# output logic [AXI_ID_WIDTH-1:0] id_o, +# output logic prefetch_o, +# output logic hit_o, +# +# // AXI W channel interfaces +# input logic [AXI_DATA_WIDTH-1:0] s_axi4_wdata, +# input logic s_axi4_wvalid, +# output logic s_axi4_wready, +# input logic [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb, +# input logic s_axi4_wlast, +# input logic [AXI_USER_WIDTH-1:0] s_axi4_wuser, +# +# output logic [AXI_DATA_WIDTH-1:0] m_axi4_wdata, +# output logic m_axi4_wvalid, +# input logic m_axi4_wready, +# output logic [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb, +# output logic m_axi4_wlast, +# output logic [AXI_USER_WIDTH-1:0] m_axi4_wuser +# ); +# +""" + + localparam BUFFER_WIDTH = AXI_DATA_WIDTH+AXI_USER_WIDTH+AXI_DATA_WIDTH/8+1; + + localparam INPUT_BUFFER_DEPTH = 
4; + localparam L1_FIFO_DEPTH = 8; + localparam L2_FIFO_DEPTH = 4; + + logic [AXI_DATA_WIDTH-1:0] axi4_wdata; + logic axi4_wvalid; + logic axi4_wready; + logic [AXI_DATA_WIDTH/8-1:0] axi4_wstrb; + logic axi4_wlast; + logic [AXI_USER_WIDTH-1:0] axi4_wuser; + + logic l1_fifo_valid_out; + logic l1_fifo_ready_in; + logic l1_fifo_valid_in; + logic l1_fifo_ready_out; + + logic l1_req; + logic l1_accept_cur, l1_save_cur, l1_drop_cur; + logic l1_master_cur; + logic [AXI_ID_WIDTH-1:0] l1_id_cur; + logic [7:0] l1_len_cur; + logic l1_hit_cur, l1_prefetch_cur; + logic l1_save_in, l1_save_out; + logic [log2(L1_FIFO_DEPTH)-1:0] n_l1_save_SP; + + logic l2_fifo_valid_out; + logic l2_fifo_ready_in; + logic l2_fifo_valid_in; + logic l2_fifo_ready_out; + + logic l2_req; + logic l2_accept_cur, l2_drop_cur; + logic l2_master_cur; + logic [AXI_ID_WIDTH-1:0] l2_id_cur; + logic [7:0] l2_len_cur; + logic l2_hit_cur, l2_prefetch_cur; + + logic fifo_select, fifo_select_SN, fifo_select_SP; + logic w_done; + logic b_drop_set; + + // HUM buffer signals + logic hum_buf_ready_out; + logic hum_buf_valid_in; + logic hum_buf_ready_in; + logic hum_buf_valid_out; + logic hum_buf_underfull; + + logic [AXI_DATA_WIDTH-1:0] hum_buf_wdata; + logic [AXI_DATA_WIDTH/8-1:0] hum_buf_wstrb; + logic hum_buf_wlast; + logic [AXI_USER_WIDTH-1:0] hum_buf_wuser; + + logic hum_buf_drop_req_SN, hum_buf_drop_req_SP; + logic [7:0] hum_buf_drop_len_SN, hum_buf_drop_len_SP; + logic hum_buf_almost_full; + + logic stop_store; + logic wlast_in, wlast_out; + logic signed [3:0] n_wlast_SN, n_wlast_SP; + logic block_forwarding; + + // Search FSM + typedef enum logic [3:0] {STORE, BYPASS, + WAIT_L1_BYPASS_YES, WAIT_L2_BYPASS_YES, + WAIT_L1_BYPASS_NO, WAIT_L2_BYPASS_NO, + FLUSH, DISCARD, + DISCARD_FINISH} + hum_buf_state_t; + hum_buf_state_t hum_buf_SP; // Present state + hum_buf_state_tbg hum_buf_SN; // Next State + + axi_buffer_rab + #( + .DATA_WIDTH ( BUFFER_WIDTH ), + .BUFFER_DEPTH ( INPUT_BUFFER_DEPTH ) + ) + u_input_buf + ( + .clk ( axi4_aclk ), + .rstn ( axi4_arstn ), + // Push + .data_in ( {s_axi4_wuser, s_axi4_wstrb, s_axi4_wdata, s_axi4_wlast} ), + .valid_in ( s_axi4_wvalid ), + .ready_out ( s_axi4_wready ), + // Pop + .data_out ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ), + .valid_out ( axi4_wvalid ), + .ready_in ( axi4_wready ) + ); + + axi_buffer_rab + #( + .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+4 ), + .BUFFER_DEPTH ( L1_FIFO_DEPTH ) + ) + u_l1_fifo + ( + .clk ( axi4_aclk ), + .rstn ( axi4_arstn ), + // Push + .data_in ( {l1_prefetch_i, l1_hit_i, l1_id_i, l1_len_i, l1_master_i, l1_accept_i, l1_save_i, l1_drop_i} ), + .valid_in ( l1_fifo_valid_in ), + .ready_out ( l1_fifo_ready_out ), + // Pop + .data_out ( {l1_prefetch_cur, l1_hit_cur, l1_id_cur, l1_len_cur, l1_master_cur, l1_accept_cur, l1_save_cur, l1_drop_cur} ), + .valid_out ( l1_fifo_valid_out ), + .ready_in ( l1_fifo_ready_in ) + ); + + // Push upon receiving new requests from the TLB. 
+ assign l1_req = l1_accept_i | l1_save_i | l1_drop_i; + assign l1_fifo_valid_in = l1_req & l1_fifo_ready_out; + + // Signal handshake + assign l1_done_o = l1_fifo_valid_in; + assign l2_done_o = l2_fifo_valid_in; + + // Stall AW input of L1 TLB + assign input_stall_o = ~(l1_fifo_ready_out & l2_fifo_ready_out); + + // Interface b_drop signals + handshake + always_comb begin + if (fifo_select == 1'b0) begin + prefetch_o = l1_prefetch_cur; + hit_o = l1_hit_cur; + id_o = l1_id_cur; + + l1_fifo_ready_in = w_done | b_done_i; + l2_fifo_ready_in = 1'b0; + end else begin + prefetch_o = l2_prefetch_cur; + hit_o = l2_hit_cur; + id_o = l2_id_cur; + + l1_fifo_ready_in = 1'b0; + l2_fifo_ready_in = w_done | b_done_i; + end + end + + // Detect when an L1 transaction save request enters or exits the L1 FIFO. + assign l1_save_in = l1_fifo_valid_in & l1_save_i; + assign l1_save_out = l1_fifo_ready_in & l1_save_cur; + + // Count the number of L1 transaction to save in the L1 FIFO. + always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin + if (axi4_arstn == 0) begin + n_l1_save_SP <= '0; + end else if (l1_save_in ^ l1_save_out) begin + if (l1_save_in) begin + n_l1_save_SP <= n_l1_save_SP + 1'b1; + end else if (l1_save_out) begin + n_l1_save_SP <= n_l1_save_SP - 1'b1; + end + end + end + + // Stall forwarding of AW L1 hits if: + // 1. The HUM buffer does not allow to be bypassed. + // 2. There are multiple L1 save requests in the FIFO, i.e., multiple L2 outputs pending. + assign output_stall_o = (n_l1_save_SP > 1) || (block_forwarding == 1'b1); + + generate + if (ENABLE_L2TLB == 1) begin : HUM_BUFFER + + axi_buffer_rab_bram + #( + .DATA_WIDTH ( BUFFER_WIDTH ), + .BUFFER_DEPTH ( HUM_BUFFER_DEPTH ) + ) + u_hum_buf + ( + .clk ( axi4_aclk ), + .rstn ( axi4_arstn ), + // Push + .data_in ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ), + .valid_in ( hum_buf_valid_in ), + .ready_out ( hum_buf_ready_out ), + // Pop + .data_out ( {hum_buf_wuser, hum_buf_wstrb, hum_buf_wdata, hum_buf_wlast} ), + .valid_out ( hum_buf_valid_out ), + .ready_in ( hum_buf_ready_in ), + // Clear + .almost_full ( hum_buf_almost_full ), + .underfull ( hum_buf_underfull ), + .drop_req ( hum_buf_drop_req_SP ), + .drop_len ( hum_buf_drop_len_SP ) + ); + + axi_buffer_rab + #( + .DATA_WIDTH ( 2+AXI_ID_WIDTH+8+3 ), + .BUFFER_DEPTH ( L2_FIFO_DEPTH ) + ) + u_l2_fifo + ( + .clk ( axi4_aclk ), + .rstn ( axi4_arstn ), + // Push + .data_in ( {l2_prefetch_i, l2_hit_i, l2_id_i, l2_len_i, l2_master_i, l2_accept_i, l2_drop_i} ), + .valid_in ( l2_fifo_valid_in ), + .ready_out ( l2_fifo_ready_out ), + // Pop + .data_out ( {l2_prefetch_cur, l2_hit_cur, l2_id_cur, l2_len_cur, l2_master_cur, l2_accept_cur, l2_drop_cur} ), + .valid_out ( l2_fifo_valid_out ), + .ready_in ( l2_fifo_ready_in ) + ); + + // Push upon receiving new result from TLB. 
+ assign l2_req = l2_accept_i | l2_drop_i; + assign l2_fifo_valid_in = l2_req & l2_fifo_ready_out; + + assign wlast_in = axi4_wlast & hum_buf_valid_in & hum_buf_ready_out; + assign wlast_out = hum_buf_wlast & hum_buf_valid_out & hum_buf_ready_in; + + always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin + if (axi4_arstn == 0) begin + fifo_select_SP <= 1'b0; + hum_buf_drop_len_SP <= 'b0; + hum_buf_drop_req_SP <= 1'b0; + hum_buf_SP <= STORE; + n_wlast_SP <= 'b0; + end else begin + fifo_select_SP <= fifo_select_SN; + hum_buf_drop_len_SP <= hum_buf_drop_len_SN; + hum_buf_drop_req_SP <= hum_buf_drop_req_SN; + hum_buf_SP <= hum_buf_SN; + n_wlast_SP <= n_wlast_SN; + end + end + + always_comb begin + n_wlast_SN = n_wlast_SP; + if (hum_buf_drop_req_SP) begin // Happens exactly once per burst to be dropped. + n_wlast_SN -= 1; + end + if (wlast_in) begin + n_wlast_SN += 1; + end + if (wlast_out) begin + n_wlast_SN -= 1; + end + end + + always_comb begin : HUM_BUFFER_FSM + hum_buf_SN = hum_buf_SP; + + m_axi4_wlast = 1'b0; + m_axi4_wdata = 'b0; + m_axi4_wstrb = 'b0; + m_axi4_wuser = 'b0; + + m_axi4_wvalid = 1'b0; + axi4_wready = 1'b0; + + hum_buf_valid_in = 1'b0; + hum_buf_ready_in = 1'b0; + + hum_buf_drop_req_SN = hum_buf_drop_req_SP; + hum_buf_drop_len_SN = hum_buf_drop_len_SP; + master_select_o = 1'b0; + + w_done = 1'b0; // read from FIFO without handshake with B sender + b_drop_o = 1'b0; // send data from FIFO to B sender (with handshake) + fifo_select = 1'b0; + + fifo_select_SN = fifo_select_SP; + stop_store = 1'b0; + + block_forwarding = 1'b0; + + unique case (hum_buf_SP) + + STORE : begin + // Simply store the data in the buffer. + hum_buf_valid_in = axi4_wvalid & hum_buf_ready_out; + axi4_wready = hum_buf_ready_out; + + // We have got a full burst in the HUM buffer, thus stop storing. + if (wlast_in & !hum_buf_underfull | (n_wlast_SP > $signed(0))) begin + hum_buf_SN = WAIT_L1_BYPASS_YES; + + // The buffer is full, thus wait for decision. + end else if (~hum_buf_ready_out) begin + hum_buf_SN = WAIT_L1_BYPASS_NO; + end + + // Avoid the forwarding of L1 hits until we know whether we can bypass. + if (l1_fifo_valid_out & l1_save_cur) begin + block_forwarding = 1'b1; + end + end + + WAIT_L1_BYPASS_YES : begin + // Wait for orders from L1 TLB. + if (l1_fifo_valid_out) begin + + // L1 hit - forward data from buffer + if (l1_accept_cur) begin + m_axi4_wlast = hum_buf_wlast; + m_axi4_wdata = hum_buf_wdata; + m_axi4_wstrb = hum_buf_wstrb; + m_axi4_wuser = hum_buf_wuser; + + m_axi4_wvalid = hum_buf_valid_out; + hum_buf_ready_in = m_axi4_wready; + + master_select_o = l1_master_cur; + + // Detect last data beat. + if (wlast_out) begin + fifo_select = 1'b0; + w_done = 1'b1; + hum_buf_SN = STORE; + end + + // L1 miss - wait for L2 + end else if (l1_save_cur) begin + fifo_select = 1'b0; + w_done = 1'b1; + hum_buf_SN = WAIT_L2_BYPASS_YES; + + // L1 prefetch, prot, multi - drop data + end else if (l1_drop_cur) begin + fifo_select_SN = 1'b0; // L1 + hum_buf_drop_req_SN = 1'b1; + hum_buf_drop_len_SN = l1_len_cur; + hum_buf_SN = FLUSH; + end + end + end + + WAIT_L2_BYPASS_YES : begin + // Wait for orders from L2 TLB. + if (l2_fifo_valid_out) begin + + // L2 hit - forward data from buffer + if (l2_accept_cur) begin + m_axi4_wlast = hum_buf_wlast; + m_axi4_wdata = hum_buf_wdata; + m_axi4_wstrb = hum_buf_wstrb; + m_axi4_wuser = hum_buf_wuser; + + m_axi4_wvalid = hum_buf_valid_out; + hum_buf_ready_in = m_axi4_wready; + + master_select_o = l2_master_cur; + + // Detect last data beat. 
+ if (wlast_out) begin + fifo_select = 1'b1; + w_done = 1'b1; + hum_buf_SN = STORE; + end + + // L2 miss/prefetch hit + end else if (l2_drop_cur) begin + fifo_select_SN = 1'b1; // L2 + hum_buf_drop_req_SN = 1'b1; + hum_buf_drop_len_SN = l2_len_cur; + hum_buf_SN = FLUSH; + end + + // While we wait for orders from L2 TLB, we can still drop and accept L1 transactions. + end else if (l1_fifo_valid_out) begin + + // L1 hit + if (l1_accept_cur) begin + hum_buf_SN = BYPASS; + + // L1 prefetch/prot/multi + end else if (l1_drop_cur) begin + hum_buf_SN = DISCARD; + end + end + end + + FLUSH : begin + // Clear HUM buffer flush request. + hum_buf_drop_req_SN = 1'b0; + + // perform handshake with B sender + fifo_select = fifo_select_SP; + b_drop_o = 1'b1; + if (b_done_i) begin + hum_buf_SN = STORE; + end + end + + BYPASS : begin + // Forward one full transaction from input buffer. + m_axi4_wlast = axi4_wlast; + m_axi4_wdata = axi4_wdata; + m_axi4_wstrb = axi4_wstrb; + m_axi4_wuser = axi4_wuser; + + m_axi4_wvalid = axi4_wvalid; + axi4_wready = m_axi4_wready; + + master_select_o = l1_master_cur; + + // We have got a full transaction. + if (axi4_wlast & axi4_wready & axi4_wvalid) begin + fifo_select = 1'b0; + w_done = 1'b1; + hum_buf_SN = WAIT_L2_BYPASS_YES; + end + end + + DISCARD : begin + // Discard one full transaction from input buffer. + axi4_wready = 1'b1; + + // We have got a full transaction. + if (axi4_wlast & axi4_wready & axi4_wvalid) begin + // Try to perform handshake with B sender. + fifo_select = 1'b0; + b_drop_o = 1'b1; + // We cannot wait here due to axi4_wready. + if (b_done_i) begin + hum_buf_SN = WAIT_L2_BYPASS_YES; + end else begin + hum_buf_SN = DISCARD_FINISH; + end + end + end + + DISCARD_FINISH : begin + // Perform handshake with B sender. + fifo_select = 1'b0; + b_drop_o = 1'b1; + if (b_done_i) begin + hum_buf_SN = WAIT_L2_BYPASS_YES; + end + end + + WAIT_L1_BYPASS_NO : begin + // Do not allow the forwarding of L1 hits. + block_forwarding = 1'b1; + + // Wait for orders from L1 TLB. + if (l1_fifo_valid_out) begin + + // L1 hit - forward data from/through HUM buffer and refill the buffer + if (l1_accept_cur) begin + // Forward data from HUM buffer. + m_axi4_wlast = hum_buf_wlast; + m_axi4_wdata = hum_buf_wdata; + m_axi4_wstrb = hum_buf_wstrb; + m_axi4_wuser = hum_buf_wuser; + + m_axi4_wvalid = hum_buf_valid_out; + hum_buf_ready_in = m_axi4_wready; + + master_select_o = l1_master_cur; + + // Refill the HUM buffer. Stop when buffer full. + stop_store = ~hum_buf_ready_out; + hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ; + axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out; + + // Detect last data beat. + if (wlast_out) begin + fifo_select = 1'b0; + w_done = 1'b1; + if (~hum_buf_ready_out | hum_buf_almost_full) begin + hum_buf_SN = WAIT_L1_BYPASS_NO; + end else begin + hum_buf_SN = STORE; + end + end + + // Allow the forwarding of L1 hits. + block_forwarding = 1'b0; + + // L1 miss - wait for L2 + end else if (l1_save_cur) begin + fifo_select = 1'b0; + w_done = 1'b1; + hum_buf_SN = WAIT_L2_BYPASS_NO; + + // L1 prefetch, prot, multi - drop data + end else if (l1_drop_cur) begin + fifo_select_SN = 1'b0; // L1 + hum_buf_drop_req_SN = 1'b1; + hum_buf_drop_len_SN = l1_len_cur; + hum_buf_SN = FLUSH; + + // Allow the forwarding of L1 hits. + block_forwarding = 1'b0; + end + end + end + + WAIT_L2_BYPASS_NO : begin + // Do not allow the forwarding of L1 hits. + block_forwarding = 1'b1; + + // Wait for orders from L2 TLB. 
+ if (l2_fifo_valid_out) begin + + // L2 hit - forward first part from HUM buffer, rest from input buffer + if (l2_accept_cur) begin + // Forward data from HUM buffer. + m_axi4_wlast = hum_buf_wlast; + m_axi4_wdata = hum_buf_wdata; + m_axi4_wstrb = hum_buf_wstrb; + m_axi4_wuser = hum_buf_wuser; + + m_axi4_wvalid = hum_buf_valid_out; + hum_buf_ready_in = m_axi4_wready; + + master_select_o = l2_master_cur; + + // Refill the HUM buffer. Stop when buffer full. + stop_store = ~hum_buf_ready_out; + hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ; + axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out; + + // Detect last data beat. + if (wlast_out) begin + fifo_select = 1'b1; + w_done = 1'b1; + if (~hum_buf_ready_out | hum_buf_almost_full) begin + hum_buf_SN = WAIT_L1_BYPASS_NO; + end else begin + hum_buf_SN = STORE; + end + end + + // Allow the forwarding of L1 hits. + block_forwarding = 1'b0; + + // L2 miss/prefetch hit - drop data + end else if (l2_drop_cur) begin + fifo_select_SN = 1'b1; // L2 + hum_buf_drop_req_SN = 1'b1; + hum_buf_drop_len_SN = l2_len_cur; + hum_buf_SN = FLUSH; + + // Allow the forwarding of L1 hits. + block_forwarding = 1'b0; + end + end + end + + + default: begin + hum_buf_SN = STORE; + end + + endcase // hum_buf_SP + end // HUM_BUFFER_FSM + + assign b_drop_set = 1'b0; + + end else begin // HUM_BUFFER + + // register to perform the handshake with B sender + always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin + if (axi4_arstn == 0) begin + b_drop_o <= 1'b0; + end else if (b_done_i) begin + b_drop_o <= 1'b0; + end else if (b_drop_set) begin + b_drop_o <= 1'b1;; + end + end + + always_comb begin : OUTPUT_CTRL + + fifo_select = 1'b0; + w_done = 1'b0; + b_drop_set = 1'b0; + + m_axi4_wlast = 1'b0; + m_axi4_wdata = 'b0; + m_axi4_wstrb = 'b0; + m_axi4_wuser = 'b0; + + m_axi4_wvalid = 1'b0; + axi4_wready = 1'b0; + + if (l1_fifo_valid_out) begin + // forward data + if (l1_accept_cur) begin + m_axi4_wlast = axi4_wlast; + m_axi4_wdata = axi4_wdata; + m_axi4_wstrb = axi4_wstrb; + m_axi4_wuser = axi4_wuser; + + m_axi4_wvalid = axi4_wvalid; + axi4_wready = m_axi4_wready; + + // Simply pop from FIFO upon last data beat. + w_done = axi4_wlast & axi4_wvalid & axi4_wready; + + // discard entire burst + end else if (b_drop_o == 1'b0) begin + axi4_wready = 1'b1; + + // Simply pop from FIFO upon last data beat. Perform handshake with B sender. 
+ if (axi4_wlast & axi4_wvalid & axi4_wready) + b_drop_set = 1'b1; + end + end + + end // OUTPUT_CTRL + + assign master_select_o = l1_master_cur; + assign l2_fifo_ready_out = 1'b1; + assign block_forwarding = 1'b0; + + // unused signals + assign hum_buf_ready_out = 1'b0; + assign hum_buf_valid_in = 1'b0; + assign hum_buf_ready_in = 1'b0; + assign hum_buf_valid_out = 1'b0; + assign hum_buf_wdata = 'b0; + assign hum_buf_wstrb = 'b0; + assign hum_buf_wlast = 1'b0; + assign hum_buf_wuser = 'b0; + assign hum_buf_drop_len_SN = 'b0; + assign hum_buf_drop_req_SN = 1'b0; + assign hum_buf_almost_full = 1'b0; + + assign l2_fifo_valid_in = 1'b0; + assign l2_fifo_valid_out = 1'b0; + assign l2_prefetch_cur = 1'b0; + assign l2_hit_cur = 1'b0; + assign l2_id_cur = 'b0; + assign l2_len_cur = 'b0; + assign l2_master_cur = 1'b0; + assign l2_accept_cur = 1'b0; + assign l2_drop_cur = 1'b0; + + assign l2_req = 1'b0; + + assign fifo_select_SN = 1'b0; + assign fifo_select_SP = 1'b0; + + assign stop_store = 1'b0; + assign n_wlast_SP = 'b0; + assign wlast_in = 1'b0; + assign wlast_out = 1'b0; + + end // HUM_BUFFER + + endgenerate +""" diff --git a/src/soc/unused/iommu/axi_rab/axi4_w_sender.py b/src/soc/unused/iommu/axi_rab/axi4_w_sender.py new file mode 100644 index 00000000..9916334f --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi4_w_sender.py @@ -0,0 +1,78 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi4_w_sender(Elaboratable): + + def __init__(self): + self.axi4_aclk = Signal() # input + self.axi4_arstn = Signal() # input + self.s_axi4_wdata = Signal() # input + self.s_axi4_wvalid = Signal() # input + self.s_axi4_wready = Signal() # output + self.s_axi4_wstrb = Signal() # input + self.s_axi4_wlast = Signal() # input + self.s_axi4_wuser = Signal() # input + self.m_axi4_wdata = Signal() # output + self.m_axi4_wvalid = Signal() # output + self.m_axi4_wready = Signal() # input + self.m_axi4_wstrb = Signal() # output + self.m_axi4_wlast = Signal() # output + self.m_axi4_wuser = Signal() # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.m_axi4_wdata.eq(self.s_axi4_wdata) + m.d.comb += self.m_axi4_wstrb.eq(self.s_axi4_wstrb) + m.d.comb += self.m_axi4_wlast.eq(self.s_axi4_wlast) + m.d.comb += self.m_axi4_wuser.eq(self.s_axi4_wuser) + m.d.comb += self.m_axi4_wvalid.eq(self.s_axi4_wvalid) + m.d.comb += self.s_axi4_wready.eq(self.m_axi4_wready) + return m + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. 
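# --- editor's sketch (illustrative, not part of the generated file) ----------
# The burst-drop logic in axi4_r_sender and the HUM-buffer controller in
# axi4_w_buffer above are written as explicit always_comb/always_ff state
# machines.  In nmigen the usual idiom is m.FSM(); the fragment below is a
# deliberately simplified two-state forward/drop controller, not a port of the
# RAB code, just to show how such a state machine translates:

from nmigen import Elaboratable, Module, Signal


class DropFsmSketch(Elaboratable):
    def __init__(self):
        self.drop_req = Signal()   # request to discard one burst
        self.drop_len = Signal(8)  # AXI-style length: 0 means a single beat
        self.beat_ack = Signal()   # downstream accepted one dummy beat
        self.dropping = Signal()   # high while beats are being discarded

    def elaborate(self, platform=None):
        m = Module()
        remaining = Signal(8)
        with m.FSM() as fsm:
            with m.State("FORWARD"):
                with m.If(self.drop_req):
                    m.d.sync += remaining.eq(self.drop_len)
                    m.next = "DROP"
            with m.State("DROP"):
                with m.If(self.beat_ack):
                    with m.If(remaining == 0):
                        m.next = "FORWARD"
                    with m.Else():
                        m.d.sync += remaining.eq(remaining - 1)
        m.d.comb += self.dropping.eq(fsm.ongoing("DROP"))
        return m
# ------------------------------------------------------------------------------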
+# +# module axi4_w_sender +# #( +# parameter AXI_DATA_WIDTH = 32, +# parameter AXI_USER_WIDTH = 2 +# ) +# ( +# input axi4_aclk, +# input axi4_arstn, +# +# input [AXI_DATA_WIDTH-1:0] s_axi4_wdata, +# input s_axi4_wvalid, +# output s_axi4_wready, +# input [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb, +# input s_axi4_wlast, +# input [AXI_USER_WIDTH-1:0] s_axi4_wuser, +# +# output [AXI_DATA_WIDTH-1:0] m_axi4_wdata, +# output m_axi4_wvalid, +# input m_axi4_wready, +# output [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb, +# output m_axi4_wlast, +# output [AXI_USER_WIDTH-1:0] m_axi4_wuser +# ); +# +# assign m_axi4_wdata = s_axi4_wdata; +# assign m_axi4_wstrb = s_axi4_wstrb; +# assign m_axi4_wlast = s_axi4_wlast; +# assign m_axi4_wuser = s_axi4_wuser; +# +# assign m_axi4_wvalid = s_axi4_wvalid; +# assign s_axi4_wready = m_axi4_wready; +# +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi_buffer_rab.py b/src/soc/unused/iommu/axi_rab/axi_buffer_rab.py new file mode 100644 index 00000000..b4d99299 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi_buffer_rab.py @@ -0,0 +1,151 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi_buffer_rab(Elaboratable): + + def __init__(self): + self.clk = Signal() # input + self.rstn = Signal() # input + self.data_out = Signal(DATA_WIDTH) # output + self.valid_out = Signal() # output + self.ready_in = Signal() # input + self.valid_in = Signal() # input + self.data_in = Signal(DATA_WIDTH) # input + self.ready_out = Signal() # output + + def elaborate(self, platform=None): + m = Module() + m.d.comb += self.full.eq(self.None) + m.d.comb += self.data_out.eq(self.None) + m.d.comb += self.valid_out.eq(self.None) + m.d.comb += self.ready_out.eq(self.None) + return m + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. 
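# --- editor's sketch (illustrative, not part of the generated file) ----------
# The elaborate() above is incomplete sv2nmigen output (the "self.None"
# assignments are conversion artefacts).  The hand-written ring buffer in the
# SystemVerilog below is a plain synchronous FIFO with valid/ready handshakes
# on both sides; assuming nmigen.lib.fifo.SyncFIFO is an acceptable
# replacement for the explicit pointer arithmetic, it maps onto nmigen like
# this (clk/rstn become the implicit "sync" clock domain):

from nmigen import Elaboratable, Module, Signal
from nmigen.lib.fifo import SyncFIFO


class AxiBufferRabSketch(Elaboratable):
    def __init__(self, data_width=8, buffer_depth=4):
        # upstream (push) port
        self.data_in = Signal(data_width)
        self.valid_in = Signal()
        self.ready_out = Signal()   # high while the buffer is not full
        # downstream (pop) port
        self.data_out = Signal(data_width)
        self.valid_out = Signal()   # high while the buffer is not empty
        self.ready_in = Signal()
        self.fifo = SyncFIFO(width=data_width, depth=buffer_depth)

    def elaborate(self, platform=None):
        m = Module()
        m.submodules.fifo = fifo = self.fifo
        m.d.comb += [
            # push side
            fifo.w_data.eq(self.data_in),
            fifo.w_en.eq(self.valid_in),
            self.ready_out.eq(fifo.w_rdy),
            # pop side
            self.data_out.eq(fifo.r_data),
            self.valid_out.eq(fifo.r_rdy),
            fifo.r_en.eq(self.ready_in),
        ]
        return m
# ------------------------------------------------------------------------------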
+# +# //import CfMath::log2; +# +# module axi_buffer_rab +# //#( +# // parameter DATA_WIDTH, +# // parameter BUFFER_DEPTH +# //) +# ( +# input logic clk, +# input logic rstn, +# +# // Downstream port +# output logic [DATA_WIDTH-1:0] data_out, +# output logic valid_out, +# input logic ready_in, +# +# // Upstream port +# input logic valid_in, +# input logic [DATA_WIDTH-1:0] data_in, +# output logic ready_out +# ); +# +# localparam integer LOG_BUFFER_DEPTH = log2(BUFFER_DEPTH); +# +# // Internal data structures +# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_in; // location to which we last wrote +# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_out; // location from which we last sent +# reg [LOG_BUFFER_DEPTH : 0] elements; // number of elements in the buffer +# reg [DATA_WIDTH - 1 : 0] buffer [BUFFER_DEPTH - 1 : 0]; +# +# wire full; +# +# integer loop1; +# +# assign full = (elements == BUFFER_DEPTH); +# +# always @(posedge clk or negedge rstn) +# begin: elements_sequential +# if (rstn == 1'b0) +# elements <= 0; +# else +# begin +# // ------------------ +# // Are we filling up? +# // ------------------ +# // One out, none in +# if (ready_in && valid_out && (!valid_in || full)) +# elements <= elements - 1; +# // None out, one in +# else if ((!valid_out || !ready_in) && valid_in && !full) +# elements <= elements + 1; +# // Else, either one out and one in, or none out and none in - stays unchanged +# end +# end +# +# always @(posedge clk or negedge rstn) +# begin: buffers_sequential +# if (rstn == 1'b0) +# begin +# for (loop1 = 0 ; loop1 < BUFFER_DEPTH ; loop1 = loop1 + 1) +# buffer[loop1] <= 0; +# end +# else +# begin +# // Update the memory +# if (valid_in && !full) +# buffer[pointer_in] <= data_in; +# end +# end +# +# always @(posedge clk or negedge rstn) +# begin: sequential +# if (rstn == 1'b0) +# begin +# pointer_out <= 0; +# pointer_in <= 0; +# end +# else +# begin +# // ------------------------------------ +# // Check what to do with the input side +# // ------------------------------------ +# // We have some input, increase by 1 the input pointer +# if (valid_in && !full) +# begin +# if (pointer_in == $unsigned(BUFFER_DEPTH - 1)) +# pointer_in <= 0; +# else +# pointer_in <= pointer_in + 1; +# end +# // Else we don't have any input, the input pointer stays the same +# +# // ------------------------------------- +# // Check what to do with the output side +# // ------------------------------------- +# // We had pushed one flit out, we can try to go for the next one +# if (ready_in && valid_out) +# begin +# if (pointer_out == $unsigned(BUFFER_DEPTH - 1)) +# pointer_out <= 0; +# else +# pointer_out <= pointer_out + 1; +# end +# // Else stay on the same output location +# end +# end +# +# // Update output ports +# assign data_out = buffer[pointer_out]; +# assign valid_out = (elements != 0); +# +# assign ready_out = ~full; +# +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi_buffer_rab_bram.py b/src/soc/unused/iommu/axi_rab/axi_buffer_rab_bram.py new file mode 100644 index 00000000..349b314e --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi_buffer_rab_bram.py @@ -0,0 +1,209 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi_buffer_rab_bram(Elaboratable): + + def __init__(self): + self.clk = Signal() # input + self.rstn = Signal() # input + self.data_out = Signal(DATA_WIDTH) # output + self.valid_out = Signal() # output + self.ready_in = Signal() # input + self.valid_in = Signal() # input + self.data_in = 
Signal(DATA_WIDTH) # input + self.ready_out = Signal() # output + self.almost_full = Signal() # output + self.underfull = Signal() # output + self.drop_req = Signal() # input + self.drop_len = Signal(8) # input + + def elaborate(self, platform=None): + m = Module() + return m + + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# ////import CfMath::log2; +# +# module axi_buffer_rab_bram +# //#( +# // parameter DATA_WIDTH, +# // parameter BUFFER_DEPTH +# // ) +# ( +# input logic clk, +# input logic rstn, +# +# // Downstream port +# output logic [DATA_WIDTH-1:0] data_out, +# output logic valid_out, +# input logic ready_in, +# +# // Upstream port +# input logic valid_in, +# input logic [DATA_WIDTH-1:0] data_in, +# output logic ready_out, +# +# // Status and drop control +# output logic almost_full, +# output logic underfull, +# input logic drop_req, +# // Number of items to drop. As for AXI lengths, counting starts at zero, i.e., `drop_len == 0` +# // and `drop_req` means drop one item. +# input logic [7:0] drop_len +# ); +# +""" #docstring_begin + // The BRAM needs to be in "write-first" mode for first-word fall-through FIFO behavior. + // To still push and pop simultaneously if the buffer is full, we internally increase the + // buffer depth by 1. + localparam ACT_BUFFER_DEPTH = BUFFER_DEPTH+1; + localparam ACT_LOG_BUFFER_DEPTH = log2(ACT_BUFFER_DEPTH+1); + + /** + * Internal data structures + */ + // Location to which we last wrote + logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_in_d, ptr_in_q; + // Location from which we last sent + logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_d, ptr_out_q; + // Required for fall-through behavior on the first word + logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_bram; + // Number of elements in the buffer. Can be negative if elements that have been dropped have not + // yet been written. + logic signed [ACT_LOG_BUFFER_DEPTH:0] n_elems_d, n_elems_q; + + logic [DATA_WIDTH-1:0] data_out_bram, data_out_q; + logic valid_out_q; + + logic full; + + assign almost_full = (n_elems_q == BUFFER_DEPTH-1); + assign full = (n_elems_q == BUFFER_DEPTH); + + always_ff @(posedge clk, negedge rstn) begin + if (~rstn) begin + n_elems_q <= '0; + ptr_in_q <= '0; + ptr_out_q <= '0; + end else begin + n_elems_q <= n_elems_d; + ptr_in_q <= ptr_in_d; + ptr_out_q <= ptr_out_d; + end + end + + // Update the number of elements. + always_comb begin + n_elems_d = n_elems_q; + if (drop_req) begin + n_elems_d -= (drop_len + 1); + end + if (valid_in && ready_out) begin + n_elems_d += 1; + end + if (valid_out && ready_in) begin + n_elems_d -= 1; + end + end + + // Update the output pointer. 
+ always_comb begin + ptr_out_d = ptr_out_q; + if (drop_req) begin + if ((ptr_out_q + drop_len + 1) > (ACT_BUFFER_DEPTH - 1)) begin + ptr_out_d = drop_len + 1 - (ACT_BUFFER_DEPTH - ptr_out_q); + end else begin + ptr_out_d += (drop_len + 1); + end + end + if (valid_out && ready_in) begin + if (ptr_out_d == (ACT_BUFFER_DEPTH - 1)) begin + ptr_out_d = '0; + end else begin + ptr_out_d += 1; + end + end + end + + // The BRAM has a read latency of one cycle, so apply the new address one cycle earlier for + // first-word fall-through FIFO behavior. + //assign ptr_out_bram = (ptr_out_q == (ACT_BUFFER_DEPTH-1)) ? '0 : (ptr_out_q + 1); + assign ptr_out_bram = ptr_out_d; + + // Update the input pointer. + always_comb begin + ptr_in_d = ptr_in_q; + if (valid_in && ready_out) begin + if (ptr_in_d == (ACT_BUFFER_DEPTH - 1)) begin + ptr_in_d = '0; + end else begin + ptr_in_d += 1; + end + end + end + + // Update output ports. + assign valid_out = (n_elems_q > $signed(0)); + assign underfull = (n_elems_q < $signed(0)); + assign ready_out = ~full; + + ram_tp_write_first #( + .ADDR_WIDTH ( ACT_LOG_BUFFER_DEPTH ), + .DATA_WIDTH ( DATA_WIDTH ) + ) + ram_tp_write_first_0 + ( + .clk ( clk ), + .we ( valid_in & ~full ), + .addr0 ( ptr_in_q ), + .addr1 ( ptr_out_bram ), + .d_i ( data_in ), + .d0_o ( ), + .d1_o ( data_out_bram ) + ); + + // When reading from/writing two the same address on both ports ("Write-Read Collision"), + // the data on the read port is invalid (during the write cycle). In this implementation, + // this can happen only when the buffer is empty. Thus, we forward the data from an + // register in this case. + always @(posedge clk) begin + if (rstn == 1'b0) begin + data_out_q <= 'b0; + end else if ( (ptr_out_bram == ptr_in_q) && (valid_in && !full) ) begin + data_out_q <= data_in; + end + end + + always @(posedge clk) begin + if (rstn == 1'b0) begin + valid_out_q <= 'b0; + end else begin + valid_out_q <= valid_out; + end + end + + // Drive output data + always_comb begin + if (valid_out && !valid_out_q) begin // We have just written to an empty FIFO + data_out = data_out_q; + end else begin + data_out = data_out_bram; + end + end + +""" +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/axi_rab_cfg.py b/src/soc/unused/iommu/axi_rab/axi_rab_cfg.py new file mode 100644 index 00000000..43843b95 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/axi_rab_cfg.py @@ -0,0 +1,707 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class axi_rab_cfg(Elaboratable): + + def __init__(self): + self.Clk_CI = Signal() # input + self.Rst_RBI = Signal() # input + self.s_axi_awaddr = Signal(AXI_ADDR_WIDTH) # input + self.s_axi_awvalid = Signal() # input + self.s_axi_awready = Signal() # output + self.s_axi_wdata = Signal() # input + self.s_axi_wstrb = Signal(1+ERROR p_expression_25) # input + self.s_axi_wvalid = Signal() # input + self.s_axi_wready = Signal() # output + self.s_axi_bresp = Signal(2) # output + self.s_axi_bvalid = Signal() # output + self.s_axi_bready = Signal() # input + self.s_axi_araddr = Signal(AXI_ADDR_WIDTH) # input + self.s_axi_arvalid = Signal() # input + self.s_axi_arready = Signal() # output + self.s_axi_rdata = Signal(AXI_DATA_WIDTH) # output + self.s_axi_rresp = Signal(2) # output + self.s_axi_rvalid = Signal() # output + self.s_axi_rready = Signal() # input + self.L1Cfg_DO = Signal() # output + self.L1AllowMultiHit_SO = Signal() # output + self.MissAddr_DI = Signal(ADDR_WIDTH_VIRT) # input + self.MissMeta_DI = 
Signal(MISS_META_WIDTH) # input + self.Miss_SI = Signal() # input + self.MhFifoFull_SO = Signal() # output + self.wdata_l2 = Signal() # output + self.waddr_l2 = Signal() # output + self.wren_l2 = Signal(N_PORTS) # output + + def elaborate(self, platform=None): + m = Module() + return m + + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# // --=========================================================================-- +# // +# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗ +# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝ +# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗ +# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║ +# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝ +# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝ +# // +# // +# // Author: Pirmin Vogel - vogelpi@iis.ee.ethz.ch +# // +# // Purpose : AXI4-Lite configuration and miss handling interface for RAB +# // +# // --=========================================================================-- +# +# //import CfMath::log2; +# +# module axi_rab_cfg +# #( +# parameter N_PORTS = 3, +# parameter N_REGS = 196, +# parameter N_L2_SETS = 32, +# parameter N_L2_SET_ENTRIES= 32, +# parameter ADDR_WIDTH_PHYS = 40, +# parameter ADDR_WIDTH_VIRT = 32, +# parameter N_FLAGS = 4, +# parameter AXI_DATA_WIDTH = 64, +# parameter AXI_ADDR_WIDTH = 32, +# parameter MISS_META_WIDTH = 10, // <= FIFO_WIDTH +# parameter MH_FIFO_DEPTH = 16 +# ) +# ( +# input logic Clk_CI, +# input logic Rst_RBI, +# +# // AXI Lite interface +# input logic [AXI_ADDR_WIDTH-1:0] s_axi_awaddr, +# input logic s_axi_awvalid, +# output logic s_axi_awready, +# input logic [AXI_DATA_WIDTH/8-1:0][7:0] s_axi_wdata, +# input logic [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb, +# input logic s_axi_wvalid, +# output logic s_axi_wready, +# output logic [1:0] s_axi_bresp, +# output logic s_axi_bvalid, +# input logic s_axi_bready, +# input logic [AXI_ADDR_WIDTH-1:0] s_axi_araddr, +# input logic s_axi_arvalid, +# output logic s_axi_arready, +# output logic [AXI_DATA_WIDTH-1:0] s_axi_rdata, +# output logic [1:0] s_axi_rresp, +# output logic s_axi_rvalid, +# input logic s_axi_rready, +# +# // Slice configuration +# output logic [N_REGS-1:0][63:0] L1Cfg_DO, +# output logic L1AllowMultiHit_SO, +# +# // Miss handling +# input logic [ADDR_WIDTH_VIRT-1:0] MissAddr_DI, +# input logic [MISS_META_WIDTH-1:0] MissMeta_DI, +# input logic Miss_SI, +# output logic MhFifoFull_SO, +# +# // L2 TLB +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] wdata_l2, +# output logic [N_PORTS-1:0] [AXI_ADDR_WIDTH-1:0] waddr_l2, +# output logic [N_PORTS-1:0] wren_l2 +# ); +# +""" #docstring_begin + + localparam ADDR_LSB = log2(64/8); // 64 even if the AXI Lite interface is 32, + // because RAB slices are 64 bit wide. 
+ localparam ADDR_MSB = log2(N_REGS)+ADDR_LSB-1; + + localparam L2SINGLE_AMAP_SIZE = 16'h4000; // Maximum 2048 TLB entries in L2 + + localparam integer N_L2_ENTRIES = N_L2_SETS * N_L2_SET_ENTRIES; + + localparam logic [AXI_ADDR_WIDTH-1:0] L2_VA_MAX_ADDR = (N_L2_ENTRIES-1) << 2; + + logic [AXI_DATA_WIDTH/8-1:0][7:0] L1Cfg_DP[N_REGS]; // [Byte][Bit] + genvar j; + + // █████╗ ██╗ ██╗██╗██╗ ██╗ ██╗ ██╗████████╗███████╗ + // ██╔══██╗╚██╗██╔╝██║██║ ██║ ██║ ██║╚══██╔══╝██╔════╝ + // ███████║ ╚███╔╝ ██║███████║█████╗██║ ██║ ██║ █████╗ + // ██╔══██║ ██╔██╗ ██║╚════██║╚════╝██║ ██║ ██║ ██╔══╝ + // ██║ ██║██╔╝ ██╗██║ ██║ ███████╗██║ ██║ ███████╗ + // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚══════╝ + // + logic [AXI_ADDR_WIDTH-1:0] awaddr_reg; + logic awaddr_done_rise; + logic awaddr_done_reg; + logic awaddr_done_reg_dly; + + logic [AXI_DATA_WIDTH/8-1:0][7:0] wdata_reg; + logic [AXI_DATA_WIDTH/8-1:0] wstrb_reg; + logic wdata_done_rise; + logic wdata_done_reg; + logic wdata_done_reg_dly; + + logic wresp_done_reg; + logic wresp_running_reg; + + logic [AXI_ADDR_WIDTH-1:0] araddr_reg; + logic araddr_done_reg; + + logic [AXI_DATA_WIDTH-1:0] rdata_reg; + logic rresp_done_reg; + logic rresp_running_reg; + + logic awready; + logic wready; + logic bvalid; + + logic arready; + logic rvalid; + + logic wren; + logic wren_l1; + + assign wren = ( wdata_done_rise & awaddr_done_reg ) | ( awaddr_done_rise & wdata_done_reg ); + assign wdata_done_rise = wdata_done_reg & ~wdata_done_reg_dly; + assign awaddr_done_rise = awaddr_done_reg & ~awaddr_done_reg_dly; + + // reg_dly + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + wdata_done_reg_dly <= 1'b0; + awaddr_done_reg_dly <= 1'b0; + end + else + begin + wdata_done_reg_dly <= wdata_done_reg; + awaddr_done_reg_dly <= awaddr_done_reg; + end + end + + // AW Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + awaddr_done_reg <= 1'b0; + awaddr_reg <= '0; + awready <= 1'b1; + end + else + begin + if (awready && s_axi_awvalid) + begin + awready <= 1'b0; + awaddr_done_reg <= 1'b1; + awaddr_reg <= s_axi_awaddr; + end + else if (awaddr_done_reg && wresp_done_reg) + begin + awready <= 1'b1; + awaddr_done_reg <= 1'b0; + end + end + end + + // W Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + wdata_done_reg <= 1'b0; + wready <= 1'b1; + wdata_reg <= '0; + wstrb_reg <= '0; + end + else + begin + if (wready && s_axi_wvalid) + begin + wready <= 1'b0; + wdata_done_reg <= 1'b1; + wdata_reg <= s_axi_wdata; + wstrb_reg <= s_axi_wstrb; + end + else if (wdata_done_reg && wresp_done_reg) + begin + wready <= 1'b1; + wdata_done_reg <= 1'b0; + end + end + end + + // B Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + bvalid <= 1'b0; + wresp_done_reg <= 1'b0; + wresp_running_reg <= 1'b0; + end + else + begin + if (awaddr_done_reg && wdata_done_reg && !wresp_done_reg) + begin + if (!wresp_running_reg) + begin + bvalid <= 1'b1; + wresp_running_reg <= 1'b1; + end + else if (s_axi_bready) + begin + bvalid <= 1'b0; + wresp_done_reg <= 1'b1; + wresp_running_reg <= 1'b0; + end + end + else + begin + bvalid <= 1'b0; + wresp_done_reg <= 1'b0; + wresp_running_reg <= 1'b0; + end + end + end + + // AR Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + araddr_done_reg <= 1'b0; + arready <= 1'b1; + araddr_reg <= '0; + end + else + begin + if (arready && s_axi_arvalid) + begin + arready <= 1'b0; + araddr_done_reg <= 1'b1; + araddr_reg <= 
s_axi_araddr; + end + else if (araddr_done_reg && rresp_done_reg) + begin + arready <= 1'b1; + araddr_done_reg <= 1'b0; + end + end + end + + // R Channel + always @(posedge Clk_CI or negedge Rst_RBI) + begin + if (!Rst_RBI) + begin + rresp_done_reg <= 1'b0; + rvalid <= 1'b0; + rresp_running_reg <= 1'b0; + end + else + begin + if (araddr_done_reg && !rresp_done_reg) + begin + if (!rresp_running_reg) + begin + rvalid <= 1'b1; + rresp_running_reg <= 1'b1; + end + else if (s_axi_rready) + begin + rvalid <= 1'b0; + rresp_done_reg <= 1'b1; + rresp_running_reg <= 1'b0; + end + end + else + begin + rvalid <= 1'b0; + rresp_done_reg <= 1'b0; + rresp_running_reg <= 1'b0; + end + end + end + + // ██╗ ██╗ ██████╗███████╗ ██████╗ ██████╗ ███████╗ ██████╗ + // ██║ ███║ ██╔════╝██╔════╝██╔════╝ ██╔══██╗██╔════╝██╔════╝ + // ██║ ╚██║ ██║ █████╗ ██║ ███╗ ██████╔╝█████╗ ██║ ███╗ + // ██║ ██║ ██║ ██╔══╝ ██║ ██║ ██╔══██╗██╔══╝ ██║ ██║ + // ███████╗██║ ╚██████╗██║ ╚██████╔╝ ██║ ██║███████╗╚██████╔╝ + // ╚══════╝╚═╝ ╚═════╝╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ ╚═════╝ + // + assign wren_l1 = wren && (awaddr_reg < L2SINGLE_AMAP_SIZE); + + always @( posedge Clk_CI or negedge Rst_RBI ) + begin + var integer idx_reg, idx_byte; + if ( Rst_RBI == 1'b0 ) + begin + for ( idx_reg = 0; idx_reg < N_REGS; idx_reg++ ) + L1Cfg_DP[idx_reg] <= '0; + end + else if ( wren_l1 ) + begin + if ( awaddr_reg[ADDR_LSB+1] == 1'b0 ) begin // VIRT_ADDR + for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin + if ( (idx_byte < ADDR_WIDTH_VIRT/8) ) begin + if ( wstrb_reg[idx_byte] ) begin + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte]; + end + end + else begin // Let synthesizer optimize away unused registers. + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0; + end + end + end + else if ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b10 ) begin // PHYS_ADDR + for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin + if ( (idx_byte < ADDR_WIDTH_PHYS/8) ) begin + if ( wstrb_reg[idx_byte] ) begin + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte]; + end + end + else begin // Let synthesizer optimize away unused registers. + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0; + end + end + end + else begin // ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b11 ) // FLAGS + for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin + if ( (idx_byte < 1) ) begin + if ( wstrb_reg[idx_byte] ) begin + L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte] & { {{8-N_FLAGS}{1'b0}}, {{N_FLAGS}{1'b1}} }; + end + end + else begin // Let synthesizer optimize away unused registers. 
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
+ end
+ end
+ end
+ end
+ end // always @ ( posedge Clk_CI or negedge Rst_RBI )
+
+ generate
+ // Mask unused bits -> Synthesizer should optimize away unused registers
+ for( j=0; j< N_PORTS; j++ ) begin
+ if (ENABLE_L2TLB[j] == 1) begin
+ assign l2_addr_is_in_va_rams[j] = (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg[log2(L2SINGLE_AMAP_SIZE)-1:0] <= L2_VA_MAX_ADDR);
+ assign upper_word_is_written[j] = (wstrb_reg[7:4] != 4'b0000);
+ assign lower_word_is_written[j] = (wstrb_reg[3:0] != 4'b0000);
+ end else begin
+ assign l2_addr_is_in_va_rams[j] = 1'b0;
+ assign upper_word_is_written[j] = 1'b0;
+ assign lower_word_is_written[j] = 1'b0;
+ end
+
+ always @( posedge Clk_CI or negedge Rst_RBI ) begin
+ var integer idx_byte, off_byte;
+ if ( Rst_RBI == 1'b0 )
+ begin
+ wren_l2[j] <= 1'b0;
+ wdata_l2[j] <= '0;
+ end
+ else if (wren)
+ begin
+ if ( (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg < (j+2)*L2SINGLE_AMAP_SIZE) && (|wstrb_reg) )
+ wren_l2[j] <= 1'b1;
+ if (AXI_DATA_WIDTH == 32) begin
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ )
+ wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte] & {8{wstrb_reg[idx_byte]}};
+ end
+ else if (AXI_DATA_WIDTH == 64) begin
+ if (lower_word_is_written[j] == 1'b1)
+ off_byte = 0;
+ else
+ off_byte = 4;
+ // always put the payload in the lower word and set upper word to 0
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8/2; idx_byte++ )
+ wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte+off_byte] & {8{wstrb_reg[idx_byte+off_byte]}};
+ wdata_l2[j][AXI_DATA_WIDTH-1:AXI_DATA_WIDTH/2] <= 'b0;
+ end
+ // pragma translate_off
+ else
+ $fatal(1, "Unsupported AXI_DATA_WIDTH!");
+ // pragma translate_on
+ end
+ else
+ wren_l2[j] <= '0;
+ end // always @ ( posedge Clk_CI or negedge Rst_RBI )
+
+ // Properly align the 32-bit word address when writing from 64-bit interface:
+ // Depending on the system, the incoming address is (non-)aligned to the 64-bit
+ // word when writing the upper 32-bit word.
+ always_comb begin
+ waddr_l2[j] = (awaddr_reg -(j+1)*L2SINGLE_AMAP_SIZE)/4;
+ if (wren_l2[j]) begin
+ if (AXI_DATA_WIDTH == 64) begin
+ if (upper_word_is_written[j] == 1'b1) begin
+ // address must be non-aligned
+ waddr_l2[j][0] = 1'b1;
+ end
+ end
+ // pragma translate_off
+ else if (AXI_DATA_WIDTH != 32) begin
+ $fatal(1, "Unsupported AXI_DATA_WIDTH!");
+ end
+ // pragma translate_on
+ end
+ end
+
+ // Assert that only one 32-bit word is ever written at a time to VA RAMs on 64-bit data
+ // systems.
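+ // Worked example (assuming AXI_DATA_WIDTH == 64 and port j == 0, whose L2 region starts
+ // at 1*L2SINGLE_AMAP_SIZE == 16'h4000): a write to awaddr_reg == 16'h4008 with
+ // wstrb_reg == 8'hF0 touches only the upper 32-bit word, so off_byte == 4 above, the
+ // payload is packed into the lower half of wdata_l2[0], and waddr_l2[0] becomes
+ // (16'h4008 - 16'h4000)/4 == 2 with bit 0 then forced to 1, i.e. 32-bit word address 3.
+ // A write with wstrb_reg == 8'hFF would touch both words and trip the assertion below.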
+ // pragma translate_off + always_ff @ (posedge Clk_CI) begin + if (AXI_DATA_WIDTH == 64) begin + if (l2_addr_is_in_va_rams[j]) begin + if (upper_word_is_written[j]) begin + assert (!lower_word_is_written[j]) + else $error("Unsupported write across two 32-bit words to VA RAMs!"); + end + else if (lower_word_is_written[j]) begin + assert (!upper_word_is_written[j]) + else $error("Unsupported write across two 32-bit words to VA RAMs!"); + end + end + end + end + // pragma translate_on + + end // for (j=0; j< N_PORTS; j++) + endgenerate + + // ███╗ ███╗██╗ ██╗ ███████╗██╗███████╗ ██████╗ ███████╗ + // ████╗ ████║██║ ██║ ██╔════╝██║██╔════╝██╔═══██╗██╔════╝ + // ██╔████╔██║███████║ █████╗ ██║█████╗ ██║ ██║███████╗ + // ██║╚██╔╝██║██╔══██║ ██╔══╝ ██║██╔══╝ ██║ ██║╚════██║ + // ██║ ╚═╝ ██║██║ ██║ ██║ ██║██║ ╚██████╔╝███████║ + // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═════╝ ╚══════╝ + // + logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDin_D; + logic AddrFifoWen_S; + logic AddrFifoRen_S; + logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDout_D; + logic AddrFifoFull_S; + logic AddrFifoEmpty_S; + logic AddrFifoEmpty_SB; + logic AddrFifoFull_SB; + + logic [MISS_META_WIDTH-1:0] MetaFifoDin_D; + logic MetaFifoWen_S; + logic MetaFifoRen_S; + logic [MISS_META_WIDTH-1:0] MetaFifoDout_D; + logic MetaFifoFull_S; + logic MetaFifoEmpty_S; + logic MetaFifoEmpty_SB; + logic MetaFifoFull_SB; + + logic FifosDisabled_S; + logic ConfRegWen_S; + logic [1:0] ConfReg_DN; + logic [1:0] ConfReg_DP; + + logic [AXI_DATA_WIDTH-1:0] wdata_reg_vec; + + assign FifosDisabled_S = ConfReg_DP[0]; + assign L1AllowMultiHit_SO = ConfReg_DP[1]; + + assign AddrFifoEmpty_S = ~AddrFifoEmpty_SB; + assign MetaFifoEmpty_S = ~MetaFifoEmpty_SB; + + assign AddrFifoFull_S = ~AddrFifoFull_SB; + assign MetaFifoFull_S = ~MetaFifoFull_SB; + + assign MhFifoFull_SO = (AddrFifoWen_S & AddrFifoFull_S) | (MetaFifoWen_S & MetaFifoFull_S); + + generate + for ( j=0; j +# * Conrad Burchert +# * Maheshwara Sharma +# * Andreas Kurth +# * Johannes Weinbuch +# * Pirmin Vogel +# */ +# +# //`include "pulp_soc_defines.sv" +# +# ////import CfMath::log2; +# +# module axi_rab_top +# +# // Parameters {{{ +# #( +# parameter N_PORTS = 2, +# parameter N_L2_SETS = 32, +# parameter N_L2_SET_ENTRIES = 32, +# parameter AXI_DATA_WIDTH = 64, +# parameter AXI_S_ADDR_WIDTH = 32, +# parameter AXI_M_ADDR_WIDTH = 40, +# parameter AXI_LITE_DATA_WIDTH = 64, +# parameter AXI_LITE_ADDR_WIDTH = 32, +# parameter AXI_ID_WIDTH = 10, +# parameter AXI_USER_WIDTH = 6, +# parameter MH_FIFO_DEPTH = 16 +# ) +# // }}} +# +# // Ports {{{ +# ( +# +# input logic Clk_CI, // This clock may be gated. +# input logic NonGatedClk_CI, +# input logic Rst_RBI, +# +# // For every slave port there are two master ports. 
The master +# // port to use can be set using the master_select flag of the protection +# // bits of a slice +# +# // AXI4 Slave {{{ +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_awid, +# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_awaddr, +# input logic [N_PORTS-1:0] s_axi4_awvalid, +# output logic [N_PORTS-1:0] s_axi4_awready, +# input logic [N_PORTS-1:0] [7:0] s_axi4_awlen, +# input logic [N_PORTS-1:0] [2:0] s_axi4_awsize, +# input logic [N_PORTS-1:0] [1:0] s_axi4_awburst, +# input logic [N_PORTS-1:0] s_axi4_awlock, +# input logic [N_PORTS-1:0] [2:0] s_axi4_awprot, +# input logic [N_PORTS-1:0] [3:0] s_axi4_awcache, +# input logic [N_PORTS-1:0] [3:0] s_axi4_awregion, +# input logic [N_PORTS-1:0] [3:0] s_axi4_awqos, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_awuser, +# +# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_wdata, +# input logic [N_PORTS-1:0] s_axi4_wvalid, +# output logic [N_PORTS-1:0] s_axi4_wready, +# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb, +# input logic [N_PORTS-1:0] s_axi4_wlast, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_wuser, +# +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_bid, +# output logic [N_PORTS-1:0] [1:0] s_axi4_bresp, +# output logic [N_PORTS-1:0] s_axi4_bvalid, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_buser, +# input logic [N_PORTS-1:0] s_axi4_bready, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_arid, +# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_araddr, +# input logic [N_PORTS-1:0] s_axi4_arvalid, +# output logic [N_PORTS-1:0] s_axi4_arready, +# input logic [N_PORTS-1:0] [7:0] s_axi4_arlen, +# input logic [N_PORTS-1:0] [2:0] s_axi4_arsize, +# input logic [N_PORTS-1:0] [1:0] s_axi4_arburst, +# input logic [N_PORTS-1:0] s_axi4_arlock, +# input logic [N_PORTS-1:0] [2:0] s_axi4_arprot, +# input logic [N_PORTS-1:0] [3:0] s_axi4_arcache, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_aruser, +# +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_rid, +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_rdata, +# output logic [N_PORTS-1:0] [1:0] s_axi4_rresp, +# output logic [N_PORTS-1:0] s_axi4_rvalid, +# input logic [N_PORTS-1:0] s_axi4_rready, +# output logic [N_PORTS-1:0] s_axi4_rlast, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_ruser, +# // }}} +# +# // AXI4 Master 0 {{{ +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_awid, +# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_awaddr, +# output logic [N_PORTS-1:0] m0_axi4_awvalid, +# input logic [N_PORTS-1:0] m0_axi4_awready, +# output logic [N_PORTS-1:0] [7:0] m0_axi4_awlen, +# output logic [N_PORTS-1:0] [2:0] m0_axi4_awsize, +# output logic [N_PORTS-1:0] [1:0] m0_axi4_awburst, +# output logic [N_PORTS-1:0] m0_axi4_awlock, +# output logic [N_PORTS-1:0] [2:0] m0_axi4_awprot, +# output logic [N_PORTS-1:0] [3:0] m0_axi4_awcache, +# output logic [N_PORTS-1:0] [3:0] m0_axi4_awregion, +# output logic [N_PORTS-1:0] [3:0] m0_axi4_awqos, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_awuser, +# +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_wdata, +# output logic [N_PORTS-1:0] m0_axi4_wvalid, +# input logic [N_PORTS-1:0] m0_axi4_wready, +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m0_axi4_wstrb, +# output logic [N_PORTS-1:0] m0_axi4_wlast, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_wuser, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_bid, +# input logic [N_PORTS-1:0] [1:0] 
m0_axi4_bresp, +# input logic [N_PORTS-1:0] m0_axi4_bvalid, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_buser, +# output logic [N_PORTS-1:0] m0_axi4_bready, +# +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_arid, +# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_araddr, +# output logic [N_PORTS-1:0] m0_axi4_arvalid, +# input logic [N_PORTS-1:0] m0_axi4_arready, +# output logic [N_PORTS-1:0] [7:0] m0_axi4_arlen, +# output logic [N_PORTS-1:0] [2:0] m0_axi4_arsize, +# output logic [N_PORTS-1:0] [1:0] m0_axi4_arburst, +# output logic [N_PORTS-1:0] m0_axi4_arlock, +# output logic [N_PORTS-1:0] [2:0] m0_axi4_arprot, +# output logic [N_PORTS-1:0] [3:0] m0_axi4_arcache, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_aruser, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_rid, +# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_rdata, +# input logic [N_PORTS-1:0] [1:0] m0_axi4_rresp, +# input logic [N_PORTS-1:0] m0_axi4_rvalid, +# output logic [N_PORTS-1:0] m0_axi4_rready, +# input logic [N_PORTS-1:0] m0_axi4_rlast, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_ruser, +# // }}} +# +# // AXI4 Master 1 {{{ +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_awid, +# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_awaddr, +# output logic [N_PORTS-1:0] m1_axi4_awvalid, +# input logic [N_PORTS-1:0] m1_axi4_awready, +# output logic [N_PORTS-1:0] [7:0] m1_axi4_awlen, +# output logic [N_PORTS-1:0] [2:0] m1_axi4_awsize, +# output logic [N_PORTS-1:0] [1:0] m1_axi4_awburst, +# output logic [N_PORTS-1:0] m1_axi4_awlock, +# output logic [N_PORTS-1:0] [2:0] m1_axi4_awprot, +# output logic [N_PORTS-1:0] [3:0] m1_axi4_awcache, +# output logic [N_PORTS-1:0] [3:0] m1_axi4_awregion, +# output logic [N_PORTS-1:0] [3:0] m1_axi4_awqos, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_awuser, +# +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_wdata, +# output logic [N_PORTS-1:0] m1_axi4_wvalid, +# input logic [N_PORTS-1:0] m1_axi4_wready, +# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m1_axi4_wstrb, +# output logic [N_PORTS-1:0] m1_axi4_wlast, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_wuser, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_bid, +# input logic [N_PORTS-1:0] [1:0] m1_axi4_bresp, +# input logic [N_PORTS-1:0] m1_axi4_bvalid, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_buser, +# output logic [N_PORTS-1:0] m1_axi4_bready, +# +# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_arid, +# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_araddr, +# output logic [N_PORTS-1:0] m1_axi4_arvalid, +# input logic [N_PORTS-1:0] m1_axi4_arready, +# output logic [N_PORTS-1:0] [7:0] m1_axi4_arlen, +# output logic [N_PORTS-1:0] [2:0] m1_axi4_arsize, +# output logic [N_PORTS-1:0] [1:0] m1_axi4_arburst, +# output logic [N_PORTS-1:0] m1_axi4_arlock, +# output logic [N_PORTS-1:0] [2:0] m1_axi4_arprot, +# output logic [N_PORTS-1:0] [3:0] m1_axi4_arcache, +# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_aruser, +# +# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_rid, +# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_rdata, +# input logic [N_PORTS-1:0] [1:0] m1_axi4_rresp, +# input logic [N_PORTS-1:0] m1_axi4_rvalid, +# output logic [N_PORTS-1:0] m1_axi4_rready, +# input logic [N_PORTS-1:0] m1_axi4_rlast, +# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_ruser, +# // }}} +# +# // AXI 4 Lite Slave (Configuration Interface) {{{ +# // 
AXI4-Lite port to setup the rab slices +# // use this to program the configuration registers +# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_awaddr, +# input logic s_axi4lite_awvalid, +# output logic s_axi4lite_awready, +# +# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_wdata, +# input logic s_axi4lite_wvalid, +# output logic s_axi4lite_wready, +# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi4lite_wstrb, +# +# output logic [1:0] s_axi4lite_bresp, +# output logic s_axi4lite_bvalid, +# input logic s_axi4lite_bready, +# +# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_araddr, +# input logic s_axi4lite_arvalid, +# output logic s_axi4lite_arready, +# +# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_rdata, +# output logic [1:0] s_axi4lite_rresp, +# output logic s_axi4lite_rvalid, +# input logic s_axi4lite_rready, +# // }}} +# +# // BRAMs {{{ +# //`ifdef RAB_AX_LOG_EN +# // BramPort.Slave ArBram_PS, +# // BramPort.Slave AwBram_PS, +# //`endif +# // }}} +# +# // Logger Control {{{ +# //`ifdef RAB_AX_LOG_EN +# // input logic LogEn_SI, +# // input logic ArLogClr_SI, +# // input logic AwLogClr_SI, +# // output logic ArLogRdy_SO, +# // output logic AwLogRdy_SO, +# //`endif +# // }}} +# +# // Interrupt Outputs {{{ +# // Interrupt lines to handle misses, collisions of slices/multiple hits, +# // protection faults and overflow of the miss handling fifo +# //`ifdef RAB_AX_LOG_EN +# // output logic int_ar_log_full, +# // output logic int_aw_log_full, +# //`endif +# output logic [N_PORTS-1:0] int_miss, +# output logic [N_PORTS-1:0] int_multi, +# output logic [N_PORTS-1:0] int_prot, +# output logic int_mhf_full +# // }}} +# +# ); +# +"""#docstring_begin + + // }}} + + // Signals {{{ + // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗ + // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝ + // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗ + // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║ + // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║ + // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝ + // + + // Internal AXI4 lines, these connect buffers on the slave side to the rab core and + // multiplexers which switch between the two master outputs + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_awid; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_awaddr; + logic [N_PORTS-1:0] int_awvalid; + logic [N_PORTS-1:0] int_awready; + logic [N_PORTS-1:0] [7:0] int_awlen; + logic [N_PORTS-1:0] [2:0] int_awsize; + logic [N_PORTS-1:0] [1:0] int_awburst; + logic [N_PORTS-1:0] int_awlock; + logic [N_PORTS-1:0] [2:0] int_awprot; + logic [N_PORTS-1:0] [3:0] int_awcache; + logic [N_PORTS-1:0] [3:0] int_awregion; + logic [N_PORTS-1:0] [3:0] int_awqos; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_awuser; + + logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_wdata; + logic [N_PORTS-1:0] int_wvalid; + logic [N_PORTS-1:0] int_wready; + logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] int_wstrb; + logic [N_PORTS-1:0] int_wlast; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_wuser; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_bid; + logic [N_PORTS-1:0] [1:0] int_bresp; + logic [N_PORTS-1:0] int_bvalid; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_buser; + logic [N_PORTS-1:0] int_bready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_arid; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_araddr; + logic [N_PORTS-1:0] int_arvalid; + logic [N_PORTS-1:0] int_arready; + logic [N_PORTS-1:0] [7:0] int_arlen; + logic [N_PORTS-1:0] [2:0] int_arsize; + logic [N_PORTS-1:0] [1:0] int_arburst; + logic [N_PORTS-1:0] 
int_arlock; + logic [N_PORTS-1:0] [2:0] int_arprot; + logic [N_PORTS-1:0] [3:0] int_arcache; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_aruser; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_rid; + logic [N_PORTS-1:0] [1:0] int_rresp; + logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_rdata; + logic [N_PORTS-1:0] int_rlast; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_ruser; + logic [N_PORTS-1:0] int_rvalid; + logic [N_PORTS-1:0] int_rready; + + // rab_core outputs + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_wtrans_addr; + logic [N_PORTS-1:0] int_wtrans_accept; + logic [N_PORTS-1:0] int_wtrans_drop; + logic [N_PORTS-1:0] int_wtrans_miss; + logic [N_PORTS-1:0] int_wtrans_sent; + logic [N_PORTS-1:0] int_wtrans_cache_coherent; + logic [N_PORTS-1:0] int_wmaster_select; + + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_rtrans_addr; + logic [N_PORTS-1:0] int_rtrans_accept; + logic [N_PORTS-1:0] int_rtrans_drop; + logic [N_PORTS-1:0] int_rtrans_miss; + logic [N_PORTS-1:0] int_rtrans_sent; + logic [N_PORTS-1:0] int_rtrans_cache_coherent; + logic [N_PORTS-1:0] int_rmaster_select; + + logic [N_PORTS-1:0] w_master_select; + + // Internal master0 AXI4 lines. These connect the first master port to the + // multiplexers + // For channels read address, write address and write data the other lines + // are ignored if valid is not set, therefore we only need to multiplex those + logic [N_PORTS-1:0] int_m0_awvalid; + logic [N_PORTS-1:0] int_m0_awready; + + logic [N_PORTS-1:0] int_m0_wvalid; + logic [N_PORTS-1:0] int_m0_wready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_bid; + logic [N_PORTS-1:0] [1:0] int_m0_bresp; + logic [N_PORTS-1:0] int_m0_bvalid; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_buser; + logic [N_PORTS-1:0] int_m0_bready; + + logic [N_PORTS-1:0] int_m0_arvalid; + logic [N_PORTS-1:0] int_m0_arready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_rid; + logic [N_PORTS-1:0] [1:0] int_m0_rresp; + logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m0_rdata; + logic [N_PORTS-1:0] int_m0_rlast; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_ruser; + logic [N_PORTS-1:0] int_m0_rready; + logic [N_PORTS-1:0] int_m0_rvalid; + + logic [N_PORTS-1:0] l1_m0_ar_accept; + logic [N_PORTS-1:0] l1_m0_ar_drop; + logic [N_PORTS-1:0] l1_m0_ar_save; + logic [N_PORTS-1:0] l1_m0_ar_done; + logic [N_PORTS-1:0] l2_m0_ar_accept; + logic [N_PORTS-1:0] l2_m0_ar_drop; + logic [N_PORTS-1:0] l2_m0_ar_done; + logic [N_PORTS-1:0] l2_m0_ar_sending; + + logic [N_PORTS-1:0] l1_m0_aw_accept; + logic [N_PORTS-1:0] l1_m0_aw_drop; + logic [N_PORTS-1:0] l1_m0_aw_save; + logic [N_PORTS-1:0] l1_m0_aw_done; + logic [N_PORTS-1:0] l2_m0_aw_accept; + logic [N_PORTS-1:0] l2_m0_aw_drop; + logic [N_PORTS-1:0] l2_m0_aw_done; + logic [N_PORTS-1:0] l2_m0_aw_sending; + + // Internal master1 AXI4 lines. 
These connect the second master port to the + // multiplexers + // For channels read address, write address and write data the other lines + // are ignored if valid is not set, therefore we only need to multiplex those + logic [N_PORTS-1:0] int_m1_awvalid; + logic [N_PORTS-1:0] int_m1_awready; + + logic [N_PORTS-1:0] int_m1_wvalid; + logic [N_PORTS-1:0] int_m1_wready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_bid; + logic [N_PORTS-1:0] [1:0] int_m1_bresp; + logic [N_PORTS-1:0] int_m1_bvalid; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_buser; + logic [N_PORTS-1:0] int_m1_bready; + + logic [N_PORTS-1:0] int_m1_arvalid; + logic [N_PORTS-1:0] int_m1_arready; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_rid; + logic [N_PORTS-1:0] [1:0] int_m1_rresp; + logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m1_rdata; + logic [N_PORTS-1:0] int_m1_rlast; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_ruser; + logic [N_PORTS-1:0] int_m1_rvalid; + logic [N_PORTS-1:0] int_m1_rready; + + logic [N_PORTS-1:0] l1_m1_ar_accept; + logic [N_PORTS-1:0] l1_m1_ar_drop; + logic [N_PORTS-1:0] l1_m1_ar_save; + logic [N_PORTS-1:0] l1_m1_ar_done; + logic [N_PORTS-1:0] l2_m1_ar_accept; + logic [N_PORTS-1:0] l2_m1_ar_drop; + logic [N_PORTS-1:0] l2_m1_ar_done; + + logic [N_PORTS-1:0] l1_m1_aw_accept; + logic [N_PORTS-1:0] l1_m1_aw_drop; + logic [N_PORTS-1:0] l1_m1_aw_save; + logic [N_PORTS-1:0] l1_m1_aw_done; + logic [N_PORTS-1:0] l2_m1_aw_accept; + logic [N_PORTS-1:0] l2_m1_aw_drop; + logic [N_PORTS-1:0] l2_m1_aw_done; + + // L1 outputs + logic [N_PORTS-1:0] rab_miss; // L1 RAB miss + logic [N_PORTS-1:0] rab_prot; + logic [N_PORTS-1:0] rab_multi; + logic [N_PORTS-1:0] rab_prefetch; + + // + // Signals used to support L2 TLB + // + // L2 RAM configuration signals + logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] L2CfgWData_D; + logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] L2CfgWAddr_D; + logic [N_PORTS-1:0] L2CfgWE_S; + + // L1 output and drop Buffer + logic [N_PORTS-1:0] L1OutRwType_D, L1DropRwType_DP; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L1OutUser_D, L1DropUser_DP; + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L1OutId_D, L1DropId_DP; + logic [N_PORTS-1:0] [7:0] L1OutLen_D, L1DropLen_DP; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L1OutAddr_D, L1DropAddr_DP; + logic [N_PORTS-1:0] L1OutProt_D, L1DropProt_DP; + logic [N_PORTS-1:0] L1OutMulti_D, L1DropMulti_DP; + logic [N_PORTS-1:0] L1DropEn_S; + logic [N_PORTS-1:0] L1DropPrefetch_S; + + logic [N_PORTS-1:0] L1DropValid_SN, L1DropValid_SP; + + // L2 input Buffer + logic [N_PORTS-1:0] L2InRwType_DP; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2InUser_DP; + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2InId_DP; + logic [N_PORTS-1:0] [7:0] L2InLen_DP; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2InAddr_DP; + logic [N_PORTS-1:0] L2InEn_S; + + // L2 output Buffer + logic [N_PORTS-1:0] L2OutRwType_DP; + logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2OutUser_DP; + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2OutId_DP; + logic [N_PORTS-1:0] [7:0] L2OutLen_DP; + logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2OutInAddr_DP; + + logic [N_PORTS-1:0] L2OutHit_SN, L2OutHit_SP; + logic [N_PORTS-1:0] L2OutMiss_SN, L2OutMiss_SP; + logic [N_PORTS-1:0] L2OutProt_SN, L2OutProt_SP; + logic [N_PORTS-1:0] L2OutMulti_SN, L2OutMulti_SP; + logic [N_PORTS-1:0] L2OutCC_SN, L2OutCC_SP; + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] L2OutAddr_DN, L2OutAddr_DP; + + logic [N_PORTS-1:0] L2OutValid_SN, L2OutValid_SP; + logic [N_PORTS-1:0] L2OutPrefetch_S; + logic [N_PORTS-1:0] L2OutReady_S; + logic 
[N_PORTS-1:0] L2OutEn_S; + + // L2 outputs + logic [N_PORTS-1:0] L2Busy_S; + logic [N_PORTS-1:0] L2OutValid_S; + + logic [N_PORTS-1:0] L2Miss_S; + + // Signals for interfacing the AXI modules + logic [N_PORTS-1:0] l1_ar_accept; + logic [N_PORTS-1:0] l1_aw_accept; + logic [N_PORTS-1:0] l1_w_accept; + logic [N_PORTS-1:0] l1_xw_accept; + + logic [N_PORTS-1:0] l1_ar_drop; + logic [N_PORTS-1:0] l1_aw_drop; + logic [N_PORTS-1:0] l1_w_drop; + logic [N_PORTS-1:0] l1_xw_drop; + + logic [N_PORTS-1:0] l1_ar_save; + logic [N_PORTS-1:0] l1_aw_save; + logic [N_PORTS-1:0] l1_w_save; + logic [N_PORTS-1:0] l1_xw_save; + + logic [N_PORTS-1:0] l1_ar_done; + logic [N_PORTS-1:0] l1_r_done; + logic [N_PORTS-1:0] l1_r_drop; + logic [N_PORTS-1:0] lx_r_drop; + logic [N_PORTS-1:0] lx_r_done; + + logic [N_PORTS-1:0] l1_aw_done; + logic [N_PORTS-1:0] l1_w_done; + logic [N_PORTS-1:0] l1_xw_done; + logic [N_PORTS-1:0] l1_aw_done_SP; + logic [N_PORTS-1:0] l1_w_done_SP; + + logic [N_PORTS-1:0] l2_ar_accept; + logic [N_PORTS-1:0] l2_aw_accept; + logic [N_PORTS-1:0] l2_w_accept; + logic [N_PORTS-1:0] l2_xw_accept; + + logic [N_PORTS-1:0] l2_ar_drop; + logic [N_PORTS-1:0] l2_r_drop; + logic [N_PORTS-1:0] l2_xr_drop; + logic [N_PORTS-1:0] l2_aw_drop; + logic [N_PORTS-1:0] l2_w_drop; + logic [N_PORTS-1:0] l2_xw_drop; + + logic [N_PORTS-1:0] l2_aw_done; + logic [N_PORTS-1:0] l2_w_done; + logic [N_PORTS-1:0] l2_xw_done; + logic [N_PORTS-1:0] l2_aw_done_SP; + logic [N_PORTS-1:0] l2_w_done_SP; + + logic [N_PORTS-1:0] l2_ar_done; + logic [N_PORTS-1:0] l2_r_done; + logic [N_PORTS-1:0] l2_xr_done; + logic [N_PORTS-1:0] l2_ar_done_SP; + logic [N_PORTS-1:0] l2_r_done_SP; + + logic [N_PORTS-1:0] l1_mx_aw_done; + logic [N_PORTS-1:0] l1_mx_ar_done; + logic [N_PORTS-1:0] l1_m0_aw_done_SP; + logic [N_PORTS-1:0] l1_m0_ar_done_SP; + logic [N_PORTS-1:0] l1_m1_aw_done_SP; + logic [N_PORTS-1:0] l1_m1_ar_done_SP; + + logic [N_PORTS-1:0] l2_mx_aw_done; + logic [N_PORTS-1:0] l2_mx_ar_done; + logic [N_PORTS-1:0] l2_m0_aw_done_SP; + logic [N_PORTS-1:0] l2_m0_ar_done_SP; + logic [N_PORTS-1:0] l2_m1_aw_done_SP; + logic [N_PORTS-1:0] l2_m1_ar_done_SP; + + logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] l1_id_drop, lx_id_drop, b_id_drop; + logic [N_PORTS-1:0] [7:0] l1_len_drop, lx_len_drop; + logic [N_PORTS-1:0] l1_prefetch_drop, lx_prefetch_drop, b_prefetch_drop; + logic [N_PORTS-1:0] l1_hit_drop, lx_hit_drop, b_hit_drop; + + logic [N_PORTS-1:0] b_drop; + logic [N_PORTS-1:0] b_done; + + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_aw_addr; + logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_ar_addr; + + logic [N_PORTS-1:0] l2_cache_coherent; + logic [N_PORTS-1:0] l2_master_select; + + logic [N_PORTS-1:0] aw_in_stall; + logic [N_PORTS-1:0] aw_out_stall; + + genvar i; + + // RRESP FSM + typedef enum logic {IDLE, BUSY} r_resp_mux_ctrl_state_t; + r_resp_mux_ctrl_state_t [N_PORTS-1:0] RRespMuxCtrl_SN, RRespMuxCtrl_SP; + logic [N_PORTS-1:0] RRespSel_SN, RRespSel_SP; + logic [N_PORTS-1:0] RRespBurst_S; + logic [N_PORTS-1:0] RRespSelIm_S; + + // }}} + + // Local parameters {{{ + + // Enable L2 for select ports + localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY; + + // L2TLB parameters + localparam integer HUM_BUFFER_DEPTH = (N_L2_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS)+13; + + // }}} + + // Derive `master_select` from cache coherency flag. 
{{{ + `ifdef EN_ACP + assign int_wmaster_select = int_wtrans_cache_coherent; + assign int_rmaster_select = int_rtrans_cache_coherent; + assign l2_master_select = l2_cache_coherent; + `else + assign int_wmaster_select = '0; + assign int_rmaster_select = '0; + assign l2_master_select = '0; + `endif + // }}} + + // Buf and Send {{{ + // ██████╗ ██╗ ██╗███████╗ ██╗ ███████╗███████╗███╗ ██╗██████╗ + // ██╔══██╗██║ ██║██╔════╝ ██║ ██╔════╝██╔════╝████╗ ██║██╔══██╗ + // ██████╔╝██║ ██║█████╗ ████████╗ ███████╗█████╗ ██╔██╗ ██║██║ ██║ + // ██╔══██╗██║ ██║██╔══╝ ██╔═██╔═╝ ╚════██║██╔══╝ ██║╚██╗██║██║ ██║ + // ██████╔╝╚██████╔╝██║ ██████║ ███████║███████╗██║ ╚████║██████╔╝ + // ╚═════╝ ╚═════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚═════╝ + // + logic[N_PORTS-1:0] m0_write_is_burst, m0_read_is_burst; + logic[N_PORTS-1:0] m1_write_is_burst, m1_read_is_burst; + + generate for (i = 0; i < N_PORTS; i++) begin : BUF_AND_SEND + + // Write Address channel (aw) {{{ + /* + * write address channel (aw) + * + * ██╗ ██╗██████╗ ██╗████████╗███████╗ █████╗ ██████╗ ██████╗ ██████╗ + * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗ + * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ███████║██║ ██║██║ ██║██████╔╝ + * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██║██║ ██║██║ ██║██╔══██╗ + * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║██████╔╝██████╔╝██║ ██║ + * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝ + * + */ + + axi4_aw_buffer + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_aw_buffer + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_awid ( s_axi4_awid[i] ), + .s_axi4_awaddr ( s_axi4_awaddr[i] ), + .s_axi4_awvalid ( s_axi4_awvalid[i] ), + .s_axi4_awready ( s_axi4_awready[i] ), + .s_axi4_awlen ( s_axi4_awlen[i] ), + .s_axi4_awsize ( s_axi4_awsize[i] ), + .s_axi4_awburst ( s_axi4_awburst[i] ), + .s_axi4_awlock ( s_axi4_awlock[i] ), + .s_axi4_awprot ( s_axi4_awprot[i] ), + .s_axi4_awcache ( s_axi4_awcache[i] ), + .s_axi4_awregion ( s_axi4_awregion[i] ), + .s_axi4_awqos ( s_axi4_awqos[i] ), + .s_axi4_awuser ( s_axi4_awuser[i] ), + .m_axi4_awid ( int_awid[i] ), + .m_axi4_awaddr ( int_awaddr[i] ), + .m_axi4_awvalid ( int_awvalid[i] ), + .m_axi4_awready ( int_awready[i] ), + .m_axi4_awlen ( int_awlen[i] ), + .m_axi4_awsize ( int_awsize[i] ), + .m_axi4_awburst ( int_awburst[i] ), + .m_axi4_awlock ( int_awlock[i] ), + .m_axi4_awprot ( int_awprot[i] ), + .m_axi4_awcache ( int_awcache[i] ), + .m_axi4_awregion ( int_awregion[i] ), + .m_axi4_awqos ( int_awqos[i] ), + .m_axi4_awuser ( int_awuser[i] ) + ); + + axi4_aw_sender + #( + .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) + ) + u_aw_sender_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .l1_done_o ( l1_m0_aw_done[i] ), + .l1_accept_i ( l1_m0_aw_accept[i] ), + .l1_drop_i ( l1_m0_aw_drop[i] ), + .l1_save_i ( l1_m0_aw_save[i] ), + .l2_done_o ( l2_m0_aw_done[i] ), + .l2_accept_i ( l2_m0_aw_accept[i] ), + .l2_drop_i ( l2_m0_aw_drop[i] ), + .l2_sending_o ( l2_m0_aw_sending[i] ), + .l1_awaddr_i ( int_wtrans_addr[i] ), + .l2_awaddr_i ( l2_aw_addr[i] ), + .s_axi4_awid ( int_awid[i] ), + .s_axi4_awvalid ( int_m0_awvalid[i] ), + .s_axi4_awready ( int_m0_awready[i] ), + .s_axi4_awlen ( int_awlen[i] ), + .s_axi4_awsize ( int_awsize[i] ), + .s_axi4_awburst ( int_awburst[i] ), + .s_axi4_awlock ( int_awlock[i] ), + .s_axi4_awprot ( int_awprot[i] ), + .s_axi4_awcache ( int_awcache[i] ), + .s_axi4_awregion ( 
int_awregion[i] ), + .s_axi4_awqos ( int_awqos[i] ), + .s_axi4_awuser ( int_awuser[i] ), + .m_axi4_awid ( m0_axi4_awid[i] ), + .m_axi4_awaddr ( m0_axi4_awaddr[i] ), + .m_axi4_awvalid ( m0_axi4_awvalid[i] ), + .m_axi4_awready ( m0_axi4_awready[i] ), + .m_axi4_awlen ( m0_axi4_awlen[i] ), + .m_axi4_awsize ( m0_axi4_awsize[i] ), + .m_axi4_awburst ( m0_axi4_awburst[i] ), + .m_axi4_awlock ( m0_axi4_awlock[i] ), + .m_axi4_awprot ( m0_axi4_awprot[i] ), + .m_axi4_awcache ( ), + .m_axi4_awregion ( m0_axi4_awregion[i] ), + .m_axi4_awqos ( m0_axi4_awqos[i] ), + .m_axi4_awuser ( m0_axi4_awuser[i] ) + ); + + // The AXCACHE signals are set according to burstiness and cache coherence or statically + // when not connected to ACP on Zynq (implemented below). + assign m0_write_is_burst[i] = (m0_axi4_awlen[i] != {8{1'b0}}) && (m0_axi4_awburst[i] != 2'b00); + `ifndef EN_ACP + always_comb begin + if ( (l2_m0_aw_sending[i] & l2_cache_coherent[i]) | int_wtrans_cache_coherent[i]) begin + if (m0_write_is_burst[i]) begin + m0_axi4_awcache[i] = 4'b0111; + end else begin + m0_axi4_awcache[i] = 4'b1111; + end + end else begin + m0_axi4_awcache[i] = 4'b0011; + end + end + `else + assign m0_axi4_awcache[i] = 4'b0011; + `endif + + axi4_aw_sender + #( + .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) + ) + u_aw_sender_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .l1_accept_i ( l1_m1_aw_accept[i] ), + .l1_drop_i ( l1_m1_aw_drop[i] ), + .l1_save_i ( l1_m1_aw_save[i] ), + .l1_done_o ( l1_m1_aw_done[i] ), + .l2_accept_i ( l2_m1_aw_accept[i] ), + .l2_drop_i ( l2_m1_aw_drop[i] ), + .l2_done_o ( l2_m1_aw_done[i] ), + .l2_sending_o ( ), // just helps to set axcache + .l1_awaddr_i ( int_wtrans_addr[i] ), + .l2_awaddr_i ( l2_aw_addr[i] ), + .s_axi4_awid ( int_awid[i] ), + .s_axi4_awvalid ( int_m1_awvalid[i] ), + .s_axi4_awready ( int_m1_awready[i] ), + .s_axi4_awlen ( int_awlen[i] ), + .s_axi4_awsize ( int_awsize[i] ), + .s_axi4_awburst ( int_awburst[i] ), + .s_axi4_awlock ( int_awlock[i] ), + .s_axi4_awprot ( int_awprot[i] ), + .s_axi4_awcache ( int_awcache[i] ), + .s_axi4_awregion ( int_awregion[i] ), + .s_axi4_awqos ( int_awqos[i] ), + .s_axi4_awuser ( int_awuser[i] ), + .m_axi4_awid ( m1_axi4_awid[i] ), + .m_axi4_awaddr ( m1_axi4_awaddr[i] ), + .m_axi4_awvalid ( m1_axi4_awvalid[i] ), + .m_axi4_awready ( m1_axi4_awready[i] ), + .m_axi4_awlen ( m1_axi4_awlen[i] ), + .m_axi4_awsize ( m1_axi4_awsize[i] ), + .m_axi4_awburst ( m1_axi4_awburst[i] ), + .m_axi4_awlock ( m1_axi4_awlock[i] ), + .m_axi4_awprot ( m1_axi4_awprot[i] ), + .m_axi4_awcache ( ), + .m_axi4_awregion ( m1_axi4_awregion[i] ), + .m_axi4_awqos ( m1_axi4_awqos[i] ), + .m_axi4_awuser ( m1_axi4_awuser[i] ) + ); + + // The AXCACHE signals are set according to burstiness and cache coherence or statically + // when not connected to ACP on Zynq (implemented below). 
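+ // The AxCACHE constants used above and below follow the AXI4 memory-type encoding:
+ // 4'b0011 is Normal Non-cacheable Bufferable (the non-coherent default), 4'b1111 is
+ // Write-back Read and Write-allocate (used for coherent single-beat accesses), and
+ // 4'b0111/4'b1011 are other Write-back encodings, used for coherent bursts.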
+ assign m1_write_is_burst[i] = (m1_axi4_awlen[i] != {8{1'b0}}) && (m1_axi4_awburst[i] != 2'b00); + `ifdef EN_ACP + always_comb begin + if (m1_write_is_burst[i]) begin + m1_axi4_awcache[i] = 4'b1011; + end else begin + m1_axi4_awcache[i] = 4'b1111; + end + end + `else + assign m1_axi4_awcache[i] = 4'b0011; + `endif + + // }}} + + // Write Data channel (w) {{{ + /* + * write data channel (w) + * + * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ █████╗ ████████╗ █████╗ + * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗ + * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██║ ██║███████║ ██║ ███████║ + * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██║ ██║██╔══██║ ██║ ██╔══██║ + * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██████╔╝██║ ██║ ██║ ██║ ██║ + * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ + * + */ + axi4_w_buffer + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ), + .HUM_BUFFER_DEPTH ( HUM_BUFFER_DEPTH ) + ) + u_w_buffer + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + + // L1 interface + .l1_done_o ( l1_w_done[i] ), + .l1_accept_i ( l1_w_accept[i] ), + .l1_save_i ( l1_w_save[i] ), + .l1_drop_i ( l1_w_drop[i] ), + .l1_master_i ( int_wmaster_select[i] ), + .l1_id_i ( l1_id_drop[i] ), + .l1_len_i ( l1_len_drop[i] ), + .l1_prefetch_i ( l1_prefetch_drop[i] ), + .l1_hit_i ( l1_hit_drop[i] ), + + // L2 interface + .l2_done_o ( l2_w_done[i] ), + .l2_accept_i ( l2_w_accept[i] ), + .l2_drop_i ( l2_w_drop[i] ), + .l2_master_i ( l2_master_select[i] ), + .l2_id_i ( lx_id_drop[i] ), + .l2_len_i ( lx_len_drop[i] ), + .l2_prefetch_i ( lx_prefetch_drop[i] ), + .l2_hit_i ( lx_hit_drop[i] ), + + // Top-level control outputs + .master_select_o ( w_master_select[i] ), + .input_stall_o ( aw_in_stall[i] ), // stall L1 AW input if request buffers full + .output_stall_o ( aw_out_stall[i] ), // stall L1 AW hit forwarding if bypass not possible + + // B sender interface + .b_drop_o ( b_drop[i] ), + .b_done_i ( b_done[i] ), + .id_o ( b_id_drop[i] ), + .prefetch_o ( b_prefetch_drop[i] ), + .hit_o ( b_hit_drop[i] ), + + // AXI W channel interfaces + .s_axi4_wdata ( s_axi4_wdata[i] ), + .s_axi4_wvalid ( s_axi4_wvalid[i] ), + .s_axi4_wready ( s_axi4_wready[i] ), + .s_axi4_wstrb ( s_axi4_wstrb[i] ), + .s_axi4_wlast ( s_axi4_wlast[i] ), + .s_axi4_wuser ( s_axi4_wuser[i] ), + .m_axi4_wdata ( int_wdata[i] ), + .m_axi4_wvalid ( int_wvalid[i] ), + .m_axi4_wready ( int_wready[i] ), + .m_axi4_wstrb ( int_wstrb[i] ), + .m_axi4_wlast ( int_wlast[i] ), + .m_axi4_wuser ( int_wuser[i] ) + ); + + axi4_w_sender + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_w_sender_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_wdata ( int_wdata[i] ), + .s_axi4_wvalid ( int_m0_wvalid[i] ), + .s_axi4_wready ( int_m0_wready[i] ), + .s_axi4_wstrb ( int_wstrb[i] ), + .s_axi4_wlast ( int_wlast[i] ), + .s_axi4_wuser ( int_wuser[i] ), + .m_axi4_wdata ( m0_axi4_wdata[i] ), + .m_axi4_wvalid ( m0_axi4_wvalid[i] ), + .m_axi4_wready ( m0_axi4_wready[i] ), + .m_axi4_wstrb ( m0_axi4_wstrb[i] ), + .m_axi4_wlast ( m0_axi4_wlast[i] ), + .m_axi4_wuser ( m0_axi4_wuser[i] ) + ); + + axi4_w_sender + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + + ) + u_w_sender_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_wdata ( int_wdata[i] ), + .s_axi4_wvalid ( int_m1_wvalid[i] ), + .s_axi4_wready ( int_m1_wready[i] ), + .s_axi4_wstrb ( int_wstrb[i] ), 
+ .s_axi4_wlast ( int_wlast[i] ), + .s_axi4_wuser ( int_wuser[i] ), + .m_axi4_wdata ( m1_axi4_wdata[i] ), + .m_axi4_wvalid ( m1_axi4_wvalid[i] ), + .m_axi4_wready ( m1_axi4_wready[i] ), + .m_axi4_wstrb ( m1_axi4_wstrb[i] ), + .m_axi4_wlast ( m1_axi4_wlast[i] ), + .m_axi4_wuser ( m1_axi4_wuser[i] ) + ); + + /* + * Multiplexer to switch between the two output master ports on the write data (w) channel + */ + always_comb begin + /* Only one output can be selected at any time */ + if (w_master_select[i] == 1'b0) begin + int_m0_wvalid[i] = int_wvalid[i]; + int_m1_wvalid[i] = 1'b0; + int_wready[i] = int_m0_wready[i]; + end else begin + int_m0_wvalid[i] = 1'b0; + int_m1_wvalid[i] = int_wvalid[i]; + int_wready[i] = int_m1_wready[i]; + end + end + + // }}} + + // Write Response channel (b) {{{ + /* + * write response channel (b) + * + * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ ███████╗███████╗██████╗ + * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔════╝██╔════╝██╔══██╗ + * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██████╔╝█████╗ ███████╗██████╔╝ + * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██╗██╔══╝ ╚════██║██╔═══╝ + * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║███████╗███████║██║ + * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ + * + */ + axi4_b_buffer + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_b_buffer_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_bid ( int_m0_bid[i] ), + .s_axi4_bresp ( int_m0_bresp[i] ), + .s_axi4_bvalid ( int_m0_bvalid[i] ), + .s_axi4_buser ( int_m0_buser[i] ), + .s_axi4_bready ( int_m0_bready[i] ), + .m_axi4_bid ( m0_axi4_bid[i] ), + .m_axi4_bresp ( m0_axi4_bresp[i] ), + .m_axi4_bvalid ( m0_axi4_bvalid[i] ), + .m_axi4_buser ( m0_axi4_buser[i] ), + .m_axi4_bready ( m0_axi4_bready[i] ) + ); + + axi4_b_buffer + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_b_buffer_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_bid ( int_m1_bid[i] ), + .s_axi4_bresp ( int_m1_bresp[i] ), + .s_axi4_bvalid ( int_m1_bvalid[i] ), + .s_axi4_buser ( int_m1_buser[i] ), + .s_axi4_bready ( int_m1_bready[i] ), + .m_axi4_bid ( m1_axi4_bid[i] ), + .m_axi4_bresp ( m1_axi4_bresp[i] ), + .m_axi4_bvalid ( m1_axi4_bvalid[i] ), + .m_axi4_buser ( m1_axi4_buser[i] ), + .m_axi4_bready ( m1_axi4_bready[i] ) + ); + + axi4_b_sender + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_b_sender + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .drop_i ( b_drop[i] ), + .done_o ( b_done[i] ), + .id_i ( b_id_drop[i] ), + .prefetch_i ( b_prefetch_drop[i] ), + .hit_i ( b_hit_drop[i] ), + .s_axi4_bid ( s_axi4_bid[i] ), + .s_axi4_bresp ( s_axi4_bresp[i] ), + .s_axi4_bvalid ( s_axi4_bvalid[i] ), + .s_axi4_buser ( s_axi4_buser[i] ), + .s_axi4_bready ( s_axi4_bready[i] ), + .m_axi4_bid ( int_bid[i] ), + .m_axi4_bresp ( int_bresp[i] ), + .m_axi4_bvalid ( int_bvalid[i] ), + .m_axi4_buser ( int_buser[i] ), + .m_axi4_bready ( int_bready[i] ) + ); + + /* + * Multiplexer to switch between the two output master ports on the write response (b) channel + */ + always_comb begin + /* Output 1 always gets priority, so if it has something to send connect + it and let output 0 wait using rready = 0 */ + if (int_m1_bvalid[i] == 1'b1) begin + int_m0_bready[i] = 1'b0; + int_m1_bready[i] = int_bready[i]; + + int_bid[i] = int_m1_bid[i]; + int_bresp[i] = int_m1_bresp[i]; + int_buser[i] = int_m1_buser[i]; + int_bvalid[i] = int_m1_bvalid[i]; + end else begin + int_m0_bready[i] = 
int_bready[i]; + int_m1_bready[i] = 1'b0; + + int_bid[i] = int_m0_bid[i]; + int_bresp[i] = int_m0_bresp[i]; + int_buser[i] = int_m0_buser[i]; + int_bvalid[i] = int_m0_bvalid[i]; + end + end + + // }}} + + // Read Address channel (ar) {{{ + /* + * read address channel (ar) + * + * ██████╗ ███████╗ █████╗ ██████╗ █████╗ ██████╗ ██████╗ ██████╗ + * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗ + * ██████╔╝█████╗ ███████║██║ ██║ ███████║██║ ██║██║ ██║██████╔╝ + * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██║██║ ██║██║ ██║██╔══██╗ + * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║██████╔╝██████╔╝██║ ██║ + * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝ + * + */ + axi4_ar_buffer + #( + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_ar_buffer + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_arid ( s_axi4_arid[i] ), + .s_axi4_araddr ( s_axi4_araddr[i] ), + .s_axi4_arvalid ( s_axi4_arvalid[i] ), + .s_axi4_arready ( s_axi4_arready[i] ), + .s_axi4_arlen ( s_axi4_arlen[i] ), + .s_axi4_arsize ( s_axi4_arsize[i] ), + .s_axi4_arburst ( s_axi4_arburst[i] ), + .s_axi4_arlock ( s_axi4_arlock[i] ), + .s_axi4_arprot ( s_axi4_arprot[i] ), + .s_axi4_arcache ( s_axi4_arcache[i] ), + .s_axi4_aruser ( s_axi4_aruser[i] ), + .m_axi4_arid ( int_arid[i] ), + .m_axi4_araddr ( int_araddr[i] ), + .m_axi4_arvalid ( int_arvalid[i] ), + .m_axi4_arready ( int_arready[i] ), + .m_axi4_arlen ( int_arlen[i] ), + .m_axi4_arsize ( int_arsize[i] ), + .m_axi4_arburst ( int_arburst[i] ), + .m_axi4_arlock ( int_arlock[i] ), + .m_axi4_arprot ( int_arprot[i] ), + .m_axi4_arcache ( int_arcache[i] ), + .m_axi4_aruser ( int_aruser[i] ) + ); + + axi4_ar_sender + #( + .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) + ) + u_ar_sender_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .l1_done_o ( l1_m0_ar_done[i] ), + .l1_accept_i ( l1_m0_ar_accept[i] ), + .l1_drop_i ( l1_m0_ar_drop[i] ), + .l1_save_i ( l1_m0_ar_save[i] ), + .l2_done_o ( l2_m0_ar_done[i] ), + .l2_accept_i ( l2_m0_ar_accept[i] ), + .l2_drop_i ( l2_m0_ar_drop[i] ), + .l2_sending_o ( l2_m0_ar_sending[i] ), + .l1_araddr_i ( int_rtrans_addr[i] ), + .l2_araddr_i ( l2_ar_addr[i] ), + .s_axi4_arid ( int_arid[i] ), + .s_axi4_arvalid ( int_m0_arvalid[i] ), + .s_axi4_arready ( int_m0_arready[i] ), + .s_axi4_arlen ( int_arlen[i] ), + .s_axi4_arsize ( int_arsize[i] ), + .s_axi4_arburst ( int_arburst[i] ), + .s_axi4_arlock ( int_arlock[i] ), + .s_axi4_arprot ( int_arprot[i] ), + .s_axi4_arcache ( int_arcache[i] ), + .s_axi4_aruser ( int_aruser[i] ), + .m_axi4_arid ( m0_axi4_arid[i] ), + .m_axi4_araddr ( m0_axi4_araddr[i] ), + .m_axi4_arvalid ( m0_axi4_arvalid[i] ), + .m_axi4_arready ( m0_axi4_arready[i] ), + .m_axi4_arlen ( m0_axi4_arlen[i] ), + .m_axi4_arsize ( m0_axi4_arsize[i] ), + .m_axi4_arburst ( m0_axi4_arburst[i] ), + .m_axi4_arlock ( m0_axi4_arlock[i] ), + .m_axi4_arprot ( m0_axi4_arprot[i] ), + .m_axi4_arcache ( ), + .m_axi4_aruser ( m0_axi4_aruser[i] ) + ); + + // The AXCACHE signals are set according to burstiness and cache coherence or statically + // when not connected to ACP on Zynq (implemented below). 
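+ // Coherence on the read path can come from two sources: an L1 hit drives
+ // int_rtrans_cache_coherent directly, while an L2 hit drives l2_cache_coherent,
+ // which is qualified with l2_m0_ar_sending below so that it only takes effect
+ // while the L2 address is actually being sent on this master.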
+ assign m0_read_is_burst[i] = (m0_axi4_arlen[i] != {8{1'b0}}) && (m0_axi4_arburst[i] != 2'b00); + `ifndef EN_ACP + always_comb begin + if ( (l2_m0_ar_sending[i] & l2_cache_coherent[i]) | int_rtrans_cache_coherent[i]) begin + if (m0_read_is_burst[i]) begin + m0_axi4_arcache[i] = 4'b1011; + end else begin + m0_axi4_arcache[i] = 4'b1111; + end + end else begin + m0_axi4_arcache[i] = 4'b0011; + end + end + `else + assign m0_axi4_arcache[i] = 4'b0011; + `endif + + axi4_ar_sender + #( + .AXI_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .ENABLE_L2TLB ( ENABLE_L2TLB[i] ) + ) + u_ar_sender_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .l1_done_o ( l1_m1_ar_done[i] ), + .l1_accept_i ( l1_m1_ar_accept[i] ), + .l1_drop_i ( l1_m1_ar_drop[i] ), + .l1_save_i ( l1_m1_ar_save[i] ), + .l2_done_o ( l2_m1_ar_done[i] ), + .l2_accept_i ( l2_m1_ar_accept[i] ), + .l2_drop_i ( l2_m1_ar_drop[i] ), + .l2_sending_o ( ), // just helps to set axcache + .l1_araddr_i ( int_rtrans_addr[i] ), + .l2_araddr_i ( l2_ar_addr[i] ), + .s_axi4_arid ( int_arid[i] ), + .s_axi4_arvalid ( int_m1_arvalid[i] ), + .s_axi4_arready ( int_m1_arready[i] ), + .s_axi4_arlen ( int_arlen[i] ), + .s_axi4_arsize ( int_arsize[i] ), + .s_axi4_arburst ( int_arburst[i] ), + .s_axi4_arlock ( int_arlock[i] ), + .s_axi4_arprot ( int_arprot[i] ), + .s_axi4_arcache ( int_arcache[i] ), + .s_axi4_aruser ( int_aruser[i] ), + .m_axi4_arid ( m1_axi4_arid[i] ), + .m_axi4_araddr ( m1_axi4_araddr[i] ), + .m_axi4_arvalid ( m1_axi4_arvalid[i] ), + .m_axi4_arready ( m1_axi4_arready[i] ), + .m_axi4_arlen ( m1_axi4_arlen[i] ), + .m_axi4_arsize ( m1_axi4_arsize[i] ), + .m_axi4_arburst ( m1_axi4_arburst[i] ), + .m_axi4_arlock ( m1_axi4_arlock[i] ), + .m_axi4_arprot ( m1_axi4_arprot[i] ), + .m_axi4_arcache ( ), + .m_axi4_aruser ( m1_axi4_aruser[i] ) + ); + + // The AXCACHE signals are set according to burstiness and cache coherence or statically + // when not connected to ACP on Zynq (implemented below). 
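+ // Master 1 is the port intended for the coherent interconnect (the ACP on Zynq): with
+ // EN_ACP defined, coherent transactions are routed to it via master_select, so its
+ // AxCACHE is set to cacheable values while Master 0 stays statically non-cacheable;
+ // without EN_ACP the situation is reversed and Master 1 uses the static encoding.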
+ assign m1_read_is_burst[i] = (m1_axi4_arlen[i] != {8{1'b0}}) && (m1_axi4_arburst[i] != 2'b00); + `ifdef EN_ACP + always_comb begin + if (m1_read_is_burst[i]) begin + m1_axi4_arcache[i] = 4'b1011; + end else begin + m1_axi4_arcache[i] = 4'b1111; + end + end + `else + assign m1_axi4_arcache[i] = 4'b0011; + `endif + + // }}} + + // Read Response channel (r) {{{ + /* + * read response channel (r) + * + * ██████╗ ███████╗ █████╗ ██████╗ ██████╗ ███████╗███████╗██████╗ + * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔════╝██╔════╝██╔══██╗ + * ██████╔╝█████╗ ███████║██║ ██║ ██████╔╝█████╗ ███████╗██████╔╝ + * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██╗██╔══╝ ╚════██║██╔═══╝ + * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║███████╗███████║██║ + * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ + * + */ + axi4_r_buffer + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_r_buffer_m0 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_rid ( int_m0_rid[i] ), + .s_axi4_rresp ( int_m0_rresp[i] ), + .s_axi4_rdata ( int_m0_rdata[i] ), + .s_axi4_rlast ( int_m0_rlast[i] ), + .s_axi4_rvalid ( int_m0_rvalid[i] ), + .s_axi4_ruser ( int_m0_ruser[i] ), + .s_axi4_rready ( int_m0_rready[i] ), + .m_axi4_rid ( m0_axi4_rid[i] ), + .m_axi4_rresp ( m0_axi4_rresp[i] ), + .m_axi4_rdata ( m0_axi4_rdata[i] ), + .m_axi4_rlast ( m0_axi4_rlast[i] ), + .m_axi4_rvalid ( m0_axi4_rvalid[i] ), + .m_axi4_ruser ( m0_axi4_ruser[i] ), + .m_axi4_rready ( m0_axi4_rready[i] ) + ); + + axi4_r_buffer + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_r_buffer_m1 + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .s_axi4_rid ( int_m1_rid[i] ), + .s_axi4_rresp ( int_m1_rresp[i] ), + .s_axi4_rdata ( int_m1_rdata[i] ), + .s_axi4_rlast ( int_m1_rlast[i] ), + .s_axi4_rvalid ( int_m1_rvalid[i] ), + .s_axi4_ruser ( int_m1_ruser[i] ), + .s_axi4_rready ( int_m1_rready[i] ), + .m_axi4_rid ( m1_axi4_rid[i] ), + .m_axi4_rresp ( m1_axi4_rresp[i] ), + .m_axi4_rdata ( m1_axi4_rdata[i] ), + .m_axi4_rlast ( m1_axi4_rlast[i] ), + .m_axi4_rvalid ( m1_axi4_rvalid[i] ), + .m_axi4_ruser ( m1_axi4_ruser[i] ), + .m_axi4_rready ( m1_axi4_rready[i] ) + ); + + axi4_r_sender + #( + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ) + ) + u_r_sender + ( + .axi4_aclk ( Clk_CI ), + .axi4_arstn ( Rst_RBI ), + .drop_i ( lx_r_drop[i] ), + .drop_len_i ( lx_len_drop[i] ), + .done_o ( lx_r_done[i] ), + .id_i ( lx_id_drop[i] ), + .prefetch_i ( lx_prefetch_drop[i] ), + .hit_i ( lx_hit_drop[i] ), + .s_axi4_rid ( s_axi4_rid[i] ), + .s_axi4_rresp ( s_axi4_rresp[i] ), + .s_axi4_rdata ( s_axi4_rdata[i] ), + .s_axi4_rlast ( s_axi4_rlast[i] ), + .s_axi4_rvalid ( s_axi4_rvalid[i] ), + .s_axi4_ruser ( s_axi4_ruser[i] ), + .s_axi4_rready ( s_axi4_rready[i] ), + .m_axi4_rid ( int_rid[i] ), + .m_axi4_rresp ( int_rresp[i] ), + .m_axi4_rdata ( int_rdata[i] ), + .m_axi4_rlast ( int_rlast[i] ), + .m_axi4_rvalid ( int_rvalid[i] ), + .m_axi4_ruser ( int_ruser[i] ), + .m_axi4_rready ( int_rready[i] ) + ); + + /* + * Multiplexer to switch between the two output master ports on the read response(r) channel + * + * Do not perform read burst interleaving as the DMA does not support it. This means we can only + * switch between the two masters upon sending rlast or when idle. + * + * However, if the downstream already performs burst interleaving, this cannot be undone here. 
+ * Also, the downstream may interleave a burst response with a single-beat transaction. In this
+ * case, the FSM below falls out of burst mode. To avoid it performing burst interleaving
+ * after such an event, it gives priority to the master which received the last burst in case
+ * both have a burst ready (rvalid).
+ *
+ * Order of priority:
+ * 1. Ongoing burst transaction.
+ * 2. Single-beat transaction on Master 1.
+ * 3. Single-beat transaction on Master 0.
+ * 4. Burst transaction on master that received the last burst.
+ */
+ // Select signal
+ always_ff @(posedge Clk_CI) begin
+ if (Rst_RBI == 0) begin
+ RRespSel_SP[i] <= 1'b0;
+ end else begin
+ RRespSel_SP[i] <= RRespSel_SN[i];
+ end
+ end
+
+ // FSM
+ always_comb begin : RRespMuxFsm
+ RRespMuxCtrl_SN[i] = RRespMuxCtrl_SP[i];
+ RRespSel_SN[i] = RRespSel_SP[i];
+
+ RRespBurst_S[i] = 1'b0;
+ RRespSelIm_S[i] = 1'b0;
+
+ unique case (RRespMuxCtrl_SP[i])
+
+ IDLE: begin
+ // immediately forward single-beat transactions
+ if (int_m1_rvalid[i] && int_m1_rlast[i])
+ RRespSelIm_S[i] = 1'b1;
+ else if (int_m0_rvalid[i] && int_m0_rlast[i])
+ RRespSelIm_S[i] = 1'b0;
+
+ // bursts - they also start immediately
+ else if (int_m1_rvalid[i] || int_m0_rvalid[i]) begin
+ RRespMuxCtrl_SN[i] = BUSY;
+
+ // in case both are ready, continue with the master that had the last burst
+ if (int_m1_rvalid[i] && int_m0_rvalid[i]) begin
+ RRespSel_SN[i] = RRespSel_SP[i];
+ RRespSelIm_S[i] = RRespSel_SP[i];
+ end else if (int_m1_rvalid[i]) begin
+ RRespSel_SN[i] = 1'b1;
+ RRespSelIm_S[i] = 1'b1;
+ end else begin
+ RRespSel_SN[i] = 1'b0;
+ RRespSelIm_S[i] = 1'b0;
+ end
+ end
+ end
+
+ BUSY: begin
+ RRespBurst_S[i] = 1'b1;
+ // detect last handshake of currently ongoing transfer
+ if (int_rvalid[i] && int_rready[i] && int_rlast[i])
+ RRespMuxCtrl_SN[i] = IDLE;
+ end
+
+ default: begin
+ RRespMuxCtrl_SN[i] = IDLE;
+ end
+
+ endcase
+ end
+
+ // FSM state
+ always_ff @(posedge Clk_CI) begin
+ if (Rst_RBI == 0) begin
+ RRespMuxCtrl_SP[i] <= IDLE;
+ end else begin
+ RRespMuxCtrl_SP[i] <= RRespMuxCtrl_SN[i];
+ end
+ end
+
+ // Actual multiplexer
+ always_comb begin
+ if ( (RRespBurst_S[i] && RRespSel_SP[i]) || (!RRespBurst_S[i] && RRespSelIm_S[i]) ) begin
+ int_m0_rready[i] = 1'b0;
+ int_m1_rready[i] = int_rready[i];
+
+ int_rid[i] = int_m1_rid[i];
+ int_rresp[i] = int_m1_rresp[i];
+ int_rdata[i] = int_m1_rdata[i];
+ int_rlast[i] = int_m1_rlast[i];
+ int_ruser[i] = int_m1_ruser[i];
+ int_rvalid[i] = int_m1_rvalid[i];
+ end else begin
+ int_m0_rready[i] = int_rready[i];
+ int_m1_rready[i] = 1'b0;
+
+ int_rid[i] = int_m0_rid[i];
+ int_rresp[i] = int_m0_rresp[i];
+ int_rdata[i] = int_m0_rdata[i];
+ int_rlast[i] = int_m0_rlast[i];
+ int_ruser[i] = int_m0_ruser[i];
+ int_rvalid[i] = int_m0_rvalid[i];
+ end
+ end
+
+ end // BUF & SEND
+
+ // }}}
+
+ endgenerate // BUF & SEND }}}
+
+ // Log {{{
+
+`ifdef RAB_AX_LOG_EN
+ AxiBramLogger
+ #(
+ .AXI_ID_BITW ( AXI_ID_WIDTH ),
+ .AXI_ADDR_BITW ( AXI_S_ADDR_WIDTH ),
+ .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES )
+ )
+ u_aw_logger
+ (
+ .Clk_CI ( NonGatedClk_CI ),
+ .TimestampClk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .AxiValid_SI ( s_axi4_awvalid[1] ),
+ .AxiReady_SI ( s_axi4_awready[1] ),
+ .AxiId_DI ( s_axi4_awid[1] ),
+ .AxiAddr_DI ( s_axi4_awaddr[1] ),
+ .AxiLen_DI ( s_axi4_awlen[1] ),
+ .Clear_SI ( AwLogClr_SI ),
+ .LogEn_SI ( LogEn_SI ),
+ .Full_SO ( int_aw_log_full ),
+ .Ready_SO ( AwLogRdy_SO ),
+ .Bram_PS ( AwBram_PS )
+ );
+
+ AxiBramLogger
+ #(
+ .AXI_ID_BITW ( AXI_ID_WIDTH ),
+ .AXI_ADDR_BITW ( 
AXI_S_ADDR_WIDTH ), + .NUM_LOG_ENTRIES ( `RAB_AX_LOG_ENTRIES ) + ) + u_ar_logger + ( + .Clk_CI ( NonGatedClk_CI ), + .TimestampClk_CI ( Clk_CI ), + .Rst_RBI ( Rst_RBI ), + .AxiValid_SI ( s_axi4_arvalid[1] ), + .AxiReady_SI ( s_axi4_arready[1] ), + .AxiId_DI ( s_axi4_arid[1] ), + .AxiAddr_DI ( s_axi4_araddr[1] ), + .AxiLen_DI ( s_axi4_arlen[1] ), + .Clear_SI ( ArLogClr_SI ), + .LogEn_SI ( LogEn_SI ), + .Full_SO ( int_ar_log_full ), + .Ready_SO ( ArLogRdy_SO ), + .Bram_PS ( ArBram_PS ) + ); +`endif + + // }}} + + // RAB Core {{{ + // ██████╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗ ███████╗ + // ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔═══██╗██╔══██╗██╔════╝ + // ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝█████╗ + // ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔══██╗██╔══╝ + // ██║ ██║██║ ██║██████╔╝ ╚██████╗╚██████╔╝██║ ██║███████╗ + // ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ + // + /* + * rab_core + * + * The rab core translates addresses. It has two ports, which can be used + * independently, however they will compete for time internally, as lookups + * are serialized. + * + * type is the read(0) or write(1) used to check the protection flags. If they + * don't match an interrupt is created on the int_prot line. + */ + + rab_core + #( + .N_PORTS ( N_PORTS ), + .N_L2_SETS ( N_L2_SETS ), + .N_L2_SET_ENTRIES ( N_L2_SET_ENTRIES ), + .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), + .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), + .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ), + .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ), + .AXI_ID_WIDTH ( AXI_ID_WIDTH ), + .AXI_USER_WIDTH ( AXI_USER_WIDTH ), + .MH_FIFO_DEPTH ( MH_FIFO_DEPTH ) + ) + u_rab_core + ( + .Clk_CI ( Clk_CI ), + .Rst_RBI ( Rst_RBI ), + + // Config IF + .s_axi_awaddr ( s_axi4lite_awaddr ), + .s_axi_awvalid ( s_axi4lite_awvalid ), + .s_axi_awready ( s_axi4lite_awready ), + .s_axi_wdata ( s_axi4lite_wdata ), + .s_axi_wstrb ( s_axi4lite_wstrb ), + .s_axi_wvalid ( s_axi4lite_wvalid ), + .s_axi_wready ( s_axi4lite_wready ), + .s_axi_bresp ( s_axi4lite_bresp ), + .s_axi_bvalid ( s_axi4lite_bvalid ), + .s_axi_bready ( s_axi4lite_bready ), + .s_axi_araddr ( s_axi4lite_araddr ), + .s_axi_arvalid ( s_axi4lite_arvalid ), + .s_axi_arready ( s_axi4lite_arready ), + .s_axi_rready ( s_axi4lite_rready ), + .s_axi_rdata ( s_axi4lite_rdata ), + .s_axi_rresp ( s_axi4lite_rresp ), + .s_axi_rvalid ( s_axi4lite_rvalid ), + + // L1 miss info outputs -> L2 TLB arbitration + .int_miss ( rab_miss ), + .int_multi ( rab_multi ), + .int_prot ( rab_prot ), + .int_prefetch ( rab_prefetch ), + .int_mhf_full ( int_mhf_full ), + + // L1 transaction info outputs -> L2 TLB arbitration + .int_axaddr_o ( L1OutAddr_D ), + .int_axid_o ( L1OutId_D ), + .int_axlen_o ( L1OutLen_D ), + .int_axuser_o ( L1OutUser_D ), + + // Write Req IF + .port1_addr ( int_awaddr ), + .port1_id ( int_awid ), + .port1_len ( int_awlen ), + .port1_size ( int_awsize ), + .port1_addr_valid ( int_awvalid & ~aw_in_stall ), // avoid the FSM accepting new AW requests + .port1_type ( {N_PORTS{1'b1}} ), + .port1_user ( int_awuser ), + .port1_sent ( int_wtrans_sent ), // signal done to L1 FSM + .port1_out_addr ( int_wtrans_addr ), + .port1_cache_coherent ( int_wtrans_cache_coherent ), + .port1_accept ( int_wtrans_accept ), + .port1_drop ( int_wtrans_drop ), + .port1_miss ( int_wtrans_miss ), + + // Read Req IF + .port2_addr ( int_araddr ), + .port2_id ( int_arid ), + .port2_len ( int_arlen ), + .port2_size ( int_arsize ), + .port2_addr_valid ( int_arvalid ), + .port2_type ( 
{N_PORTS{1'b0}} ), + .port2_user ( int_aruser ), + .port2_sent ( int_rtrans_sent ), // signal done to L1 FSM + .port2_out_addr ( int_rtrans_addr ), + .port2_cache_coherent ( int_rtrans_cache_coherent ), + .port2_accept ( int_rtrans_accept ), + .port2_drop ( int_rtrans_drop ), + .port2_miss ( int_rtrans_miss ), + + // L2 miss info inputs -> axi_rab_cfg + .miss_l2_i ( L2Miss_S ), + .miss_l2_addr_i ( L2OutInAddr_DP ), + .miss_l2_id_i ( L2OutId_DP ), + .miss_l2_user_i ( L2OutUser_DP ), + + // L2 config outputs + .wdata_l2_o ( L2CfgWData_D ), + .waddr_l2_o ( L2CfgWAddr_D ), + .wren_l2_o ( L2CfgWE_S ) + ); + + // }}} + + // AX SPLITS {{{ + // █████╗ ██╗ ██╗ ███████╗██████╗ ██╗ ██╗████████╗ + // ██╔══██╗╚██╗██╔╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝ + // ███████║ ╚███╔╝ ███████╗██████╔╝██║ ██║ ██║ + // ██╔══██║ ██╔██╗ ╚════██║██╔═══╝ ██║ ██║ ██║ + // ██║ ██║██╔╝ ██╗ ███████║██║ ███████╗██║ ██║ + // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝ + // + /** + * Multiplex the two output master ports of the Read Address and Write Address (AR/AW) channels. + * + * Use the `int_xmaster_select` signal to route the signals to either Master 0 (to memory) or + * Master 1 (to ACP). In case of an L1 miss: Route the signals to both masters. They shall be + * saved until the L2 outputs are available. + */ + generate for (i = 0; i < N_PORTS; i++) begin : AX_SPLIT + + /* + * When accepting L1 transactions, we must just do so on the selected master. Drop requests must + * be performed on any one of the two masters. Save requests must be performed by both masters. + */ + always_comb begin : AW_L1_SPLIT + + // TLB handshake + l1_m0_aw_accept[i] = 1'b0; + l1_m1_aw_accept[i] = 1'b0; + l1_m0_aw_drop[i] = 1'b0; + l1_m1_aw_drop[i] = 1'b0; + l1_m0_aw_save[i] = 1'b0; + l1_m1_aw_save[i] = 1'b0; + + l1_mx_aw_done[i] = 1'b0; + + // AXI sender input handshake + int_m0_awvalid[i] = 1'b0; + int_m1_awvalid[i] = 1'b0; + int_awready[i] = 1'b0; + + // accept on selected master only + if (l1_aw_accept[i]) begin + if (int_wmaster_select[i]) begin + l1_m1_aw_accept[i] = 1'b1; + l1_mx_aw_done[i] = l1_m1_aw_done[i]; + + int_m1_awvalid[i] = int_awvalid[i]; + int_awready[i] = int_m1_awready[i]; + + end else begin + l1_m0_aw_accept[i] = 1'b1; + l1_mx_aw_done[i] = l1_m0_aw_done[i]; + + int_m0_awvalid[i] = int_awvalid[i]; + int_awready[i] = int_m0_awready[i]; + end + + // drop on Master 0 only + end else if (l1_aw_drop[i]) begin + l1_m0_aw_drop[i] = 1'b1; + l1_mx_aw_done[i] = l1_m0_aw_done[i]; + + int_m0_awvalid[i] = int_awvalid[i]; + int_awready[i] = l1_m0_aw_done[i]; + + // save on both masters + end else if (l1_aw_save[i]) begin + // split save + l1_m0_aw_save[i] = ~l1_m0_aw_done_SP[i]; + l1_m1_aw_save[i] = ~l1_m1_aw_done_SP[i]; + + // combine done + l1_mx_aw_done[i] = l1_m0_aw_done_SP[i] & l1_m1_aw_done_SP[i]; + + int_m0_awvalid[i] = int_awvalid[i]; + int_m1_awvalid[i] = int_awvalid[i]; + int_awready[i] = l1_mx_aw_done[i]; + end + end + + // signal back to handshake splitter + assign l1_aw_done[i] = l1_mx_aw_done[i]; + + always_ff @(posedge Clk_CI) begin : L1_MX_AW_DONE_REG + if (Rst_RBI == 0) begin + l1_m0_aw_done_SP[i] <= 1'b0; + l1_m1_aw_done_SP[i] <= 1'b0; + end else if (l1_mx_aw_done[i]) begin + l1_m0_aw_done_SP[i] <= 1'b0; + l1_m1_aw_done_SP[i] <= 1'b0; + end else begin + l1_m0_aw_done_SP[i] <= l1_m0_aw_done_SP[i] | l1_m0_aw_done[i]; + l1_m1_aw_done_SP[i] <= l1_m1_aw_done_SP[i] | l1_m1_aw_done[i]; + end + end + + /* + * When accepting L2 transactions, we must drop the corresponding transaction from the other + * master to make it available 
again for save requests from L1_DROP_SAVE. + */ + always_comb begin : AW_L2_SPLIT + + l2_m0_aw_accept[i] = 1'b0; + l2_m1_aw_accept[i] = 1'b0; + l2_m0_aw_drop[i] = 1'b0; + l2_m1_aw_drop[i] = 1'b0; + + // de-assert request signals individually upon handshakes + if (l2_aw_accept[i]) begin + if (l2_master_select[i]) begin + l2_m1_aw_accept[i] = ~l2_m1_aw_done_SP[i]; + l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i]; + + end else begin + l2_m0_aw_accept[i] = ~l2_m0_aw_done_SP[i]; + l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i]; + + end + end else begin + l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0; + l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0; + + end + + // combine done + l2_mx_aw_done[i] = l2_m0_aw_done_SP[i] & l2_m1_aw_done_SP[i]; + + l2_aw_done[i] = l2_mx_aw_done[i]; + end + + always_ff @(posedge Clk_CI) begin : L2_MX_AW_DONE_REG + if (Rst_RBI == 0) begin + l2_m0_aw_done_SP[i] <= 1'b0; + l2_m1_aw_done_SP[i] <= 1'b0; + end else if (l2_mx_aw_done[i]) begin + l2_m0_aw_done_SP[i] <= 1'b0; + l2_m1_aw_done_SP[i] <= 1'b0; + end else begin + l2_m0_aw_done_SP[i] <= l2_m0_aw_done_SP[i] | l2_m0_aw_done[i]; + l2_m1_aw_done_SP[i] <= l2_m1_aw_done_SP[i] | l2_m1_aw_done[i]; + end + end + + /* + * When accepting L1 transactions, we must just do so on the selected master. Drop requests must + * be performed on any one of the two masters. Save requests must be performed by both masters. + */ + always_comb begin : AR_L1_SPLIT + + // TLB handshake + l1_m0_ar_accept[i] = 1'b0; + l1_m1_ar_accept[i] = 1'b0; + l1_m0_ar_drop[i] = 1'b0; + l1_m1_ar_drop[i] = 1'b0; + l1_m0_ar_save[i] = 1'b0; + l1_m1_ar_save[i] = 1'b0; + + l1_mx_ar_done[i] = 1'b0; + + // AXI sender input handshake + int_m0_arvalid[i] = 1'b0; + int_m1_arvalid[i] = 1'b0; + int_arready[i] = 1'b0; + + // accept on selected master only + if (l1_ar_accept[i]) begin + if (int_rmaster_select[i]) begin + l1_m1_ar_accept[i] = 1'b1; + l1_mx_ar_done[i] = l1_m1_ar_done[i]; + + int_m1_arvalid[i] = int_arvalid[i]; + int_arready[i] = int_m1_arready[i]; + + end else begin + l1_m0_ar_accept[i] = 1'b1; + l1_mx_ar_done[i] = l1_m0_ar_done[i]; + + int_m0_arvalid[i] = int_arvalid[i]; + int_arready[i] = int_m0_arready[i]; + end + + // drop on Master 0 only + end else if (l1_ar_drop[i]) begin + l1_m0_ar_drop[i] = 1'b1; + l1_mx_ar_done[i] = l1_m0_ar_done[i]; + + int_m0_arvalid[i] = int_arvalid[i]; + int_arready[i] = l1_m0_ar_done[i]; + + // save on both masters + end else if (l1_ar_save[i]) begin + // split save + l1_m0_ar_save[i] = ~l1_m0_ar_done_SP[i]; + l1_m1_ar_save[i] = ~l1_m1_ar_done_SP[i]; + + // combine done + l1_mx_ar_done[i] = l1_m0_ar_done_SP[i] & l1_m1_ar_done_SP[i]; + + int_m0_arvalid[i] = int_arvalid[i]; + int_m1_arvalid[i] = int_arvalid[i]; + int_arready[i] = l1_mx_ar_done[i]; + end + end + + // signal back to handshake splitter + assign l1_ar_done[i] = l1_mx_ar_done[i]; + + always_ff @(posedge Clk_CI) begin : L1_MX_AR_DONE_REG + if (Rst_RBI == 0) begin + l1_m0_ar_done_SP[i] <= 1'b0; + l1_m1_ar_done_SP[i] <= 1'b0; + end else if (l1_mx_ar_done[i]) begin + l1_m0_ar_done_SP[i] <= 1'b0; + l1_m1_ar_done_SP[i] <= 1'b0; + end else begin + l1_m0_ar_done_SP[i] <= l1_m0_ar_done_SP[i] | l1_m0_ar_done[i]; + l1_m1_ar_done_SP[i] <= l1_m1_ar_done_SP[i] | l1_m1_ar_done[i]; + end + end + + /* + * When accepting L2 transactions, we must drop the corresponding transaction from the other + * master to make it available again for save requests from L1_DROP_SAVE. 
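+    * As for the AW channel above, the per-master done flags (l2_m0/m1_ar_done_SP) latch the
+    * individual handshakes, and l2_ar_done is only signalled back once both masters have
+    * completed.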
+ */ + always_comb begin : AR_L2_SPLIT + + l2_m0_ar_accept[i] = 1'b0; + l2_m1_ar_accept[i] = 1'b0; + l2_m0_ar_drop[i] = 1'b0; + l2_m1_ar_drop[i] = 1'b0; + + // de-assert request signals individually upon handshakes + if (l2_ar_accept[i]) begin + if (l2_master_select[i]) begin + l2_m1_ar_accept[i] = ~l2_m1_ar_done_SP[i]; + l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i]; + + end else begin + l2_m0_ar_accept[i] = ~l2_m0_ar_done_SP[i]; + l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i]; + + end + end else if (l2_ar_drop[i]) begin + l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0; + l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0; + + end + + // combine done + l2_mx_ar_done[i] = l2_m0_ar_done_SP[i] & l2_m1_ar_done_SP[i]; + + l2_ar_done[i] = l2_mx_ar_done[i]; + end + + always_ff @(posedge Clk_CI) begin : L2_MX_AR_DONE_REG + if (Rst_RBI == 0) begin + l2_m0_ar_done_SP[i] <= 1'b0; + l2_m1_ar_done_SP[i] <= 1'b0; + end else if (l2_mx_ar_done[i]) begin + l2_m0_ar_done_SP[i] <= 1'b0; + l2_m1_ar_done_SP[i] <= 1'b0; + end else begin + l2_m0_ar_done_SP[i] <= l2_m0_ar_done_SP[i] | l2_m0_ar_done[i]; + l2_m1_ar_done_SP[i] <= l2_m1_ar_done_SP[i] | l2_m1_ar_done[i]; + end + end + + end // AX_SPLIT + endgenerate // AX_SPLIT + + // }}} + + // HANDSHAKE SPLITS {{{ + // ██╗ ██╗███████╗ ███████╗██████╗ ██╗ ██╗████████╗ + // ██║ ██║██╔════╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝ + // ███████║███████╗ ███████╗██████╔╝██║ ██║ ██║ + // ██╔══██║╚════██║ ╚════██║██╔═══╝ ██║ ██║ ██║ + // ██║ ██║███████║ ███████║██║ ███████╗██║ ██║ + // ╚═╝ ╚═╝╚══════╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝ + // + /* + * We need to perform combined handshakes with multiple AXI modules + * upon transactions drops, accepts, saves etc. from two TLBs. + */ + generate for (i = 0; i < N_PORTS; i++) begin : HANDSHAKE_SPLIT + + assign l1_xw_accept[i] = int_wtrans_accept[i] & ~aw_out_stall[i]; + assign int_wtrans_sent[i] = l1_xw_done[i]; + + assign l1_ar_accept[i] = int_rtrans_accept[i]; + assign int_rtrans_sent[i] = l1_ar_done[i]; + + /* + * L1 AW sender + W buffer handshake split + */ + // forward + assign l1_aw_accept[i] = l1_xw_accept[i] & ~l1_aw_done_SP[i]; + assign l1_w_accept[i] = l1_xw_accept[i] & ~l1_w_done_SP[i]; + + assign l1_aw_save[i] = l1_xw_save[i] & ~l1_aw_done_SP[i]; + assign l1_w_save[i] = l1_xw_save[i] & ~l1_w_done_SP[i]; + + assign l1_aw_drop[i] = l1_xw_drop[i] & ~l1_aw_done_SP[i]; + assign l1_w_drop[i] = l1_xw_drop[i] & ~l1_w_done_SP[i]; + + // backward + assign l1_xw_done[i] = l1_aw_done_SP[i] & l1_w_done_SP[i]; + + always_ff @(posedge Clk_CI) begin : L1_XW_HS_SPLIT + if (Rst_RBI == 0) begin + l1_aw_done_SP[i] <= 1'b0; + l1_w_done_SP[i] <= 1'b0; + end else if (l1_xw_done[i]) begin + l1_aw_done_SP[i] <= 1'b0; + l1_w_done_SP[i] <= 1'b0; + end else begin + l1_aw_done_SP[i] <= l1_aw_done_SP[i] | l1_aw_done[i]; + l1_w_done_SP[i] <= l1_w_done_SP[i] | l1_w_done[i]; + end + end + + if (ENABLE_L2TLB[i] == 1) begin : L2_HS_SPLIT + + /* + * L1 AR sender + R sender handshake split + * + * AR and R do not need to be strictly in sync. We thus use separate handshakes. + * But the handshake signals for the R sender are multiplexed with the those for + * the L2. However, L2_ACCEPT_DROP_SAVE has always higher priority. + */ + assign lx_r_drop[i] = l2_r_drop[i] | l1_r_drop[i]; + assign l1_r_done[i] = l2_r_drop[i] ? 1'b0 : lx_r_done[i]; + assign l2_r_done[i] = l2_r_drop[i] ? 
lx_r_done[i] : 1'b0; + + /* + * L2 AW sender + W buffer handshake split + */ + // forward + assign l2_aw_accept[i] = l2_xw_accept[i] & ~l2_aw_done_SP[i]; + assign l2_w_accept[i] = l2_xw_accept[i] & ~l2_w_done_SP[i]; + + assign l2_aw_drop[i] = l2_xw_drop[i] & ~l2_aw_done_SP[i]; + assign l2_w_drop[i] = l2_xw_drop[i] & ~l2_w_done_SP[i]; + + // backward + assign l2_xw_done[i] = l2_aw_done_SP[i] & l2_w_done_SP[i]; + + always_ff @(posedge Clk_CI) begin : L2_XW_HS_SPLIT + if (Rst_RBI == 0) begin + l2_aw_done_SP[i] <= 1'b0; + l2_w_done_SP[i] <= 1'b0; + end else if (l2_xw_done[i]) begin + l2_aw_done_SP[i] <= 1'b0; + l2_w_done_SP[i] <= 1'b0; + end else begin + l2_aw_done_SP[i] <= l2_aw_done_SP[i] | l2_aw_done[i]; + l2_w_done_SP[i] <= l2_w_done_SP[i] | l2_w_done[i]; + end + end + + /* + * L2 AR + R sender handshake split + */ + // forward + assign l2_ar_drop[i] = l2_xr_drop[i] & ~l2_ar_done_SP[i]; + assign l2_r_drop[i] = l2_xr_drop[i] & ~l2_r_done_SP[i]; + + // backward - make sure to always clear L2_XR_HS_SPLIT + always_comb begin + if (l2_xr_drop[i]) begin + l2_xr_done[i] = l2_ar_done_SP[i] & l2_r_done_SP[i]; + end else begin + l2_xr_done[i] = l2_ar_done_SP[i]; + end + end + + always_ff @(posedge Clk_CI) begin : L2_XR_HS_SPLIT + if (Rst_RBI == 0) begin + l2_ar_done_SP[i] <= 1'b0; + l2_r_done_SP[i] <= 1'b0; + end else if (l2_xr_done[i]) begin + l2_ar_done_SP[i] <= 1'b0; + l2_r_done_SP[i] <= 1'b0; + end else begin + l2_ar_done_SP[i] <= l2_ar_done_SP[i] | l2_ar_done[i]; + l2_r_done_SP[i] <= l2_r_done_SP[i] | l2_r_done[i]; + end + end + + end else begin // if (ENABLE_L2TLB[i] == 1) + + assign lx_r_drop[i] = l1_r_drop[i]; + assign l1_r_done[i] = lx_r_done[i]; + + assign l2_aw_accept[i] = 1'b0; + assign l2_w_accept[i] = 1'b0; + assign l2_aw_drop[i] = 1'b0; + assign l2_w_drop[i] = 1'b0; + assign l2_xw_done[i] = 1'b0; + assign l2_aw_done_SP[i] = 1'b0; + assign l2_w_done_SP[i] = 1'b0; + + assign l2_ar_accept[i] = 1'b0; + assign l2_ar_drop[i] = 1'b0; + assign l2_r_drop[i] = 1'b0; + assign l2_xr_done[i] = 1'b0; + assign l2_r_done[i] = 1'b0; + assign l2_ar_done_SP[i] = 1'b0; + assign l2_r_done_SP[i] = 1'b0; + + end // if (ENABLE_L2TLB[i] == 1) + + end // HANDSHAKE_SPLIT + endgenerate // HANDSHAKE_SPLIT + + // }}} + + // L2 TLB {{{ + // ██╗ ██████╗ ████████╗██╗ ██████╗ + // ██║ ╚════██╗ ╚══██╔══╝██║ ██╔══██╗ + // ██║ █████╔╝ ██║ ██║ ██████╔╝ + // ██║ ██╔═══╝ ██║ ██║ ██╔══██╗ + // ███████╗███████╗ ██║ ███████╗██████╔╝ + // ╚══════╝╚══════╝ ╚═╝ ╚══════╝╚═════╝ + // + /* + * l2_tlb + * + * The L2 TLB translates addresses upon misses in the L1 TLB (rab_core). + * + * It supports one ongoing translation at a time. If an L1 miss occurs while the L2 is busy, + * the L1 is stalled untill the L2 is available again. + * + */ + generate for (i = 0; i < N_PORTS; i++) begin : L2_TLB + if (ENABLE_L2TLB[i] == 1) begin : L2_TLB + + /* + * L1 output selector + */ + assign L1OutRwType_D[i] = int_wtrans_drop[i] ? 1'b1 : 1'b0; + assign L1OutProt_D[i] = rab_prot[i]; + assign L1OutMulti_D[i] = rab_multi[i]; + + /* + * L1 output control + L1_DROP_BUF, L2_IN_BUF management + * + * Forward the L1 drop request to AR/AW sender modules if + * 1. the transactions needs to be dropped (L1 multi, prot, prefetch), or + * 2. if a lookup in the L2 TLB is required (L1 miss) and the input buffer is not full. + * + * The AR/AW senders do not support more than 1 oustanding L1 miss. 
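+    * A second L1 miss on the same port therefore has to wait until the L2 lookup for the
+    * first one has completed and its result has been consumed.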
The push back towards + * the upstream is realized by not accepting the save request (saving the L1 transaction) + * in the senders as long as the L2 TLB is busy or has valid output. This ultimately + * blocks the L1 TLB. + * + * Together with the AW drop/save, we also perform the W drop/save as AW and W need to + * absolutely remain in order. In contrast, the R drop is performed + */ + always_comb begin : L1_DROP_SAVE + + l1_ar_drop[i] = 1'b0; + l1_ar_save[i] = 1'b0; + l1_xw_drop[i] = 1'b0; + l1_xw_save[i] = 1'b0; + + l1_id_drop[i] = L1OutId_D[i]; + l1_len_drop[i] = L1OutLen_D[i]; + l1_prefetch_drop[i] = rab_prefetch[i]; + l1_hit_drop[i] = 1'b1; // there are no drops for L1 misses + + L1DropEn_S[i] = 1'b0; + L2InEn_S[i] = 1'b0; + + if ( rab_prot[i] | rab_multi[i] | rab_prefetch[i] ) begin + // 1. Drop + l1_ar_drop[i] = int_rtrans_drop[i] & ~L1DropValid_SP[i]; + l1_xw_drop[i] = int_wtrans_drop[i] & ~L1DropValid_SP[i]; + + // Store to L1_DROP_BUF upon handshake + L1DropEn_S[i] = (l1_ar_drop[i] & l1_ar_done[i]) | + (l1_xw_drop[i] & l1_xw_done[i]); + + end else if ( rab_miss[i] ) begin + // 2. Save - Make sure L2 is really available. + l1_ar_save[i] = int_rtrans_drop[i] & ~L2Busy_S[i]; + l1_xw_save[i] = int_wtrans_drop[i] & ~L2Busy_S[i]; + + // Store to L2_IN_BUF upon handshake - triggers the L2 TLB + L2InEn_S[i] = (l1_ar_save[i] & l1_ar_done[i]) | + (l1_xw_save[i] & l1_xw_done[i]); + end + end + + /* + * L2 output control + L2_OUT_BUF management + R/B sender control + W buffer control + * + * Perform L1 R transaction drops unless the L2 output buffer holds valid data. The AXI specs + * require the B response to be sent only after consuming/discarding the corresponding data + * in the W channel. Thus, we only send L2 drop request to the W buffer here. The drop + * request to the B sender is then sent by the W buffer autonomously. + * + * L1 AW/W drop requests are managed by L1_DROP_SAVE. + */ + always_comb begin : L2_ACCEPT_DROP_SAVE + + l2_ar_addr[i] = 'b0; + l2_aw_addr[i] = 'b0; + l2_ar_accept[i] = 1'b0; + l2_xr_drop[i] = 1'b0; + l2_xw_accept[i] = 1'b0; + l2_xw_drop[i] = 1'b0; + + l1_r_drop[i] = 1'b0; + + lx_id_drop[i] = 'b0; + lx_len_drop[i] = 'b0; + lx_prefetch_drop[i] = 1'b0; + lx_hit_drop[i] = 1'b0; + + L1DropValid_SN[i] = L1DropValid_SP[i] | L1DropEn_S[i]; + L2OutValid_SN[i] = L2OutValid_SP[i]; + L2OutReady_S[i] = 1'b0; + L2OutEn_S[i] = 1'b0; + + L2Miss_S[i] = 1'b0; + int_multi[i] = 1'b0; + int_prot[i] = 1'b0; + + if (L2OutValid_SP[i] == 1'b0) begin + + // Drop L1 from R senders + if (L1DropValid_SP[i] == 1'b1) begin + + // Only perform the R sender drop here. + if (~L1DropRwType_DP[i]) begin + + l1_r_drop[i] = 1'b1; + lx_id_drop[i] = L1DropId_DP[i]; + lx_len_drop[i] = L1DropLen_DP[i]; + lx_prefetch_drop[i] = L1DropPrefetch_S[i]; + lx_hit_drop[i] = 1'b1; // there are no drops for L1 misses + + // Invalidate L1_DROP_BUF upon handshake + if ( l1_r_drop[i] & l1_r_done[i] ) begin + + L1DropValid_SN[i] = 1'b0; + int_prot[i] = L1DropProt_DP[i]; + int_multi[i] = L1DropMulti_DP[i]; + end + + end else begin + // Invalidate L1_DROP_BUF + L1DropValid_SN[i] = 1'b0; + int_prot[i] = L1DropProt_DP[i]; + int_multi[i] = L1DropMulti_DP[i]; + end + end + + end else begin // L2_OUT_BUF has valid data + + if ( L2OutHit_SP[i] & ~(L2OutPrefetch_S[i] | L2OutProt_SP[i] | L2OutMulti_SP[i]) ) begin + + l2_ar_addr[i] = L2OutAddr_DP[i]; + l2_aw_addr[i] = L2OutAddr_DP[i]; + + l2_ar_accept[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1; + l2_xw_accept[i] = L2OutRwType_DP[i] ? 
1'b1 : 1'b0; + + // Invalidate L2_OUT_BUF upon handshake + L2OutValid_SN[i] = ~( (l2_ar_accept[i] & l2_ar_done[i]) | + (l2_xw_accept[i] & l2_xw_done[i]) ); + end else begin + + lx_id_drop[i] = L2OutId_DP[i]; + lx_len_drop[i] = L2OutLen_DP[i]; + lx_prefetch_drop[i] = L2OutPrefetch_S[i]; + lx_hit_drop[i] = L2OutHit_SP[i]; + + // The l2_xr_drop will also perform the handshake with the R sender + l2_xr_drop[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1; + l2_xw_drop[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0; + + // Invalidate L1_DROP_BUF upon handshake + if ( (l2_xr_drop[i] & l2_xr_done[i]) | (l2_xw_drop[i] & l2_xw_done[i]) ) begin + + L2OutValid_SN[i] = 1'b0; + L2Miss_S[i] = ~L2OutHit_SP[i]; + int_prot[i] = L2OutProt_SP[i]; + int_multi[i] = L2OutMulti_SP[i]; + end + end + end + + // Only accept new L2 output after ongoing drops have finished. + if ( (l2_xr_drop[i] == l2_xr_done[i]) & + (l2_xw_drop[i] == l2_xw_done[i]) & + (l1_r_drop[i] == l1_r_done[i] ) ) begin + // Store to L2_OUT_BUF upon handshake with L2 TLB module + if ( (L2OutValid_SP[i] == 1'b0) && (L2OutValid_S[i] == 1'b1) ) begin + L2OutValid_SN[i] = 1'b1; + L2OutReady_S[i] = 1'b1; + L2OutEn_S[i] = 1'b1; + end + end + end + + /* + * L1 drop buffer + * + * Used in case of multi, prot and prefetch hits in the L1 TLB. + */ + always_ff @(posedge Clk_CI) begin : L1_DROP_BUF + if (Rst_RBI == 0) begin + L1DropProt_DP[i] <= 1'b0; + L1DropMulti_DP[i] <= 1'b0; + L1DropRwType_DP[i] <= 1'b0; + L1DropUser_DP[i] <= 'b0; + L1DropId_DP[i] <= 'b0; + L1DropLen_DP[i] <= 'b0; + L1DropAddr_DP[i] <= 'b0; + end else if (L1DropEn_S[i] == 1'b1) begin + L1DropProt_DP[i] <= L1OutProt_D[i] ; + L1DropMulti_DP[i] <= L1OutMulti_D[i] ; + L1DropRwType_DP[i] <= L1OutRwType_D[i]; + L1DropUser_DP[i] <= L1OutUser_D[i] ; + L1DropId_DP[i] <= L1OutId_D[i] ; + L1DropLen_DP[i] <= L1OutLen_D[i] ; + L1DropAddr_DP[i] <= L1OutAddr_D[i] ; + end + end // always_ff @ (posedge Clk_CI) + + /* + * L2 input buffer + * + * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB. 
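+    * The buffer is written when L2InEn_S is asserted by L1_DROP_SAVE, i.e. upon the save
+    * handshake with the AR/AW senders; the registered address and type then drive the
+    * request inputs of the l2_tlb instance below, and L2InEn_S itself starts the lookup.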
+ */ + always_ff @(posedge Clk_CI) begin : L2_IN_BUF + if (Rst_RBI == 0) begin + L2InRwType_DP[i] <= 1'b0; + L2InUser_DP[i] <= 'b0; + L2InId_DP[i] <= 'b0; + L2InLen_DP[i] <= 'b0; + L2InAddr_DP[i] <= 'b0; + end else if (L2InEn_S[i] == 1'b1) begin + L2InRwType_DP[i] <= L1OutRwType_D[i]; + L2InUser_DP[i] <= L1OutUser_D[i] ; + L2InId_DP[i] <= L1OutId_D[i] ; + L2InLen_DP[i] <= L1OutLen_D[i] ; + L2InAddr_DP[i] <= L1OutAddr_D[i] ; + end + end // always_ff @ (posedge Clk_CI) + + l2_tlb + #( + .AXI_S_ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), + .AXI_M_ADDR_WIDTH ( AXI_M_ADDR_WIDTH ), + .AXI_LITE_DATA_WIDTH ( AXI_LITE_DATA_WIDTH ), + .AXI_LITE_ADDR_WIDTH ( AXI_LITE_ADDR_WIDTH ), + .N_SETS ( `RAB_L2_N_SETS ), + .N_OFFSETS ( `RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS ), + .N_PAR_VA_RAMS ( `RAB_L2_N_PAR_VA_RAMS ), + .HIT_OFFSET_STORE_WIDTH ( log2(`RAB_L2_N_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS) ) + ) + u_l2_tlb + ( + .clk_i ( Clk_CI ), + .rst_ni ( Rst_RBI ), + + // Config inputs + .we_i ( L2CfgWE_S[i] ), + .waddr_i ( L2CfgWAddr_D[i] ), + .wdata_i ( L2CfgWData_D[i] ), + + // Request input + .start_i ( L2InEn_S[i] ), + .busy_o ( L2Busy_S[i] ), + .rw_type_i ( L2InRwType_DP[i] ), + .in_addr_i ( L2InAddr_DP[i] ), + + // Response output + .out_ready_i ( L2OutReady_S[i] ), + .out_valid_o ( L2OutValid_S[i] ), + .hit_o ( L2OutHit_SN[i] ), + .miss_o ( L2OutMiss_SN[i] ), + .prot_o ( L2OutProt_SN[i] ), + .multi_o ( L2OutMulti_SN[i] ), + .cache_coherent_o ( L2OutCC_SN[i] ), + .out_addr_o ( L2OutAddr_DN[i] ) + ); + + /* + * L2 output buffer + * + * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB. + */ + always_ff @(posedge Clk_CI) begin : L2_OUT_BUF + if (Rst_RBI == 0) begin + L2OutRwType_DP[i] <= 1'b0; + L2OutUser_DP[i] <= 'b0; + L2OutLen_DP[i] <= 'b0; + L2OutId_DP[i] <= 'b0; + L2OutInAddr_DP[i] <= 'b0; + + L2OutHit_SP[i] <= 1'b0; + L2OutMiss_SP[i] <= 1'b0; + L2OutProt_SP[i] <= 1'b0; + L2OutMulti_SP[i] <= 1'b0; + L2OutCC_SP[i] <= 1'b0; + L2OutAddr_DP[i] <= 'b0; + end else if (L2OutEn_S[i] == 1'b1) begin + L2OutRwType_DP[i] <= L2InRwType_DP[i]; + L2OutUser_DP[i] <= L2InUser_DP[i] ; + L2OutLen_DP[i] <= L2InLen_DP[i] ; + L2OutId_DP[i] <= L2InId_DP[i] ; + L2OutInAddr_DP[i] <= L2InAddr_DP[i] ; + + L2OutHit_SP[i] <= L2OutHit_SN[i] ; + L2OutMiss_SP[i] <= L2OutMiss_SN[i] ; + L2OutProt_SP[i] <= L2OutProt_SN[i] ; + L2OutMulti_SP[i] <= L2OutMulti_SN[i]; + L2OutCC_SP[i] <= L2OutCC_SN[i] ; + L2OutAddr_DP[i] <= L2OutAddr_DN[i] ; + end + end // always_ff @ (posedge Clk_CI) + + always_ff @(posedge Clk_CI) begin : BUF_VALID + if (Rst_RBI == 0) begin + L1DropValid_SP[i] = 1'b0; + L2OutValid_SP[i] = 1'b0; + end else begin + L1DropValid_SP[i] = L1DropValid_SN[i]; + L2OutValid_SP[i] = L2OutValid_SN[i]; + end + end + + always_comb begin : BUF_TO_PREFETCH + // L1 Drop Buf + if (L1DropUser_DP[i] == {AXI_USER_WIDTH{1'b1}}) + L1DropPrefetch_S[i] = 1'b1; + else + L1DropPrefetch_S[i] = 1'b0; + + // L2 Out Buf + if (L2OutUser_DP[i] == {AXI_USER_WIDTH{1'b1}}) + L2OutPrefetch_S[i] = 1'b1; + else + L2OutPrefetch_S[i] = 1'b0; + end + + assign l2_cache_coherent[i] = L2OutCC_SP[i]; + assign int_miss[i] = L2Miss_S[i]; + + end else begin : L2_TLB_STUB // if (ENABLE_L2TLB[i] == 1) + + assign l1_ar_drop[i] = int_rtrans_drop[i]; + assign l1_r_drop[i] = int_rtrans_drop[i]; + assign l1_xw_drop[i] = int_wtrans_drop[i]; + + assign l1_ar_save[i] = 1'b0; + assign l1_xw_save[i] = 1'b0; + assign l2_xw_accept[i] = 1'b0; + assign l2_xr_drop[i] = 1'b0; + assign l2_xw_drop[i] = 1'b0; + + assign l2_ar_addr[i] = 'b0; + assign l2_aw_addr[i] = 'b0; + 
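+      // Without an L2 TLB there is no L1 drop buffer; the drop metadata below is taken
+      // directly from the current AW/AR request signals instead.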
+ assign l1_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] : + int_rtrans_drop[i] ? int_arid[i] : + '0; + assign l1_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] : + int_rtrans_drop[i] ? int_arlen[i] : + '0; + assign l1_prefetch_drop[i] = rab_prefetch[i]; + assign l1_hit_drop[i] = ~rab_miss[i]; + + assign lx_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] : + int_rtrans_drop[i] ? int_arid[i] : + '0; + assign lx_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] : + int_rtrans_drop[i] ? int_arlen[i] : + '0; + assign lx_prefetch_drop[i] = rab_prefetch[i]; + assign lx_hit_drop[i] = ~rab_miss[i]; + + assign l2_cache_coherent[i] = 1'b0; + + assign int_miss[i] = rab_miss[i]; + assign int_prot[i] = rab_prot[i]; + assign int_multi[i] = rab_multi[i]; + + // unused signals + assign L2Miss_S[i] = 1'b0; + + assign L1OutRwType_D[i] = 1'b0; + assign L1OutProt_D[i] = 1'b0; + assign L1OutMulti_D[i] = 1'b0; + + assign L1DropRwType_DP[i] = 1'b0; + assign L1DropUser_DP[i] = 'b0; + assign L1DropId_DP[i] = 'b0; + assign L1DropLen_DP[i] = 'b0; + assign L1DropAddr_DP[i] = 'b0; + assign L1DropProt_DP[i] = 1'b0; + assign L1DropMulti_DP[i] = 1'b0; + + assign L1DropEn_S[i] = 1'b0; + assign L1DropPrefetch_S[i] = 1'b0; + assign L1DropValid_SN[i] = 1'b0; + assign L1DropValid_SP[i] = 1'b0; + + assign L2InRwType_DP[i] = 1'b0; + assign L2InUser_DP[i] = 'b0; + assign L2InId_DP[i] = 'b0; + assign L2InLen_DP[i] = 'b0; + assign L2InAddr_DP[i] = 'b0; + + assign L2InEn_S[i] = 1'b0; + + assign L2OutHit_SN[i] = 1'b0; + assign L2OutMiss_SN[i] = 1'b0; + assign L2OutProt_SN[i] = 1'b0; + assign L2OutMulti_SN[i] = 1'b0; + assign L2OutCC_SN[i] = 1'b0; + assign L2OutAddr_DN[i] = 'b0; + + assign L2OutRwType_DP[i] = 1'b0; + assign L2OutUser_DP[i] = 'b0; + assign L2OutId_DP[i] = 'b0; + assign L2OutLen_DP[i] = 'b0; + assign L2OutInAddr_DP[i] = 'b0; + assign L2OutHit_SP[i] = 1'b0; + assign L2OutMiss_SP[i] = 1'b0; + assign L2OutProt_SP[i] = 1'b0; + assign L2OutMulti_SP[i] = 1'b0; + assign L2OutCC_SP[i] = 1'b0; + assign L2OutAddr_DP[i] = 'b0; + + assign L2OutEn_S[i] = 1'b0; + assign L2OutPrefetch_S[i] = 1'b0; + assign L2Busy_S[i] = 1'b0; + assign L2OutValid_S[i] = 1'b0; + assign L2OutValid_SN[i] = 1'b0; + assign L2OutValid_SP[i] = 1'b0; + assign L2OutReady_S[i] = 1'b0; + + end // !`ifdef ENABLE_L2TLB + end // for (i = 0; i < N_PORTS; i++) + endgenerate + +// }}} +""" +# endmodule +# +# +# // vim: ts=2 sw=2 sts=2 et nosmartindent autoindent foldmethod=marker +# +# diff --git a/src/soc/unused/iommu/axi_rab/check_ram.py b/src/soc/unused/iommu/axi_rab/check_ram.py new file mode 100644 index 00000000..31bf32ea --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/check_ram.py @@ -0,0 +1,240 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class check_ram(Elaboratable): + + def __init__(self): + self.clk_i = Signal() # input + self.rst_ni = Signal() # input + self.in_addr = Signal(ADDR_WIDTH) # input + self.rw_type = Signal() # input + self.ram_we = Signal() # input + self.port0_addr = Signal(1+ERROR p_expression_25) # input + self.port1_addr = Signal(1+ERROR p_expression_25) # input + self.ram_wdata = Signal(RAM_DATA_WIDTH) # input + self.output_sent = Signal() # input + self.output_valid = Signal() # input + self.offset_addr_d = Signal(OFFSET_WIDTH) # input + self.hit_addr = Signal(1+ERROR p_expression_25) # output + self.master = Signal() # output + self.hit = Signal() # output + self.multi_hit = Signal() # output + self.prot = Signal() # output + + def elaborate(self, platform=None): + m 
= Module() + return m + + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# //import CfMath::log2; +# +# //`define MULTI_HIT_FULL_SET +# +# module check_ram +# //#( +# // parameter ADDR_WIDTH = 32, +# // parameter RAM_DATA_WIDTH = 32, +# // parameter PAGE_SIZE = 4096, // 4kB +# // parameter SET_WIDTH = 5, +# // parameter OFFSET_WIDTH = 4 +# // ) +# ( +# input logic clk_i, +# input logic rst_ni, +# input logic [ADDR_WIDTH-1:0] in_addr, +# input logic rw_type, // 1 => write, 0=> read +# input logic ram_we, +# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, +# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr, +# input logic [RAM_DATA_WIDTH-1:0] ram_wdata, +# input logic output_sent, +# input logic output_valid, +# input logic [OFFSET_WIDTH-1:0] offset_addr_d, +# output logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr, +# output logic master, +# output logic hit, +# output logic multi_hit, +# output logic prot +# ); +# +""" #docstring_begin + + localparam IGNORE_LSB = log2(PAGE_SIZE); // 12 + + logic [RAM_DATA_WIDTH-1:0] port0_data_o, port1_data_o; // RAM read data outputs + logic port0_hit, port1_hit; // Ram output matches in_addr + + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr_saved, port1_addr_saved; + + // Hit FSM Signals + typedef enum logic {SEARCH, HIT} hit_state_t; + hit_state_t hit_SP; // Hit FSM state + hit_state_t hit_SN; // Hit FSM next state + + // Multi Hit FSM signals +`ifdef MULTI_HIT_FULL_SET + typedef enum logic[1:0] {NO_HITS, ONE_HIT, MULTI_HIT} multi_state_t; + multi_state_t multi_SP; // Multi Hit FSM state + multi_state_t multi_SN; // Multi Hit FSM next state + + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_saved; + logic master_saved; +`endif + + //// --------------- Block RAM (Dual Port) -------------- //// + + // The outputs of the BRAMs are only valid if in the previous cycle: + // 1. the inputs were valid, and + // 2. the BRAM was not written to. + // Otherwise, the outputs must be ignored which is controlled by the output_valid signal. + // This signal is driven by the uppler level L2 TLB module. 
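+  // Dual-port entry RAM: port 0 is used both for reconfiguration writes (ram_we) and for
+  // lookups, port 1 is lookup-only. The registered read data is compared against in_addr
+  // below to generate the per-port hit signals.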
+ ram_tp_no_change #( + .ADDR_WIDTH( SET_WIDTH+OFFSET_WIDTH+1 ), + .DATA_WIDTH( RAM_DATA_WIDTH ) + ) + ram_tp_no_change_0 + ( + .clk ( clk_i ), + .we ( ram_we ), + .addr0 ( port0_addr ), + .addr1 ( port1_addr ), + .d_i ( ram_wdata ), + .d0_o ( port0_data_o ), + .d1_o ( port1_data_o ) + ); + + //// Check Ram Outputs + assign port0_hit = (port0_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port0_data_o[RAM_DATA_WIDTH-1:4]); + assign port1_hit = (port1_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port1_data_o[RAM_DATA_WIDTH-1:4]); + //// ----------------------------------------------------- ///// + + //// ------------------- Check if Hit ------------------------ //// + // FSM + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + hit_SP <= SEARCH; + end else begin + hit_SP <= hit_SN; + end + end + + always_ff @(posedge clk_i, negedge rst_ni) begin + if (!rst_ni) begin + port0_addr_saved <= '0; + port1_addr_saved <= '0; + end else begin + port0_addr_saved <= port0_addr; + port1_addr_saved <= port1_addr; + end + end + + always_comb begin + hit_SN = hit_SP; + hit = 1'b0; + hit_addr = 0; + master = 1'b0; + unique case(hit_SP) + SEARCH : + if (output_valid) + if (port0_hit || port1_hit) begin + hit_SN = HIT; + hit = 1'b1; + hit_addr = port0_hit ? {port0_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} : + port1_hit ? {port1_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} : + 0; + master = port0_hit ? port0_data_o[3] : + port1_hit ? port1_data_o[3] : + 1'b0; + end + + HIT : begin +`ifdef MULTI_HIT_FULL_SET // Since the search continues after the first hit, it needs to be saved to be accessed later. + hit = 1'b1; + hit_addr = hit_addr_saved; + master = master_saved; +`endif + if (output_sent) + hit_SN = SEARCH; + end + + default : begin + hit_SN = SEARCH; + end + endcase // case (hit_SP) + end // always_comb begin + + //// ------------------------------------------- //// + + assign prot = output_valid && port0_hit ? ((~port0_data_o[2] && rw_type) || (~port0_data_o[1] && ~rw_type)) : + output_valid && port1_hit ? 
((~port1_data_o[2] && rw_type) || (~port1_data_o[1] && ~rw_type)) : + 1'b0; + + //// ------------------- Multi ------------------- //// +`ifdef MULTI_HIT_FULL_SET + + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + hit_addr_saved <= 0; + master_saved <= 1'b0; + end else if (output_valid) begin + hit_addr_saved <= hit_addr; + master_saved <= master; + end + end + + // FSM + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + multi_SP <= NO_HITS; + end else begin + multi_SP <= multi_SN; + end + end + + always_comb begin + multi_SN = multi_SP; + multi_hit = 1'b0; + unique case(multi_SP) + NO_HITS : + if(output_valid && (port0_hit && port1_hit)) begin + multi_SN = MULTI_HIT; + multi_hit = 1'b1; + end else if(output_valid && (port0_hit || port1_hit)) + multi_SN = ONE_HIT; + + ONE_HIT : + if(output_valid && (port0_hit || port1_hit)) begin + multi_SN = MULTI_HIT; + multi_hit = 1'b1; + end else if (output_sent) + multi_SN = NO_HITS; + + MULTI_HIT : begin + multi_hit = 1'b1; + if (output_sent) + multi_SN = NO_HITS; + end + + endcase // case (multi_SP) + end // always_comb begin + +`else // !`ifdef MULTI_HIT_FULL_SET + assign multi_hit = output_valid && port0_hit && port1_hit; +`endif // !`ifdef MULTI_HIT_FULL_SET + //// ------------------------------------------- //// +""" +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/coreconfig.py b/src/soc/unused/iommu/axi_rab/coreconfig.py new file mode 100644 index 00000000..247d0ce3 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/coreconfig.py @@ -0,0 +1,6 @@ +class CoreConfig: + def __init__(self): + self.N_SLICES = 16 + self.N_REGS = 4*self.N_SLICES + self.ADDR_WIDTH_PHYS = 40 + self.ADDR_WIDTH_VIRT = 32 diff --git a/src/soc/unused/iommu/axi_rab/fsm.py b/src/soc/unused/iommu/axi_rab/fsm.py new file mode 100644 index 00000000..d64b1cb4 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/fsm.py @@ -0,0 +1,243 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class fsm(Elaboratable): + + def __init__(self): + self.Clk_CI = Signal() # input + self.Rst_RBI = Signal() # input + self.port1_addr_valid_i = Signal() # input + self.port2_addr_valid_i = Signal() # input + self.port1_sent_i = Signal() # input + self.port2_sent_i = Signal() # input + self.select_i = Signal() # input + self.no_hit_i = Signal() # input + self.multi_hit_i = Signal() # input + self.no_prot_i = Signal() # input + self.prefetch_i = Signal() # input + self.out_addr_i = Signal(AXI_M_ADDR_WIDTH) # input + self.cache_coherent_i = Signal() # input + self.port1_accept_o = Signal() # output + self.port1_drop_o = Signal() # output + self.port1_miss_o = Signal() # output + self.port2_accept_o = Signal() # output + self.port2_drop_o = Signal() # output + self.port2_miss_o = Signal() # output + self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output + self.cache_coherent_o = Signal() # output + self.miss_o = Signal() # output + self.multi_o = Signal() # output + self.prot_o = Signal() # output + self.prefetch_o = Signal() # output + self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input + self.in_id_i = Signal(AXI_ID_WIDTH) # input + self.in_len_i = Signal(8) # input + self.in_user_i = Signal(AXI_USER_WIDTH) # input + self.in_addr_o = Signal(AXI_S_ADDR_WIDTH) # output + self.in_id_o = Signal(AXI_ID_WIDTH) # output + self.in_len_o = Signal(8) # output + self.in_user_o = Signal(AXI_USER_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + return m + + +# // Copyright 2018 ETH Zurich and University 
of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# //`timescale 1ns / 1ps +# +# module fsm +# #( +# parameter AXI_M_ADDR_WIDTH = 40, +# parameter AXI_S_ADDR_WIDTH = 32, +# parameter AXI_ID_WIDTH = 8, +# parameter AXI_USER_WIDTH = 6 +# ) +# ( +# input logic Clk_CI, +# input logic Rst_RBI, +# +# input logic port1_addr_valid_i, +# input logic port2_addr_valid_i, +# input logic port1_sent_i, +# input logic port2_sent_i, +# input logic select_i, +# input logic no_hit_i, +# input logic multi_hit_i, +# input logic no_prot_i, +# input logic prefetch_i, +# input logic [AXI_M_ADDR_WIDTH-1:0] out_addr_i, +# input logic cache_coherent_i, +# output logic port1_accept_o, +# output logic port1_drop_o, +# output logic port1_miss_o, +# output logic port2_accept_o, +# output logic port2_drop_o, +# output logic port2_miss_o, +# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o, +# output logic cache_coherent_o, +# output logic miss_o, +# output logic multi_o, +# output logic prot_o, +# output logic prefetch_o, +# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i, +# input logic [AXI_ID_WIDTH-1:0] in_id_i, +# input logic [7:0] in_len_i, +# input logic [AXI_USER_WIDTH-1:0] in_user_i, +# output logic [AXI_S_ADDR_WIDTH-1:0] in_addr_o, +# output logic [AXI_ID_WIDTH-1:0] in_id_o, +# output logic [7:0] in_len_o, +# output logic [AXI_USER_WIDTH-1:0] in_user_o +# ); +# +""" #docstring_begin + + //-------------Internal Signals---------------------- + + typedef enum logic {IDLE, WAIT} state_t; + logic state_SP; // Present state + logic state_SN; // Next State + + logic port1_accept_SN; + logic port1_drop_SN; + logic port1_miss_SN; + logic port2_accept_SN; + logic port2_drop_SN; + logic port2_miss_SN; + logic miss_SN; + logic multi_SN; + logic prot_SN; + logic prefetch_SN; + logic cache_coherent_SN; + logic [AXI_M_ADDR_WIDTH-1:0] out_addr_DN; + + logic out_reg_en_S; + + //----------FSM comb------------------------------ + + always_comb begin: FSM_COMBO + state_SN = state_SP; + + port1_accept_SN = 1'b0; + port1_drop_SN = 1'b0; + port1_miss_SN = 1'b0; + port2_accept_SN = 1'b0; + port2_drop_SN = 1'b0; + port2_miss_SN = 1'b0; + miss_SN = 1'b0; + multi_SN = 1'b0; + prot_SN = 1'b0; + prefetch_SN = 1'b0; + cache_coherent_SN = 1'b0; + out_addr_DN = '0; + + out_reg_en_S = 1'b0; // by default hold register output + + unique case(state_SP) + IDLE : + if ( (port1_addr_valid_i & select_i) | (port2_addr_valid_i & ~select_i) ) begin + out_reg_en_S = 1'b1; + state_SN = WAIT; + + // Select inputs for output registers + if (port1_addr_valid_i & select_i) begin + port1_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); + port1_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); + port1_miss_SN = no_hit_i; + port2_accept_SN = 1'b0; + port2_drop_SN = 1'b0; + port2_miss_SN = 1'b0; + end else if (port2_addr_valid_i & ~select_i) begin + port1_accept_SN = 1'b0; + port1_drop_SN = 1'b0; + port1_miss_SN = 1'b0; + port2_accept_SN = ~(no_hit_i 
| multi_hit_i | ~no_prot_i | prefetch_i); + port2_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i); + port2_miss_SN = no_hit_i; + end + + miss_SN = port1_miss_SN | port2_miss_SN; + multi_SN = multi_hit_i; + prot_SN = ~no_prot_i; + prefetch_SN = ~no_hit_i & prefetch_i; + + cache_coherent_SN = cache_coherent_i; + out_addr_DN = out_addr_i; + end + + WAIT : + if ( port1_sent_i | port2_sent_i ) begin + out_reg_en_S = 1'b1; // "clear" the register + state_SN = IDLE; + end + + default : begin + state_SN = IDLE; + end + endcase + end + + //----------FSM seq------------------------------- + + always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: FSM_SEQ + if (Rst_RBI == 1'b0) + state_SP <= IDLE; + else + state_SP <= state_SN; + end + + //----------Output seq-------------------------- + + always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: OUTPUT_SEQ + if (Rst_RBI == 1'b0) begin + port1_accept_o = 1'b0; + port1_drop_o = 1'b0; + port1_miss_o = 1'b0; + port2_accept_o = 1'b0; + port2_drop_o = 1'b0; + port2_miss_o = 1'b0; + miss_o = 1'b0; + multi_o = 1'b0; + prot_o = 1'b0; + prefetch_o = 1'b0; + cache_coherent_o = 1'b0; + out_addr_o = '0; + in_addr_o = '0; + in_id_o = '0; + in_len_o = '0; + in_user_o = '0; + end else if (out_reg_en_S == 1'b1) begin + port1_accept_o = port1_accept_SN; + port1_drop_o = port1_drop_SN; + port1_miss_o = port1_miss_SN; + port2_accept_o = port2_accept_SN; + port2_drop_o = port2_drop_SN; + port2_miss_o = port2_miss_SN; + miss_o = miss_SN; + multi_o = multi_SN; + prot_o = prot_SN; + prefetch_o = prefetch_SN; + cache_coherent_o = cache_coherent_SN; + out_addr_o = out_addr_DN; + in_addr_o = in_addr_i; + in_id_o = in_id_i; + in_len_o = in_len_i; + in_user_o = in_user_i; + end + end // block: OUTPUT_SEQ +""" +# +# endmodule +# +# diff --git a/src/soc/unused/iommu/axi_rab/l2_tlb.py b/src/soc/unused/iommu/axi_rab/l2_tlb.py new file mode 100644 index 00000000..11983f64 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/l2_tlb.py @@ -0,0 +1,550 @@ +# this file has been generated by sv2nmigen + +from nmigen import Signal, Module, Const, Cat, Elaboratable + + +class l2_tlb(Elaboratable): + + def __init__(self): + self.clk_i = Signal() # input + self.rst_ni = Signal() # input + self.we_i = Signal() # input + self.waddr_i = Signal(AXI_LITE_ADDR_WIDTH) # input + self.wdata_i = Signal(AXI_LITE_DATA_WIDTH) # input + self.start_i = Signal() # input + self.busy_o = Signal() # output + self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input + self.rw_type_i = Signal() # input + self.out_ready_i = Signal() # input + self.out_valid_o = Signal() # output + self.hit_o = Signal() # output + self.miss_o = Signal() # output + self.prot_o = Signal() # output + self.multi_o = Signal() # output + self.cache_coherent_o = Signal() # output + self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output + + def elaborate(self, platform=None): + m = Module() + return m + + +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. 
See the License for the +# // specific language governing permissions and limitations under the License. +# +# //`include "pulp_soc_defines.sv" +# +# ////import CfMath::log2; +# +# //`define MULTI_HIT_FULL_SET // Enable full multi hit detection. Always the entire set is searched. +# //`define MULTI_HIT_CUR_CYCLE // Enable partial multi hit detection. Only multi hits in the same search cycle are detected. +# +# //`ifdef MULTI_HIT_FULL_SET +# // `ifndef MULTI_HIT_CUR_CYCLE +# // `define MULTI_HIT_CUR_CYCLE +# // `endif +# //`endif +# +# module l2_tlb +# //#( +# // parameter AXI_S_ADDR_WIDTH = 32, +# // parameter AXI_M_ADDR_WIDTH = 40, +# // parameter AXI_LITE_DATA_WIDTH = 64, +# // parameter AXI_LITE_ADDR_WIDTH = 32, +# // parameter N_SETS = 32, +# // parameter N_OFFSETS = 4, //per port. There are 2 ports. +# // parameter PAGE_SIZE = 4096, // 4kB +# // parameter N_PAR_VA_RAMS = 4, +# // parameter HIT_OFFSET_STORE_WIDTH = 2 // Num of bits of VA RAM offset stored. This should not be greater than OFFSET_WIDTH +# // ) +# ( +# input logic clk_i, +# input logic rst_ni, +# +# input logic we_i, +# input logic [AXI_LITE_ADDR_WIDTH-1:0] waddr_i, +# input logic [AXI_LITE_DATA_WIDTH-1:0] wdata_i, +# +# input logic start_i, +# output logic busy_o, +# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i, +# input logic rw_type_i, //1 => write, 0=> read +# +# input logic out_ready_i, +# output logic out_valid_o, +# output logic hit_o, +# output logic miss_o, +# output logic prot_o, +# output logic multi_o, +# output logic cache_coherent_o, +# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o +# ); +# +""" #docstring_begin + + localparam VA_RAM_DEPTH = N_SETS * N_OFFSETS * 2; + localparam PA_RAM_DEPTH = VA_RAM_DEPTH * N_PAR_VA_RAMS; + localparam VA_RAM_ADDR_WIDTH = log2(VA_RAM_DEPTH); + localparam PA_RAM_ADDR_WIDTH = log2(PA_RAM_DEPTH); + localparam SET_WIDTH = log2(N_SETS); + localparam OFFSET_WIDTH = log2(N_OFFSETS); + localparam LL_WIDTH = log2(N_PAR_VA_RAMS); + localparam IGNORE_LSB = log2(PAGE_SIZE); + + localparam VA_RAM_DATA_WIDTH = AXI_S_ADDR_WIDTH - IGNORE_LSB + 4; + localparam PA_RAM_DATA_WIDTH = AXI_M_ADDR_WIDTH - IGNORE_LSB; + + logic [N_PAR_VA_RAMS-1:0] hit, prot, multi_hit, cache_coherent; + logic [N_PAR_VA_RAMS-1:0] ram_we; + logic last_search, last_search_next; + logic first_search, first_search_next; + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] ram_waddr; + logic [N_PAR_VA_RAMS-1:0][SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr; + logic pa_ram_we; + logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr, pa_port0_waddr; // PA RAM read, Write addr; + logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr_reg_SN, pa_port0_raddr_reg_SP; // registered addresses, needed for WAIT_ON_WRITE; + logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_addr; // PA RAM addr + logic [PA_RAM_DATA_WIDTH-1:0] pa_port0_data, pa_data, pa_port0_data_reg; // PA RAM data + logic pa_ram_store_data_SN, pa_ram_store_data_SP; + logic hit_top, prot_top, multi_hit_top, first_hit_top; + logic output_sent; + int hit_block_num; + + logic searching, search_done; + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, port0_raddr; // VA RAM port0 addr + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr; // VA RAM port1 addr + logic [OFFSET_WIDTH-1:0] offset_addr, offset_addr_d; + logic [OFFSET_WIDTH-1:0] offset_start_addr, offset_end_addr; + logic [SET_WIDTH-1:0] set_num; + + logic va_output_valid; + logic searching_q; + + genvar z; + + // Search FSM + typedef enum logic [1:0] {IDLE, SEARCH, DONE} search_state_t; + search_state_t search_SP; // Present state + search_state_t search_SN; // 
Next State + + // Output FSM + typedef enum logic [1:0] {OUT_IDLE, SEND_OUTPUT, WAIT_ON_WRITE} out_state_t; + out_state_t out_SP; // Present state + out_state_t out_SN; // Next State + + logic miss_next; + logic hit_next; + logic prot_next; + logic multi_next; + logic cache_coherent_next; + + // Generate the VA Block rams and their surrounding logic + generate + for (z = 0; z < N_PAR_VA_RAMS; z++) begin : VA_RAMS + check_ram + #( + .ADDR_WIDTH ( AXI_S_ADDR_WIDTH ), + .RAM_DATA_WIDTH ( VA_RAM_DATA_WIDTH ), + .PAGE_SIZE ( PAGE_SIZE ), + .SET_WIDTH ( SET_WIDTH ), + .OFFSET_WIDTH ( OFFSET_WIDTH ) + ) + u_check_ram + ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .in_addr ( in_addr_i ), + .rw_type ( rw_type_i ), + .ram_we ( ram_we[z] ), + .port0_addr ( port0_addr ), + .port1_addr ( port1_addr ), + .ram_wdata ( wdata_i[VA_RAM_DATA_WIDTH-1:0] ), + .output_sent ( output_sent ), + .output_valid ( va_output_valid ), + .offset_addr_d ( offset_addr_d ), + .hit_addr ( hit_addr[z] ), + .master ( cache_coherent[z] ), + .hit ( hit[z] ), + .multi_hit ( multi_hit[z] ), + .prot ( prot[z] ) + ); + end // for (z = 0; z < N_PORTS; z++) + endgenerate + + ////////////////// ---------------- Control and Address --------------- //////////////////////// + // FSM + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + search_SP <= IDLE; + end else begin + search_SP <= search_SN; + end + end + + always_comb begin : SEARCH_FSM + search_SN = search_SP; + busy_o = 1'b0; + searching = 1'b0; + search_done = 1'b0; + last_search_next = 1'b0; + first_search_next = first_search; + + unique case (search_SP) + IDLE : begin + if (start_i) begin + search_SN = SEARCH; + first_search_next = 1'b1; + end + end + + SEARCH : begin + busy_o = 1'b1; + + // detect last search cycle + if ( (first_search == 1'b0) && (offset_addr == offset_end_addr) ) + last_search_next = 1'b1; + + // pause search during VA RAM reconfigration + if (|ram_we) begin + searching = 1'b0; + end else begin + searching = 1'b1; + first_search_next = 1'b0; + end + + if (va_output_valid) begin + // stop search +`ifdef MULTI_HIT_FULL_SET + if (last_search | prot_top | multi_hit_top) begin +`else + if (last_search | prot_top | multi_hit_top | hit_top ) begin +`endif + search_SN = DONE; + search_done = 1'b1; + end + end + end + + DONE : begin + busy_o = 1'b1; + if (out_valid_o & out_ready_i) + search_SN = IDLE; + end + + default : begin + search_SN = IDLE; + end + endcase // case (prot_SP) + end // always_comb begin + + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + last_search <= 1'b0; + first_search <= 1'b0; + end else begin + last_search <= last_search_next; + first_search <= first_search_next; + end + end + + /* + * VA RAM address generation + * + * The input address and set number, and thus the offset start address, are available in the + * cycle after the start signal. The buffered offset_addr becomes available one cycle later. + * During the first search cycle, we therefore directly use offset_addr_start for the lookup. + */ + assign set_num = in_addr_i[SET_WIDTH+IGNORE_LSB -1 : IGNORE_LSB]; + + assign port0_raddr[OFFSET_WIDTH] = 1'b0; + assign port1_addr [OFFSET_WIDTH] = 1'b1; + + assign port0_raddr[OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr; + assign port1_addr [OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr; + + assign port0_raddr[SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num; + assign port1_addr [SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num; + + assign port0_addr = ram_we ? 
ram_waddr : port0_raddr; + + // The outputs of the BRAMs are only valid if in the previous cycle: + // 1. the inputs were valid, and + // 2. the BRAMs were not written to. + // Otherwise, the outputs must be ignored. + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + searching_q <= 1'b0; + end else begin + searching_q <= searching; + end + end + assign va_output_valid = searching_q; + + // Address offset for looking up the VA RAMs + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + offset_addr <= 0; + end else if (first_search) begin + offset_addr <= offset_start_addr + 1'b1; + end else if (searching) begin + offset_addr <= offset_addr + 1'b1; + end + end + + // Delayed address offest for looking up the PA RAM upon a hit in the VA RAMs + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + offset_addr_d <= 0; + end else if (first_search) begin + offset_addr_d <= offset_start_addr; + end else if (searching) begin + offset_addr_d <= offset_addr_d + 1'b1; + end + end + + // Store the offset addr for hit to reduce latency for next search. + generate + if (HIT_OFFSET_STORE_WIDTH > 0) begin : OFFSET_STORE +`ifndef MULTI_HIT_FULL_SET + logic [N_SETS-1:0][HIT_OFFSET_STORE_WIDTH-1:0] hit_offset_addr; // Contains offset addr for previous hit for every SET. + logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_reg; + + assign offset_start_addr = { hit_offset_addr[set_num] , {{OFFSET_WIDTH-HIT_OFFSET_STORE_WIDTH}{1'b0}} }; + assign offset_end_addr = hit_offset_addr[set_num]-1'b1; + + // Register the hit addr + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + hit_addr_reg <= 0; + end else if (hit_top) begin + hit_addr_reg <= hit_addr[hit_block_num]; + end + end + + // Store hit addr for each set. The next search in the same set will start from the saved addr. + always_ff @(posedge clk_i) begin + if (rst_ni == 0) begin + hit_offset_addr <= 0; + end else if (hit_o) begin + hit_offset_addr[set_num][HIT_OFFSET_STORE_WIDTH-1:0] <= hit_addr_reg[OFFSET_WIDTH-1 : (OFFSET_WIDTH - HIT_OFFSET_STORE_WIDTH)]; + end + end +`else // No need to store offset if full multi hit detection is enabled because the entire SET is searched. + assign offset_start_addr = 0; + assign offset_end_addr = {OFFSET_WIDTH{1'b1}}; +`endif + end else begin // if (HIT_OFFSET_STORE_WIDTH > 0) + assign offset_start_addr = 0; + assign offset_end_addr = {OFFSET_WIDTH{1'b1}}; + end + endgenerate + + assign prot_top = |prot; + + ////////////////////////////////////////////////////////////////////////////////////// + // check for hit, multi hit + // In case of a multi hit, the hit_block_num indicates the lowest VA RAM with a hit. + // In case of a multi hit in the same VA RAM, Port 0 is given priority. 
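+  // The loop below iterates from the highest down to the lowest RAM index, so the lowest
+  // index with a hit ends up in hit_block_num. first_hit_top remembers that a hit was
+  // already seen; a further hit in another RAM then raises multi_hit_top (with
+  // MULTI_HIT_CUR_CYCLE enabled).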
+ always_comb begin : HIT_CHECK + hit_top = |hit; + hit_block_num = 0; + first_hit_top = 1'b0; + multi_hit_top = 1'b0; + for (int i=N_PAR_VA_RAMS-1; i>=0; i--) begin + if (hit[i] == 1'b1) begin +`ifdef MULTI_HIT_CUR_CYCLE + if (multi_hit[i] | first_hit_top ) begin + multi_hit_top = 1'b1; + end +`endif + first_hit_top = 1'b1; + hit_block_num = i; + end + end // for (int i=0; i port1 active + // select = 0 -> port2 active + select[idx] = (curr_priority[idx] & port1_addr_valid[idx]) | ~port2_addr_valid[idx]; + + p1_burst_size[idx] = (port1_len[idx] + 1) << port1_size[idx]; + p2_burst_size[idx] = (port2_len[idx] + 1) << port2_size[idx]; + + // align min addr for max addr computation to allow for smart AXI bursts around the 4k boundary + if (port1_size[idx] == 3'b001) + p1_mask[idx] = 3'b110; + else if (port1_size[idx] == 3'b010) + p1_mask[idx] = 3'b100; + else if (port1_size[idx] == 3'b011) + p1_mask[idx] = 3'b000; + else + p1_mask[idx] = 3'b111; + + p1_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port1_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH]; + p1_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port1_addr[idx][AXI_SIZE_WIDTH-1:0] & p1_mask[idx]; + + if (port2_size[idx] == 3'b001) + p2_mask[idx] = 3'b110; + else if (port2_size[idx] == 3'b010) + p2_mask[idx] = 3'b100; + else if (port2_size[idx] == 3'b011) + p2_mask[idx] = 3'b000; + else + p2_mask[idx] = 3'b111; + + if (port1_user[idx] == {AXI_USER_WIDTH{1'b1}}) + p1_prefetch[idx] = 1'b1; + else + p1_prefetch[idx] = 1'b0; + + if (port2_user[idx] == {AXI_USER_WIDTH{1'b1}}) + p2_prefetch[idx] = 1'b1; + else + p2_prefetch[idx] = 1'b0; + + p2_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port2_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH]; + p2_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port2_addr[idx][AXI_SIZE_WIDTH-1:0] & p2_mask[idx]; + + p1_max_addr[idx] = p1_align_addr[idx] + p1_burst_size[idx] - 1; + p2_max_addr[idx] = p2_align_addr[idx] + p2_burst_size[idx] - 1; + + int_addr_min[idx] = select[idx] ? port1_addr[idx] : port2_addr[idx]; + int_addr_max[idx] = select[idx] ? p1_max_addr[idx] : p2_max_addr[idx]; + int_rw[idx] = select[idx] ? port1_type[idx] : port2_type[idx]; + int_id[idx] = select[idx] ? port1_id[idx] : port2_id[idx]; + int_len[idx] = select[idx] ? port1_len[idx] : port2_len[idx]; + int_user[idx] = select[idx] ? port1_user[idx] : port2_user[idx]; + prefetch[idx] = select[idx] ? p1_prefetch[idx] : p2_prefetch[idx]; + + hit [idx] = | hit_slices [idx]; + prot[idx] = | prot_slices[idx]; + + no_hit [idx] = ~hit [idx]; + no_prot[idx] = ~prot[idx]; + + port1_out_addr[idx] = out_addr_reg[idx]; + port2_out_addr[idx] = out_addr_reg[idx]; + + port1_cache_coherent[idx] = cache_coherent_reg[idx]; + port2_cache_coherent[idx] = cache_coherent_reg[idx]; + end + end + + always_comb + begin + var integer idx_port, idx_slice; + var integer reg_num; + reg_num=0; + for ( idx_port = 0; idx_port < N_PORTS; idx_port++ ) begin + for ( idx_slice = 0; idx_slice < 4*N_SLICES[idx_port]; idx_slice++ ) begin + int_cfg_regs_slices[idx_port][idx_slice] = int_cfg_regs[4+reg_num]; + reg_num++; + end + // int_cfg_regs_slices[idx_port][N_SLICES_MAX:N_SLICES[idx_port]] will be dangling + // Fix to zero. Synthesis will remove these signals. + // int_cfg_regs_slices[idx_port][4*N_SLICES_MAX-1:4*N_SLICES[idx_port]] = 0; + end + end + + always @(posedge Clk_CI or negedge Rst_RBI) + begin : PORT_PRIORITY + var integer idx; + if (Rst_RBI == 1'b0) + curr_priority = 'h0; + else begin + for (idx=0; idx= cfg_min) ? 
1'b1 : 1'b0; + # assign min_below_max = (in_addr_min <= cfg_max) ? 1'b1 : 1'b0; + # assign max_below_max = (in_addr_max <= cfg_max) ? 1'b1 : 1'b0; + # assign out_hit = cfg_en & min_above_min & min_below_max & max_below_max; + # assign out_prot = out_hit & ((in_trans_type & ~cfg_wen) | (~in_trans_type & ~cfg_ren)); + # assign out_addr = in_addr_min - cfg_min + cfg_offset; + m.d.comb += [ + min_above_min.eq(self.in_addr_min >= self.cfg_min), + min_below_max.eq(self.in_addr_min <= self.cfg_max), + max_below_max.eq(self.in_addr_max <= self.cfg_max), + self.out_hit.eq(self.cfg_en & min_above_min & + min_below_max & max_below_max), + self.out_prot.eq(self.out_hit & ( + (self.in_trans_type & ~self.cfg_wen) | (~self.in_trans_type & ~self.cfg_ren))), + self.out_addr.eq(self.in_addr_min - self.cfg_min + self.cfg_offset) + ] + + return m diff --git a/src/soc/unused/iommu/axi_rab/ram_tp_no_change.py b/src/soc/unused/iommu/axi_rab/ram_tp_no_change.py new file mode 100644 index 00000000..d0104735 --- /dev/null +++ b/src/soc/unused/iommu/axi_rab/ram_tp_no_change.py @@ -0,0 +1,97 @@ +# // Copyright 2018 ETH Zurich and University of Bologna. +# // Copyright and related rights are licensed under the Solderpad Hardware +# // License, Version 0.51 (the "License"); you may not use this file except in +# // compliance with the License. You may obtain a copy of the License at +# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# // or agreed to in writing, software, hardware and materials distributed under +# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# // CONDITIONS OF ANY KIND, either express or implied. See the License for the +# // specific language governing permissions and limitations under the License. +# +# /* +# * ram_tp_no_change +# * +# * This code implements a parameterizable two-port memory. Port 0 can read and +# * write while Port 1 can read only. The Xilinx tools will infer a BRAM with +# * Port 0 in "no change" mode, i.e., during a write, it retains the last read +# * value on the output. Port 1 (read-only) is in "write first" mode. Still, it +# * outputs the old data during the write cycle. Note: Port 1 outputs invalid +# * data in the cycle after the write when reading the same address. +# * +# * For more information, see Xilinx PG058 Block Memory Generator Product Guide. 
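+# * A minimal nmigen usage sketch follows; the signal names (cfg_write, lookup0_addr,
+# * lookup1_addr, cfg_wdata) are illustrative only and not part of this file:
+# *
+# *   m = Module()
+# *   m.submodules.tag_ram = tag_ram = ram_tp_no_change()
+# *   m.d.comb += [
+# *       tag_ram.we.eq(cfg_write),        # port 0 write enable
+# *       tag_ram.addr0.eq(lookup0_addr),  # port 0: write/lookup address
+# *       tag_ram.addr1.eq(lookup1_addr),  # port 1: lookup-only address
+# *       tag_ram.d_i.eq(cfg_wdata),
+# *   ]
+# *   # registered read data is then available on tag_ram.d0_o and tag_ram.d1_o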
diff --git a/src/soc/unused/iommu/axi_rab/ram_tp_no_change.py b/src/soc/unused/iommu/axi_rab/ram_tp_no_change.py
new file mode 100644
index 00000000..d0104735
--- /dev/null
+++ b/src/soc/unused/iommu/axi_rab/ram_tp_no_change.py
@@ -0,0 +1,97 @@
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# /*
+# * ram_tp_no_change
+# *
+# * This code implements a parameterizable two-port memory. Port 0 can read and
+# * write while Port 1 can read only. The Xilinx tools will infer a BRAM with
+# * Port 0 in "no change" mode, i.e., during a write, it retains the last read
+# * value on the output. Port 1 (read-only) is in "write first" mode. Still, it
+# * outputs the old data during the write cycle. Note: Port 1 outputs invalid
+# * data in the cycle after the write when reading the same address.
+# *
+# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
+# */
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import Memory
+
+import math
+
+#
+# module ram_tp_no_change
+#   #(
+ADDR_WIDTH = 10
+DATA_WIDTH = 36
+# )
+# (
+#     input                   clk,
+#     input                   we,
+#     input [ADDR_WIDTH-1:0]  addr0,
+#     input [ADDR_WIDTH-1:0]  addr1,
+#     input [DATA_WIDTH-1:0]  d_i,
+#     output [DATA_WIDTH-1:0] d0_o,
+#     output [DATA_WIDTH-1:0] d1_o
+#   );
+
+
+class ram_tp_no_change(Elaboratable):
+
+    def __init__(self):
+        self.we = Signal()               # input
+        self.addr0 = Signal(ADDR_WIDTH)  # input
+        self.addr1 = Signal(ADDR_WIDTH)  # input
+        self.d_i = Signal(DATA_WIDTH)    # input
+        self.d0_o = Signal(DATA_WIDTH)   # output
+        self.d1_o = Signal(DATA_WIDTH)   # output
+
+        DEPTH = int(math.pow(2, ADDR_WIDTH))
+        self.ram = Memory(width=DATA_WIDTH, depth=DEPTH)
+    #
+    # localparam DEPTH = 2**ADDR_WIDTH;
+    #
+    # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
+    # reg [DATA_WIDTH-1:0] d0;
+    # reg [DATA_WIDTH-1:0] d1;
+    #
+    # always_ff @(posedge clk) begin
+    #     if(we == 1'b1) begin
+    #         ram[addr0] <= d_i;
+    #     end else begin
+    #         // only change data if we == false
+    #         d0 <= ram[addr0];
+    #     end
+    #     d1 <= ram[addr1];
+    # end
+    #
+    # assign d0_o = d0;
+    # assign d1_o = d1;
+    #
+
+    def elaborate(self, platform=None):
+        m = Module()
+        m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
+        m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
+        m.submodules.write_ram = write_ram = self.ram.write_port()
+
+        # write port
+        m.d.comb += write_ram.en.eq(self.we)
+        m.d.comb += write_ram.addr.eq(self.addr0)
+        m.d.comb += write_ram.data.eq(self.d_i)
+
+        # read ports
+        m.d.comb += read_ram0.addr.eq(self.addr0)
+        m.d.comb += read_ram1.addr.eq(self.addr1)
+        with m.If(self.we == 0):
+            # port 0 in "no change" mode: hold the output during writes
+            m.d.sync += self.d0_o.eq(read_ram0.data)
+        # port 1 is updated every cycle, matching the reference SV above
+        m.d.sync += self.d1_o.eq(read_ram1.data)
+
+        return m
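The two read behaviours documented above can be summarised with a small cycle-level model: port 0 holds its last read value while a write is in progress ("no change"), whereas port 1 keeps sampling the array every cycle and sees the pre-write contents during the write. A rough sketch (illustrative only; the BRAM quirk of port 1 returning invalid data in the cycle right after a same-address write is not modelled):

    class TwoPortRamModel:
        def __init__(self, depth=1 << 10):
            self.mem = [0] * depth
            self.d0 = 0   # port 0 output: "no change" during writes
            self.d1 = 0   # port 1 output: follows addr1 every cycle

        def step(self, we, addr0, addr1, d_i):
            old0, old1 = self.mem[addr0], self.mem[addr1]
            if we:
                self.mem[addr0] = d_i   # write through port 0
            else:
                self.d0 = old0          # d0 only updates when not writing
            self.d1 = old1              # d1 sees the pre-write contents
            return self.d0, self.d1

    ram = TwoPortRamModel()
    ram.step(we=1, addr0=3, addr1=3, d_i=0xAB)            # write; d1 still shows old data
    assert ram.step(we=0, addr0=3, addr1=3, d_i=0)[0] == 0xAB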
diff --git a/src/soc/unused/iommu/axi_rab/ram_tp_write_first.py b/src/soc/unused/iommu/axi_rab/ram_tp_write_first.py
new file mode 100644
index 00000000..8fd2abb7
--- /dev/null
+++ b/src/soc/unused/iommu/axi_rab/ram_tp_write_first.py
@@ -0,0 +1,93 @@
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+#
+# /*
+# * ram_tp_write_first
+# *
+# * This code implements a parameterizable two-port memory. Port 0 can read and
+# * write while Port 1 can read only. Xilinx Vivado will infer a BRAM in
+# * "write first" mode, i.e., upon a read and write to the same address, the
+# * new value is read. Note: Port 1 outputs invalid data in the cycle after
+# * the write when reading the same address.
+# *
+# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
+# */
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import Memory
+
+import math
+#
+# module ram_tp_write_first
+#   #(
+ADDR_WIDTH = 10
+DATA_WIDTH = 36
+# )
+# (
+#     input                   clk,
+#     input                   we,
+#     input [ADDR_WIDTH-1:0]  addr0,
+#     input [ADDR_WIDTH-1:0]  addr1,
+#     input [DATA_WIDTH-1:0]  d_i,
+#     output [DATA_WIDTH-1:0] d0_o,
+#     output [DATA_WIDTH-1:0] d1_o
+#   );
+
+
+class ram_tp_write_first(Elaboratable):
+
+    def __init__(self):
+        self.we = Signal()               # input
+        self.addr0 = Signal(ADDR_WIDTH)  # input
+        self.addr1 = Signal(ADDR_WIDTH)  # input
+        self.d_i = Signal(DATA_WIDTH)    # input
+        self.d0_o = Signal(DATA_WIDTH)   # output
+        self.d1_o = Signal(DATA_WIDTH)   # output
+
+        DEPTH = int(math.pow(2, ADDR_WIDTH))
+        self.ram = Memory(width=DATA_WIDTH, depth=DEPTH)
+
+    #
+    # localparam DEPTH = 2**ADDR_WIDTH;
+    #
+    # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
+    # reg [ADDR_WIDTH-1:0] raddr0;
+    # reg [ADDR_WIDTH-1:0] raddr1;
+    #
+    # always_ff @(posedge clk) begin
+    #     if(we == 1'b1) begin
+    #         ram[addr0] <= d_i;
+    #     end
+    #     raddr0 <= addr0;
+    #     raddr1 <= addr1;
+    # end
+    #
+    # assign d0_o = ram[raddr0];
+    # assign d1_o = ram[raddr1];
+    #
+
+    def elaborate(self, platform=None):
+        m = Module()
+        m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
+        m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
+        m.submodules.write_ram = write_ram = self.ram.write_port()
+
+        # write port
+        m.d.comb += write_ram.en.eq(self.we)
+        m.d.comb += write_ram.addr.eq(self.addr0)
+        m.d.comb += write_ram.data.eq(self.d_i)
+
+        # read ports
+        m.d.comb += read_ram0.addr.eq(self.addr0)
+        m.d.comb += read_ram1.addr.eq(self.addr1)
+        m.d.sync += self.d0_o.eq(read_ram0.data)
+        m.d.sync += self.d1_o.eq(read_ram1.data)
+
+        return m
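Compared to the "no change" variant above, the only contract difference is read-during-write on port 0: here a simultaneous read and write to the same address returns the freshly written value. A minimal sketch of that port 0 behaviour (illustrative only; port 1's same-address collision caveat is again left out):

    def write_first_port0(mem, we, addr0, d_i):
        if we:
            mem[addr0] = d_i   # the write lands first ...
        return mem[addr0]      # ... so the read returns the new value

    mem = [0] * (1 << 10)
    assert write_first_port0(mem, we=True, addr0=5, d_i=0x123) == 0x123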
diff --git a/src/soc/unused/iommu/axi_rab/slice_top.py b/src/soc/unused/iommu/axi_rab/slice_top.py
new file mode 100644
index 00000000..6eedb1cd
--- /dev/null
+++ b/src/soc/unused/iommu/axi_rab/slice_top.py
@@ -0,0 +1,141 @@
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+
+# this file has been generated by sv2nmigen
+
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+import rab_slice
+import coreconfig
+
+#
+# module slice_top
+# //#(
+# //    parameter N_SLICES        = 16,
+# //    parameter N_REGS          = 4*N_SLICES,
+# //    parameter ADDR_WIDTH_PHYS = 40,
+# //    parameter ADDR_WIDTH_VIRT = 32
+# //    )
+# (
+#   input  logic [N_REGS-1:0] [63:0]   int_cfg_regs,
+#   input  logic                       int_rw,
+#   input  logic [ADDR_WIDTH_VIRT-1:0] int_addr_min,
+#   input  logic [ADDR_WIDTH_VIRT-1:0] int_addr_max,
+#   input  logic                       multi_hit_allow,
+#   output logic                       multi_hit,
+#   output logic [N_SLICES-1:0]        prot,
+#   output logic [N_SLICES-1:0]        hit,
+#   output logic                       cache_coherent,
+#   output logic [ADDR_WIDTH_PHYS-1:0] out_addr
+# );
+#
+
+
+class slice_top(Elaboratable):
+
+    def __init__(self):
+        # FIXME self.int_cfg_regs = Signal()  # input
+        self.params = coreconfig.CoreConfig()  # rename ?
+        self.int_rw = Signal()  # input
+        self.int_addr_min = Signal(self.params.ADDR_WIDTH_VIRT)  # input
+        self.int_addr_max = Signal(self.params.ADDR_WIDTH_VIRT)  # input
+        self.multi_hit_allow = Signal()  # input
+        self.multi_hit = Signal()  # output
+        self.prot = Signal(self.params.N_SLICES)  # output
+        self.hit = Signal(self.params.N_SLICES)  # output
+        self.cache_coherent = Signal()  # output
+        self.out_addr = Signal(self.params.ADDR_WIDTH_PHYS)  # output
+
+    def elaborate(self, platform=None):
+        m = Module()
+
+        first_hit = Signal()
+
+        for i in range(self.params.N_SLICES):
+            # TODO pass params / core config here
+            u_slice = rab_slice.rab_slice(self.params)
+            setattr(m.submodules, "u_slice%d" % i, u_slice)
+            # TODO set param and connect ports
+
+        # In case of a multi hit, the lowest slice with a hit is selected.
+        # TODO always_comb begin : HIT_CHECK
+        m.d.comb += [
+            first_hit.eq(0),
+            self.multi_hit.eq(0),
+            self.out_addr.eq(0),
+            self.cache_coherent.eq(0)]
+
+        for j in range(self.params.N_SLICES):
+            with m.If(self.hit[j] == 1):
+                with m.If(first_hit == 1):
+                    with m.If(self.multi_hit_allow == 0):
+                        m.d.comb += [self.multi_hit.eq(1)]
+                with m.Else():
+                    m.d.comb += [first_hit.eq(1),
+                        # only output first slice that was hit
+                        # SV self.out_addr.eq(slice_out_addr[ADDR_WIDTH_PHYS*j +: ADDR_WIDTH_PHYS]),
+                        # SV self.cache_coherent.eq(int_cfg_regs[4*j+3][3]),
+                    ]
+        return m
+
+    # TODO translate generate statement
+
+
+"""
+  logic [ADDR_WIDTH_PHYS*N_SLICES-1:0] slice_out_addr;
+
+  generate
+    for ( i=0; i
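One way the HIT_CHECK TODO above could be completed without reading back the comb-driven first_hit signal is a Python-level scan over the slices that carries a "hit seen so far" vector. A sketch only, under the assumption that a per-slice list of translated addresses (called slice_out_addrs here; it does not yet exist in this file) becomes available once the slice ports are connected:

    from nmigen import Module, Signal

    def hit_check(m, hit, multi_hit_allow, multi_hit, out_addr, slice_out_addrs):
        n = len(slice_out_addrs)
        seen = Signal(n + 1)                  # seen[j]: a hit occurred in slices 0..j-1
        m.d.comb += seen[0].eq(0)
        for j in range(n):
            m.d.comb += seen[j + 1].eq(seen[j] | hit[j])
            with m.If(hit[j] & seen[j] & ~multi_hit_allow):
                m.d.comb += multi_hit.eq(1)                   # second or later hit
            with m.If(hit[j] & ~seen[j]):
                m.d.comb += out_addr.eq(slice_out_addrs[j])   # lowest hit wins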