+++ /dev/null
+++ /dev/null
-from nmigen import Module, Signal, Elaboratable
-from nmigen.lib.coding import Encoder, PriorityEncoder
class AddressEncoder(Elaboratable):
    """Address Encoder

    Takes a bit vector and encodes the position of its hot bit(s) into
    an address. It wraps both nmigen's Encoder and PriorityEncoder and
    reports whether the input has a single bit hot, multiple bits hot,
    or no bits hot. The address output is always the lowest matching
    address.

    Usage:
        The output ``o`` is valid when either ``single_match`` or
        ``multiple_match`` is high. Otherwise ``o`` is 0.
    """

    def __init__(self, width):
        """ Arguments:
            * width: The desired length of the input vector
        """
        # Internal encoders (both are fed the same input vector)
        self.encoder = Encoder(width)
        self.p_encoder = PriorityEncoder(width)

        # Input
        self.i = Signal(width)

        # Output
        self.single_match = Signal(1)
        self.multiple_match = Signal(1)
        self.o = Signal(max=width)

    def elaborate(self, platform=None):
        m = Module()

        # Register the internal encoders as submodules
        m.submodules.encoder = self.encoder
        m.submodules.p_encoder = self.p_encoder

        # Fan the input vector out to both encoders
        for enc in (self.encoder, self.p_encoder):
            m.d.comb += enc.i.eq(self.i)

        # The priority encoder raises n when the input vector is zero;
        # the plain encoder raises n when the input is not one-hot.
        nothing_set = self.p_encoder.n
        not_one_hot = self.encoder.n

        with m.If(nothing_set):
            # No bit is hot: nothing matched and the address is forced to 0
            m.d.comb += [
                self.single_match.eq(0),
                self.multiple_match.eq(0),
                self.o.eq(0),
            ]
        with m.Else():
            # At least one bit is hot: classify the match and always
            # report the priority encoder's address.
            m.d.comb += [
                self.single_match.eq(~not_one_hot),
                self.multiple_match.eq(not_one_hot),
                self.o.eq(self.p_encoder.o),
            ]

        return m
+++ /dev/null
-from nmigen import Array, Cat, Module, Signal, Elaboratable
-from nmigen.lib.coding import Decoder
-from nmigen.cli import main #, verilog
-from .CamEntry import CamEntry
-from .AddressEncoder import AddressEncoder
class Cam(Elaboratable):
    """ Content Addressable Memory (CAM)

    The purpose of this module is to quickly look up whether an
    entry exists given a data key.

    This module will search for the given data in all internal entries
    and output whether a single or multiple match was found.
    If a single entry is found the address is returned and single_match
    is set HIGH. If multiple entries are found the lowest address is
    returned and multiple_match is set HIGH. If neither single_match nor
    multiple_match is HIGH this implies no match was found. To write
    to the CAM set the address bus to the desired entry and set
    write_enable HIGH. Entry management should be performed one level
    above this block as lookup is performed within.

    Notes:
        The read and write operations take one clock cycle to complete.
        Currently the read_warning line is present for interfacing but
        is not necessary for this design. This module is capable of
        writing in the first cycle, reading on the second, and outputting
        the correct address on the third.
    """

    def __init__(self, data_size, cam_size):
        """ Arguments:
            * data_size: (bits) The bit size of the data
            * cam_size: (number) The number of entries in the CAM
        """
        # Internal
        self.cam_size = cam_size
        self.encoder = AddressEncoder(cam_size)
        self.decoder = Decoder(cam_size)
        self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))

        # Input
        self.enable = Signal(1)
        self.write_enable = Signal(1)
        self.data_in = Signal(data_size)  # The data to be written
        # Mask for ternary writes (not referenced by elaborate() below)
        self.data_mask = Signal(data_size)
        self.address_in = Signal(max=cam_size)  # address of entry to write

        # Output
        self.read_warning = Signal(1)  # High when a read interrupts a write
        self.single_match = Signal(1)  # High when there is only one match
        self.multiple_match = Signal(1)  # High when at least two matches
        self.match_address = Signal(max=cam_size)  # Lowest address matched

    def elaborate(self, platform=None):
        m = Module()

        # AddressEncoder for match types and output address
        m.submodules.AddressEncoder = self.encoder
        # Decoder is used to select which entry will be written to
        m.submodules.Decoder = self.decoder
        # CamEntry array submodules (note: these are added anonymously)
        entry_array = self.entry_array
        m.submodules += entry_array

        # Decoder logic: one-hot select of the entry addressed for writing
        m.d.comb += [
            self.decoder.i.eq(self.address_in),
            self.decoder.n.eq(0)
        ]

        encoder_vector = []
        with m.If(self.enable):
            # Drive the command and key of every CamEntry
            for index in range(self.cam_size):
                # Write operation: only the decoded entry gets the write
                # command (2); every other entry is idle (0)
                with m.If(self.write_enable):
                    with m.If(self.decoder.o[index]):
                        m.d.comb += entry_array[index].command.eq(2)
                    with m.Else():
                        m.d.comb += entry_array[index].command.eq(0)
                # Read operation: every entry compares against data_in
                with m.Else():
                    m.d.comb += entry_array[index].command.eq(1)

                # Send data input to all entries
                m.d.comb += entry_array[index].data_in.eq(self.data_in)
                # Collect each entry's match flag for the encoder
                encoder_vector.append(entry_array[index].match)

            # Give input to and accept output from the encoder module
            m.d.comb += [
                self.encoder.i.eq(Cat(*encoder_vector)),
                self.single_match.eq(self.encoder.single_match),
                self.multiple_match.eq(self.encoder.multiple_match),
                self.match_address.eq(self.encoder.o)
            ]

        # If the CAM is not enabled set all outputs to 0
        with m.Else():
            m.d.comb += [
                self.read_warning.eq(0),
                self.single_match.eq(0),
                self.multiple_match.eq(0),
                self.match_address.eq(0)
            ]

        return m

    def ports(self):
        """Return every input and output signal of the CAM.

        address_in is included so that a top-level build exposes the
        write address; without it the signal would be left undriven and
        only one entry could ever be selected for writing.
        """
        return [self.enable, self.write_enable,
                self.data_in, self.data_mask, self.address_in,
                self.read_warning, self.single_match,
                self.multiple_match, self.match_address]
if __name__ == '__main__':
    # Build a CAM with 4-bit data and 4 entries and pass it to the nmigen CLI
    dut = Cam(4, 4)
    main(dut, ports=dut.ports())
+++ /dev/null
-from nmigen import Module, Signal, Elaboratable
class CamEntry(Elaboratable):
    """ Content Addressable Memory (CAM) Entry

    Represents a single entry within a CAM. On a read command the
    stored data is compared with the incoming data and the result is
    registered on ``match``. On a write command the incoming data is
    stored into the internal register.
    """

    def __init__(self, data_size):
        """ Arguments:
            * data_size: (bit count) The size of the data
        """
        # Input
        # Command encoding: 00 => NA  01 => Read  10 => Write  11 => Reset
        self.command = Signal(2)
        self.data_in = Signal(data_size)  # Data input when writing

        # Output
        self.match = Signal(1)  # Result of the internal/input key comparison
        self.data = Signal(data_size)

    def elaborate(self, platform=None):
        m = Module()

        with m.Switch(self.command):
            # NA: no comparison requested, so the match flag is cleared
            with m.Case("00"):
                m.d.sync += self.match.eq(0)
            # Read: register whether the stored key equals the input key
            with m.Case("01"):
                m.d.sync += self.match.eq(self.data == self.data_in)
            # Write: capture the input key and clear the match flag
            with m.Case("10"):
                m.d.sync += [
                    self.data.eq(self.data_in),
                    self.match.eq(0)
                ]
            # Any remaining command value: clear both key and match flag
            with m.Case():
                m.d.sync += [
                    self.match.eq(0),
                    self.data.eq(0)
                ]

        return m
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen.cli import verilog, rtlil
class LFSRPolynomial(set):
    """ implements a polynomial for use in LFSR

    The polynomial is stored as the set of its exponents. Exponent 0
    (the constant term) is always present.
    """

    def __init__(self, exponents=()):
        """ Arguments:
            * exponents: iterable of non-negative ints. Any iterable is
              accepted, including one-shot iterators and generators.
        """
        # Materialise first: the values are walked twice (validation, then
        # set construction), which would silently drop every exponent if
        # a generator were passed in.
        exponents = tuple(exponents)
        for e in exponents:
            # NOTE: kept as assertions so the raised exception type
            # (AssertionError) is unchanged for existing callers.
            assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
            assert (e >= 0), ValueError("%d must not be negative" % e)
        set.__init__(self, set(exponents).union({0}))  # must contain zero

    @property
    def max_exponent(self):
        """The largest exponent (the degree of the polynomial)."""
        return max(self)  # derived from set, so this is the max exponent

    @property
    def exponents(self):
        """The exponents as a list, sorted from highest to lowest."""
        return sorted(self, reverse=True)

    def __str__(self):
        # Template is chosen by min(i, 2): 0 -> "1", 1 -> "x", >=2 -> "x^i"
        expd = {0: "1", 1: 'x', 2: "x^{}"}
        terms = (expd[min(i, 2)].format(i) for i in self.exponents)
        return " + ".join(terms)

    def __repr__(self):
        return "LFSRPolynomial(%s)" % self.exponents
# Maximal-length LFSR feedback polynomials, one per register width (2-24 bits).
# Selected from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa
LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])
LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])
LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])
LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])
LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])
LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])
LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])
LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])
LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])
LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])
LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])
LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])
LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])
LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])
LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])
LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])
LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])
LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])
LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])
LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])
LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])
LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])
LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])
class LFSR(LFSRPolynomial, Elaboratable):
    """ implements a Linear Feedback Shift Register
    """

    def __init__(self, polynomial):
        """ Inputs:
            ------
            :polynomial: the polynomial to feedback on. may be a
                         LFSRPolynomial instance or an iterable of ints
                         (list/tuple/generator)
            :enable:     enable (set LO to disable. NOTE: defaults to HI)

            Outputs:
            -------
            :state:      the LFSR state. bitwidth is taken from the
                         polynomial maximum exponent.

            Note: if an LFSRPolynomial is passed in as the input, because
            LFSRPolynomial is derived from set() it's ok:
            LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
        """
        LFSRPolynomial.__init__(self, polynomial)
        self.state = Signal(self.max_exponent, reset=1)
        self.enable = Signal(reset=1)

    @property
    def width(self):
        """Bit width of the state register (the maximum exponent)."""
        return self.max_exponent

    def elaborate(self, platform):
        m = Module()

        # do absolutely nothing if the polynomial is empty (always has a zero)
        if self.max_exponent <= 1:
            return m

        # create XOR-bunch, select bits from state based on exponent.
        # Starting from a constant 0 is harmless in an XOR chain.
        feedback = Const(0)
        for exponent in self:
            if exponent > 0:  # exponent 0 has no matching state bit
                feedback ^= self.state[exponent - 1]

        # if enabled, shift-and-feedback
        with m.If(self.enable):
            # shift up lower bits by Cat'ing in a new bit zero (feedback)
            newstate = Cat(feedback, self.state[:-1])
            m.d.sync += self.state.eq(newstate)

        return m
# Example: emit RTLIL for a 24-bit LFSR
if __name__ == '__main__':
    il_text = rtlil.convert(LFSR(LFSR_POLY_24))
    with open("lfsr2_p24.il", "w") as il_file:
        il_file.write(il_text)
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from nmigen import Module
-from typing import Iterable, Optional, Iterator, Any, Union
-from typing_extensions import final
class LFSRPolynomial(set):
    # Set of polynomial exponents; see the implementation module for details.
    def __init__(self, exponents: Iterable[int] = ()) -> None: ...
    @property
    def max_exponent(self) -> int: ...
    @property
    def exponents(self) -> list[int]: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
class LFSR(LFSRPolynomial):
    # The implementation derives from LFSRPolynomial and creates both
    # attributes below in __init__; they are typed loosely as Any because
    # this stub does not import the nmigen Signal type.
    state: Any
    enable: Any
    def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...
    @property
    def width(self) -> int: ...
    def elaborate(self, platform: Any) -> Module: ...
+++ /dev/null
- python3 Cam.py generate -t v > Cam.v
+++ /dev/null
-from nmigen import Cat, Memory, Module, Signal, Elaboratable
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
class MemorySet(Elaboratable):
    """One way of a set-associative cache, backed by a single Memory.

    Each memory word is written as Cat(1, data_i, tag): a flag bit in
    bit 0, then the data, then the tag. On the read side the flag bit is
    taken from bit index ``active`` and the data starts at ``active + 1``.
    NOTE(review): the read and write layouts only agree when active == 0
    - confirm before using any other value.
    """

    def __init__(self, data_size, tag_size, set_count, active):
        """ Arguments:
            * data_size: (bits) width of the stored data
            * tag_size: (bits) width of the stored tag
            * set_count: (number) depth of the memory
            * active: bit index of the active flag within a memory word
        """
        self.active = active
        self.data_size = data_size
        self.tag_size = tag_size

        # Word = tag + data + one extra bit for the active flag
        # XXX TODO, use rd-enable and wr-enable?
        self.mem = Memory(tag_size + data_size + 1, set_count)
        self.r = self.mem.read_port()
        self.w = self.mem.write_port()

        # inputs (address)
        self.cset = Signal(max=set_count)  # The set to be checked
        self.tag = Signal(tag_size)  # The tag to find
        self.data_i = Signal(data_size)  # Incoming data

        # outputs
        self.valid = Signal()
        self.data_o = Signal(data_size)  # Outgoing data (excludes tag)

    def elaborate(self, platform):
        m = Module()
        m.submodules.mem = self.mem
        m.submodules.r = self.r
        m.submodules.w = self.w

        # temporaries
        active_bit = Signal()
        tag_valid = Signal()

        # Bit ranges of the data and tag fields inside a memory word
        data_start = self.active + 1
        data_end = data_start + self.data_size
        tag_end = data_end + self.tag_size

        word = self.r.data
        stored_tag = word[data_end:tag_end]

        m.d.comb += [
            # connect the read port address to the set/entry
            self.r.addr.eq(self.cset),
            # pull the active bit out of the word
            active_bit.eq(word[self.active]),
            # validate given tag vs stored tag
            tag_valid.eq(self.tag == stored_tag),
            # an entry is only valid if the tags match AND it is marked
            # as a valid entry
            self.valid.eq(tag_valid & active_bit),
            # output data: TODO, check rd-enable?
            self.data_o.eq(word[data_start:data_end]),
        ]

        # connect the write port addr to the set/entry (only if write
        # enabled, which is only done on a match - see SAC.write_entry)
        with m.If(self.w.en):
            m.d.comb += [
                self.w.addr.eq(self.cset),
                self.w.data.eq(Cat(1, self.data_i, self.tag)),
            ]

        return m
+++ /dev/null
-from nmigen import Module, Signal, Elaboratable
-from nmigen.cli import main
-from TLB.PteEntry import PteEntry
class PermissionValidator(Elaboratable):
    """ The purpose of this Module is to check the Permissions of a given PTE
        against the requested access permissions.

        This module will either validate (by setting the valid bit HIGH)
        the request or find a permission fault and invalidate (by setting
        the valid bit LOW) the request
    """

    def __init__(self, asid_size, pte_size):
        """ Arguments:
            * asid_size: (bit count) The size of the asid to be processed
            * pte_size: (bit count) The size of the pte to be processed

            Return:
            * valid HIGH when permissions are correct
        """
        # Internal
        self.pte_entry = PteEntry(asid_size, pte_size)

        # Input
        self.data = Signal(asid_size + pte_size)
        self.xwr = Signal(3)  # Execute, Write, Read
        self.super_mode = Signal(1)  # Supervisor Mode
        self.super_access = Signal(1)  # Supervisor Access
        # Address Space IDentifier (ASID); sized from asid_size so that it
        # always has the same width as the ASID parsed out of the entry
        self.asid = Signal(asid_size)

        # Output
        self.valid = Signal(1)  # Denotes if the permissions are correct

    def elaborate(self, platform=None):
        m = Module()

        m.submodules.pte_entry = self.pte_entry
        m.d.comb += self.pte_entry.i.eq(self.data)

        entry = self.pte_entry
        # The request can only be valid when the entry itself is valid,
        # the ASID matches (or the entry is global), and the requested
        # Execute/Write/Read bits equal the entry's.
        asid_ok = (entry.asid == self.asid) | entry.g
        xwr_ok = entry.xwr == self.xwr

        with m.If(entry.v & asid_ok & xwr_ok):
            # Supervisor logic
            with m.If(self.super_mode):
                # Valid if entry is not in user mode or supervisor
                # has Supervisor User Memory (SUM) access via the
                # SUM bit in the sstatus register
                m.d.comb += self.valid.eq((~entry.u) | self.super_access)
            # User logic
            with m.Else():
                # Valid if the entry is in user mode only
                m.d.comb += self.valid.eq(entry.u)
        with m.Else():
            m.d.comb += self.valid.eq(0)

        return m
+++ /dev/null
-from nmigen import Module, Signal, Elaboratable
-from nmigen.cli import main
class PteEntry(Elaboratable):
    """ The purpose of this Module is to centralize the parsing of Page
        Table Entries (PTE) into one module to prevent common mistakes
        and duplication of code. The control bits are parsed out for
        ease of use.

        This module parses according to the standard PTE given by the
        Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
        The Address Space IDentifier (ASID) is appended to the MSB of the
        input and is parsed out as such.

        A valid input Signal would be:
              ASID   PTE
        Bits:[78-64][63-0]

        The output PTE value will include the control bits.
    """

    def __init__(self, asid_size, pte_size):
        """ Arguments:
            * asid_size: (bit count) The size of the asid to be processed
            * pte_size: (bit count) The size of the pte to be processed

            Return:
            * d The Dirty bit from the PTE portion of i
            * a The Accessed bit from the PTE portion of i
            * g The Global bit from the PTE portion of i
            * u The User Mode bit from the PTE portion of i
            * xwr The Execute/Write/Read bit from the PTE portion of i
            * v The Valid bit from the PTE portion of i
            * asid The asid portion of i
            * pte The pte portion of i
        """
        # Internal: the ASID occupies the bits directly above the PTE
        self.asid_start = pte_size
        self.asid_end = pte_size + asid_size

        # Input
        self.i = Signal(asid_size + pte_size)

        # Output
        self.d = Signal(1)  # Dirty bit (From pte)
        self.a = Signal(1)  # Accessed bit (From pte)
        self.g = Signal(1)  # Global Access (From pte)
        self.u = Signal(1)  # User Mode (From pte)
        self.xwr = Signal(3)  # Execute Write Read (From pte)
        self.v = Signal(1)  # Valid (From pte)
        self.asid = Signal(asid_size)  # Associated Address Space IDentifier
        self.pte = Signal(pte_size)  # Full Page Table Entry

    def elaborate(self, platform=None):
        m = Module()

        # Each output paired with the slice of the input that feeds it
        field_map = (
            # control bits of the PTE
            (self.d, self.i[7]),
            (self.a, self.i[6]),
            (self.g, self.i[5]),
            (self.u, self.i[4]),
            (self.xwr, self.i[1:4]),
            (self.v, self.i[0]),
            # ASID sits above the PTE; the PTE is the low pte_size bits
            (self.asid, self.i[self.asid_start:self.asid_end]),
            (self.pte, self.i[0:self.asid_start]),
        )
        for target, source in field_map:
            m.d.comb += target.eq(source)

        return m
+++ /dev/null
# References (the URLs are not present in this copy of the file):
#  * Online simulator of 4-way set-associative cache
#  * Python simulator of a N-way set-associative cache
-from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
-from nmigen.compat.genlib import fsm
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-from .AddressEncoder import AddressEncoder
-from .MemorySet import MemorySet
-# TODO: use a LFSR that advances continuously and picking the bottom
-# few bits from it to select which cache line to replace, instead of PLRU
-# http://bugs.libre-riscv.org/show_bug.cgi?id=71
-from .ariane.plru import PLRU
-from .LFSR import LFSR, LFSR_POLY_24
# Command encodings accepted on SetAssociativeCache.command
SA_NA = "00"  # no action (none)
SA_RD = "01"  # read
SA_WR = "10"  # write
class SetAssociativeCache(Elaboratable):
    """ Set Associative Cache Memory

        The purpose of this module is to generate a memory cache given the
        constraints passed in. This will create a n-way set associative
        cache. It is expected for the SV TLB that the VMA will provide the
        set number while the ASID provides the tag (still to be decided).
    """

    def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
        """ Arguments
            * tag_size (bits): The bit count of the tag
            * data_size (bits): The bit count of the data to be stored
            * set_count (number): The number of sets/entries in the cache
            * way_count (number): The number of slots a data can be stored
                                  in one set
            * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
                    set/entry to write to. otherwise, use a PLRU
        """
        # Internals
        self.lfsr_mode = lfsr
        self.way_count = way_count  # The number of slots in one set
        self.tag_size = tag_size  # The bit count of the tag
        self.data_size = data_size  # The bit count of the data to be stored

        # set up Memory array: one MemorySet per way
        self.mem_array = Array()  # memory array
        for i in range(way_count):
            ms = MemorySet(data_size, tag_size, set_count, active=0)
            self.mem_array.append(ms)

        # Finds valid entries
        self.encoder = AddressEncoder(way_count)

        # setup PLRU or LFSR
        if lfsr:
            # LFSR mode
            self.lfsr = LFSR(LFSR_POLY_24)
        else:
            # PLRU mode
            self.plru = PLRU(way_count)  # One block to handle plru calcs
            self.plru_array = Array()  # PLRU data on each set
            for i in range(set_count):
                name = "plru%d" % i
                self.plru_array.append(Signal(self.plru.TLBSZ, name=name))

        # Input
        self.enable = Signal(1)  # Whether the cache is enabled
        self.command = Signal(2)  # 00=None, 01=Read, 10=Write (see SA_XX)
        self.cset = Signal(max=set_count)  # The set to be checked
        self.tag = Signal(tag_size)  # The tag to find
        self.data_i = Signal(data_size)  # The input data

        # Output
        self.ready = Signal(1)  # 0 => Processing 1 => Ready for commands
        self.hit = Signal(1)  # Tag matched one way in the given set
        self.multiple_hit = Signal(1)  # Tag matched many ways in the set
        self.data_o = Signal(data_size)  # The data linked to the matched tag

    def check_tags(self, m):
        """ Validate the tags in the selected set. If one and only one
            tag matches set its state to zero and increment all others
            by one. We only advance to next state if a single hit is found.
        """
        # Vector of per-way valid results; a zero denotes an invalid way
        valid_vector = []
        for i in range(self.way_count):
            valid_vector.append(self.mem_array[i].valid)

        # Pass encoder the valid vector
        m.d.comb += self.encoder.i.eq(Cat(*valid_vector))

        # Only one entry should be marked
        # This is due to already verifying the tags
        # matched and the valid bit is high
        with m.If(self.hit):
            m.next = "FINISHED_READ"
            # Pull out data from the read port
            data = self.mem_array[self.encoder.o].data_o
            m.d.comb += self.data_o.eq(data)
            if not self.lfsr_mode:
                self.access_plru(m)

        # Multiple tags matched: no data is returned
        with m.Elif(self.multiple_hit):
            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
            m.d.comb += self.data_o.eq(0)

        # No tag matches means no data
        with m.Else():
            # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
            m.d.comb += self.data_o.eq(0)

    def access_plru(self, m):
        """ An entry was accessed and the plru tree must now be updated
        """
        # Pull out the set's entry being edited
        plru_entry = self.plru_array[self.cset]
        m.d.comb += [
            # Set the plru data to the current state
            self.plru.plru_tree.eq(plru_entry),
            # Set that the cache was accessed
            self.plru.lu_access_i.eq(1)
        ]

    def read(self, m):
        """ Go through the read process of the cache.
            This takes two cycles to complete. First it checks for a valid
            tag and secondly it updates the LRU values.
        """
        with m.FSM() as fsm_read:
            with m.State("READY"):
                m.d.comb += self.ready.eq(0)
                # check_tags will set the state if the conditions are met
                self.check_tags(m)
            with m.State("FINISHED_READ"):
                m.next = "READY"
                m.d.comb += self.ready.eq(1)
                if not self.lfsr_mode:
                    plru_tree_o = self.plru.plru_tree_o
                    m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)

    def write_entry(self, m):
        """ Select the way to write (PLRU or LFSR) and enable its write
            port when the encoder reports a single match.
        """
        if not self.lfsr_mode:
            m.d.comb += [  # set cset (mem address) into PLRU
                self.plru.plru_tree.eq(self.plru_array[self.cset]),
                # and connect plru to encoder for write
                self.encoder.i.eq(self.plru.replace_en_o)
            ]
            write_port = self.mem_array[self.encoder.o].w
        else:
            # use the LFSR to generate a random(ish) one of the mem array
            lfsr_output = Signal(max=self.way_count)
            lfsr_random = Signal(max=self.way_count)
            m.d.comb += lfsr_output.eq(self.lfsr.state)  # lose some bits
            # address too big, limit to range of array. Valid way indices
            # are 0 .. way_count-1, so way_count itself must wrap as well
            # (hence >=, not >).
            m.d.comb += lfsr_random.eq(Mux(lfsr_output >= self.way_count,
                                           lfsr_output - self.way_count,
                                           lfsr_output))
            write_port = self.mem_array[lfsr_random].w
            # NOTE(review): nothing in this branch drives encoder.i, yet
            # the write enable below depends on encoder.single_match -
            # confirm that LFSR-mode writes can actually be enabled.

        # then if there is a match from the encoder, enable the selected write
        with m.If(self.encoder.single_match):
            m.d.comb += write_port.en.eq(1)

    def write(self, m):
        """ Go through the write process of the cache.
            This takes two cycles to complete. First it writes the entry,
            and secondly it updates the PLRU (in plru mode)
        """
        with m.FSM() as fsm_write:
            with m.State("READY"):
                m.d.comb += self.ready.eq(0)
                self.write_entry(m)
                m.next = "FINISHED_WRITE"
            with m.State("FINISHED_WRITE"):
                m.d.comb += self.ready.eq(1)
                if not self.lfsr_mode:
                    plru_entry = self.plru_array[self.cset]
                    m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
                m.next = "READY"

    def elaborate(self, platform=None):
        m = Module()

        # ----
        # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
        # ----
        m.submodules.AddressEncoder = self.encoder
        if self.lfsr_mode:
            m.submodules.LFSR = self.lfsr
        else:
            m.submodules.PLRU = self.plru

        for i, mem in enumerate(self.mem_array):
            setattr(m.submodules, "mem%d" % i, mem)

        # ----
        # select mode: PLRU connect to encoder, LFSR do... something
        # ----
        if not self.lfsr_mode:
            # Set what entry was hit
            m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
        else:
            # enable LFSR
            m.d.comb += self.lfsr.enable.eq(self.enable)

        # ----
        # connect hit/multiple hit to encoder output
        # ----
        m.d.comb += [
            self.hit.eq(self.encoder.single_match),
            self.multiple_hit.eq(self.encoder.multiple_match),
        ]

        # ----
        # connect incoming data/tag/cset(addr) to mem_array
        # ----
        for mem in self.mem_array:
            write_port = mem.w
            m.d.comb += [mem.cset.eq(self.cset),
                         mem.tag.eq(self.tag),
                         mem.data_i.eq(self.data_i),
                         write_port.en.eq(0),  # default: disable write
                         ]

        # ----
        # Commands: READ/WRITE/TODO
        # ----
        with m.If(self.enable):
            with m.Switch(self.command):
                # Search all sets at a particular tag
                with m.Case(SA_RD):
                    self.read(m)
                with m.Case(SA_WR):
                    self.write(m)
                    # Maybe catch multiple tags write here?
                    # TODO
                # TODO: invalidate/flush, flush-all?

        return m

    def ports(self):
        """Return the input and output signals of the cache."""
        return [self.enable, self.command, self.cset, self.tag, self.data_i,
                self.ready, self.hit, self.multiple_hit, self.data_o]
if __name__ == '__main__':
    # Emit RTLIL for the same cache geometry in PLRU mode, then LFSR mode
    builds = (
        (False, "SetAssociativeCache.il"),
        (True, "SetAssociativeCacheLFSR.il"),
    )
    for use_lfsr, il_path in builds:
        cache = SetAssociativeCache(4, 8, 4, 6, use_lfsr)
        il_text = rtlil.convert(cache, ports=cache.ports())
        with open(il_path, "w") as il_file:
            il_file.write(il_text)
+++ /dev/null
""" TLB Module

    The expected form of the data is:
    * Item (Bits)
    * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
"""
-from nmigen import Memory, Module, Signal, Cat, Elaboratable
-from nmigen.cli import main
-from .PermissionValidator import PermissionValidator
-from .Cam import Cam
class TLB(Elaboratable):
    """Translation lookaside buffer built from an L1 CAM (keyed by VMA),
    a Memory holding ASID + PTE words, and a PermissionValidator.
    """

    def __init__(self, asid_size, vma_size, pte_size, L1_size):
        """ Arguments
            * asid_size: Address Space IDentifier (ASID) typically 15 bits
            * vma_size: Virtual Memory Address (VMA) typically 36 bits
            * pte_size: Page Table Entry (PTE) typically 64 bits
            * L1_size: number of entries in the L1 CAM and memory

            Notes:
            These arguments should represent the largest possible size
            defined by the MODE settings. See
            Volume II: RISC-V Privileged Architectures V1.10 Page 57
        """
        # Internal
        self.state = 0

        # L1 Cache Modules: the CAM is keyed by VMA, the memory stores
        # ASID + PTE at the matching address
        self.cam_L1 = Cam(vma_size, L1_size)
        self.mem_L1 = Memory(asid_size + pte_size, L1_size)

        # Permission Validator
        self.perm_validator = PermissionValidator(asid_size, pte_size)

        # Inputs
        self.supermode = Signal(1)  # Supervisor Mode
        self.super_access = Signal(1)  # Supervisor Access
        # 00=None, 01=Search, 10=Write L1, 11=Write L2
        self.command = Signal(2)
        self.xwr = Signal(3)  # Execute, Write, Read
        self.mode = Signal(4)  # 4 bits for access to Sv48 on Rv64
        self.address_L1 = Signal(max=L1_size)
        self.asid = Signal(asid_size)  # Address Space IDentifier (ASID)
        self.vma = Signal(vma_size)  # Virtual Memory Address (VMA)
        self.pte_in = Signal(pte_size)  # To be saved Page Table Entry (PTE)

        # Outputs
        self.hit = Signal(1)  # Denotes if the VMA had a mapped PTE
        self.perm_valid = Signal(1)  # Denotes if the permissions are correct
        self.pte_out = Signal(pte_size)  # PTE that was mapped to by the VMA

    def search(self, m, read_L1, write_L1):
        """ searches the TLB
        """
        cam = self.cam_L1
        validator = self.perm_validator

        # A search never writes: disable both write paths and present
        # the VMA as the CAM key
        m.d.comb += write_L1.en.eq(0)
        m.d.comb += cam.write_enable.eq(0)
        m.d.comb += cam.data_in.eq(self.vma)

        # Match found in L1 CAM (single or multiple)
        match_found = Signal(reset_less=True)
        m.d.comb += match_found.eq(cam.single_match | cam.multiple_match)

        with m.If(match_found):
            # Memory Logic: read the word at the address the CAM matched
            m.d.comb += read_L1.addr.eq(cam.match_address)

            # Permission Validator Logic
            m.d.comb += self.hit.eq(1)
            # Feed the validator the memory word selected by the CAM
            # match address, together with the requested access
            m.d.comb += validator.data.eq(read_L1.data)
            m.d.comb += validator.xwr.eq(self.xwr)  # Execute, Read, Write
            m.d.comb += validator.super_mode.eq(self.supermode)
            m.d.comb += validator.super_access.eq(self.super_access)
            m.d.comb += validator.asid.eq(self.asid)
            # Output result of permission validation
            m.d.comb += self.perm_valid.eq(validator.valid)

            # Only output PTE if permissions are valid
            with m.If(validator.valid):
                # XXX TODO - dummy for now
                reg_data = Signal.like(self.pte_out)
                m.d.comb += self.pte_out.eq(reg_data)
            with m.Else():
                m.d.comb += self.pte_out.eq(0)

        # Miss Logic
        with m.Else():
            m.d.comb += self.hit.eq(0)
            m.d.comb += self.perm_valid.eq(0)
            m.d.comb += self.pte_out.eq(0)

    def write_l1(self, m, read_L1, write_L1):
        """ writes to the L1 cache
        """
        # Memory_L1 Logic
        m.d.comb += write_L1.en.eq(1)
        m.d.comb += write_L1.addr.eq(self.address_L1)
        # The Cat places arguments from LSB -> MSB
        m.d.comb += write_L1.data.eq(Cat(self.pte_in, self.asid))

        # CAM_L1 Logic
        m.d.comb += self.cam_L1.write_enable.eq(1)
        # data_in is sent to all entries
        m.d.comb += self.cam_L1.data_in.eq(self.vma)
        # self.cam_L1.address_in.eq(todo) # a CAM entry needs to be selected

    def elaborate(self, platform):
        m = Module()

        # Submodules for L1 Cache
        m.submodules.cam_L1 = self.cam_L1
        m.submodules.read_L1 = read_L1 = self.mem_L1.read_port()
        m.submodules.write_L1 = write_L1 = self.mem_L1.write_port()
        # Permission Validator Submodule
        m.submodules.perm_valididator = self.perm_validator

        # When MODE specifies translation
        # TODO add in different bit length handling ie prefix 0s
        tlb_enable = Signal(reset_less=True)
        m.d.comb += tlb_enable.eq(self.mode != 0)

        with m.If(tlb_enable):
            m.d.comb += self.cam_L1.enable.eq(1)
            with m.Switch(self.command):
                # Search
                with m.Case("01"):
                    self.search(m, read_L1, write_L1)
                # Write L1
                # Expected that the miss will be handled in software
                with m.Case("10"):
                    self.write_l1(m, read_L1, write_L1)
                # TODO
                # with m.Case("11"):

        # When disabled
        with m.Else():
            m.d.comb += self.cam_L1.enable.eq(0)
            # XXX TODO - self.reg_file.enable.eq(0),
            m.d.comb += self.hit.eq(0)
            m.d.comb += self.perm_valid.eq(0)  # XXX TODO, check this
            m.d.comb += self.pte_out.eq(0)

        return m
if __name__ == '__main__':
    tlb = TLB(15, 36, 64, 4)
    # TLB inputs, then TLB outputs, then every port of the L1 CAM
    tlb_inputs = [tlb.supermode, tlb.super_access, tlb.command,
                  tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
                  tlb.vma, tlb.pte_in]
    tlb_outputs = [tlb.hit, tlb.perm_valid, tlb.pte_out]
    main(tlb, ports=tlb_inputs + tlb_outputs + tlb.cam_L1.ports())
+++ /dev/null
-#include <cstdint>
-#include <iostream>
-#include <cmath>
-#define NWAY 4
-#define NLINE 256
-#define HIT 0
-#define MISS 1
-#define MS 1000
/*
Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt

four-way set associative - three bits
   each bit represents one branch point in a binary decision tree; let 1
   represent that the left side has been referenced more recently than the
   right side, and 0 vice-versa

              are all 4 lines valid?
                   /       \
                 yes        no, use an invalid line
                  |
                  |
                  |
             bit_0 == 0?            state | replace      ref to | next state
              /       \             ------+--------      -------+-----------
             y         n             00x  |  line_0      line_0 |    11_
            /           \            01x  |  line_1      line_1 |    10_
     bit_1 == 0?    bit_2 == 0?      1x0  |  line_2      line_2 |    0_1
       /    \          /    \        1x1  |  line_3      line_3 |    0_0
      y      n        y      n
     /        \      /        \        ('x' means       ('_' means unchanged)
   line_0  line_1  line_2  line_3      don't care)

 8-way set associative -  7 = 1+2+4 bits
16-way set associative - 15 = 1+2+4+8 bits
32-way set associative - 31 = 1+2+4+8+16 bits
64-way set associative - 63 = 1+2+4+8+16+32 bits
*/
-using namespace std;
-// Bit-field split of a 64-bit address: 2 + 4 + 8 + 50 = 64 bits.
-// NOTE(review): which address bits each field maps to depends on the
-// compiler's bit-field layout -- presumably lowest bits first; confirm.
-struct AddressField {
- uint64_t wd_idx : 2;//Unused
- uint64_t offset : 4;//Unused
- uint64_t index : 8;//NLINE = 256 = 2^8
- uint64_t tag : 50;
-// Overlays a raw pointer with AddressField so that code can write `p` and
-// then read the tag/index/offset fields of the same value.
-// NOTE(review): relies on union type punning and on pointers being 64 bits
-// wide -- neither is guaranteed by ISO C++; confirm for the target compiler.
-union Address {
- uint32_t* p;
- AddressField fields;
-// One way of a cache set: a valid bit plus the stored tag (no data payload).
-struct Cell {
- bool v;
- uint64_t tag;
- // Cells start out invalid with a zero tag.
- Cell() : v(false), tag(0) {}
- // True when this cell is valid and holds the given tag.
- bool isHit(uint64_t tag) {
- return v && (tag == this->tag);
- }
- // Record `address` in this cell: store its tag field and mark the cell
- // valid.  The pointer is only decomposed, never dereferenced.
- void fetch(uint32_t* address) {
- Address addr;
- addr.p = address;
- // offset / wd_idx are cleared in the local copy only; they are not
- // used afterwards in this function.
- addr.fields.offset = 0;
- addr.fields.wd_idx = 0;
- tag = addr.fields.tag;
- v = true;
- }
-// Print a cell as " v:<valid> tag:<tag in hex>".
-// The stream's format flags are saved and restored so that the sticky
-// `hex` manipulator does not leak into whatever the caller prints next
-// (previously every number after the first cell came out in hex).
-ostream& operator<<(ostream & out, const Cell& cell) {
-    const std::ios_base::fmtflags saved = out.flags();
-    out << " v:" << cell.v << " tag:" << hex << cell.tag;
-    out.flags(saved);
-    return out;
-}
-// One cache set: NWAY cells plus the Tree-PLRU state bits.
-//
-// For way i, `mask[i]` selects the tree bits on the path to that way,
-// `value[i]` is the pattern of those bits that makes way i the victim, and
-// `next_value[i]` is the pattern written to those bits after way i has been
-// referenced.
-struct Block {
-    // Only the 4-way and 8-way tables are defined in the constructor.
-    static_assert(NWAY == 4 || NWAY == 8, "Block: unsupported NWAY");
-
-    Cell cell[NWAY];
-    uint32_t state;
-    uint64_t *mask;//Mask the state to get accurate value for specified 1 bit.
-    uint64_t *value;
-    uint64_t *next_value;
-
-    Block() : state(0), mask(nullptr), value(nullptr), next_value(nullptr) {
-        switch (NWAY) {
-            case 4:
-                mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
-                value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
-                next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
-                break;
-            case 8:
-                mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
-                                       0b1010001};
-                value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
-                                        0b1010001};
-                next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
-                                             0b0000001, 0b0000000};
-                break;
-            //TODO - more NWAY goes here.
-            default:
-                std::cout << "Error definition NWAY = " << NWAY << std::endl;
-        }
-    }
-
-    // The tables are heap-allocated, so release them and forbid copying
-    // (a copy would share the pointers and free them twice).
-    ~Block() {
-        delete[] mask;
-        delete[] value;
-        delete[] next_value;
-    }
-    Block(const Block&) = delete;
-    Block& operator=(const Block&) = delete;
-
-    // Look for a valid cell holding `tag`.  On a hit stores the way number
-    // in *pway and returns pway; returns NULL on a miss.
-    uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
-        for (int i = 0; i < NWAY; ++i) {
-            if (cell[i].isHit(tag)) {
-                *pway = i;
-                return pway;
-            }
-        }
-        return NULL;
-    }
-
-    // Miss path: pick the victim way indicated by the PLRU state, flip the
-    // tree bits on its path (which yields next_value for that way), and
-    // load `address` into the victim cell.
-    void setLRU(uint32_t *address) {
-        int way = 0;
-        uint32_t st = state;
-        for (int i = 0; i < NWAY; ++i) {
-            if ((state & mask[i]) == value[i]) {
-                state ^= mask[i];
-                way = i;
-                break;
-            }
-        }
-        cell[way].fetch(address);
-        cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
-    }
-
-    // Hit test for `address`.  Returns pway (with *pway set to the hitting
-    // way) on a hit and NULL on a miss.  The previous version returned
-    // &pway[offset], i.e. a pointer past the caller's single `way` variable.
-    uint32_t *get(uint32_t *address, uint32_t *pway) {
-        Address addr;
-        addr.p = address;
-        return getByTag(addr.fields.tag, pway);
-    }
-
-    // Access `address` in this set.  Returns HIT after updating the PLRU
-    // bits for the referenced way, or MISS after replacing a way.
-    int set(uint32_t *address) {
-        uint32_t way = 0;
-        uint32_t *p = get(address, &way);
-        if (p != NULL) {
-            printf("HIT: address:%p ref_to way:%u state %X --> ", (void *) address, way, state);
-            state &= ~mask[way];
-            printf("%X --> ", state);
-            state |= next_value[way];
-            printf("%X\n", state);
-            // *p = *address; //skip since address is fake.
-            return HIT;
-        } else {
-            setLRU(address);
-            return MISS;
-        }
-    }
-};
-// Print a set as "state:<plru bits> " followed by each of its NWAY cells.
-ostream& operator<<(ostream & out, const Block& block) {
- out << "state:" << block.state << " ";
- for (int i = 0; i<NWAY; i++) {
- out << block.cell[i];
- }
- return out;
-// The whole cache: NLINE sets plus hit/miss counters indexed by HIT / MISS.
-struct Cache {
- Block block[NLINE];
- uint32_t count[2];
- Cache() { count[HIT] = 0; count[MISS] = 0; }
- // Simulate one access: select the set from the address's index field and
- // bump the counter matching the HIT/MISS result returned by Block::set.
- void access(uint32_t* address) {
- Address addr;
- addr.p = address;
- Block& b = block[addr.fields.index];
- ++count[b.set(address)];
- }
-// Print the hit/miss summary followed by one line per set.
-ostream& operator<<(ostream & out, const Cache& cache) {
- out << "\n==Summary==\n\tHit: " << cache.count[HIT] << " Miss: " << cache.count[MISS] << std::endl;
- for (int i = 0; i < NLINE; i++) {
- out << cache.block[i] << endl;
- }
- return out;
-Cache cache;
-// Replay the address trace of an MS x MS matrix multiply through the global
-// `cache`.  m1, m2 and res are used only as base addresses for the index
-// arithmetic below; this function never dereferences them.
-void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
- int x, i, j;
- for (i = 0; i < MS; i++) {
- for (j = 0; j < MS; j++) {
- cache.access(res + i*MS +j);
- for (x = 0; x < MS; x++) {
- // accesses corresponding to the statement in the comment below:
- // m1[i][x], m2[x][j], then res[i][j] twice
- cache.access(m1 + i*MS + x);
- cache.access(m2 + x*MS + j);
- cache.access(res + i*MS +j);
- // res[i][j] += m1[i][x] * m2[x][j];
- cache.access(res + i*MS +j);
- }
- }
- }
-// Run the matrix-multiply address trace over three fake base addresses and
-// print the resulting cache statistics and contents.
-int main()
- uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL; // fake virtual address; don’t access it
- uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL; // fake virtual address; don’t access it
- uint32_t* res = (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
- multiply(m1, m2, res);
- cout << cache << endl;
- return 0;
+++ /dev/null
-from nmigen import Const
-# Exception cause codes, each a 64-bit nmigen constant.
-# NOTE(review): the values look like the RISC-V privileged-spec exception
-# codes (codes 0, 10 and 14 are not defined here) -- confirm against the spec.
-INSTR_ACCESS_FAULT = Const(1, 64)
-ILLEGAL_INSTR = Const(2, 64)
-BREAKPOINT = Const(3, 64)
-LD_ADDR_MISALIGNED = Const(4, 64)
-LD_ACCESS_FAULT = Const(5, 64)
-ST_ADDR_MISALIGNED = Const(6, 64)
-ST_ACCESS_FAULT = Const(7, 64)
-ENV_CALL_UMODE = Const(8, 64) # environment call from user mode
-ENV_CALL_SMODE = Const(9, 64) # environment call from supervisor mode
-ENV_CALL_MMODE = Const(11, 64) # environment call from machine mode
-INSTR_PAGE_FAULT = Const(12, 64) # Instruction page fault
-LOAD_PAGE_FAULT = Const(13, 64) # Load page fault
-STORE_PAGE_FAULT = Const(15, 64) # Store page fault
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-# Author: Florian Zaruba, ETH Zurich
-# Date: 12.11.2017
-# Description: Handles cache misses.
-from nmigen.lib.coding import Encoder, PriorityEncoder
-# --------------
-# MISS Handler
-# --------------
-import ariane_pkg::*;
-import std_cache_pkg::*;
-unsigned NR_PORTS = 3
-class MissReq(RecordObject):
- def __init__(self, name=None):
- Record.__init__(self, name)
- self.valid = Signal()
- self.addr = Signal(64)
- self.be = Signal(8)
- self.size = Signal(2)
- self.we = Signal()
- self.wdata = Signal(64)
- bypass = Signal()
-class CacheLine:
- """One cache line: tag, data, and the valid/dirty state bits."""
- def __init__(self):
- self.tag = Signal(DCACHE_TAG_WIDTH) # tag array
- self.data = Signal(DCACHE_LINE_WIDTH) # data array
- self.valid = Signal() # state array
- self.dirty = Signal() # state array
-# cache line byte enable
-class CLBE:
- def __init__(self):
- self.tag = Signal(DCACHE_TAG_WIDTH+7)//8) # byte enable into tag array
- self.data = Signal(DCACHE_LINE_WIDTH+7)//8) # byte enable data array
- # bit enable into state array (valid for a pair of dirty/valid bits)
- self.vldrty = Signal(DCACHE_SET_ASSOC)
- } cl_be_t;
- # FSM states
- enum logic [3:0] {
- IDLE, # 0
- FLUSH, # 2
- MISS, # 7
- MISS_REPL, # 9
- INIT, # B
- } state_d, state_q;
-class MissHandler(Elaboratable):
- def __init__(self, NR_PORTS):
- self.pwid = pwid = ceil(log(NR_PORTS) / log(2))
- self.flush_i = Signal() # flush request
- self.flush_ack_o = Signal() # acknowledge successful flush
- self.miss_o = Signal()
- self.busy_i = Signal() # dcache is busy with something
- # Bypass or miss
- self.miss_req_i = Array(MissReq(name="missreq") for i in range(NR_PORTS))
- # Bypass handling
- self.bypass_gnt_o = Signal(NR_PORTS)
- self.bypass_valid_o = Signal(NR_PORTS)
- self.bypass_data_o = Array(Signal(name="bdata_o", 64) \
- for i in range(NR_PORTS))
- # AXI port
- output ariane_axi::req_t axi_bypass_o,
- input ariane_axi::resp_t axi_bypass_i,
- # Miss handling (~> cacheline refill)
- self.miss_gnt_o = Signal(NR_PORTS)
- self.active_serving_o = Signal(NR_PORTS)
- self.critical_word_o = Signal(64)
- self.critical_word_valid_o = Signal()
- output ariane_axi::req_t axi_data_o,
- input ariane_axi::resp_t axi_data_i,
- self.mshr_addr_i = Array(Signal(name="bdata_o", 56) \
- for i in range(NR_PORTS))
- self.mshr_addr_matches_o = Signal(NR_PORTS)
- self.mshr_index_matches_o = Signal(NR_PORTS)
- # AMO
- self.amo_req_i = AMOReq()
- self.amo_resp_o = AMOResp()
- # Port to SRAMs, for refill and eviction
- self.req_o = Signal(DCACHE_SET_ASSOC)
- self.addr_o = Signal(DCACHE_INDEX_WIDTH) # address into cache array
- self.data_o = CacheLine()
- self.be_o = CLBE()
- self.data_i = Array(CacheLine() \
- for i in range(DCACHE_SET_ASSOC))
- self.we_o = Signal()
- def elaborate(self, platform):
- # Registers
- mshr_t mshr_d, mshr_q;
- logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
- logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
- # cache line to evict
- cache_line_t evict_cl_d, evict_cl_q;
- logic serve_amo_d, serve_amo_q;
- # Request from one FSM
- miss_req_valid = Signal(self.NR_PORTS)
- miss_req_bypass = Signal(self.NR_PORTS)
- miss_req_addr = Array(Signal(name="miss_req_addr", 64) \
- for i in range(NR_PORTS))
- miss_req_wdata = Array(Signal(name="miss_req_wdata", 64) \
- for i in range(NR_PORTS))
- miss_req_we = Signal(self.NR_PORTS)
- miss_req_be = Array(Signal(name="miss_req_be", 8) \
- for i in range(NR_PORTS))
- miss_req_size = Array(Signal(name="miss_req_size", 2) \
- for i in range(NR_PORTS))
- # Cache Line Refill <-> AXI
- req_fsm_miss_valid = Signal()
- req_fsm_miss_addr = Signal(64)
- req_fsm_miss_wdata = Signal(DCACHE_LINE_WIDTH)
- req_fsm_miss_we = Signal()
- req_fsm_miss_be = Signal(DCACHE_LINE_WIDTH//8)
- ariane_axi::ad_req_t req_fsm_miss_req;
- req_fsm_miss_size = Signal(2)
- gnt_miss_fsm = Signal()
- valid_miss_fsm = Signal()
- nmiss = DCACHE_LINE_WIDTH//64
- data_miss_fsm = Array(Signal(name="data_miss_fsm", 64) \
- for i in range(nmiss))
- # Cache Management <-> LFSR
- lfsr_enable = Signal()
- lfsr_oh = Signal(DCACHE_SET_ASSOC)
- lfsr_bin = Signal($clog2(DCACHE_SET_ASSOC-1))
- # AMOs
- ariane_pkg::amo_t amo_op;
- amo_operand_a = Signal(64)
- amo_operand_b = Signal(64)
- amo_result_o = Signal(64)
- struct packed {
- logic [63:3] address;
- logic valid;
- } reservation_d, reservation_q;
- # ------------------------------
- # Cache Management
- # ------------------------------
- evict_way = Signal(DCACHE_SET_ASSOC)
- valid_way = Signal(DCACHE_SET_ASSOC)
- for (i in range(DCACHE_SET_ASSOC):
- comb += evict_way[i].eq(data_i[i].valid & data_i[i].dirty)
- comb += valid_way[i].eq(data_i[i].valid)
- # ----------------------
- # Default Assignments
- # ----------------------
- # to AXI refill
- req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ;
- req_fsm_miss_size = Const(0b11, 2)
- # core
- serve_amo_d = serve_amo_q;
- # --------------------------------
- # Flush and Miss operation
- # --------------------------------
- state_d = state_q;
- cnt_d = cnt_q;
- evict_way_d = evict_way_q;
- evict_cl_d = evict_cl_q;
- mshr_d = mshr_q;
- # communicate to the requester which unit we are currently serving
- active_serving_o[mshr_q.id] = mshr_q.valid;
- # AMOs
- # silence the unit when not used
- amo_op = amo_req_i.amo_op;
- reservation_d = reservation_q;
- with m.FSM() as state_q:
- with m.Case("IDLE"):
- # lowest priority are AMOs, wait until everything else
- # is served before going for the AMOs
- with m.If (amo_req_i.req & ~busy_i):
- # 1. Flush the cache
- with m.If(~serve_amo_q):
- m.next = "FLUSH_REQ_STATUS"
- serve_amo_d.eq(0b1
- cnt_d.eq(0
- # 2. Do the AMO
- with m.Else():
- m.next = "AMO_LOAD"
- serve_amo_d.eq(0b0
- # check if we want to flush and can flush
- # e.g.: we are not busy anymore
- # TODO: Check that the busy flag is indeed needed
- with m.If (flush_i & ~busy_i):
- m.next = "FLUSH_REQ_STATUS"
- cnt_d = 0
- # check if one of the state machines missed
- for i in range(NR_PORTS):
- # here comes the refill portion of code
- with m.If (miss_req_valid[i] & ~miss_req_bypass[i]):
- m.next = "MISS"
- # we are taking another request so don't
- # take the AMO
- serve_amo_d = 0b0;
- # save to MSHR
- comb += [ mshr_d.valid.eq(0b1),
- mshr_d.we.eq(miss_req_we[i]),
- mshr_d.id.eq(i),
- mshr_d.addr.eq(miss_req_addr[i][0:wid]),
- mshr_d.wdata.eq(miss_req_wdata[i]),
- mshr_d.be.eq(miss_req_be[i]),
- ]
- break
- # ~> we missed on the cache
- with m.Case("MISS"):
- # 1. Check if there is an empty cache-line
- # 2. If not -> evict one
- comb += req_o.eq(1)
- sync += addr_o.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH]
- m.next = "MISS_REPL"
- comb += miss_o.eq(1)
- # ~> second miss cycle
- with m.Case("MISS_REPL"):
- # if all are valid we need to evict one,
- # pseudo random from LFSR
- with m.If(~(~valid_way).bool()):
- comb += lfsr_enable.eq(0b1)
- comb += evict_way_d.eq(lfsr_oh)
- # do we need to write back the cache line?
- with m.If(data_i[lfsr_bin].dirty):
- state_d = WB_CACHELINE_MISS;
- comb += evict_cl_d.tag.eq(data_i[lfsr_bin].tag)
- comb += evict_cl_d.data.eq(data_i[lfsr_bin].data)
- comb += cnt_d.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
- # no - we can request a cache line now
- with m.Else():
- m.next = "REQ_CACHELINE"
- # we have at least one free way
- with m.Else():
- # get victim cache-line by looking for the
- # first non-valid bit
- comb += evict_way_d.eq(get_victim_cl(~valid_way)
- m.next = "REQ_CACHELINE"
- # ~> we can just load the cache-line,
- # the way is stored in evict_way_q
- with m.Case("REQ_CACHELINE"):
- comb += req_fsm_miss_valid .eq(1)
- sync += req_fsm_miss_addr .eq(mshr_q.addr)
- with m.If (gnt_miss_fsm):
- m.next = "SAVE_CACHELINE"
- comb += miss_gnt_o[mshr_q.id].eq(1)
- # ~> replace the cacheline
- with m.Case("SAVE_CACHELINE"):
- # calculate cacheline offset
- automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
- sync += cl_offset.eq(mshr_q.addr[3:DCACHE_BYTE_OFFSET] << 6)
- # we've got a valid response from refill unit
- with m.If (valid_miss_fsm):
- sync += addr_o .eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
- sync += req_o .eq(evict_way_q)
- comb += we_o .eq(1)
- comb += be_o .eq(1)
- sync += be_o.vldrty .eq(evict_way_q)
- sync += data_o.tag .eq(mshr_q.addr[DCACHE_INDEX_WIDTH:wid]
- comb += data_o.data .eq(data_miss_fsm)
- comb += data_o.valid.eq(1)
- comb += data_o.dirty.eq(0)
- # is this a write?
- with m.If (mshr_q.we):
- # Yes, so save the updated data now
- for i in range(8):
- # check if we really want to write
- # the corresponding byte
- with m.If (mshr_q.be[i]):
- sync += data_o.data[(cl_offset + i*8) +: 8].eq(mshr_q.wdata[i];
- # it's immediately dirty if we write
- comb += data_o.dirty.eq(1)
- # reset MSHR
- comb += mshr_d.valid.eq(0)
- # go back to idle
- m.next = 'IDLE'
- # ------------------------------
- # Write Back Operation
- # ------------------------------
- # ~> evict a cache line from way saved in evict_way_q
- with m.Case("WB_CACHELINE_FLUSH"):
- with m.Case("WB_CACHELINE_MISS"):
- comb += req_fsm_miss_valid .eq(0b1)
- sync += req_fsm_miss_addr .eq({evict_cl_q.tag, cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{0b0}}};
- comb += req_fsm_miss_be .eq(1)
- comb += req_fsm_miss_we .eq(0b1)
- sync += req_fsm_miss_wdata .eq(evict_cl_q.data;
- # we've got a grant --> this is timing critical, think about it
- if (gnt_miss_fsm) begin
- # write status array
- sync += addr_o .eq(cnt_q)
- comb += req_o .eq(0b1)
- comb += we_o .eq(0b1)
- comb += data_o.valid.eq(INVALIDATE_ON_FLUSH ? 0b0 : 0b1)
- # invalidate
- sync += be_o.vldrty.eq(evict_way_q)
- # go back to handling the miss or flushing,
- # depending on where we came from
- with m.If(state_q == WB_CACHELINE_MISS):
- m.next = "MISS"
- with m.Else():
- m.next = "FLUSH_REQ_STATUS"
- # ------------------------------
- # Flushing & Initialization
- # ------------------------------
- # ~> make another request to check the same
- # cache-line if there are still some valid entries
- with m.Case("FLUSH_REQ_STATUS"):
- comb += req_o .eq(1)
- sync += addr_o .eq(cnt_q)
- m.next = "FLUSHING"
- with m.Case("FLUSHING"):
- # this has priority
- # at least one of the cache lines is dirty
- with m.If(~evict_way):
- # evict cache line, look for the first
- # cache-line which is dirty
- comb += evict_way_d.eq(get_victim_cl(evict_way))
- comb += evict_cl_d .eq(data_i[one_hot_to_bin(evict_way)])
- # not dirty ~> increment and continue
- with m.Else():
- # increment and re-request
- sync += cnt_d.eq(cnt_q + (1 << DCACHE_BYTE_OFFSET))
- m.next = "FLUSH_REQ_STATUS"
- sync += addr_o .eq(cnt_q)
- comb += req_o .eq(1)
- comb += be_o.vldrty.eq(INVALIDATE_ON_FLUSH ? 1 : 0)
- comb += we_o .eq(1)
- # finished with flushing operation, go back to idle
- # only acknowledge if the flush wasn't
- # triggered by an atomic
- sync += flush_ack_o.eq(~serve_amo_q)
- m.next = "IDLE"
- # ~> only called after reset
- with m.Case("INIT"):
- # initialize status array
- sync += addr_o.eq(cnt_q)
- comb += req_o .eq(1)
- comb += we_o .eq(1)
- # only write the dirty array
- comb += be_o.vldrty.eq(1)
- sync += cnt_d .eq(cnt_q + (1 << DCACHE_BYTE_OFFSET))
- # finished initialization
- m.next = "IDLE"
- # ----------------------
- # AMOs
- # ----------------------
- # TODO(zarubaf) Move this closer to memory
- # ~> we are here because we need to do the AMO,
- # the cache is clean at this point
- # start by executing the load
- with m.Case("AMO_LOAD"):
- comb += req_fsm_miss_valid.eq(1)
- # address is in operand a
- comb += req_fsm_miss_addr.eq(amo_req_i.operand_a)
- comb += req_fsm_miss_req.eq(ariane_axi::SINGLE_REQ)
- comb += req_fsm_miss_size.eq(amo_req_i.size)
- # the request has been granted
- with m.If(gnt_miss_fsm):
- m.next = "AMO_SAVE_LOAD"
- # save the load value
- with m.Case("AMO_SAVE_LOAD"):
- with m.If (valid_miss_fsm):
- # we are only concerned about the lower 64-bit
- comb += mshr_d.wdata.eq(data_miss_fsm[0])
- m.next = "AMO_STORE"
- # and do the store
- with m.Case("AMO_STORE"):
- load_data = Signal(64)
- # re-align load data
- comb += load_data.eq(data_align(amo_req_i.operand_a[:3],
- mshr_q.wdata))
- # Sign-extend for word operation
- with m.If (amo_req_i.size == 0b10):
- comb += amo_operand_a.eq(sext32(load_data[:32]))
- comb += amo_operand_b.eq(sext32(amo_req_i.operand_b[:32]))
- with m.Else():
- comb += amo_operand_a.eq(load_data)
- comb += amo_operand_b.eq(amo_req_i.operand_b)
- # we do not need a store request for load reserved
- # or a failing store conditional
- # we can bail-out without making any further requests
- with m.If ((amo_req_i.amo_op == AMO_LR) | \
- ((amo_req_i.amo_op == AMO_SC) & \
- ((reservation_q.valid & \
- (reservation_q.address != \
- amo_req_i.operand_a[3:64])) | \
- ~reservation_q.valid))):
- comb += req_fsm_miss_valid.eq(0)
- m.next = "IDLE"
- comb += amo_resp_o.ack.eq(1)
- # write-back the result
- comb += amo_resp_o.result.eq(amo_operand_a)
- # we know that the SC failed
- with m.If (amo_req_i.amo_op == AMO_SC):
- comb += amo_resp_o.result.eq(1)
- # also clear the reservation
- comb += reservation_d.valid.eq(0)
- with m.Else():
- comb += req_fsm_miss_valid.eq(1)
- comb += req_fsm_miss_we .eq(1)
- comb += req_fsm_miss_req .eq(ariane_axi::SINGLE_REQ)
- comb += req_fsm_miss_size.eq(amo_req_i.size)
- comb += req_fsm_miss_addr.eq(amo_req_i.operand_a)
- comb += req_fsm_miss_wdata.eq(
- data_align(amo_req_i.operand_a[0:3], amo_result_o))
- comb += req_fsm_miss_be.eq(
- be_gen(amo_req_i.operand_a[0:3], amo_req_i.size))
- # place a reservation on the memory
- with m.If (amo_req_i.amo_op == AMO_LR):
- comb += reservation_d.address.eq(amo_req_i.operand_a[3:64])
- comb += reservation_d.valid.eq(1)
- # the request is valid or we didn't need to go for another store
- with m.If (valid_miss_fsm):
- m.next = "IDLE"
- comb += amo_resp_o.ack.eq(1)
- # write-back the result
- comb += amo_resp_o.result.eq(amo_operand_a;
- if (amo_req_i.amo_op == AMO_SC) begin
- comb += amo_resp_o.result.eq(0)
- # An SC must fail if there is another SC
- # (to any address) between the LR and the SC in
- # program order (even to the same address).
- # in any case destroy the reservation
- comb += reservation_d.valid.eq(0)
- # check MSHR for aliasing
- comb += mshr_addr_matches_o .eq(0)
- comb += mshr_index_matches_o.eq()
- for i in range(NR_PORTS):
- # check mshr for potential matching of other units,
- # exclude the unit currently being served
- with m.If (mshr_q.valid & \
- (mshr_addr_i[i][DCACHE_BYTE_OFFSET:56] == \
- mshr_q.addr[DCACHE_BYTE_OFFSET:56])):
- comb += mshr_addr_matches_o[i].eq(1)
- # same as previous, but checking only the index
- with m.If (mshr_q.valid & \
- mshr_index_matches_o[i].eq(1)
- # --------------------
- # Sequential Process
- # --------------------
- """
- #pragma translate_off
- `ifndef VERILATOR
- # assert that cache only hits on one way
- assert property (
- @(posedge clk_i) $onehot0(evict_way_q)) else $warning("Evict-way should be one-hot encoded");
- `endif
- #pragma translate_on
- """
- # ----------------------
- # Bypass Arbiter
- # ----------------------
- # Connection Arbiter <-> AXI
- req_fsm_bypass_valid = Signal()
- req_fsm_bypass_addr = Signal(64)
- req_fsm_bypass_wdata = Signal(64)
- req_fsm_bypass_we = Signal()
- req_fsm_bypass_be = Signal(8)
- req_fsm_bypass_size = Signal(2)
- gnt_bypass_fsm = Signal()
- valid_bypass_fsm = Signal()
- data_bypass_fsm = Signal(64)
- logic [$clog2(NR_PORTS)-1:0] id_fsm_bypass;
- logic [3:0] id_bypass_fsm;
- logic [3:0] gnt_id_bypass_fsm;
- i_bypass_arbiter = ib = AXIArbiter( NR_PORTS, 64)
- comb += [
- # Master Side
- ib.data_req_i .eq( miss_req_valid & miss_req_bypass ),
- ib.address_i .eq( miss_req_addr ),
- ib.data_wdata_i .eq( miss_req_wdata ),
- ib.data_we_i .eq( miss_req_we ),
- ib.data_be_i .eq( miss_req_be ),
- ib.data_size_i .eq( miss_req_size ),
- ib.data_gnt_o .eq( bypass_gnt_o ),
- ib.data_rvalid_o .eq( bypass_valid_o ),
- ib.data_rdata_o .eq( bypass_data_o ),
- # Slave Side
- ib.id_i .eq( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ),
- ib.id_o .eq( id_fsm_bypass ),
- ib.gnt_id_i .eq( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ),
- ib.address_o .eq( req_fsm_bypass_addr ),
- ib.data_wdata_o .eq( req_fsm_bypass_wdata ),
- ib.data_req_o .eq( req_fsm_bypass_valid ),
- ib.data_we_o .eq( req_fsm_bypass_we ),
- ib.data_be_o .eq( req_fsm_bypass_be ),
- ib.data_size_o .eq( req_fsm_bypass_size ),
- ib.data_gnt_i .eq( gnt_bypass_fsm ),
- ib.data_rvalid_i .eq( valid_bypass_fsm ),
- ib.data_rdata_i .eq( data_bypass_fsm ),
- ]
- axi_adapter #(
- .DATA_WIDTH ( 64 ),
- .AXI_ID_WIDTH ( 4 ),
- ) i_bypass_axi_adapter (
- .clk_i,
- .rst_ni,
- .req_i ( req_fsm_bypass_valid ),
- .type_i ( ariane_axi::SINGLE_REQ ),
- .gnt_o ( gnt_bypass_fsm ),
- .addr_i ( req_fsm_bypass_addr ),
- .we_i ( req_fsm_bypass_we ),
- .wdata_i ( req_fsm_bypass_wdata ),
- .be_i ( req_fsm_bypass_be ),
- .size_i ( req_fsm_bypass_size ),
- .id_i ( Cat(id_fsm_bypass, 0, 0) ),
- .valid_o ( valid_bypass_fsm ),
- .rdata_o ( data_bypass_fsm ),
- .gnt_id_o ( gnt_id_bypass_fsm ),
- .id_o ( id_bypass_fsm ),
- .critical_word_o ( ), # not used for single requests
- .critical_word_valid_o ( ), # not used for single requests
- .axi_req_o ( axi_bypass_o ),
- .axi_resp_i ( axi_bypass_i )
- );
- # ----------------------
- # Cache Line AXI Refill
- # ----------------------
- axi_adapter #(
- .AXI_ID_WIDTH ( 4 ),
- ) i_miss_axi_adapter (
- .clk_i,
- .rst_ni,
- .req_i ( req_fsm_miss_valid ),
- .type_i ( req_fsm_miss_req ),
- .gnt_o ( gnt_miss_fsm ),
- .addr_i ( req_fsm_miss_addr ),
- .we_i ( req_fsm_miss_we ),
- .wdata_i ( req_fsm_miss_wdata ),
- .be_i ( req_fsm_miss_be ),
- .size_i ( req_fsm_miss_size ),
- .id_i ( Const(0b1100, 4) ),
- .gnt_id_o ( ), # open
- .valid_o ( valid_miss_fsm ),
- .rdata_o ( data_miss_fsm ),
- .id_o ( ),
- .critical_word_o,
- .critical_word_valid_o,
- .axi_req_o ( axi_data_o ),
- .axi_resp_i ( axi_data_i )
- );
- # -----------------
- # Replacement LFSR
- # -----------------
- lfsr_8bit #(.WIDTH (DCACHE_SET_ASSOC)) i_lfsr (
- .en_i ( lfsr_enable ),
- .refill_way_oh ( lfsr_oh ),
- .refill_way_bin ( lfsr_bin ),
- .*
- );
- # -----------------
- # -----------------
- amo_alu i_amo_alu (
- .amo_op_i ( amo_op ),
- .amo_operand_a_i ( amo_operand_a ),
- .amo_operand_b_i ( amo_operand_b ),
- .amo_result_o ( amo_result_o )
- );
- # -----------------
- # Struct Split
- # -----------------
- for i in range(NR_PORTS):
- miss_req = MissReq()
- comb += miss_req.eq(miss_req_i[i]);
- comb += miss_req_valid [i] .eq(miss_req.valid)
- comb += miss_req_bypass [i] .eq(miss_req.bypass)
- comb += miss_req_addr [i] .eq(miss_req.addr)
- comb += miss_req_wdata [i] .eq(miss_req.wdata)
- comb += miss_req_we [i] .eq(miss_req.we)
- comb += miss_req_be [i] .eq(miss_req.be)
- comb += miss_req_size [i] .eq(miss_req.size)
- # --------------
- # AXI Arbiter
- # --------------
- #
- # Description: Arbitrates access to AXI refill/bypass
- #
-class AXIArbiter:
- def __init__(self, NR_PORTS = 3, DATA_WIDTH = 64):
- self.pwid = pwid = ceil(log(NR_PORTS) / log(2))
- rst_ni = ResetSignal() # Asynchronous reset active low
- # master ports
- self.data_req_i = Signal(NR_PORTS)
- self.address_i = Array(Signal(name="address_i", 64) \
- for i in range(NR_PORTS))
- self.data_wdata_i = Array(Signal(name="data_wdata_i", 64) \
- for i in range(NR_PORTS))
- self.data_we_i = Signal(NR_PORTS)
- self.data_be_i = Array(Signal(name="data_wdata_i", DATA_WIDTH/8) \
- for i in range(NR_PORTS))
- self.data_size_i = Array(Signal(name="data_size_i", 2) \
- for i in range(NR_PORTS))
- self.data_gnt_o = Signal(NR_PORTS)
- self.data_rvalid_o = Signal(NR_PORTS)
- self.data_rdata_o = Array(Signal(name="data_rdata_o", 64) \
- for i in range(NR_PORTS))
- # slave port
- self.id_i = Signal(pwid)
- self.id_o = Signal(pwid)
- self.gnt_id_i = Signal(pwid)
- self.data_req_o = Signal()
- self.address_o = Signal(64)
- self.data_wdata_o = Signal(DATA_WIDTH)
- self.data_we_o = Signal()
- self.data_be_o = Signal(DATA_WIDTH/8)
- self.data_size_o = Signal(2)
- self.data_gnt_i = Signal()
- self.data_rvalid_i = Signal()
- self.data_rdata_i = Signal(DATA_WIDTH)
- def elaborate(self, platform):
- # Arbitration logic: a priority encoder over data_req_i selects one
- # requester; in IDLE its request is forwarded and latched, and in
- # SERVING the request is held until data_rvalid_i arrives.
- # NOTE(review): this looks like an unfinished translation -- `m`,
- # `comb` and `sync` are never defined in this method, `i` is undefined
- # in the IDLE case, `m.next` is used inside `m.Switch`, Packet defines
- # no `eq`, and nothing is returned.  Confirm before relying on it.
- #enum logic [1:0] { IDLE, REQ, SERVING } state_d, state_q;
- class Packet:
- def __init__(self, pwid, DATA_WIDTH):
- self.id = Signal(pwid)
- self.address = Signal(64)
- self.data = Signal(64)
- self.size = Signal(2)
- self.be = Signal(DATA_WIDTH/8)
- self.we = Signal()
- request_index = Signal(self.pwid)
- req_q = Packet(self.pwid, self.DATA_WIDTH)
- req_d = Packet(self.pwid, self.DATA_WIDTH)
- # request register
- sync += req_q.eq(req_d)
- # request port
- comb += self.address_o .eq(req_q.address)
- comb += self.data_wdata_o .eq(req_q.data)
- comb += self.data_be_o .eq(req_q.be)
- comb += self.data_size_o .eq(req_q.size)
- comb += self.data_we_o .eq(req_q.we)
- comb += self.id_o .eq(req_q.id)
- comb += self.data_gnt_o .eq(0)
- # read port
- comb += self.data_rvalid_o .eq(0)
- comb += self.data_rdata_o .eq(0)
- comb += self.data_rdata_o[req_q.id].eq(data_rdata_i)
- m.submodules.pp = pp = PriorityEncoder(self.NR_PORTS)
- comb += pp.i.eq(self.data_req_i) # select one request (priority-based)
- comb += request_index.eq(pp.o)
- with m.Switch("state") as s:
- with m.Case("IDLE"):
- # wait for incoming requests (priority encoder data_req_i)
- with m.If(~pp.n): # one output valid from encoder
- comb += self.data_req_o .eq(self.data_req_i[i])
- comb += self.data_gnt_o[i].eq(self.data_req_i[i])
- # save the request
- comb += req_d.address.eq(self.address_i[i])
- comb += req_d.id.eq(request_index)
- comb += req_d.data.eq(self.data_wdata_i[i])
- comb += req_d.size.eq(self.data_size_i[i])
- comb += req_d.be.eq(self.data_be_i[i])
- comb += req_d.we.eq(self.data_we_i[i])
- m.next = "SERVING"
- comb += self.address_o .eq(self.address_i[request_index])
- comb += self.data_wdata_o .eq(self.data_wdata_i[request_index])
- comb += self.data_be_o .eq(self.data_be_i[request_index])
- comb += self.data_size_o .eq(self.data_size_i[request_index])
- comb += self.data_we_o .eq(self.data_we_i[request_index])
- comb += self.id_o .eq(request_index)
- with m.Case("SERVING"):
- comb += self.data_req_o.eq(1)
- with m.If (self.data_rvalid_i):
- comb += self.data_rvalid_o[req_q.id].eq(1)
- m.next = "IDLE"
- # ------------
- # Assertions
- # ------------
- # The string below holds the original SystemVerilog assertions verbatim.
- """
-#pragma translate_off
-`ifndef VERILATOR
-# make sure that we eventually get an rvalid after we received a grant
-assert property (@(posedge clk_i) data_gnt_i |-> ##[1:$] data_rvalid_i )
- else begin $error("There was a grant without a rvalid"); $stop(); end
-# assert that there is no grant without a request
-assert property (@(negedge clk_i) data_gnt_i |-> data_req_o)
- else begin $error("There was a grant without a request."); $stop(); end
-# assert that the address does not contain X when request is sent
-assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_o)) )
- else begin $error("address contains X when request is set"); $stop(); end
-#pragma translate_on
- """
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-# Author: Florian Zaruba, ETH Zurich
-# Date: 19/04/2017
-# Description: Memory Management Unit for Ariane, contains TLB and
-# address translation unit. SV48 as defined in
-# Volume II: RISC-V Privileged Architectures V1.10 Page 63
-import ariane_pkg::*;
-from nmigen import Const, Signal, Cat, Module, Mux
-from nmigen.cli import verilog, rtlil
-from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
-from tlb import TLB
-from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
-# Two-bit privilege-level encodings.
-# NOTE(review): presumably machine / supervisor / user mode as in the
-# RISC-V privileged spec -- confirm.
-PRIV_LVL_M = Const(0b11, 2)
-PRIV_LVL_S = Const(0b01, 2)
-PRIV_LVL_U = Const(0b00, 2)
-class RVException:
- def __init__(self):
- self.cause = Signal(64) # cause of exception
- self.tval = Signal(64) # more info of causing exception
- # (e.g.: instruction causing it),
- # address of LD/ST fault
- self.valid = Signal()
- def eq(self, inp):
- res = []
- for (o, i) in zip(self.ports(), inp.ports()):
- res.append(o.eq(i))
- return res
- def __iter__(self):
- yield self.cause
- yield self.tval
- yield self.valid
- def ports(self):
- return list(self)
-class ICacheReqI:
- def __init__(self):
- self.fetch_valid = Signal() # address translation valid
- self.fetch_paddr = Signal(64) # physical address in
- self.fetch_exception = RVException() # exception occurred during fetch
- def __iter__(self):
- yield self.fetch_valid
- yield self.fetch_paddr
- yield from self.fetch_exception
- def ports(self):
- return list(self)
-class ICacheReqO:
- def __init__(self):
- self.fetch_req = Signal() # address translation request
- self.fetch_vaddr = Signal(64) # virtual address out
- def __iter__(self):
- yield self.fetch_req
- yield self.fetch_vaddr
- def ports(self):
- return list(self)
-class MMU:
- def __init__(self, instr_tlb_entries = 4,
- data_tlb_entries = 4,
- asid_width = 1):
- """Declare the MMU's configuration and ports.
-
- Arguments:
- * instr_tlb_entries: stored for use when the instruction TLB is built
- * data_tlb_entries: stored on the instance
-   (presumably sizes the data TLB -- not visible here; confirm)
- * asid_width: width of the asid_i input
- """
- self.instr_tlb_entries = instr_tlb_entries
- self.data_tlb_entries = data_tlb_entries
- self.asid_width = asid_width
- self.flush_i = Signal()
- self.enable_translation_i = Signal()
- self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
- # IF interface
- self.icache_areq_i = ICacheReqO()
- self.icache_areq_o = ICacheReqI()
- # LSU interface
- # this is a more minimalistic interface because the actual addressing
- # logic is handled in the LSU as we distinguish load and stores,
- # what we do here is simple address translation
- self.misaligned_ex_i = RVException()
- self.lsu_req_i = Signal() # request address translation
- self.lsu_vaddr_i = Signal(64) # virtual address in
- self.lsu_is_store_i = Signal() # the translation is requested by a store
- # if we need to walk the page table we can't grant in the same cycle
- # Cycle 0
- self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
- # if translation hits in the DTLB
- # Cycle 1
- self.lsu_valid_o = Signal() # translation is valid
- self.lsu_paddr_o = Signal(64) # translated address
- self.lsu_exception_o = RVException() # addr translate threw exception
- # General control signals
- self.priv_lvl_i = Signal(2)
- self.ld_st_priv_lvl_i = Signal(2)
- self.sum_i = Signal()
- self.mxr_i = Signal()
- # input logic flag_mprv_i,
- self.satp_ppn_i = Signal(44)
- self.asid_i = Signal(self.asid_width)
- self.flush_tlb_i = Signal()
- # Performance counters
- self.itlb_miss_o = Signal()
- self.dtlb_miss_o = Signal()
- # PTW memory interface
- self.req_port_i = DCacheReqO()
- self.req_port_o = DCacheReqI()
- def elaborate(self, platform):
- m = Module()
- iaccess_err = Signal() # insufficient priv to access instr page
- daccess_err = Signal() # insufficient priv to access data page
- ptw_active = Signal() # PTW is currently walking a page table
- walking_instr = Signal() # PTW is walking because of an ITLB miss
- ptw_error = Signal() # PTW threw an exception
- update_vaddr = Signal(48) # guessed
- uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros
- update_ptw_itlb = TLBUpdate(self.asid_width)
- update_ptw_dtlb = TLBUpdate(self.asid_width)
- itlb_lu_access = Signal()
- itlb_content = PTE()
- itlb_is_2M = Signal()
- itlb_is_1G = Signal()
- itlb_is_512G = Signal()
- itlb_lu_hit = Signal()
- dtlb_lu_access = Signal()
- dtlb_content = PTE()
- dtlb_is_2M = Signal()
- dtlb_is_1G = Signal()
- dtlb_is_512G = Signal()
- dtlb_lu_hit = Signal()
- # Assignments
- m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
- dtlb_lu_access.eq(self.lsu_req_i)
- ]
- # ITLB
- m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
- self.asid_width)
- m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
- i_tlb.update_i.eq(update_ptw_itlb),
- i_tlb.lu_access_i.eq(itlb_lu_access),
- i_tlb.lu_asid_i.eq(self.asid_i),
- i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
- itlb_content.eq(i_tlb.lu_content_o),
- itlb_is_2M.eq(i_tlb.lu_is_2M_o),
- itlb_is_1G.eq(i_tlb.lu_is_1G_o),
- itlb_is_512G.eq(i_tlb.lu_is_512G_o),
- itlb_lu_hit.eq(i_tlb.lu_hit_o),
- ]
- # DTLB
- m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
- self.asid_width)
- m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
- d_tlb.update_i.eq(update_ptw_dtlb),
- d_tlb.lu_access_i.eq(dtlb_lu_access),
- d_tlb.lu_asid_i.eq(self.asid_i),
- d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
- dtlb_content.eq(d_tlb.lu_content_o),
- dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
- dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
- dtlb_is_512G.eq(d_tlb.lu_is_512G_o),
- dtlb_lu_hit.eq(d_tlb.lu_hit_o),
- ]
- # PTW
- m.submodules.ptw = ptw = PTW(self.asid_width)
- m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
- walking_instr.eq(ptw.walking_instr_o),
- ptw_error.eq(ptw.ptw_error_o),
- ptw.enable_translation_i.eq(self.enable_translation_i),
- update_vaddr.eq(ptw.update_vaddr_o),
- update_ptw_itlb.eq(ptw.itlb_update_o),
- update_ptw_dtlb.eq(ptw.dtlb_update_o),
- ptw.itlb_access_i.eq(itlb_lu_access),
- ptw.itlb_hit_i.eq(itlb_lu_hit),
- ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
- ptw.dtlb_access_i.eq(dtlb_lu_access),
- ptw.dtlb_hit_i.eq(dtlb_lu_hit),
- ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
- ptw.req_port_i.eq(self.req_port_i),
- self.req_port_o.eq(ptw.req_port_o),
- ]
- # ila_1 i_ila_1 (
- # .clk(clk_i), # input wire clk
- # .probe0({req_port_o.address_tag, req_port_o.address_index}),
- # .probe1(req_port_o.data_req), # input wire [63:0] probe1
- # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2
- # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3
- # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4
- # .probe5(ptw_error), # input wire [1:0] probe5
- # .probe6(update_vaddr), # input wire [0:0] probe6
- # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7
- # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8
- # .probe9(dtlb_lu_access), # input wire [0:0] probe9
- # .probe10(lsu_vaddr_i), # input wire [0:0] probe10
- # .probe11(dtlb_lu_hit), # input wire [0:0] probe11
- # .probe12(itlb_lu_access), # input wire [0:0] probe12
- # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13
- # .probe14(itlb_lu_hit) # input wire [0:0] probe13
- # );
- #-----------------------
- # Instruction Interface
- #-----------------------
- # The instruction interface is a simple request response interface
- # MMU disabled: just pass through
- m.d.comb += [self.icache_areq_o.fetch_valid.eq(
- self.icache_areq_i.fetch_req),
- # play through in case we disabled address translation
- self.icache_areq_o.fetch_paddr.eq(
- self.icache_areq_i.fetch_vaddr)
- ]
- # two potential exception sources:
- # 1. HPTW threw an exception -> signal with a page fault exception
- # 2. We got an access error because of insufficient permissions ->
- # throw an access exception
- m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
- # Check whether we are allowed to access this memory region
- # from a fetch perspective
- # PLATEN TODO: use PermissionValidator instead [we like modules]
- m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
- (((self.priv_lvl_i == PRIV_LVL_U) & \
- ~itlb_content.u) | \
- ((self.priv_lvl_i == PRIV_LVL_S) & \
- itlb_content.u)))
- # MMU enabled: address from TLB, request delayed until hit.
- # Error when TLB hit and no access right or TLB hit and
- # translated address not valid (e.g. AXI decode error),
- # or when PTW performs walk due to ITLB miss and raises
- # an error.
- with m.If (self.enable_translation_i):
- # we work with SV48, so if VM is enabled, check that
- # all bits [47:38] are equal
- with m.If (self.icache_areq_i.fetch_req & \
- ~(((~self.icache_areq_i.fetch_vaddr[47:64]) == 0) | \
- (self.icache_areq_i.fetch_vaddr[47:64]) == 0)):
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
- fe.tval.eq(self.icache_areq_i.fetch_vaddr),
- fe.valid.eq(1)
- ]
- m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
- # 4K page
- paddr = Signal.like(self.icache_areq_o.fetch_paddr)
- paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
- itlb_content.ppn)
- m.d.comb += paddr.eq(paddr4k)
- # Mega page
- with m.If(itlb_is_2M):
- m.d.comb += paddr[12:21].eq(
- self.icache_areq_i.fetch_vaddr[12:21])
- # Giga page
- with m.If(itlb_is_1G):
- m.d.comb += paddr[12:30].eq(
- self.icache_areq_i.fetch_vaddr[12:30])
- m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
- # Tera page
- with m.If(itlb_is_512G):
- m.d.comb += paddr[12:39].eq(
- self.icache_areq_i.fetch_vaddr[12:39])
- m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
- # ---------
- # ITLB Hit
- # --------
- # if we hit the ITLB output the request signal immediately
- with m.If(itlb_lu_hit):
- m.d.comb += self.icache_areq_o.fetch_valid.eq(
- self.icache_areq_i.fetch_req)
- # we got an access error
- with m.If (iaccess_err):
- # throw a page fault
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
- fe.tval.eq(self.icache_areq_i.fetch_vaddr),
- fe.valid.eq(1)
- ]
- # ---------
- # ITLB Miss
- # ---------
- # watch out for exceptions happening during walking the page table
- with m.Elif(ptw_active & walking_instr):
- m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
- fe = self.icache_areq_o.fetch_exception
- m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
- fe.tval.eq(uaddr64),
- fe.valid.eq(1)
- ]
- #-----------------------
- # Data Interface
- #-----------------------
- lsu_vaddr = Signal(64)
- dtlb_pte = PTE()
- misaligned_ex = RVException()
- lsu_req = Signal()
- lsu_is_store = Signal()
- dtlb_hit = Signal()
- #dtlb_is_2M = Signal()
- #dtlb_is_1G = Signal()
- #dtlb_is_512 = Signal()
- # check if we need to do translation or if we are always
- # ready (e.g.: we are not translating anything)
- m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
- dtlb_lu_hit, 1))
- # The data interface is simpler and only consists of a
- # request/response interface
- m.d.comb += [
- # save request and DTLB response
- lsu_vaddr.eq(self.lsu_vaddr_i),
- lsu_req.eq(self.lsu_req_i),
- misaligned_ex.eq(self.misaligned_ex_i),
- dtlb_pte.eq(dtlb_content),
- dtlb_hit.eq(dtlb_lu_hit),
- lsu_is_store.eq(self.lsu_is_store_i),
- #dtlb_is_2M.eq(dtlb_is_2M),
- #dtlb_is_1G.eq(dtlb_is_1G),
- ##dtlb_is_512.eq(self.dtlb_is_512G) #????
- ]
- m.d.sync += [
- self.lsu_paddr_o.eq(lsu_vaddr),
- self.lsu_valid_o.eq(lsu_req),
- self.lsu_exception_o.eq(misaligned_ex),
- ]
- sverr = Signal()
- usrerr = Signal()
- m.d.comb += [
- # mute misaligned exceptions if there is no request
- # otherwise they will throw accidental exceptions
- misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
- # SUM is not set and we are trying to access a user
- # page in supervisor mode
- sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \
- dtlb_pte.u),
- # this is not a user page but we are in user mode and
- # trying to access it
- usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u),
- # Check if the User flag is set, then we may only
- # access it in supervisor mode if SUM is enabled
- daccess_err.eq(sverr | usrerr),
- ]
- # translation is enabled and no misaligned exception occurred
- with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
- m.d.comb += lsu_req.eq(0)
- # 4K page
- paddr = Signal.like(lsu_vaddr)
- paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn)
- m.d.comb += paddr.eq(paddr4k)
- # Mega page
- with m.If(dtlb_is_2M):
- m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
- # Giga page
- with m.If(dtlb_is_1G):
- m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
- m.d.sync += self.lsu_paddr_o.eq(paddr)
- # TODO platen tera_page
- # ---------
- # DTLB Hit
- # --------
- with m.If(dtlb_hit & lsu_req):
- m.d.comb += lsu_req.eq(1)
- # this is a store
- with m.If (lsu_is_store):
- # check if the page is write-able and
- # we are not violating privileges
- # also check if the dirty flag is set
- with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
- le.tval.eq(lsu_vaddr),
- le.valid.eq(1)
- ]
- # this is a load, check for sufficient access
- # privileges - throw a page fault if necessary
- with m.Elif(daccess_err):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
- le.tval.eq(lsu_vaddr),
- le.valid.eq(1)
- ]
- # ---------
- # DTLB Miss
- # ---------
- # watch out for exceptions
- with m.Elif (ptw_active & ~walking_instr):
- # page table walker threw an exception
- with m.If (ptw_error):
- # an error makes the translation valid
- m.d.comb += lsu_req.eq(1)
- # the page table walker can only throw page faults
- with m.If (lsu_is_store):
- le = self.lsu_exception_o
- m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
- le.tval.eq(uaddr64),
- le.valid.eq(1)
- ]
- with m.Else():
- m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
- le.tval.eq(uaddr64),
- le.valid.eq(1)
- ]
- return m
- def ports(self):
- return [self.flush_i, self.enable_translation_i,
- self.en_ld_st_translation_i,
- self.lsu_req_i,
- self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
- self.lsu_valid_o, self.lsu_paddr_o,
- self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
- self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
- self.itlb_miss_o, self.dtlb_miss_o] + \
- self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
- self.req_port_i.ports() + self.req_port_o.ports() + \
- self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
if __name__ == '__main__':
    # Convert a default-configured MMU to RTLIL and write it to test_mmu.il.
    dut = MMU()
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_mmu.il", "w") as il_file:
        il_file.write(il_text)
+++ /dev/null
-two-way set associative - one bit
- indicates which line of the two has been referenced more recently
-four-way set associative - three bits
- each bit represents one branch point in a binary decision tree; let 1
- represent that the left side has been referenced more recently than the
- right side, and 0 vice-versa
- are all 4 lines valid?
- / \
- yes no, use an invalid line
- |
- |
- |
- bit_0 == 0? state | replace ref to | next state
- / \ ------+-------- -------+-----------
- y n 00x | line_0 line_0 | 11_
- / \ 01x | line_1 line_1 | 10_
- bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
- / \ / \ 1x1 | line_3 line_3 | 0_0
- y n y n
- / \ / \ ('x' means ('_' means unchanged)
- line_0 line_1 line_2 line_3 don't care)
- (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
- Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
-note that there is a 6-bit encoding for true LRU for four-way set associative
- bit 0: bank[1] more recently used than bank[0]
- bit 1: bank[2] more recently used than bank[0]
- bit 2: bank[2] more recently used than bank[1]
- bit 3: bank[3] more recently used than bank[0]
- bit 4: bank[3] more recently used than bank[1]
- bit 5: bank[3] more recently used than bank[2]
- this results in 24 valid bit patterns within the 64 possible bit patterns
- (4! possible valid traces for bank references)
- e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
- you can implement a state machine with a 256x6 ROM (6-bit state encoding
- appended with a 2-bit bank reference input will yield a new 6-bit state),
- and you can implement an LRU bank indicator with a 64x2 ROM
+++ /dev/null
-from nmigen import Signal, Module, Cat, Const
-from nmigen.hdl.ir import Elaboratable
-from math import log2
class PLRU(Elaboratable):
    r""" PLRU - Pseudo Least Recently Used Replacement

        PLRU-tree indexing:
        lvl0        0
                   / \
                  /   \
        lvl1     1     2
                / \   / \
        lvl2   3   4 5   6
              / \ /\/\  /\
             ... ... ... ...

        Inputs:
        * lu_hit: one bit per entry, the entry that was hit
        * lu_access_i: qualifies lu_hit
        * plru_tree: current tree state

        Outputs:
        * plru_tree_o: next tree state
        * replace_en_o: one bit per entry, the entry to replace next
    """

    def __init__(self, entries):
        self.entries = entries
        self.lu_hit = Signal(entries)
        self.replace_en_o = Signal(entries)
        self.lu_access_i = Signal()
        # Tree (bit per entry)
        self.TLBSZ = 2*(self.entries-1)
        self.plru_tree = Signal(self.TLBSZ)
        self.plru_tree_o = Signal(self.TLBSZ)

    @staticmethod
    def _tree_node(i, lvl, log_entries):
        """Return (tree node index, index bit) visited by entry ``i`` at
        tree level ``lvl``.  lvl0 <=> MSB of i, lvl1 <=> MSB-1, ..."""
        idx_base = (1 << lvl) - 1
        shift = log_entries - lvl
        return idx_base + (i >> shift), (i >> (shift - 1)) & 1

    def elaborate(self, platform=None):
        m = Module()

        # floor(log2(entries)), computed without floating point
        log_entries = self.entries.bit_length() - 1

        # Nodes that are not rewritten by a hit keep their current value;
        # without this default every untouched node would be driven to 0.
        m.d.comb += self.plru_tree_o.eq(self.plru_tree)

        # Just predefine which nodes will be set/cleared
        # E.g. for a TLB with 8 entries, the for-loop is semantically
        # equivalent to the following pseudo-code:
        # unique case (1'b1)
        # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
        # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
        # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
        # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
        # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
        # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
        # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
        # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
        # default: begin /* No hit */ end
        # endcase
        for i in range(self.entries):
            # we got a hit so update the pointer as it was least recently used
            hit = Signal(reset_less=True)
            m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
            with m.If(hit):
                # Set the nodes to the values we would expect:
                # each node on the path gets the inverse of the index bit
                for lvl in range(log_entries):
                    plru_idx, bit = self._tree_node(i, lvl, log_entries)
                    m.d.comb += self.plru_tree_o[plru_idx].eq(
                                                Const(1 - bit, 1))

        # Decode tree to write enable signals
        # Next for-loop basically creates the following logic for e.g.
        # an 8 entry TLB (note: pseudo-code obviously):
        # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
        # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
        # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
        # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
        # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
        # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
        # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
        # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
        # For each entry traverse the tree. If every tree-node matches
        # the corresponding bit of the entry's index, this is
        # the next entry to replace.
        replace = []
        for i in range(self.entries):
            en = []
            for lvl in range(log_entries):
                plru_idx, bit = self._tree_node(i, lvl, log_entries)
                plru = Signal(reset_less=True,
                              name="plru-%d-%d-%d" % (i, lvl, plru_idx))
                m.d.comb += plru.eq(self.plru_tree[plru_idx])
                # each term is stored inverted so that a single bool()
                # (an OR) below can stand in for the AND of all terms
                en.append(~plru if bit else plru)

            # boolean logic manipulation:
            # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
            replace.append(~Cat(*en).bool())
        m.d.comb += self.replace_en_o.eq(Cat(*replace))

        return m

    def ports(self):
        """Return the signals of this module (``entries`` is a plain int
        and therefore not part of the list)."""
        return [self.lu_hit, self.replace_en_o,
                self.lu_access_i, self.plru_tree, self.plru_tree_o]
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 24.4.2017
-# Description: Hardware-PTW
-/* verilator lint_off WIDTH */
-import ariane_pkg::*;
-see linux kernel source:
-* "arch/riscv/include/asm/page.h"
-* "arch/riscv/include/asm/mmu_context.h"
-* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
-from nmigen import Const, Signal, Cat, Module, Elaboratable
-from nmigen.hdl.ast import ArrayProxy
-from nmigen.cli import verilog, rtlil
-from math import log2
# Data-cache geometry.  DCACHE_INDEX_WIDTH and DCACHE_TAG_WIDTH are used
# below to size the cache request port and to split the walker's pointer.
# NOTE(review): associativity and the 56-bit physical address width follow
# the Ariane reference configuration - confirm against the target cache.
DCACHE_SET_ASSOC = 8
CONFIG_L1D_SIZE = 32*1024
# ceil(log2(bytes per way)), computed with integer arithmetic
DCACHE_INDEX_WIDTH = (CONFIG_L1D_SIZE // DCACHE_SET_ASSOC - 1).bit_length()
# the tag is whatever remains of the physical address above the index
DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH
class DCacheReqI:
    """Request-side signal bundle of the data-cache port."""

    def __init__(self):
        self.address_index = Signal(DCACHE_INDEX_WIDTH)
        self.address_tag = Signal(DCACHE_TAG_WIDTH)
        self.data_wdata = Signal(64)
        self.data_req = Signal()
        self.data_we = Signal()
        self.data_be = Signal(8)
        self.data_size = Signal(2)
        self.kill_req = Signal()
        self.tag_valid = Signal()

    def eq(self, inp):
        """Return a list assigning every signal of ``inp`` to the signal
        at the same position in this bundle."""
        return [dst.eq(src) for dst, src in zip(self.ports(), inp.ports())]

    def ports(self):
        """Return all member signals as a list, in declaration order."""
        return [
            self.address_index,
            self.address_tag,
            self.data_wdata,
            self.data_req,
            self.data_we,
            self.data_be,
            self.data_size,
            self.kill_req,
            self.tag_valid,
        ]
class DCacheReqO:
    """Response-side signal bundle of the data-cache port."""

    def __init__(self):
        self.data_gnt = Signal()
        self.data_rvalid = Signal()
        # actually in PTE object format
        self.data_rdata = Signal(64)

    def eq(self, inp):
        """Return a list assigning every signal of ``inp`` to the signal
        at the same position in this bundle."""
        return [dst.eq(src) for dst, src in zip(self.ports(), inp.ports())]

    def ports(self):
        """Return all member signals as a list, in declaration order."""
        return [
            self.data_gnt,
            self.data_rvalid,
            self.data_rdata,
        ]
class PTE: #(RecordObject):
    """Page-table entry as individual signals; 64 bits in total when
    flattened."""

    def __init__(self):
        self.v = Signal()
        self.r = Signal()
        self.w = Signal()
        self.x = Signal()
        self.u = Signal()
        self.g = Signal()
        self.a = Signal()
        self.d = Signal()
        self.rsw = Signal(2)
        self.ppn = Signal(44)
        self.reserved = Signal(10)

    def flatten(self):
        """Return all fields concatenated, first field in the LSB."""
        return Cat(*self.ports())

    def eq(self, x):
        """Return an assignment of ``x`` to this entry.

        An ArrayProxy source is read field by field, looked up by each
        signal's name; any other source must provide ``flatten()``.
        """
        if isinstance(x, ArrayProxy):
            x = Cat(*[getattr(x, field.name) for field in self.ports()])
        else:
            x = x.flatten()
        return self.flatten().eq(x)

    def __iter__(self):
        """ order is critical so that flatten creates LSB to MSB
        """
        yield from (self.v, self.r, self.w, self.x,
                    self.u, self.g, self.a, self.d)
        yield from (self.rsw, self.ppn, self.reserved)

    def ports(self):
        """Return all fields as a list, in iteration order."""
        return [*self]
class TLBUpdate:
    """Signals describing one TLB update: page-size flags, virtual page
    number, ASID and the page-table entry to store.

    Arguments:
    * asid_width: width of the ``asid`` signal
    """

    def __init__(self, asid_width):
        self.valid = Signal() # valid flag
        self.is_2M = Signal()
        self.is_1G = Signal()
        self.is_512G = Signal()
        self.vpn = Signal(36)
        self.asid = Signal(asid_width)
        self.content = PTE()

    def flatten(self):
        """Return all member signals concatenated, first one in the LSB."""
        return Cat(*self.ports())

    def eq(self, x):
        """Return an assignment of another TLBUpdate ``x`` to this one."""
        return self.flatten().eq(x.flatten())

    def ports(self):
        """Return every member signal as a list.

        is_512G is part of the list so that flatten()/eq() carry it along
        with the other page-size flags.
        """
        return [self.valid, self.is_2M, self.is_1G, self.is_512G,
                self.vpn, self.asid] + \
                self.content.ports()
# SV48 defines four levels of page tables, encoded as 2-bit constants 0..3.
# LVL1 is 0 so that ptw_lvl default-resets to LVL1.
LVL1, LVL2, LVL3, LVL4 = (Const(code, 2) for code in range(4))
class PTW(Elaboratable):
    """Hardware page-table walker for SV48 (four table levels).

    On an ITLB or DTLB miss the walker fetches page-table entries through
    the data-cache request port, one level at a time, and either emits a
    TLB update (itlb_update_o / dtlb_update_o) or raises ptw_error_o.

    Arguments:
    * asid_width: width of the address-space identifier signals
    """

    def __init__(self, asid_width=8):
        self.asid_width = asid_width

        self.flush_i = Signal() # flush everything, we need to do this because
        # actually everything we do is speculative at this stage
        # e.g.: there could be a CSR instruction that changes everything
        # active if not IDLE: driven combinationally, low only in IDLE
        # (the reset value of 1 is the default in every other state)
        self.ptw_active_o = Signal(reset=1)
        self.walking_instr_o = Signal() # set when walking for TLB
        self.ptw_error_o = Signal() # set when an error occurred
        self.enable_translation_i = Signal() # CSRs indicate to enable SV48
        self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st

        self.lsu_is_store_i = Signal() # translation triggered by store
        # PTW memory interface
        self.req_port_i = DCacheReqO()
        self.req_port_o = DCacheReqI()

        # to TLBs, update logic
        self.itlb_update_o = TLBUpdate(asid_width)
        self.dtlb_update_o = TLBUpdate(asid_width)

        self.update_vaddr_o = Signal(48)

        self.asid_i = Signal(self.asid_width)
        # from TLBs
        # did we miss?
        self.itlb_access_i = Signal()
        self.itlb_hit_i = Signal()
        self.itlb_vaddr_i = Signal(64)

        self.dtlb_access_i = Signal()
        self.dtlb_hit_i = Signal()
        self.dtlb_vaddr_i = Signal(64)
        # from CSR file
        self.satp_ppn_i = Signal(44) # ppn from satp
        self.mxr_i = Signal()
        # Performance counters
        self.itlb_miss_o = Signal()
        self.dtlb_miss_o = Signal()

    def ports(self):
        """Return the flat list of all interface signals.

        The request ports and TLB update bundles are expanded into their
        member signals so the list contains Signals only.
        """
        return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
                self.flush_i,
                self.enable_translation_i, self.en_ld_st_translation_i,
                self.lsu_is_store_i,
                self.update_vaddr_o,
                self.asid_i,
                self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
                self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
                self.satp_ppn_i, self.mxr_i,
                self.itlb_miss_o, self.dtlb_miss_o
               ] + self.req_port_i.ports() + self.req_port_o.ports() + \
               self.itlb_update_o.ports() + self.dtlb_update_o.ports()

    def elaborate(self, platform):
        """Build and return the Module containing the walker FSM."""
        m = Module()

        # input registers
        data_rvalid = Signal()
        data_rdata = Signal(64)

        # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
        # is spec'd in 64-bit binary-format: better to spec as Record?
        pte = PTE()
        m.d.comb += pte.flatten().eq(data_rdata)

        # SV48 defines four levels of page tables
        ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
        ptw_lvl1 = Signal()
        ptw_lvl2 = Signal()
        ptw_lvl3 = Signal()
        ptw_lvl4 = Signal()
        m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
                     ptw_lvl2.eq(ptw_lvl == LVL2),
                     ptw_lvl3.eq(ptw_lvl == LVL3),
                     ptw_lvl4.eq(ptw_lvl == LVL4)
                    ]

        # is this an instruction page table walk?
        is_instr_ptw = Signal()
        global_mapping = Signal()
        # latched tag signal
        tag_valid = Signal()
        # register the ASID
        tlb_update_asid = Signal(self.asid_width)
        # register VPN we need to walk, SV48 defines a 48 bit virtual addr
        vaddr = Signal(64)
        # 8 byte aligned physical pointer (the low three bits are always 0)
        ptw_pptr = Signal(56)

        m.d.sync += [
            # Assignments
            self.update_vaddr_o.eq(vaddr),

            self.walking_instr_o.eq(is_instr_ptw),
            # directly output the correct physical address: the index is
            # the low DCACHE_INDEX_WIDTH bits, the tag is everything above
            self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
            self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:]),
            # we are never going to kill this request
            self.req_port_o.kill_req.eq(0), # XXX assign comb?
            # we are never going to write with the HPTW
            self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
            # -----------
            # TLB Update
            # -----------
            self.itlb_update_o.vpn.eq(vaddr[12:48]),
            self.dtlb_update_o.vpn.eq(vaddr[12:48]),
            # update the correct page table level
            self.itlb_update_o.is_2M.eq(ptw_lvl3),
            self.itlb_update_o.is_1G.eq(ptw_lvl2),
            self.itlb_update_o.is_512G.eq(ptw_lvl1),
            self.dtlb_update_o.is_2M.eq(ptw_lvl3),
            self.dtlb_update_o.is_1G.eq(ptw_lvl2),
            self.dtlb_update_o.is_512G.eq(ptw_lvl1),

            # output the correct ASID
            self.itlb_update_o.asid.eq(tlb_update_asid),
            self.dtlb_update_o.asid.eq(tlb_update_asid),
            # set the global mapping bit (overrides the g bit copied from
            # pte by the assignment just before it)
            self.itlb_update_o.content.eq(pte),
            self.itlb_update_o.content.g.eq(global_mapping),
            self.dtlb_update_o.content.eq(pte),
            self.dtlb_update_o.content.g.eq(global_mapping),

            self.req_port_o.tag_valid.eq(tag_valid),
        ]

        #-------------------
        # Page table walker
        #-------------------
        # A virtual address va is translated into a physical address pa as
        # follows:
        # 1. Let a be satp.ppn x PAGESIZE, and let i = LEVELS-1. (For Sv48,
        #    PAGESIZE=2^12 and LEVELS=4.)
        # 2. Let pte be the value of the PTE at address a+va.vpn[i]xPTESIZE.
        #    (For Sv48, PTESIZE=8.)
        # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise a
        #    page-fault exception.
        # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
        #    step 5. Otherwise, this PTE is a pointer to the next level of
        #    the page table.
        #    Let i=i-1. If i < 0, stop and raise a page-fault exception.
        #    Otherwise, let a = pte.ppn x PAGESIZE and go to step 2.
        # 5. A leaf PTE has been found. Determine if the requested memory
        #    access is allowed by the pte.r, pte.w, and pte.x bits. If not,
        #    stop and raise a page-fault exception. Otherwise, the
        #    translation is successful.
        #    The translated physical address is given as follows:
        #    - pa.pgoff = va.pgoff.
        #    - If i > 0, then this is a superpage translation and
        #      pa.ppn[i-1:0] = va.vpn[i-1:0].
        #    - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
        # 6. If i > 0 and pa.ppn[i-1:0] != 0, this is a misaligned
        #    superpage; stop and raise a page-fault exception.

        m.d.sync += tag_valid.eq(0)

        # default assignments
        m.d.comb += [
            # PTW memory interface
            self.req_port_o.data_req.eq(0),
            self.req_port_o.data_be.eq(Const(0xFF, 8)),
            self.req_port_o.data_size.eq(Const(0b11, 2)),
            self.req_port_o.data_we.eq(0),
            self.ptw_error_o.eq(0),
            self.itlb_update_o.valid.eq(0),
            self.dtlb_update_o.valid.eq(0),

            self.itlb_miss_o.eq(0),
            self.dtlb_miss_o.eq(0),
            # active unless the IDLE state overrides this below
            self.ptw_active_o.eq(1),
        ]

        # ------------
        # State Machine
        # ------------

        with m.FSM() as fsm:

            with m.State("IDLE"):
                self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
                          ptw_pptr, vaddr, tlb_update_asid)

            with m.State("WAIT_GRANT"):
                self.grant(m, tag_valid, data_rvalid)

            with m.State("PTE_LOOKUP"):
                # we wait for the valid signal
                with m.If(data_rvalid):
                    self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2,
                                ptw_lvl3, ptw_lvl4,
                                data_rvalid, global_mapping,
                                is_instr_ptw, ptw_pptr, vaddr)

            # Propagate error to MMU/LSU
            with m.State("PROPAGATE_ERROR"):
                m.next = "IDLE"
                m.d.comb += self.ptw_error_o.eq(1)

            # wait for the rvalid before going back to IDLE
            with m.State("WAIT_RVALID"):
                with m.If(data_rvalid):
                    m.next = "IDLE"

        m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
                     data_rvalid.eq(self.req_port_i.data_rvalid)
                    ]

        return m

    def set_grant_state(self, m):
        """Select the state that follows a new memory request."""
        # should we have flushed before we got an rvalid,
        # wait for it until going back to IDLE
        with m.If(self.flush_i):
            with m.If(self.req_port_i.data_gnt):
                m.next = "WAIT_RVALID"
            with m.Else():
                m.next = "IDLE"
        with m.Else():
            m.next = "WAIT_GRANT"

    def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
                      ptw_pptr, vaddr, tlb_update_asid):
        """IDLE state: detect a TLB miss and set up the first-level read."""
        # by default we start with the top-most page table
        m.d.sync += [is_instr_ptw.eq(0),
                     ptw_lvl.eq(LVL1),
                     global_mapping.eq(0),
                    ]
        # deactive (IDLE); combinational so the flag rises again as soon
        # as the FSM leaves this state
        m.d.comb += self.ptw_active_o.eq(0)
        # work out itlb/dtlb miss
        m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i &
                                        self.itlb_access_i &
                                        ~self.itlb_hit_i &
                                        ~self.dtlb_access_i)
        m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i &
                                        self.dtlb_access_i &
                                        ~self.dtlb_hit_i)
        # The first-level table is indexed by the top 9 VPN bits [39:48];
        # 3 + 9 + 44 bits exactly fill the 56-bit pointer.
        # we got an ITLB miss?
        with m.If(self.itlb_miss_o):
            pptr = Cat(Const(0, 3), self.itlb_vaddr_i[39:48],
                       self.satp_ppn_i)
            m.d.sync += [ptw_pptr.eq(pptr),
                         is_instr_ptw.eq(1),
                         vaddr.eq(self.itlb_vaddr_i),
                         tlb_update_asid.eq(self.asid_i),
                        ]
            self.set_grant_state(m)

        # we got a DTLB miss?
        with m.Elif(self.dtlb_miss_o):
            pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[39:48],
                       self.satp_ppn_i)
            m.d.sync += [ptw_pptr.eq(pptr),
                         vaddr.eq(self.dtlb_vaddr_i),
                         tlb_update_asid.eq(self.asid_i),
                        ]
            self.set_grant_state(m)

    def grant(self, m, tag_valid, data_rvalid):
        """WAIT_GRANT state: issue the request and wait for the grant."""
        # we've got a data WAIT_GRANT so tell the
        # cache that the tag is valid

        # send a request out
        m.d.comb += self.req_port_o.data_req.eq(1)
        # wait for the WAIT_GRANT
        with m.If(self.req_port_i.data_gnt):
            # send the tag valid signal one cycle later
            m.d.sync += tag_valid.eq(1)
            # should we have flushed before we got an rvalid,
            # wait for it until going back to IDLE
            with m.If(self.flush_i):
                with m.If(~data_rvalid):
                    m.next = "WAIT_RVALID"
                with m.Else():
                    m.next = "IDLE"
            with m.Else():
                m.next = "PTE_LOOKUP"

    def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4,
                     data_rvalid, global_mapping,
                     is_instr_ptw, ptw_pptr, vaddr=None):
        """PTE_LOOKUP state: evaluate the PTE that was just read.

        ``vaddr`` is the registered virtual address being walked; it
        supplies the index bits for the next table level.  When omitted,
        dtlb_vaddr_i is used.
        """
        if vaddr is None:
            vaddr = self.dtlb_vaddr_i

        # temporaries
        pte_rx = Signal(reset_less=True)
        pte_exe = Signal(reset_less=True)
        pte_inv = Signal(reset_less=True)
        pte_a = Signal(reset_less=True)
        st_wd = Signal(reset_less=True)
        m.d.comb += [pte_rx.eq(pte.r | pte.x),
                     pte_exe.eq(~pte.x | ~pte.a),
                     pte_inv.eq(~pte.v | (~pte.r & pte.w)),
                     pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
                     st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]

        # superpage alignment errors; the comparisons are parenthesised
        # because `&` binds tighter than `!=` in Python
        l1err = Signal(reset_less=True)
        l2err = Signal(reset_less=True)
        l3err = Signal(reset_less=True)
        m.d.comb += [l3err.eq(ptw_lvl3 & (pte.ppn[0:9] != 0)),
                     l2err.eq(ptw_lvl2 & (pte.ppn[0:18] != 0)),
                     l1err.eq(ptw_lvl1 & (pte.ppn[0:27] != 0))]

        # check if the global mapping bit is set
        with m.If(pte.g):
            m.d.sync += global_mapping.eq(1)

        m.next = "IDLE"

        # -------------
        # Invalid PTE
        # -------------
        # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
        # stop and raise a page-fault exception.
        with m.If(pte_inv):
            m.next = "PROPAGATE_ERROR"

        # -----------
        # Valid PTE
        # -----------

        # it is a valid PTE
        # if pte.r = 1 or pte.x = 1 it is a valid PTE
        with m.Elif(pte_rx):
            # Valid translation found (either 512G, 1G, 2M or 4K)
            with m.If(is_instr_ptw):
                # ------------
                # Update ITLB
                # ------------
                # If page not executable, we can directly raise error.
                # This doesn't put a useless entry into the TLB.
                # The same idea applies to the access flag since we let
                # the access flag be managed by SW.
                with m.If(pte_exe):
                    m.next = "PROPAGATE_ERROR"
                with m.Else():
                    m.d.comb += self.itlb_update_o.valid.eq(1)

            with m.Else():
                # ------------
                # Update DTLB
                # ------------
                # Check if the access flag has been set, otherwise
                # throw page-fault and let software handle those bits.
                # If page not readable (there are no write-only pages)
                # directly raise an error. This doesn't put a useless
                # entry into the TLB.
                with m.If(pte_a):
                    m.d.comb += self.dtlb_update_o.valid.eq(1)
                with m.Else():
                    m.next = "PROPAGATE_ERROR"
                # Request is a store: perform additional checks
                # If the request was a store and the page not
                # write-able, raise an error
                # the same applies if the dirty flag is not set
                with m.If(st_wd):
                    m.d.comb += self.dtlb_update_o.valid.eq(0)
                    m.next = "PROPAGATE_ERROR"

            # check if the ppn is correctly aligned: Case (6)
            with m.If(l1err | l2err | l3err):
                m.next = "PROPAGATE_ERROR"
                m.d.comb += [self.dtlb_update_o.valid.eq(0),
                             self.itlb_update_o.valid.eq(0)]

        # this is a pointer to the next TLB level
        with m.Else():
            # pointer to next level of page table
            with m.If(ptw_lvl1):
                # we are in the second level now
                pptr = Cat(Const(0, 3), vaddr[30:39], pte.ppn)
                m.d.sync += [ptw_pptr.eq(pptr),
                             ptw_lvl.eq(LVL2)
                            ]
            with m.If(ptw_lvl2):
                # here we received a pointer to the third level
                pptr = Cat(Const(0, 3), vaddr[21:30], pte.ppn)
                m.d.sync += [ptw_pptr.eq(pptr),
                             ptw_lvl.eq(LVL3)
                            ]
            with m.If(ptw_lvl3):
                # here we received a pointer to the fourth level
                # the last one is near the page offset
                pptr = Cat(Const(0, 3), vaddr[12:21], pte.ppn)
                m.d.sync += [ptw_pptr.eq(pptr),
                             ptw_lvl.eq(LVL4)
                            ]
            self.set_grant_state(m)

            with m.If(ptw_lvl4):
                # Should already be the last level
                # page table => Error
                m.d.sync += ptw_lvl.eq(LVL4)
                m.next = "PROPAGATE_ERROR"
-if __name__ == '__main__':
- ptw = PTW()
- vl = rtlil.convert(ptw, ports=ptw.ports())
- with open("test_ptw.il", "w") as f:
- f.write(vl)
+++ /dev/null
-import sys
-from TLB.ariane.plru import PLRU
-from nmigen.compat.sim import run_simulation
-def tbench(dut):
- yield
-if __name__ == "__main__":
- dut = PLRU(4)
- run_simulation(dut, tbench(dut), vcd_name="test_plru.vcd")
- print("PLRU Unit Test Success")
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from TLB.ariane.ptw import PTW, PTE
-# unit was changed, test needs to be changed
-def tbench(dut):
- addr = 0x8000000
- #pte = PTE()
- #yield pte.v.eq(1)
- #yield pte.r.eq(1)
- yield dut.req_port_i.data_gnt.eq(1)
- yield dut.req_port_i.data_rvalid.eq(1)
- yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten())
- # data lookup
- yield dut.en_ld_st_translation_i.eq(1)
- yield dut.asid_i.eq(1)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000000)
- yield
- yield
- yield
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x200000)
- yield
- yield
- yield
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000011)
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
- # data lookup, PTW levels 1-2-3
- addr = 0x4000000
- yield dut.dtlb_vaddr_i.eq(addr)
- yield dut.mxr_i.eq(0x1)
- yield dut.req_port_i.data_gnt.eq(1)
- yield dut.req_port_i.data_rvalid.eq(1)
- yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten())
- yield dut.en_ld_st_translation_i.eq(1)
- yield dut.asid_i.eq(1)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(addr)
- yield
- yield
- yield
- yield
- yield
- yield
- yield
- yield
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.dtlb_access_i.eq(1)
- yield dut.dtlb_hit_i.eq(0)
- yield dut.dtlb_vaddr_i.eq(0x400000011)
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
- yield
- yield
- # instruction lookup
- yield dut.en_ld_st_translation_i.eq(0)
- yield dut.enable_translation_i.eq(1)
- yield dut.asid_i.eq(1)
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x800000)
- yield
- yield
- yield
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x200000)
- yield
- yield
- yield
- yield dut.req_port_i.data_gnt.eq(0)
- yield dut.itlb_access_i.eq(1)
- yield dut.itlb_hit_i.eq(0)
- yield dut.itlb_vaddr_i.eq(0x800011)
- yield
- yield dut.req_port_i.data_gnt.eq(1)
- yield
- yield
- yield
-def test_ptw():
- dut = PTW()
- run_simulation(dut, tbench(dut), vcd_name="test_ptw.vcd")
- print("PTW Unit Test Success")
-if __name__ == "__main__":
- test_ptw()
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from TLB.ariane.tlb import TLB
-def set_vaddr(addr):
- yield dut.lu_vaddr_i.eq(addr)
- yield dut.update_i.vpn.eq(addr>>12)
-def tbench(dut):
- yield dut.lu_access_i.eq(1)
- yield dut.lu_asid_i.eq(1)
- yield dut.update_i.valid.eq(1)
- yield dut.update_i.is_1G.eq(0)
- yield dut.update_i.is_2M.eq(0)
- yield dut.update_i.asid.eq(1)
- yield dut.update_i.content.ppn.eq(0)
- yield dut.update_i.content.rsw.eq(0)
- yield dut.update_i.content.r.eq(1)
- yield
- addr = 0x80000
- yield from set_vaddr(addr)
- yield
- addr = 0x90001
- yield from set_vaddr(addr)
- yield
- addr = 0x28000000
- yield from set_vaddr(addr)
- yield
- addr = 0x28000001
- yield from set_vaddr(addr)
- addr = 0x28000001
- yield from set_vaddr(addr)
- yield
- addr = 0x1000040000
- yield from set_vaddr(addr)
- yield
- addr = 0x1000040001
- yield from set_vaddr(addr)
- yield
- yield dut.update_i.is_1G.eq(1)
- addr = 0x2040000
- yield from set_vaddr(addr)
- yield
- yield dut.update_i.is_1G.eq(1)
- addr = 0x2040001
- yield from set_vaddr(addr)
- yield
- yield
-if __name__ == "__main__":
- dut = TLB()
- run_simulation(dut, tbench(dut), vcd_name="test_tlb.vcd")
- print("TLB Unit Test Success")
+++ /dev/null
-import sys
-from nmigen.compat.sim import run_simulation
-from TLB.ariane.tlb_content import TLBContent
-from TestUtil.test_helper import assert_op, assert_eq
-def update(dut,a,t,g,m):
- yield dut.replace_en_i.eq(1)
- yield dut.update_i.valid.eq(1)
- yield dut.update_i.is_512G.eq(t)
- yield dut.update_i.is_1G.eq(g)
- yield dut.update_i.is_2M.eq(m)
- yield dut.update_i.vpn.eq(a)
- yield
- yield
-def check_hit(dut,hit,pagesize):
- hit_d = yield dut.lu_hit_o
- assert_eq("hit", hit_d, hit)
- if(hit):
- if(pagesize=="t"):
- hitp = yield dut.lu_is_512G_o
- assert_eq("lu_is_512G_o", hitp, 1)
- elif(pagesize=="g"):
- hitp = yield dut.lu_is_1G_o
- assert_eq("lu_is_1G_o", hitp, 1)
- elif(pagesize=="m"):
- hitp = yield dut.lu_is_2M_o
- assert_eq("lu_is_2M_o", hitp, 1)
-def addr(a,b,c,d):
- return a | b << 9 | c << 18 | d << 27
-def tbench(dut):
- yield dut.vpn0.eq(0x0A)
- yield dut.vpn1.eq(0x0B)
- yield dut.vpn2.eq(0x0C)
- yield dut.vpn3.eq(0x0D)
- yield from update(dut,addr(0xFF,0xFF,0xFF,0x0D),1,0,0)
- yield from check_hit(dut,1,"t")
- yield from update(dut,addr(0xFF,0xFF,0x0C,0x0D),0,1,0)
- yield from check_hit(dut,1,"g")
- yield from update(dut,addr(0xFF,0x0B,0x0C,0x0D),0,0,1)
- yield from check_hit(dut,1,"m")
- yield from update(dut,addr(0x0A,0x0B,0x0C,0x0D),0,0,0)
- yield from check_hit(dut,1,"")
- yield from update(dut,addr(0xAA,0xBB,0xCC,0xDD),0,0,0)
- yield from check_hit(dut,0,"miss")
-if __name__ == "__main__":
- dut = TLBContent(4,4)
- #
- run_simulation(dut, tbench(dut), vcd_name="test_tlb_content.vcd")
- print("TLBContent Unit Test Success")
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-# Author: David Schaffenrath, TU Graz
-# Author: Florian Zaruba, ETH Zurich
-# Date: 21.4.2017
-# Description: Translation Lookaside Buffer, SV48
-# fully set-associative
-Implementation in c++:
-Text description:
-Online simulator:
-from math import log2
-from nmigen import Signal, Module, Cat, Const, Array, Elaboratable
-from nmigen.cli import verilog, rtlil
-from nmigen.lib.coding import Encoder
-from TLB.ariane.ptw import TLBUpdate, PTE, ASID_WIDTH
-from TLB.ariane.plru import PLRU
-from TLB.ariane.tlb_content import TLBContent
-class TLB(Elaboratable):
- def __init__(self, tlb_entries=8, asid_width=8):
- self.tlb_entries = tlb_entries
- self.asid_width = asid_width
- self.flush_i = Signal() # Flush signal
- # Lookup signals
- self.lu_access_i = Signal()
- self.lu_asid_i = Signal(self.asid_width)
- self.lu_vaddr_i = Signal(64)
- self.lu_content_o = PTE()
- self.lu_is_2M_o = Signal()
- self.lu_is_1G_o = Signal()
- self.lu_is_512G_o = Signal()
- self.lu_hit_o = Signal()
- # Update TLB
- self.pte_width = len(self.lu_content_o.flatten())
- self.update_i = TLBUpdate(asid_width)
- def elaborate(self, platform):
- m = Module()
- vpn3 = Signal(9) #FIXME unused signal
- vpn2 = Signal(9)
- vpn1 = Signal(9)
- vpn0 = Signal(9)
- #-------------
- # Translation
- #-------------
- # SV48 defines four levels of page tables
- m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
- vpn1.eq(self.lu_vaddr_i[21:30]),
- vpn2.eq(self.lu_vaddr_i[30:39]),
- vpn3.eq(self.lu_vaddr_i[39:48]), ### FIXME
- ]
- tc = []
- for i in range(self.tlb_entries):
- tlc = TLBContent(self.pte_width, self.asid_width)
- setattr(m.submodules, "tc%d" % i, tlc)
- tc.append(tlc)
- # connect inputs
- tlc.update_i = self.update_i # saves a lot of graphviz links
- m.d.comb += [tlc.vpn0.eq(vpn0),
- tlc.vpn1.eq(vpn1),
- tlc.vpn2.eq(vpn2),
- # TODO 4th
- tlc.flush_i.eq(self.flush_i),
- #tlc.update_i.eq(self.update_i),
- tlc.lu_asid_i.eq(self.lu_asid_i)]
- tc = Array(tc)
- #--------------
- # Select hit
- #--------------
- # use Encoder to select hit index
- # XXX TODO: assert that there's only one valid entry (one lu_hit)
- hitsel = Encoder(self.tlb_entries)
- m.submodules.hitsel = hitsel
- hits = []
- for i in range(self.tlb_entries):
- hits.append(tc[i].lu_hit_o)
- m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
- idx = hitsel.o
- active = Signal(reset_less=True)
- m.d.comb += active.eq(~hitsel.n)
- with m.If(active):
- # active hit, send selected as output
- m.d.comb += [ self.lu_is_512G_o.eq(tc[idx].lu_is_512G_o),
- self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
- self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
- self.lu_hit_o.eq(1),
- self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
- ]
- #--------------
- # PLRU.
- #--------------
- p = PLRU(self.tlb_entries)
- plru_tree = Signal(p.TLBSZ)
- m.submodules.plru = p
- # connect PLRU inputs/outputs
- # XXX TODO: assert that there's only one valid entry (one replace_en)
- en = []
- for i in range(self.tlb_entries):
- en.append(tc[i].replace_en_i)
- m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
- p.lu_hit.eq(hitsel.i),
- p.lu_access_i.eq(self.lu_access_i),
- p.plru_tree.eq(plru_tree)]
- m.d.sync += plru_tree.eq(p.plru_tree_o)
- #--------------
- # Sanity checks
- #--------------
- assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
- "TLB size must be a multiple of 2 and greater than 1"
- assert (self.asid_width >= 1), \
- "ASID width must be at least 1"
- return m
- """
- # Just for checking
- function int countSetBits(logic[self.tlb_entries-1:0] vector);
- automatic int count = 0;
- foreach (vector[idx]) begin
- count += vector[idx];
- end
- return count;
- endfunction
- assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
- else $error("More then one hit in TLB!"); $stop(); end
- assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
- else $error("More then one TLB entry selected for next replace!");
- """
- def ports(self):
- return [self.flush_i, self.lu_access_i,
- self.lu_asid_i, self.lu_vaddr_i,
- self.lu_is_2M_o, self.lu_1G_o, self.lu_is_512G_o, self.lu_hit_o
- ] + self.lu_content_o.ports() + self.update_i.ports()
-if __name__ == '__main__':
- tlb = TLB()
- vl = rtlil.convert(tlb, ports=tlb.ports())
- with open("test_tlb.il", "w") as f:
- f.write(vl)
+++ /dev/null
-from nmigen import Signal, Module, Cat, Const, Elaboratable
-from TLB.ariane.ptw import TLBUpdate, PTE
-class TLBEntry:
- def __init__(self, asid_width):
- self.asid = Signal(asid_width,name="ent_asid")
- # SV48 defines four levels of page tables
- self.vpn0 = Signal(9,name="ent_vpn0")
- self.vpn1 = Signal(9,name="ent_vpn1")
- self.vpn2 = Signal(9,name="ent_vpn2")
- self.vpn3 = Signal(9,name="ent_vpn3")
- self.is_2M = Signal(name="ent_is_2M")
- self.is_1G = Signal(name="ent_is_1G")
- self.is_512G = Signal(name="ent_is_512G")
- self.valid = Signal(name="ent_valid")
- def flatten(self):
- return Cat(*self.ports())
- def eq(self, x):
- return self.flatten().eq(x.flatten())
- def ports(self):
- return [self.asid, self.vpn0, self.vpn1, self.vpn2,
- self.is_2M, self.is_1G, self.valid]
-class TLBContent(Elaboratable):
- def __init__(self, pte_width, asid_width):
- self.asid_width = asid_width
- self.pte_width = pte_width
- self.flush_i = Signal() # Flush signal
- # Update TLB
- self.update_i = TLBUpdate(asid_width)
- self.vpn3 = Signal(9)
- self.vpn2 = Signal(9)
- self.vpn1 = Signal(9)
- self.vpn0 = Signal(9)
- self.replace_en_i = Signal() # replace the following entry,
- # set by replacement strategy
- # Lookup signals
- self.lu_asid_i = Signal(asid_width)
- self.lu_content_o = Signal(pte_width)
- self.lu_is_512G_o = Signal()
- self.lu_is_2M_o = Signal()
- self.lu_is_1G_o = Signal()
- self.lu_hit_o = Signal()
- def elaborate(self, platform):
- m = Module()
- tags = TLBEntry(self.asid_width)
- content = Signal(self.pte_width)
- m.d.comb += [self.lu_hit_o.eq(0),
- self.lu_is_512G_o.eq(0),
- self.lu_is_2M_o.eq(0),
- self.lu_is_1G_o.eq(0)]
- # temporaries for lookup
- asid_ok = Signal(reset_less=True)
- # tags_ok = Signal(reset_less=True)
- vpn3_ok = Signal(reset_less=True)
- vpn2_ok = Signal(reset_less=True)
- vpn1_ok = Signal(reset_less=True)
- vpn0_ok = Signal(reset_less=True)
- #tags_2M = Signal(reset_less=True)
- vpn0_or_2M = Signal(reset_less=True)
- m.d.comb += [
- #compare asid and vpn*
- asid_ok.eq(tags.asid == self.lu_asid_i),
- vpn3_ok.eq(tags.vpn3 == self.vpn3),
- vpn2_ok.eq(tags.vpn2 == self.vpn2),
- vpn1_ok.eq(tags.vpn1 == self.vpn1),
- vpn0_ok.eq(tags.vpn0 == self.vpn0),
- vpn0_or_2M.eq(tags.is_2M | vpn0_ok)
- ]
- with m.If(asid_ok & tags.valid):
- # first level, only vpn3 needs to match
- with m.If (tags.is_512G & vpn3_ok):
- m.d.comb += [ self.lu_content_o.eq(content),
- self.lu_is_512G_o.eq(1),
- self.lu_hit_o.eq(1),
- ]
- # second level , second level vpn2 and vpn3 need to match
- with m.Elif (tags.is_1G & vpn2_ok & vpn3_ok):
- m.d.comb += [ self.lu_content_o.eq(content),
- self.lu_is_1G_o.eq(1),
- self.lu_hit_o.eq(1),
- ]
- # not a giga page hit nor a tera page hit so check further
- with m.Elif(vpn1_ok):
- # this could be a 2 mega page hit or a 4 kB hit
- # output accordingly
- with m.If(vpn0_or_2M):
- m.d.comb += [ self.lu_content_o.eq(content),
- self.lu_is_2M_o.eq(tags.is_2M),
- self.lu_hit_o.eq(1),
- ]
- # ------------------
- # Update or Flush
- # ------------------
- # temporaries
- replace_valid = Signal(reset_less=True)
- m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)
- # flush
- with m.If (self.flush_i):
- # invalidate (flush) conditions: all if zero or just this ASID
- with m.If (self.lu_asid_i == Const(0, self.asid_width) |
- (self.lu_asid_i == tags.asid)):
- m.d.sync += tags.valid.eq(0)
- # normal replacement
- with m.Elif(replace_valid):
- m.d.sync += [ # update tag array
- tags.asid.eq(self.update_i.asid),
- tags.vpn3.eq(self.update_i.vpn[27:36]),
- tags.vpn2.eq(self.update_i.vpn[18:27]),
- tags.vpn1.eq(self.update_i.vpn[9:18]),
- tags.vpn0.eq(self.update_i.vpn[0:9]),
- tags.is_512G.eq(self.update_i.is_512G),
- tags.is_1G.eq(self.update_i.is_1G),
- tags.is_2M.eq(self.update_i.is_2M),
- tags.valid.eq(1),
- # and content as well
- content.eq(self.update_i.content.flatten())
- ]
- return m
- def ports(self):
- return [self.flush_i,
- self.lu_asid_i,
- self.lu_is_2M_o, self.lu_is_1G_o,self.lu_is_512G_o, self.lu_hit_o,
- ] + self.update_i.content.ports() + self.update_i.ports()
+++ /dev/null
-# SPDX-License-Identifier: LGPL-2.1-or-later
-# See Notices.txt for copyright information
-from TLB.LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3
-from nmigen.back.pysim import Simulator, Delay, Tick
-import unittest
-class TestLFSR(unittest.TestCase):
- def test_poly(self):
- v = LFSRPolynomial()
- self.assertEqual(repr(v), "LFSRPolynomial([0])")
- self.assertEqual(str(v), "1")
- v = LFSRPolynomial([1])
- self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
- self.assertEqual(str(v), "x + 1")
- v = LFSRPolynomial([0, 1])
- self.assertEqual(repr(v), "LFSRPolynomial([1, 0])")
- self.assertEqual(str(v), "x + 1")
- v = LFSRPolynomial([1, 2])
- self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])")
- self.assertEqual(str(v), "x^2 + x + 1")
- v = LFSRPolynomial([2])
- self.assertEqual(repr(v), "LFSRPolynomial([2, 0])")
- self.assertEqual(str(v), "x^2 + 1")
- self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1")
- def test_lfsr_3(self):
- module = LFSR(LFSR_POLY_3)
- traces = [module.state, module.enable]
- with Simulator(module,
- vcd_file=open("Waveforms/test_LFSR2.vcd", "w"),
- gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"),
- traces=traces) as sim:
- sim.add_clock(1e-6, 0.25e-6)
- delay = Delay(1e-7)
- def async_process():
- yield module.enable.eq(0)
- yield Tick()
- self.assertEqual((yield module.state), 0x1)
- yield Tick()
- self.assertEqual((yield module.state), 0x1)
- yield module.enable.eq(1)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x2)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x5)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x3)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x7)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x6)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x4)
- yield Tick()
- yield delay
- self.assertEqual((yield module.state), 0x1)
- yield Tick()
- sim.add_process(async_process)
- sim.run()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from TLB.AddressEncoder import AddressEncoder
-from TestUtil.test_helper import assert_eq, assert_ne, assert_op
-# This function allows for the easy setting of values to the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# i (Input): The array of single bits to be written
-def set_encoder(dut, i):
- yield dut.i.eq(i)
- yield
-# Checks the single match of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# sm (Single Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_single_match(dut, sm, op):
- out_sm = yield dut.single_match
- assert_op("Single Match", out_sm, sm, op)
-# Checks the multiple match of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# mm (Multiple Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_multiple_match(dut, mm, op):
- out_mm = yield dut.multiple_match
- assert_op("Multiple Match", out_mm, mm, op)
-# Checks the output of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# o (Output): The expected output
-# op (Operation): (0 => ==), (1 => !=)
-def check_output(dut, o, op):
- out_o = yield dut.o
- assert_op("Output", out_o, o, op)
-# Checks the state of the AddressEncoder
-# Arguments:
-# dut: The AddressEncoder being tested
-# sm (Single Match): The expected match result
-# mm (Multiple Match): The expected match result
-# o (Output): The expected output
-# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-# o_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-def check_all(dut, sm, mm, o, sm_op, mm_op, o_op):
- yield from check_single_match(dut, sm, sm_op)
- yield from check_multiple_match(dut, mm, mm_op)
- yield from check_output(dut, o, o_op)
-def tbench(dut):
- # Check invalid input
- in_val = 0b000
- single_match = 0
- multiple_match = 0
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
- # Check single bit
- in_val = 0b001
- single_match = 1
- multiple_match = 0
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
- # Check another single bit
- in_val = 0b100
- single_match = 1
- multiple_match = 0
- output = 2
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
- # Check multiple match
- # We expected the lowest bit to be returned which is address 0
- in_val = 0b101
- single_match = 0
- multiple_match = 1
- output = 0
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
- # Check another multiple match
- # We expected the lowest bit to be returned which is address 1
- in_val = 0b110
- single_match = 0
- multiple_match = 1
- output = 1
- yield from set_encoder(dut, in_val)
- yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0)
-def test_addr():
- dut = AddressEncoder(4)
- run_simulation(dut, tbench(dut),
- vcd_name="Waveforms/test_address_encoder.vcd")
- print("AddressEncoder Unit Test Success")
-if __name__ == "__main__":
- test_addr()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from TLB.Cam import Cam
-from TestUtil.test_helper import assert_eq, assert_ne, assert_op
-# This function allows for the easy setting of values to the Cam
-# Arguments:
-# dut: The Cam being tested
-# e (Enable): Whether the block is going to be enabled
-# we (Write Enable): Whether the Cam will write on the next cycle
-# a (Address): Where the data will be written if write enable is high
-# d (Data): Either what we are looking for or will write to the address
-def set_cam(dut, e, we, a, d):
- yield dut.enable.eq(e)
- yield dut.write_enable.eq(we)
- yield dut.address_in.eq(a)
- yield dut.data_in.eq(d)
- yield
-# Checks the multiple match of the Cam
-# Arguments:
-# dut: The Cam being tested
-# mm (Multiple Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_multiple_match(dut, mm, op):
- out_mm = yield dut.multiple_match
- assert_op("Multiple Match", out_mm, mm, op)
-# Checks the single match of the Cam
-# Arguments:
-# dut: The Cam being tested
-# sm (Single Match): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_single_match(dut, sm, op):
- out_sm = yield dut.single_match
- assert_op("Single Match", out_sm, sm, op)
-# Checks the address output of the Cam
-# Arguments:
-# dut: The Cam being tested
-# ma (Match Address): The expected match result
-# op (Operation): (0 => ==), (1 => !=)
-def check_match_address(dut, ma, op):
- out_ma = yield dut.match_address
- assert_op("Match Address", out_ma, ma, op)
-# Checks the state of the Cam
-# Arguments:
-# dut: The Cam being tested
-# sm (Single Match): The expected match result
-# mm (Multiple Match): The expected match result
-# ma: (Match Address): The expected address output
-# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-# ma_op (Operation): Operation for the address assertion (0 => ==), (1 => !=)
-def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op):
- yield from check_multiple_match(dut, mm, mm_op)
- yield from check_single_match(dut, sm, sm_op)
- yield from check_match_address(dut, ma, ma_op)
-def tbench(dut):
- # NA
- enable = 0
- write_enable = 0
- address = 0
- data = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Read Miss Multiple
- # Note that the default starting entry data bits are all 0
- enable = 1
- write_enable = 0
- address = 0
- data = 0
- multiple_match = 1
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_multiple_match(dut, multiple_match, 0)
- # Read Miss
- # Note that the default starting entry data bits are all 0
- enable = 1
- write_enable = 0
- address = 0
- data = 1
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Write Entry 0
- enable = 1
- write_enable = 1
- address = 0
- data = 4
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Read Hit Entry 0
- enable = 1
- write_enable = 0
- address = 0
- data = 4
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
- # Search Hit
- enable = 1
- write_enable = 0
- address = 0
- data = 4
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0)
- # Search Miss
- enable = 1
- write_enable = 0
- address = 0
- data = 5
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Multiple Match test
- # Write Entry 1
- enable = 1
- write_enable = 1
- address = 1
- data = 5
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Write Entry 2
- # Same data as Entry 1
- enable = 1
- write_enable = 1
- address = 2
- data = 5
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- # Read Hit Data 5
- enable = 1
- write_enable = 0
- address = 1
- data = 5
- multiple_match = 1
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_all(dut, multiple_match, single_match, address,0,0,0)
- # Verify read_warning is not caused
- # Write Entry 0
- enable = 1
- write_enable = 1
- address = 0
- data = 7
- multiple_match = 0
- single_match = 0
- yield from set_cam(dut, enable, write_enable, address, data)
- # Note there is no yield we immediately attempt to read in the next cycle
- # Read Hit Data 7
- enable = 1
- write_enable = 0
- address = 0
- data = 7
- multiple_match = 0
- single_match = 1
- yield from set_cam(dut, enable, write_enable, address, data)
- yield
- yield from check_single_match(dut, single_match, 0)
- yield
-def test_cam():
- dut = Cam(4, 4)
- run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam.vcd")
- print("Cam Unit Test Success")
-if __name__ == "__main__":
- test_cam()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from TestUtil.test_helper import assert_eq, assert_ne, assert_op
-from TLB.CamEntry import CamEntry
-# This function allows for the easy setting of values to the Cam Entry
-# Arguments:
-# dut: The CamEntry being tested
-# c (command): NA (0), Read (1), Write (2), Reserve (3)
-# d (data): The data to be set
-def set_cam_entry(dut, c, d):
- # Write desired values
- yield dut.command.eq(c)
- yield dut.data_in.eq(d)
- yield
- # Reset all lines
- yield dut.command.eq(0)
- yield dut.data_in.eq(0)
- yield
-# Checks the data state of the CAM entry
-# Arguments:
-# dut: The CamEntry being tested
-# d (Data): The expected data
-# op (Operation): (0 => ==), (1 => !=)
-def check_data(dut, d, op):
- out_d = yield dut.data
- assert_op("Data", out_d, d, op)
-# Checks the match state of the CAM entry
-# Arguments:
-# dut: The CamEntry being tested
-# m (Match): The expected match
-# op (Operation): (0 => ==), (1 => !=)
-def check_match(dut, m, op):
- out_m = yield dut.match
- assert_op("Match", out_m, m, op)
-# Checks the state of the CAM entry
-# Arguments:
-# dut: The CamEntry being tested
-# d (data): The expected data
-# m (match): The expected match
-# d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=)
-# m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=)
-def check_all(dut, d, m, d_op, m_op):
- yield from check_data(dut, d, d_op)
- yield from check_match(dut, m, m_op)
-# This tbench goes through the paces of testing the CamEntry module
-# It is done by writing and then reading various combinations of key/data pairs
-# and reading the results with varying keys to verify the resulting stored
-# data is correct.
-def tbench(dut):
- # Check write
- command = 2
- data = 1
- match = 0
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 0, 0)
- # Check read miss
- command = 1
- data = 2
- match = 0
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 1, 0)
- # Check read hit
- command = 1
- data = 1
- match = 1
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 0, 0)
- # Check overwrite
- command = 2
- data = 5
- match = 0
- yield from set_cam_entry(dut, command, data)
- yield
- yield from check_all(dut, data, match, 0, 0)
- # Check read hit
- command = 1
- data = 5
- match = 1
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 0, 0)
- # Check reset
- command = 3
- data = 0
- match = 0
- yield from set_cam_entry(dut, command, data)
- yield from check_all(dut, data, match, 0, 0)
- # Extra clock cycle for waveform
- yield
-def test_camentry():
- dut = CamEntry(4)
- run_simulation(dut, tbench(dut), vcd_name="Waveforms/test_cam_entry.vcd")
- print("CamEntry Unit Test Success")
-if __name__ == "__main__":
- test_camentry()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from TLB.PermissionValidator import PermissionValidator
-from TestUtil.test_helper import assert_op
-def set_validator(dut, d, xwr, sm, sa, asid):
- yield dut.data.eq(d)
- yield dut.xwr.eq(xwr)
- yield dut.super_mode.eq(sm)
- yield dut.super_access.eq(sa)
- yield dut.asid.eq(asid)
- yield
-def check_valid(dut, v, op):
- out_v = yield dut.valid
- assert_op("Valid", out_v, v, op)
-def tbench(dut):
- # 80 bits represented. Ignore the MSB as it will be truncated
- # ASID is bits first 4 hex values (bits 64 - 78)
- # Test user mode entry valid
- # Global Bit matching ASID
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000031
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
- # Test user mode entry valid
- # Global Bit nonmatching ASID
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000031
- # Ignore MSB it will be truncated
- asid = 0x7FF6
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
- # Test user mode entry invalid
- # Global Bit nonmatching ASID
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000021
- # Ignore MSB it will be truncated
- asid = 0x7FF6
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 0
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
- # Test user mode entry valid
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000011
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
- # Test user mode entry invalid
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000011
- # Ignore MSB it will be truncated
- asid = 0x7FF6
- super_mode = 0
- super_access = 0
- xwr = 0
- valid = 0
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
- # Test supervisor mode entry valid
- # The entry is NOT in user mode
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000001
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 1
- super_access = 0
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
- # Test supervisor mode entry invalid
- # The entry is in user mode
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000011
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 1
- super_access = 0
- xwr = 0
- valid = 0
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
- # Test supervisor mode entry valid
- # The entry is NOT in user mode with access
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000001
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 1
- super_access = 1
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
- # Test supervisor mode entry valid
- # The entry is in user mode with access
- # Ensure that user mode and valid is enabled!
- data = 0x7FFF0000000000000011
- # Ignore MSB it will be truncated
- asid = 0x7FFF
- super_mode = 1
- super_access = 1
- xwr = 0
- valid = 1
- yield from set_validator(dut, data, xwr, super_mode, super_access, asid)
- yield from check_valid(dut, valid, 0)
def test_permv():
    """Run the PermissionValidator testbench and dump a VCD waveform.

    The validator is built with arguments (15, 64).
    NOTE(review): presumably ASID width and PTE width -- confirm against
    the PermissionValidator constructor.
    """
    validator = PermissionValidator(15, 64)
    run_simulation(validator, tbench(validator),
                   vcd_name="Waveforms/test_permission_validator.vcd")
    print("PermissionValidator Unit Test Success")


if __name__ == "__main__":
    test_permv()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from TLB.PteEntry import PteEntry
-from TestUtil.test_helper import assert_op
def set_entry(dut, i):
    """Drive ``dut.i`` with *i*, then let one simulation step elapse.

    Arguments:
        dut: The device under test (must expose an ``i`` signal)
        i: The value to place on the input
    """
    drive_input = dut.i.eq(i)
    yield drive_input
    yield
def check_dirty(dut, d, op):
    """Compare the ``d`` (dirty) output of *dut* against *d*.

    Arguments:
        dut: The device under test
        d: The expected dirty value
        op (Operation): (0 => ==), (anything else => !=)
    """
    observed = yield dut.d
    assert_op("Dirty", observed, d, op)
def check_accessed(dut, a, op):
    """Compare the ``a`` (accessed) output of *dut* against *a*.

    Arguments:
        dut: The device under test
        a: The expected accessed value
        op (Operation): (0 => ==), (anything else => !=)
    """
    observed = yield dut.a
    assert_op("Accessed", observed, a, op)
def check_global(dut, o, op):
    """Compare the ``g`` (global) output of *dut* against *o*.

    Arguments:
        dut: The device under test
        o: The expected global value
        op (Operation): (0 => ==), (anything else => !=)
    """
    observed = yield dut.g
    assert_op("Global", observed, o, op)
def check_user(dut, o, op):
    """Compare the ``u`` (user mode) output of *dut* against *o*.

    Arguments:
        dut: The device under test
        o: The expected user-mode value
        op (Operation): (0 => ==), (anything else => !=)
    """
    observed = yield dut.u
    assert_op("User Mode", observed, o, op)
def check_xwr(dut, o, op):
    """Compare the ``xwr`` output of *dut* against *o*.

    Arguments:
        dut: The device under test
        o: The expected xwr value
        op (Operation): (0 => ==), (anything else => !=)
    """
    observed = yield dut.xwr
    assert_op("XWR", observed, o, op)
def check_asid(dut, o, op):
    """Compare the ``asid`` output of *dut* against *o*.

    Arguments:
        dut: The device under test
        o: The expected ASID value
        op (Operation): (0 => ==), (anything else => !=)
    """
    observed = yield dut.asid
    assert_op("ASID", observed, o, op)
def check_pte(dut, o, op):
    """Compare the ``pte`` output of *dut* against *o*.

    Arguments:
        dut: The device under test
        o: The expected PTE value
        op (Operation): (0 => ==), (anything else => !=)
    """
    out = yield dut.pte
    # Label the failure as "PTE"; it previously reported "ASID", which made
    # a PTE mismatch indistinguishable from an ASID mismatch.
    assert_op("PTE", out, o, op)
def check_valid(dut, v, op):
    """Compare the ``v`` (valid) output of *dut* against *v*.

    Arguments:
        dut: The device under test
        v: The expected valid value
        op (Operation): (0 => ==), (anything else => !=)
    """
    observed = yield dut.v
    assert_op("Valid", observed, v, op)
def check_all(dut, d, a, g, u, xwr, v, asid, pte):
    """Check every decoded output of *dut* for equality with the expected
    values.

    The checks run in a fixed order: dirty, accessed, global, user, xwr,
    asid, pte and finally valid.
    """
    checks = (
        (check_dirty, d),
        (check_accessed, a),
        (check_global, g),
        (check_user, u),
        (check_xwr, xwr),
        (check_asid, asid),
        (check_pte, pte),
        (check_valid, v),
    )
    for checker, expected in checks:
        # 0 selects the "==" comparison
        yield from checker(dut, expected, 0)
def tbench(dut):
    """Apply three entries to *dut* and check every decoded field."""
    # 80 bits represented. Ignore the MSB as it will be truncated
    # ASID is the first 4 hex values (bits 64 - 78)
    vectors = (
        # (input, dirty, access, glob, user, xwr, valid, asid, pte)
        (0x7FFF0000000000000031, 0, 0, 1, 1, 0, 1,
         0x7FFF, 0x0000000000000031),
        (0x0FFF00000000000000FF, 1, 1, 1, 1, 7, 1,
         0x0FFF, 0x00000000000000FF),
        (0x0721000000001100001F, 0, 0, 0, 1, 7, 1,
         0x0721, 0x000000001100001F),
    )
    for entry, *expected in vectors:
        yield from set_entry(dut, entry)
        yield from check_all(dut, *expected)
    yield
def test_pteentry():
    """Run the PteEntry testbench and dump a VCD waveform.

    The entry is built with arguments (15, 64).
    NOTE(review): presumably ASID width and PTE width -- confirm against
    the PteEntry constructor.
    """
    entry = PteEntry(15, 64)
    run_simulation(entry, tbench(entry),
                   vcd_name="Waveforms/test_pte_entry.vcd")
    print("PteEntry Unit Test Success")


if __name__ == "__main__":
    test_pteentry()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from TLB.SetAssociativeCache import SetAssociativeCache
-from TestUtil.test_helper import assert_eq, assert_ne, assert_op
def set_sac(dut, e, c, s, t, d):
    """Drive the cache inputs, then let one simulation step elapse.

    Arguments:
        dut: The device under test
        e: value for ``dut.enable``
        c: value for ``dut.command``
        s: value for ``dut.cset``
        t: value for ``dut.tag``
        d: value for ``dut.data_i``
    """
    stimulus = (
        (dut.enable, e),
        (dut.command, c),
        (dut.cset, s),
        (dut.tag, t),
        (dut.data_i, d),
    )
    for signal, value in stimulus:
        yield signal.eq(value)
    yield
def tbench(dut):
    """Issue two command-2 operations to set 1, with different tag/data.

    NOTE(review): command value 2 is presumably "write" -- confirm against
    SetAssociativeCache.
    """
    enable, command, cset = 1, 2, 1
    for tag, data in ((2, 3), (5, 8)):
        yield from set_sac(dut, enable, command, cset, tag, data)
        yield
def test_assoc_cache():
    """Run the SetAssociativeCache testbench and dump a VCD waveform."""
    cache = SetAssociativeCache(4, 4, 4, 4)
    run_simulation(cache, tbench(cache),
                   vcd_name="Waveforms/test_set_associative_cache.vcd")
    print("Set Associative Cache Unit Test Success")


if __name__ == "__main__":
    test_assoc_cache()
+++ /dev/null
-#import tracemalloc
-from nmigen.compat.sim import run_simulation
-from TLB.TLB import TLB
-from TestUtil.test_helper import assert_op, assert_eq
-#self.supermode = Signal(1) # Supervisor Mode
-#self.super_access = Signal(1) # Supervisor Access
-#self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
-#self.xwr = Signal(3) # Execute, Write, Read
-#self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
-#self.address_L1 = Signal(max=L1_size)
-#self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
-#self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
-#self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
-#self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
-#self.perm_valid = Signal(1) # Denotes if the permissions are correct
-#self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
def check_hit(dut, d):
    """Sample the ``hit`` output of the device under test.

    Arguments:
        dut: The device under test (its ``hit`` signal is read)
        d (Data): The expected hit value; currently unused because the
            comparison below is disabled
    """
    observed_hit = yield dut.hit
    # The comparison is deliberately left disabled, as in the original:
    #assert_eq("hit", observed_hit, d)
def test_command(dut,cmd,xwr,cycles):
    """Drive ``command`` and ``xwr``, then wait *cycles* simulation steps.

    Arguments:
        dut: The device under test
        cmd: value for ``dut.command``
        xwr: value for ``dut.xwr``
        cycles: number of bare ``yield`` steps to wait afterwards
    """
    yield dut.command.eq(cmd)
    yield dut.xwr.eq(xwr)
    for _ in range(cycles):
        yield
def test_write_L1(dut,vma,address_L1,asid,pte_in):
    """Write one entry: drive the address, ASID, VMA and PTE inputs, then
    issue ``COMMAND_WRITE_L1`` with xwr=7 and wait two steps.

    NOTE(review): ``COMMAND_WRITE_L1`` is not defined in the visible part
    of this file -- it must be provided at module level (the signal
    comments above suggest 0b10 = "Write L1"; confirm).
    """
    stimulus = (
        (dut.address_L1, address_L1),
        (dut.asid, asid),
        (dut.vma, vma),
        (dut.pte_in, pte_in),
    )
    for signal, value in stimulus:
        yield signal.eq(value)
    yield from test_command(dut, COMMAND_WRITE_L1, 7, 2)
def test_search(dut,vma,found):
    """Look up *vma*: issue ``COMMAND_READ`` with xwr=7, wait one step,
    then sample the hit output.

    NOTE(review): ``COMMAND_READ`` is not defined in the visible part of
    this file -- it must be provided at module level (the signal comments
    above suggest 0b01 = "Search"; confirm).
    """
    drive_vma = dut.vma.eq(vma)
    yield drive_vma
    yield from test_command(dut, COMMAND_READ, 7, 1)
    yield from check_hit(dut, found)
def zero(dut):
    """Drive 0 onto supermode, super_access, mode, address_L1, asid, vma
    and pte_in, in that order."""
    inputs = (
        dut.supermode,
        dut.super_access,
        dut.mode,
        dut.address_L1,
        dut.asid,
        dut.vma,
        dut.pte_in,
    )
    for signal in inputs:
        yield signal.eq(0)
def tbench(dut):
    """Zero the inputs, enable the TLB, write one entry, then search for
    the written VMA (hit expected) and a different one (miss expected)."""
    yield from zero(dut)
    yield dut.mode.eq(0xF) # enable TLB
    # test hit
    yield from test_write_L1(dut, 0xFEEDFACE, 0, 0xFFFF, 0xF0F0)
    for vma, found in ((0xFEEDFACE, 1), (0xFACEFEED, 0)):
        yield from test_search(dut, vma, found)
def test_tlb():
    """Run the TLB testbench and dump a VCD waveform."""
    tlb = TLB(15, 36, 64, 8)
    run_simulation(tlb, tbench(tlb), vcd_name="Waveforms/test_tlb.vcd")
    print("TLB Unit Test Success")


if __name__ == "__main__":
    test_tlb()
+++ /dev/null
def assert_op(pre, o, e, op):
    """ Verifies the given values with the selected comparison

        Arguments:
            pre (Prefix): Prepended to the assertion message
            o (Output): The output result
            e (Expected): The expected value
            op (Operation): (0 => ==), (anything else => !=)
    """
    checker = assert_eq if op == 0 else assert_ne
    checker(pre, o, e)
def assert_eq(p, o, e):
    """ Verifies the given values are equal

        Arguments:
            p (Prefix): Prepended to the assertion message
            o (Output): The output result
            e (Expected): The expected value

        Raises:
            AssertionError: if ``o == e`` is false
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts
    # are removed under ``python -O``, which would make every testbench
    # using this helper pass unconditionally.
    if not (o == e):
        raise AssertionError(p + " Output " + str(o) + " Expected " + str(e))
def assert_ne(p, o, e):
    """ Verifies the given values are not equal

        Arguments:
            p (Prefix): Prepended to the assertion message
            o (Output): The output result
            e (Expected): The value that must not be produced

        Raises:
            AssertionError: if ``o != e`` is false
    """
    # Raise explicitly instead of using an ``assert`` statement: asserts
    # are removed under ``python -O``, which would make every testbench
    # using this helper pass unconditionally.
    if not (o != e):
        raise AssertionError(
            p + " Output " + str(o) + " Not Expecting " + str(e))
+++ /dev/null
"""Cascading Power ISA Decoder

This module uses CSV tables in a hierarchical/peer cascading fashion,
to create a multi-level instruction decoder by recognising appropriate
patterns.  The output is a flattened (1-level) series of fields suitable
for a simple RISC engine.

This is based on Anton Blanchard's excellent microwatt work.

The basic principle is that the python code does the heavy lifting
(reading the CSV files, constructing the hierarchy), creating the HDL
AST with for-loops generating switch-case statements.

PowerDecoder takes a *list* of CSV files with an associated bit-range
that it is requested to match against the "opcode" row of the CSV file.
This pattern can be either an integer, a binary number, *or* a wildcard
nmigen Case pattern of the form "001--1-100".

Subdecoders are *additional* cases with further decoding.  The "pattern"
argument is specified as one of the Case statements (a peer of the opcode
row in the CSV file), and thus further fields of the opcode may be decoded
giving increasing levels of detail.

Top Level:

    [ (extra.csv: bit-fields entire 32-bit range
        opcode                           -> matches
        000000---------------01000000000 -> ILLEGAL instruction
        01100000000000000000000000000000 -> SIM_CONFIG instruction
        ................................ ->
      ),
      (major.csv: first 6 bits ONLY
        opcode                           -> matches
        001100                           -> ALU,OP_ADD (add)
        001101                           -> ALU,OP_ADD (another type of add)
        ......                           -> ...
        ......                           -> ...
        subdecoders:
        001011 this must match *MAJOR*.CSV
            [ (minor_19.csv: bits 21 through 30 inclusive:
                opcode                  -> matches
                0b0000000000            -> ALU,OP_MCRF
                ............            -> ....
              ),
              (minor_19_00000.csv: bits 21 through 25 inclusive:
                opcode                  -> matches
                0b00010                 -> ALU,add_pcis
              )
            ]
      ),
    ]
"""
-from nmigen import Module, Elaboratable, Signal
-from nmigen.cli import rtlil
-from power_enums import (Function, Form, InternalOp, In1Sel, In2Sel, In3Sel,
- OutSel, RC, LdstLen, CryIn, get_csv, single_bit_flags,
- get_signal_name, default_values)
-from collections import namedtuple
-from power_fields import DecodeFields
-from power_fieldsn import SigDecode, SignalBitRange
# One decode table: ``opcodes`` are the CSV rows, ``bitsel`` the (start, end)
# slice of the instruction to match, ``opint`` whether opcodes are parsed as
# integers, ``suffix`` an optional low-bit count used to split the table,
# ``pattern`` the parent Case value, ``subdecoders`` nested tables.
Subdecoder = namedtuple(
    "Subdecoder",
    "pattern opcodes opint bitsel suffix subdecoders",
)
class PowerOp:
    """PowerOp: spec for execution.  op type (ADD etc.) reg specs etc.

    Holds one enum-typed signal per decoded field plus one single-bit
    signal per entry of ``single_bit_flags``.
    """

    def __init__(self):
        self.function_unit = Signal(Function, reset_less=True)
        self.internal_op = Signal(InternalOp, reset_less=True)
        self.form = Signal(Form, reset_less=True)
        self.in1_sel = Signal(In1Sel, reset_less=True)
        self.in2_sel = Signal(In2Sel, reset_less=True)
        self.in3_sel = Signal(In3Sel, reset_less=True)
        self.out_sel = Signal(OutSel, reset_less=True)
        self.ldst_len = Signal(LdstLen, reset_less=True)
        self.rc_sel = Signal(RC, reset_less=True)
        self.cry_in = Signal(CryIn, reset_less=True)
        # one 1-bit signal per flag column, named after the column
        for flag in single_bit_flags:
            signame = get_signal_name(flag)
            setattr(self, signame, Signal(reset_less=True, name=signame))

    def _eq(self, row=None):
        """Return assignments loading this op from a CSV *row*.

        With no row, the module-level ``default_values`` are used.
        """
        if row is None:
            row = default_values
        enum_columns = (
            (self.function_unit, Function, 'unit'),
            (self.form, Form, 'form'),
            (self.internal_op, InternalOp, 'internal op'),
            (self.in1_sel, In1Sel, 'in1'),
            (self.in2_sel, In2Sel, 'in2'),
            (self.in3_sel, In3Sel, 'in3'),
            (self.out_sel, OutSel, 'out'),
            (self.ldst_len, LdstLen, 'ldst len'),
            (self.rc_sel, RC, 'rc'),
            (self.cry_in, CryIn, 'cry in'),
        )
        res = [sig.eq(enum_cls[row[column]])
               for sig, enum_cls, column in enum_columns]
        for flag in single_bit_flags:
            flag_sig = getattr(self, get_signal_name(flag))
            res.append(flag_sig.eq(int(row.get(flag, 0))))
        return res

    def eq(self, otherop):
        """Return assignments copying every field from *otherop*."""
        names = ["function_unit", "form", "internal_op",
                 "in1_sel", "in2_sel", "in3_sel", "out_sel",
                 "rc_sel", "ldst_len", "cry_in"]
        names += [get_signal_name(flag) for flag in single_bit_flags]
        return [getattr(self, name).eq(getattr(otherop, name))
                for name in names]

    def ports(self):
        """Return the signals exposed as ports.

        NOTE(review): ``cry_in`` is assigned by ``_eq``/``eq`` but is not
        listed here -- confirm whether that omission is intentional.
        """
        ports = [self.function_unit,
                 self.in1_sel,
                 self.in2_sel,
                 self.in3_sel,
                 self.out_sel,
                 self.ldst_len,
                 self.rc_sel,
                 self.internal_op,
                 self.form]
        ports.extend(getattr(self, get_signal_name(flag))
                     for flag in single_bit_flags)
        return ports
class PowerDecoder(Elaboratable):
    """PowerDecoder - decodes an incoming opcode into the type of operation

    Arguments:
        width: width in bits of ``opcode_in``
        dec: one Subdecoder, or a list of them, to match against the opcode
    """

    def __init__(self, width, dec):
        if not isinstance(dec, list):
            dec = [dec]
        # A suffix at least as wide as the opcode cannot split anything, so
        # it is dropped.  Subdecoder is an immutable namedtuple: assigning
        # ``d.suffix = None`` raises AttributeError, so build a corrected
        # copy instead.
        self.dec = [self._drop_oversize_suffix(d, width) for d in dec]
        self.opcode_in = Signal(width, reset_less=True)

        self.op = PowerOp()
        self.width = width

    @staticmethod
    def _drop_oversize_suffix(d, width):
        """Return *d* with ``suffix=None`` when the suffix is >= *width*."""
        if d.suffix is not None and d.suffix >= width:
            return d._replace(suffix=None)
        return d

    def suffix_mask(self, d):
        """Return a mask covering the low ``d.suffix`` bits."""
        return ((1 << d.suffix) - 1)

    def divide_opcodes(self, d):
        """Group the rows of *d* by the low ``d.suffix`` bits of the opcode.

        Returns a dict mapping each suffix value to copies of the rows,
        whose 'opcode' has been shifted right by ``d.suffix``.
        """
        divided = {}
        mask = self.suffix_mask(d)
        print("mask", hex(mask))
        for row in d.opcodes:
            opcode = row['opcode']
            if d.opint and '-' not in opcode:
                opcode = int(opcode, 0)
            key = opcode & mask
            opcode = opcode >> d.suffix
            if key not in divided:
                divided[key] = []
            r = row.copy()
            r['opcode'] = opcode
            divided[key].append(r)
        return divided

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # note: default opcode is "illegal" as this is a combinatorial block

        # go through the list of CSV decoders first
        for d in self.dec:
            opcode_switch = Signal(d.bitsel[1] - d.bitsel[0],
                                   reset_less=True)
            comb += opcode_switch.eq(self.opcode_in[d.bitsel[0]:d.bitsel[1]])
            if d.suffix:
                # split the table on its low bits: one sub-decoder per
                # distinct suffix value
                opcodes = self.divide_opcodes(d)
                opc_in = Signal(d.suffix, reset_less=True)
                comb += opc_in.eq(opcode_switch[:d.suffix])
                with m.Switch(opc_in):
                    for key, row in opcodes.items():
                        bitsel = (d.suffix+d.bitsel[0], d.bitsel[1])
                        sd = Subdecoder(pattern=None, opcodes=row,
                                        bitsel=bitsel, suffix=None,
                                        opint=False, subdecoders=[])
                        # use this decoder's own width rather than a
                        # hard-coded 32: opcode_in is copied across whole
                        subdecoder = PowerDecoder(width=self.width, dec=sd)
                        setattr(m.submodules, "dec_sub%d" % key, subdecoder)
                        comb += subdecoder.opcode_in.eq(self.opcode_in)
                        with m.Case(key):
                            comb += self.op.eq(subdecoder.op)
            else:
                # TODO: arguments, here (all of them) need to be a list.
                # a for-loop around the *list* of decoder args.
                with m.Switch(opcode_switch):
                    self.handle_subdecoders(m, d)
                    for row in d.opcodes:
                        opcode = row['opcode']
                        if d.opint and '-' not in opcode:
                            opcode = int(opcode, 0)
                        # rows without a function unit are skipped
                        if not row['unit']:
                            continue
                        with m.Case(opcode):
                            comb += self.op._eq(row)
        return m

    def handle_subdecoders(self, m, d):
        """Add one Case per subdecoder of *d* to the enclosing Switch."""
        for dec in d.subdecoders:
            subdecoder = PowerDecoder(self.width, dec)
            if isinstance(dec, list):  # XXX HACK: take first pattern
                dec = dec[0]
            setattr(m.submodules, "dec%d" % dec.pattern, subdecoder)
            m.d.comb += subdecoder.opcode_in.eq(self.opcode_in)
            with m.Case(dec.pattern):
                m.d.comb += self.op.eq(subdecoder.op)

    def ports(self):
        """Return the opcode input followed by the decoded op's ports."""
        return [self.opcode_in] + self.op.ports()
class TopPowerDecoder(PowerDecoder, DecodeFields):
    """Top-level decoder: a PowerDecoder that also exposes the instruction
    fields of ``opcode_in`` through DecodeFields."""

    def __init__(self, width, dec):
        # Both bases are initialised explicitly because they take
        # different constructor arguments.
        PowerDecoder.__init__(self, width, dec)
        field_args = [self.opcode_in]
        DecodeFields.__init__(self, SignalBitRange, field_args)
        self.create_specs()
def create_pdecode():
    """Build the top-level decoder from the major, minor and extra CSV
    tables and return it as a 32-bit TopPowerDecoder."""

    def minor(pattern, csvname, bitsel, suffix=None):
        # minor tables are all integer-opcode tables without children
        return Subdecoder(pattern=pattern, opcodes=get_csv(csvname),
                          opint=True, bitsel=bitsel, suffix=suffix,
                          subdecoders=[])

    # minor 19 has extra patterns
    m19 = [
        minor(19, "minor_19.csv", (1, 11)),
        minor(19, "minor_19_00000.csv", (1, 6)),
    ]

    # minor opcodes.
    pminor = [
        m19,
        minor(30, "minor_30.csv", (1, 6)),
        minor(31, "minor_31.csv", (1, 11), suffix=0b00101),
        minor(58, "minor_58.csv", (0, 2)),
        minor(62, "minor_62.csv", (0, 2)),
    ]

    # top level: extra merged with major
    major = Subdecoder(pattern=None, opint=True,
                       opcodes=get_csv("major.csv"),
                       bitsel=(26, 32), suffix=None, subdecoders=pminor)
    extra = Subdecoder(pattern=None, opint=False,
                       opcodes=get_csv("extra.csv"),
                       bitsel=(0, 32), suffix=None, subdecoders=[])

    return TopPowerDecoder(32, [major, extra])
# Script entry point: convert the decoder to RTLIL and save it.
if __name__ == '__main__':
    pdecode = create_pdecode()
    decoder_ports = pdecode.ports()
    vl = rtlil.convert(pdecode, ports=decoder_ports)
    with open("decoder.il", "w") as f:
        f.write(vl)
+++ /dev/null
"""Power ISA Decoder second stage

based on Anton Blanchard microwatt decode2.vhdl
"""
-from nmigen import Module, Elaboratable, Signal, Mux, Const
-from nmigen.cli import rtlil
-from power_decoder import create_pdecode
-from power_enums import (InternalOp, CryIn, Function, LdstLen,
- In1Sel, In2Sel, In3Sel, OutSel, SPR, RC)
class DecodeA(Elaboratable):
    """DecodeA from instruction

    decodes register RA, whether immediate-zero, implicit and
    explicit CSRs
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In1Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, name="reg_a")
        self.immz_out = Signal(reset_less=True)
        self.spr_out = Data(10, "spr_a")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # The RA field of the instruction.  The zero test must look at the
        # field itself: testing reg_out.data (as before) made the condition
        # depend on the very signal it drives, so with RA_OR_ZERO the
        # register was never selected.
        ra = Signal(5, reset_less=True)
        comb += ra.eq(self.dec.RA[0:-1])

        # select Register A field
        with m.If((self.sel_in == In1Sel.RA) |
                  ((self.sel_in == In1Sel.RA_OR_ZERO) &
                   (ra != Const(0, 5)))):
            comb += self.reg_out.data.eq(ra)
            comb += self.reg_out.ok.eq(1)

        # zero immediate requested
        with m.If((self.sel_in == In1Sel.RA_OR_ZERO) &
                  (ra == Const(0, 5))):
            comb += self.immz_out.eq(1)

        # decode SPR1 based on instruction type
        op = self.dec.op
        # BC or BCREG: potential implicit register (CTR)
        with m.If((op.internal_op == InternalOp.OP_BC) |
                  (op.internal_op == InternalOp.OP_BCREG)):
            with m.If(~self.dec.BO[2]):  # 3.0B p38 BO2=0, use CTR reg
                comb += self.spr_out.data.eq(SPR.CTR)  # constant: CTR
                comb += self.spr_out.ok.eq(1)
        # MFSPR or MTSPR: move-from / move-to SPRs
        with m.If((op.internal_op == InternalOp.OP_MFSPR) |
                  (op.internal_op == InternalOp.OP_MTSPR)):
            comb += self.spr_out.data.eq(self.dec.SPR[0:-1])  # SPR field, XFX
            comb += self.spr_out.ok.eq(1)

        return m
class Data:
    """A ``data`` signal of the given width paired with a 1-bit ``ok`` flag.

    Arguments:
        width: width of the ``data`` signal
        name: name of the ``data`` signal; the flag is named "<name>_ok"
    """

    def __init__(self, width, name):
        self.data = Signal(width, name=name, reset_less=True)
        self.ok = Signal(name="%s_ok" % name, reset_less=True)

    def eq(self, rhs):
        """Return assignments copying ``data`` and ``ok`` from *rhs*."""
        return [getattr(self, field).eq(getattr(rhs, field))
                for field in ("data", "ok")]

    def ports(self):
        """Return ``[data, ok]``."""
        return [self.data, self.ok]
class DecodeB(Elaboratable):
    """DecodeB from instruction

    decodes register RB, different forms of immediate (signed, unsigned),
    and implicit SPRs
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In2Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_b")
        self.imm_out = Data(64, "imm_b")
        self.spr_out = Data(10, "spr_b")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # select Register B field
        with m.Switch(self.sel_in):
            with m.Case(In2Sel.RB):
                comb += self.reg_out.data.eq(self.dec.RB[0:-1])
                comb += self.reg_out.ok.eq(1)
            with m.Case(In2Sel.CONST_UI):
                comb += self.imm_out.data.eq(self.dec.UI[0:-1])
                comb += self.imm_out.ok.eq(1)
            with m.Case(In2Sel.CONST_SI):  # TODO: sign-extend here?
                comb += self.imm_out.data.eq(self.dec.SI[0:-1])
                comb += self.imm_out.ok.eq(1)
            # The "HI" immediates occupy the upper halfword: the 16-bit
            # field is followed by 16 zero bits, i.e. shifted left by 16
            # (this was previously a shift by 4).
            with m.Case(In2Sel.CONST_UI_HI):
                comb += self.imm_out.data.eq(self.dec.UI[0:-1] << 16)
                comb += self.imm_out.ok.eq(1)
            with m.Case(In2Sel.CONST_SI_HI):  # TODO: sign-extend here?
                comb += self.imm_out.data.eq(self.dec.SI[0:-1] << 16)
                comb += self.imm_out.ok.eq(1)
            with m.Case(In2Sel.CONST_LI):
                comb += self.imm_out.data.eq(self.dec.LI[0:-1] << 2)
                comb += self.imm_out.ok.eq(1)
            with m.Case(In2Sel.CONST_BD):
                comb += self.imm_out.data.eq(self.dec.BD[0:-1] << 2)
                comb += self.imm_out.ok.eq(1)
            with m.Case(In2Sel.CONST_DS):
                comb += self.imm_out.data.eq(self.dec.DS[0:-1] << 2)
                comb += self.imm_out.ok.eq(1)
            with m.Case(In2Sel.CONST_M1):
                comb += self.imm_out.data.eq(~Const(0, 64))  # all 1s
                comb += self.imm_out.ok.eq(1)
            with m.Case(In2Sel.CONST_SH):
                comb += self.imm_out.data.eq(self.dec.sh[0:-1])
                comb += self.imm_out.ok.eq(1)
            with m.Case(In2Sel.CONST_SH32):
                comb += self.imm_out.data.eq(self.dec.SH32[0:-1])
                comb += self.imm_out.ok.eq(1)

        # decode SPR2 based on instruction type
        op = self.dec.op
        # BCREG implicitly uses CTR or LR for 2nd reg
        with m.If(op.internal_op == InternalOp.OP_BCREG):
            with m.If(self.dec.FormXL.XO[9]):  # 3.0B p38 top bit of XO
                comb += self.spr_out.data.eq(SPR.CTR)
            with m.Else():
                comb += self.spr_out.data.eq(SPR.LR)
            comb += self.spr_out.ok.eq(1)

        return m
class DecodeC(Elaboratable):
    """DecodeC from instruction

    decodes register RC
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In3Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_c")

    def elaborate(self, platform):
        m = Module()
        out = self.reg_out

        # select Register C field: only In3Sel.RS produces a register
        with m.If(self.sel_in == In3Sel.RS):
            m.d.comb += out.data.eq(self.dec.RS[0:-1])
            m.d.comb += out.ok.eq(1)

        return m
class DecodeOut(Elaboratable):
    """DecodeOut from instruction

    decodes output register RA, RT or SPR
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(OutSel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_o")
        self.spr_out = Data(10, "spr_o")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        reg, spr = self.reg_out, self.spr_out

        # select Register out field
        with m.Switch(self.sel_in):
            with m.Case(OutSel.RT):
                comb += reg.data.eq(self.dec.RT[0:-1])
                comb += reg.ok.eq(1)
            with m.Case(OutSel.RA):
                comb += reg.data.eq(self.dec.RA[0:-1])
                comb += reg.ok.eq(1)
            with m.Case(OutSel.SPR):
                comb += spr.data.eq(self.dec.SPR[0:-1])  # from XFX
                comb += spr.ok.eq(1)

        return m
class DecodeRC(Elaboratable):
    """DecodeRc from instruction

    decodes Record bit Rc
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(RC, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.rc_out = Data(1, "rc")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        rc = self.rc_out

        # select Record bit out field
        with m.Switch(self.sel_in):
            with m.Case(RC.RC):
                # taken from the instruction's Rc field
                comb += rc.data.eq(self.dec.Rc[0:-1])
                comb += rc.ok.eq(1)
            with m.Case(RC.ONE):
                # constant 1
                comb += rc.data.eq(1)
                comb += rc.ok.eq(1)
            with m.Case(RC.NONE):
                # constant 0
                comb += rc.data.eq(0)
                comb += rc.ok.eq(1)

        return m
class DecodeOE(Elaboratable):
    """DecodeOE from instruction

    decodes OE field: uses RC decode detection which might not be good

    -- For now, use "rc" in the decode table to decide whether oe exists.
    -- This is not entirely correct architecturally: For mulhd and
    -- mulhdu, the OE field is reserved. It remains to be seen what an
    -- actual POWER9 does if we set it on those instructions, for now we
    -- test that further down when assigning to the multiplier oe input.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(RC, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.oe_out = Data(1, "oe")

    def elaborate(self, platform):
        m = Module()
        oe = self.oe_out

        # select OE bit out field: only produced when the selector is RC.RC
        with m.Switch(self.sel_in):
            with m.Case(RC.RC):
                m.d.comb += oe.data.eq(self.dec.OE[0:-1])
                m.d.comb += oe.ok.eq(1)

        return m
class XerBits:
    """Five single-bit signals: ca, ca32, ov, ov32 and so."""

    def __init__(self):
        self.ca = Signal(reset_less=True)
        self.ca32 = Signal(reset_less=True)
        self.ov = Signal(reset_less=True)
        self.ov32 = Signal(reset_less=True)
        self.so = Signal(reset_less=True)

    def ports(self):
        """Return the five signals in declaration order."""
        names = ("ca", "ca32", "ov", "ov32", "so")
        return [getattr(self, name) for name in names]
class Decode2ToExecute1Type:
    """The bundle of signals produced by the second decode stage."""

    def __init__(self):
        self.valid = Signal(reset_less=True)
        self.insn_type = Signal(InternalOp, reset_less=True)
        self.nia = Signal(64, reset_less=True)

        # register numbers (5 bits), each with an "ok" flag
        self.write_reg = Data(5, name="rego")
        self.read_reg1 = Data(5, name="reg1")
        self.read_reg2 = Data(5, name="reg2")
        self.read_reg3 = Data(5, name="reg3")
        # 64-bit immediate
        self.imm_data = Data(64, name="imm")
        # SPR numbers (10 bits), each with an "ok" flag
        self.write_spr = Data(10, name="spro")
        self.read_spr1 = Data(10, name="spr1")
        self.read_spr2 = Data(10, name="spr2")

        # Deliberately not present (kept for reference):
        #self.read_data1 = Signal(64, reset_less=True)
        #self.read_data2 = Signal(64, reset_less=True)
        #self.read_data3 = Signal(64, reset_less=True)
        #self.cr = Signal(32, reset_less=True) # NO: this is from the CR SPR
        #self.xerc = XerBits() # NO: this is from the XER SPR

        self.lk = Signal(reset_less=True)
        self.rc = Data(1, "rc")
        self.oe = Data(1, "oe")
        self.invert_a = Signal(reset_less=True)
        self.invert_out = Signal(reset_less=True)
        self.input_carry = Signal(CryIn, reset_less=True)
        self.output_carry = Signal(reset_less=True)
        self.input_cr = Signal(reset_less=True)
        self.output_cr = Signal(reset_less=True)
        self.is_32bit = Signal(reset_less=True)
        self.is_signed = Signal(reset_less=True)
        self.insn = Signal(32, reset_less=True)
        self.data_len = Signal(4, reset_less=True)  # bytes
        self.byte_reverse = Signal(reset_less=True)
        self.sign_extend = Signal(reset_less=True)  # do we need this?
        self.update = Signal(reset_less=True)  # is this an update instruction?

    def ports(self):
        """Return the plain signals followed by each Data pair's ports."""
        signals = [self.valid, self.insn_type, self.nia,
                   #self.read_data1, self.read_data2, self.read_data3,
                   #self.cr,
                   self.lk,
                   self.invert_a, self.invert_out,
                   self.input_carry, self.output_carry,
                   self.input_cr, self.output_cr,
                   self.is_32bit, self.is_signed,
                   self.insn,
                   self.data_len, self.byte_reverse, self.sign_extend,
                   self.update]
        pairs = (self.oe, self.rc,
                 self.write_spr, self.read_spr1, self.read_spr2,
                 self.write_reg, self.read_reg1, self.read_reg2,
                 self.read_reg3, self.imm_data)
        for pair in pairs:
            signals = signals + pair.ports()
        # + self.xerc.ports()
        return signals
class PowerDecode2(Elaboratable):
    """Second decode stage: runs the field decoders on the output of the
    first-stage decoder *dec* and collects the results in ``self.e``."""

    def __init__(self, dec):
        self.dec = dec
        self.e = Decode2ToExecute1Type()

    def ports(self):
        """Return the first-stage decoder's ports followed by ``e``'s."""
        return self.dec.ports() + self.e.ports()

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        op = self.dec.op
        e = self.e

        # set up submodule decoders
        m.submodules.dec = self.dec
        m.submodules.dec_a = dec_a = DecodeA(self.dec)
        m.submodules.dec_b = dec_b = DecodeB(self.dec)
        m.submodules.dec_c = dec_c = DecodeC(self.dec)
        m.submodules.dec_o = dec_o = DecodeOut(self.dec)
        m.submodules.dec_rc = dec_rc = DecodeRC(self.dec)
        m.submodules.dec_oe = dec_oe = DecodeOE(self.dec)

        # copy instruction through...
        insn_sinks = [e.insn, dec_a.insn_in, dec_b.insn_in,
                      dec_c.insn_in, dec_o.insn_in, dec_rc.insn_in,
                      dec_oe.insn_in]
        for sink in insn_sinks:
            comb += sink.eq(self.dec.opcode_in)

        # ...and subdecoders' input fields
        selectors = (
            (dec_a.sel_in, op.in1_sel),
            (dec_b.sel_in, op.in2_sel),
            (dec_c.sel_in, op.in3_sel),
            (dec_o.sel_in, op.out_sel),
            (dec_rc.sel_in, op.rc_sel),
            (dec_oe.sel_in, op.rc_sel),  # XXX should be OE sel
        )
        for sel_in, source in selectors:
            comb += sel_in.eq(source)

        # decode LD/ST length (in bytes)
        with m.Switch(op.ldst_len):
            for code, nbytes in ((LdstLen.is1B, 1), (LdstLen.is2B, 2),
                                 (LdstLen.is4B, 4), (LdstLen.is8B, 8)):
                with m.Case(code):
                    comb += e.data_len.eq(nbytes)

        #comb += self.e.nia.eq(self.dec.nia) # XXX TODO
        # no function unit means the instruction is reported as illegal
        itype = Mux(op.function_unit == Function.NONE,
                    InternalOp.OP_ILLEGAL,
                    op.internal_op)
        comb += e.insn_type.eq(itype)

        # registers a, b, c and out
        comb += e.read_reg1.eq(dec_a.reg_out)
        comb += e.read_reg2.eq(dec_b.reg_out)
        comb += e.read_reg3.eq(dec_c.reg_out)
        comb += e.write_reg.eq(dec_o.reg_out)
        comb += e.imm_data.eq(dec_b.imm_out)

        # rc and oe out
        comb += e.rc.eq(dec_rc.rc_out)
        comb += e.oe.eq(dec_oe.oe_out)

        # SPRs out
        comb += e.read_spr1.eq(dec_a.spr_out)
        comb += e.read_spr2.eq(dec_b.spr_out)
        comb += e.write_spr.eq(dec_o.spr_out)

        # decoded/selected instruction flags
        for target, flag in ((e.invert_a, op.inv_a),
                             (e.invert_out, op.inv_out),
                             (e.input_carry, op.cry_in),
                             (e.output_carry, op.cry_out),
                             (e.is_32bit, op.is_32b),
                             (e.is_signed, op.sgn)):
            comb += target.eq(flag)
        with m.If(op.lk):
            comb += e.lk.eq(self.dec.LK[0:-1])  # XXX TODO: accessor
        for target, flag in ((e.byte_reverse, op.br),
                             (e.sign_extend, op.sgn_ext),
                             (e.update, op.upd),
                             (e.input_cr, op.cr_in),
                             (e.output_cr, op.cr_out)):
            comb += target.eq(flag)

        return m
# Script entry point: convert the second-stage decoder to RTLIL and save it.
if __name__ == '__main__':
    pdecode = create_pdecode()
    dec2 = PowerDecode2(pdecode)
    all_ports = dec2.ports() + pdecode.ports()
    vl = rtlil.convert(dec2, ports=all_ports)
    with open("dec2.il", "w") as f:
        f.write(vl)
+++ /dev/null
-from enum import Enum, unique
-import csv
-import os
-import requests
def get_csv(name):
    """Return the rows of the CSV table *name* as a list of dicts.

    The table is looked up next to this source file.  If it is not there
    it is downloaded once and cached at that location.

    Arguments:
        name: file name of the table, e.g. "major.csv"

    Raises:
        requests.HTTPError: if the download returns an error status
        requests.RequestException: on other network failures or timeout
    """
    file_dir = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(file_dir, name)
    if not os.path.isfile(file_path):
        url = 'https://libre-riscv.org/openpower/isatables/' + name
        r = requests.get(url, allow_redirects=True, timeout=30)
        # Never cache an error page as if it were a CSV table.
        r.raise_for_status()
        # Decode before opening the file so that a decoding failure does
        # not leave an empty cache file behind.
        text = r.content.decode("utf-8")
        with open(file_path, 'w', encoding="utf-8") as outfile:
            outfile.write(text)
    # newline='' lets the csv module handle line endings itself
    with open(file_path, 'r', encoding="utf-8", newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        return list(reader)
# names of the fields in the tables that don't correspond to an enum
single_bit_flags = [
    'CR in',
    'CR out',
    'inv A',
    'inv out',
    'cry out',
    'BR',
    'sgn ext',
    'upd',
    'rsrv',
    '32b',
    'sgn',
    'lk',
    'sgl pipe',
]

# default values for fields in the table
default_values = {
    'unit': "NONE",
    'internal op': "OP_ILLEGAL",
    'in1': "RA",
    'in2': 'NONE',
    'in3': 'NONE',
    'out': 'NONE',
    'ldst len': 'NONE',
    'rc': 'NONE',
    'cry in': 'ZERO',
    'form': 'NONE',
}
def get_signal_name(name):
    """Turn a table column name into a valid signal/attribute name.

    The name is lower-cased, spaces become underscores, and a name that
    starts with a digit is prefixed with "is_" (e.g. "32b" -> "is_32b").
    An empty name is returned unchanged.
    """
    # name[:1] instead of name[0]: an empty column name must not raise
    if name[:1].isdigit():
        name = "is_" + name
    return name.lower().replace(' ', '_')
# @unique: a duplicated value would otherwise silently become an alias
@unique
class Function(Enum):
    """Function unit named in the 'unit' column of the decode tables."""
    NONE = 0
    ALU = 1
    LDST = 2
# @unique: a duplicated value would otherwise silently become an alias
@unique
class Form(Enum):
    """Instruction form named in the 'form' column of the decode tables."""
    NONE = 0
    I = 1
    B = 2
    SC = 3
    D = 4
    DS = 5
    DQ = 6
    DX = 7
    X = 8
    XL = 9
    XFX = 10
    XFL = 11
    XX1 = 12
    XX2 = 13
    XX3 = 14
    XX4 = 15
    XS = 16
    XO = 17
    A = 18
    M = 19
    MD = 20
    MDS = 21
    VA = 22
    VC = 23
    VX = 24
    EVX = 25
    EVS = 26
    Z22 = 27
    Z23 = 28
# @unique: a duplicated value would otherwise silently become an alias
@unique
class InternalOp(Enum):
    """Internal operation named in the 'internal op' column of the tables.

    Values 3, 32 and 40 are not assigned here.
    """
    # OP_ILLEGAL is the default 'internal op' for rows without a match and
    # is what the second decode stage reports when there is no function
    # unit, so it must exist; 0 is the reset value of the signal.
    OP_ILLEGAL = 0
    OP_NOP = 1
    OP_ADD = 2
    OP_AND = 4
    OP_ATTN = 5
    OP_B = 6
    OP_BC = 7
    OP_BCREG = 8
    OP_BPERM = 9
    OP_CMP = 10
    OP_CMPB = 11
    OP_CMPEQB = 12
    OP_CMPRB = 13
    OP_CNTZ = 14
    OP_CRAND = 15
    OP_CRANDC = 16
    OP_CREQV = 17
    OP_CRNAND = 18
    OP_CRNOR = 19
    OP_CROR = 20
    OP_CRORC = 21
    OP_CRXOR = 22
    OP_DARN = 23
    OP_DCBF = 24
    OP_DCBST = 25
    OP_DCBT = 26
    OP_DCBTST = 27
    OP_DCBZ = 28
    OP_DIV = 29
    OP_DIVE = 30
    OP_EXTS = 31
    OP_ICBI = 33
    OP_ICBT = 34
    OP_ISEL = 35
    OP_ISYNC = 36
    OP_LOAD = 37
    OP_STORE = 38
    OP_MADDHD = 39
    OP_MADDLD = 41
    OP_MCRF = 42
    OP_MCRXR = 43
    OP_MCRXRX = 44
    OP_MFCR = 45
    OP_MFSPR = 46
    OP_MOD = 47
    OP_MTCRF = 48
    OP_MTSPR = 49
    OP_MUL_L64 = 50
    OP_MUL_H64 = 51
    OP_MUL_H32 = 52
    OP_OR = 53
    OP_POPCNT = 54
    OP_PRTY = 55
    OP_RLC = 56
    OP_RLCL = 57
    OP_RLCR = 58
    OP_SETB = 59
    OP_SHL = 60
    OP_SHR = 61
    OP_SYNC = 62
    OP_TD = 63
    OP_TDI = 64
    OP_TW = 65
    OP_TWI = 66
    OP_XOR = 67
# @unique: a duplicated value would otherwise silently become an alias
@unique
class In1Sel(Enum):
    """Selector named in the 'in1' column of the decode tables."""
    RA = 0
    RA_OR_ZERO = 1
    NONE = 2
    SPR = 3
# @unique: a duplicated value would otherwise silently become an alias
@unique
class In2Sel(Enum):
    """Selector named in the 'in2' column of the decode tables."""
    NONE = 0
    RB = 1
    CONST_UI = 2
    CONST_SI = 3
    # The two "upper halfword" immediates are selected on by the operand-B
    # decoder and so must exist; they take the otherwise unused values 4
    # and 5.
    CONST_UI_HI = 4
    CONST_SI_HI = 5
    CONST_LI = 6
    CONST_BD = 7
    CONST_DS = 8
    CONST_M1 = 9
    CONST_SH = 10
    CONST_SH32 = 11
    SPR = 12
# @unique: a duplicated value would otherwise silently become an alias
@unique
class In3Sel(Enum):
    """Selector named in the 'in3' column of the decode tables."""
    NONE = 0
    RS = 1
# @unique: a duplicated value would otherwise silently become an alias
@unique
class OutSel(Enum):
    """Selector named in the 'out' column of the decode tables."""
    NONE = 0
    RT = 1
    RA = 2
    SPR = 3
# @unique: a duplicated value would otherwise silently become an alias
@unique
class LdstLen(Enum):
    """Load/store length named in the 'ldst len' column of the tables.

    The value is an encoding, not a byte count (is4B is 3, is8B is 4).
    """
    NONE = 0
    is1B = 1
    is2B = 2
    is4B = 3
    is8B = 4
class RC(Enum):
    """Record-bit selector (values of the 'rc' column)."""

    NONE = 0
    ONE = 1
    RC = 2
class CryIn(Enum):
    """Carry-in selector (values of the 'cry in' column)."""

    ZERO = 0
    ONE = 1
    CA = 2
class SPR(Enum):
    """Special-purpose register identifiers, keyed by register name.

    NOTE(review): the values look like architectural SPR numbers --
    confirm against the ISA before relying on that.
    """

    XER = 1
    LR = 8
    CTR = 9
    TB = 268
    SRR0 = 26
    SRR1 = 27
    HSRR0 = 314
    HSRR1 = 315
    SPRG0 = 272
    SPRG1 = 273
    SPRG2 = 274
    SPRG3 = 275
    SPRG3U = 259
    HSPRG0 = 304
    HSPRG1 = 305
+++ /dev/null
-from collections import OrderedDict, namedtuple
class BitRange(OrderedDict):
    """BitRange: remaps from straight indices (0,1,2..) to bit numbers

    Keys are the straight indices and values the bit numbers.  Indexing
    with an integer returns the mapped bit number; indexing with a
    slice returns the list of mapped bit numbers, in insertion order,
    for that slice of entries.
    """

    def __getitem__(self, subscript):
        if isinstance(subscript, slice):
            # slice over the mapped bit numbers, not over the keys
            return list(self.values())[subscript]
        # go straight to the base class: self[subscript] would call
        # this method again and recurse without end
        return OrderedDict.__getitem__(self, subscript)
def decode_instructions(form):
    """Group field definitions by the instruction formats that use them.

    Lines are accumulated until one starting with "Formats" is found.
    That line's text after the last ':' is a comma-separated list of
    format names (spaces ignored); the first accumulated line is then
    appended to the list kept for each of those formats, and the
    accumulator is cleared.

    Returns a dict mapping format name to a list of first-lines.
    """
    by_format = {}
    pending = []
    for raw in form:
        text = raw.strip()
        if not text.startswith("Formats"):
            pending.append(text)
            continue
        first = pending[0]
        names = text.split(":")[-1].replace(" ", "").split(",")
        for fmt in names:
            by_format.setdefault(fmt, []).append(first)
        pending = []
    return by_format
def decode_form_header(hdr):
    """Map column offsets in a '|'-separated header line to bit numbers.

    The stripped header is split on '|'.  Empty pieces are skipped
    without advancing the offset.  Every other piece advances the
    offset by its length plus one (for the separator); a piece whose
    first character is a digit records the leading integer at the
    piece's starting offset.

    Returns a dict of {offset: bit number}.
    """
    offsets = {}
    column = 0
    pieces = hdr.strip().split('|')
    print (pieces)
    for piece in pieces:
        if not piece:
            continue
        if piece[0].isdigit():
            offsets[column] = int(piece.strip().split(' ')[0])
        column += len(piece) + 1
    return offsets
def find_unique(d, key):
    """Return a name derived from key that is not yet present in d.

    key itself is returned when unused; otherwise the first of
    key_1, key_2, ... that is not in d.
    """
    if key not in d:
        return key
    suffix = 1
    candidate = "%s_%d" % (key, suffix)
    while candidate in d:
        suffix += 1
        candidate = "%s_%d" % (key, suffix)
    return candidate
def decode_line(header, line):
    """Decode one '|'-separated form line into {fieldname: (start, end)}.

    header maps column offsets to starting bit numbers (as produced for
    the form's header line).  Each named field starts at the bit number
    found at its column offset and ends where the next piece starts.
    Blank pieces and pieces starting with '/' are reserved: they close
    the preceding field but produce no entry of their own.  A named
    field that is last on the line ends at bit 32.

    Returns an empty dict for a line with no named fields.
    Raises KeyError if a piece starts at an offset not in header.
    """
    line = line.strip()
    res = {}
    count = 0
    print ("line", line)
    prev_fieldname = None
    for f in line.split("|"):
        if not f:
            continue
        end = count + len(f) + 1
        fieldname = f.strip()
        if not fieldname or fieldname.startswith('/'):
            # reserved area: it only terminates the previous field
            if prev_fieldname is not None:
                res[prev_fieldname] = (res[prev_fieldname], header[count])
                prev_fieldname = None
            count = end
            continue
        bitstart = header[count]
        if prev_fieldname is not None:
            res[prev_fieldname] = (res[prev_fieldname], bitstart)
        res[fieldname] = bitstart
        count = end
        prev_fieldname = fieldname
    # Only a still-open field runs to the end of the word.  Without this
    # guard a line ending in a reserved field stored a bogus None key,
    # and an empty line failed on the unbound bitstart.
    if prev_fieldname is not None:
        res[prev_fieldname] = (res[prev_fieldname], 32)
    return res
def decode_form(form):
    """Merge the decoded lines of one form into a single field table.

    form[0] is the header line; every following line is decoded against
    it and empty results are dropped.  The first (start, end) seen for a
    field name is stored under that name.  A different range for the
    same name is stored as name_1, name_2, ... unless it equals either
    the original range or the most recently added alternate.

    Returns a dict of {fieldname: (start, end)}.
    """
    header = decode_form_header(form[0])
    print ("header", header)
    decoded = [dec for dec in
               (decode_line(header, line) for line in form[1:])
               if dec]

    fields = {}
    falternate = {}
    for entry in decoded:
        for k, (start, end) in entry.items():
            span = (start, end)
            if k not in fields:
                fields[k] = span
                continue
            if span == fields[k]:
                continue  # already in and matching for this Form
            if k in falternate:
                latest = "%s_%d" % (k, falternate[k])
                if span == fields[latest]:
                    continue
            fidx = falternate.get(k, 0) + 1
            falternate[k] = fidx
            fields["%s_%d" % (k, fidx)] = span
    return fields
class DecodeFields:
    """Parse an instruction-fields text file into per-Form field tables.

    Arguments:
    * bitkls: mapping class instantiated (as bitkls(*bitargs)) once per
      field to hold its index -> bit-number map
    * bitargs: positional arguments passed to bitkls
    * fname: path of the text file to parse

    After create_specs() every format found has a FormXX attribute (a
    namedtuple of its fields), and a set of commonly used fields is
    also available directly on the instance (RS, RT, RA, ...).
    """

    def __init__(self, bitkls=BitRange, bitargs=(), fname="fields.txt"):
        self.bitkls = bitkls
        self.bitargs = bitargs
        self.fname = fname

    def create_specs(self):
        """Parse the file and publish the FormXX and shortcut attributes.

        Raises AttributeError if a Form or field named by one of the
        shortcuts is missing from the parsed file.
        """
        self.forms, self.instrs = self.decode_fields()
        self.form_names = forms = self.instrs.keys()
        for form in forms:
            fields = self.instrs[form]
            fk = fields.keys()
            Fields = namedtuple("Fields", fk)
            instr = Fields(**fields)
            setattr(self, "Form%s" % form, instr)
        # now add in some commonly-used fields (should be done automatically)
        # note that these should only be ones which are the same on all Forms
        # note: these are from microwatt insn_helpers.vhdl
        self.RS = self.FormX.RS
        self.RT = self.FormX.RT
        self.RA = self.FormX.RA
        self.RB = self.FormX.RB
        self.SI = self.FormD.SI
        self.UI = self.FormD.UI
        self.L = self.FormD.L
        self.SH32 = self.FormM.SH
        self.sh = self.FormMD.sh
        self.MB32 = self.FormM.MB
        self.ME32 = self.FormM.ME
        self.LI = self.FormI.LI
        self.LK = self.FormI.LK
        self.AA = self.FormB.AA
        self.Rc = self.FormX.Rc
        # OE is its own XO-Form field; it was previously aliased to Rc
        self.OE = self.FormXO.OE
        self.BD = self.FormB.BD
        self.BF = self.FormX.BF
        self.CR = self.FormXL.XO # used by further mcrf decoding
        self.BB = self.FormXL.BB
        self.BA = self.FormXL.BA
        self.BT = self.FormXL.BT
        self.FXM = self.FormXFX.FXM
        self.BO = self.FormXL.BO
        self.BI = self.FormXL.BI
        self.BH = self.FormXL.BH
        self.D = self.FormD.D
        self.DS = self.FormDS.DS
        self.TO = self.FormX.TO
        self.BC = self.FormA.BC
        self.SH = self.FormX.SH
        self.ME = self.FormM.ME
        self.MB = self.FormM.MB
        self.SPR = self.FormXFX.SPR

    def decode_fields(self):
        """Read self.fname and decode its "Fields" section.

        The file is a sequence of sections, each introduced by a line
        starting with '#'; the last space-separated word of that line is
        the section name.  Blank lines are ignored.

        Returns (res, inst): res is always an empty dict (decoding of
        the per-form layouts is disabled) and inst maps each format name
        to its decoded field table.
        """
        with open(self.fname) as f:
            txt = f.readlines()
        forms = {}
        reading_data = False
        for l in txt:
            print ("line", l)
            l = l.strip()
            if len(l) == 0:
                continue
            if reading_data:
                if l[0] == '#':
                    reading_data = False
                else:
                    forms[heading].append(l)
            if not reading_data:
                assert l[0] == '#'
                heading = l[1:].strip()
                #if heading.startswith('1.6.28'): # skip instr fields for now
                    #break
                heading = heading.split(' ')[-1]
                print ("heading", heading)
                reading_data = True
                forms[heading] = []
        res = {}
        inst = {}
        for hdr, form in forms.items():
            print ("heading", hdr)
            # test the section being visited, not the stale loop
            # variable left over from parsing (the last heading read)
            if hdr == 'Fields':
                decoded = decode_instructions(form)
                for fmt, field in decoded.items():
                    inst[fmt] = self.decode_instruction_fields(field)
            #else:
            #    res[hdr] = decode_form(form)
        return res, inst

    def decode_instruction_fields(self, fields):
        """Decode "NAME (spec)" strings into {name: bit map}.

        spec is a comma-separated list of single bit numbers and
        inclusive start:end ranges; the bits are stored in order under
        indices 0, 1, 2, ... of a fresh bitkls instance.  Commas in the
        field name become underscores and a repeated name gets a _N
        suffix.
        """
        res = {}
        for field in fields:
            f, spec = field.strip().split(" ")
            d = self.bitkls(*self.bitargs)
            idx = 0
            for s in spec[1:-1].split(","):
                s = s.split(':')
                if len(s) == 1:
                    d[idx] = int(s[0])
                    idx += 1
                else:
                    start = int(s[0])
                    end = int(s[1])
                    while start <= end:
                        d[idx] = start
                        idx += 1
                        start += 1
            f = f.replace(",", "_")
            unique = find_unique(res, f)
            res[unique] = d
        return res
if __name__ == '__main__':
    # Demo: parse the default fields file and dump what was decoded.
    dec = DecodeFields()
    dec.create_specs()

    # per-form layouts as name: start-end
    for hdr, form in dec.forms.items():
        print ()
        print (hdr)
        for k, v in form.items():
            print ("%s: %d-%d" % (k, v[0], v[1]))

    # per-format instruction fields
    for form, field in dec.instrs.items():
        print ()
        print (form)
        for f, vals in field.items():
            print (" ", f, vals)

    x_form = dec.FormX
    print (x_form)
    print (x_form.A)
    print (dir(x_form))
    print (x_form._fields)
+++ /dev/null
-from collections import OrderedDict
-from power_fields import DecodeFields, BitRange
-from nmigen import Module, Elaboratable, Signal, Cat
-from nmigen.cli import rtlil
class SignalBitRange(BitRange):
    """BitRange bound to a signal: indexing returns bits of that signal.

    Entry k of the range selects self.signal[width-k-1], where width is
    the first element of the signal's shape.
    """

    def __init__(self, signal):
        BitRange.__init__(self)
        self.signal = signal

    def __getitem__(self, subs):
        """Return the signal bit for an index, or a Cat of bits for a slice.

        For a slice, a missing start is 0, a missing step is 1 and a
        missing stop is -1.  A negative bound b is mapped to
        len(self) - b - 1, so -1 means "one past the last entry" and
        [0:-1] covers every entry.
        """
        # *sigh* field numberings are bit-inverted.  PowerISA 3.0B section 1.3.2
        width = self.signal.shape()[0]
        print (dir(self))
        print (self.items())
        if isinstance(subs, slice):
            res = []
            print (subs)
            start, stop, step = subs.start, subs.stop, subs.step
            if step is None:
                step = 1
            if start is None:
                start = 0
            if stop is None:
                stop = -1
            if start < 0:
                start = len(self) - start - 1
            if stop < 0:
                stop = len(self) - stop - 1
            print ("range", start, stop, step)
            for t in range(start, stop, step):
                k = OrderedDict.__getitem__(self, t)
                print ("t", t, k)
                res.append(self.signal[width-k-1])
            return Cat(*res)
        # single index.  (An unreachable debug print that referenced an
        # undefined name used to follow this return; it has been removed.)
        k = OrderedDict.__getitem__(self, subs)
        return self.signal[width-k-1]
class SigDecode(Elaboratable):
    """Small demo module that extracts a few decoded fields of an opcode.

    Arguments:
    * width: width of the opcode_in signal
    """

    def __init__(self, width):
        self.opcode_in = Signal(width, reset_less=False)
        # field tables whose entries index straight into opcode_in
        self.df = DecodeFields(SignalBitRange, [self.opcode_in])
        self.df.create_specs()
        # one output per extracted field, sized from the field table
        self.x_s = Signal(len(self.df.FormX.S), reset_less=True)
        self.x_sh = Signal(len(self.df.FormX.SH), reset_less=True)
        self.dq_xs_s = Signal(len(self.df.FormDQ.SX_S), reset_less=True)

    def elaborate(self, platform):
        m = Module()
        form_x = self.df.FormX
        form_dq = self.df.FormDQ
        m.d.comb += self.x_s.eq(form_x.S[0])
        m.d.comb += self.x_sh.eq(form_x.SH[0:-1])
        m.d.comb += self.dq_xs_s.eq(form_dq.SX_S[0:-1])
        return m

    def ports(self):
        """Signals exposed as ports (dq_xs_s is not in the list)."""
        return [self.opcode_in, self.x_s, self.x_sh]
def create_sigdecode():
    """Build a SigDecode for a 32-bit opcode."""
    return SigDecode(32)
if __name__ == '__main__':
    # Convert the demo decoder to RTLIL and save it as decoder.il
    sigdecode = create_sigdecode()
    with open("decoder.il", "w") as f:
        f.write(rtlil.convert(sigdecode, ports=sigdecode.ports()))
+++ /dev/null
-from nmigen import Module, Signal
-from nmigen.back.pysim import Simulator, Delay
-from nmigen.test.utils import FHDLTestCase
-from nmigen.cli import rtlil
-import sys
-import os
-import unittest
-from power_decoder import (PowerDecoder, pdecode)
-from power_enums import (Function, InternalOp, In1Sel, In2Sel, In3Sel,
- OutSel, RC, LdstLen, CryIn, single_bit_flags,
- get_signal_name, get_csv)
class DecoderTestCase(FHDLTestCase):
    """Checks the decoder's outputs against the rows of the opcode CSVs."""

    def run_tst(self, bitsel, csvname, minor=None, suffix=None, opint=True):
        """Drive every opcode of one CSV into the decoder and check it.

        Arguments:
        * bitsel: (start, end) slice of the opcode that receives the
          row's 'opcode' value
        * csvname: CSV file with the expected decode of each opcode
        * minor: optional (value, (start, end)); value is written into
          that opcode slice as well
        * suffix: unused here
        * opint: when false, the opcode is a bit pattern such as
          001---10, which is turned into a binary literal with every
          '-' replaced by 0
        """
        m = Module()
        comb = m.d.comb
        opcode = Signal(32)
        function_unit = Signal(Function)
        internal_op = Signal(InternalOp)
        in1_sel = Signal(In1Sel)
        in2_sel = Signal(In2Sel)
        in3_sel = Signal(In3Sel)
        out_sel = Signal(OutSel)
        rc_sel = Signal(RC)
        ldst_len = Signal(LdstLen)
        cry_in = Signal(CryIn)

        # opcodes = get_csv(csvname)
        # m.submodules.dut = dut = PowerDecoder(32, opcodes, bitsel=bitsel,
        #                                       opint=opint, suffix=suffix)
        m.submodules.dut = dut = pdecode
        decoded = dut.op
        comb += [dut.opcode_in.eq(opcode),
                 function_unit.eq(decoded.function_unit),
                 in1_sel.eq(decoded.in1_sel),
                 in2_sel.eq(decoded.in2_sel),
                 in3_sel.eq(decoded.in3_sel),
                 out_sel.eq(decoded.out_sel),
                 rc_sel.eq(decoded.rc_sel),
                 ldst_len.eq(decoded.ldst_len),
                 cry_in.eq(decoded.cry_in),
                 internal_op.eq(decoded.internal_op)]

        sim = Simulator(m)
        opcodes = get_csv(csvname)

        # (signal, enum class, CSV column) for every enum-valued output
        signals = [(function_unit, Function, 'unit'),
                   (internal_op, InternalOp, 'internal op'),
                   (in1_sel, In1Sel, 'in1'),
                   (in2_sel, In2Sel, 'in2'),
                   (in3_sel, In3Sel, 'in3'),
                   (out_sel, OutSel, 'out'),
                   (rc_sel, RC, 'rc'),
                   (cry_in, CryIn, 'cry in'),
                   (ldst_len, LdstLen, 'ldst len')]

        def process():
            for row in opcodes:
                if not row['unit']:
                    continue
                op = row['opcode']
                if not opint: # HACK: convert 001---10 to 0b00100010
                    op = "0b" + op.replace('-', '0')
                print ("opint", opint, row['opcode'], op)
                print(row)
                # clear the opcode, then fill in the selected bit ranges
                yield opcode.eq(0)
                yield opcode[bitsel[0]:bitsel[1]].eq(int(op, 0))
                if minor:
                    print(minor)
                    minorbits = minor[1]
                    yield opcode[minorbits[0]:minorbits[1]].eq(minor[0])
                yield Delay(1e-6)

                # enum-valued outputs
                for sig, enm, name in signals:
                    result = yield sig
                    expected = enm[row[name]]
                    msg = f"{sig.name} == {enm(result)}, expected: {expected}"
                    self.assertEqual(enm(result), expected, msg)
                # single-bit flag outputs
                for bit in single_bit_flags:
                    sig = getattr(dut.op, get_signal_name(bit))
                    result = yield sig
                    expected = int(row[bit])
                    msg = f"{sig.name} == {result}, expected: {expected}"
                    self.assertEqual(expected, result, msg)

        sim.add_process(process)
        prefix = os.path.splitext(csvname)[0]
        traces = [opcode, function_unit, internal_op, in1_sel, in2_sel]
        with sim.write_vcd("%s.vcd" % prefix, "%s.gtkw" % prefix,
                           traces=traces):
            sim.run()

    def generate_ilang(self):
        """Write the decoder as RTLIL to decoder.il."""
        vl = rtlil.convert(pdecode, ports=pdecode.ports())
        with open("decoder.il", "w") as f:
            f.write(vl)

    def test_major(self):
        self.run_tst((26, 32), "major.csv")
        self.generate_ilang()

    def test_minor_19(self):
        self.run_tst((1, 11), "minor_19.csv", minor=(19, (26, 32)),
                     suffix=(0, 5))

    # def test_minor_19_00000(self):
    #     self.run_tst((1, 11), "minor_19_00000.csv")

    def test_minor_30(self):
        self.run_tst((1, 5), "minor_30.csv", minor=(30, (26, 32)))

    def test_minor_31(self):
        self.run_tst((1, 11), "minor_31.csv", minor=(31, (26, 32)))

    def test_minor_58(self):
        self.run_tst((0, 2), "minor_58.csv", minor=(58, (26, 32)))

    def test_minor_62(self):
        self.run_tst((0, 2), "minor_62.csv", minor=(62, (26, 32)))

    # #def test_minor_31_prefix(self):
    # #    self.run_tst(10, "minor_31.csv", suffix=(5, 10))

    # def test_extra(self):
    #     self.run_tst(32, "extra.csv", opint=False)
    #     self.generate_ilang(32, "extra.csv", opint=False)
# run the decoder test suite when executed as a script
if __name__ == "__main__":
    unittest.main()
+++ /dev/null
-from nmigen import Elaboratable, Signal, Module, Const, Mux
-from nmigen.cli import main
-from nmigen.cli import verilog, rtlil
-import operator
class Adder(Elaboratable):
    """Combinatorial adder: o = a + b, all signals `width` bits wide."""

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)

    def elaborate(self, platform):
        m = Module()
        total = self.a + self.b
        m.d.comb += self.o.eq(total)
        return m
class Subtractor(Elaboratable):
    """Combinatorial subtractor: o = a - b, all signals `width` bits wide."""

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)

    def elaborate(self, platform):
        m = Module()
        difference = self.a - self.b
        m.d.comb += self.o.eq(difference)
        return m
class Multiplier(Elaboratable):
    """Combinatorial multiplier: o = a * b, all signals `width` bits wide."""

    def __init__(self, width):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)

    def elaborate(self, platform):
        m = Module()
        product = self.a * self.b
        m.d.comb += self.o.eq(product)
        return m
class Shifter(Elaboratable):
    """Combinatorial right shifter: o = a >> b, signals `width` bits wide."""

    def __init__(self, width):
        self.width = width
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        # NOTE(review): the mask has all `width` low bits set, so it does
        # not narrow the width-bit b at all -- confirm whether a smaller
        # shift-amount mask was intended.
        mask = Const((1<<self.width)-1)
        btrunc = Signal(self.width)
        comb += btrunc.eq(self.b & mask)
        comb += self.o.eq(self.a >> btrunc)
        return m
class ALU(Elaboratable):
    """Demo ALU with a valid/ready handshake and fake per-op latencies.

    op selects add (0), sub (1), mul (2) or shift (3).  The result is
    captured as soon as the input is accepted; a countdown then delays
    the moment n_valid_o is raised.
    """

    def __init__(self, width):
        self.p_valid_i = Signal()
        self.p_ready_o = Signal()
        self.n_ready_i = Signal()
        self.n_valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        self.width = width

    def elaborate(self, platform):
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        add = Adder(self.width)
        sub = Subtractor(self.width)
        mul = Multiplier(self.width)
        shf = Shifter(self.width)
        units = [add, sub, mul, shf]  # list position == op code

        m.submodules.add = add
        m.submodules.sub = sub
        m.submodules.mul = mul
        m.submodules.shf = shf
        # every unit sees both operands all the time
        for mod in units:
            comb += [mod.a.eq(self.a), mod.b.eq(self.b)]

        go_now = Signal(reset_less=True) # testing no-delay ALU

        with m.If(self.p_valid_i):
            # valid input that has not yet been acknowledged with "ready"
            with m.If(~self.p_ready_o):
                sync += self.p_ready_o.eq(1)

                # "fake" pipeline: take the selected unit's result now
                with m.Switch(self.op):
                    for i, mod in enumerate(units):
                        with m.Case(i):
                            sync += self.o.eq(mod.o)

                # ...and start the countdown matching the operation
                with m.If(self.op == 2): # MUL, to take 5 instructions
                    sync += self.counter.eq(5)
                with m.Elif(self.op == 3): # SHIFT to take 7
                    sync += self.counter.eq(7)
                with m.Elif(self.op == 1): # SUB to take 1, straight away
                    sync += self.counter.eq(1)
                    comb += go_now.eq(1)
                with m.Else(): # ADD to take 2
                    sync += self.counter.eq(2)
        with m.Else():
            # input no longer valid: drop ready too.  a "proper" ALU
            # would have had to sync in the opcode and a/b operands
            sync += self.p_ready_o.eq(0)

        # the output fires when the countdown reaches 1 (or immediately)
        with m.If((self.counter == 1) | go_now):
            sync += self.n_valid_o.eq(1)

        # recipient accepted the result: return to the known-good state
        with m.If(self.n_ready_i & self.n_valid_o):
            sync += self.n_valid_o.eq(0)
            sync += self.counter.eq(0) # reset the counter
            sync += self.o.eq(0) # clear the output for tidiness sake

        # count down to 1 (the 1 -> 0 step only happens on acknowledgement)
        with m.If(self.counter > 1):
            sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        return list(self)
class BranchOp(Elaboratable):
    """Combinatorial comparison: o is 1 when op(a, b) holds, else 0.

    Arguments:
    * width: width of a, b and o
    * op: two-argument callable applied to the a and b signals
    """

    def __init__(self, width, op):
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        self.op = op

    def elaborate(self, platform):
        m = Module()
        condition = self.op(self.a, self.b)
        m.d.comb += self.o.eq(Mux(condition, 1, 0))
        return m
class BranchALU(Elaboratable):
    """Demo branch-compare ALU with a valid/ready handshake.

    op selects greater-than (0), less-than (1), equal (2) or
    not-equal (3).  The comparison result is captured when the input is
    accepted and a fixed countdown of 5 delays n_valid_o.
    """

    def __init__(self, width):
        self.p_valid_i = Signal()
        self.p_ready_o = Signal()
        self.n_ready_i = Signal()
        self.n_valid_o = Signal()
        self.counter = Signal(4)
        self.op = Signal(2)
        self.a = Signal(width)
        self.b = Signal(width)
        self.o = Signal(width)
        self.width = width

    def elaborate(self, platform):
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        bgt = BranchOp(self.width, operator.gt)
        blt = BranchOp(self.width, operator.lt)
        beq = BranchOp(self.width, operator.eq)
        bne = BranchOp(self.width, operator.ne)
        units = [bgt, blt, beq, bne]  # list position == op code

        m.submodules.bgt = bgt
        m.submodules.blt = blt
        m.submodules.beq = beq
        m.submodules.bne = bne
        # every comparator sees both operands all the time
        for mod in units:
            comb += [mod.a.eq(self.a), mod.b.eq(self.b)]

        go_now = Signal(reset_less=True) # testing no-delay ALU

        with m.If(self.p_valid_i):
            # valid input that has not yet been acknowledged with "ready"
            with m.If(~self.p_ready_o):
                sync += self.p_ready_o.eq(1)

                # "fake" pipeline: take the selected result now
                with m.Switch(self.op):
                    for i, mod in enumerate(units):
                        with m.Case(i):
                            sync += self.o.eq(mod.o)
                sync += self.counter.eq(5) # branch to take 5 cycles (fake)
                #m.d.comb += go_now.eq(1)
        with m.Else():
            # input no longer valid: drop ready too.  a "proper" ALU
            # would have had to sync in the opcode and a/b operands
            sync += self.p_ready_o.eq(0)

        # the output fires when the countdown reaches 1 (or immediately)
        with m.If((self.counter == 1) | go_now):
            sync += self.n_valid_o.eq(1)

        # recipient accepted the result: return to the known-good state
        with m.If(self.n_ready_i & self.n_valid_o):
            sync += self.n_valid_o.eq(0)
            sync += self.counter.eq(0) # reset the counter
            sync += self.o.eq(0) # clear the output for tidiness sake

        # count down to 1 (the 1 -> 0 step only happens on acknowledgement)
        with m.If(self.counter > 1):
            sync += self.counter.eq(self.counter - 1)

        return m

    def __iter__(self):
        yield self.op
        yield self.a
        yield self.b
        yield self.o

    def ports(self):
        return list(self)
if __name__ == "__main__":
    # Emit RTLIL for a 16-bit instance of each ALU flavour.
    for alu_class, il_name in ((ALU, "test_alu.il"),
                               (BranchALU, "test_branch_alu.il")):
        alu = alu_class(width=16)
        vl = rtlil.convert(alu, ports=alu.ports())
        with open(il_name, "w") as f:
            f.write(vl)
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Mux, Elaboratable
-from nmutil.latch import SRLatch, latchregister
-""" Computation Unit (aka "ALU Manager").
- This module runs a "revolving door" set of three latches, based on
- * Issue
- * Go_Read
- * Go_Write
- where one of them cannot be set on any given cycle.
- (Note however that opc_l has been inverted (and qn used), due to SRLatch
- default reset state being "0" rather than "1")
- * When issue is first raised, a busy signal is sent out.
- The src1 and src2 registers and the operand can be latched in
- at this point
- * Read request is set, which is acknowledged through the Scoreboard
- to the priority picker, which generates (one and only one) Go_Read
- at a time. One of those will (eventually) be this Computation Unit.
- * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
- src1/src2/operand in place), and the ALU is told to proceed.
- * As this is currently a "demo" unit, a countdown timer is activated
- to simulate an ALU "pipeline", which activates "write request release",
- and the ALU's output is captured into a temporary register.
- * Write request release will go through a similar process as Read request,
- resulting (eventually) in Go_Write being asserted.
- * When Go_Write is asserted, two things happen: (1) the data in the temp
- register is placed combinatorially onto the output, and (2) the
- req_l latch is cleared, busy is dropped, and the Comp Unit is back
- through its revolving door to do another task.
- Notes on oper_i:
- * bits[0:2] are for the ALU: add=0, sub=1, mul=2, shift=3
- * bit[2] is the immediate flag (bit[2]=1 == immediate mode)
class ComputationUnitNoDelay(Elaboratable):
    """Computation Unit wrapping an ALU behind three SR latches.

    Arguments:
    * rwid: register (data) width
    * opwid: width of the opcode input
    * alu: ALU module; its a, b, o, op and valid/ready signals are used
    """

    def __init__(self, rwid, opwid, alu):
        self.opwid = opwid
        self.rwid = rwid
        self.alu = alu

        self.counter = Signal(4)
        self.go_rd_i = Signal(reset_less=True) # go read in
        self.go_wr_i = Signal(reset_less=True) # go write in
        self.issue_i = Signal(reset_less=True) # fn issue in
        self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
        self.go_die_i = Signal() # go die (reset)

        self.oper_i = Signal(opwid, reset_less=True) # opcode in
        self.imm_i = Signal(rwid, reset_less=True) # immediate in
        self.src1_i = Signal(rwid, reset_less=True) # oper1 in
        self.src2_i = Signal(rwid, reset_less=True) # oper2 in

        self.busy_o = Signal(reset_less=True) # fn busy out
        self.data_o = Signal(rwid, reset_less=True) # Dest out
        self.rd_rel_o = Signal(reset_less=True) # release src1/src2 request
        self.req_rel_o = Signal(reset_less=True) # release request out (valid_o)

    def elaborate(self, platform):
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.alu = self.alu
        m.submodules.src_l = src_l = SRLatch(sync=False)
        m.submodules.opc_l = opc_l = SRLatch(sync=False)
        m.submodules.req_l = req_l = SRLatch(sync=False)

        # shadow/go_die: go_die resets on both the write and read side
        reset_w = Signal(reset_less=True)
        reset_r = Signal(reset_less=True)
        comb += reset_w.eq(self.go_wr_i | self.go_die_i)
        comb += reset_r.eq(self.go_rd_i | self.go_die_i)

        # This is in effect a "3-way revolving door": at no time may all
        # three latches be set at the same time.

        # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
        sync += opc_l.s.eq(self.issue_i) # XXX NOTE: INVERTED FROM book!
        sync += opc_l.r.eq(reset_w) # XXX NOTE: INVERTED FROM book!

        # src operand latch (not using go_wr_i)
        sync += src_l.s.eq(self.issue_i)
        sync += src_l.r.eq(reset_r)

        # dest operand latch (not using issue_i)
        sync += req_l.s.eq(self.go_rd_i)
        sync += req_l.r.eq(reset_w)

        # latch/register holding the opcode, captured on issue
        oper_r = Signal(self.opwid+1, reset_less=True) # opcode reg
        latchregister(m, self.oper_i, oper_r, self.issue_i)

        # latch/register holding the ALU output
        data_r = Signal(self.rwid, reset_less=True) # Dest register
        latchregister(m, self.alu.o, data_r, req_l.q)

        # bits 0-1 of the latched opcode select the ALU operation
        comb += self.alu.op.eq(oper_r[0:2])

        # bit 2 says whether this is an immediate operation
        op_is_imm = Signal(reset_less=True)
        comb += op_is_imm.eq(oper_r[2])

        # select immediate if opcode says so.  however also change the latch
        # to trigger *from* the opcode latch instead.
        src2_or_imm = Signal(self.rwid, reset_less=True)
        src_sel = Signal(reset_less=True)
        comb += src_sel.eq(Mux(op_is_imm, opc_l.qn, src_l.q))
        comb += src2_or_imm.eq(Mux(op_is_imm, self.imm_i, self.src2_i))

        # latch/registers feeding the two ALU operands
        latchregister(m, self.src1_i, self.alu.a, src_l.q)
        latchregister(m, src2_or_imm, self.alu.b, src_sel)

        # -----
        # outputs
        # -----

        # all request signals gated by busy_o.  prevents picker problems
        busy_o = self.busy_o
        comb += busy_o.eq(opc_l.q) # busy out
        comb += self.rd_rel_o.eq(src_l.q & busy_o) # src1/src2 req rel

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(self.go_rd_i): # src operands ready, GO!
            with m.If(~self.alu.p_ready_o): # no ACK yet
                comb += self.alu.p_valid_i.eq(1) # so indicate valid

        # only proceed if ALU says its output is valid
        with m.If(self.alu.n_valid_o):
            # when ALU ready, write req release out.  waits for shadow
            comb += self.req_rel_o.eq(req_l.q & busy_o & self.shadown_i)
            # when output latch is ready, and ALU says ready, accept ALU output
            with m.If(self.req_rel_o):
                comb += self.alu.n_ready_i.eq(1) # tells ALU "thanks got it"

        # output the data from the latch on go_write
        with m.If(self.go_wr_i):
            comb += self.data_o.eq(data_r)

        return m

    def __iter__(self):
        yield self.go_rd_i
        yield self.go_wr_i
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield self.oper_i
        yield self.imm_i
        yield self.src1_i
        yield self.src2_i
        yield self.busy_o
        yield self.rd_rel_o
        yield self.req_rel_o
        yield self.data_o

    def ports(self):
        return list(self)
def scoreboard_sim(dut):
    """Simulation stimulus: issue twice, then pulse go-read and go-write.

    Only signals the computation unit really has are driven (issue_i,
    src1_i, go_rd_i, go_wr_i).  The earlier version referred to dest_i,
    go_read_i and go_write_i, none of which exist on the unit, so it
    failed with AttributeError on its first step.
    """
    # first issue pulse
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # second issue, held for three cycles with src1 set
    yield dut.src1_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield
    yield
    yield dut.issue_i.eq(0)
    yield
    # one-cycle go-read pulse
    yield dut.go_rd_i.eq(1)
    yield
    yield dut.go_rd_i.eq(0)
    yield
    # one-cycle go-write pulse
    yield dut.go_wr_i.eq(1)
    yield
    yield dut.go_wr_i.eq(0)
    yield
def test_scoreboard():
    """Write the unit's RTLIL to test_compalu.il and run the stimulus.

    Builds a 16-bit ALU inside a ComputationUnitNoDelay with an 8-bit
    opcode; the simulation trace goes to test_compalu.vcd.
    """
    from alu_hier import ALU
    dut = ComputationUnitNoDelay(16, 8, ALU(16))
    with open("test_compalu.il", "w") as f:
        f.write(rtlil.convert(dut, ports=dut.ports()))
    run_simulation(dut, scoreboard_sim(dut), vcd_name='test_compalu.vcd')
# run the computation-unit smoke test when executed as a script
if __name__ == '__main__':
    test_scoreboard()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Mux, Cat, Elaboratable
-from nmutil.latch import SRLatch, latchregister
-""" LOAD / STORE Computation Unit. Also capable of doing ADD and ADD immediate
- This module runs a "revolving door" set of four latches, based on
- * Issue
- * Go_Read
- * Go_Addr
- * Go_Write *OR* Go_Store
- (Note that opc_l has been inverted (and qn used), due to SRLatch
- default reset state being "0" rather than "1")
# internal opcodes.  hypothetically this could do more combinations.
# meanings:
# * bit 0: 0 = ADD , 1 = SUB
# * bit 1: 0 = src1, 1 = IMM
# * bit 2: 1 = LD
# * bit 3: 1 = ST
#
# NOTE(review): against the bit meanings above, the *I names have bit 1
# (IMM) clear while the plain names have it set, and LDST_OP_ST sets
# bit 2 (LD) while LDST_OP_LD sets bit 3 (ST).  The values are left
# untouched here -- confirm which of names, values or bit meanings is
# intended.
LDST_OP_ADDI = 0b0000 # plain ADD (src1 + src2)
LDST_OP_SUBI = 0b0001 # plain SUB (src1 - src2)
LDST_OP_ADD = 0b0010 # immed ADD (imm + src1)
LDST_OP_SUB = 0b0011 # immed SUB (imm - src1)
LDST_OP_ST = 0b0110 # immed ADD plus LD op.  ADD result is address
LDST_OP_LD = 0b1010 # immed ADD plus ST op.  ADD result is address
class LDSTCompUnit(Elaboratable):
    """ LOAD / STORE / ADD / SUB Computation Unit

    Inputs
    ------

    * :rwid:   register width
    * :opwid:  width of the opcode input
    * :alu:    an ALU module
    * :mem:    a Memory Module (read-write capable)

    Control Signals (In)
    --------------------

    * :issue_i:    LD/ST is being "issued".
    * :isalu_i:    ADD/SUB is being "issued" (aka issue_alu_i)
    * :shadown_i:  Inverted-shadow is being held (stops STORE *and* WRITE)
    * :go_rd_i:    read is being actioned (latches in src regs)
    * :go_ad_i:    address is being actioned (triggers actual mem LD)
    * :go_st_i:    store is being actioned (triggers actual mem STORE)
    * :go_die_i:   resets the unit back to "wait for issue"
    """

    def __init__(self, rwid, opwid, alu, mem):
        self.opwid = opwid
        self.rwid = rwid
        self.alu = alu
        self.mem = mem

        self.counter = Signal(4)
        self.go_rd_i = Signal(reset_less=True) # go read in
        self.go_ad_i = Signal(reset_less=True) # go address in
        self.go_wr_i = Signal(reset_less=True) # go write in
        self.go_st_i = Signal(reset_less=True) # go store in
        self.issue_i = Signal(reset_less=True) # fn issue in
        self.isalu_i = Signal(reset_less=True) # fn issue as ALU in
        self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
        self.go_die_i = Signal() # go die (reset)

        self.oper_i = Signal(opwid, reset_less=True) # opcode in
        self.imm_i = Signal(rwid, reset_less=True) # immediate in
        self.src1_i = Signal(rwid, reset_less=True) # oper1 in
        self.src2_i = Signal(rwid, reset_less=True) # oper2 in

        self.busy_o = Signal(reset_less=True) # fn busy out
        self.rd_rel_o = Signal(reset_less=True) # request src1/src2
        self.adr_rel_o = Signal(reset_less=True) # request address (from mem)
        self.sto_rel_o = Signal(reset_less=True) # request store (to mem)
        self.req_rel_o = Signal(reset_less=True) # request write (result)
        self.data_o = Signal(rwid, reset_less=True) # Dest out (LD or ALU)
        self.addr_o = Signal(rwid, reset_less=True) # Address out (LD or ST)

        # hmm... TODO... move these to outside of LDSTCompUnit
        self.load_mem_o = Signal(reset_less=True) # activate memory LOAD
        self.stwd_mem_o = Signal(reset_less=True) # activate memory STORE
        self.ld_o = Signal(reset_less=True) # operation is a LD
        self.st_o = Signal(reset_less=True) # operation is a ST

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.alu = self.alu
        m.submodules.src_l = src_l = SRLatch(sync=False)
        m.submodules.opc_l = opc_l = SRLatch(sync=False)
        m.submodules.adr_l = adr_l = SRLatch(sync=False)
        m.submodules.req_l = req_l = SRLatch(sync=False)
        m.submodules.sto_l = sto_l = SRLatch(sync=False)

        # shadow/go_die: go_die takes part in every latch reset
        reset_b = Signal(reset_less=True)
        reset_w = Signal(reset_less=True)
        reset_a = Signal(reset_less=True)
        reset_s = Signal(reset_less=True)
        reset_r = Signal(reset_less=True)
        die = self.go_die_i
        comb += reset_b.eq(self.go_st_i | self.go_wr_i | die)
        comb += reset_w.eq(self.go_wr_i | die)
        comb += reset_s.eq(self.go_st_i | die)
        comb += reset_r.eq(self.go_rd_i | die)
        # this one is slightly different, issue_alu_i selects go_wr_i)
        a_sel = Mux(self.isalu_i, self.go_wr_i, self.go_ad_i)
        comb += reset_a.eq(a_sel | die)

        # opcode decode
        op_alu = Signal(reset_less=True)
        op_is_ld = Signal(reset_less=True)
        op_is_st = Signal(reset_less=True)
        op_ldst = Signal(reset_less=True)
        op_is_imm = Signal(reset_less=True)

        # select immediate or src2 reg to add
        src2_or_imm = Signal(self.rwid, reset_less=True)
        src_sel = Signal(reset_less=True)

        # issue can be either issue_i or issue_alu_i (isalu_i)
        issue_i = Signal(reset_less=True)
        comb += issue_i.eq(self.issue_i | self.isalu_i)

        # Ripple-down the latches, each one set cancels the previous.
        # NOTE: use sync to stop combinatorial loops.

        # opcode latch - inverted so that busy resets to 0
        sync += opc_l.s.eq(issue_i) # XXX NOTE: INVERTED FROM book!
        sync += opc_l.r.eq(reset_b) # XXX NOTE: INVERTED FROM book!

        # src operand latch
        sync += src_l.s.eq(issue_i)
        sync += src_l.r.eq(reset_r)

        # addr latch
        sync += adr_l.s.eq(self.go_rd_i)
        sync += adr_l.r.eq(reset_a)

        # dest operand latch
        sync += req_l.s.eq(self.go_ad_i)
        sync += req_l.r.eq(reset_w)

        # store latch
        sync += sto_l.s.eq(self.go_ad_i)
        sync += sto_l.r.eq(reset_s)

        # outputs: busy and release signals
        busy_o = self.busy_o
        comb += self.busy_o.eq(opc_l.q) # busy out
        comb += self.rd_rel_o.eq(src_l.q & busy_o) # src1/src2 req rel
        comb += self.sto_rel_o.eq(sto_l.q & busy_o & self.shadown_i & op_is_st)

        # request release enabled based on if op is a LD/ST or a plain ALU
        # if op is an ADD/SUB or a LD, req_rel activates.
        wr_q = Signal(reset_less=True)
        comb += wr_q.eq(req_l.q & (~op_ldst | op_is_ld))

        # the ALU result is captured on address release for LD/ST and
        # on write-request release otherwise
        alulatch = Signal(reset_less=True)
        comb += alulatch.eq((op_ldst & self.adr_rel_o) |
                            (~op_ldst & self.req_rel_o))

        # only proceed if ALU says its output is valid
        with m.If(self.alu.n_valid_o):
            # write req release out.  waits until shadow is dropped.
            comb += self.req_rel_o.eq(wr_q & busy_o & self.shadown_i)

            # address release only happens on LD/ST, and is shadowed.
            comb += self.adr_rel_o.eq(adr_l.q & op_ldst & busy_o &
                                      self.shadown_i)

            # when output latch is ready, and ALU says ready, accept ALU output
            with m.If(self.req_rel_o):
                comb += self.alu.n_ready_i.eq(1) # tells ALU "thanks got it"

        # select immediate if opcode says so.  however also change the latch
        # to trigger *from* the opcode latch instead.
        comb += src_sel.eq(Mux(op_is_imm, opc_l.qn, src_l.q))
        comb += src2_or_imm.eq(Mux(op_is_imm, self.imm_i, self.src2_i))

        # create a latch/register for src1/src2 (include immediate select)
        latchregister(m, self.src1_i, self.alu.a, src_l.q)
        latchregister(m, src2_or_imm, self.alu.b, src_sel)

        # create a latch/register for the operand
        # NOTE(review): latched on self.issue_i only, not on the combined
        # issue_i above -- confirm that ALU-issue is meant to skip this.
        oper_r = Signal(self.opwid, reset_less=True) # Dest register
        latchregister(m, self.oper_i, oper_r, self.issue_i)
        alu_op = Cat(op_alu, 0, op_is_imm) # using alu_hier, here.
        comb += self.alu.op.eq(alu_op)

        # and one for the output from the ALU
        data_r = Signal(self.rwid, reset_less=True) # Dest register
        latchregister(m, self.alu.o, data_r, alulatch)

        # decode bits of operand (latched)
        comb += op_alu.eq(oper_r[0])
        comb += op_is_imm.eq(oper_r[1])
        comb += op_is_ld.eq(oper_r[2])
        comb += op_is_st.eq(oper_r[3])
        comb += op_ldst.eq(op_is_ld | op_is_st)
        comb += self.load_mem_o.eq(op_is_ld & self.go_ad_i)
        comb += self.stwd_mem_o.eq(op_is_st & self.go_st_i)
        comb += self.ld_o.eq(op_is_ld)
        comb += self.st_o.eq(op_is_st)

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(self.go_rd_i): # src operands ready, GO!
            with m.If(~self.alu.p_ready_o): # no ACK yet
                comb += self.alu.p_valid_i.eq(1) # so indicate valid

        # put the register directly onto the output bus on a go_write
        with m.If(self.go_wr_i):
            comb += self.data_o.eq(data_r)

        # put the register directly onto the address bus
        with m.If(self.go_ad_i):
            comb += self.addr_o.eq(data_r)

        return m

    def __iter__(self):
        yield self.go_rd_i
        yield self.go_ad_i
        yield self.go_wr_i
        yield self.go_st_i
        yield self.issue_i
        yield self.isalu_i
        yield self.shadown_i
        yield self.go_die_i
        yield self.oper_i
        yield self.imm_i
        yield self.src1_i
        yield self.src2_i
        yield self.busy_o
        yield self.rd_rel_o
        yield self.adr_rel_o
        yield self.sto_rel_o
        yield self.req_rel_o
        yield self.data_o
        yield self.load_mem_o
        yield self.stwd_mem_o

    def ports(self):
        return list(self)
def scoreboard_sim(dut):
    """Simulation stimulus: two issue pulses, then a read and a write pulse.

    Each bare ``yield`` advances the simulation by one step.

    NOTE(review): this drives ``dest_i``, ``go_read_i`` and ``go_write_i``,
    whereas the unit's own port list yields ``go_rd_i`` / ``go_wr_i`` and no
    ``dest_i`` -- confirm these attributes exist on the DUT.
    """
    def pulse(name, hold=1):
        # raise the named signal, wait `hold` steps, drop it, wait one step
        yield getattr(dut, name).eq(1)
        for _ in range(hold):
            yield
        yield getattr(dut, name).eq(0)
        yield

    # first issue: destination set, issue held for a single step
    yield dut.dest_i.eq(1)
    yield from pulse("issue_i")

    # second issue: source operand set, issue held for three steps
    yield dut.src1_i.eq(1)
    yield from pulse("issue_i", hold=3)

    # read phase, then write phase
    yield from pulse("go_read_i")
    yield from pulse("go_write_i")
def test_scoreboard():
    """Build an LDSTCompUnit around a 16-bit ALU, dump RTLIL, then simulate.

    Writes ``test_ldst_comp.il`` and ``test_ldst_comp.vcd`` in the current
    working directory.
    """
    from alu_hier import ALU

    alu = ALU(16)
    # there is no memory model here: the ALU instance is passed in its place
    fake_mem = alu
    dut = LDSTCompUnit(16, 4, alu, fake_mem)

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_ldst_comp.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = scoreboard_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_ldst_comp.vcd')
# when executed as a script, run the LD/ST computation unit test directly
if __name__ == '__main__':
    test_scoreboard()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
-from regfile.regfile import RegFileArray, treereduce
-from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
-from scoreboard.fu_fu_matrix import FUFUDepMatrix
-from scoreboard.fu_reg_matrix import FURegDepMatrix
-from scoreboard.global_pending import GlobalPending
-from scoreboard.group_picker import GroupPicker
-from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
-from compalu import ComputationUnitNoDelay
-from alu_hier import ALU
-from nmutil.latch import SRLatch
-from random import randint
class Scoreboard(Elaboratable):
    """Experimental integer scoreboard wiring two ALU computation units.

    Instantiates INT and FP register files, two ComputationUnitNoDelay
    instances (each wrapping its own ALU), one IntFnUnit per computation
    unit, a GroupPicker, four GlobalPending collectors, a register decoder,
    an issue unit and an FU-FU dependency matrix, then connects them.

    ``int_insn_i`` is only created when ``elaborate()`` runs.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True) # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted

    def elaborate(self, platform):
        """Create all submodules and connect them; returns the Module."""
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not connected anywhere below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub

        # Int FUs
        if_l = []
        int_src1_pend_v = []
        int_src2_pend_v = []
        int_rd_pend_v = []
        int_wr_pend_v = []
        for i, a in enumerate(int_alus):
            # set up Integer Function Unit, add to module (and python list)
            fu = IntFnUnit(self.n_regs, shadow_wid=0)
            setattr(m.submodules, "intfu%d" % i, fu)
            if_l.append(fu)
            # collate the read/write pending vectors (to go into global pending)
            int_src1_pend_v.append(fu.src1_pend_o)
            int_src2_pend_v.append(fu.src2_pend_o)
            int_rd_pend_v.append(fu.int_rd_pend_o)
            int_wr_pend_v.append(fu.int_wr_pend_o)
        # built here but not referenced again in this method
        int_fus = Array(if_l)

        # Count of number of FUs
        n_int_fus = len(if_l)
        n_fp_fus = 0 # for now

        n_fus = n_int_fus + n_fp_fus # plus FP FUs

        # XXX replaced by array of FUs? *FnUnit
        # # Integer FU-FU Dep Matrix
        # m.submodules.intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        # Integer FU-Reg Dep Matrix
        # intregdeps = FURegDepMatrix(self.n_regs, n_int_fus)
        # m.submodules.intregdeps = intregdeps

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2) # picks between add and sub
        m.submodules.intpick1 = intpick1

        # Global Pending Vectors (INT and FP)
        # NOTE: number of vectors is NOT same as number of FUs.
        g_int_src1_pend_v = GlobalPending(self.n_regs, int_src1_pend_v)
        g_int_src2_pend_v = GlobalPending(self.n_regs, int_src2_pend_v)
        g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v, True)
        g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v, True)
        m.submodules.g_int_src1_pend_v = g_int_src1_pend_v
        m.submodules.g_int_src2_pend_v = g_int_src2_pend_v
        m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
        m.submodules.g_int_wr_pend_v = g_int_wr_pend_v

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # FU-FU Dependency Matrices
        intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        m.submodules.intfudeps = intfudeps

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(1),
                     self.issue_o.eq(issueunit.issue_o),
                     issueunit.i.dest_i.eq(regdecode.dest_o),
                    ]
        # exposed as an attribute here, i.e. not available before elaborate()
        self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        fn_issue_l = []
        fn_busy_l = []
        for i, fu in enumerate(if_l):
            fn_issue_l.append(fu.issue_i)
            fn_busy_l.append(fu.busy_o)
            # issue strobe and register numbers are registered (sync domain)
            m.d.sync += fu.issue_i.eq(issueunit.i.fn_issue_o[i])
            m.d.sync += fu.dest_i.eq(self.int_dest_i)
            m.d.sync += fu.src1_i.eq(self.int_src1_i)
            m.d.sync += fu.src2_i.eq(self.int_src2_i)
            # XXX sync, so as to stop a simulation infinite loop
            m.d.comb += issueunit.i.busy_i[i].eq(fu.busy_o)

        #---------
        # connect Function Units
        #---------

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        m.d.comb += if_l[0].go_rd_i.eq(intpick1.go_rd_o[0]) # add rd
        m.d.comb += if_l[0].go_wr_i.eq(intpick1.go_wr_o[0]) # add wr

        m.d.comb += if_l[1].go_rd_i.eq(intpick1.go_rd_o[1]) # subtract rd
        m.d.comb += if_l[1].go_wr_i.eq(intpick1.go_wr_o[1]) # subtract wr

        # create read-pending FU-FU vectors
        intfu_rd_pend_v = Signal(n_int_fus, reset_less = True)
        intfu_wr_pend_v = Signal(n_int_fus, reset_less = True)
        for i in range(n_int_fus):
            #m.d.comb += intfu_rd_pend_v[i].eq(if_l[i].int_rd_pend_o.bool())
            #m.d.comb += intfu_wr_pend_v[i].eq(if_l[i].int_wr_pend_o.bool())
            m.d.comb += intfu_rd_pend_v[i].eq(if_l[i].int_readable_o)
            m.d.comb += intfu_wr_pend_v[i].eq(if_l[i].int_writable_o)

        # Connect INT Fn Unit global wr/rd pending
        for fu in if_l:
            m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
            m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)

        # Connect FU-FU Matrix, NOTE: FN Units readable/writable considered
        # to be unit "read-pending / write-pending"
        m.d.comb += intfudeps.rd_pend_i.eq(intfu_rd_pend_v)
        m.d.comb += intfudeps.wr_pend_i.eq(intfu_wr_pend_v)
        m.d.comb += intfudeps.issue_i.eq(issueunit.i.fn_issue_o)
        for i in range(n_int_fus):
            m.d.comb += intfudeps.go_rd_i[i].eq(intpick1.go_rd_o[i])
            m.d.comb += intfudeps.go_wr_i[i].eq(intpick1.go_wr_o[i])

        # Connect Picker (note connection to FU-FU)
        #---------
        readable_o = intfudeps.readable_o
        writable_o = intfudeps.writable_o
        m.d.comb += intpick1.rd_rel_i[0].eq(int_alus[0].rd_rel_o)
        m.d.comb += intpick1.rd_rel_i[1].eq(int_alus[1].rd_rel_o)
        m.d.comb += intpick1.req_rel_i[0].eq(int_alus[0].req_rel_o)
        m.d.comb += intpick1.req_rel_i[1].eq(int_alus[1].req_rel_o)
        m.d.comb += intpick1.readable_i[0].eq(readable_o[0]) # add rd
        m.d.comb += intpick1.writable_i[0].eq(writable_o[0]) # add wr
        m.d.comb += intpick1.readable_i[1].eq(readable_o[1]) # sub rd
        m.d.comb += intpick1.writable_i[1].eq(writable_o[1]) # sub wr

        #---------
        # Connect Register File(s)
        #---------
        # register-file enables are registered from the global pending vectors
        #with m.If(if_l[0].go_wr_i | if_l[1].go_wr_i):
        m.d.sync += int_dest.wen.eq(g_int_wr_pend_v.g_pend_o)
        #with m.If(intpick1.go_rd_o):
        #with m.If(if_l[0].go_rd_i | if_l[1].go_rd_i):
        m.d.sync += int_src1.ren.eq(g_int_src1_pend_v.g_pend_o)
        m.d.sync += int_src2.ren.eq(g_int_src2_pend_v.g_pend_o)

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.sync += int_dest.data_i.eq(dest_o)

        # connect ALUs
        for i, alu in enumerate(int_alus):
            m.d.comb += alu.go_rd_i.eq(intpick1.go_rd_o[i])
            m.d.comb += alu.go_wr_i.eq(intpick1.go_wr_o[i])
            m.d.comb += alu.issue_i.eq(fn_issue_l[i])
            #m.d.comb += fn_busy_l[i].eq(alu.busy_o)  # XXX ignore, use fnissue
            # both computation units receive the same two read-port values
            m.d.comb += alu.src1_i.eq(int_src1.data_o)
            m.d.comb += alu.src2_i.eq(int_src2.data_o)
            m.d.comb += if_l[i].req_rel_i.eq(alu.req_rel_o) # pipe out ready

        return m

    def __iter__(self):
        """Yield the register-file signals, then this module's own signals."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    def ports(self):
        """Return the signals produced by ``__iter__`` as a list."""
        return list(self)
# opcodes understood by RegSim.op (also used as instruction-select indices)
IADD = 0
ISUB = 1


class RegSim:
    """Software reference model of the integer register file.

    Mirrors, in plain Python integers, the effect each issued instruction
    is expected to have, so that the simulated hardware registers can be
    compared against it afterwards.
    """
    def __init__(self, rwidth, nregs):
        """ Inputs:

            * :rwidth: register width in bits (results wrap to this width)
            * :nregs:  number of registers, all initialised to zero
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply ``op`` to registers ``src1`` and ``src2``, store in ``dest``.

        The result is truncated to ``rwidth`` bits.

        Raises ValueError when ``op`` is neither IADD nor ISUB (previously
        this surfaced as an UnboundLocalError on the result variable).
        """
        lhs = self.regs[src1]
        rhs = self.regs[src2]
        mask = (1 << self.rwidth) - 1
        if op == IADD:
            val = (lhs + rhs) & mask
        elif op == ISUB:
            val = (lhs - rhs) & mask
        else:
            raise ValueError("unsupported opcode: %r" % (op,))
        self.regs[dest] = val

    def setval(self, dest, val):
        """Set register ``dest`` to ``val`` (stored as given, not masked)."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print expected vs. received per register.

        Each ``yield`` hands ``dut.intregs.regs[i].reg`` to the driver and
        expects that register's current value to be sent back.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: fail on the first mismatching register.

        On a mismatch the offending register is printed, the whole register
        file is dumped, and AssertionError is raised.  An explicit raise is
        used (not ``assert False``) so the check survives ``python -O``.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to the DUT.

    Clears every bit of ``dut.int_insn_i``, drives the destination and the
    two source register numbers, sets bit ``op`` of ``dut.int_insn_i``, and
    finally applies the same operation to the ``alusim`` reference model.
    """
    n_select_bits = len(dut.int_insn_i)
    for bit in range(n_select_bits):
        yield dut.int_insn_i[bit].eq(0)

    register_fields = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for field, regnum in register_fields:
        yield field.eq(regnum)

    yield dut.int_insn_i[op].eq(1)

    # keep the software model in step with what was just issued
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the given integer registers in hex.

    For every register number in ``rnums`` the register's ``reg`` signal is
    yielded and the value sent back is collected; one line of the form
    ``reg 3,4,5: a,b,c`` is printed at the end.
    """
    hex_values = []
    for rnum in rnums:
        current = yield dut.intregs.regs[rnum].reg
        hex_values.append("%x" % current)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(hex_values)))
def scoreboard_sim(dut, alusim):
    """Simulation stimulus for the scoreboard: issue two fixed instructions.

    Preloads integer registers 1..n_regs-1 with their own index (in both the
    DUT and ``alusim``), issues two instructions, waiting after each until
    ``dut.issue_o`` is seen high, lets the simulation run on, and finally
    compares the hardware registers against ``alusim``.

    NOTE(review): block nesting below (extent of the ``if False:`` region and
    which idle ``yield``s sit inside the loops) should be confirmed against
    the intended cycle-by-cycle behaviour.
    """
    yield dut.int_store_i.eq(0)

    # register N starts out holding the value N, in hardware and in the model
    for i in range(1, dut.n_regs):
        yield dut.intregs.regs[i].reg.eq(i)
        alusim.setval(i, i)

    # disabled: hand-written three-instruction sequence
    if False:
        yield from int_instr(dut, alusim, IADD, 4, 3, 5)
        yield from print_reg(dut, [3,4,5])
        yield
        yield from int_instr(dut, alusim, IADD, 5, 2, 5)
        yield from print_reg(dut, [3,4,5])
        yield
        yield from int_instr(dut, alusim, ISUB, 5, 1, 3)
        yield from print_reg(dut, [3,4,5])
        yield
        for i in range(len(dut.int_insn_i)):
            yield dut.int_insn_i[i].eq(0)
        yield from print_reg(dut, [3,4,5])
        yield
        yield from print_reg(dut, [3,4,5])
        yield
        yield from print_reg(dut, [3,4,5])
        yield

        yield from alusim.check(dut)

    for i in range(2):
        # random choices; all four are overwritten by the "if True:" block
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        while True:
            dest = randint(1, dut.n_regs-1)
            # unconditional break: the dest-differs-from-sources test below
            # is never reached
            break
            if dest not in [src1, src2]:
                break
        op = randint(0, 1)

        # disabled alternative fixed instruction pair
        if False:
            if i % 2 == 0:
                src1 = 6
                src2 = 6
                dest = 1
            else:
                src1 = 1
                src2 = 7
                dest = 2
            #src1 = 2
            #src2 = 3
            #dest = 2

            op = i

        # active: instruction 0 is op 0 on (2, 3) -> 3,
        #         instruction 1 is op 1 on (5, 3) -> 4
        if True:
            if i == 0:
                src1 = 2
                src2 = 3
                dest = 3
            else:
                src1 = 5
                src2 = 3
                dest = 4

            #op = (i+1) % 2
            op = i

        print ("random %d: %d %d %d %d\n" % (i, op, src1, src2, dest))
        yield from int_instr(dut, alusim, op, src1, src2, dest)
        yield from print_reg(dut, [3,4,5])
        # step until the scoreboard reports the instruction as issued
        while True:
            yield
            issue_o = yield dut.issue_o
            if issue_o:
                yield from print_reg(dut, [3,4,5])
                # deselect all instruction bits (rebinds the loop variable i)
                for i in range(len(dut.int_insn_i)):
                    yield dut.int_insn_i[i].eq(0)
                break
            print ("busy",)
            yield from print_reg(dut, [3,4,5])
        # NOTE(review): idle steps between instructions -- placement assumed
        yield
        yield
        yield

    # let outstanding operations complete, printing registers along the way
    yield
    yield from print_reg(dut, [3,4,5])
    yield
    yield from print_reg(dut, [3,4,5])
    yield
    yield from print_reg(dut, [3,4,5])
    yield
    yield from print_reg(dut, [3,4,5])
    yield
    yield
    yield
    yield
    yield
    yield
    yield
    yield
    yield
    # compare hardware registers with the software model, then show them all
    yield from alusim.check(dut)
    yield from alusim.dump(dut)
def explore_groups(dut):
    """Debug aid: print the LHS signal groups of the DUT's statements.

    Elaborates ``dut`` with no platform, runs nmigen's LHSGroupAnalyzer over
    the elaborated object's ``_statements`` and prints the result.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    fragment = dut.elaborate(platform=None)
    # the converted Fragment is not used further; the analysis below runs
    # on the statements of the object returned by elaborate()
    fr = Fragment.get(fragment, platform=None)

    analyzer = LHSGroupAnalyzer()
    groups = analyzer(fragment._statements)
    print (groups)
def test_scoreboard():
    """Build a 16-bit, 8-register Scoreboard, dump RTLIL, then simulate it.

    Writes ``test_scoreboard.il`` and ``test_scoreboard.vcd`` in the current
    working directory.
    """
    rwid, n_regs = 16, 8
    dut = Scoreboard(rwid, n_regs)
    # software model with the same geometry, used to check the results
    alusim = RegSim(rwid, n_regs)

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = scoreboard_sim(dut, alusim)
    run_simulation(dut, stimulus, vcd_name='test_scoreboard.vcd')
# when executed as a script, run the scoreboard test directly
if __name__ == '__main__':
    test_scoreboard()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen.hdl.ast import unsigned
-from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
-from regfile.regfile import RegFileArray, treereduce
-from scoreboard.fu_fu_matrix import FUFUDepMatrix
-from scoreboard.fu_reg_matrix import FURegDepMatrix
-from scoreboard.global_pending import GlobalPending
-from scoreboard.group_picker import GroupPicker
-from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
-from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
-from scoreboard.instruction_q import Instruction, InstructionQ
-from scoreboard.memfu import MemFunctionUnits
-from compalu import ComputationUnitNoDelay
-from compldst import LDSTCompUnit
-from alu_hier import ALU, BranchALU
-from nmutil.latch import SRLatch
-from nmutil.nmoperator import eq
-from random import randint, seed
-from copy import deepcopy
-from math import log
class TestMemory(Elaboratable):
    """Small nmigen Memory with one read port and one write port.

    Word ``k`` is initialised to the value ``k``.  The ports are created
    (and stored as ``rdport`` / ``wrport``) when ``elaborate()`` runs.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: width of each memory word, in bits
            * :addrw:  address width; the memory holds (1<<addrw)//ddepth words
        """
        self.ddepth = 1 # regwid //8
        n_words = (1 << addrw) // self.ddepth
        self.mem = Memory(width=regwid, depth=n_words, init=range(n_words))

    def elaborate(self, platform):
        m = Module()
        self.rdport = self.mem.read_port()
        m.submodules.rdport = self.rdport
        self.wrport = self.mem.write_port()
        m.submodules.wrport = self.wrport
        return m
class MemSim:
    """Plain-Python memory model: a list of words, word ``k`` starts as ``k``.

    NOTE(review): the word count is computed as ``(1<<addrw) // ddepth``
    while ``ld``/``st`` index with ``addr >> ddepth``; with ``ddepth == 1``
    the former divides by one and the latter halves the address -- confirm
    this is what the hardware side does as well.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: word width in bits (stored data is masked to this)
            * :addrw:  address width used to size the word list
        """
        self.regwid = regwid
        self.ddepth = 1 # regwid//8
        n_words = (1 << addrw) // self.ddepth
        self.mem = list(range(n_words))

    def _index(self, addr):
        # list position for an address: the address shifted right by ddepth
        return addr >> self.ddepth

    def ld(self, addr):
        """Return the word stored at ``addr``."""
        return self.mem[self._index(addr)]

    def st(self, addr, data):
        """Store ``data``, truncated to ``regwid`` bits, at ``addr``."""
        word_mask = (1 << self.regwid) - 1
        self.mem[self._index(addr)] = data & word_mask
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Amazingly, this class works recursively.  It's supposed to just
        look after some ALUs (that can handle the same operations),
        grouping them together, however it turns out that the same code
        can also group *groups* of Computation Units together as well.

        Basically it was intended just to concatenate the ALU's issue,
        go_rd etc. signals together, which start out as bits and become
        sequences.  Turns out that the same trick works just as well
        on Computation Units!

        So this class may be used recursively to present a top-level
        sequential concatenation of all the signals in and out of
        ALUs, whilst at the same time making it convenient to group
        ALUs together.

        At the lower level, the intent is that groups of (identical)
        ALUs may be passed the same operation.  Even beyond that,
        the intent is that that group of (identical) ALUs actually
        share the *same pipeline* and as such become a "Concurrent
        Computation Unit" as defined by Mitch Alsup.

        NOTE(review): the original text ended in an unfinished "(see
        section" reference; the section number still needs to be supplied.
    """
    def __init__(self, rwid, units, ldstmode=False):
        """ Inputs:

            * :rwid:     bit width of register file(s) - both FP and INT
            * :units:    sequence of ALUs (or CompUnitsBase derivatives)
            * :ldstmode: when True, the LD/ST signals (go_ad_i, go_st_i,
                         ld_o, st_o, adr_rel_o, sto_rel_o, load_mem_o,
                         stwd_mem_o, addr_o) are created and connected too
        """
        self.units = units
        self.ldstmode = ldstmode
        self.rwid = rwid

        # a group of groups is as wide as the sum of its members;
        # a group of plain units has one bit per unit
        if units and isinstance(units[0], CompUnitsBase):
            self.n_units = sum(u.n_units for u in self.units)
        else:
            self.n_units = len(units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)
        if ldstmode:
            self.go_ad_i = Signal(n_units, reset_less=True)
            self.go_st_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)
        if ldstmode:
            self.ld_o = Signal(n_units, reset_less=True) # op is LD
            self.st_o = Signal(n_units, reset_less=True) # op is ST
            self.adr_rel_o = Signal(n_units, reset_less=True)
            self.sto_rel_o = Signal(n_units, reset_less=True)
            self.load_mem_o = Signal(n_units, reset_less=True)
            self.stwd_mem_o = Signal(n_units, reset_less=True)
            self.addr_o = Signal(rwid, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)
        # input operand

    def elaborate(self, platform):
        """Add every unit as submodule ``comp<N>`` and fan signals in/out."""
        m = Module()
        comb = m.d.comb

        for i, unit in enumerate(self.units):
            setattr(m.submodules, "comp%d" % i, unit)

        def gather(name):
            # concatenation of the same-named signal of every unit, in order
            return Cat(*[getattr(unit, name) for unit in self.units])

        # per-unit outputs are collected into this group's vectors...
        comb += self.rd_rel_o.eq(gather("rd_rel_o"))
        comb += self.req_rel_o.eq(gather("req_rel_o"))
        comb += self.busy_o.eq(gather("busy_o"))
        # ...and this group's input vectors are split across the units
        comb += gather("go_die_i").eq(self.go_die_i)
        comb += gather("shadown_i").eq(self.shadown_i)
        comb += gather("go_wr_i").eq(self.go_wr_i)
        comb += gather("go_rd_i").eq(self.go_rd_i)
        comb += gather("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge (OR) all integer FU / ALU outputs to a single value
        if self.units:
            comb += self.data_o.eq(treereduce(self.units, "data_o"))
            if self.ldstmode:
                comb += self.addr_o.eq(treereduce(self.units, "addr_o"))

        # every unit is handed the same two source operand values
        for unit in self.units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        if not self.ldstmode:
            return m

        # LD/ST-only signals
        comb += self.ld_o.eq(gather("ld_o"))
        comb += self.st_o.eq(gather("st_o"))
        comb += self.adr_rel_o.eq(gather("adr_rel_o"))
        comb += self.sto_rel_o.eq(gather("sto_rel_o"))
        comb += self.load_mem_o.eq(gather("load_mem_o"))
        comb += self.stwd_mem_o.eq(gather("stwd_mem_o"))
        comb += gather("go_ad_i").eq(self.go_ad_i)
        comb += gather("go_st_i").eq(self.go_st_i)

        return m
class CompUnitLDSTs(CompUnitsBase):
    """Group of LD/ST computation units, each wrapping its own ALU."""

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST computation units to create
            * :mem:     passed unchanged to every LDSTCompUnit
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: all created first, then one LD/ST unit around each
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4 # see compldst.py for "internal" opcode
        units = [LDSTCompUnit(rwid, aluopwid, alu, mem) for alu in self.alus]

        super().__init__(rwid, units, ldstmode=True)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation to all units, 4 lower bits though
        for unit in self.units:
            comb += unit.oper_i[:4].eq(self.oper_i)
            comb += unit.imm_i.eq(self.imm_i)
            comb += unit.isalu_i.eq(0)

        return m
class CompUnitALUs(CompUnitsBase):
    """Group of ComputationUnitNoDelay units, each wrapping its own ALU."""

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU computation units to create
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs: all created first, then one computation unit around each
        alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3 # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu) for alu in alus]

        super().__init__(rwid, units)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation to all units, only lower 3 bits though
        for unit in self.units:
            comb += unit.oper_i[:3].eq(self.oper_i)
            comb += unit.imm_i.eq(self.imm_i)

        return m
class CompUnitBR(CompUnitsBase):
    """Group holding a single branch computation unit."""

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width

            Note: the branch ALU and its computation unit are kept as
            ``self.bgt`` and ``self.br1`` so that a shadow unit can be
            created for them
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        aluopwid = 3 # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)

        super().__init__(rwid, [self.br1])

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation to all units
        for unit in self.units:
            comb += unit.oper_i.eq(self.oper_i)
            comb += unit.imm_i.eq(self.imm_i)

        return m
class FunctionUnits(Elaboratable):
    """Pairs an FU-FU dependency matrix with an FU-Register one.

    ``wr_pend_o`` is only created when ``elaborate()`` runs.
    """
    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the register vectors
            * :n_int_alus: number of function units (width of the go/issue
                           and readable/writable vectors)
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
        m.submodules.intregdeps = intregdeps

        comb += [
            # register-select vectors: exported, and fed back as pending
            self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o),
            self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o),
            intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o),
            intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o),
            # FU-Reg pending outputs drive the FU-FU matrix
            intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o),
            intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o),
        ]
        self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        comb += [
            intfudeps.issue_i.eq(self.fn_issue_i),
            intfudeps.go_rd_i.eq(self.go_rd_i),
            intfudeps.go_wr_i.eq(self.go_wr_i),
            intfudeps.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(intfudeps.readable_o),
            self.writable_o.eq(intfudeps.writable_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [
            intregdeps.dest_i.eq(self.dest_i),
            intregdeps.src_i[0].eq(self.src1_i),
            intregdeps.src_i[1].eq(self.src2_i),
            intregdeps.go_rd_i.eq(self.go_rd_i),
            intregdeps.go_wr_i.eq(self.go_wr_i),
            intregdeps.go_die_i.eq(self.go_die_i),
            intregdeps.issue_i.eq(self.fn_issue_i),
            self.dest_rsel_o.eq(intregdeps.dest_rsel_o),
            self.src1_rsel_o.eq(intregdeps.src_rsel_o[0]),
            self.src2_rsel_o.eq(intregdeps.src_rsel_o[1]),
        ]

        return m
-class Scoreboard(Elaboratable):
- def __init__(self, rwid, n_regs):
- """ Inputs:
- * :rwid: bit width of register file(s) - both FP and INT
- * :n_regs: depth of register file(s) - number of FP and INT regs
- """
- self.rwid = rwid
- self.n_regs = n_regs
- # Register Files
- self.intregs = RegFileArray(rwid, n_regs)
- self.fpregs = RegFileArray(rwid, n_regs)
- # Memory (test for now)
- self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
- # issue q needs to get at these
- self.aluissue = IssueUnitGroup(2)
- self.lsissue = IssueUnitGroup(2)
- self.brissue = IssueUnitGroup(1)
- # and these
- self.alu_oper_i = Signal(4, reset_less=True)
- self.alu_imm_i = Signal(rwid, reset_less=True)
- self.br_oper_i = Signal(4, reset_less=True)
- self.br_imm_i = Signal(rwid, reset_less=True)
- self.ls_oper_i = Signal(4, reset_less=True)
- self.ls_imm_i = Signal(rwid, reset_less=True)
- # inputs
- self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
- self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
- self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
- self.reg_enable_i = Signal(reset_less=True) # enable reg decode
- # outputs
- self.issue_o = Signal(reset_less=True) # instruction was accepted
- self.busy_o = Signal(reset_less=True) # at least one CU is busy
- # for branch speculation experiment. branch_direction = 0 if
- # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
- # branch_succ and branch_fail are requests to have the current
- # instruction be dependent on the branch unit "shadow" capability.
- self.branch_succ_i = Signal(reset_less=True)
- self.branch_fail_i = Signal(reset_less=True)
- self.branch_direction_o = Signal(2, reset_less=True)
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
- sync = m.d.sync
- m.submodules.intregs = self.intregs
- m.submodules.fpregs = self.fpregs
- m.submodules.mem = mem = self.mem
- # register ports
- int_dest = self.intregs.write_port("dest")
- int_src1 = self.intregs.read_port("src1")
- int_src2 = self.intregs.read_port("src2")
- fp_dest = self.fpregs.write_port("dest")
- fp_src1 = self.fpregs.read_port("src1")
- fp_src2 = self.fpregs.read_port("src2")
- # Int ALUs and BR ALUs
- n_int_alus = 5
- cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
- cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
- # LDST Comp Units
- n_ldsts = 2
- cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
- # Comp Units
- m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
- bgt = cub.bgt # get at the branch computation unit
- br1 = cub.br1
- # Int FUs
- m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
- # Memory FUs
- m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
- # Memory Priority Picker 1: one gateway per memory port
- mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
- m.submodules.mempick1 = mempick1
- # Count of number of FUs
- n_intfus = n_int_alus
- n_fp_fus = 0 # for now
- # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
- intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
- m.submodules.intpick1 = intpick1
- # INT/FP Issue Unit
- regdecode = RegDecode(self.n_regs)
- m.submodules.regdecode = regdecode
- issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
- m.submodules.issueunit = issueunit
- # Shadow Matrix. currently n_intfus shadows, to be used for
- # write-after-write hazards. NOTE: there is one extra for branches,
- # so the shadow width is increased by 1
- m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
- m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
- # record previous instruction to cast shadow on current instruction
- prev_shadow = Signal(n_intfus)
- # Branch Speculation recorder. tracks the success/fail state as
- # each instruction is issued, so that when the branch occurs the
- # allow/cancel can be issued as appropriate.
- m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
- #---------
- # ok start wiring things together...
- # "now hear de word of de looord... dem bones dem bones dem dryy bones"
- # https://www.youtube.com/watch?v=pYb8Wm6-QfA
- #---------
- #---------
- # Issue Unit is where it starts. set up some in/outs for this module
- #---------
- comb += [ regdecode.dest_i.eq(self.int_dest_i),
- regdecode.src1_i.eq(self.int_src1_i),
- regdecode.src2_i.eq(self.int_src2_i),
- regdecode.enable_i.eq(self.reg_enable_i),
- self.issue_o.eq(issueunit.issue_o)
- ]
- # take these to outside (issue needs them)
- comb += cua.oper_i.eq(self.alu_oper_i)
- comb += cua.imm_i.eq(self.alu_imm_i)
- comb += cub.oper_i.eq(self.br_oper_i)
- comb += cub.imm_i.eq(self.br_imm_i)
- comb += cul.oper_i.eq(self.ls_oper_i)
- comb += cul.imm_i.eq(self.ls_imm_i)
- # TODO: issueunit.f (FP)
- # and int function issue / busy arrays, and dest/src1/src2
- comb += intfus.dest_i.eq(regdecode.dest_o)
- comb += intfus.src1_i.eq(regdecode.src1_o)
- comb += intfus.src2_i.eq(regdecode.src2_o)
- fn_issue_o = issueunit.fn_issue_o
- comb += intfus.fn_issue_i.eq(fn_issue_o)
- comb += issueunit.busy_i.eq(cu.busy_o)
- comb += self.busy_o.eq(cu.busy_o.bool())
- #---------
- # Memory Function Unit
- #---------
- reset_b = Signal(cul.n_units, reset_less=True)
- sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
- comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
- comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
- comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
- # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
- # in a transitive fashion). This cycle activates based on LDSTCompUnit
- # issue_i. multi-issue gets a bit more complex but not a lot.
- prior_ldsts = Signal(cul.n_units, reset_less=True)
- sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
- with m.If(self.ls_oper_i[2]): # LD bit of operand
- comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
- with m.If(self.ls_oper_i[3]): # ST bit of operand
- comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
- # TODO: adr_rel_o needs to go into L1 Cache. for now,
- # just immediately activate go_adr
- comb += cul.go_ad_i.eq(cul.adr_rel_o)
- # connect up address data
- comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
- comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
- # connect loadable / storable to go_ld/go_st.
- # XXX should only be done when the memory ld/st has actually happened!
- go_st_i = Signal(cul.n_units, reset_less=True)
- go_ld_i = Signal(cul.n_units, reset_less=True)
- comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
- cul.req_rel_o & cul.ld_o)
- comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
- cul.sto_rel_o & cul.st_o)
- comb += memfus.go_ld_i.eq(go_ld_i)
- comb += memfus.go_st_i.eq(go_st_i)
- #comb += cul.go_wr_i.eq(go_ld_i)
- comb += cul.go_st_i.eq(go_st_i)
- #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
- #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
- #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
- #---------
- # merge shadow matrices outputs
- #---------
- # these are explained in ShadowMatrix docstring, and are to be
- # connected to the FUReg and FUFU Matrices, to get them to reset
- anydie = Signal(n_intfus, reset_less=True)
- allshadown = Signal(n_intfus, reset_less=True)
- shreset = Signal(n_intfus, reset_less=True)
- comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
- comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
- comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
- #---------
- # connect fu-fu matrix
- #---------
- # Group Picker... done manually for now.
- go_rd_o = intpick1.go_rd_o
- go_wr_o = intpick1.go_wr_o
- go_rd_i = intfus.go_rd_i
- go_wr_i = intfus.go_wr_i
- go_die_i = intfus.go_die_i
- # NOTE: connect to the shadowed versions so that they can "die" (reset)
- comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
- comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
- comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
- # Connect Picker
- #---------
- comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
- comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
- int_rd_o = intfus.readable_o
- int_wr_o = intfus.writable_o
- comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
- comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
- #---------
- # Shadow Matrix
- #---------
- comb += shadows.issue_i.eq(fn_issue_o)
- #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
- comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
- #---------
- # NOTE; this setup is for the instruction order preservation...
- # connect shadows / go_dies to Computation Units
- comb += cu.shadown_i[0:n_intfus].eq(allshadown)
- comb += cu.go_die_i[0:n_intfus].eq(anydie)
- # ok connect first n_int_fu shadows to busy lines, to create an
- # instruction-order linked-list-like arrangement, using a bit-matrix
- # (instead of e.g. a ring buffer).
- # when written, the shadow can be cancelled (and was good)
- for i in range(n_intfus):
- comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
- # *previous* instruction shadows *current* instruction, and, obviously,
- # if the previous is completed (!busy) don't cast the shadow!
- comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
- for i in range(n_intfus):
- comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
- #---------
- # ... and this is for branch speculation. it uses the extra bit
- # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
- # only needs to set shadow_i, s_fail_i and s_good_i
- # issue captures shadow_i (if enabled)
- comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
- bactive = Signal(reset_less=True)
- comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
- # instruction being issued (fn_issue_o) has a shadow cast by the branch
- with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
- comb += bshadow.issue_i.eq(fn_issue_o)
- for i in range(n_intfus):
- with m.If(fn_issue_o & (Const(1<<i))):
- comb += bshadow.shadow_i[i][0].eq(1)
- # finally, we need an indicator to the test infrastructure as to
- # whether the branch succeeded or failed, plus, link up to the
- # "recorder" of whether the instruction was under shadow or not
- with m.If(br1.issue_i):
- sync += bspec.active_i.eq(1)
- with m.If(self.branch_succ_i):
- comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
- with m.If(self.branch_fail_i):
- comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
- # branch is active (TODO: a better signal: this is over-using the
- # go_write signal - actually the branch should not be "writing")
- with m.If(br1.go_wr_i):
- sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
- sync += bspec.active_i.eq(0)
- comb += bspec.br_i.eq(1)
- # branch occurs if data == 1, failed if data == 0
- comb += bspec.br_ok_i.eq(br1.data_o == 1)
- for i in range(n_intfus):
- # *expected* direction of the branch matched against *actual*
- comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
- # ... or it didn't
- comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
- #---------
- # Connect Register File(s)
- #---------
- comb += int_dest.wen.eq(intfus.dest_rsel_o)
- comb += int_src1.ren.eq(intfus.src1_rsel_o)
- comb += int_src2.ren.eq(intfus.src2_rsel_o)
- # connect ALUs to regfile
- comb += int_dest.data_i.eq(cu.data_o)
- comb += cu.src1_i.eq(int_src1.data_o)
- comb += cu.src2_i.eq(int_src2.data_o)
- # connect ALU Computation Units
- comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
- comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
- comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
- return m
- def __iter__(self):
- yield from self.intregs
- yield from self.fpregs
- yield self.int_dest_i
- yield self.int_src1_i
- yield self.int_src2_i
- yield self.issue_o
- yield self.branch_succ_i
- yield self.branch_fail_i
- yield self.branch_direction_o
- def ports(self):
- return list(self)
class IssueToScoreboard(Elaboratable):
    """Instruction queue in front of a Scoreboard.

    Instructions are added to an ``InstructionQ``.  The entry at the head
    of the queue is inspected in place, decoded into a branch, load/store
    or ALU operation and offered to the matching issue unit of the
    ``Scoreboard``.  It is popped from the queue once that unit reports,
    through ``fn_issue_o``, that it accepted the instruction.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """Arguments:

        * qlen: length of the instruction queue (must be at least 1)
        * n_in, n_out: forwarded to InstructionQ; n_in is also the number
          of instruction records in ``data_i``
        * rwid: forwarded to Instruction.nq, InstructionQ and Scoreboard
        * opwid: forwarded to Instruction.nq and InstructionQ
        * n_regs: forwarded to Scoreboard

        Raises ValueError if qlen is less than 1.
        """
        if qlen < 1:
            raise ValueError("qlen must be at least 1, got %r" % (qlen,))
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # floor(log2(qlen)) + 2 bits.  bit_length() is exact, whereas a
        # ratio of floating-point logarithms can round the wrong way.
        mqbits = unsigned(qlen.bit_length() + 1)
        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
        self.p_ready_o = Signal()  # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True)  # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the queue, the scoreboard and the issue logic between them."""
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen,
                          self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  an item is only removed once the issue unit it is
        # waiting on has accepted it.

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            for waiting, unit in ((wait_issue_ls, sc.lsissue),
                                  (wait_issue_br, sc.brissue),
                                  (wait_issue_alu, sc.aluissue)):
                with m.If(waiting & (unit.fn_issue_o != 0)):
                    with m.If(iq.qlen_o != 0):
                        comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation from the head of the queue
            head = iq.data_o[0]
            imm = head.imm_i
            dest = head.dest_i
            src1 = head.src1_i
            src2 = head.src2_i
            op = head.oper_i
            opi = head.opim_i  # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1)  # enable the regfile

            # choose a Function-Unit-Group
            with m.If((op & (0x3<<2)) != 0):  # branch (op bit 2 or 3 set)
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3<<4)) != 0):  # ld/st (op bit 4 or 5 set)
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else():  # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

            # TODO: sc.branch_fail_i / sc.branch_succ_i (which make the
            # instruction shadow-dependent on branch fail / success) are
            # not driven from here yet.

        return m

    def __iter__(self):
        """Yield p_ready_o, every field of every data_i record, then p_add_i."""
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return the signals produced by iterating ``self`` as a list."""
        return list(self)
# Opcode numbers used by the test instructions: 0-3 are arithmetic
# operations, 4-7 are branch comparisons.
(IADD, ISUB, IMUL, ISHF,
 IBGT, IBLT, IBEQ, IBNE) = range(8)
class RegSim:
    """Pure-Python model of the integer register file.

    Used as the reference against which the simulated hardware registers
    (``dut.intregs.regs[i].reg``) are compared.
    """

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` registers of ``rwidth`` bits, all zero."""
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """Apply one operation to the model and return its result.

        ``src1``, ``src2`` and ``dest`` are register numbers.  When
        ``op_imm`` is true the second operand is ``imm`` (used as given)
        instead of register ``src2``.  The result is truncated to
        ``rwidth`` bits and written to register ``dest``.  Opcodes other
        than IADD..IBNE (LD/ST: TODO) return 0 and leave the registers
        untouched.
        """
        mask = (1 << self.rwidth) - 1
        lhs = self.regs[src1] & mask
        if op_imm:
            rhs = imm
        else:
            rhs = self.regs[src2] & mask
        if op == IADD:
            val = lhs + rhs
        elif op == ISUB:
            val = lhs - rhs
        elif op == IMUL:
            val = lhs * rhs
        elif op == ISHF:
            val = lhs >> (rhs & mask)
        elif op == IBGT:
            val = int(lhs > rhs)
        elif op == IBLT:
            val = int(lhs < rhs)
        elif op == IBEQ:
            val = int(lhs == rhs)
        elif op == IBNE:
            val = int(lhs != rhs)
        else:
            return 0  # LD/ST TODO
        val &= mask
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """Store ``val`` in register ``dest`` (and log the write)."""
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation process: print each modelled register next to the
        value read back from ``dut``, tagged "OK" or "!ok"."""
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation process: compare every modelled register with ``dut``.

        On the first mismatch the full register dump is printed and
        AssertionError is raised.  The exception is raised explicitly
        rather than through an ``assert`` statement so that the check
        still fails when Python runs with ``-O``.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: push one instruction into the dut's queue.

    The instruction fields are written to ``dut.data_i[0]``, ``p_add_i``
    is set to the number of instructions offered, and the process then
    clocks until ``p_ready_o`` is seen high before clearing ``p_add_i``.
    ``branch_success`` and ``branch_fail`` are accepted but not used.
    """
    batch = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
              'src1_i': src1, 'src2_i': src2}]

    for slot, fields in enumerate(batch):
        yield from eq(dut.data_i[slot], fields)
        sent = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, sent))

    yield dut.p_add_i.eq(len(batch))
    yield
    # keep clocking until the queue reports the instruction was added
    while not (yield dut.p_ready_o):
        yield

    yield dut.p_add_i.eq(0)
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: issue one instruction directly to the scoreboard.

    Opcodes with bit 2 or 3 set go to the branch issue unit, all others
    to the ALU issue unit; only the low two opcode bits are passed on as
    the operation.  Returns once the chosen unit has issued.
    """
    yield from disable_issue(dut)
    for port, regnum in ((dut.int_dest_i, dest),
                         (dut.int_src1_i, src1),
                         (dut.int_src2_i, src2)):
        yield port.eq(regnum)

    # select the function-unit group and its operation / immediate ports
    if (op & (0x3<<2)) != 0:  # branch
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
def print_reg(dut, rnums):
    """Simulation process: read the integer registers listed in ``rnums``
    from ``dut`` and print their numbers and hexadecimal values."""
    values = []
    for regnum in rnums:
        current = yield dut.intregs.regs[regnum].reg
        values.append("%x" % current)
    labels = ','.join(str(regnum) for regnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(values)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return ``n_ops`` randomly generated test instructions.

    Each entry is ``(src1, src2, dest, op, opi, imm)``; with
    ``shadowing`` set a trailing ``(0, 0)`` pair is appended.  Register
    numbers are drawn from 1..n_regs-1, ``imm`` from 1..2**rwid-1 and
    ``op`` from 0..max_opnums.
    """
    ops = []
    for _ in range(n_ops):
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1<<dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # immediate flag: set only when the 0..2 draw comes out as zero
        opi = 1 if randint(0, 2) == 0 else 0
        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        ops.append(entry)
    return ops
def wait_for_busy_clear(dut):
    """Simulation process: clock until ``dut.busy_o`` reads as false,
    printing "busy" for every cycle spent waiting."""
    while (yield dut.busy_o):
        print ("busy",)
        yield
def disable_issue(dut):
    """Simulation process: clear ``insn_i`` on the ALU, branch and
    load/store issue units, in that order."""
    for unit in (dut.aluissue, dut.brissue, dut.lsissue):
        yield unit.insn_i.eq(0)
def wait_for_issue(dut, dut_issue):
    """Simulation process: clock until ``dut_issue.fn_issue_o`` is
    non-zero, then drop all issue requests and the register enable.

    "busy" is printed for every cycle spent waiting.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: exercise branch shadowing on the scoreboard.

    Instructions are 5-field tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  For a branch (``op >= 4``) the ``dest`` field holds
    a ``(branch_ok, branch_fail)`` pair of instruction lists instead of a
    register number.  The instructions are issued to ``dut`` through
    int_instr(), replayed on the ``alusim`` model, and the two register
    files are then compared.

    These tuples carry no immediate, so int_instr() and alusim.op() are
    called with imm=0 / op_imm=0.
    """
    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        random_tree = False  # set to try a randomly generated branch tree
        if random_tree:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops() returns 7-field tuples
            # while the loops below unpack 5 fields, so this path needs
            # converting before it can be enabled.
            insts = create_random_ops(dut, 1, True, 1)

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            op = 4  # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
        else:
            # fixed regression case: one add followed by a BGT whose
            # fail-path holds a single instruction
            branch_ok = [None]
            branch_fail = [(1, 1, 2, 0, (0, 1))]
            insts = [
                (3, 5, 2, 0, (0, 0)),
                (6, 4, (branch_ok, branch_fail), 4, (0, 0)),
            ]

        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        pending = insts
        while pending:
            yield
            yield
            i += 1
            # 0: no branch result yet, 1: branch failed, 2: branch occurred
            branch_direction = yield dut.branch_direction_o
            (src1, src2, dest, op, (shadow_on, shadow_off)) = pending.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch failed, this one needed success: skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue  # branch occurred, this one needed a fail: skip
            if branch_direction != 0:
                # outcome already known: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch
                # success" the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        pending.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        pending.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes the immediate as its third argument
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the same program on the software model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes (op, op_imm, imm, src1, src2, dest)
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
def scoreboard_sim(dut, alusim):
    """Simulation process: run a list of instructions through the queue
    and compare the resulting registers with the ``alusim`` model.

    Active instructions are 7-field tuples ``(src1, src2, dest, op, opi,
    imm, (br_ok, br_fail))``.  Each one is applied to ``alusim`` and then
    queued on ``dut`` with instr_q().  The ``if False:`` groups are
    disabled regression cases; several use older, shorter tuple layouts.
    """
    seed(0)

    for _ in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:  # LD/ST test (with immediate)
            instrs.append((1, 2, 0, 0x10, 1, 1, (0, 0)))

        # the currently enabled regression cases
        instrs.extend([
            (1, 2, 2, 1, 1, 20, (0, 0)),
            (7, 3, 2, 4, 0, 0, (0, 0)),
            (7, 6, 6, 2, 0, 0, (0, 0)),
            (1, 7, 2, 2, 0, 0, (0, 0)),
            (2, 3, 3, 0, 0, 0, (0, 0)),
            (5, 3, 3, 1, 0, 0, (0, 0)),
            (3, 5, 5, 2, 0, 0, (0, 0)),
            (5, 3, 3, 3, 0, 0, (0, 0)),
            (3, 5, 5, 0, 0, 0, (0, 0)),
        ])

        if False:
            instrs.extend([
                (3, 3, 4, 0, 0, 13979, (0, 0)),
                (6, 4, 1, 2, 0, 40976, (0, 0)),
                (1, 4, 7, 4, 1, 23652, (0, 0)),
            ])

        if False:
            instrs.extend([(5, 6, 2, 1), (2, 2, 4, 0)])

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.extend([(2, 6, 2, 1), (2, 1, 2, 0)])

        if False:
            instrs.extend([(1, 2, 7, 2), (7, 1, 5, 0), (4, 4, 1, 1)])

        if False:
            instrs.extend([(5, 6, 2, 2), (1, 1, 4, 1), (6, 5, 3, 0)])

        if False:
            # Write-after-Write Hazard
            instrs.extend([(3, 6, 7, 2), (4, 4, 7, 1)])

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.extend([(1, 1, 1, 1), (1, 5, 3, 0)])

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.extend([(5, 6, 1, 2), (1, 1, 1, 1)])

        if False:
            # self-read-write sandwich
            instrs.extend([(5, 6, 1, 2), (1, 1, 1, 1), (1, 5, 3, 0)])

        if False:
            # very weird failure
            instrs.extend([(5, 2, 5, 2), (2, 6, 3, 0), (4, 2, 2, 1)])

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.extend([(5, 3, 3, 4, (0, 0)), (4, 2, 1, 2, (0, 1))])

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.extend([(5, 3, 3, 4, (0, 0)), (4, 2, 1, 2, (1, 0))])

        if False:
            instrs.extend([
                (4, 3, 5, 1, 0, (0, 0)),
                (5, 2, 3, 1, 0, (0, 0)),
                (7, 1, 5, 2, 0, (0, 0)),
                (5, 6, 6, 4, 0, (0, 0)),
                (7, 5, 2, 2, 0, (1, 0)),
                (1, 7, 5, 0, 0, (0, 1)),
                (1, 6, 1, 2, 0, (1, 0)),
                (1, 6, 7, 3, 0, (0, 0)),
                (6, 7, 7, 0, 0, (0, 0)),
            ])

        # issue instruction(s), wait for issue to be free before proceeding
        for num, (src1, src2, dest, op, opi, imm,
                  (br_ok, br_fail)) in enumerate(instrs):
            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (num, src1, src2, dest, op, opi, imm))
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
def test_scoreboard():
    """Write the design out as RTLIL, then run scoreboard_sim() against it.

    Produces ``test_scoreboard6600.il`` and ``test_scoreboard6600.vcd``
    in the current directory.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by the simulation

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')

    # the branch-speculation variant is currently disabled:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')


if __name__ == '__main__':
    test_scoreboard()
+++ /dev/null
-# Copyright 2018 ETH Zurich and University of Bologna.
-# Copyright and related rights are licensed under the Solderpad Hardware
-# License, Version 0.51 (the "License"); you may not use this file except in
-# compliance with the License. You may obtain a copy of the License at
-# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# or agreed to in writing, software, hardware and materials distributed under
-# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations under the License.
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-# module axi4_ar_buffer
-# #(
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid,
-# input logic [31:0] s_axi4_araddr,
-# input logic s_axi4_arvalid,
-# output logic s_axi4_arready,
-# input logic [7:0] s_axi4_arlen,
-# input logic [2:0] s_axi4_arsize,
-# input logic [1:0] s_axi4_arburst,
-# input logic s_axi4_arlock,
-# input logic [2:0] s_axi4_arprot,
-# input logic [3:0] s_axi4_arcache,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser,
-# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid,
-# output logic [31:0] m_axi4_araddr,
-# output logic m_axi4_arvalid,
-# input logic m_axi4_arready,
-# output logic [7:0] m_axi4_arlen,
-# output logic [2:0] m_axi4_arsize,
-# output logic [1:0] m_axi4_arburst,
-# output logic m_axi4_arlock,
-# output logic [2:0] m_axi4_arprot,
-# output logic [3:0] m_axi4_arcache,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser
-# );
class axi4_ar_buffer(Elaboratable):
    """Port skeleton for the axi4_ar_buffer SystemVerilog module.

    Only the AXI4 read-address slave (``s_axi4_*``) and master
    (``m_axi4_*``) signals are declared.  elaborate() does not drive
    anything yet: the original design packs the slave fields into one
    vector and passes it through an ``axi_buffer_rab`` instance, which
    has not been converted.
    """

    def __init__(self, axi_id_width=4, axi_user_width=4):
        """Arguments:

        * axi_id_width: width of the arid signals (SystemVerilog
          parameter AXI_ID_WIDTH, default 4)
        * axi_user_width: width of the aruser signals (SystemVerilog
          parameter AXI_USER_WIDTH, default 4)
        """
        self.axi_id_width = axi_id_width
        self.axi_user_width = axi_user_width

        # clock and reset come from the nmigen clock domain:
        # self.axi4_aclk = Signal() # input
        # self.axi4_arstn = Signal() # input
        self.s_axi4_arid = Signal(axi_id_width)  # input
        self.s_axi4_araddr = Signal(32)  # input
        self.s_axi4_arvalid = Signal()  # input
        self.s_axi4_arready = Signal()  # output
        self.s_axi4_arlen = Signal(8)  # input
        self.s_axi4_arsize = Signal(3)  # input
        self.s_axi4_arburst = Signal(2)  # input
        self.s_axi4_arlock = Signal()  # input
        self.s_axi4_arprot = Signal(3)  # input
        self.s_axi4_arcache = Signal(4)  # input
        self.s_axi4_aruser = Signal(axi_user_width)  # input
        self.m_axi4_arid = Signal(axi_id_width)  # output
        self.m_axi4_araddr = Signal(32)  # output
        self.m_axi4_arvalid = Signal()  # output
        self.m_axi4_arready = Signal()  # input
        self.m_axi4_arlen = Signal(8)  # output
        self.m_axi4_arsize = Signal(3)  # output
        self.m_axi4_arburst = Signal(2)  # output
        self.m_axi4_arlock = Signal()  # output
        self.m_axi4_arprot = Signal(3)  # output
        self.m_axi4_arcache = Signal(4)  # output
        self.m_axi4_aruser = Signal(axi_user_width)  # output

    def elaborate(self, platform=None):
        """Return an empty Module (no logic has been converted yet)."""
        m = Module()
        # #TODO use record types here
        # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_in;
        # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_out;

        # assign data_in [3:0] = s_axi4_arcache;
        # assign data_in [6:4] = s_axi4_arprot;
        # assign data_in [7] = s_axi4_arlock;
        # assign data_in [9:8] = s_axi4_arburst;
        # assign data_in [12:10] = s_axi4_arsize;
        # assign data_in [20:13] = s_axi4_arlen;
        # assign data_in [52:21] = s_axi4_araddr;
        # assign data_in [52+AXI_ID_WIDTH:53] = s_axi4_arid;
        # assign data_in[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH] = s_axi4_aruser;
        #
        # assign m_axi4_arcache = data_out[3:0];
        # assign m_axi4_arprot = data_out[6:4];
        # assign m_axi4_arlock = data_out[7];
        # assign m_axi4_arburst = data_out[9:8];
        # assign m_axi4_arsize = data_out[12:10];
        # assign m_axi4_arlen = data_out[20:13];
        # assign m_axi4_araddr = data_out[52:21];
        # assign m_axi4_arid = data_out[52+AXI_ID_WIDTH:53];
        # assign m_axi4_aruser = data_out[52+AXI_ID_WIDTH+AXI_USER_WIDTH:53+AXI_ID_WIDTH];

        # m.d.comb += self.m_axi4_arcache.eq(..)
        # m.d.comb += self.m_axi4_arprot.eq(..)
        # m.d.comb += self.m_axi4_arlock.eq(..)
        # m.d.comb += self.m_axi4_arburst.eq(..)
        # m.d.comb += self.m_axi4_arsize.eq(..)
        # m.d.comb += self.m_axi4_arlen.eq(..)
        # m.d.comb += self.m_axi4_araddr.eq(..)
        # m.d.comb += self.m_axi4_arid.eq(..)
        # m.d.comb += self.m_axi4_aruser.eq(..)
        return m
-# TODO convert axi_buffer_rab.sv
-# axi_buffer_rab
-# #(
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_buffer
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# .valid_out ( m_axi4_arvalid ),
-# .data_out ( data_out ),
-# .ready_in ( m_axi4_arready ),
-# .valid_in ( s_axi4_arvalid ),
-# .data_in ( data_in ),
-# .ready_out ( s_axi4_arready )
-# );
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
class axi4_ar_sender(Elaboratable):
    """AXI4 read-address sender, ENABLE_L2TLB == 0 configuration.

    Hand-completed from the sv2nmigen output, following the SystemVerilog
    reference kept in the comments after this class.  A slave request is
    forwarded to the master port when ``l1_accept_i`` is set (using
    ``l1_araddr_i`` as the address) and consumed without forwarding when
    ``l1_drop_i`` is set.  With the L2 TLB disabled ``l2_sending_o`` and
    ``l2_done_o`` are tied low and ``l1_save_i`` has no effect.

    TODO: the ENABLE_L2TLB == 1 variant (request buffering and resend
    through ``l2_araddr_i``) has not been ported.
    """

    def __init__(self, axi_addr_width=40, axi_id_width=4, axi_user_width=4):
        """Arguments (defaults match the SystemVerilog parameters):

        * axi_addr_width: width of the address signals (AXI_ADDR_WIDTH)
        * axi_id_width: width of the arid signals (AXI_ID_WIDTH)
        * axi_user_width: width of the aruser signals (AXI_USER_WIDTH)
        """
        self.axi_addr_width = axi_addr_width
        self.axi_id_width = axi_id_width
        self.axi_user_width = axi_user_width

        # declared for interface compatibility; elaborate() does not
        # read them
        self.axi4_aclk = Signal()  # input
        self.axi4_arstn = Signal()  # input
        self.l1_done_o = Signal()  # output
        self.l1_accept_i = Signal()  # input
        self.l1_drop_i = Signal()  # input
        self.l1_save_i = Signal()  # input
        self.l2_done_o = Signal()  # output
        self.l2_accept_i = Signal()  # input
        self.l2_drop_i = Signal()  # input
        self.l2_sending_o = Signal()  # output
        self.l1_araddr_i = Signal(axi_addr_width)  # input
        self.l2_araddr_i = Signal(axi_addr_width)  # input
        self.s_axi4_arid = Signal(axi_id_width)  # input
        self.s_axi4_arvalid = Signal()  # input
        self.s_axi4_arready = Signal()  # output
        self.s_axi4_arlen = Signal(8)  # input
        self.s_axi4_arsize = Signal(3)  # input
        self.s_axi4_arburst = Signal(2)  # input
        self.s_axi4_arlock = Signal()  # input
        self.s_axi4_arprot = Signal(3)  # input
        self.s_axi4_arcache = Signal(4)  # input
        self.s_axi4_aruser = Signal(axi_user_width)  # input
        self.m_axi4_arid = Signal(axi_id_width)  # output
        self.m_axi4_araddr = Signal(axi_addr_width)  # output
        self.m_axi4_arvalid = Signal()  # output
        self.m_axi4_arready = Signal()  # input
        self.m_axi4_arlen = Signal(8)  # output
        self.m_axi4_arsize = Signal(3)  # output
        self.m_axi4_arburst = Signal(2)  # output
        self.m_axi4_arlock = Signal()  # output
        self.m_axi4_arprot = Signal(3)  # output
        self.m_axi4_arcache = Signal(4)  # output
        self.m_axi4_aruser = Signal(axi_user_width)  # output

    def elaborate(self, platform=None):
        """Build the combinatorial handshake and pass-through logic."""
        m = Module()
        comb = m.d.comb

        # internal wires of the SystemVerilog module
        l1_save = Signal()
        l2_available_q = Signal()

        comb += l1_save.eq(self.l1_save_i & l2_available_q)
        comb += self.l1_done_o.eq(self.s_axi4_arvalid & self.s_axi4_arready)

        # if 1: accept and forward a transaction translated by L1
        #    2: drop or save request (if L2 slot not occupied already)
        comb += self.m_axi4_arvalid.eq(
            (self.s_axi4_arvalid & self.l1_accept_i) | self.l2_sending_o)
        comb += self.s_axi4_arready.eq(
            (self.m_axi4_arvalid & self.m_axi4_arready & ~self.l2_sending_o)
            | (self.s_axi4_arvalid & (self.l1_drop_i | l1_save)))

        # ENABLE_L2TLB == 0: slave fields go straight to the master port
        comb += self.m_axi4_aruser.eq(self.s_axi4_aruser)
        comb += self.m_axi4_arcache.eq(self.s_axi4_arcache)
        comb += self.m_axi4_arprot.eq(self.s_axi4_arprot)
        comb += self.m_axi4_arlock.eq(self.s_axi4_arlock)
        comb += self.m_axi4_arburst.eq(self.s_axi4_arburst)
        comb += self.m_axi4_arsize.eq(self.s_axi4_arsize)
        comb += self.m_axi4_arlen.eq(self.s_axi4_arlen)
        comb += self.m_axi4_araddr.eq(self.l1_araddr_i)
        comb += self.m_axi4_arid.eq(self.s_axi4_arid)

        # ... and the L2 side is permanently idle
        comb += self.l2_sending_o.eq(0)
        comb += l2_available_q.eq(0)
        comb += self.l2_done_o.eq(0)
        return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# module axi4_ar_sender
-# #(
-# parameter AXI_ADDR_WIDTH = 40,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4,
-# parameter ENABLE_L2TLB = 0
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# output logic l1_done_o,
-# input logic l1_accept_i,
-# input logic l1_drop_i,
-# input logic l1_save_i,
-# output logic l2_done_o,
-# input logic l2_accept_i,
-# input logic l2_drop_i,
-# output logic l2_sending_o,
-# input logic [AXI_ADDR_WIDTH-1:0] l1_araddr_i,
-# input logic [AXI_ADDR_WIDTH-1:0] l2_araddr_i,
-# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid,
-# input logic s_axi4_arvalid,
-# output logic s_axi4_arready,
-# input logic [7:0] s_axi4_arlen,
-# input logic [2:0] s_axi4_arsize,
-# input logic [1:0] s_axi4_arburst,
-# input logic s_axi4_arlock,
-# input logic [2:0] s_axi4_arprot,
-# input logic [3:0] s_axi4_arcache,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser,
-# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid,
-# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_araddr,
-# output logic m_axi4_arvalid,
-# input logic m_axi4_arready,
-# output logic [7:0] m_axi4_arlen,
-# output logic [2:0] m_axi4_arsize,
-# output logic [1:0] m_axi4_arburst,
-# output logic m_axi4_arlock,
-# output logic [2:0] m_axi4_arprot,
-# output logic [3:0] m_axi4_arcache,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser
-# );
-# logic l1_save;
-# logic l2_sent;
-# logic l2_available_q;
-# assign l1_save = l1_save_i & l2_available_q;
-# assign l1_done_o = s_axi4_arvalid & s_axi4_arready ;
-# // if 1: accept and forward a transaction translated by L1
-# // 2: drop or save request (if L2 slot not occupied already)
-# assign m_axi4_arvalid = (s_axi4_arvalid & l1_accept_i) |
-# l2_sending_o;
-# assign s_axi4_arready = (m_axi4_arvalid & m_axi4_arready & ~l2_sending_o) |
-# (s_axi4_arvalid & (l1_drop_i | l1_save));
-# generate
-# if (ENABLE_L2TLB == 1) begin
-# logic [AXI_USER_WIDTH-1:0] l2_axi4_aruser ;
-# logic [3:0] l2_axi4_arcache ;
-# logic [3:0] l2_axi4_arregion;
-# logic [3:0] l2_axi4_arqos ;
-# logic [2:0] l2_axi4_arprot ;
-# logic l2_axi4_arlock ;
-# logic [1:0] l2_axi4_arburst ;
-# logic [2:0] l2_axi4_arsize ;
-# logic [7:0] l2_axi4_arlen ;
-# logic [AXI_ID_WIDTH-1:0] l2_axi4_arid ;
-# assign m_axi4_aruser = l2_sending_o ? l2_axi4_aruser : s_axi4_aruser;
-# assign m_axi4_arcache = l2_sending_o ? l2_axi4_arcache : s_axi4_arcache;
-# assign m_axi4_arprot = l2_sending_o ? l2_axi4_arprot : s_axi4_arprot;
-# assign m_axi4_arlock = l2_sending_o ? l2_axi4_arlock : s_axi4_arlock;
-# assign m_axi4_arburst = l2_sending_o ? l2_axi4_arburst : s_axi4_arburst;
-# assign m_axi4_arsize = l2_sending_o ? l2_axi4_arsize : s_axi4_arsize;
-# assign m_axi4_arlen = l2_sending_o ? l2_axi4_arlen : s_axi4_arlen;
-# assign m_axi4_araddr = l2_sending_o ? l2_araddr_i : l1_araddr_i;
-# assign m_axi4_arid = l2_sending_o ? l2_axi4_arid : s_axi4_arid;
-# // Buffer AXI signals in case of L1 miss
-# always @(posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# l2_axi4_aruser <= 'b0;
-# l2_axi4_arcache <= 'b0;
-# l2_axi4_arprot <= 'b0;
-# l2_axi4_arlock <= 1'b0;
-# l2_axi4_arburst <= 'b0;
-# l2_axi4_arsize <= 'b0;
-# l2_axi4_arlen <= 'b0;
-# l2_axi4_arid <= 'b0;
-# end else if (l1_save) begin
-# l2_axi4_aruser <= s_axi4_aruser;
-# l2_axi4_arcache <= s_axi4_arcache;
-# l2_axi4_arprot <= s_axi4_arprot;
-# l2_axi4_arlock <= s_axi4_arlock;
-# l2_axi4_arburst <= s_axi4_arburst;
-# l2_axi4_arsize <= s_axi4_arsize;
-# l2_axi4_arlen <= s_axi4_arlen;
-# l2_axi4_arid <= s_axi4_arid;
-# end
-# end
-# // signal that an l1_save_i can be accepted
-# always @(posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# l2_available_q <= 1'b1;
-# end else if (l2_sent | l2_drop_i) begin
-# l2_available_q <= 1'b1;
-# end else if (l1_save) begin
-# l2_available_q <= 1'b0;
-# end
-# end
-# assign l2_sending_o = l2_accept_i & ~l2_available_q;
-# assign l2_sent = l2_sending_o & m_axi4_arvalid & m_axi4_arready;
-# // if 1: having sent out a transaction translated by L2
-# // 2: drop request (L2 slot is available again)
-# assign l2_done_o = l2_sent | l2_drop_i;
-# end else begin // !`ifdef ENABLE_L2TLB
-# assign m_axi4_aruser = s_axi4_aruser;
-# assign m_axi4_arcache = s_axi4_arcache;
-# assign m_axi4_arprot = s_axi4_arprot;
-# assign m_axi4_arlock = s_axi4_arlock;
-# assign m_axi4_arburst = s_axi4_arburst;
-# assign m_axi4_arsize = s_axi4_arsize;
-# assign m_axi4_arlen = s_axi4_arlen;
-# assign m_axi4_araddr = l1_araddr_i;
-# assign m_axi4_arid = s_axi4_arid;
-# assign l2_sending_o = 1'b0;
-# assign l2_available_q = 1'b0;
-# assign l2_done_o = 1'b0;
-# end // else: !if(ENABLE_L2TLB == 1)
-# endgenerate
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class axi4_aw_buffer(Elaboratable): # sv2nmigen stub of the SystemVerilog module axi4_aw_buffer; only the port list survived translation
- def __init__(self): # creates one Signal per SystemVerilog port
- self.axi4_aclk = Signal() # input; connected to .clk of u_buffer in the SystemVerilog original
- self.axi4_arstn = Signal() # input; connected to .rstn of u_buffer in the SystemVerilog original
- self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input; AXI_ID_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.s_axi4_awaddr = Signal(32) # input
- self.s_axi4_awvalid = Signal() # input
- self.s_axi4_awready = Signal() # output
- self.s_axi4_awlen = Signal(8) # input
- self.s_axi4_awsize = Signal(3) # input
- self.s_axi4_awburst = Signal(2) # input
- self.s_axi4_awlock = Signal() # input
- self.s_axi4_awprot = Signal(3) # input
- self.s_axi4_awcache = Signal(4) # input
- self.s_axi4_awregion = Signal(4) # input
- self.s_axi4_awqos = Signal(4) # input
- self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input; AXI_USER_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output
- self.m_axi4_awaddr = Signal(32) # output
- self.m_axi4_awvalid = Signal() # output
- self.m_axi4_awready = Signal() # input
- self.m_axi4_awlen = Signal(8) # output
- self.m_axi4_awsize = Signal(3) # output
- self.m_axi4_awburst = Signal(2) # output
- self.m_axi4_awlock = Signal() # output
- self.m_axi4_awprot = Signal(3) # output
- self.m_axi4_awcache = Signal(4) # output
- self.m_axi4_awregion = Signal(4) # output
- self.m_axi4_awqos = Signal(4) # output
- self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output
- def elaborate(self, platform=None): # NOTE: every `self.None` below is a SyntaxError in Python 3; the generator dropped the data_in/data_out part-selects
- m = Module()
- m.d.comb += self.None.eq(self.s_axi4_awcache) # SV: data_in[3:0] = s_axi4_awcache
- m.d.comb += self.None.eq(self.s_axi4_awprot) # SV: data_in[6:4] = s_axi4_awprot
- m.d.comb += self.None.eq(self.s_axi4_awlock) # SV: data_in[7] = s_axi4_awlock
- m.d.comb += self.None.eq(self.s_axi4_awburst) # SV: data_in[9:8] = s_axi4_awburst
- m.d.comb += self.None.eq(self.s_axi4_awsize) # SV: data_in[12:10] = s_axi4_awsize
- m.d.comb += self.None.eq(self.s_axi4_awlen) # SV: data_in[20:13] = s_axi4_awlen
- m.d.comb += self.None.eq(self.s_axi4_awaddr) # SV: data_in[52:21] = s_axi4_awaddr
- m.d.comb += self.None.eq(self.s_axi4_awregion) # SV: data_in[56:53] = s_axi4_awregion
- m.d.comb += self.None.eq(self.s_axi4_awqos) # SV: data_in[60:57] = s_axi4_awqos
- m.d.comb += self.None.eq(self.s_axi4_awid) # SV: data_in[60+AXI_ID_WIDTH:61] = s_axi4_awid
- m.d.comb += self.None.eq(self.s_axi4_awuser) # SV: data_in[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH] = s_axi4_awuser
- m.d.comb += self.m_axi4_awcache.eq(self.None) # SV: m_axi4_awcache = data_out[3:0]
- m.d.comb += self.m_axi4_awprot.eq(self.None) # SV: m_axi4_awprot = data_out[6:4]
- m.d.comb += self.m_axi4_awlock.eq(self.None) # SV: m_axi4_awlock = data_out[7]
- m.d.comb += self.m_axi4_awburst.eq(self.None) # SV: m_axi4_awburst = data_out[9:8]
- m.d.comb += self.m_axi4_awsize.eq(self.None) # SV: m_axi4_awsize = data_out[12:10]
- m.d.comb += self.m_axi4_awlen.eq(self.None) # SV: m_axi4_awlen = data_out[20:13]
- m.d.comb += self.m_axi4_awaddr.eq(self.None) # SV: m_axi4_awaddr = data_out[52:21]
- m.d.comb += self.m_axi4_awregion.eq(self.None) # SV: m_axi4_awregion = data_out[56:53]
- m.d.comb += self.m_axi4_awqos.eq(self.None) # SV: m_axi4_awqos = data_out[60:57]
- m.d.comb += self.m_axi4_awid.eq(self.None) # SV: m_axi4_awid = data_out[60+AXI_ID_WIDTH:61]
- m.d.comb += self.m_axi4_awuser.eq(self.None) # SV: m_axi4_awuser = data_out[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH]
- return m # the SV instance u_buffer (axi_buffer_rab) has no counterpart here, so s_axi4_awready and m_axi4_awvalid are never driven
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# module axi4_aw_buffer
-# #(
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid,
-# input logic [31:0] s_axi4_awaddr,
-# input logic s_axi4_awvalid,
-# output logic s_axi4_awready,
-# input logic [7:0] s_axi4_awlen,
-# input logic [2:0] s_axi4_awsize,
-# input logic [1:0] s_axi4_awburst,
-# input logic s_axi4_awlock,
-# input logic [2:0] s_axi4_awprot,
-# input logic [3:0] s_axi4_awcache,
-# input logic [3:0] s_axi4_awregion,
-# input logic [3:0] s_axi4_awqos,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser,
-# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid,
-# output logic [31:0] m_axi4_awaddr,
-# output logic m_axi4_awvalid,
-# input logic m_axi4_awready,
-# output logic [7:0] m_axi4_awlen,
-# output logic [2:0] m_axi4_awsize,
-# output logic [1:0] m_axi4_awburst,
-# output logic m_axi4_awlock,
-# output logic [2:0] m_axi4_awprot,
-# output logic [3:0] m_axi4_awcache,
-# output logic [3:0] m_axi4_awregion,
-# output logic [3:0] m_axi4_awqos,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser
-# );
-# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_in;
-# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_out;
-# assign data_in [3:0] = s_axi4_awcache;
-# assign data_in [6:4] = s_axi4_awprot;
-# assign data_in [7] = s_axi4_awlock;
-# assign data_in [9:8] = s_axi4_awburst;
-# assign data_in [12:10] = s_axi4_awsize;
-# assign data_in [20:13] = s_axi4_awlen;
-# assign data_in [52:21] = s_axi4_awaddr;
-# assign data_in [56:53] = s_axi4_awregion;
-# assign data_in [60:57] = s_axi4_awqos;
-# assign data_in [60+AXI_ID_WIDTH:61] = s_axi4_awid;
-# assign data_in [60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH] = s_axi4_awuser;
-# assign m_axi4_awcache = data_out[3:0];
-# assign m_axi4_awprot = data_out[6:4];
-# assign m_axi4_awlock = data_out[7];
-# assign m_axi4_awburst = data_out[9:8];
-# assign m_axi4_awsize = data_out[12:10];
-# assign m_axi4_awlen = data_out[20:13];
-# assign m_axi4_awaddr = data_out[52:21];
-# assign m_axi4_awregion = data_out[56:53];
-# assign m_axi4_awqos = data_out[60:57];
-# assign m_axi4_awid = data_out[60+AXI_ID_WIDTH:61];
-# assign m_axi4_awuser = data_out[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH];
-# axi_buffer_rab
-# #(
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_buffer
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# .valid_out ( m_axi4_awvalid ),
-# .data_out ( data_out ),
-# .ready_in ( m_axi4_awready ),
-# .valid_in ( s_axi4_awvalid ),
-# .data_in ( data_in ),
-# .ready_out ( s_axi4_awready )
-# );
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class axi4_aw_sender(Elaboratable): # sv2nmigen stub of the SystemVerilog module axi4_aw_sender; only the port list survived translation
- def __init__(self): # creates one Signal per SystemVerilog port; the SV-internal l1_save, l2_sent and l2_available_q are not created
- self.axi4_aclk = Signal() # input
- self.axi4_arstn = Signal() # input
- self.l1_done_o = Signal() # output
- self.l1_accept_i = Signal() # input
- self.l1_drop_i = Signal() # input
- self.l1_save_i = Signal() # input
- self.l2_done_o = Signal() # output
- self.l2_accept_i = Signal() # input
- self.l2_drop_i = Signal() # input
- self.l2_sending_o = Signal() # output
- self.l1_awaddr_i = Signal(AXI_ADDR_WIDTH) # input; AXI_ADDR_WIDTH is an SV parameter (default 40) and is not defined in this generated file
- self.l2_awaddr_i = Signal(AXI_ADDR_WIDTH) # input
- self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input; AXI_ID_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.s_axi4_awvalid = Signal() # input
- self.s_axi4_awready = Signal() # output
- self.s_axi4_awlen = Signal(8) # input
- self.s_axi4_awsize = Signal(3) # input
- self.s_axi4_awburst = Signal(2) # input
- self.s_axi4_awlock = Signal() # input
- self.s_axi4_awprot = Signal(3) # input
- self.s_axi4_awcache = Signal(4) # input
- self.s_axi4_awregion = Signal(4) # input
- self.s_axi4_awqos = Signal(4) # input
- self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input; AXI_USER_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output
- self.m_axi4_awaddr = Signal(AXI_ADDR_WIDTH) # output
- self.m_axi4_awvalid = Signal() # output
- self.m_axi4_awready = Signal() # input
- self.m_axi4_awlen = Signal(8) # output
- self.m_axi4_awsize = Signal(3) # output
- self.m_axi4_awburst = Signal(2) # output
- self.m_axi4_awlock = Signal() # output
- self.m_axi4_awprot = Signal(3) # output
- self.m_axi4_awcache = Signal(4) # output
- self.m_axi4_awregion = Signal(4) # output
- self.m_axi4_awqos = Signal(4) # output
- self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output
- def elaborate(self, platform=None): # NOTE: every `self.None` below is a SyntaxError in Python 3; the generator dropped each compound right-hand side
- m = Module() # both branches of the SV `generate if (ENABLE_L2TLB == 1)` are emitted below, so several outputs are assigned twice
- m.d.comb += self.l1_save.eq(self.None) # SV: l1_save = l1_save_i & l2_available_q
- m.d.comb += self.l1_done_o.eq(self.None) # SV: l1_done_o = s_axi4_awvalid & s_axi4_awready
- m.d.comb += self.m_axi4_awvalid.eq(self.None) # SV: m_axi4_awvalid = (s_axi4_awvalid & l1_accept_i) | l2_sending_o
- m.d.comb += self.s_axi4_awready.eq(self.None) # SV: s_axi4_awready = (m_axi4_awvalid & m_axi4_awready & ~l2_sending_o) | (s_axi4_awvalid & (l1_drop_i | l1_save))
- m.d.comb += self.m_axi4_awuser.eq(self.None) # SV (ENABLE_L2TLB == 1 branch): l2_sending_o ? l2_axi4_awuser : s_axi4_awuser
- m.d.comb += self.m_axi4_awcache.eq(self.None) # SV: l2_sending_o ? l2_axi4_awcache : s_axi4_awcache
- m.d.comb += self.m_axi4_awregion.eq(self.None) # SV: l2_sending_o ? l2_axi4_awregion : s_axi4_awregion
- m.d.comb += self.m_axi4_awqos.eq(self.None) # SV: l2_sending_o ? l2_axi4_awqos : s_axi4_awqos
- m.d.comb += self.m_axi4_awprot.eq(self.None) # SV: l2_sending_o ? l2_axi4_awprot : s_axi4_awprot
- m.d.comb += self.m_axi4_awlock.eq(self.None) # SV: l2_sending_o ? l2_axi4_awlock : s_axi4_awlock
- m.d.comb += self.m_axi4_awburst.eq(self.None) # SV: l2_sending_o ? l2_axi4_awburst : s_axi4_awburst
- m.d.comb += self.m_axi4_awsize.eq(self.None) # SV: l2_sending_o ? l2_axi4_awsize : s_axi4_awsize
- m.d.comb += self.m_axi4_awlen.eq(self.None) # SV: l2_sending_o ? l2_axi4_awlen : s_axi4_awlen
- m.d.comb += self.m_axi4_awaddr.eq(self.None) # SV: l2_sending_o ? l2_awaddr_i : l1_awaddr_i
- m.d.comb += self.m_axi4_awid.eq(self.None) # SV: l2_sending_o ? l2_axi4_awid : s_axi4_awid
- m.d.comb += self.l2_sending_o.eq(self.None) # SV: l2_sending_o = l2_accept_i & ~l2_available_q
- m.d.comb += self.l2_sent.eq(self.None) # SV: l2_sent = l2_sending_o & m_axi4_awvalid & m_axi4_awready
- m.d.comb += self.l2_done_o.eq(self.None) # SV: l2_done_o = l2_sent | l2_drop_i
- m.d.comb += self.m_axi4_awuser.eq(self.s_axi4_awuser) # SV (else branch, ENABLE_L2TLB != 1): plain pass-through from here on
- m.d.comb += self.m_axi4_awcache.eq(self.s_axi4_awcache)
- m.d.comb += self.m_axi4_awregion.eq(self.s_axi4_awregion)
- m.d.comb += self.m_axi4_awqos.eq(self.s_axi4_awqos)
- m.d.comb += self.m_axi4_awprot.eq(self.s_axi4_awprot)
- m.d.comb += self.m_axi4_awlock.eq(self.s_axi4_awlock)
- m.d.comb += self.m_axi4_awburst.eq(self.s_axi4_awburst)
- m.d.comb += self.m_axi4_awsize.eq(self.s_axi4_awsize)
- m.d.comb += self.m_axi4_awlen.eq(self.s_axi4_awlen)
- m.d.comb += self.m_axi4_awaddr.eq(self.l1_awaddr_i) # SV: m_axi4_awaddr = l1_awaddr_i (the L1-translated address)
- m.d.comb += self.m_axi4_awid.eq(self.s_axi4_awid)
- m.d.comb += self.l2_sending_o.eq(self.1: 'b0)
- m.d.comb += self.l2_available_q.eq(self.1: 'b0)
- m.d.comb += self.l2_done_o.eq(self.1: 'b0)
- return m # the three assignments just above come from the SV constant 1'b0 (garbled by the generator); the SV's two always blocks are not translated
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# module axi4_aw_sender
-# #(
-# parameter AXI_ADDR_WIDTH = 40,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4,
-# parameter ENABLE_L2TLB = 0
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# output logic l1_done_o,
-# input logic l1_accept_i,
-# input logic l1_drop_i,
-# input logic l1_save_i,
-# output logic l2_done_o,
-# input logic l2_accept_i,
-# input logic l2_drop_i,
-# output logic l2_sending_o,
-# input logic [AXI_ADDR_WIDTH-1:0] l1_awaddr_i,
-# input logic [AXI_ADDR_WIDTH-1:0] l2_awaddr_i,
-# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid,
-# input logic s_axi4_awvalid,
-# output logic s_axi4_awready,
-# input logic [7:0] s_axi4_awlen,
-# input logic [2:0] s_axi4_awsize,
-# input logic [1:0] s_axi4_awburst,
-# input logic s_axi4_awlock,
-# input logic [2:0] s_axi4_awprot,
-# input logic [3:0] s_axi4_awcache,
-# input logic [3:0] s_axi4_awregion,
-# input logic [3:0] s_axi4_awqos,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser,
-# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid,
-# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_awaddr,
-# output logic m_axi4_awvalid,
-# input logic m_axi4_awready,
-# output logic [7:0] m_axi4_awlen,
-# output logic [2:0] m_axi4_awsize,
-# output logic [1:0] m_axi4_awburst,
-# output logic m_axi4_awlock,
-# output logic [2:0] m_axi4_awprot,
-# output logic [3:0] m_axi4_awcache,
-# output logic [3:0] m_axi4_awregion,
-# output logic [3:0] m_axi4_awqos,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser
-# );
-# logic l1_save;
-# logic l2_sent;
-# logic l2_available_q;
-# assign l1_save = l1_save_i & l2_available_q;
-# assign l1_done_o = s_axi4_awvalid & s_axi4_awready ;
-# // if 1: accept and forward a transaction translated by L1
-# // 2: drop or save request (if L2 slot not occupied already)
-# assign m_axi4_awvalid = (s_axi4_awvalid & l1_accept_i) |
-# l2_sending_o;
-# assign s_axi4_awready = (m_axi4_awvalid & m_axi4_awready & ~l2_sending_o) |
-# (s_axi4_awvalid & (l1_drop_i | l1_save));
-# generate
-# if (ENABLE_L2TLB == 1) begin
-# logic [AXI_USER_WIDTH-1:0] l2_axi4_awuser ;
-# logic [3:0] l2_axi4_awcache ;
-# logic [3:0] l2_axi4_awregion;
-# logic [3:0] l2_axi4_awqos ;
-# logic [2:0] l2_axi4_awprot ;
-# logic l2_axi4_awlock ;
-# logic [1:0] l2_axi4_awburst ;
-# logic [2:0] l2_axi4_awsize ;
-# logic [7:0] l2_axi4_awlen ;
-# logic [AXI_ID_WIDTH-1:0] l2_axi4_awid ;
-# assign m_axi4_awuser = l2_sending_o ? l2_axi4_awuser : s_axi4_awuser;
-# assign m_axi4_awcache = l2_sending_o ? l2_axi4_awcache : s_axi4_awcache;
-# assign m_axi4_awregion = l2_sending_o ? l2_axi4_awregion : s_axi4_awregion;
-# assign m_axi4_awqos = l2_sending_o ? l2_axi4_awqos : s_axi4_awqos;
-# assign m_axi4_awprot = l2_sending_o ? l2_axi4_awprot : s_axi4_awprot;
-# assign m_axi4_awlock = l2_sending_o ? l2_axi4_awlock : s_axi4_awlock;
-# assign m_axi4_awburst = l2_sending_o ? l2_axi4_awburst : s_axi4_awburst;
-# assign m_axi4_awsize = l2_sending_o ? l2_axi4_awsize : s_axi4_awsize;
-# assign m_axi4_awlen = l2_sending_o ? l2_axi4_awlen : s_axi4_awlen;
-# assign m_axi4_awaddr = l2_sending_o ? l2_awaddr_i : l1_awaddr_i;
-# assign m_axi4_awid = l2_sending_o ? l2_axi4_awid : s_axi4_awid;
-# // buffer AXI signals in case of L1 miss
-# always @(posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# l2_axi4_awuser <= 'b0;
-# l2_axi4_awcache <= 'b0;
-# l2_axi4_awregion <= 'b0;
-# l2_axi4_awqos <= 'b0;
-# l2_axi4_awprot <= 'b0;
-# l2_axi4_awlock <= 1'b0;
-# l2_axi4_awburst <= 'b0;
-# l2_axi4_awsize <= 'b0;
-# l2_axi4_awlen <= 'b0;
-# l2_axi4_awid <= 'b0;
-# end else if (l1_save) begin
-# l2_axi4_awuser <= s_axi4_awuser;
-# l2_axi4_awcache <= s_axi4_awcache;
-# l2_axi4_awregion <= s_axi4_awregion;
-# l2_axi4_awqos <= s_axi4_awqos;
-# l2_axi4_awprot <= s_axi4_awprot;
-# l2_axi4_awlock <= s_axi4_awlock;
-# l2_axi4_awburst <= s_axi4_awburst;
-# l2_axi4_awsize <= s_axi4_awsize;
-# l2_axi4_awlen <= s_axi4_awlen;
-# l2_axi4_awid <= s_axi4_awid;
-# end
-# end
-# // signal that an l1_save_i can be accepted
-# always @(posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# l2_available_q <= 1'b1;
-# end else if (l2_sent | l2_drop_i) begin
-# l2_available_q <= 1'b1;
-# end else if (l1_save) begin
-# l2_available_q <= 1'b0;
-# end
-# end
-# assign l2_sending_o = l2_accept_i & ~l2_available_q;
-# assign l2_sent = l2_sending_o & m_axi4_awvalid & m_axi4_awready;
-# // if 1: having sent out a transaction translated by L2
-# // 2: drop request (L2 slot is available again)
-# assign l2_done_o = l2_sent | l2_drop_i;
-# end else begin // !`ifdef ENABLE_L2TLB
-# assign m_axi4_awuser = s_axi4_awuser;
-# assign m_axi4_awcache = s_axi4_awcache;
-# assign m_axi4_awregion = s_axi4_awregion;
-# assign m_axi4_awqos = s_axi4_awqos;
-# assign m_axi4_awprot = s_axi4_awprot;
-# assign m_axi4_awlock = s_axi4_awlock;
-# assign m_axi4_awburst = s_axi4_awburst;
-# assign m_axi4_awsize = s_axi4_awsize;
-# assign m_axi4_awlen = s_axi4_awlen;
-# assign m_axi4_awaddr = l1_awaddr_i;
-# assign m_axi4_awid = s_axi4_awid;
-# assign l2_sending_o = 1'b0;
-# assign l2_available_q = 1'b0;
-# assign l2_done_o = 1'b0;
-# end // !`ifdef ENABLE_L2TLB
-# endgenerate
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class axi4_b_buffer(Elaboratable): # sv2nmigen stub of the SystemVerilog module axi4_b_buffer; only the port list survived translation
- def __init__(self): # creates one Signal per SystemVerilog port
- self.axi4_aclk = Signal() # input; connected to .clk of u_buffer in the SystemVerilog original
- self.axi4_arstn = Signal() # input; connected to .rstn of u_buffer in the SystemVerilog original
- self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output; AXI_ID_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.s_axi4_bresp = Signal(2) # output
- self.s_axi4_bvalid = Signal() # output
- self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output; AXI_USER_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.s_axi4_bready = Signal() # input
- self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input
- self.m_axi4_bresp = Signal(2) # input
- self.m_axi4_bvalid = Signal() # input
- self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_bready = Signal() # output
- def elaborate(self, platform=None): # NOTE: every `self.None` below is a SyntaxError in Python 3; the generator dropped the data_in/data_out part-selects
- m = Module()
- m.d.comb += self.None.eq(self.m_axi4_bresp) # SV: data_in[1:0] = m_axi4_bresp
- m.d.comb += self.None.eq(self.m_axi4_bid) # SV: data_in[AXI_ID_WIDTH+1:2] = m_axi4_bid
- m.d.comb += self.None.eq(self.m_axi4_buser) # SV: data_in[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2] = m_axi4_buser
- m.d.comb += self.s_axi4_buser.eq(self.None) # SV: s_axi4_buser = data_out[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2]
- m.d.comb += self.s_axi4_bid.eq(self.None) # SV: s_axi4_bid = data_out[AXI_ID_WIDTH+1:2]
- m.d.comb += self.s_axi4_bresp.eq(self.None) # SV: s_axi4_bresp = data_out[1:0]
- return m # the SV instance u_buffer (axi_buffer_rab) has no counterpart here, so s_axi4_bvalid and m_axi4_bready are never driven
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# module axi4_b_buffer
-# #(
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid,
-# output logic [1:0] s_axi4_bresp,
-# output logic s_axi4_bvalid,
-# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser,
-# input logic s_axi4_bready,
-# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid,
-# input logic [1:0] m_axi4_bresp,
-# input logic m_axi4_bvalid,
-# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser,
-# output logic m_axi4_bready
-# );
-# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_in;
-# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_out;
-# assign data_in [1:0] = m_axi4_bresp;
-# assign data_in [AXI_ID_WIDTH+1:2] = m_axi4_bid;
-# assign data_in[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2] = m_axi4_buser;
-# assign s_axi4_buser = data_out[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2];
-# assign s_axi4_bid = data_out[AXI_ID_WIDTH+1:2];
-# assign s_axi4_bresp = data_out[1:0];
-# axi_buffer_rab
-# #(
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_buffer
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# .valid_out( s_axi4_bvalid ),
-# .data_out ( data_out ),
-# .ready_in ( s_axi4_bready ),
-# .valid_in ( m_axi4_bvalid ),
-# .data_in ( data_in ),
-# .ready_out( m_axi4_bready )
-# );
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class axi4_b_sender(Elaboratable): # sv2nmigen stub of the SystemVerilog module axi4_b_sender; only the port list survived translation
- def __init__(self): # creates one Signal per SystemVerilog port; the SV-internal fifo_push, fifo_pop and dropping are not created
- self.axi4_aclk = Signal() # input
- self.axi4_arstn = Signal() # input
- self.drop_i = Signal() # input
- self.done_o = Signal() # output
- self.id_i = Signal(AXI_ID_WIDTH) # input; AXI_ID_WIDTH is an SV parameter (default 10 in this module) and is not defined in this generated file
- self.prefetch_i = Signal() # input
- self.hit_i = Signal() # input
- self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output
- self.s_axi4_bresp = Signal(2) # output
- self.s_axi4_bvalid = Signal() # output
- self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output; AXI_USER_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.s_axi4_bready = Signal() # input
- self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input
- self.m_axi4_bresp = Signal(2) # input
- self.m_axi4_bvalid = Signal() # input
- self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_bready = Signal() # output
- def elaborate(self, platform=None): # NOTE: every `self.None` below is a SyntaxError in Python 3; the generator dropped each compound right-hand side
- m = Module()
- m.d.comb += self.fifo_push.eq(self.None) # SV: fifo_push = drop_i & fifo_ready
- m.d.comb += self.done_o.eq(self.fifo_push) # SV: done_o = fifo_push
- m.d.comb += self.fifo_pop.eq(self.None) # SV: fifo_pop = dropping & s_axi4_bready
- m.d.comb += self.s_axi4_buser.eq(self.None) # SV: dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_buser
- m.d.comb += self.s_axi4_bid.eq(self.None) # SV: dropping ? id : m_axi4_bid
- m.d.comb += self.s_axi4_bresp.eq(self.None) # SV: 2'b00 when dropping & prefetch & hit, 2'b10 in every other dropping case, otherwise m_axi4_bresp
- m.d.comb += self.s_axi4_bvalid.eq(self.None) # SV: dropping | m_axi4_bvalid
- m.d.comb += self.m_axi4_bready.eq(self.None) # SV: ~dropping & s_axi4_bready
- return m # the SV instance u_fifo (axi_buffer_rab) and the clocked `dropping` register have no counterpart here
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# module axi4_b_sender
-# #(
-# parameter AXI_ID_WIDTH = 10,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# input logic drop_i,
-# output logic done_o,
-# input logic [AXI_ID_WIDTH-1:0] id_i,
-# input logic prefetch_i,
-# input logic hit_i,
-# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid,
-# output logic [1:0] s_axi4_bresp,
-# output logic s_axi4_bvalid,
-# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser,
-# input logic s_axi4_bready,
-# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid,
-# input logic [1:0] m_axi4_bresp,
-# input logic m_axi4_bvalid,
-# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser,
-# output logic m_axi4_bready
-# );
-# logic fifo_valid;
-# logic fifo_pop;
-# logic fifo_push;
-# logic fifo_ready;
-# logic [AXI_ID_WIDTH-1:0] id;
-# logic prefetch;
-# logic hit;
-# logic dropping;
-# axi_buffer_rab
-# #(
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_fifo
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# // Pop
-# .data_out ( {prefetch, hit, id} ),
-# .valid_out ( fifo_valid ),
-# .ready_in ( fifo_pop ),
-# // Push
-# .valid_in ( fifo_push ),
-# .data_in ( {prefetch_i, hit_i, id_i} ),
-# .ready_out ( fifo_ready )
-# );
-# assign fifo_push = drop_i & fifo_ready;
-# assign done_o = fifo_push;
-# assign fifo_pop = dropping & s_axi4_bready;
-# always @ (posedge axi4_aclk or negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# dropping <= 1'b0;
-# end else begin
-# if (fifo_valid && ~dropping)
-# dropping <= 1'b1;
-# else if (fifo_pop)
-# dropping <= 1'b0;
-# end
-# end
-# assign s_axi4_buser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_buser;
-# assign s_axi4_bid = dropping ? id : m_axi4_bid;
-# assign s_axi4_bresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, multi, prot
-# (dropping & prefetch ) ? 2'b10 : // prefetch miss
-# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot
-# (dropping ) ? 2'b10 : // non-prefetch miss
-# m_axi4_bresp;
-# assign s_axi4_bvalid = dropping | m_axi4_bvalid;
-# assign m_axi4_bready = ~dropping & s_axi4_bready;
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class axi4_r_buffer(Elaboratable): # sv2nmigen stub of the SystemVerilog module axi4_r_buffer; only the port list survived translation
- def __init__(self): # creates one Signal per SystemVerilog port
- self.axi4_aclk = Signal() # input; connected to .clk of u_buffer in the SystemVerilog original
- self.axi4_arstn = Signal() # input; connected to .rstn of u_buffer in the SystemVerilog original
- self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output; AXI_ID_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.s_axi4_rresp = Signal(2) # output
- self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output; AXI_DATA_WIDTH is an SV parameter (default 32) and is not defined in this generated file
- self.s_axi4_rlast = Signal() # output
- self.s_axi4_rvalid = Signal() # output
- self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output; AXI_USER_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.s_axi4_rready = Signal() # input
- self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input
- self.m_axi4_rresp = Signal(2) # input
- self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input
- self.m_axi4_rlast = Signal() # input
- self.m_axi4_rvalid = Signal() # input
- self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_rready = Signal() # output
- def elaborate(self, platform=None): # NOTE: every `self.None` below is a SyntaxError in Python 3; the generator dropped the data_in/data_out part-selects
- m = Module()
- m.d.comb += self.None.eq(self.m_axi4_rresp) # SV: data_in[1:0] = m_axi4_rresp
- m.d.comb += self.None.eq(self.m_axi4_rlast) # SV: data_in[2] = m_axi4_rlast
- m.d.comb += self.None.eq(self.m_axi4_rid) # SV: data_in[ID_END:ID_START] = m_axi4_rid
- m.d.comb += self.None.eq(self.m_axi4_rdata) # SV: data_in[DATA_END:DATA_START] = m_axi4_rdata
- m.d.comb += self.None.eq(self.m_axi4_ruser) # SV: data_in[USER_END:USER_START] = m_axi4_ruser
- m.d.comb += self.s_axi4_rresp.eq(self.None) # SV: s_axi4_rresp = data_out[1:0]
- m.d.comb += self.s_axi4_rlast.eq(self.None) # SV: s_axi4_rlast = data_out[2]
- m.d.comb += self.s_axi4_rid.eq(self.None) # SV: s_axi4_rid = data_out[ID_END:ID_START]
- m.d.comb += self.s_axi4_rdata.eq(self.None) # SV: s_axi4_rdata = data_out[DATA_END:DATA_START]
- m.d.comb += self.s_axi4_ruser.eq(self.None) # SV: s_axi4_ruser = data_out[USER_END:USER_START]
- return m # the SV instance u_buffer (axi_buffer_rab) has no counterpart here, so s_axi4_rvalid and m_axi4_rready are never driven
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# module axi4_r_buffer
-# #(
-# parameter AXI_DATA_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid,
-# output logic [1:0] s_axi4_rresp,
-# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
-# output logic s_axi4_rlast,
-# output logic s_axi4_rvalid,
-# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser,
-# input logic s_axi4_rready,
-# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid,
-# input logic [1:0] m_axi4_rresp,
-# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata,
-# input logic m_axi4_rlast,
-# input logic m_axi4_rvalid,
-# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser,
-# output logic m_axi4_rready
-# );
-# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_out;
-# localparam ID_START = 3;
-# localparam ID_END = AXI_ID_WIDTH-1 + ID_START;
-# localparam DATA_START = ID_END + 1;
-# localparam USER_START = DATA_END + 1;
-# assign data_in [1:0] = m_axi4_rresp;
-# assign data_in [2] = m_axi4_rlast;
-# assign data_in [ID_END:ID_START] = m_axi4_rid;
-# assign data_in[DATA_END:DATA_START] = m_axi4_rdata;
-# assign data_in[USER_END:USER_START] = m_axi4_ruser;
-# assign s_axi4_rresp = data_out [1:0];
-# assign s_axi4_rlast = data_out [2];
-# assign s_axi4_rid = data_out [ID_END:ID_START];
-# assign s_axi4_rdata = data_out[DATA_END:DATA_START];
-# assign s_axi4_ruser = data_out[USER_END:USER_START];
-# axi_buffer_rab
-# #(
-# .BUFFER_DEPTH ( 4 )
-# )
-# u_buffer
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# // Pop
-# .valid_out ( s_axi4_rvalid ),
-# .data_out ( data_out ),
-# .ready_in ( s_axi4_rready ),
-# // Push
-# .valid_in ( m_axi4_rvalid ),
-# .data_in ( data_in ),
-# .ready_out ( m_axi4_rready )
-# );
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class axi4_r_sender(Elaboratable): # sv2nmigen stub of the SystemVerilog module axi4_r_sender; only the port list survived translation
- def __init__(self): # creates one Signal per SystemVerilog port; the SV-internal fifo_push is not created
- self.axi4_aclk = Signal() # input
- self.axi4_arstn = Signal() # input
- self.drop_i = Signal() # input
- self.drop_len_i = Signal(8) # input
- self.done_o = Signal() # output
- self.id_i = Signal(AXI_ID_WIDTH) # input; AXI_ID_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.prefetch_i = Signal() # input
- self.hit_i = Signal() # input
- self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output
- self.s_axi4_rresp = Signal(2) # output
- self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output; AXI_DATA_WIDTH is an SV parameter (default 32) and is not defined in this generated file
- self.s_axi4_rlast = Signal() # output
- self.s_axi4_rvalid = Signal() # output
- self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output; AXI_USER_WIDTH is an SV parameter (default 4) and is not defined in this generated file
- self.s_axi4_rready = Signal() # input
- self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input
- self.m_axi4_rresp = Signal(2) # input
- self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input
- self.m_axi4_rlast = Signal() # input
- self.m_axi4_rvalid = Signal() # input
- self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input
- self.m_axi4_rready = Signal() # output
- def elaborate(self, platform=None): # NOTE: every `self.None` below is a SyntaxError in Python 3; the generator dropped each compound right-hand side
- m = Module()
- m.d.comb += self.fifo_push.eq(self.None) # SV: fifo_push = drop_i & fifo_ready
- m.d.comb += self.done_o.eq(self.fifo_push) # SV: done_o = fifo_push
- m.d.comb += self.s_axi4_rdata.eq(self.m_axi4_rdata) # SV: s_axi4_rdata = m_axi4_rdata
- m.d.comb += self.s_axi4_ruser.eq(self.None) # SV: dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_ruser
- m.d.comb += self.s_axi4_rid.eq(self.None) # SV: dropping ? id : m_axi4_rid
- m.d.comb += self.s_axi4_rresp.eq(self.None) # SV: chained conditional beginning (dropping & prefetch & hit) ? 2'b00 : ...
- m.d.comb += self.s_axi4_rvalid.eq(self.None) # right-hand side lost by the generator
- m.d.comb += self.m_axi4_rready.eq(self.None) # right-hand side lost by the generator
- return m # the SV always_comb state machine (FORWARDING/DROPPING) and the u_fifo instance have no counterpart here; s_axi4_rlast is never assigned
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# //import CfMath::log2;
-# module axi4_r_sender
-# #(
-# parameter AXI_DATA_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# input logic drop_i,
-# input logic [7:0] drop_len_i,
-# output logic done_o,
-# input logic [AXI_ID_WIDTH-1:0] id_i,
-# input logic prefetch_i,
-# input logic hit_i,
-# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid,
-# output logic [1:0] s_axi4_rresp,
-# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
-# output logic s_axi4_rlast,
-# output logic s_axi4_rvalid,
-# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser,
-# input logic s_axi4_rready,
-# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid,
-# input logic [1:0] m_axi4_rresp,
-# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata,
-# input logic m_axi4_rlast,
-# input logic m_axi4_rvalid,
-# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser,
-# output logic m_axi4_rready
-# );
-# localparam BUFFER_DEPTH = 16;
-# logic fifo_valid;
-# logic fifo_pop;
-# logic fifo_push;
-# logic fifo_ready;
-# logic [AXI_ID_WIDTH-1:0] id;
-# logic [7:0] len;
-# logic prefetch;
-# logic hit;
-# logic dropping;
-# enum logic [1:0] { FORWARDING, DROPPING }
-# state_d, state_q;
-# logic burst_ongoing_d, burst_ongoing_q;
-# logic [7:0] drop_cnt_d, drop_cnt_q;
-# axi_buffer_rab
-# #(
-# )
-# u_fifo
-# (
-# .clk ( axi4_aclk ),
-# .rstn ( axi4_arstn ),
-# // Pop
-# .data_out ( {prefetch, hit, id, len} ),
-# .valid_out ( fifo_valid ),
-# .ready_in ( fifo_pop ),
-# // Push
-# .valid_in ( fifo_push ),
-# .data_in ( {prefetch_i, hit_i, id_i, drop_len_i} ),
-# .ready_out ( fifo_ready )
-# );
-# assign fifo_push = drop_i & fifo_ready;
-# assign done_o = fifo_push;
-# always_comb begin
-# burst_ongoing_d = burst_ongoing_q;
-# drop_cnt_d = drop_cnt_q;
-# dropping = 1'b0;
-# s_axi4_rlast = 1'b0;
-# fifo_pop = 1'b0;
-# state_d = state_q;
-# case (state_q)
-# FORWARDING: begin
-# s_axi4_rlast = m_axi4_rlast;
-# // Remember whether there is currently a burst ongoing.
-# if (m_axi4_rvalid && m_axi4_rready) begin
-# if (m_axi4_rlast) begin
-# burst_ongoing_d = 1'b0;
-# end else begin
-# burst_ongoing_d = 1'b1;
-# end
-# end
-# // If there is no burst ongoing and the FIFO has a drop request ready, process it.
-# if (!burst_ongoing_d && fifo_valid) begin
-# drop_cnt_d = len;
-# state_d = DROPPING;
-# end
-# end
-# DROPPING: begin
-# dropping = 1'b1;
-# s_axi4_rlast = (drop_cnt_q == '0);
-# // Handshake on slave interface
-# if (s_axi4_rready) begin
-# drop_cnt_d -= 1;
-# if (drop_cnt_q == '0) begin
-# drop_cnt_d = '0;
-# fifo_pop = 1'b1;
-# state_d = FORWARDING;
-# end
-# end
-# end
-# default: begin
-# state_d = FORWARDING;
-# end
-# endcase
-# end
-# assign s_axi4_rdata = m_axi4_rdata;
-# assign s_axi4_ruser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_ruser;
-# assign s_axi4_rid = dropping ? id : m_axi4_rid;
-# assign s_axi4_rresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, multi, prot
-# (dropping & prefetch ) ? 2'b10 : // prefetch miss
-# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot
-# (dropping ) ? 2'b10 : // non-prefetch miss
-# m_axi4_rresp;
-# assign s_axi4_rvalid = dropping | m_axi4_rvalid;
-# assign m_axi4_rready = ~dropping & s_axi4_rready;
-# always_ff @(posedge axi4_aclk, negedge axi4_arstn) begin
-# if (axi4_arstn == 1'b0) begin
-# burst_ongoing_q <= 1'b0;
-# drop_cnt_q <= 'b0;
-# state_q <= FORWARDING;
-# end else begin
-# burst_ongoing_q <= burst_ongoing_d;
-# drop_cnt_q <= drop_cnt_d;
-# state_q <= state_d;
-# end
-# end
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-from nmigen import Signal, Module, Const, Cat, Elaboratable
class axi4_w_buffer(Elaboratable):
    """Port-level nMigen stub of the ``axi4_w_buffer`` SystemVerilog module.

    Only the port list has been translated; ``elaborate`` returns an empty
    module, so none of the buffering / HUM-buffer logic of the SystemVerilog
    source is implemented here yet.

    Arguments:
    * axi_data_width: width in bits of the W-channel data ports
      (``AXI_DATA_WIDTH``, SystemVerilog default 32)
    * axi_id_width: width in bits of the ID ports
      (``AXI_ID_WIDTH``, SystemVerilog default 4)
    * axi_user_width: width in bits of the user ports
      (``AXI_USER_WIDTH``, SystemVerilog default 4)
    """

    def __init__(self, axi_data_width=32, axi_id_width=4, axi_user_width=4):
        self.axi_data_width = axi_data_width
        self.axi_id_width = axi_id_width
        self.axi_user_width = axi_user_width
        # The SystemVerilog ports are declared [AXI_DATA_WIDTH/8-1:0],
        # i.e. one write-strobe bit per data byte.
        strb_width = axi_data_width // 8

        self.axi4_aclk = Signal()  # input
        self.axi4_arstn = Signal()  # input

        # L1 & L2 interfaces
        self.l1_done_o = Signal()  # output
        self.l1_accept_i = Signal()  # input
        self.l1_save_i = Signal()  # input
        self.l1_drop_i = Signal()  # input
        self.l1_master_i = Signal()  # input
        self.l1_id_i = Signal(axi_id_width)  # input
        self.l1_len_i = Signal(8)  # input
        self.l1_prefetch_i = Signal()  # input
        self.l1_hit_i = Signal()  # input
        self.l2_done_o = Signal()  # output
        self.l2_accept_i = Signal()  # input
        self.l2_drop_i = Signal()  # input
        self.l2_master_i = Signal()  # input
        self.l2_id_i = Signal(axi_id_width)  # input
        self.l2_len_i = Signal(8)  # input
        self.l2_prefetch_i = Signal()  # input
        self.l2_hit_i = Signal()  # input
        self.master_select_o = Signal()  # output
        self.input_stall_o = Signal()  # output
        self.output_stall_o = Signal()  # output

        # B sender interface
        self.b_drop_o = Signal()  # output
        self.b_done_i = Signal()  # input
        self.id_o = Signal(axi_id_width)  # output
        self.prefetch_o = Signal()  # output
        self.hit_o = Signal()  # output

        # AXI W channel interfaces
        self.s_axi4_wdata = Signal(axi_data_width)  # input
        self.s_axi4_wvalid = Signal()  # input
        self.s_axi4_wready = Signal()  # output
        self.s_axi4_wstrb = Signal(strb_width)  # input
        self.s_axi4_wlast = Signal()  # input
        self.s_axi4_wuser = Signal(axi_user_width)  # input
        self.m_axi4_wdata = Signal(axi_data_width)  # output
        self.m_axi4_wvalid = Signal()  # output
        self.m_axi4_wready = Signal()  # input
        self.m_axi4_wstrb = Signal(strb_width)  # output
        self.m_axi4_wlast = Signal()  # output
        self.m_axi4_wuser = Signal(axi_user_width)  # output

    def elaborate(self, platform=None):
        """Return an empty module.

        NOTE(review): the SystemVerilog body (input buffer, L1/L2 FIFOs and
        the HUM buffer FSM) has not been translated; every output stays at
        its reset value.
        """
        m = Module()
        return m
-# //import CfMath::log2;
-# module axi4_w_buffer
-# #(
-# parameter AXI_DATA_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 4,
-# parameter AXI_USER_WIDTH = 4,
-# parameter ENABLE_L2TLB = 0,
-# parameter HUM_BUFFER_DEPTH = 16
-# )
-# (
-# input logic axi4_aclk,
-# input logic axi4_arstn,
-# // L1 & L2 interfaces
-# output logic l1_done_o,
-# input logic l1_accept_i,
-# input logic l1_save_i,
-# input logic l1_drop_i,
-# input logic l1_master_i,
-# input logic [AXI_ID_WIDTH-1:0] l1_id_i,
-# input logic [7:0] l1_len_i,
-# input logic l1_prefetch_i,
-# input logic l1_hit_i,
-# output logic l2_done_o,
-# input logic l2_accept_i,
-# input logic l2_drop_i,
-# input logic l2_master_i,
-# input logic [AXI_ID_WIDTH-1:0] l2_id_i,
-# input logic [7:0] l2_len_i,
-# input logic l2_prefetch_i,
-# input logic l2_hit_i,
-# output logic master_select_o,
-# output logic input_stall_o,
-# output logic output_stall_o,
-# // B sender interface
-# output logic b_drop_o,
-# input logic b_done_i,
-# output logic [AXI_ID_WIDTH-1:0] id_o,
-# output logic prefetch_o,
-# output logic hit_o,
-# // AXI W channel interfaces
-# input logic [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
-# input logic s_axi4_wvalid,
-# output logic s_axi4_wready,
-# input logic [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
-# input logic s_axi4_wlast,
-# input logic [AXI_USER_WIDTH-1:0] s_axi4_wuser,
-# output logic [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
-# output logic m_axi4_wvalid,
-# input logic m_axi4_wready,
-# output logic [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
-# output logic m_axi4_wlast,
-# output logic [AXI_USER_WIDTH-1:0] m_axi4_wuser
-# );
- localparam INPUT_BUFFER_DEPTH = 4;
- localparam L1_FIFO_DEPTH = 8;
- localparam L2_FIFO_DEPTH = 4;
- logic [AXI_DATA_WIDTH-1:0] axi4_wdata;
- logic axi4_wvalid;
- logic axi4_wready;
- logic [AXI_DATA_WIDTH/8-1:0] axi4_wstrb;
- logic axi4_wlast;
- logic [AXI_USER_WIDTH-1:0] axi4_wuser;
- logic l1_fifo_valid_out;
- logic l1_fifo_ready_in;
- logic l1_fifo_valid_in;
- logic l1_fifo_ready_out;
- logic l1_req;
- logic l1_accept_cur, l1_save_cur, l1_drop_cur;
- logic l1_master_cur;
- logic [AXI_ID_WIDTH-1:0] l1_id_cur;
- logic [7:0] l1_len_cur;
- logic l1_hit_cur, l1_prefetch_cur;
- logic l1_save_in, l1_save_out;
- logic [log2(L1_FIFO_DEPTH)-1:0] n_l1_save_SP;
- logic l2_fifo_valid_out;
- logic l2_fifo_ready_in;
- logic l2_fifo_valid_in;
- logic l2_fifo_ready_out;
- logic l2_req;
- logic l2_accept_cur, l2_drop_cur;
- logic l2_master_cur;
- logic [AXI_ID_WIDTH-1:0] l2_id_cur;
- logic [7:0] l2_len_cur;
- logic l2_hit_cur, l2_prefetch_cur;
- logic fifo_select, fifo_select_SN, fifo_select_SP;
- logic w_done;
- logic b_drop_set;
- // HUM buffer signals
- logic hum_buf_ready_out;
- logic hum_buf_valid_in;
- logic hum_buf_ready_in;
- logic hum_buf_valid_out;
- logic hum_buf_underfull;
- logic [AXI_DATA_WIDTH-1:0] hum_buf_wdata;
- logic [AXI_DATA_WIDTH/8-1:0] hum_buf_wstrb;
- logic hum_buf_wlast;
- logic [AXI_USER_WIDTH-1:0] hum_buf_wuser;
- logic hum_buf_drop_req_SN, hum_buf_drop_req_SP;
- logic [7:0] hum_buf_drop_len_SN, hum_buf_drop_len_SP;
- logic hum_buf_almost_full;
- logic stop_store;
- logic wlast_in, wlast_out;
- logic signed [3:0] n_wlast_SN, n_wlast_SP;
- logic block_forwarding;
// Search FSM
// Every state referenced by HUM_BUFFER_FSM must be listed here. STORE stays
// first so that it keeps encoding 0 and remains the reset state.
typedef enum logic [3:0] {
  STORE,
  BYPASS,
  WAIT_L1_BYPASS_YES,
  WAIT_L2_BYPASS_YES,
  WAIT_L1_BYPASS_NO,
  WAIT_L2_BYPASS_NO,
  FLUSH,
  DISCARD,
  DISCARD_FINISH
} hum_buf_state_t;
hum_buf_state_t hum_buf_SP; // Present state
hum_buf_state_t hum_buf_SN; // Next state
- axi_buffer_rab
- #(
- )
- u_input_buf
- (
- .clk ( axi4_aclk ),
- .rstn ( axi4_arstn ),
- // Push
- .data_in ( {s_axi4_wuser, s_axi4_wstrb, s_axi4_wdata, s_axi4_wlast} ),
- .valid_in ( s_axi4_wvalid ),
- .ready_out ( s_axi4_wready ),
- // Pop
- .data_out ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ),
- .valid_out ( axi4_wvalid ),
- .ready_in ( axi4_wready )
- );
- axi_buffer_rab
- #(
- )
- u_l1_fifo
- (
- .clk ( axi4_aclk ),
- .rstn ( axi4_arstn ),
- // Push
- .data_in ( {l1_prefetch_i, l1_hit_i, l1_id_i, l1_len_i, l1_master_i, l1_accept_i, l1_save_i, l1_drop_i} ),
- .valid_in ( l1_fifo_valid_in ),
- .ready_out ( l1_fifo_ready_out ),
- // Pop
- .data_out ( {l1_prefetch_cur, l1_hit_cur, l1_id_cur, l1_len_cur, l1_master_cur, l1_accept_cur, l1_save_cur, l1_drop_cur} ),
- .valid_out ( l1_fifo_valid_out ),
- .ready_in ( l1_fifo_ready_in )
- );
- // Push upon receiving new requests from the TLB.
- assign l1_req = l1_accept_i | l1_save_i | l1_drop_i;
- assign l1_fifo_valid_in = l1_req & l1_fifo_ready_out;
- // Signal handshake
- assign l1_done_o = l1_fifo_valid_in;
- assign l2_done_o = l2_fifo_valid_in;
- // Stall AW input of L1 TLB
- assign input_stall_o = ~(l1_fifo_ready_out & l2_fifo_ready_out);
- // Interface b_drop signals + handshake
- always_comb begin
- if (fifo_select == 1'b0) begin
- prefetch_o = l1_prefetch_cur;
- hit_o = l1_hit_cur;
- id_o = l1_id_cur;
- l1_fifo_ready_in = w_done | b_done_i;
- l2_fifo_ready_in = 1'b0;
- end else begin
- prefetch_o = l2_prefetch_cur;
- hit_o = l2_hit_cur;
- id_o = l2_id_cur;
- l1_fifo_ready_in = 1'b0;
- l2_fifo_ready_in = w_done | b_done_i;
- end
- end
- // Detect when an L1 transaction save request enters or exits the L1 FIFO.
- assign l1_save_in = l1_fifo_valid_in & l1_save_i;
- assign l1_save_out = l1_fifo_ready_in & l1_save_cur;
- // Count the number of L1 transaction to save in the L1 FIFO.
- always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
- if (axi4_arstn == 0) begin
- n_l1_save_SP <= '0;
- end else if (l1_save_in ^ l1_save_out) begin
- if (l1_save_in) begin
- n_l1_save_SP <= n_l1_save_SP + 1'b1;
- end else if (l1_save_out) begin
- n_l1_save_SP <= n_l1_save_SP - 1'b1;
- end
- end
- end
- // Stall forwarding of AW L1 hits if:
- // 1. The HUM buffer does not allow to be bypassed.
- // 2. There are multiple L1 save requests in the FIFO, i.e., multiple L2 outputs pending.
- assign output_stall_o = (n_l1_save_SP > 1) || (block_forwarding == 1'b1);
- generate
- if (ENABLE_L2TLB == 1) begin : HUM_BUFFER
- axi_buffer_rab_bram
- #(
- )
- u_hum_buf
- (
- .clk ( axi4_aclk ),
- .rstn ( axi4_arstn ),
- // Push
- .data_in ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ),
- .valid_in ( hum_buf_valid_in ),
- .ready_out ( hum_buf_ready_out ),
- // Pop
- .data_out ( {hum_buf_wuser, hum_buf_wstrb, hum_buf_wdata, hum_buf_wlast} ),
- .valid_out ( hum_buf_valid_out ),
- .ready_in ( hum_buf_ready_in ),
- // Clear
- .almost_full ( hum_buf_almost_full ),
- .underfull ( hum_buf_underfull ),
- .drop_req ( hum_buf_drop_req_SP ),
- .drop_len ( hum_buf_drop_len_SP )
- );
- axi_buffer_rab
- #(
- )
- u_l2_fifo
- (
- .clk ( axi4_aclk ),
- .rstn ( axi4_arstn ),
- // Push
- .data_in ( {l2_prefetch_i, l2_hit_i, l2_id_i, l2_len_i, l2_master_i, l2_accept_i, l2_drop_i} ),
- .valid_in ( l2_fifo_valid_in ),
- .ready_out ( l2_fifo_ready_out ),
- // Pop
- .data_out ( {l2_prefetch_cur, l2_hit_cur, l2_id_cur, l2_len_cur, l2_master_cur, l2_accept_cur, l2_drop_cur} ),
- .valid_out ( l2_fifo_valid_out ),
- .ready_in ( l2_fifo_ready_in )
- );
- // Push upon receiving new result from TLB.
- assign l2_req = l2_accept_i | l2_drop_i;
- assign l2_fifo_valid_in = l2_req & l2_fifo_ready_out;
- assign wlast_in = axi4_wlast & hum_buf_valid_in & hum_buf_ready_out;
- assign wlast_out = hum_buf_wlast & hum_buf_valid_out & hum_buf_ready_in;
- always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
- if (axi4_arstn == 0) begin
- fifo_select_SP <= 1'b0;
- hum_buf_drop_len_SP <= 'b0;
- hum_buf_drop_req_SP <= 1'b0;
- hum_buf_SP <= STORE;
- n_wlast_SP <= 'b0;
- end else begin
- fifo_select_SP <= fifo_select_SN;
- hum_buf_drop_len_SP <= hum_buf_drop_len_SN;
- hum_buf_drop_req_SP <= hum_buf_drop_req_SN;
- hum_buf_SP <= hum_buf_SN;
- n_wlast_SP <= n_wlast_SN;
- end
- end
- always_comb begin
- n_wlast_SN = n_wlast_SP;
- if (hum_buf_drop_req_SP) begin // Happens exactly once per burst to be dropped.
- n_wlast_SN -= 1;
- end
- if (wlast_in) begin
- n_wlast_SN += 1;
- end
- if (wlast_out) begin
- n_wlast_SN -= 1;
- end
- end
- always_comb begin : HUM_BUFFER_FSM
- hum_buf_SN = hum_buf_SP;
- m_axi4_wlast = 1'b0;
- m_axi4_wdata = 'b0;
- m_axi4_wstrb = 'b0;
- m_axi4_wuser = 'b0;
- m_axi4_wvalid = 1'b0;
- axi4_wready = 1'b0;
- hum_buf_valid_in = 1'b0;
- hum_buf_ready_in = 1'b0;
- hum_buf_drop_req_SN = hum_buf_drop_req_SP;
- hum_buf_drop_len_SN = hum_buf_drop_len_SP;
- master_select_o = 1'b0;
- w_done = 1'b0; // read from FIFO without handshake with B sender
- b_drop_o = 1'b0; // send data from FIFO to B sender (with handshake)
- fifo_select = 1'b0;
- fifo_select_SN = fifo_select_SP;
- stop_store = 1'b0;
- block_forwarding = 1'b0;
- unique case (hum_buf_SP)
- STORE : begin
- // Simply store the data in the buffer.
- hum_buf_valid_in = axi4_wvalid & hum_buf_ready_out;
- axi4_wready = hum_buf_ready_out;
- // We have got a full burst in the HUM buffer, thus stop storing.
- if (wlast_in & !hum_buf_underfull | (n_wlast_SP > $signed(0))) begin
- hum_buf_SN = WAIT_L1_BYPASS_YES;
- // The buffer is full, thus wait for decision.
- end else if (~hum_buf_ready_out) begin
- hum_buf_SN = WAIT_L1_BYPASS_NO;
- end
- // Avoid the forwarding of L1 hits until we know whether we can bypass.
- if (l1_fifo_valid_out & l1_save_cur) begin
- block_forwarding = 1'b1;
- end
- end
- WAIT_L1_BYPASS_YES : begin
- // Wait for orders from L1 TLB.
- if (l1_fifo_valid_out) begin
- // L1 hit - forward data from buffer
- if (l1_accept_cur) begin
- m_axi4_wlast = hum_buf_wlast;
- m_axi4_wdata = hum_buf_wdata;
- m_axi4_wstrb = hum_buf_wstrb;
- m_axi4_wuser = hum_buf_wuser;
- m_axi4_wvalid = hum_buf_valid_out;
- hum_buf_ready_in = m_axi4_wready;
- master_select_o = l1_master_cur;
- // Detect last data beat.
- if (wlast_out) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- hum_buf_SN = STORE;
- end
- // L1 miss - wait for L2
- end else if (l1_save_cur) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- hum_buf_SN = WAIT_L2_BYPASS_YES;
- // L1 prefetch, prot, multi - drop data
- end else if (l1_drop_cur) begin
- fifo_select_SN = 1'b0; // L1
- hum_buf_drop_req_SN = 1'b1;
- hum_buf_drop_len_SN = l1_len_cur;
- hum_buf_SN = FLUSH;
- end
- end
- end
- WAIT_L2_BYPASS_YES : begin
- // Wait for orders from L2 TLB.
- if (l2_fifo_valid_out) begin
- // L2 hit - forward data from buffer
- if (l2_accept_cur) begin
- m_axi4_wlast = hum_buf_wlast;
- m_axi4_wdata = hum_buf_wdata;
- m_axi4_wstrb = hum_buf_wstrb;
- m_axi4_wuser = hum_buf_wuser;
- m_axi4_wvalid = hum_buf_valid_out;
- hum_buf_ready_in = m_axi4_wready;
- master_select_o = l2_master_cur;
- // Detect last data beat.
- if (wlast_out) begin
- fifo_select = 1'b1;
- w_done = 1'b1;
- hum_buf_SN = STORE;
- end
- // L2 miss/prefetch hit
- end else if (l2_drop_cur) begin
- fifo_select_SN = 1'b1; // L2
- hum_buf_drop_req_SN = 1'b1;
- hum_buf_drop_len_SN = l2_len_cur;
- hum_buf_SN = FLUSH;
- end
- // While we wait for orders from L2 TLB, we can still drop and accept L1 transactions.
- end else if (l1_fifo_valid_out) begin
- // L1 hit
- if (l1_accept_cur) begin
- hum_buf_SN = BYPASS;
- // L1 prefetch/prot/multi
- end else if (l1_drop_cur) begin
- hum_buf_SN = DISCARD;
- end
- end
- end
- FLUSH : begin
- // Clear HUM buffer flush request.
- hum_buf_drop_req_SN = 1'b0;
- // perform handshake with B sender
- fifo_select = fifo_select_SP;
- b_drop_o = 1'b1;
- if (b_done_i) begin
- hum_buf_SN = STORE;
- end
- end
- BYPASS : begin
- // Forward one full transaction from input buffer.
- m_axi4_wlast = axi4_wlast;
- m_axi4_wdata = axi4_wdata;
- m_axi4_wstrb = axi4_wstrb;
- m_axi4_wuser = axi4_wuser;
- m_axi4_wvalid = axi4_wvalid;
- axi4_wready = m_axi4_wready;
- master_select_o = l1_master_cur;
- // We have got a full transaction.
- if (axi4_wlast & axi4_wready & axi4_wvalid) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- hum_buf_SN = WAIT_L2_BYPASS_YES;
- end
- end
- DISCARD : begin
- // Discard one full transaction from input buffer.
- axi4_wready = 1'b1;
- // We have got a full transaction.
- if (axi4_wlast & axi4_wready & axi4_wvalid) begin
- // Try to perform handshake with B sender.
- fifo_select = 1'b0;
- b_drop_o = 1'b1;
- // We cannot wait here due to axi4_wready.
- if (b_done_i) begin
- hum_buf_SN = WAIT_L2_BYPASS_YES;
- end else begin
- hum_buf_SN = DISCARD_FINISH;
- end
- end
- end
// Entered from DISCARD when the last beat was consumed but the B sender
// had not acknowledged yet: keep requesting the drop until it does.
DISCARD_FINISH : begin
  // Perform handshake with B sender.
  fifo_select = 1'b0;
  b_drop_o = 1'b1;
  if (b_done_i) begin
    hum_buf_SN = WAIT_L2_BYPASS_YES;
  end
end
- WAIT_L1_BYPASS_NO : begin
- // Do not allow the forwarding of L1 hits.
- block_forwarding = 1'b1;
- // Wait for orders from L1 TLB.
- if (l1_fifo_valid_out) begin
- // L1 hit - forward data from/through HUM buffer and refill the buffer
- if (l1_accept_cur) begin
- // Forward data from HUM buffer.
- m_axi4_wlast = hum_buf_wlast;
- m_axi4_wdata = hum_buf_wdata;
- m_axi4_wstrb = hum_buf_wstrb;
- m_axi4_wuser = hum_buf_wuser;
- m_axi4_wvalid = hum_buf_valid_out;
- hum_buf_ready_in = m_axi4_wready;
- master_select_o = l1_master_cur;
- // Refill the HUM buffer. Stop when buffer full.
- stop_store = ~hum_buf_ready_out;
- hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ;
- axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out;
- // Detect last data beat.
- if (wlast_out) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- if (~hum_buf_ready_out | hum_buf_almost_full) begin
- hum_buf_SN = WAIT_L1_BYPASS_NO;
- end else begin
- hum_buf_SN = STORE;
- end
- end
- // Allow the forwarding of L1 hits.
- block_forwarding = 1'b0;
- // L1 miss - wait for L2
- end else if (l1_save_cur) begin
- fifo_select = 1'b0;
- w_done = 1'b1;
- hum_buf_SN = WAIT_L2_BYPASS_NO;
- // L1 prefetch, prot, multi - drop data
- end else if (l1_drop_cur) begin
- fifo_select_SN = 1'b0; // L1
- hum_buf_drop_req_SN = 1'b1;
- hum_buf_drop_len_SN = l1_len_cur;
- hum_buf_SN = FLUSH;
- // Allow the forwarding of L1 hits.
- block_forwarding = 1'b0;
- end
- end
- end
- WAIT_L2_BYPASS_NO : begin
- // Do not allow the forwarding of L1 hits.
- block_forwarding = 1'b1;
- // Wait for orders from L2 TLB.
- if (l2_fifo_valid_out) begin
- // L2 hit - forward first part from HUM buffer, rest from input buffer
- if (l2_accept_cur) begin
- // Forward data from HUM buffer.
- m_axi4_wlast = hum_buf_wlast;
- m_axi4_wdata = hum_buf_wdata;
- m_axi4_wstrb = hum_buf_wstrb;
- m_axi4_wuser = hum_buf_wuser;
- m_axi4_wvalid = hum_buf_valid_out;
- hum_buf_ready_in = m_axi4_wready;
- master_select_o = l2_master_cur;
- // Refill the HUM buffer. Stop when buffer full.
- stop_store = ~hum_buf_ready_out;
- hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ;
- axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out;
- // Detect last data beat.
- if (wlast_out) begin
- fifo_select = 1'b1;
- w_done = 1'b1;
- if (~hum_buf_ready_out | hum_buf_almost_full) begin
- hum_buf_SN = WAIT_L1_BYPASS_NO;
- end else begin
- hum_buf_SN = STORE;
- end
- end
- // Allow the forwarding of L1 hits.
- block_forwarding = 1'b0;
- // L2 miss/prefetch hit - drop data
- end else if (l2_drop_cur) begin
- fifo_select_SN = 1'b1; // L2
- hum_buf_drop_req_SN = 1'b1;
- hum_buf_drop_len_SN = l2_len_cur;
- hum_buf_SN = FLUSH;
- // Allow the forwarding of L1 hits.
- block_forwarding = 1'b0;
- end
- end
- end
// Any unexpected encoding falls back to the reset state.
default: begin
  hum_buf_SN = STORE;
end
endcase // hum_buf_SP
end // HUM_BUFFER_FSM
// b_drop_o is driven directly by the FSM in this configuration, so the
// set request used by the non-HUM-buffer branch is tied off.
assign b_drop_set = 1'b0;
- end else begin // HUM_BUFFER
- // register to perform the handshake with B sender
- always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
- if (axi4_arstn == 0) begin
- b_drop_o <= 1'b0;
- end else if (b_done_i) begin
- b_drop_o <= 1'b0;
- end else if (b_drop_set) begin
- b_drop_o <= 1'b1;;
- end
- end
- always_comb begin : OUTPUT_CTRL
- fifo_select = 1'b0;
- w_done = 1'b0;
- b_drop_set = 1'b0;
- m_axi4_wlast = 1'b0;
- m_axi4_wdata = 'b0;
- m_axi4_wstrb = 'b0;
- m_axi4_wuser = 'b0;
- m_axi4_wvalid = 1'b0;
- axi4_wready = 1'b0;
- if (l1_fifo_valid_out) begin
- // forward data
- if (l1_accept_cur) begin
- m_axi4_wlast = axi4_wlast;
- m_axi4_wdata = axi4_wdata;
- m_axi4_wstrb = axi4_wstrb;
- m_axi4_wuser = axi4_wuser;
- m_axi4_wvalid = axi4_wvalid;
- axi4_wready = m_axi4_wready;
- // Simply pop from FIFO upon last data beat.
- w_done = axi4_wlast & axi4_wvalid & axi4_wready;
- // discard entire burst
- end else if (b_drop_o == 1'b0) begin
- axi4_wready = 1'b1;
- // Simply pop from FIFO upon last data beat. Perform handshake with B sender.
- if (axi4_wlast & axi4_wvalid & axi4_wready)
- b_drop_set = 1'b1;
- end
- end
- end // OUTPUT_CTRL
- assign master_select_o = l1_master_cur;
- assign l2_fifo_ready_out = 1'b1;
- assign block_forwarding = 1'b0;
- // unused signals
- assign hum_buf_ready_out = 1'b0;
- assign hum_buf_valid_in = 1'b0;
- assign hum_buf_ready_in = 1'b0;
- assign hum_buf_valid_out = 1'b0;
- assign hum_buf_wdata = 'b0;
- assign hum_buf_wstrb = 'b0;
- assign hum_buf_wlast = 1'b0;
- assign hum_buf_wuser = 'b0;
- assign hum_buf_drop_len_SN = 'b0;
- assign hum_buf_drop_req_SN = 1'b0;
- assign hum_buf_almost_full = 1'b0;
- assign l2_fifo_valid_in = 1'b0;
- assign l2_fifo_valid_out = 1'b0;
- assign l2_prefetch_cur = 1'b0;
- assign l2_hit_cur = 1'b0;
- assign l2_id_cur = 'b0;
- assign l2_len_cur = 'b0;
- assign l2_master_cur = 1'b0;
- assign l2_accept_cur = 1'b0;
- assign l2_drop_cur = 1'b0;
- assign l2_req = 1'b0;
- assign fifo_select_SN = 1'b0;
- assign fifo_select_SP = 1'b0;
- assign stop_store = 1'b0;
- assign n_wlast_SP = 'b0;
- assign wlast_in = 1'b0;
- assign wlast_out = 1'b0;
- end // HUM_BUFFER
- endgenerate
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
class axi4_w_sender(Elaboratable):
    """AXI4 W-channel pass-through.

    Every slave-side W signal is forwarded combinatorially to the master
    side, and the master-side ``wready`` is forwarded back to the slave side.

    Arguments:
    * axi_data_width: width in bits of ``wdata``; ``wstrb`` gets one bit per
      data byte (``AXI_DATA_WIDTH``, SystemVerilog default 32)
    * axi_user_width: width in bits of ``wuser``
      (``AXI_USER_WIDTH``, SystemVerilog default 2)
    """

    def __init__(self, axi_data_width=32, axi_user_width=2):
        self.axi_data_width = axi_data_width
        self.axi_user_width = axi_user_width
        # The SystemVerilog ports are declared [AXI_DATA_WIDTH/8-1:0].
        strb_width = axi_data_width // 8

        # Present in the SystemVerilog port list; not used by the logic below.
        self.axi4_aclk = Signal()  # input
        self.axi4_arstn = Signal()  # input

        self.s_axi4_wdata = Signal(axi_data_width)  # input
        self.s_axi4_wvalid = Signal()  # input
        self.s_axi4_wready = Signal()  # output
        self.s_axi4_wstrb = Signal(strb_width)  # input
        self.s_axi4_wlast = Signal()  # input
        self.s_axi4_wuser = Signal(axi_user_width)  # input
        self.m_axi4_wdata = Signal(axi_data_width)  # output
        self.m_axi4_wvalid = Signal()  # output
        self.m_axi4_wready = Signal()  # input
        self.m_axi4_wstrb = Signal(strb_width)  # output
        self.m_axi4_wlast = Signal()  # output
        self.m_axi4_wuser = Signal(axi_user_width)  # output

    def elaborate(self, platform=None):
        """Wire the slave-side W channel straight through to the master side."""
        m = Module()
        m.d.comb += [
            # Slave -> master payload and valid
            self.m_axi4_wdata.eq(self.s_axi4_wdata),
            self.m_axi4_wstrb.eq(self.s_axi4_wstrb),
            self.m_axi4_wlast.eq(self.s_axi4_wlast),
            self.m_axi4_wuser.eq(self.s_axi4_wuser),
            self.m_axi4_wvalid.eq(self.s_axi4_wvalid),
            # Master -> slave back-pressure
            self.s_axi4_wready.eq(self.m_axi4_wready),
        ]
        return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# module axi4_w_sender
-# #(
-# parameter AXI_DATA_WIDTH = 32,
-# parameter AXI_USER_WIDTH = 2
-# )
-# (
-# input axi4_aclk,
-# input axi4_arstn,
-# input [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
-# input s_axi4_wvalid,
-# output s_axi4_wready,
-# input [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
-# input s_axi4_wlast,
-# input [AXI_USER_WIDTH-1:0] s_axi4_wuser,
-# output [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
-# output m_axi4_wvalid,
-# input m_axi4_wready,
-# output [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
-# output m_axi4_wlast,
-# output [AXI_USER_WIDTH-1:0] m_axi4_wuser
-# );
-# assign m_axi4_wdata = s_axi4_wdata;
-# assign m_axi4_wstrb = s_axi4_wstrb;
-# assign m_axi4_wlast = s_axi4_wlast;
-# assign m_axi4_wuser = s_axi4_wuser;
-# assign m_axi4_wvalid = s_axi4_wvalid;
-# assign s_axi4_wready = m_axi4_wready;
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
class axi_buffer_rab(Elaboratable):
    """Register-based FIFO with valid/ready handshakes on both sides.

    Translation of the ``axi_buffer_rab`` SystemVerilog module: ``data_in`` is
    stored when ``valid_in`` is high and the buffer is not full, and the
    oldest stored word is presented on ``data_out`` while ``valid_out`` is
    high; it is removed when ``ready_in`` is high in the same cycle.

    Arguments:
    * data_width: width in bits of one buffer entry (``DATA_WIDTH``)
    * buffer_depth: number of entries (``BUFFER_DEPTH``), at least 1

    NOTE(review): the SystemVerilog source declares no default for either
    parameter; the defaults here are placeholders so that the class can still
    be constructed without arguments - confirm against the instantiations.
    """

    def __init__(self, data_width=32, buffer_depth=16):
        if buffer_depth < 1:
            raise ValueError("buffer_depth must be at least 1")
        self.data_width = data_width
        self.buffer_depth = buffer_depth

        # Kept so the port list matches the SystemVerilog module. The logic
        # in elaborate() is clocked and reset by the `sync` domain instead.
        self.clk = Signal()  # input
        self.rstn = Signal()  # input

        # Downstream port
        self.data_out = Signal(data_width)  # output
        self.valid_out = Signal()  # output
        self.ready_in = Signal()  # input

        # Upstream port
        self.valid_in = Signal()  # input
        self.data_in = Signal(data_width)  # input
        self.ready_out = Signal()  # output

    def elaborate(self, platform=None):
        """Build the FIFO storage, pointers and element counter.

        NOTE(review): the SystemVerilog uses an asynchronous active-low reset
        (``negedge rstn``); here all registers rely on the reset of the
        ``sync`` domain, so the domain must be configured accordingly.
        """
        # Array is not part of this file's top-level nmigen import.
        from nmigen import Array

        m = Module()
        depth = self.buffer_depth

        # Pointers address 0 .. depth-1; the counter must also hold `depth`.
        ptr_bits = max(1, (depth - 1).bit_length())
        pointer_in = Signal(ptr_bits)  # location to write next
        pointer_out = Signal(ptr_bits)  # location to read next
        elements = Signal(depth.bit_length())  # number of stored entries
        buffer = Array(Signal(self.data_width, name="buffer_%d" % i)
                       for i in range(depth))

        full = Signal()
        push = Signal()  # a word is accepted this cycle
        pop = Signal()  # a word is handed over this cycle

        m.d.comb += [
            full.eq(elements == depth),
            push.eq(self.valid_in & ~full),
            pop.eq(self.ready_in & self.valid_out),
            # Update output ports
            self.data_out.eq(buffer[pointer_out]),
            self.valid_out.eq(elements != 0),
            self.ready_out.eq(~full),
        ]

        # Element counter: changes only when exactly one of push/pop happens.
        with m.If(pop & ~push):
            m.d.sync += elements.eq(elements - 1)
        with m.Elif(push & ~pop):
            m.d.sync += elements.eq(elements + 1)

        # Input side: store the word and advance the write pointer with wrap.
        with m.If(push):
            m.d.sync += buffer[pointer_in].eq(self.data_in)
            with m.If(pointer_in == depth - 1):
                m.d.sync += pointer_in.eq(0)
            with m.Else():
                m.d.sync += pointer_in.eq(pointer_in + 1)

        # Output side: advance the read pointer with wrap.
        with m.If(pop):
            with m.If(pointer_out == depth - 1):
                m.d.sync += pointer_out.eq(0)
            with m.Else():
                m.d.sync += pointer_out.eq(pointer_out + 1)

        return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# //import CfMath::log2;
-# module axi_buffer_rab
-# //#(
-# // parameter DATA_WIDTH,
-# // parameter BUFFER_DEPTH
-# //)
-# (
-# input logic clk,
-# input logic rstn,
-# // Downstream port
-# output logic [DATA_WIDTH-1:0] data_out,
-# output logic valid_out,
-# input logic ready_in,
-# // Upstream port
-# input logic valid_in,
-# input logic [DATA_WIDTH-1:0] data_in,
-# output logic ready_out
-# );
-# localparam integer LOG_BUFFER_DEPTH = log2(BUFFER_DEPTH);
-# // Internal data structures
-# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_in; // location to which we last wrote
-# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_out; // location from which we last sent
-# reg [LOG_BUFFER_DEPTH : 0] elements; // number of elements in the buffer
-# reg [DATA_WIDTH - 1 : 0] buffer [BUFFER_DEPTH - 1 : 0];
-# wire full;
-# integer loop1;
-# assign full = (elements == BUFFER_DEPTH);
-# always @(posedge clk or negedge rstn)
-# begin: elements_sequential
-# if (rstn == 1'b0)
-# elements <= 0;
-# else
-# begin
-# // ------------------
-# // Are we filling up?
-# // ------------------
-# // One out, none in
-# if (ready_in && valid_out && (!valid_in || full))
-# elements <= elements - 1;
-# // None out, one in
-# else if ((!valid_out || !ready_in) && valid_in && !full)
-# elements <= elements + 1;
-# // Else, either one out and one in, or none out and none in - stays unchanged
-# end
-# end
-# always @(posedge clk or negedge rstn)
-# begin: buffers_sequential
-# if (rstn == 1'b0)
-# begin
-# for (loop1 = 0 ; loop1 < BUFFER_DEPTH ; loop1 = loop1 + 1)
-# buffer[loop1] <= 0;
-# end
-# else
-# begin
-# // Update the memory
-# if (valid_in && !full)
-# buffer[pointer_in] <= data_in;
-# end
-# end
-# always @(posedge clk or negedge rstn)
-# begin: sequential
-# if (rstn == 1'b0)
-# begin
-# pointer_out <= 0;
-# pointer_in <= 0;
-# end
-# else
-# begin
-# // ------------------------------------
-# // Check what to do with the input side
-# // ------------------------------------
-# // We have some input, increase by 1 the input pointer
-# if (valid_in && !full)
-# begin
-# if (pointer_in == $unsigned(BUFFER_DEPTH - 1))
-# pointer_in <= 0;
-# else
-# pointer_in <= pointer_in + 1;
-# end
-# // Else we don't have any input, the input pointer stays the same
-# // -------------------------------------
-# // Check what to do with the output side
-# // -------------------------------------
-# // We had pushed one flit out, we can try to go for the next one
-# if (ready_in && valid_out)
-# begin
-# if (pointer_out == $unsigned(BUFFER_DEPTH - 1))
-# pointer_out <= 0;
-# else
-# pointer_out <= pointer_out + 1;
-# end
-# // Else stay on the same output location
-# end
-# end
-# // Update output ports
-# assign data_out = buffer[pointer_out];
-# assign valid_out = (elements != 0);
-# assign ready_out = ~full;
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
class axi_buffer_rab_bram(Elaboratable):
    """Port-level nMigen stub of the ``axi_buffer_rab_bram`` module.

    Only the port list has been translated; ``elaborate`` returns an empty
    module, so the FIFO, drop handling and status outputs of the
    SystemVerilog source are not implemented here yet.

    Arguments:
    * data_width: width in bits of ``data_in`` / ``data_out`` (``DATA_WIDTH``)

    NOTE(review): the SystemVerilog source declares no default for
    ``DATA_WIDTH``; the default here is a placeholder so that the class can
    still be constructed without arguments - confirm against the
    instantiation.
    """

    def __init__(self, data_width=32):
        self.data_width = data_width

        self.clk = Signal()  # input
        self.rstn = Signal()  # input

        # Downstream port
        self.data_out = Signal(data_width)  # output
        self.valid_out = Signal()  # output
        self.ready_in = Signal()  # input

        # Upstream port
        self.valid_in = Signal()  # input
        self.data_in = Signal(data_width)  # input
        self.ready_out = Signal()  # output

        # Status and drop control
        self.almost_full = Signal()  # output
        self.underfull = Signal()  # output
        self.drop_req = Signal()  # input
        # Number of items to drop. Counting starts at zero, i.e. drop_len == 0
        # together with drop_req means "drop one item".
        self.drop_len = Signal(8)  # input

    def elaborate(self, platform=None):
        """Return an empty module.

        NOTE(review): the SystemVerilog body has not been translated; every
        output stays at its reset value.
        """
        m = Module()
        return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# ////import CfMath::log2;
-# module axi_buffer_rab_bram
-# //#(
-# // parameter DATA_WIDTH,
-# // parameter BUFFER_DEPTH
-# // )
-# (
-# input logic clk,
-# input logic rstn,
-# // Downstream port
-# output logic [DATA_WIDTH-1:0] data_out,
-# output logic valid_out,
-# input logic ready_in,
-# // Upstream port
-# input logic valid_in,
-# input logic [DATA_WIDTH-1:0] data_in,
-# output logic ready_out,
-# // Status and drop control
-# output logic almost_full,
-# output logic underfull,
-# input logic drop_req,
-# // Number of items to drop. As for AXI lengths, counting starts at zero, i.e., `drop_len == 0`
-# // and `drop_req` means drop one item.
-# input logic [7:0] drop_len
-# );
-""" #docstring_begin
- // The BRAM needs to be in "write-first" mode for first-word fall-through FIFO behavior.
- // To still push and pop simultaneously if the buffer is full, we internally increase the
- // buffer depth by 1.
- /**
- * Internal data structures
- */
- // Location to which we last wrote
- logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_in_d, ptr_in_q;
- // Location from which we last sent
- logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_d, ptr_out_q;
- // Required for fall-through behavior on the first word
- logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_bram;
- // Number of elements in the buffer. Can be negative if elements that have been dropped have not
- // yet been written.
- logic signed [ACT_LOG_BUFFER_DEPTH:0] n_elems_d, n_elems_q;
- logic [DATA_WIDTH-1:0] data_out_bram, data_out_q;
- logic valid_out_q;
- logic full;
- assign almost_full = (n_elems_q == BUFFER_DEPTH-1);
- assign full = (n_elems_q == BUFFER_DEPTH);
- always_ff @(posedge clk, negedge rstn) begin
- if (~rstn) begin
- n_elems_q <= '0;
- ptr_in_q <= '0;
- ptr_out_q <= '0;
- end else begin
- n_elems_q <= n_elems_d;
- ptr_in_q <= ptr_in_d;
- ptr_out_q <= ptr_out_d;
- end
- end
- // Update the number of elements.
- always_comb begin
- n_elems_d = n_elems_q;
- if (drop_req) begin
- n_elems_d -= (drop_len + 1);
- end
- if (valid_in && ready_out) begin
- n_elems_d += 1;
- end
- if (valid_out && ready_in) begin
- n_elems_d -= 1;
- end
- end
- // Update the output pointer.
- always_comb begin
- ptr_out_d = ptr_out_q;
- if (drop_req) begin
- if ((ptr_out_q + drop_len + 1) > (ACT_BUFFER_DEPTH - 1)) begin
- ptr_out_d = drop_len + 1 - (ACT_BUFFER_DEPTH - ptr_out_q);
- end else begin
- ptr_out_d += (drop_len + 1);
- end
- end
- if (valid_out && ready_in) begin
- if (ptr_out_d == (ACT_BUFFER_DEPTH - 1)) begin
- ptr_out_d = '0;
- end else begin
- ptr_out_d += 1;
- end
- end
- end
- // The BRAM has a read latency of one cycle, so apply the new address one cycle earlier for
- // first-word fall-through FIFO behavior.
- //assign ptr_out_bram = (ptr_out_q == (ACT_BUFFER_DEPTH-1)) ? '0 : (ptr_out_q + 1);
- assign ptr_out_bram = ptr_out_d;
- // Update the input pointer.
- always_comb begin
- ptr_in_d = ptr_in_q;
- if (valid_in && ready_out) begin
- if (ptr_in_d == (ACT_BUFFER_DEPTH - 1)) begin
- ptr_in_d = '0;
- end else begin
- ptr_in_d += 1;
- end
- end
- end
- // Update output ports.
- assign valid_out = (n_elems_q > $signed(0));
- assign underfull = (n_elems_q < $signed(0));
- assign ready_out = ~full;
- ram_tp_write_first #(
- )
- ram_tp_write_first_0
- (
- .clk ( clk ),
- .we ( valid_in & ~full ),
- .addr0 ( ptr_in_q ),
- .addr1 ( ptr_out_bram ),
- .d_i ( data_in ),
- .d0_o ( ),
- .d1_o ( data_out_bram )
- );
- // When reading from/writing to the same address on both ports ("Write-Read Collision"),
- // the data on the read port is invalid (during the write cycle). In this implementation,
- // this can happen only when the buffer is empty. Thus, we forward the data from a
- // register in this case.
- always @(posedge clk) begin
- if (rstn == 1'b0) begin
- data_out_q <= 'b0;
- end else if ( (ptr_out_bram == ptr_in_q) && (valid_in && !full) ) begin
- data_out_q <= data_in;
- end
- end
- always @(posedge clk) begin
- if (rstn == 1'b0) begin
- valid_out_q <= 'b0;
- end else begin
- valid_out_q <= valid_out;
- end
- end
- // Drive output data
- always_comb begin
- if (valid_out && !valid_out_q) begin // We have just written to an empty FIFO
- data_out = data_out_q;
- end else begin
- data_out = data_out_bram;
- end
- end
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class axi_rab_cfg(Elaboratable):
- def __init__(self):
- self.Clk_CI = Signal() # input
- self.Rst_RBI = Signal() # input
- self.s_axi_awaddr = Signal(AXI_ADDR_WIDTH) # input
- self.s_axi_awvalid = Signal() # input
- self.s_axi_awready = Signal() # output
- self.s_axi_wdata = Signal() # input
- self.s_axi_wstrb = Signal(1+ERROR p_expression_25) # input
- self.s_axi_wvalid = Signal() # input
- self.s_axi_wready = Signal() # output
- self.s_axi_bresp = Signal(2) # output
- self.s_axi_bvalid = Signal() # output
- self.s_axi_bready = Signal() # input
- self.s_axi_araddr = Signal(AXI_ADDR_WIDTH) # input
- self.s_axi_arvalid = Signal() # input
- self.s_axi_arready = Signal() # output
- self.s_axi_rdata = Signal(AXI_DATA_WIDTH) # output
- self.s_axi_rresp = Signal(2) # output
- self.s_axi_rvalid = Signal() # output
- self.s_axi_rready = Signal() # input
- self.L1Cfg_DO = Signal() # output
- self.L1AllowMultiHit_SO = Signal() # output
- self.MissAddr_DI = Signal(ADDR_WIDTH_VIRT) # input
- self.MissMeta_DI = Signal(MISS_META_WIDTH) # input
- self.Miss_SI = Signal() # input
- self.MhFifoFull_SO = Signal() # output
- self.wdata_l2 = Signal() # output
- self.waddr_l2 = Signal() # output
- self.wren_l2 = Signal(N_PORTS) # output
- def elaborate(self, platform=None):
- m = Module()
- return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# // --=========================================================================--
-# //
-# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗
-# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝
-# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗
-# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║
-# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝
-# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝
-# //
-# //
-# // Author: Pirmin Vogel - vogelpi@iis.ee.ethz.ch
-# //
-# // Purpose : AXI4-Lite configuration and miss handling interface for RAB
-# //
-# // --=========================================================================--
-# //import CfMath::log2;
-# module axi_rab_cfg
-# #(
-# parameter N_PORTS = 3,
-# parameter N_REGS = 196,
-# parameter N_L2_SETS = 32,
-# parameter N_L2_SET_ENTRIES= 32,
-# parameter ADDR_WIDTH_PHYS = 40,
-# parameter ADDR_WIDTH_VIRT = 32,
-# parameter N_FLAGS = 4,
-# parameter AXI_DATA_WIDTH = 64,
-# parameter AXI_ADDR_WIDTH = 32,
-# parameter MISS_META_WIDTH = 10, // <= FIFO_WIDTH
-# parameter MH_FIFO_DEPTH = 16
-# )
-# (
-# input logic Clk_CI,
-# input logic Rst_RBI,
-# // AXI Lite interface
-# input logic [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
-# input logic s_axi_awvalid,
-# output logic s_axi_awready,
-# input logic [AXI_DATA_WIDTH/8-1:0][7:0] s_axi_wdata,
-# input logic [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
-# input logic s_axi_wvalid,
-# output logic s_axi_wready,
-# output logic [1:0] s_axi_bresp,
-# output logic s_axi_bvalid,
-# input logic s_axi_bready,
-# input logic [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
-# input logic s_axi_arvalid,
-# output logic s_axi_arready,
-# output logic [AXI_DATA_WIDTH-1:0] s_axi_rdata,
-# output logic [1:0] s_axi_rresp,
-# output logic s_axi_rvalid,
-# input logic s_axi_rready,
-# // Slice configuration
-# output logic [N_REGS-1:0][63:0] L1Cfg_DO,
-# output logic L1AllowMultiHit_SO,
-# // Miss handling
-# input logic [ADDR_WIDTH_VIRT-1:0] MissAddr_DI,
-# input logic [MISS_META_WIDTH-1:0] MissMeta_DI,
-# input logic Miss_SI,
-# output logic MhFifoFull_SO,
-# // L2 TLB
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] wdata_l2,
-# output logic [N_PORTS-1:0] [AXI_ADDR_WIDTH-1:0] waddr_l2,
-# output logic [N_PORTS-1:0] wren_l2
-# );
-""" #docstring_begin
- localparam ADDR_LSB = log2(64/8); // 64 even if the AXI Lite interface is 32,
- // because RAB slices are 64 bit wide.
- localparam ADDR_MSB = log2(N_REGS)+ADDR_LSB-1;
- localparam L2SINGLE_AMAP_SIZE = 16'h4000; // Maximum 2048 TLB entries in L2
- localparam integer N_L2_ENTRIES = N_L2_SETS * N_L2_SET_ENTRIES;
- localparam logic [AXI_ADDR_WIDTH-1:0] L2_VA_MAX_ADDR = (N_L2_ENTRIES-1) << 2;
- logic [AXI_DATA_WIDTH/8-1:0][7:0] L1Cfg_DP[N_REGS]; // [Byte][Bit]
- genvar j;
- // █████╗ ██╗ ██╗██╗██╗ ██╗ ██╗ ██╗████████╗███████╗
- // ██╔══██╗╚██╗██╔╝██║██║ ██║ ██║ ██║╚══██╔══╝██╔════╝
- // ███████║ ╚███╔╝ ██║███████║█████╗██║ ██║ ██║ █████╗
- // ██╔══██║ ██╔██╗ ██║╚════██║╚════╝██║ ██║ ██║ ██╔══╝
- // ██║ ██║██╔╝ ██╗██║ ██║ ███████╗██║ ██║ ███████╗
- // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚══════╝
- //
- logic [AXI_ADDR_WIDTH-1:0] awaddr_reg;
- logic awaddr_done_rise;
- logic awaddr_done_reg;
- logic awaddr_done_reg_dly;
- logic [AXI_DATA_WIDTH/8-1:0][7:0] wdata_reg;
- logic [AXI_DATA_WIDTH/8-1:0] wstrb_reg;
- logic wdata_done_rise;
- logic wdata_done_reg;
- logic wdata_done_reg_dly;
- logic wresp_done_reg;
- logic wresp_running_reg;
- logic [AXI_ADDR_WIDTH-1:0] araddr_reg;
- logic araddr_done_reg;
- logic [AXI_DATA_WIDTH-1:0] rdata_reg;
- logic rresp_done_reg;
- logic rresp_running_reg;
- logic awready;
- logic wready;
- logic bvalid;
- logic arready;
- logic rvalid;
- logic wren;
- logic wren_l1;
- assign wren = ( wdata_done_rise & awaddr_done_reg ) | ( awaddr_done_rise & wdata_done_reg );
- assign wdata_done_rise = wdata_done_reg & ~wdata_done_reg_dly;
- assign awaddr_done_rise = awaddr_done_reg & ~awaddr_done_reg_dly;
- // reg_dly
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- wdata_done_reg_dly <= 1'b0;
- awaddr_done_reg_dly <= 1'b0;
- end
- else
- begin
- wdata_done_reg_dly <= wdata_done_reg;
- awaddr_done_reg_dly <= awaddr_done_reg;
- end
- end
- // AW Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- awaddr_done_reg <= 1'b0;
- awaddr_reg <= '0;
- awready <= 1'b1;
- end
- else
- begin
- if (awready && s_axi_awvalid)
- begin
- awready <= 1'b0;
- awaddr_done_reg <= 1'b1;
- awaddr_reg <= s_axi_awaddr;
- end
- else if (awaddr_done_reg && wresp_done_reg)
- begin
- awready <= 1'b1;
- awaddr_done_reg <= 1'b0;
- end
- end
- end
- // W Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- wdata_done_reg <= 1'b0;
- wready <= 1'b1;
- wdata_reg <= '0;
- wstrb_reg <= '0;
- end
- else
- begin
- if (wready && s_axi_wvalid)
- begin
- wready <= 1'b0;
- wdata_done_reg <= 1'b1;
- wdata_reg <= s_axi_wdata;
- wstrb_reg <= s_axi_wstrb;
- end
- else if (wdata_done_reg && wresp_done_reg)
- begin
- wready <= 1'b1;
- wdata_done_reg <= 1'b0;
- end
- end
- end
- // B Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- bvalid <= 1'b0;
- wresp_done_reg <= 1'b0;
- wresp_running_reg <= 1'b0;
- end
- else
- begin
- if (awaddr_done_reg && wdata_done_reg && !wresp_done_reg)
- begin
- if (!wresp_running_reg)
- begin
- bvalid <= 1'b1;
- wresp_running_reg <= 1'b1;
- end
- else if (s_axi_bready)
- begin
- bvalid <= 1'b0;
- wresp_done_reg <= 1'b1;
- wresp_running_reg <= 1'b0;
- end
- end
- else
- begin
- bvalid <= 1'b0;
- wresp_done_reg <= 1'b0;
- wresp_running_reg <= 1'b0;
- end
- end
- end
- // AR Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- araddr_done_reg <= 1'b0;
- arready <= 1'b1;
- araddr_reg <= '0;
- end
- else
- begin
- if (arready && s_axi_arvalid)
- begin
- arready <= 1'b0;
- araddr_done_reg <= 1'b1;
- araddr_reg <= s_axi_araddr;
- end
- else if (araddr_done_reg && rresp_done_reg)
- begin
- arready <= 1'b1;
- araddr_done_reg <= 1'b0;
- end
- end
- end
- // R Channel
- always @(posedge Clk_CI or negedge Rst_RBI)
- begin
- if (!Rst_RBI)
- begin
- rresp_done_reg <= 1'b0;
- rvalid <= 1'b0;
- rresp_running_reg <= 1'b0;
- end
- else
- begin
- if (araddr_done_reg && !rresp_done_reg)
- begin
- if (!rresp_running_reg)
- begin
- rvalid <= 1'b1;
- rresp_running_reg <= 1'b1;
- end
- else if (s_axi_rready)
- begin
- rvalid <= 1'b0;
- rresp_done_reg <= 1'b1;
- rresp_running_reg <= 1'b0;
- end
- end
- else
- begin
- rvalid <= 1'b0;
- rresp_done_reg <= 1'b0;
- rresp_running_reg <= 1'b0;
- end
- end
- end
- // ██╗ ██╗ ██████╗███████╗ ██████╗ ██████╗ ███████╗ ██████╗
- // ██║ ███║ ██╔════╝██╔════╝██╔════╝ ██╔══██╗██╔════╝██╔════╝
- // ██║ ╚██║ ██║ █████╗ ██║ ███╗ ██████╔╝█████╗ ██║ ███╗
- // ██║ ██║ ██║ ██╔══╝ ██║ ██║ ██╔══██╗██╔══╝ ██║ ██║
- // ███████╗██║ ╚██████╗██║ ╚██████╔╝ ██║ ██║███████╗╚██████╔╝
- // ╚══════╝╚═╝ ╚═════╝╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ ╚═════╝
- //
- assign wren_l1 = wren && (awaddr_reg < L2SINGLE_AMAP_SIZE);
- always @( posedge Clk_CI or negedge Rst_RBI )
- begin
- var integer idx_reg, idx_byte;
- if ( Rst_RBI == 1'b0 )
- begin
- for ( idx_reg = 0; idx_reg < N_REGS; idx_reg++ )
- L1Cfg_DP[idx_reg] <= '0;
- end
- else if ( wren_l1 )
- begin
- if ( awaddr_reg[ADDR_LSB+1] == 1'b0 ) begin // VIRT_ADDR
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
- if ( (idx_byte < ADDR_WIDTH_VIRT/8) ) begin
- if ( wstrb_reg[idx_byte] ) begin
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte];
- end
- end
- else begin // Let synthesizer optimize away unused registers.
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
- end
- end
- end
- else if ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b10 ) begin // PHYS_ADDR
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
- if ( (idx_byte < ADDR_WIDTH_PHYS/8) ) begin
- if ( wstrb_reg[idx_byte] ) begin
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte];
- end
- end
- else begin // Let synthesizer optimize away unused registers.
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
- end
- end
- end
- else begin // ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b11 ) // FLAGS
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
- if ( (idx_byte < 1) ) begin
- if ( wstrb_reg[idx_byte] ) begin
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte] & { {{8-N_FLAGS}{1'b0}}, {{N_FLAGS}{1'b1}} };
- end
- end
- else begin // Let synthesizer optimize away unused registers.
- L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
- end
- end
- end
- end
- end // always @ ( posedge Clk_CI or negedge Rst_RBI )
- generate
- // Mask unused bits -> Synthesizer should optimize away unused registers
- for( j=0; j<N_REGS; j++ ) begin
- if ( j[1] == 1'b0 ) // VIRT_ADDR
- assign L1Cfg_DO[j] = { {{64-ADDR_WIDTH_VIRT}{1'b0}},{ADDR_WIDTH_VIRT{1'b1}} } & L1Cfg_DP[j];
- else if ( j[1:0] == 2'b10 ) // PHYS_ADDR
- assign L1Cfg_DO[j] = { {{64-ADDR_WIDTH_PHYS}{1'b0}},{ADDR_WIDTH_PHYS{1'b1}} } & L1Cfg_DP[j];
- else // if ( j[1:0] == 2'b11 ) // FLAGS
- assign L1Cfg_DO[j] = { {{64-N_FLAGS}{1'b0}},{N_FLAGS{1'b1}} } & L1Cfg_DP[j];
- end
- endgenerate
- always_comb
- begin
- if ( araddr_reg[ADDR_LSB-1] == 1'b1 ) // read upper 32 bit, for debugging over 32-bit interface
- rdata_reg = { {32'h00000000},{L1Cfg_DO[araddr_reg[ADDR_MSB:ADDR_LSB]][63:32]} };
- else
- rdata_reg = L1Cfg_DO[araddr_reg[ADDR_MSB:ADDR_LSB]];
- end
- assign s_axi_awready = awready;
- assign s_axi_wready = wready;
- assign s_axi_bresp = 2'b00;
- assign s_axi_bvalid = bvalid;
- assign s_axi_arready = arready;
- assign s_axi_rresp = 2'b00;
- assign s_axi_rvalid = rvalid;
- // ██╗ ██████╗ ██████╗███████╗ ██████╗
- // ██║ ╚════██╗ ██╔════╝██╔════╝██╔════╝
- // ██║ █████╔╝ ██║ █████╗ ██║ ███╗
- // ██║ ██╔═══╝ ██║ ██╔══╝ ██║ ██║
- // ███████╗███████╗ ╚██████╗██║ ╚██████╔╝
- // ╚══════╝╚══════╝ ╚═════╝╚═╝ ╚═════╝
- //
- logic [N_PORTS-1:0] l2_addr_is_in_va_rams;
- logic [N_PORTS-1:0] upper_word_is_written;
- logic [N_PORTS-1:0] lower_word_is_written;
- generate
- for( j=0; j< N_PORTS; j++)
- begin
- if (AXI_DATA_WIDTH == 64) begin
- assign l2_addr_is_in_va_rams[j] = (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg[log2(L2SINGLE_AMAP_SIZE)-1:0] <= L2_VA_MAX_ADDR);
- assign upper_word_is_written[j] = (wstrb_reg[7:4] != 4'b0000);
- assign lower_word_is_written[j] = (wstrb_reg[3:0] != 4'b0000);
- end else begin
- assign l2_addr_is_in_va_rams[j] = 1'b0;
- assign upper_word_is_written[j] = 1'b0;
- assign lower_word_is_written[j] = 1'b0;
- end
- always @( posedge Clk_CI or negedge Rst_RBI ) begin
- var integer idx_byte, off_byte;
- if ( Rst_RBI == 1'b0 )
- begin
- wren_l2[j] <= 1'b0;
- wdata_l2[j] <= '0;
- end
- else if (wren)
- begin
- if ( (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg < (j+2)*L2SINGLE_AMAP_SIZE) && (|wstrb_reg) )
- wren_l2[j] <= 1'b1;
- if (AXI_DATA_WIDTH == 32) begin
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ )
- wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte] & {8{wstrb_reg[idx_byte]}};
- end
- else if (AXI_DATA_WIDTH == 64) begin
- if (lower_word_is_written[j] == 1'b1)
- off_byte = 0;
- else
- off_byte = 4;
- // always put the payload in the lower word and set upper word to 0
- for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8/2; idx_byte++ )
- wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte+off_byte] & {8{wstrb_reg[idx_byte+off_byte]}};
- wdata_l2[j][AXI_DATA_WIDTH-1:AXI_DATA_WIDTH/2] <= 'b0;
- end
- // pragma translate_off
- else
- $fatal(1, "Unsupported AXI_DATA_WIDTH!");
- // pragma translate_on
- end
- else
- wren_l2[j] <= '0;
- end // always @ ( posedge Clk_CI or negedge Rst_RBI )
- // Properly align the 32-bit word address when writing from 64-bit interface:
- // Depending on the system, the incoming address is (non-)aligned to the 64-bit
- // word when writing the upper 32-bit word.
- always_comb begin
- waddr_l2[j] = (awaddr_reg -(j+1)*L2SINGLE_AMAP_SIZE)/4;
- if (wren_l2[j]) begin
- if (AXI_DATA_WIDTH == 64) begin
- if (upper_word_is_written[j] == 1'b1) begin
- // address must be non-aligned
- waddr_l2[j][0] = 1'b1;
- end
- end
- // pragma translate_off
- else if (AXI_DATA_WIDTH != 32) begin
- $fatal(1, "Unsupported AXI_DATA_WIDTH!");
- end
- // pragma translate_on
- end
- end
- // Assert that only one 32-bit word is ever written at a time to VA RAMs on 64-bit data
- // systems.
- // pragma translate_off
- always_ff @ (posedge Clk_CI) begin
- if (AXI_DATA_WIDTH == 64) begin
- if (l2_addr_is_in_va_rams[j]) begin
- if (upper_word_is_written[j]) begin
- assert (!lower_word_is_written[j])
- else $error("Unsupported write across two 32-bit words to VA RAMs!");
- end
- else if (lower_word_is_written[j]) begin
- assert (!upper_word_is_written[j])
- else $error("Unsupported write across two 32-bit words to VA RAMs!");
- end
- end
- end
- end
- // pragma translate_on
- end // for (j=0; j< N_PORTS; j++)
- endgenerate
- // ███╗ ███╗██╗ ██╗ ███████╗██╗███████╗ ██████╗ ███████╗
- // ████╗ ████║██║ ██║ ██╔════╝██║██╔════╝██╔═══██╗██╔════╝
- // ██╔████╔██║███████║ █████╗ ██║█████╗ ██║ ██║███████╗
- // ██║╚██╔╝██║██╔══██║ ██╔══╝ ██║██╔══╝ ██║ ██║╚════██║
- // ██║ ╚═╝ ██║██║ ██║ ██║ ██║██║ ╚██████╔╝███████║
- // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═════╝ ╚══════╝
- //
- logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDin_D;
- logic AddrFifoWen_S;
- logic AddrFifoRen_S;
- logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDout_D;
- logic AddrFifoFull_S;
- logic AddrFifoEmpty_S;
- logic AddrFifoEmpty_SB;
- logic AddrFifoFull_SB;
- logic [MISS_META_WIDTH-1:0] MetaFifoDin_D;
- logic MetaFifoWen_S;
- logic MetaFifoRen_S;
- logic [MISS_META_WIDTH-1:0] MetaFifoDout_D;
- logic MetaFifoFull_S;
- logic MetaFifoEmpty_S;
- logic MetaFifoEmpty_SB;
- logic MetaFifoFull_SB;
- logic FifosDisabled_S;
- logic ConfRegWen_S;
- logic [1:0] ConfReg_DN;
- logic [1:0] ConfReg_DP;
- logic [AXI_DATA_WIDTH-1:0] wdata_reg_vec;
- assign FifosDisabled_S = ConfReg_DP[0];
- assign L1AllowMultiHit_SO = ConfReg_DP[1];
- assign AddrFifoEmpty_S = ~AddrFifoEmpty_SB;
- assign MetaFifoEmpty_S = ~MetaFifoEmpty_SB;
- assign AddrFifoFull_S = ~AddrFifoFull_SB;
- assign MetaFifoFull_S = ~MetaFifoFull_SB;
- assign MhFifoFull_SO = (AddrFifoWen_S & AddrFifoFull_S) | (MetaFifoWen_S & MetaFifoFull_S);
- generate
- for ( j=0; j<AXI_DATA_WIDTH/8; j++ )
- assign wdata_reg_vec[(j+1)*8-1:j*8] = wdata_reg[j];
- endgenerate
- // write address FIFO
- always_comb
- begin
- AddrFifoWen_S = 1'b0;
- AddrFifoDin_D = 'b0;
- if ( (Miss_SI == 1'b1) && (FifosDisabled_S == 1'b0) ) // register a new miss
- begin
- AddrFifoWen_S = 1'b1;
- AddrFifoDin_D = MissAddr_DI;
- end
- else if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 'b0) && (FifosDisabled_S == 1'b0)) // write request from AXI interface
- begin
- AddrFifoWen_S = 1'b1;
- AddrFifoDin_D = wdata_reg_vec[ADDR_WIDTH_VIRT-1:0];
- end
- end
- // write meta FIFO
- always_comb
- begin
- MetaFifoWen_S = 1'b0;
- MetaFifoDin_D = 'b0;
- if ( (Miss_SI == 1'b1) && (FifosDisabled_S == 1'b0) ) // register a new miss
- begin
- MetaFifoWen_S = 1'b1;
- MetaFifoDin_D[MISS_META_WIDTH-1:0] = MissMeta_DI;
- end
- else if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 4'h8) && (FifosDisabled_S == 1'b0) ) // write request from AXI interface
- begin
- MetaFifoWen_S = 1'b1;
- MetaFifoDin_D = wdata_reg_vec[MISS_META_WIDTH-1:0];
- end
- end
- // write configuration register
- always_comb
- begin
- ConfRegWen_S = 1'b0;
- ConfReg_DN = 1'b0;
- if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 8'h10) ) // write request from AXI interface
- begin
- ConfRegWen_S = 1'b1;
- ConfReg_DN = wdata_reg_vec[$high(ConfReg_DN):0];
- end
- end
- // AXI read data
- always_comb
- begin
- s_axi_rdata = rdata_reg; // read L1 config
- AddrFifoRen_S = 1'b0;
- MetaFifoRen_S = 1'b0;
- if ( rvalid == 1'b1 )
- begin
- // read address FIFO
- if ( araddr_reg[ADDR_MSB:0] == 'b0 )
- begin
- s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
- s_axi_rdata[ADDR_WIDTH_VIRT-1:0] = AddrFifoDout_D;
- if ( AddrFifoEmpty_S == 1'b0 )
- AddrFifoRen_S = 1'b1;
- end
- // read meta FIFO
- else if ( araddr_reg[ADDR_MSB:0] == 4'h8 )
- begin
- s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
- s_axi_rdata[31] = MetaFifoEmpty_S;
- s_axi_rdata[MISS_META_WIDTH-1:0] = MetaFifoDout_D;
- if ( MetaFifoEmpty_S == 1'b0 )
- MetaFifoRen_S = 1'b1;
- end
- // read configuration register
- else if ( araddr_reg[ADDR_MSB:0] == 8'h10 )
- begin
- s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
- s_axi_rdata[$high(ConfReg_DP):0] = ConfReg_DP;
- end
- end // if ( rvalid == 1'b1 )
- end // always_comb begin
- // configuration register
- always_ff @(posedge Clk_CI or negedge Rst_RBI) begin
- if (Rst_RBI == 1'b0)
- begin
- ConfReg_DP <= 'b0;
- end
- else if (ConfRegWen_S == 1'b1)
- begin
- ConfReg_DP <= ConfReg_DN;
- end
- end
- generic_fifo
- #(
- )
- fifo_addr_i
- (
- .clk ( Clk_CI ),
- .rst_n ( Rst_RBI ),
- .data_i ( AddrFifoDin_D ),
- .valid_i ( AddrFifoWen_S & AddrFifoFull_SB ),
- .grant_o ( AddrFifoFull_SB ),
- .data_o ( AddrFifoDout_D ),
- .valid_o ( AddrFifoEmpty_SB ),
- .grant_i ( AddrFifoRen_S ),
- .test_mode_i ( 1'b0 )
- );
- generic_fifo
- #(
- )
- fifo_meta_i
- (
- .clk ( Clk_CI ),
- .rst_n ( Rst_RBI ),
- .data_i ( MetaFifoDin_D ),
- .valid_i ( MetaFifoWen_S & MetaFifoFull_SB ),
- .grant_o ( MetaFifoFull_SB ),
- .data_o ( MetaFifoDout_D ),
- .valid_o ( MetaFifoEmpty_SB ),
- .grant_i ( MetaFifoRen_S ),
- .test_mode_i ( 1'b0 )
- );
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class axi_rab_top(Elaboratable):
- def __init__(self):
- self.Clk_CI = Signal() # input
- self.NonGatedClk_CI = Signal() # input
- self.Rst_RBI = Signal() # input
- self.s_axi4_awid = Signal() # input
- self.s_axi4_awaddr = Signal() # input
- self.s_axi4_awvalid = Signal(N_PORTS) # input
- self.s_axi4_awready = Signal(N_PORTS) # output
- self.s_axi4_awlen = Signal() # input
- self.s_axi4_awsize = Signal() # input
- self.s_axi4_awburst = Signal() # input
- self.s_axi4_awlock = Signal(N_PORTS) # input
- self.s_axi4_awprot = Signal() # input
- self.s_axi4_awcache = Signal() # input
- self.s_axi4_awregion = Signal() # input
- self.s_axi4_awqos = Signal() # input
- self.s_axi4_awuser = Signal() # input
- self.s_axi4_wdata = Signal() # input
- self.s_axi4_wvalid = Signal(N_PORTS) # input
- self.s_axi4_wready = Signal(N_PORTS) # output
- self.s_axi4_wstrb = Signal() # input
- self.s_axi4_wlast = Signal(N_PORTS) # input
- self.s_axi4_wuser = Signal() # input
- self.s_axi4_bid = Signal() # output
- self.s_axi4_bresp = Signal() # output
- self.s_axi4_bvalid = Signal(N_PORTS) # output
- self.s_axi4_buser = Signal() # output
- self.s_axi4_bready = Signal(N_PORTS) # input
- self.s_axi4_arid = Signal() # input
- self.s_axi4_araddr = Signal() # input
- self.s_axi4_arvalid = Signal(N_PORTS) # input
- self.s_axi4_arready = Signal(N_PORTS) # output
- self.s_axi4_arlen = Signal() # input
- self.s_axi4_arsize = Signal() # input
- self.s_axi4_arburst = Signal() # input
- self.s_axi4_arlock = Signal(N_PORTS) # input
- self.s_axi4_arprot = Signal() # input
- self.s_axi4_arcache = Signal() # input
- self.s_axi4_aruser = Signal() # input
- self.s_axi4_rid = Signal() # output
- self.s_axi4_rdata = Signal() # output
- self.s_axi4_rresp = Signal() # output
- self.s_axi4_rvalid = Signal(N_PORTS) # output
- self.s_axi4_rready = Signal(N_PORTS) # input
- self.s_axi4_rlast = Signal(N_PORTS) # output
- self.s_axi4_ruser = Signal() # output
- self.m0_axi4_awid = Signal() # output
- self.m0_axi4_awaddr = Signal() # output
- self.m0_axi4_awvalid = Signal(N_PORTS) # output
- self.m0_axi4_awready = Signal(N_PORTS) # input
- self.m0_axi4_awlen = Signal() # output
- self.m0_axi4_awsize = Signal() # output
- self.m0_axi4_awburst = Signal() # output
- self.m0_axi4_awlock = Signal(N_PORTS) # output
- self.m0_axi4_awprot = Signal() # output
- self.m0_axi4_awcache = Signal() # output
- self.m0_axi4_awregion = Signal() # output
- self.m0_axi4_awqos = Signal() # output
- self.m0_axi4_awuser = Signal() # output
- self.m0_axi4_wdata = Signal() # output
- self.m0_axi4_wvalid = Signal(N_PORTS) # output
- self.m0_axi4_wready = Signal(N_PORTS) # input
- self.m0_axi4_wstrb = Signal() # output
- self.m0_axi4_wlast = Signal(N_PORTS) # output
- self.m0_axi4_wuser = Signal() # output
- self.m0_axi4_bid = Signal() # input
- self.m0_axi4_bresp = Signal() # input
- self.m0_axi4_bvalid = Signal(N_PORTS) # input
- self.m0_axi4_buser = Signal() # input
- self.m0_axi4_bready = Signal(N_PORTS) # output
- self.m0_axi4_arid = Signal() # output
- self.m0_axi4_araddr = Signal() # output
- self.m0_axi4_arvalid = Signal(N_PORTS) # output
- self.m0_axi4_arready = Signal(N_PORTS) # input
- self.m0_axi4_arlen = Signal() # output
- self.m0_axi4_arsize = Signal() # output
- self.m0_axi4_arburst = Signal() # output
- self.m0_axi4_arlock = Signal(N_PORTS) # output
- self.m0_axi4_arprot = Signal() # output
- self.m0_axi4_arcache = Signal() # output
- self.m0_axi4_aruser = Signal() # output
- self.m0_axi4_rid = Signal() # input
- self.m0_axi4_rdata = Signal() # input
- self.m0_axi4_rresp = Signal() # input
- self.m0_axi4_rvalid = Signal(N_PORTS) # input
- self.m0_axi4_rready = Signal(N_PORTS) # output
- self.m0_axi4_rlast = Signal(N_PORTS) # input
- self.m0_axi4_ruser = Signal() # input
- self.m1_axi4_awid = Signal() # output
- self.m1_axi4_awaddr = Signal() # output
- self.m1_axi4_awvalid = Signal(N_PORTS) # output
- self.m1_axi4_awready = Signal(N_PORTS) # input
- self.m1_axi4_awlen = Signal() # output
- self.m1_axi4_awsize = Signal() # output
- self.m1_axi4_awburst = Signal() # output
- self.m1_axi4_awlock = Signal(N_PORTS) # output
- self.m1_axi4_awprot = Signal() # output
- self.m1_axi4_awcache = Signal() # output
- self.m1_axi4_awregion = Signal() # output
- self.m1_axi4_awqos = Signal() # output
- self.m1_axi4_awuser = Signal() # output
- self.m1_axi4_wdata = Signal() # output
- self.m1_axi4_wvalid = Signal(N_PORTS) # output
- self.m1_axi4_wready = Signal(N_PORTS) # input
- self.m1_axi4_wstrb = Signal() # output
- self.m1_axi4_wlast = Signal(N_PORTS) # output
- self.m1_axi4_wuser = Signal() # output
- self.m1_axi4_bid = Signal() # input
- self.m1_axi4_bresp = Signal() # input
- self.m1_axi4_bvalid = Signal(N_PORTS) # input
- self.m1_axi4_buser = Signal() # input
- self.m1_axi4_bready = Signal(N_PORTS) # output
- self.m1_axi4_arid = Signal() # output
- self.m1_axi4_araddr = Signal() # output
- self.m1_axi4_arvalid = Signal(N_PORTS) # output
- self.m1_axi4_arready = Signal(N_PORTS) # input
- self.m1_axi4_arlen = Signal() # output
- self.m1_axi4_arsize = Signal() # output
- self.m1_axi4_arburst = Signal() # output
- self.m1_axi4_arlock = Signal(N_PORTS) # output
- self.m1_axi4_arprot = Signal() # output
- self.m1_axi4_arcache = Signal() # output
- self.m1_axi4_aruser = Signal() # output
- self.m1_axi4_rid = Signal() # input
- self.m1_axi4_rdata = Signal() # input
- self.m1_axi4_rresp = Signal() # input
- self.m1_axi4_rvalid = Signal(N_PORTS) # input
- self.m1_axi4_rready = Signal(N_PORTS) # output
- self.m1_axi4_rlast = Signal(N_PORTS) # input
- self.m1_axi4_ruser = Signal() # input
- self.s_axi4lite_awaddr = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.s_axi4lite_awvalid = Signal() # input
- self.s_axi4lite_awready = Signal() # output
- self.s_axi4lite_wdata = Signal(AXI_LITE_DATA_WIDTH) # input
- self.s_axi4lite_wvalid = Signal() # input
- self.s_axi4lite_wready = Signal() # output
- self.s_axi4lite_wstrb = Signal(1+ERROR p_expression_25) # input
- self.s_axi4lite_bresp = Signal(2) # output
- self.s_axi4lite_bvalid = Signal() # output
- self.s_axi4lite_bready = Signal() # input
- self.s_axi4lite_araddr = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.s_axi4lite_arvalid = Signal() # input
- self.s_axi4lite_arready = Signal() # output
- self.s_axi4lite_rdata = Signal(AXI_LITE_DATA_WIDTH) # output
- self.s_axi4lite_rresp = Signal(2) # output
- self.s_axi4lite_rvalid = Signal() # output
- self.s_axi4lite_rready = Signal() # input
- self.int_miss = Signal(N_PORTS) # output
- self.int_multi = Signal(N_PORTS) # output
- self.int_prot = Signal(N_PORTS) # output
- self.int_mhf_full = Signal() # output
- def elaborate(self, platform=None):
- m = Module()
- return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# // --=========================================================================--
-# //
-# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ████████╗ ██████╗ ██████╗
-# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ╚══██╔══╝██╔═══██╗██╔══██╗
-# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝
-# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔═══╝
-# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ██║ ╚██████╔╝██║
-# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═════╝ ╚═╝
-# //
-# // --=========================================================================--
-# /*
-# * axi_rab_top
-# *
-# * The remapping address block (RAB) performs address translation for AXI
-# * transactions arriving at the input port and forwards them to different
-# * downstream AXI ports.
-# *
-# * The five axi channels are each buffered on the input side using a FIFO,
-# * described in axi4_XX_buffer. The RAB lookup result is merged into the
-# * AXI transaction via the axi4_XX_sender instances, which manages upstream
-# * error signaling for failed lookups.
-# *
-# * Address translation is performed based on data stored in up to two
-# * translation lookaside buffers (TLBs), which are private per RAB port (each
-# * of which has two AXI master ports and one AXI slave port). These TLBs
-# * are managed in software through the AXI-Lite interface.
-# *
-# * If ACP is enabled, the `cache_coherent` flag in the TLBs is used to
-# * multiplex between the two ports. If ACP is disabled, only the first master
-# * port is used. In this case, the `cache_coherent` flag is used to set the
-# * AxCACHE signals of the AXI bus accordingly.
-# *
-# * Authors:
-# * Antonio Pullini <pullinia@iis.ee.ethz.ch>
-# * Conrad Burchert <bconrad@ethz.ch>
-# * Maheshwara Sharma <msharma@student.ethz.ch>
-# * Andreas Kurth <akurth@iis.ee.ethz.ch>
-# * Johannes Weinbuch <jweinbuch@student.ethz.ch>
-# * Pirmin Vogel <vogelpi@iis.ee.ethz.ch>
-# */
-# //`include "pulp_soc_defines.sv"
-# ////import CfMath::log2;
-# module axi_rab_top
-# // Parameters {{{
-# #(
-# parameter N_PORTS = 2,
-# parameter N_L2_SETS = 32,
-# parameter N_L2_SET_ENTRIES = 32,
-# parameter AXI_DATA_WIDTH = 64,
-# parameter AXI_S_ADDR_WIDTH = 32,
-# parameter AXI_M_ADDR_WIDTH = 40,
-# parameter AXI_LITE_DATA_WIDTH = 64,
-# parameter AXI_LITE_ADDR_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 10,
-# parameter AXI_USER_WIDTH = 6,
-# parameter MH_FIFO_DEPTH = 16
-# )
-# // }}}
-# // Ports {{{
-# (
-# input logic Clk_CI, // This clock may be gated.
-# input logic NonGatedClk_CI,
-# input logic Rst_RBI,
-# // For every slave port there are two master ports. The master
-# // port to use can be set using the master_select flag of the protection
-# // bits of a slice
-# // AXI4 Slave {{{
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_awid,
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_awaddr,
-# input logic [N_PORTS-1:0] s_axi4_awvalid,
-# output logic [N_PORTS-1:0] s_axi4_awready,
-# input logic [N_PORTS-1:0] [7:0] s_axi4_awlen,
-# input logic [N_PORTS-1:0] [2:0] s_axi4_awsize,
-# input logic [N_PORTS-1:0] [1:0] s_axi4_awburst,
-# input logic [N_PORTS-1:0] s_axi4_awlock,
-# input logic [N_PORTS-1:0] [2:0] s_axi4_awprot,
-# input logic [N_PORTS-1:0] [3:0] s_axi4_awcache,
-# input logic [N_PORTS-1:0] [3:0] s_axi4_awregion,
-# input logic [N_PORTS-1:0] [3:0] s_axi4_awqos,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_awuser,
-# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
-# input logic [N_PORTS-1:0] s_axi4_wvalid,
-# output logic [N_PORTS-1:0] s_axi4_wready,
-# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
-# input logic [N_PORTS-1:0] s_axi4_wlast,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_wuser,
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_bid,
-# output logic [N_PORTS-1:0] [1:0] s_axi4_bresp,
-# output logic [N_PORTS-1:0] s_axi4_bvalid,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_buser,
-# input logic [N_PORTS-1:0] s_axi4_bready,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_arid,
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_araddr,
-# input logic [N_PORTS-1:0] s_axi4_arvalid,
-# output logic [N_PORTS-1:0] s_axi4_arready,
-# input logic [N_PORTS-1:0] [7:0] s_axi4_arlen,
-# input logic [N_PORTS-1:0] [2:0] s_axi4_arsize,
-# input logic [N_PORTS-1:0] [1:0] s_axi4_arburst,
-# input logic [N_PORTS-1:0] s_axi4_arlock,
-# input logic [N_PORTS-1:0] [2:0] s_axi4_arprot,
-# input logic [N_PORTS-1:0] [3:0] s_axi4_arcache,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_aruser,
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_rid,
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
-# output logic [N_PORTS-1:0] [1:0] s_axi4_rresp,
-# output logic [N_PORTS-1:0] s_axi4_rvalid,
-# input logic [N_PORTS-1:0] s_axi4_rready,
-# output logic [N_PORTS-1:0] s_axi4_rlast,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_ruser,
-# // }}}
-# // AXI4 Master 0 {{{
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_awid,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_awaddr,
-# output logic [N_PORTS-1:0] m0_axi4_awvalid,
-# input logic [N_PORTS-1:0] m0_axi4_awready,
-# output logic [N_PORTS-1:0] [7:0] m0_axi4_awlen,
-# output logic [N_PORTS-1:0] [2:0] m0_axi4_awsize,
-# output logic [N_PORTS-1:0] [1:0] m0_axi4_awburst,
-# output logic [N_PORTS-1:0] m0_axi4_awlock,
-# output logic [N_PORTS-1:0] [2:0] m0_axi4_awprot,
-# output logic [N_PORTS-1:0] [3:0] m0_axi4_awcache,
-# output logic [N_PORTS-1:0] [3:0] m0_axi4_awregion,
-# output logic [N_PORTS-1:0] [3:0] m0_axi4_awqos,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_awuser,
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_wdata,
-# output logic [N_PORTS-1:0] m0_axi4_wvalid,
-# input logic [N_PORTS-1:0] m0_axi4_wready,
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m0_axi4_wstrb,
-# output logic [N_PORTS-1:0] m0_axi4_wlast,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_wuser,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_bid,
-# input logic [N_PORTS-1:0] [1:0] m0_axi4_bresp,
-# input logic [N_PORTS-1:0] m0_axi4_bvalid,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_buser,
-# output logic [N_PORTS-1:0] m0_axi4_bready,
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_arid,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_araddr,
-# output logic [N_PORTS-1:0] m0_axi4_arvalid,
-# input logic [N_PORTS-1:0] m0_axi4_arready,
-# output logic [N_PORTS-1:0] [7:0] m0_axi4_arlen,
-# output logic [N_PORTS-1:0] [2:0] m0_axi4_arsize,
-# output logic [N_PORTS-1:0] [1:0] m0_axi4_arburst,
-# output logic [N_PORTS-1:0] m0_axi4_arlock,
-# output logic [N_PORTS-1:0] [2:0] m0_axi4_arprot,
-# output logic [N_PORTS-1:0] [3:0] m0_axi4_arcache,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_aruser,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_rid,
-# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_rdata,
-# input logic [N_PORTS-1:0] [1:0] m0_axi4_rresp,
-# input logic [N_PORTS-1:0] m0_axi4_rvalid,
-# output logic [N_PORTS-1:0] m0_axi4_rready,
-# input logic [N_PORTS-1:0] m0_axi4_rlast,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_ruser,
-# // }}}
-# // AXI4 Master 1 {{{
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_awid,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_awaddr,
-# output logic [N_PORTS-1:0] m1_axi4_awvalid,
-# input logic [N_PORTS-1:0] m1_axi4_awready,
-# output logic [N_PORTS-1:0] [7:0] m1_axi4_awlen,
-# output logic [N_PORTS-1:0] [2:0] m1_axi4_awsize,
-# output logic [N_PORTS-1:0] [1:0] m1_axi4_awburst,
-# output logic [N_PORTS-1:0] m1_axi4_awlock,
-# output logic [N_PORTS-1:0] [2:0] m1_axi4_awprot,
-# output logic [N_PORTS-1:0] [3:0] m1_axi4_awcache,
-# output logic [N_PORTS-1:0] [3:0] m1_axi4_awregion,
-# output logic [N_PORTS-1:0] [3:0] m1_axi4_awqos,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_awuser,
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_wdata,
-# output logic [N_PORTS-1:0] m1_axi4_wvalid,
-# input logic [N_PORTS-1:0] m1_axi4_wready,
-# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m1_axi4_wstrb,
-# output logic [N_PORTS-1:0] m1_axi4_wlast,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_wuser,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_bid,
-# input logic [N_PORTS-1:0] [1:0] m1_axi4_bresp,
-# input logic [N_PORTS-1:0] m1_axi4_bvalid,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_buser,
-# output logic [N_PORTS-1:0] m1_axi4_bready,
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_arid,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_araddr,
-# output logic [N_PORTS-1:0] m1_axi4_arvalid,
-# input logic [N_PORTS-1:0] m1_axi4_arready,
-# output logic [N_PORTS-1:0] [7:0] m1_axi4_arlen,
-# output logic [N_PORTS-1:0] [2:0] m1_axi4_arsize,
-# output logic [N_PORTS-1:0] [1:0] m1_axi4_arburst,
-# output logic [N_PORTS-1:0] m1_axi4_arlock,
-# output logic [N_PORTS-1:0] [2:0] m1_axi4_arprot,
-# output logic [N_PORTS-1:0] [3:0] m1_axi4_arcache,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_aruser,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_rid,
-# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_rdata,
-# input logic [N_PORTS-1:0] [1:0] m1_axi4_rresp,
-# input logic [N_PORTS-1:0] m1_axi4_rvalid,
-# output logic [N_PORTS-1:0] m1_axi4_rready,
-# input logic [N_PORTS-1:0] m1_axi4_rlast,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_ruser,
-# // }}}
-# // AXI 4 Lite Slave (Configuration Interface) {{{
-# // AXI4-Lite port to setup the rab slices
-# // use this to program the configuration registers
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_awaddr,
-# input logic s_axi4lite_awvalid,
-# output logic s_axi4lite_awready,
-# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_wdata,
-# input logic s_axi4lite_wvalid,
-# output logic s_axi4lite_wready,
-# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi4lite_wstrb,
-# output logic [1:0] s_axi4lite_bresp,
-# output logic s_axi4lite_bvalid,
-# input logic s_axi4lite_bready,
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_araddr,
-# input logic s_axi4lite_arvalid,
-# output logic s_axi4lite_arready,
-# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_rdata,
-# output logic [1:0] s_axi4lite_rresp,
-# output logic s_axi4lite_rvalid,
-# input logic s_axi4lite_rready,
-# // }}}
-# // BRAMs {{{
-# //`ifdef RAB_AX_LOG_EN
-# // BramPort.Slave ArBram_PS,
-# // BramPort.Slave AwBram_PS,
-# //`endif
-# // }}}
-# // Logger Control {{{
-# //`ifdef RAB_AX_LOG_EN
-# // input logic LogEn_SI,
-# // input logic ArLogClr_SI,
-# // input logic AwLogClr_SI,
-# // output logic ArLogRdy_SO,
-# // output logic AwLogRdy_SO,
-# //`endif
-# // }}}
-# // Interrupt Outputs {{{
-# // Interrupt lines to handle misses, collisions of slices/multiple hits,
-# // protection faults and overflow of the miss handling fifo
-# //`ifdef RAB_AX_LOG_EN
-# // output logic int_ar_log_full,
-# // output logic int_aw_log_full,
-# //`endif
-# output logic [N_PORTS-1:0] int_miss,
-# output logic [N_PORTS-1:0] int_multi,
-# output logic [N_PORTS-1:0] int_prot,
-# output logic int_mhf_full
-# // }}}
-# );
- // }}}
- // Signals {{{
- // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗
- // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝
- // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗
- // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║
- // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║
- // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝
- //
- // Internal AXI4 lines, these connect buffers on the slave side to the rab core and
- // multiplexers which switch between the two master outputs
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_awid;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_awaddr;
- logic [N_PORTS-1:0] int_awvalid;
- logic [N_PORTS-1:0] int_awready;
- logic [N_PORTS-1:0] [7:0] int_awlen;
- logic [N_PORTS-1:0] [2:0] int_awsize;
- logic [N_PORTS-1:0] [1:0] int_awburst;
- logic [N_PORTS-1:0] int_awlock;
- logic [N_PORTS-1:0] [2:0] int_awprot;
- logic [N_PORTS-1:0] [3:0] int_awcache;
- logic [N_PORTS-1:0] [3:0] int_awregion;
- logic [N_PORTS-1:0] [3:0] int_awqos;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_awuser;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_wdata;
- logic [N_PORTS-1:0] int_wvalid;
- logic [N_PORTS-1:0] int_wready;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] int_wstrb;
- logic [N_PORTS-1:0] int_wlast;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_wuser;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_bid;
- logic [N_PORTS-1:0] [1:0] int_bresp;
- logic [N_PORTS-1:0] int_bvalid;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_buser;
- logic [N_PORTS-1:0] int_bready;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_arid;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_araddr;
- logic [N_PORTS-1:0] int_arvalid;
- logic [N_PORTS-1:0] int_arready;
- logic [N_PORTS-1:0] [7:0] int_arlen;
- logic [N_PORTS-1:0] [2:0] int_arsize;
- logic [N_PORTS-1:0] [1:0] int_arburst;
- logic [N_PORTS-1:0] int_arlock;
- logic [N_PORTS-1:0] [2:0] int_arprot;
- logic [N_PORTS-1:0] [3:0] int_arcache;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_aruser;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_rid;
- logic [N_PORTS-1:0] [1:0] int_rresp;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_rdata;
- logic [N_PORTS-1:0] int_rlast;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_ruser;
- logic [N_PORTS-1:0] int_rvalid;
- logic [N_PORTS-1:0] int_rready;
- // rab_core outputs
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_wtrans_addr;
- logic [N_PORTS-1:0] int_wtrans_accept;
- logic [N_PORTS-1:0] int_wtrans_drop;
- logic [N_PORTS-1:0] int_wtrans_miss;
- logic [N_PORTS-1:0] int_wtrans_sent;
- logic [N_PORTS-1:0] int_wtrans_cache_coherent;
- logic [N_PORTS-1:0] int_wmaster_select;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_rtrans_addr;
- logic [N_PORTS-1:0] int_rtrans_accept;
- logic [N_PORTS-1:0] int_rtrans_drop;
- logic [N_PORTS-1:0] int_rtrans_miss;
- logic [N_PORTS-1:0] int_rtrans_sent;
- logic [N_PORTS-1:0] int_rtrans_cache_coherent;
- logic [N_PORTS-1:0] int_rmaster_select;
- logic [N_PORTS-1:0] w_master_select;
- // Internal master0 AXI4 lines. These connect the first master port to the
- // multiplexers
 - // For the read address, write address and write data channels, the other lines
 - // are ignored if valid is not set, so only the valid/ready handshake signals need multiplexing
- logic [N_PORTS-1:0] int_m0_awvalid;
- logic [N_PORTS-1:0] int_m0_awready;
- logic [N_PORTS-1:0] int_m0_wvalid;
- logic [N_PORTS-1:0] int_m0_wready;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_bid;
- logic [N_PORTS-1:0] [1:0] int_m0_bresp;
- logic [N_PORTS-1:0] int_m0_bvalid;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_buser;
- logic [N_PORTS-1:0] int_m0_bready;
- logic [N_PORTS-1:0] int_m0_arvalid;
- logic [N_PORTS-1:0] int_m0_arready;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_rid;
- logic [N_PORTS-1:0] [1:0] int_m0_rresp;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m0_rdata;
- logic [N_PORTS-1:0] int_m0_rlast;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_ruser;
- logic [N_PORTS-1:0] int_m0_rready;
- logic [N_PORTS-1:0] int_m0_rvalid;
- logic [N_PORTS-1:0] l1_m0_ar_accept;
- logic [N_PORTS-1:0] l1_m0_ar_drop;
- logic [N_PORTS-1:0] l1_m0_ar_save;
- logic [N_PORTS-1:0] l1_m0_ar_done;
- logic [N_PORTS-1:0] l2_m0_ar_accept;
- logic [N_PORTS-1:0] l2_m0_ar_drop;
- logic [N_PORTS-1:0] l2_m0_ar_done;
- logic [N_PORTS-1:0] l2_m0_ar_sending;
- logic [N_PORTS-1:0] l1_m0_aw_accept;
- logic [N_PORTS-1:0] l1_m0_aw_drop;
- logic [N_PORTS-1:0] l1_m0_aw_save;
- logic [N_PORTS-1:0] l1_m0_aw_done;
- logic [N_PORTS-1:0] l2_m0_aw_accept;
- logic [N_PORTS-1:0] l2_m0_aw_drop;
- logic [N_PORTS-1:0] l2_m0_aw_done;
- logic [N_PORTS-1:0] l2_m0_aw_sending;
- // Internal master1 AXI4 lines. These connect the second master port to the
- // multiplexers
 - // For the read address, write address and write data channels, the other lines
 - // are ignored if valid is not set, so only the valid/ready handshake signals need multiplexing
- logic [N_PORTS-1:0] int_m1_awvalid;
- logic [N_PORTS-1:0] int_m1_awready;
- logic [N_PORTS-1:0] int_m1_wvalid;
- logic [N_PORTS-1:0] int_m1_wready;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_bid;
- logic [N_PORTS-1:0] [1:0] int_m1_bresp;
- logic [N_PORTS-1:0] int_m1_bvalid;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_buser;
- logic [N_PORTS-1:0] int_m1_bready;
- logic [N_PORTS-1:0] int_m1_arvalid;
- logic [N_PORTS-1:0] int_m1_arready;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_rid;
- logic [N_PORTS-1:0] [1:0] int_m1_rresp;
- logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m1_rdata;
- logic [N_PORTS-1:0] int_m1_rlast;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_ruser;
- logic [N_PORTS-1:0] int_m1_rvalid;
- logic [N_PORTS-1:0] int_m1_rready;
- logic [N_PORTS-1:0] l1_m1_ar_accept;
- logic [N_PORTS-1:0] l1_m1_ar_drop;
- logic [N_PORTS-1:0] l1_m1_ar_save;
- logic [N_PORTS-1:0] l1_m1_ar_done;
- logic [N_PORTS-1:0] l2_m1_ar_accept;
- logic [N_PORTS-1:0] l2_m1_ar_drop;
- logic [N_PORTS-1:0] l2_m1_ar_done;
- logic [N_PORTS-1:0] l1_m1_aw_accept;
- logic [N_PORTS-1:0] l1_m1_aw_drop;
- logic [N_PORTS-1:0] l1_m1_aw_save;
- logic [N_PORTS-1:0] l1_m1_aw_done;
- logic [N_PORTS-1:0] l2_m1_aw_accept;
- logic [N_PORTS-1:0] l2_m1_aw_drop;
- logic [N_PORTS-1:0] l2_m1_aw_done;
- // L1 outputs
- logic [N_PORTS-1:0] rab_miss; // L1 RAB miss
- logic [N_PORTS-1:0] rab_prot;
- logic [N_PORTS-1:0] rab_multi;
- logic [N_PORTS-1:0] rab_prefetch;
- //
- // Signals used to support L2 TLB
- //
- // L2 RAM configuration signals
- logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] L2CfgWData_D;
- logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] L2CfgWAddr_D;
- logic [N_PORTS-1:0] L2CfgWE_S;
- // L1 output and drop Buffer
- logic [N_PORTS-1:0] L1OutRwType_D, L1DropRwType_DP;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L1OutUser_D, L1DropUser_DP;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L1OutId_D, L1DropId_DP;
- logic [N_PORTS-1:0] [7:0] L1OutLen_D, L1DropLen_DP;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L1OutAddr_D, L1DropAddr_DP;
- logic [N_PORTS-1:0] L1OutProt_D, L1DropProt_DP;
- logic [N_PORTS-1:0] L1OutMulti_D, L1DropMulti_DP;
- logic [N_PORTS-1:0] L1DropEn_S;
- logic [N_PORTS-1:0] L1DropPrefetch_S;
- logic [N_PORTS-1:0] L1DropValid_SN, L1DropValid_SP;
- // L2 input Buffer
- logic [N_PORTS-1:0] L2InRwType_DP;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2InUser_DP;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2InId_DP;
- logic [N_PORTS-1:0] [7:0] L2InLen_DP;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2InAddr_DP;
- logic [N_PORTS-1:0] L2InEn_S;
- // L2 output Buffer
- logic [N_PORTS-1:0] L2OutRwType_DP;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2OutUser_DP;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2OutId_DP;
- logic [N_PORTS-1:0] [7:0] L2OutLen_DP;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2OutInAddr_DP;
- logic [N_PORTS-1:0] L2OutHit_SN, L2OutHit_SP;
- logic [N_PORTS-1:0] L2OutMiss_SN, L2OutMiss_SP;
- logic [N_PORTS-1:0] L2OutProt_SN, L2OutProt_SP;
- logic [N_PORTS-1:0] L2OutMulti_SN, L2OutMulti_SP;
- logic [N_PORTS-1:0] L2OutCC_SN, L2OutCC_SP;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] L2OutAddr_DN, L2OutAddr_DP;
- logic [N_PORTS-1:0] L2OutValid_SN, L2OutValid_SP;
- logic [N_PORTS-1:0] L2OutPrefetch_S;
- logic [N_PORTS-1:0] L2OutReady_S;
- logic [N_PORTS-1:0] L2OutEn_S;
- // L2 outputs
- logic [N_PORTS-1:0] L2Busy_S;
- logic [N_PORTS-1:0] L2OutValid_S;
- logic [N_PORTS-1:0] L2Miss_S;
- // Signals for interfacing the AXI modules
- logic [N_PORTS-1:0] l1_ar_accept;
- logic [N_PORTS-1:0] l1_aw_accept;
- logic [N_PORTS-1:0] l1_w_accept;
- logic [N_PORTS-1:0] l1_xw_accept;
- logic [N_PORTS-1:0] l1_ar_drop;
- logic [N_PORTS-1:0] l1_aw_drop;
- logic [N_PORTS-1:0] l1_w_drop;
- logic [N_PORTS-1:0] l1_xw_drop;
- logic [N_PORTS-1:0] l1_ar_save;
- logic [N_PORTS-1:0] l1_aw_save;
- logic [N_PORTS-1:0] l1_w_save;
- logic [N_PORTS-1:0] l1_xw_save;
- logic [N_PORTS-1:0] l1_ar_done;
- logic [N_PORTS-1:0] l1_r_done;
- logic [N_PORTS-1:0] l1_r_drop;
- logic [N_PORTS-1:0] lx_r_drop;
- logic [N_PORTS-1:0] lx_r_done;
- logic [N_PORTS-1:0] l1_aw_done;
- logic [N_PORTS-1:0] l1_w_done;
- logic [N_PORTS-1:0] l1_xw_done;
- logic [N_PORTS-1:0] l1_aw_done_SP;
- logic [N_PORTS-1:0] l1_w_done_SP;
- logic [N_PORTS-1:0] l2_ar_accept;
- logic [N_PORTS-1:0] l2_aw_accept;
- logic [N_PORTS-1:0] l2_w_accept;
- logic [N_PORTS-1:0] l2_xw_accept;
- logic [N_PORTS-1:0] l2_ar_drop;
- logic [N_PORTS-1:0] l2_r_drop;
- logic [N_PORTS-1:0] l2_xr_drop;
- logic [N_PORTS-1:0] l2_aw_drop;
- logic [N_PORTS-1:0] l2_w_drop;
- logic [N_PORTS-1:0] l2_xw_drop;
- logic [N_PORTS-1:0] l2_aw_done;
- logic [N_PORTS-1:0] l2_w_done;
- logic [N_PORTS-1:0] l2_xw_done;
- logic [N_PORTS-1:0] l2_aw_done_SP;
- logic [N_PORTS-1:0] l2_w_done_SP;
- logic [N_PORTS-1:0] l2_ar_done;
- logic [N_PORTS-1:0] l2_r_done;
- logic [N_PORTS-1:0] l2_xr_done;
- logic [N_PORTS-1:0] l2_ar_done_SP;
- logic [N_PORTS-1:0] l2_r_done_SP;
- logic [N_PORTS-1:0] l1_mx_aw_done;
- logic [N_PORTS-1:0] l1_mx_ar_done;
- logic [N_PORTS-1:0] l1_m0_aw_done_SP;
- logic [N_PORTS-1:0] l1_m0_ar_done_SP;
- logic [N_PORTS-1:0] l1_m1_aw_done_SP;
- logic [N_PORTS-1:0] l1_m1_ar_done_SP;
- logic [N_PORTS-1:0] l2_mx_aw_done;
- logic [N_PORTS-1:0] l2_mx_ar_done;
- logic [N_PORTS-1:0] l2_m0_aw_done_SP;
- logic [N_PORTS-1:0] l2_m0_ar_done_SP;
- logic [N_PORTS-1:0] l2_m1_aw_done_SP;
- logic [N_PORTS-1:0] l2_m1_ar_done_SP;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] l1_id_drop, lx_id_drop, b_id_drop;
- logic [N_PORTS-1:0] [7:0] l1_len_drop, lx_len_drop;
- logic [N_PORTS-1:0] l1_prefetch_drop, lx_prefetch_drop, b_prefetch_drop;
- logic [N_PORTS-1:0] l1_hit_drop, lx_hit_drop, b_hit_drop;
- logic [N_PORTS-1:0] b_drop;
- logic [N_PORTS-1:0] b_done;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_aw_addr;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_ar_addr;
- logic [N_PORTS-1:0] l2_cache_coherent;
- logic [N_PORTS-1:0] l2_master_select;
- logic [N_PORTS-1:0] aw_in_stall;
- logic [N_PORTS-1:0] aw_out_stall;
- genvar i;
- typedef enum logic {IDLE, BUSY} r_resp_mux_ctrl_state_t;
- r_resp_mux_ctrl_state_t [N_PORTS-1:0] RRespMuxCtrl_SN, RRespMuxCtrl_SP;
- logic [N_PORTS-1:0] RRespSel_SN, RRespSel_SP;
- logic [N_PORTS-1:0] RRespBurst_S;
- logic [N_PORTS-1:0] RRespSelIm_S;
- // }}}
- // Local parameters {{{
- // Enable L2 for select ports
- localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY;
- // L2TLB parameters
- localparam integer HUM_BUFFER_DEPTH = (N_L2_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS)+13;
- // }}}
- // Derive `master_select` from cache coherency flag. {{{
- `ifdef EN_ACP
- assign int_wmaster_select = int_wtrans_cache_coherent;
- assign int_rmaster_select = int_rtrans_cache_coherent;
- assign l2_master_select = l2_cache_coherent;
- `else
- assign int_wmaster_select = '0;
- assign int_rmaster_select = '0;
- assign l2_master_select = '0;
- `endif
- // }}}
- // Buf and Send {{{
- // ██████╗ ██╗ ██╗███████╗ ██╗ ███████╗███████╗███╗ ██╗██████╗
- // ██╔══██╗██║ ██║██╔════╝ ██║ ██╔════╝██╔════╝████╗ ██║██╔══██╗
- // ██████╔╝██║ ██║█████╗ ████████╗ ███████╗█████╗ ██╔██╗ ██║██║ ██║
- // ██╔══██╗██║ ██║██╔══╝ ██╔═██╔═╝ ╚════██║██╔══╝ ██║╚██╗██║██║ ██║
- // ██████╔╝╚██████╔╝██║ ██████║ ███████║███████╗██║ ╚████║██████╔╝
- // ╚═════╝ ╚═════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚═════╝
- //
- logic[N_PORTS-1:0] m0_write_is_burst, m0_read_is_burst;
- logic[N_PORTS-1:0] m1_write_is_burst, m1_read_is_burst;
- generate for (i = 0; i < N_PORTS; i++) begin : BUF_AND_SEND
- // Write Address channel (aw) {{{
- /*
- * write address channel (aw)
- *
- * ██╗ ██╗██████╗ ██╗████████╗███████╗ █████╗ ██████╗ ██████╗ ██████╗
- * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗
- * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ███████║██║ ██║██║ ██║██████╔╝
- * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██║██║ ██║██║ ██║██╔══██╗
- * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║██████╔╝██████╔╝██║ ██║
- * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
- *
- */
- axi4_aw_buffer
- #(
- )
- u_aw_buffer
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_awid ( s_axi4_awid[i] ),
- .s_axi4_awaddr ( s_axi4_awaddr[i] ),
- .s_axi4_awvalid ( s_axi4_awvalid[i] ),
- .s_axi4_awready ( s_axi4_awready[i] ),
- .s_axi4_awlen ( s_axi4_awlen[i] ),
- .s_axi4_awsize ( s_axi4_awsize[i] ),
- .s_axi4_awburst ( s_axi4_awburst[i] ),
- .s_axi4_awlock ( s_axi4_awlock[i] ),
- .s_axi4_awprot ( s_axi4_awprot[i] ),
- .s_axi4_awcache ( s_axi4_awcache[i] ),
- .s_axi4_awregion ( s_axi4_awregion[i] ),
- .s_axi4_awqos ( s_axi4_awqos[i] ),
- .s_axi4_awuser ( s_axi4_awuser[i] ),
- .m_axi4_awid ( int_awid[i] ),
- .m_axi4_awaddr ( int_awaddr[i] ),
- .m_axi4_awvalid ( int_awvalid[i] ),
- .m_axi4_awready ( int_awready[i] ),
- .m_axi4_awlen ( int_awlen[i] ),
- .m_axi4_awsize ( int_awsize[i] ),
- .m_axi4_awburst ( int_awburst[i] ),
- .m_axi4_awlock ( int_awlock[i] ),
- .m_axi4_awprot ( int_awprot[i] ),
- .m_axi4_awcache ( int_awcache[i] ),
- .m_axi4_awregion ( int_awregion[i] ),
- .m_axi4_awqos ( int_awqos[i] ),
- .m_axi4_awuser ( int_awuser[i] )
- );
- axi4_aw_sender
- #(
- )
- u_aw_sender_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .l1_done_o ( l1_m0_aw_done[i] ),
- .l1_accept_i ( l1_m0_aw_accept[i] ),
- .l1_drop_i ( l1_m0_aw_drop[i] ),
- .l1_save_i ( l1_m0_aw_save[i] ),
- .l2_done_o ( l2_m0_aw_done[i] ),
- .l2_accept_i ( l2_m0_aw_accept[i] ),
- .l2_drop_i ( l2_m0_aw_drop[i] ),
- .l2_sending_o ( l2_m0_aw_sending[i] ),
- .l1_awaddr_i ( int_wtrans_addr[i] ),
- .l2_awaddr_i ( l2_aw_addr[i] ),
- .s_axi4_awid ( int_awid[i] ),
- .s_axi4_awvalid ( int_m0_awvalid[i] ),
- .s_axi4_awready ( int_m0_awready[i] ),
- .s_axi4_awlen ( int_awlen[i] ),
- .s_axi4_awsize ( int_awsize[i] ),
- .s_axi4_awburst ( int_awburst[i] ),
- .s_axi4_awlock ( int_awlock[i] ),
- .s_axi4_awprot ( int_awprot[i] ),
- .s_axi4_awcache ( int_awcache[i] ),
- .s_axi4_awregion ( int_awregion[i] ),
- .s_axi4_awqos ( int_awqos[i] ),
- .s_axi4_awuser ( int_awuser[i] ),
- .m_axi4_awid ( m0_axi4_awid[i] ),
- .m_axi4_awaddr ( m0_axi4_awaddr[i] ),
- .m_axi4_awvalid ( m0_axi4_awvalid[i] ),
- .m_axi4_awready ( m0_axi4_awready[i] ),
- .m_axi4_awlen ( m0_axi4_awlen[i] ),
- .m_axi4_awsize ( m0_axi4_awsize[i] ),
- .m_axi4_awburst ( m0_axi4_awburst[i] ),
- .m_axi4_awlock ( m0_axi4_awlock[i] ),
- .m_axi4_awprot ( m0_axi4_awprot[i] ),
- .m_axi4_awcache ( ),
- .m_axi4_awregion ( m0_axi4_awregion[i] ),
- .m_axi4_awqos ( m0_axi4_awqos[i] ),
- .m_axi4_awuser ( m0_axi4_awuser[i] )
- );
- // The AXCACHE signals are set according to burstiness and cache coherence or statically
- // when not connected to ACP on Zynq (implemented below).
- assign m0_write_is_burst[i] = (m0_axi4_awlen[i] != {8{1'b0}}) && (m0_axi4_awburst[i] != 2'b00);
- `ifndef EN_ACP
- always_comb begin
- if ( (l2_m0_aw_sending[i] & l2_cache_coherent[i]) | int_wtrans_cache_coherent[i]) begin
- if (m0_write_is_burst[i]) begin
- m0_axi4_awcache[i] = 4'b0111;
- end else begin
- m0_axi4_awcache[i] = 4'b1111;
- end
- end else begin
- m0_axi4_awcache[i] = 4'b0011;
- end
- end
- `else
- assign m0_axi4_awcache[i] = 4'b0011;
- `endif
- axi4_aw_sender
- #(
- )
- u_aw_sender_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .l1_accept_i ( l1_m1_aw_accept[i] ),
- .l1_drop_i ( l1_m1_aw_drop[i] ),
- .l1_save_i ( l1_m1_aw_save[i] ),
- .l1_done_o ( l1_m1_aw_done[i] ),
- .l2_accept_i ( l2_m1_aw_accept[i] ),
- .l2_drop_i ( l2_m1_aw_drop[i] ),
- .l2_done_o ( l2_m1_aw_done[i] ),
- .l2_sending_o ( ), // just helps to set axcache
- .l1_awaddr_i ( int_wtrans_addr[i] ),
- .l2_awaddr_i ( l2_aw_addr[i] ),
- .s_axi4_awid ( int_awid[i] ),
- .s_axi4_awvalid ( int_m1_awvalid[i] ),
- .s_axi4_awready ( int_m1_awready[i] ),
- .s_axi4_awlen ( int_awlen[i] ),
- .s_axi4_awsize ( int_awsize[i] ),
- .s_axi4_awburst ( int_awburst[i] ),
- .s_axi4_awlock ( int_awlock[i] ),
- .s_axi4_awprot ( int_awprot[i] ),
- .s_axi4_awcache ( int_awcache[i] ),
- .s_axi4_awregion ( int_awregion[i] ),
- .s_axi4_awqos ( int_awqos[i] ),
- .s_axi4_awuser ( int_awuser[i] ),
- .m_axi4_awid ( m1_axi4_awid[i] ),
- .m_axi4_awaddr ( m1_axi4_awaddr[i] ),
- .m_axi4_awvalid ( m1_axi4_awvalid[i] ),
- .m_axi4_awready ( m1_axi4_awready[i] ),
- .m_axi4_awlen ( m1_axi4_awlen[i] ),
- .m_axi4_awsize ( m1_axi4_awsize[i] ),
- .m_axi4_awburst ( m1_axi4_awburst[i] ),
- .m_axi4_awlock ( m1_axi4_awlock[i] ),
- .m_axi4_awprot ( m1_axi4_awprot[i] ),
- .m_axi4_awcache ( ),
- .m_axi4_awregion ( m1_axi4_awregion[i] ),
- .m_axi4_awqos ( m1_axi4_awqos[i] ),
- .m_axi4_awuser ( m1_axi4_awuser[i] )
- );
- // The AXCACHE signals are set according to burstiness and cache coherence or statically
- // when not connected to ACP on Zynq (implemented below).
- assign m1_write_is_burst[i] = (m1_axi4_awlen[i] != {8{1'b0}}) && (m1_axi4_awburst[i] != 2'b00);
- `ifdef EN_ACP
- always_comb begin
- if (m1_write_is_burst[i]) begin
- m1_axi4_awcache[i] = 4'b1011;
- end else begin
- m1_axi4_awcache[i] = 4'b1111;
- end
- end
- `else
- assign m1_axi4_awcache[i] = 4'b0011;
- `endif
- // }}}
- // Write Data channel (w) {{{
- /*
- * write data channel (w)
- *
- * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ █████╗ ████████╗ █████╗
- * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗
- * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██║ ██║███████║ ██║ ███████║
- * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██║ ██║██╔══██║ ██║ ██╔══██║
- * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██████╔╝██║ ██║ ██║ ██║ ██║
- * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝
- *
- */
- axi4_w_buffer
- #(
- )
- u_w_buffer
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- // L1 interface
- .l1_done_o ( l1_w_done[i] ),
- .l1_accept_i ( l1_w_accept[i] ),
- .l1_save_i ( l1_w_save[i] ),
- .l1_drop_i ( l1_w_drop[i] ),
- .l1_master_i ( int_wmaster_select[i] ),
- .l1_id_i ( l1_id_drop[i] ),
- .l1_len_i ( l1_len_drop[i] ),
- .l1_prefetch_i ( l1_prefetch_drop[i] ),
- .l1_hit_i ( l1_hit_drop[i] ),
- // L2 interface
- .l2_done_o ( l2_w_done[i] ),
- .l2_accept_i ( l2_w_accept[i] ),
- .l2_drop_i ( l2_w_drop[i] ),
- .l2_master_i ( l2_master_select[i] ),
- .l2_id_i ( lx_id_drop[i] ),
- .l2_len_i ( lx_len_drop[i] ),
- .l2_prefetch_i ( lx_prefetch_drop[i] ),
- .l2_hit_i ( lx_hit_drop[i] ),
- // Top-level control outputs
- .master_select_o ( w_master_select[i] ),
- .input_stall_o ( aw_in_stall[i] ), // stall L1 AW input if request buffers full
- .output_stall_o ( aw_out_stall[i] ), // stall L1 AW hit forwarding if bypass not possible
- // B sender interface
- .b_drop_o ( b_drop[i] ),
- .b_done_i ( b_done[i] ),
- .id_o ( b_id_drop[i] ),
- .prefetch_o ( b_prefetch_drop[i] ),
- .hit_o ( b_hit_drop[i] ),
- // AXI W channel interfaces
- .s_axi4_wdata ( s_axi4_wdata[i] ),
- .s_axi4_wvalid ( s_axi4_wvalid[i] ),
- .s_axi4_wready ( s_axi4_wready[i] ),
- .s_axi4_wstrb ( s_axi4_wstrb[i] ),
- .s_axi4_wlast ( s_axi4_wlast[i] ),
- .s_axi4_wuser ( s_axi4_wuser[i] ),
- .m_axi4_wdata ( int_wdata[i] ),
- .m_axi4_wvalid ( int_wvalid[i] ),
- .m_axi4_wready ( int_wready[i] ),
- .m_axi4_wstrb ( int_wstrb[i] ),
- .m_axi4_wlast ( int_wlast[i] ),
- .m_axi4_wuser ( int_wuser[i] )
- );
- axi4_w_sender
- #(
- )
- u_w_sender_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_wdata ( int_wdata[i] ),
- .s_axi4_wvalid ( int_m0_wvalid[i] ),
- .s_axi4_wready ( int_m0_wready[i] ),
- .s_axi4_wstrb ( int_wstrb[i] ),
- .s_axi4_wlast ( int_wlast[i] ),
- .s_axi4_wuser ( int_wuser[i] ),
- .m_axi4_wdata ( m0_axi4_wdata[i] ),
- .m_axi4_wvalid ( m0_axi4_wvalid[i] ),
- .m_axi4_wready ( m0_axi4_wready[i] ),
- .m_axi4_wstrb ( m0_axi4_wstrb[i] ),
- .m_axi4_wlast ( m0_axi4_wlast[i] ),
- .m_axi4_wuser ( m0_axi4_wuser[i] )
- );
- axi4_w_sender
- #(
- )
- u_w_sender_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_wdata ( int_wdata[i] ),
- .s_axi4_wvalid ( int_m1_wvalid[i] ),
- .s_axi4_wready ( int_m1_wready[i] ),
- .s_axi4_wstrb ( int_wstrb[i] ),
- .s_axi4_wlast ( int_wlast[i] ),
- .s_axi4_wuser ( int_wuser[i] ),
- .m_axi4_wdata ( m1_axi4_wdata[i] ),
- .m_axi4_wvalid ( m1_axi4_wvalid[i] ),
- .m_axi4_wready ( m1_axi4_wready[i] ),
- .m_axi4_wstrb ( m1_axi4_wstrb[i] ),
- .m_axi4_wlast ( m1_axi4_wlast[i] ),
- .m_axi4_wuser ( m1_axi4_wuser[i] )
- );
- /*
- * Multiplexer to switch between the two output master ports on the write data (w) channel
- */
- always_comb begin
- /* Only one output can be selected at any time */
- if (w_master_select[i] == 1'b0) begin
- int_m0_wvalid[i] = int_wvalid[i];
- int_m1_wvalid[i] = 1'b0;
- int_wready[i] = int_m0_wready[i];
- end else begin
- int_m0_wvalid[i] = 1'b0;
- int_m1_wvalid[i] = int_wvalid[i];
- int_wready[i] = int_m1_wready[i];
- end
- end
- // }}}
- // Write Response channel (b) {{{
- /*
- * write response channel (b)
- *
- * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ ███████╗███████╗██████╗
- * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔════╝██╔════╝██╔══██╗
- * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██████╔╝█████╗ ███████╗██████╔╝
- * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██╗██╔══╝ ╚════██║██╔═══╝
- * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║███████╗███████║██║
- * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝
- *
- */
- axi4_b_buffer
- #(
- )
- u_b_buffer_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_bid ( int_m0_bid[i] ),
- .s_axi4_bresp ( int_m0_bresp[i] ),
- .s_axi4_bvalid ( int_m0_bvalid[i] ),
- .s_axi4_buser ( int_m0_buser[i] ),
- .s_axi4_bready ( int_m0_bready[i] ),
- .m_axi4_bid ( m0_axi4_bid[i] ),
- .m_axi4_bresp ( m0_axi4_bresp[i] ),
- .m_axi4_bvalid ( m0_axi4_bvalid[i] ),
- .m_axi4_buser ( m0_axi4_buser[i] ),
- .m_axi4_bready ( m0_axi4_bready[i] )
- );
- // Write-response (b) buffer on the master-1 path. Its m_axi4_* ports connect to the external
- // m1_axi4_b* signals, its s_axi4_* ports to the internal int_m1_b* signals that are read by
- // the B-channel multiplexer further down.
- axi4_b_buffer
- #(
- )
- u_b_buffer_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_bid ( int_m1_bid[i] ),
- .s_axi4_bresp ( int_m1_bresp[i] ),
- .s_axi4_bvalid ( int_m1_bvalid[i] ),
- .s_axi4_buser ( int_m1_buser[i] ),
- .s_axi4_bready ( int_m1_bready[i] ),
- .m_axi4_bid ( m1_axi4_bid[i] ),
- .m_axi4_bresp ( m1_axi4_bresp[i] ),
- .m_axi4_bvalid ( m1_axi4_bvalid[i] ),
- .m_axi4_buser ( m1_axi4_buser[i] ),
- .m_axi4_bready ( m1_axi4_bready[i] )
- );
- // Write-response (b) sender towards the slave port: the multiplexed internal response
- // (int_b*) enters on the m_axi4_* ports and leaves on s_axi4_b*. The drop_i/done_o handshake
- // together with id_i/prefetch_i/hit_i is driven by the b_*drop signals.
- // NOTE(review): presumably the sender generates the response for dropped transactions
- // itself - confirm in axi4_b_sender.
- axi4_b_sender
- #(
- )
- u_b_sender
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .drop_i ( b_drop[i] ),
- .done_o ( b_done[i] ),
- .id_i ( b_id_drop[i] ),
- .prefetch_i ( b_prefetch_drop[i] ),
- .hit_i ( b_hit_drop[i] ),
- .s_axi4_bid ( s_axi4_bid[i] ),
- .s_axi4_bresp ( s_axi4_bresp[i] ),
- .s_axi4_bvalid ( s_axi4_bvalid[i] ),
- .s_axi4_buser ( s_axi4_buser[i] ),
- .s_axi4_bready ( s_axi4_bready[i] ),
- .m_axi4_bid ( int_bid[i] ),
- .m_axi4_bresp ( int_bresp[i] ),
- .m_axi4_bvalid ( int_bvalid[i] ),
- .m_axi4_buser ( int_buser[i] ),
- .m_axi4_bready ( int_bready[i] )
- );
- /*
-  * Write response (b) channel: merge the responses of the two master-side paths into the
-  * single internal response stream (int_b*).
-  */
- always_comb begin
-   // Default: master 0 is connected and master 1 is held off.
-   int_m0_bready[i] = int_bready[i];
-   int_m1_bready[i] = 1'b0;
-   int_bid[i]       = int_m0_bid[i];
-   int_bresp[i]     = int_m0_bresp[i];
-   int_buser[i]     = int_m0_buser[i];
-   int_bvalid[i]    = int_m0_bvalid[i];
-   // Master 1 always has priority: as soon as it presents a valid response it is connected
-   // and master 0 has to wait (its bready is forced to 0).
-   if (int_m1_bvalid[i] == 1'b1) begin
-     int_m0_bready[i] = 1'b0;
-     int_m1_bready[i] = int_bready[i];
-     int_bid[i]       = int_m1_bid[i];
-     int_bresp[i]     = int_m1_bresp[i];
-     int_buser[i]     = int_m1_buser[i];
-     int_bvalid[i]    = int_m1_bvalid[i];
-   end
- end
- // }}}
- // Read Address channel (ar) {{{
- /*
- * read address channel (ar)
- *
- * ██████╗ ███████╗ █████╗ ██████╗ █████╗ ██████╗ ██████╗ ██████╗
- * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗
- * ██████╔╝█████╗ ███████║██║ ██║ ███████║██║ ██║██║ ██║██████╔╝
- * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██║██║ ██║██║ ██║██╔══██╗
- * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║██████╔╝██████╔╝██║ ██║
- * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
- *
- */
- // Read-address (ar) buffer at the slave port: takes the external s_axi4_ar* request on its
- // s_axi4_* ports and presents it internally as int_ar* on its m_axi4_* ports.
- axi4_ar_buffer
- #(
- )
- u_ar_buffer
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_arid ( s_axi4_arid[i] ),
- .s_axi4_araddr ( s_axi4_araddr[i] ),
- .s_axi4_arvalid ( s_axi4_arvalid[i] ),
- .s_axi4_arready ( s_axi4_arready[i] ),
- .s_axi4_arlen ( s_axi4_arlen[i] ),
- .s_axi4_arsize ( s_axi4_arsize[i] ),
- .s_axi4_arburst ( s_axi4_arburst[i] ),
- .s_axi4_arlock ( s_axi4_arlock[i] ),
- .s_axi4_arprot ( s_axi4_arprot[i] ),
- .s_axi4_arcache ( s_axi4_arcache[i] ),
- .s_axi4_aruser ( s_axi4_aruser[i] ),
- .m_axi4_arid ( int_arid[i] ),
- .m_axi4_araddr ( int_araddr[i] ),
- .m_axi4_arvalid ( int_arvalid[i] ),
- .m_axi4_arready ( int_arready[i] ),
- .m_axi4_arlen ( int_arlen[i] ),
- .m_axi4_arsize ( int_arsize[i] ),
- .m_axi4_arburst ( int_arburst[i] ),
- .m_axi4_arlock ( int_arlock[i] ),
- .m_axi4_arprot ( int_arprot[i] ),
- .m_axi4_arcache ( int_arcache[i] ),
- .m_axi4_aruser ( int_aruser[i] )
- );
- // Read-address (ar) sender for master port 0. It receives the buffered request attributes
- // (int_ar*), the per-master valid/ready pair (int_m0_arvalid/int_m0_arready), the L1
- // accept/drop/save and L2 accept/drop handshakes, and two candidate addresses: the L1
- // translation result (int_rtrans_addr) and the L2 translation result (l2_ar_addr).
- axi4_ar_sender
- #(
- )
- u_ar_sender_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .l1_done_o ( l1_m0_ar_done[i] ),
- .l1_accept_i ( l1_m0_ar_accept[i] ),
- .l1_drop_i ( l1_m0_ar_drop[i] ),
- .l1_save_i ( l1_m0_ar_save[i] ),
- .l2_done_o ( l2_m0_ar_done[i] ),
- .l2_accept_i ( l2_m0_ar_accept[i] ),
- .l2_drop_i ( l2_m0_ar_drop[i] ),
- .l2_sending_o ( l2_m0_ar_sending[i] ),
- .l1_araddr_i ( int_rtrans_addr[i] ),
- .l2_araddr_i ( l2_ar_addr[i] ),
- .s_axi4_arid ( int_arid[i] ),
- .s_axi4_arvalid ( int_m0_arvalid[i] ),
- .s_axi4_arready ( int_m0_arready[i] ),
- .s_axi4_arlen ( int_arlen[i] ),
- .s_axi4_arsize ( int_arsize[i] ),
- .s_axi4_arburst ( int_arburst[i] ),
- .s_axi4_arlock ( int_arlock[i] ),
- .s_axi4_arprot ( int_arprot[i] ),
- .s_axi4_arcache ( int_arcache[i] ),
- .s_axi4_aruser ( int_aruser[i] ),
- .m_axi4_arid ( m0_axi4_arid[i] ),
- .m_axi4_araddr ( m0_axi4_araddr[i] ),
- .m_axi4_arvalid ( m0_axi4_arvalid[i] ),
- .m_axi4_arready ( m0_axi4_arready[i] ),
- .m_axi4_arlen ( m0_axi4_arlen[i] ),
- .m_axi4_arsize ( m0_axi4_arsize[i] ),
- .m_axi4_arburst ( m0_axi4_arburst[i] ),
- .m_axi4_arlock ( m0_axi4_arlock[i] ),
- .m_axi4_arprot ( m0_axi4_arprot[i] ),
- .m_axi4_arcache ( ), // left open: m0_axi4_arcache[i] is driven by separate logic in this generate block
- .m_axi4_aruser ( m0_axi4_aruser[i] )
- );
- // ARCACHE generation for master port 0.
- // Without EN_ACP the value follows cache coherence and burstiness of the outgoing request;
- // with EN_ACP defined, master port 0 always drives the static value 4'b0011.
- assign m0_read_is_burst[i] = (m0_axi4_arlen[i] != {8{1'b0}}) && (m0_axi4_arburst[i] != 2'b00);
- `ifndef EN_ACP
- always_comb begin
-   // Non-coherent default.
-   m0_axi4_arcache[i] = 4'b0011;
-   // Coherent if either an L2 transaction marked coherent is being sent or the L1 result is
-   // marked coherent.
-   if ( (l2_m0_ar_sending[i] & l2_cache_coherent[i]) | int_rtrans_cache_coherent[i]) begin
-     m0_axi4_arcache[i] = 4'b1111;
-     // Bursts use a different encoding than single-beat coherent reads.
-     if (m0_read_is_burst[i]) begin
-       m0_axi4_arcache[i] = 4'b1011;
-     end
-   end
- end
- `else
- assign m0_axi4_arcache[i] = 4'b0011;
- `endif
- // Read-address (ar) sender for master port 1. Same connectivity as the master-0 sender, but
- // using the *_m1_* handshake signals and the m1_axi4_ar* outputs. Both senders share the
- // request attributes (int_ar*) and the L1/L2 candidate addresses.
- axi4_ar_sender
- #(
- )
- u_ar_sender_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .l1_done_o ( l1_m1_ar_done[i] ),
- .l1_accept_i ( l1_m1_ar_accept[i] ),
- .l1_drop_i ( l1_m1_ar_drop[i] ),
- .l1_save_i ( l1_m1_ar_save[i] ),
- .l2_done_o ( l2_m1_ar_done[i] ),
- .l2_accept_i ( l2_m1_ar_accept[i] ),
- .l2_drop_i ( l2_m1_ar_drop[i] ),
- .l2_sending_o ( ), // just helps to set axcache
- .l1_araddr_i ( int_rtrans_addr[i] ),
- .l2_araddr_i ( l2_ar_addr[i] ),
- .s_axi4_arid ( int_arid[i] ),
- .s_axi4_arvalid ( int_m1_arvalid[i] ),
- .s_axi4_arready ( int_m1_arready[i] ),
- .s_axi4_arlen ( int_arlen[i] ),
- .s_axi4_arsize ( int_arsize[i] ),
- .s_axi4_arburst ( int_arburst[i] ),
- .s_axi4_arlock ( int_arlock[i] ),
- .s_axi4_arprot ( int_arprot[i] ),
- .s_axi4_arcache ( int_arcache[i] ),
- .s_axi4_aruser ( int_aruser[i] ),
- .m_axi4_arid ( m1_axi4_arid[i] ),
- .m_axi4_araddr ( m1_axi4_araddr[i] ),
- .m_axi4_arvalid ( m1_axi4_arvalid[i] ),
- .m_axi4_arready ( m1_axi4_arready[i] ),
- .m_axi4_arlen ( m1_axi4_arlen[i] ),
- .m_axi4_arsize ( m1_axi4_arsize[i] ),
- .m_axi4_arburst ( m1_axi4_arburst[i] ),
- .m_axi4_arlock ( m1_axi4_arlock[i] ),
- .m_axi4_arprot ( m1_axi4_arprot[i] ),
- .m_axi4_arcache ( ), // left open: m1_axi4_arcache[i] is driven by separate logic in this generate block
- .m_axi4_aruser ( m1_axi4_aruser[i] )
- );
- // ARCACHE generation for master port 1.
- // With EN_ACP defined the value depends only on the burstiness of the outgoing request;
- // without EN_ACP, master port 1 always drives the static value 4'b0011.
- assign m1_read_is_burst[i] = (m1_axi4_arlen[i] != {8{1'b0}}) && (m1_axi4_arburst[i] != 2'b00);
- `ifdef EN_ACP
- always_comb begin
-   // Single-beat encoding by default, overridden for bursts.
-   m1_axi4_arcache[i] = 4'b1111;
-   if (m1_read_is_burst[i]) begin
-     m1_axi4_arcache[i] = 4'b1011;
-   end
- end
- `else
- assign m1_axi4_arcache[i] = 4'b0011;
- `endif
- // }}}
- // Read Response channel (r) {{{
- /*
- * read response channel (r)
- *
- * ██████╗ ███████╗ █████╗ ██████╗ ██████╗ ███████╗███████╗██████╗
- * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔════╝██╔════╝██╔══██╗
- * ██████╔╝█████╗ ███████║██║ ██║ ██████╔╝█████╗ ███████╗██████╔╝
- * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██╗██╔══╝ ╚════██║██╔═══╝
- * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║███████╗███████║██║
- * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝
- *
- */
- // Read-response (r) buffer on the master-0 path. Its m_axi4_* ports connect to the external
- // m0_axi4_r* signals, its s_axi4_* ports to the internal int_m0_r* signals that are read by
- // the R-channel multiplexer and its FSM further down.
- axi4_r_buffer
- #(
- )
- u_r_buffer_m0
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_rid ( int_m0_rid[i] ),
- .s_axi4_rresp ( int_m0_rresp[i] ),
- .s_axi4_rdata ( int_m0_rdata[i] ),
- .s_axi4_rlast ( int_m0_rlast[i] ),
- .s_axi4_rvalid ( int_m0_rvalid[i] ),
- .s_axi4_ruser ( int_m0_ruser[i] ),
- .s_axi4_rready ( int_m0_rready[i] ),
- .m_axi4_rid ( m0_axi4_rid[i] ),
- .m_axi4_rresp ( m0_axi4_rresp[i] ),
- .m_axi4_rdata ( m0_axi4_rdata[i] ),
- .m_axi4_rlast ( m0_axi4_rlast[i] ),
- .m_axi4_rvalid ( m0_axi4_rvalid[i] ),
- .m_axi4_ruser ( m0_axi4_ruser[i] ),
- .m_axi4_rready ( m0_axi4_rready[i] )
- );
- // Read-response (r) buffer on the master-1 path. Its m_axi4_* ports connect to the external
- // m1_axi4_r* signals, its s_axi4_* ports to the internal int_m1_r* signals that are read by
- // the R-channel multiplexer and its FSM further down.
- axi4_r_buffer
- #(
- )
- u_r_buffer_m1
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .s_axi4_rid ( int_m1_rid[i] ),
- .s_axi4_rresp ( int_m1_rresp[i] ),
- .s_axi4_rdata ( int_m1_rdata[i] ),
- .s_axi4_rlast ( int_m1_rlast[i] ),
- .s_axi4_rvalid ( int_m1_rvalid[i] ),
- .s_axi4_ruser ( int_m1_ruser[i] ),
- .s_axi4_rready ( int_m1_rready[i] ),
- .m_axi4_rid ( m1_axi4_rid[i] ),
- .m_axi4_rresp ( m1_axi4_rresp[i] ),
- .m_axi4_rdata ( m1_axi4_rdata[i] ),
- .m_axi4_rlast ( m1_axi4_rlast[i] ),
- .m_axi4_rvalid ( m1_axi4_rvalid[i] ),
- .m_axi4_ruser ( m1_axi4_ruser[i] ),
- .m_axi4_rready ( m1_axi4_rready[i] )
- );
- // Read-response (r) sender towards the slave port: the multiplexed internal response
- // (int_r*) enters on the m_axi4_* ports and leaves on s_axi4_r*. The drop handshake
- // (drop_i/done_o) and its attributes (length, ID, prefetch, hit) come from the combined
- // L1/L2 drop signals lx_*.
- // NOTE(review): presumably the sender generates the response beats for dropped reads
- // itself - confirm in axi4_r_sender.
- axi4_r_sender
- #(
- )
- u_r_sender
- (
- .axi4_aclk ( Clk_CI ),
- .axi4_arstn ( Rst_RBI ),
- .drop_i ( lx_r_drop[i] ),
- .drop_len_i ( lx_len_drop[i] ),
- .done_o ( lx_r_done[i] ),
- .id_i ( lx_id_drop[i] ),
- .prefetch_i ( lx_prefetch_drop[i] ),
- .hit_i ( lx_hit_drop[i] ),
- .s_axi4_rid ( s_axi4_rid[i] ),
- .s_axi4_rresp ( s_axi4_rresp[i] ),
- .s_axi4_rdata ( s_axi4_rdata[i] ),
- .s_axi4_rlast ( s_axi4_rlast[i] ),
- .s_axi4_rvalid ( s_axi4_rvalid[i] ),
- .s_axi4_ruser ( s_axi4_ruser[i] ),
- .s_axi4_rready ( s_axi4_rready[i] ),
- .m_axi4_rid ( int_rid[i] ),
- .m_axi4_rresp ( int_rresp[i] ),
- .m_axi4_rdata ( int_rdata[i] ),
- .m_axi4_rlast ( int_rlast[i] ),
- .m_axi4_rvalid ( int_rvalid[i] ),
- .m_axi4_ruser ( int_ruser[i] ),
- .m_axi4_rready ( int_rready[i] )
- );
- /*
- * Multiplexer to switch between the two output master ports on the read response (r) channel
- *
- * Do not perform read burst interleaving as the DMA does not support it. This means we can only
- * switch between the two masters upon sending rlast or when idle.
- *
- * However, if the downstream already performs burst interleaving, this cannot be undone here.
- * Also, the downstream may interleave a burst response with a single-beat transaction. In this
- * case, the FSM below falls out of the burst mode. To avoid it performing burst interleaving
- * after such an event, it gives priority to the master which received the last burst in case
- * both have a burst ready (rvalid).
- *
- * Order of priority:
- * 1. Ongoing burst transaction
- * 2. Single-beat transaction on Master 1.
- * 3. Single-beat transaction on Master 0.
- * 4. Burst transaction on master that received the last burst.
- */
- // Registered master select: remembers which master owns the current/last burst.
- always_ff @(posedge Clk_CI) begin
-   // Regular update from the FSM ...
-   RRespSel_SP[i] <= RRespSel_SN[i];
-   // ... overridden by the synchronous, active-low reset.
-   if (Rst_RBI == 0) begin
-     RRespSel_SP[i] <= 1'b0;
-   end
- end
- // FSM controlling the read-response multiplexer.
- // Outputs: RRespBurst_S    - 1 while a burst is being forwarded (state BUSY),
- //          RRespSelIm_S    - immediate (combinational) master select used outside a burst,
- //          RRespSel_SN     - next value of the registered master select used during a burst,
- //          RRespMuxCtrl_SN - next FSM state.
- always_comb begin : RRespMuxFsm
- // defaults: hold state and select, no burst, master 0 selected immediately
- RRespMuxCtrl_SN[i] = RRespMuxCtrl_SP[i];
- RRespSel_SN[i] = RRespSel_SP[i];
- RRespBurst_S[i] = 1'b0;
- RRespSelIm_S[i] = 1'b0;
- unique case (RRespMuxCtrl_SP[i])
- IDLE: begin
- // immediately forward single-beat transactions (valid together with last);
- // master 1 is checked first and therefore has priority, the state stays IDLE
- if (int_m1_rvalid[i] && int_m1_rlast[i])
- RRespSelIm_S[i] = 1'b1;
- else if (int_m0_rvalid[i] && int_m0_rlast[i])
- RRespSelIm_S[i] = 1'b0;
- // bursts - they also start immediately
- else if (int_m1_rvalid[i] || int_m0_rvalid[i]) begin
- RRespMuxCtrl_SN[i] = BUSY;
- // in case both are ready, continue with the master that had the last burst
- if (int_m1_rvalid[i] && int_m0_rvalid[i]) begin
- RRespSel_SN[i] = RRespSel_SP[i];
- RRespSelIm_S[i] = RRespSel_SP[i];
- end else if (int_m1_rvalid[i]) begin
- RRespSel_SN[i] = 1'b1;
- RRespSelIm_S[i] = 1'b1;
- end else begin
- RRespSel_SN[i] = 1'b0;
- RRespSelIm_S[i] = 1'b0;
- end
- end
- end
- BUSY: begin
- // the registered select (RRespSel_SP) steers the multiplexer while in this state
- RRespBurst_S[i] = 1'b1;
- // detect last handshake of currently ongoing transfer
- if (int_rvalid[i] && int_rready[i] && int_rlast[i])
- RRespMuxCtrl_SN[i] = IDLE;
- end
- default: begin
- // recover from any unexpected state encoding
- RRespMuxCtrl_SN[i] = IDLE;
- end
- endcase
- end
- // State register of the read-response multiplexer FSM.
- always_ff @(posedge Clk_CI) begin
-   // Regular state transition ...
-   RRespMuxCtrl_SP[i] <= RRespMuxCtrl_SN[i];
-   // ... overridden by the synchronous, active-low reset.
-   if (Rst_RBI == 0) begin
-     RRespMuxCtrl_SP[i] <= IDLE;
-   end
- end
- // Actual multiplexer: connect the read response of the selected master to the R sender.
- always_comb begin
-   // Default: master 0 is connected and master 1 is held off.
-   int_m0_rready[i] = int_rready[i];
-   int_m1_rready[i] = 1'b0;
-   int_rid[i]       = int_m0_rid[i];
-   int_rresp[i]     = int_m0_rresp[i];
-   int_rdata[i]     = int_m0_rdata[i];
-   int_rlast[i]     = int_m0_rlast[i];
-   int_ruser[i]     = int_m0_ruser[i];
-   int_rvalid[i]    = int_m0_rvalid[i];
-   // Master 1 is connected when it owns the ongoing burst (registered select) or, outside
-   // of a burst, when the FSM picked it immediately.
-   if ( (RRespBurst_S[i] && RRespSel_SP[i]) || (!RRespBurst_S[i] && RRespSelIm_S[i]) ) begin
-     int_m0_rready[i] = 1'b0;
-     int_m1_rready[i] = int_rready[i];
-     int_rid[i]       = int_m1_rid[i];
-     int_rresp[i]     = int_m1_rresp[i];
-     int_rdata[i]     = int_m1_rdata[i];
-     int_rlast[i]     = int_m1_rlast[i];
-     int_ruser[i]     = int_m1_ruser[i];
-     int_rvalid[i]    = int_m1_rvalid[i];
-   end
- end
- end // BUF & SEND
- // }}}
- endgenerate // BUF & SEND }}}
- // Log {{{
-`ifdef RAB_AX_LOG_EN
- // Optional AW/AR request loggers, only built when RAB_AX_LOG_EN is defined.
- // Both loggers observe the handshake, ID, address and length of slave port index 1 and are
- // clocked by the non-gated clock, while Clk_CI is used as timestamp clock.
- // Write-address (aw) logger.
- AxiBramLogger
-   #(
-   )
-   u_aw_logger
-   (
-     .Clk_CI          ( NonGatedClk_CI    ),
-     .TimestampClk_CI ( Clk_CI            ),
-     .Rst_RBI         ( Rst_RBI           ),
-     .AxiValid_SI     ( s_axi4_awvalid[1] ),
-     .AxiReady_SI     ( s_axi4_awready[1] ),
-     .AxiId_DI        ( s_axi4_awid[1]    ),
-     .AxiAddr_DI      ( s_axi4_awaddr[1]  ),
-     .AxiLen_DI       ( s_axi4_awlen[1]   ),
-     .Clear_SI        ( AwLogClr_SI       ),
-     .LogEn_SI        ( LogEn_SI          ),
-     .Full_SO         ( int_aw_log_full   ),
-     .Ready_SO        ( AwLogRdy_SO       ),
-     .Bram_PS         ( AwBram_PS         )
-   );
- // Read-address (ar) logger.
- AxiBramLogger
-   #(
-   )
-   u_ar_logger
-   (
-     .Clk_CI          ( NonGatedClk_CI    ),
-     .TimestampClk_CI ( Clk_CI            ),
-     .Rst_RBI         ( Rst_RBI           ),
-     .AxiValid_SI     ( s_axi4_arvalid[1] ),
-     .AxiReady_SI     ( s_axi4_arready[1] ),
-     .AxiId_DI        ( s_axi4_arid[1]    ),
-     .AxiAddr_DI      ( s_axi4_araddr[1]  ),
-     .AxiLen_DI       ( s_axi4_arlen[1]   ),
-     .Clear_SI        ( ArLogClr_SI       ),
-     .LogEn_SI        ( LogEn_SI          ),
-     .Full_SO         ( int_ar_log_full   ),
-     .Ready_SO        ( ArLogRdy_SO       ),
-     .Bram_PS         ( ArBram_PS         )
-   );
- // Close the conditional region here so that the logic following the loggers is always
- // compiled, independent of RAB_AX_LOG_EN.
-`endif
- // }}}
- // RAB Core {{{
- // ██████╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗ ███████╗
- // ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔═══██╗██╔══██╗██╔════╝
- // ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝█████╗
- // ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔══██╗██╔══╝
- // ██║ ██║██║ ██║██████╔╝ ╚██████╗╚██████╔╝██║ ██║███████╗
- // ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝
- //
- /*
- * rab_core
- *
- * The rab core translates addresses. It has two ports, which can be used
- * independently, however they will compete for time internally, as lookups
- * are serialized.
- *
- * type is the read(0) or write(1) used to check the protection flags. If they
- * don't match an interrupt is created on the int_prot line.
- */
- // RAB core instance: configuration via the AXI4-Lite slave interface, port 1 used for write
- // (AW) requests and port 2 for read (AR) requests.
- rab_core
-   #(
-     // no trailing comma after the last parameter override
-     .N_L2_SETS ( N_L2_SETS )
-   )
-   u_rab_core
-   (
-     .Clk_CI ( Clk_CI ),
-     .Rst_RBI ( Rst_RBI ),
-     // Config IF
-     .s_axi_awaddr ( s_axi4lite_awaddr ),
-     .s_axi_awvalid ( s_axi4lite_awvalid ),
-     .s_axi_awready ( s_axi4lite_awready ),
-     .s_axi_wdata ( s_axi4lite_wdata ),
-     .s_axi_wstrb ( s_axi4lite_wstrb ),
-     .s_axi_wvalid ( s_axi4lite_wvalid ),
-     .s_axi_wready ( s_axi4lite_wready ),
-     .s_axi_bresp ( s_axi4lite_bresp ),
-     .s_axi_bvalid ( s_axi4lite_bvalid ),
-     .s_axi_bready ( s_axi4lite_bready ),
-     .s_axi_araddr ( s_axi4lite_araddr ),
-     .s_axi_arvalid ( s_axi4lite_arvalid ),
-     .s_axi_arready ( s_axi4lite_arready ),
-     .s_axi_rready ( s_axi4lite_rready ),
-     .s_axi_rdata ( s_axi4lite_rdata ),
-     .s_axi_rresp ( s_axi4lite_rresp ),
-     .s_axi_rvalid ( s_axi4lite_rvalid ),
-     // L1 miss info outputs -> L2 TLB arbitration
-     .int_miss ( rab_miss ),
-     .int_multi ( rab_multi ),
-     .int_prot ( rab_prot ),
-     .int_prefetch ( rab_prefetch ),
-     .int_mhf_full ( int_mhf_full ),
-     // L1 transaction info outputs -> L2 TLB arbitration
-     .int_axaddr_o ( L1OutAddr_D ),
-     .int_axid_o ( L1OutId_D ),
-     .int_axlen_o ( L1OutLen_D ),
-     .int_axuser_o ( L1OutUser_D ),
-     // Write Req IF
-     .port1_addr ( int_awaddr ),
-     .port1_id ( int_awid ),
-     .port1_len ( int_awlen ),
-     .port1_size ( int_awsize ),
-     .port1_addr_valid ( int_awvalid & ~aw_in_stall ), // avoid the FSM accepting new AW requests
-     .port1_type ( {N_PORTS{1'b1}} ), // all write
-     .port1_user ( int_awuser ),
-     .port1_sent ( int_wtrans_sent ), // signal done to L1 FSM
-     .port1_out_addr ( int_wtrans_addr ),
-     .port1_cache_coherent ( int_wtrans_cache_coherent ),
-     .port1_accept ( int_wtrans_accept ),
-     .port1_drop ( int_wtrans_drop ),
-     .port1_miss ( int_wtrans_miss ),
-     // Read Req IF
-     .port2_addr ( int_araddr ),
-     .port2_id ( int_arid ),
-     .port2_len ( int_arlen ),
-     .port2_size ( int_arsize ),
-     .port2_addr_valid ( int_arvalid ),
-     .port2_type ( {N_PORTS{1'b0}} ), // all read
-     .port2_user ( int_aruser ),
-     .port2_sent ( int_rtrans_sent ), // signal done to L1 FSM
-     .port2_out_addr ( int_rtrans_addr ),
-     .port2_cache_coherent ( int_rtrans_cache_coherent ),
-     .port2_accept ( int_rtrans_accept ),
-     .port2_drop ( int_rtrans_drop ),
-     .port2_miss ( int_rtrans_miss ),
-     // L2 miss info inputs -> axi_rab_cfg
-     .miss_l2_i ( L2Miss_S ),
-     .miss_l2_addr_i ( L2OutInAddr_DP ),
-     .miss_l2_id_i ( L2OutId_DP ),
-     .miss_l2_user_i ( L2OutUser_DP ),
-     // L2 config outputs
-     .wdata_l2_o ( L2CfgWData_D ),
-     .waddr_l2_o ( L2CfgWAddr_D ),
-     .wren_l2_o ( L2CfgWE_S )
-   );
- // }}}
- // AX SPLITS {{{
- // █████╗ ██╗ ██╗ ███████╗██████╗ ██╗ ██╗████████╗
- // ██╔══██╗╚██╗██╔╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝
- // ███████║ ╚███╔╝ ███████╗██████╔╝██║ ██║ ██║
- // ██╔══██║ ██╔██╗ ╚════██║██╔═══╝ ██║ ██║ ██║
- // ██║ ██║██╔╝ ██╗ ███████║██║ ███████╗██║ ██║
- // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝
- //
- /**
- * Multiplex the two output master ports of the Read Address and Write Address (AR/AW) channels.
- *
- * Use the `int_xmaster_select` signal to route the signals to either Master 0 (to memory) or
- * Master 1 (to ACP). In case of an L1 miss: Route the signals to both masters. They shall be
- * saved until the L2 outputs are available.
- */
- generate for (i = 0; i < N_PORTS; i++) begin : AX_SPLIT
- /*
- * Split of the L1 write-address (AW) handshake onto the two AW senders.
- *
- * When accepting L1 transactions, we must just do so on the selected master. Drop requests must
- * be performed on any one of the two masters. Save requests must be performed by both masters.
- *
- * The three request types are evaluated with fixed priority: accept, then drop, then save.
- */
- always_comb begin : AW_L1_SPLIT
- // TLB handshake (defaults: nothing requested, nothing done)
- l1_m0_aw_accept[i] = 1'b0;
- l1_m1_aw_accept[i] = 1'b0;
- l1_m0_aw_drop[i] = 1'b0;
- l1_m1_aw_drop[i] = 1'b0;
- l1_m0_aw_save[i] = 1'b0;
- l1_m1_aw_save[i] = 1'b0;
- l1_mx_aw_done[i] = 1'b0;
- // AXI sender input handshake (defaults: no valid to either sender, not ready upstream)
- int_m0_awvalid[i] = 1'b0;
- int_m1_awvalid[i] = 1'b0;
- int_awready[i] = 1'b0;
- // accept on selected master only; done and ready come from that master
- if (l1_aw_accept[i]) begin
- if (int_wmaster_select[i]) begin
- l1_m1_aw_accept[i] = 1'b1;
- l1_mx_aw_done[i] = l1_m1_aw_done[i];
- int_m1_awvalid[i] = int_awvalid[i];
- int_awready[i] = int_m1_awready[i];
- end else begin
- l1_m0_aw_accept[i] = 1'b1;
- l1_mx_aw_done[i] = l1_m0_aw_done[i];
- int_m0_awvalid[i] = int_awvalid[i];
- int_awready[i] = int_m0_awready[i];
- end
- // drop on Master 0 only; the upstream ready is the done of Master 0
- end else if (l1_aw_drop[i]) begin
- l1_m0_aw_drop[i] = 1'b1;
- l1_mx_aw_done[i] = l1_m0_aw_done[i];
- int_m0_awvalid[i] = int_awvalid[i];
- int_awready[i] = l1_m0_aw_done[i];
- // save on both masters
- end else if (l1_aw_save[i]) begin
- // split save: request only from masters that have not yet signalled done
- l1_m0_aw_save[i] = ~l1_m0_aw_done_SP[i];
- l1_m1_aw_save[i] = ~l1_m1_aw_done_SP[i];
- // combine done: both sticky done flags must be set
- l1_mx_aw_done[i] = l1_m0_aw_done_SP[i] & l1_m1_aw_done_SP[i];
- int_m0_awvalid[i] = int_awvalid[i];
- int_m1_awvalid[i] = int_awvalid[i];
- // uses the combined done assigned just above
- int_awready[i] = l1_mx_aw_done[i];
- end
- end
- // Report the combined done of the two AW senders back to the handshake splitter.
- assign l1_aw_done[i] = l1_mx_aw_done[i];
- // Sticky per-master done flags for the L1 AW handshake. A flag is set once the respective
- // sender signals done and stays set until the combined done (or reset) clears both.
- always_ff @(posedge Clk_CI) begin : L1_MX_AW_DONE_REG
-   if ((Rst_RBI == 0) || l1_mx_aw_done[i]) begin
-     l1_m0_aw_done_SP[i] <= 1'b0;
-     l1_m1_aw_done_SP[i] <= 1'b0;
-   end else begin
-     l1_m0_aw_done_SP[i] <= l1_m0_aw_done_SP[i] | l1_m0_aw_done[i];
-     l1_m1_aw_done_SP[i] <= l1_m1_aw_done_SP[i] | l1_m1_aw_done[i];
-   end
- end
- /*
-  * Split of the L2 write-address (AW) handshake onto the two AW senders.
-  *
-  * An L2 accept is forwarded to the master chosen by l2_master_select, while the other master
-  * gets a drop for the same transaction so that it becomes available again for save requests
-  * from L1_DROP_SAVE. An L2 drop is forwarded to both masters.
-  */
- always_comb begin : AW_L2_SPLIT
-   // combine done: both masters must have completed their part
-   l2_mx_aw_done[i] = l2_m0_aw_done_SP[i] & l2_m1_aw_done_SP[i];
-   l2_aw_done[i] = l2_mx_aw_done[i];
-   // defaults: no request towards either master
-   l2_m0_aw_accept[i] = 1'b0;
-   l2_m1_aw_accept[i] = 1'b0;
-   l2_m0_aw_drop[i] = 1'b0;
-   l2_m1_aw_drop[i] = 1'b0;
-   // Every request is de-asserted individually once the respective master has signalled done.
-   if (l2_aw_accept[i] && l2_master_select[i]) begin
-     // accept on master 1, drop on master 0
-     l2_m1_aw_accept[i] = ~l2_m1_aw_done_SP[i];
-     l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i];
-   end else if (l2_aw_accept[i]) begin
-     // accept on master 0, drop on master 1
-     l2_m0_aw_accept[i] = ~l2_m0_aw_done_SP[i];
-     l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i];
-   end else begin
-     // no accept pending: pass a drop request on to both masters
-     l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0;
-     l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0;
-   end
- end
- // Sticky per-master done flags for the L2 AW handshake. A flag is set once the respective
- // sender signals done and stays set until the combined done (or reset) clears both.
- always_ff @(posedge Clk_CI) begin : L2_MX_AW_DONE_REG
-   if ((Rst_RBI == 0) || l2_mx_aw_done[i]) begin
-     l2_m0_aw_done_SP[i] <= 1'b0;
-     l2_m1_aw_done_SP[i] <= 1'b0;
-   end else begin
-     l2_m0_aw_done_SP[i] <= l2_m0_aw_done_SP[i] | l2_m0_aw_done[i];
-     l2_m1_aw_done_SP[i] <= l2_m1_aw_done_SP[i] | l2_m1_aw_done[i];
-   end
- end
- /*
- * Split of the L1 read-address (AR) handshake onto the two AR senders.
- *
- * When accepting L1 transactions, we must just do so on the selected master. Drop requests must
- * be performed on any one of the two masters. Save requests must be performed by both masters.
- *
- * The three request types are evaluated with fixed priority: accept, then drop, then save.
- */
- always_comb begin : AR_L1_SPLIT
- // TLB handshake (defaults: nothing requested, nothing done)
- l1_m0_ar_accept[i] = 1'b0;
- l1_m1_ar_accept[i] = 1'b0;
- l1_m0_ar_drop[i] = 1'b0;
- l1_m1_ar_drop[i] = 1'b0;
- l1_m0_ar_save[i] = 1'b0;
- l1_m1_ar_save[i] = 1'b0;
- l1_mx_ar_done[i] = 1'b0;
- // AXI sender input handshake (defaults: no valid to either sender, not ready upstream)
- int_m0_arvalid[i] = 1'b0;
- int_m1_arvalid[i] = 1'b0;
- int_arready[i] = 1'b0;
- // accept on selected master only; done and ready come from that master
- if (l1_ar_accept[i]) begin
- if (int_rmaster_select[i]) begin
- l1_m1_ar_accept[i] = 1'b1;
- l1_mx_ar_done[i] = l1_m1_ar_done[i];
- int_m1_arvalid[i] = int_arvalid[i];
- int_arready[i] = int_m1_arready[i];
- end else begin
- l1_m0_ar_accept[i] = 1'b1;
- l1_mx_ar_done[i] = l1_m0_ar_done[i];
- int_m0_arvalid[i] = int_arvalid[i];
- int_arready[i] = int_m0_arready[i];
- end
- // drop on Master 0 only; the upstream ready is the done of Master 0
- end else if (l1_ar_drop[i]) begin
- l1_m0_ar_drop[i] = 1'b1;
- l1_mx_ar_done[i] = l1_m0_ar_done[i];
- int_m0_arvalid[i] = int_arvalid[i];
- int_arready[i] = l1_m0_ar_done[i];
- // save on both masters
- end else if (l1_ar_save[i]) begin
- // split save: request only from masters that have not yet signalled done
- l1_m0_ar_save[i] = ~l1_m0_ar_done_SP[i];
- l1_m1_ar_save[i] = ~l1_m1_ar_done_SP[i];
- // combine done: both sticky done flags must be set
- l1_mx_ar_done[i] = l1_m0_ar_done_SP[i] & l1_m1_ar_done_SP[i];
- int_m0_arvalid[i] = int_arvalid[i];
- int_m1_arvalid[i] = int_arvalid[i];
- // uses the combined done assigned just above
- int_arready[i] = l1_mx_ar_done[i];
- end
- end
- // Report the combined done of the two AR senders back to the handshake splitter.
- assign l1_ar_done[i] = l1_mx_ar_done[i];
- // Sticky per-master done flags for the L1 AR handshake. A flag is set once the respective
- // sender signals done and stays set until the combined done (or reset) clears both.
- always_ff @(posedge Clk_CI) begin : L1_MX_AR_DONE_REG
-   if ((Rst_RBI == 0) || l1_mx_ar_done[i]) begin
-     l1_m0_ar_done_SP[i] <= 1'b0;
-     l1_m1_ar_done_SP[i] <= 1'b0;
-   end else begin
-     l1_m0_ar_done_SP[i] <= l1_m0_ar_done_SP[i] | l1_m0_ar_done[i];
-     l1_m1_ar_done_SP[i] <= l1_m1_ar_done_SP[i] | l1_m1_ar_done[i];
-   end
- end
- /*
-  * Split of the L2 read-address (AR) handshake onto the two AR senders.
-  *
-  * An L2 accept is forwarded to the master chosen by l2_master_select, while the other master
-  * gets a drop for the same transaction so that it becomes available again for save requests
-  * from L1_DROP_SAVE. An L2 drop is forwarded to both masters.
-  */
- always_comb begin : AR_L2_SPLIT
-   // combine done: both masters must have completed their part
-   l2_mx_ar_done[i] = l2_m0_ar_done_SP[i] & l2_m1_ar_done_SP[i];
-   l2_ar_done[i] = l2_mx_ar_done[i];
-   // defaults: no request towards either master
-   l2_m0_ar_accept[i] = 1'b0;
-   l2_m1_ar_accept[i] = 1'b0;
-   l2_m0_ar_drop[i] = 1'b0;
-   l2_m1_ar_drop[i] = 1'b0;
-   // Every request is de-asserted individually once the respective master has signalled done.
-   if (l2_ar_accept[i] && l2_master_select[i]) begin
-     // accept on master 1, drop on master 0
-     l2_m1_ar_accept[i] = ~l2_m1_ar_done_SP[i];
-     l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i];
-   end else if (l2_ar_accept[i]) begin
-     // accept on master 0, drop on master 1
-     l2_m0_ar_accept[i] = ~l2_m0_ar_done_SP[i];
-     l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i];
-   end else if (l2_ar_drop[i]) begin
-     // no accept pending: pass the drop request on to both masters
-     l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0;
-     l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0;
-   end
- end
- // Sticky per-master done flags for the L2 AR handshake. A flag is set once the respective
- // sender signals done and stays set until the combined done (or reset) clears both.
- always_ff @(posedge Clk_CI) begin : L2_MX_AR_DONE_REG
-   if ((Rst_RBI == 0) || l2_mx_ar_done[i]) begin
-     l2_m0_ar_done_SP[i] <= 1'b0;
-     l2_m1_ar_done_SP[i] <= 1'b0;
-   end else begin
-     l2_m0_ar_done_SP[i] <= l2_m0_ar_done_SP[i] | l2_m0_ar_done[i];
-     l2_m1_ar_done_SP[i] <= l2_m1_ar_done_SP[i] | l2_m1_ar_done[i];
-   end
- end
- end // AX_SPLIT
- endgenerate // AX_SPLIT
- // }}}
- // ██╗ ██╗███████╗ ███████╗██████╗ ██╗ ██╗████████╗
- // ██║ ██║██╔════╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝
- // ███████║███████╗ ███████╗██████╔╝██║ ██║ ██║
- // ██╔══██║╚════██║ ╚════██║██╔═══╝ ██║ ██║ ██║
- // ██║ ██║███████║ ███████║██║ ███████╗██║ ██║
- // ╚═╝ ╚═╝╚══════╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝
- //
- /*
- * We need to perform combined handshakes with multiple AXI modules
- * upon transactions drops, accepts, saves etc. from two TLBs.
- */
- generate for (i = 0; i < N_PORTS; i++) begin : HANDSHAKE_SPLIT
-   // L1 write accepts are held back while aw_out_stall is asserted; the combined AW+W done is
-   // reported to the L1 TLB as "sent".
-   assign l1_xw_accept[i] = int_wtrans_accept[i] & ~aw_out_stall[i];
-   assign int_wtrans_sent[i] = l1_xw_done[i];
-   // L1 read accepts are passed through unchanged.
-   assign l1_ar_accept[i] = int_rtrans_accept[i];
-   assign int_rtrans_sent[i] = l1_ar_done[i];
-   /*
-    * L1 AW sender + W buffer handshake split
-    *
-    * A combined request (l1_xw_*) is forwarded to the AW and W side individually and is
-    * withdrawn from a side as soon as that side has signalled done.
-    */
-   // forward
-   assign l1_aw_accept[i] = l1_xw_accept[i] & ~l1_aw_done_SP[i];
-   assign l1_w_accept[i] = l1_xw_accept[i] & ~l1_w_done_SP[i];
-   assign l1_aw_save[i] = l1_xw_save[i] & ~l1_aw_done_SP[i];
-   assign l1_w_save[i] = l1_xw_save[i] & ~l1_w_done_SP[i];
-   assign l1_aw_drop[i] = l1_xw_drop[i] & ~l1_aw_done_SP[i];
-   assign l1_w_drop[i] = l1_xw_drop[i] & ~l1_w_done_SP[i];
-   // backward: done once both sides are done
-   assign l1_xw_done[i] = l1_aw_done_SP[i] & l1_w_done_SP[i];
-   // sticky done flags, cleared by the combined done
-   always_ff @(posedge Clk_CI) begin : L1_XW_HS_SPLIT
-     if (Rst_RBI == 0) begin
-       l1_aw_done_SP[i] <= 1'b0;
-       l1_w_done_SP[i] <= 1'b0;
-     end else if (l1_xw_done[i]) begin
-       l1_aw_done_SP[i] <= 1'b0;
-       l1_w_done_SP[i] <= 1'b0;
-     end else begin
-       l1_aw_done_SP[i] <= l1_aw_done_SP[i] | l1_aw_done[i];
-       l1_w_done_SP[i] <= l1_w_done_SP[i] | l1_w_done[i];
-     end
-   end
-   if (ENABLE_L2TLB[i] == 1) begin : L2_HS_SPLIT
-     /*
-      * L1 AR sender + R sender handshake split
-      *
-      * AR and R do not need to be strictly in sync. We thus use separate handshakes.
-      * But the handshake signals for the R sender are multiplexed with those for
-      * the L2. However, L2_ACCEPT_DROP_SAVE has always higher priority.
-      */
-     assign lx_r_drop[i] = l2_r_drop[i] | l1_r_drop[i];
-     // the done of the R sender is attributed to L2 whenever an L2 drop is pending
-     assign l1_r_done[i] = l2_r_drop[i] ? 1'b0 : lx_r_done[i];
-     assign l2_r_done[i] = l2_r_drop[i] ? lx_r_done[i] : 1'b0;
-     /*
-      * L2 AW sender + W buffer handshake split
-      */
-     // forward
-     assign l2_aw_accept[i] = l2_xw_accept[i] & ~l2_aw_done_SP[i];
-     assign l2_w_accept[i] = l2_xw_accept[i] & ~l2_w_done_SP[i];
-     assign l2_aw_drop[i] = l2_xw_drop[i] & ~l2_aw_done_SP[i];
-     assign l2_w_drop[i] = l2_xw_drop[i] & ~l2_w_done_SP[i];
-     // backward
-     assign l2_xw_done[i] = l2_aw_done_SP[i] & l2_w_done_SP[i];
-     always_ff @(posedge Clk_CI) begin : L2_XW_HS_SPLIT
-       if (Rst_RBI == 0) begin
-         l2_aw_done_SP[i] <= 1'b0;
-         l2_w_done_SP[i] <= 1'b0;
-       end else if (l2_xw_done[i]) begin
-         l2_aw_done_SP[i] <= 1'b0;
-         l2_w_done_SP[i] <= 1'b0;
-       end else begin
-         l2_aw_done_SP[i] <= l2_aw_done_SP[i] | l2_aw_done[i];
-         l2_w_done_SP[i] <= l2_w_done_SP[i] | l2_w_done[i];
-       end
-     end
-     /*
-      * L2 AR + R sender handshake split
-      */
-     // forward
-     assign l2_ar_drop[i] = l2_xr_drop[i] & ~l2_ar_done_SP[i];
-     assign l2_r_drop[i] = l2_xr_drop[i] & ~l2_r_done_SP[i];
-     // backward - make sure to always clear L2_XR_HS_SPLIT
-     always_comb begin
-       if (l2_xr_drop[i]) begin
-         // drop: both the AR and the R side must be done
-         l2_xr_done[i] = l2_ar_done_SP[i] & l2_r_done_SP[i];
-       end else begin
-         // no drop pending: the AR side alone determines done
-         l2_xr_done[i] = l2_ar_done_SP[i];
-       end
-     end
-     always_ff @(posedge Clk_CI) begin : L2_XR_HS_SPLIT
-       if (Rst_RBI == 0) begin
-         l2_ar_done_SP[i] <= 1'b0;
-         l2_r_done_SP[i] <= 1'b0;
-       end else if (l2_xr_done[i]) begin
-         l2_ar_done_SP[i] <= 1'b0;
-         l2_r_done_SP[i] <= 1'b0;
-       end else begin
-         l2_ar_done_SP[i] <= l2_ar_done_SP[i] | l2_ar_done[i];
-         l2_r_done_SP[i] <= l2_r_done_SP[i] | l2_r_done[i];
-       end
-     end
-   end else begin // if (ENABLE_L2TLB[i] == 1)
-     // No L2 TLB on this port: the R sender only serves L1 drops and all L2 handshake
-     // signals are tied off.
-     assign lx_r_drop[i] = l1_r_drop[i];
-     assign l1_r_done[i] = lx_r_done[i];
-     assign l2_aw_accept[i] = 1'b0;
-     assign l2_w_accept[i] = 1'b0;
-     assign l2_aw_drop[i] = 1'b0;
-     assign l2_w_drop[i] = 1'b0;
-     assign l2_xw_done[i] = 1'b0;
-     assign l2_aw_done_SP[i] = 1'b0;
-     assign l2_w_done_SP[i] = 1'b0;
-     assign l2_ar_accept[i] = 1'b0;
-     assign l2_ar_drop[i] = 1'b0;
-     assign l2_r_drop[i] = 1'b0;
-     assign l2_xr_done[i] = 1'b0;
-     assign l2_r_done[i] = 1'b0;
-     assign l2_ar_done_SP[i] = 1'b0;
-     assign l2_r_done_SP[i] = 1'b0;
-   end // if (ENABLE_L2TLB[i] == 1)
- end // HANDSHAKE_SPLIT
- endgenerate // HANDSHAKE_SPLIT
- // }}}
- // L2 TLB {{{
- // ██╗ ██████╗ ████████╗██╗ ██████╗
- // ██║ ╚════██╗ ╚══██╔══╝██║ ██╔══██╗
- // ██║ █████╔╝ ██║ ██║ ██████╔╝
- // ██║ ██╔═══╝ ██║ ██║ ██╔══██╗
- // ███████╗███████╗ ██║ ███████╗██████╔╝
- // ╚══════╝╚══════╝ ╚═╝ ╚══════╝╚═════╝
- //
- /*
- * l2_tlb
- *
- * The L2 TLB translates addresses upon misses in the L1 TLB (rab_core).
- *
- * It supports one ongoing translation at a time. If an L1 miss occurs while the L2 is busy,
- * the L1 is stalled until the L2 is available again.
- *
- */
- generate for (i = 0; i < N_PORTS; i++) begin : L2_TLB
- if (ENABLE_L2TLB[i] == 1) begin : L2_TLB
- /*
- * L1 output selector
- *
- * Derives the read/write type, protection and multi-hit flags of the L1 output for this port.
- */
- // 1 when the write port requests a drop, 0 otherwise
- assign L1OutRwType_D[i] = int_wtrans_drop[i] ? 1'b1 : 1'b0;
- assign L1OutProt_D[i] = rab_prot[i];
- assign L1OutMulti_D[i] = rab_multi[i];
- /*
- * L1 output control + L1_DROP_BUF, L2_IN_BUF management
- *
- * Forward the L1 drop request to AR/AW sender modules if
- * 1. the transactions needs to be dropped (L1 multi, prot, prefetch), or
- * 2. if a lookup in the L2 TLB is required (L1 miss) and the input buffer is not full.
- *
- * The AR/AW senders do not support more than 1 outstanding L1 miss. The push back towards
- * the upstream is realized by not accepting the save request (saving the L1 transaction)
- * in the senders as long as the L2 TLB is busy or has valid output. This ultimately
- * blocks the L1 TLB.
- *
- * Together with the AW drop/save, we also perform the W drop/save as AW and W need to
- * absolutely remain in order. In contrast, the R drop is performed separately in
- * L2_ACCEPT_DROP_SAVE (see l1_r_drop).
- */
- always_comb begin : L1_DROP_SAVE
- // defaults: no drop/save request
- l1_ar_drop[i] = 1'b0;
- l1_ar_save[i] = 1'b0;
- l1_xw_drop[i] = 1'b0;
- l1_xw_save[i] = 1'b0;
- // drop attributes are taken directly from the L1 output
- l1_id_drop[i] = L1OutId_D[i];
- l1_len_drop[i] = L1OutLen_D[i];
- l1_prefetch_drop[i] = rab_prefetch[i];
- l1_hit_drop[i] = 1'b1; // there are no drops for L1 misses
- // buffer write enables, only asserted upon a completed handshake
- L1DropEn_S[i] = 1'b0;
- L2InEn_S[i] = 1'b0;
- if ( rab_prot[i] | rab_multi[i] | rab_prefetch[i] ) begin
- // 1. Drop - only while L1_DROP_BUF does not already hold a valid entry
- l1_ar_drop[i] = int_rtrans_drop[i] & ~L1DropValid_SP[i];
- l1_xw_drop[i] = int_wtrans_drop[i] & ~L1DropValid_SP[i];
- // Store to L1_DROP_BUF upon handshake
- L1DropEn_S[i] = (l1_ar_drop[i] & l1_ar_done[i]) |
- (l1_xw_drop[i] & l1_xw_done[i]);
- end else if ( rab_miss[i] ) begin
- // 2. Save - Make sure L2 is really available.
- l1_ar_save[i] = int_rtrans_drop[i] & ~L2Busy_S[i];
- l1_xw_save[i] = int_wtrans_drop[i] & ~L2Busy_S[i];
- // Store to L2_IN_BUF upon handshake - triggers the L2 TLB
- L2InEn_S[i] = (l1_ar_save[i] & l1_ar_done[i]) |
- (l1_xw_save[i] & l1_xw_done[i]);
- end
- end
- /*
- * L2 output control + L2_OUT_BUF management + R/B sender control + W buffer control
- *
- * Perform L1 R transaction drops unless the L2 output buffer holds valid data. The AXI specs
- * require the B response to be sent only after consuming/discarding the corresponding data
- * in the W channel. Thus, we only send L2 drop request to the W buffer here. The drop
- * request to the B sender is then sent by the W buffer autonomously.
- *
- * L1 AW/W drop requests are managed by L1_DROP_SAVE.
- */
- always_comb begin : L2_ACCEPT_DROP_SAVE
- l2_ar_addr[i] = 'b0;
- l2_aw_addr[i] = 'b0;
- l2_ar_accept[i] = 1'b0;
- l2_xr_drop[i] = 1'b0;
- l2_xw_accept[i] = 1'b0;
- l2_xw_drop[i] = 1'b0;
- l1_r_drop[i] = 1'b0;
- lx_id_drop[i] = 'b0;
- lx_len_drop[i] = 'b0;
- lx_prefetch_drop[i] = 1'b0;
- lx_hit_drop[i] = 1'b0;
- L1DropValid_SN[i] = L1DropValid_SP[i] | L1DropEn_S[i];
- L2OutValid_SN[i] = L2OutValid_SP[i];
- L2OutReady_S[i] = 1'b0;
- L2OutEn_S[i] = 1'b0;
- L2Miss_S[i] = 1'b0;
- int_multi[i] = 1'b0;
- int_prot[i] = 1'b0;
- if (L2OutValid_SP[i] == 1'b0) begin
- // Drop L1 from R senders
- if (L1DropValid_SP[i] == 1'b1) begin
- // Only perform the R sender drop here.
- if (~L1DropRwType_DP[i]) begin
- l1_r_drop[i] = 1'b1;
- lx_id_drop[i] = L1DropId_DP[i];
- lx_len_drop[i] = L1DropLen_DP[i];
- lx_prefetch_drop[i] = L1DropPrefetch_S[i];
- lx_hit_drop[i] = 1'b1; // there are no drops for L1 misses
- // Invalidate L1_DROP_BUF upon handshake
- if ( l1_r_drop[i] & l1_r_done[i] ) begin
- L1DropValid_SN[i] = 1'b0;
- int_prot[i] = L1DropProt_DP[i];
- int_multi[i] = L1DropMulti_DP[i];
- end
- end else begin
- // Invalidate L1_DROP_BUF
- L1DropValid_SN[i] = 1'b0;
- int_prot[i] = L1DropProt_DP[i];
- int_multi[i] = L1DropMulti_DP[i];
- end
- end
- end else begin // L2_OUT_BUF has valid data
- if ( L2OutHit_SP[i] & ~(L2OutPrefetch_S[i] | L2OutProt_SP[i] | L2OutMulti_SP[i]) ) begin
- l2_ar_addr[i] = L2OutAddr_DP[i];
- l2_aw_addr[i] = L2OutAddr_DP[i];
- l2_ar_accept[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1;
- l2_xw_accept[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0;
- // Invalidate L2_OUT_BUF upon handshake
- L2OutValid_SN[i] = ~( (l2_ar_accept[i] & l2_ar_done[i]) |
- (l2_xw_accept[i] & l2_xw_done[i]) );
- end else begin
- lx_id_drop[i] = L2OutId_DP[i];
- lx_len_drop[i] = L2OutLen_DP[i];
- lx_prefetch_drop[i] = L2OutPrefetch_S[i];
- lx_hit_drop[i] = L2OutHit_SP[i];
- // The l2_xr_drop will also perform the handshake with the R sender
- l2_xr_drop[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1;
- l2_xw_drop[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0;
- // Invalidate L2_OUT_BUF upon handshake
- if ( (l2_xr_drop[i] & l2_xr_done[i]) | (l2_xw_drop[i] & l2_xw_done[i]) ) begin
- L2OutValid_SN[i] = 1'b0;
- L2Miss_S[i] = ~L2OutHit_SP[i];
- int_prot[i] = L2OutProt_SP[i];
- int_multi[i] = L2OutMulti_SP[i];
- end
- end
- end
- // Only accept new L2 output after ongoing drops have finished.
- if ( (l2_xr_drop[i] == l2_xr_done[i]) &
- (l2_xw_drop[i] == l2_xw_done[i]) &
- (l1_r_drop[i] == l1_r_done[i] ) ) begin
- // Store to L2_OUT_BUF upon handshake with L2 TLB module
- if ( (L2OutValid_SP[i] == 1'b0) && (L2OutValid_S[i] == 1'b1) ) begin
- L2OutValid_SN[i] = 1'b1;
- L2OutReady_S[i] = 1'b1;
- L2OutEn_S[i] = 1'b1;
- end
- end
- end
- /*
- * L1 drop buffer
- *
- * Used in case of multi, prot and prefetch hits in the L1 TLB.
- */
- always_ff @(posedge Clk_CI) begin : L1_DROP_BUF
- if (Rst_RBI == 0) begin
- L1DropProt_DP[i] <= 1'b0;
- L1DropMulti_DP[i] <= 1'b0;
- L1DropRwType_DP[i] <= 1'b0;
- L1DropUser_DP[i] <= 'b0;
- L1DropId_DP[i] <= 'b0;
- L1DropLen_DP[i] <= 'b0;
- L1DropAddr_DP[i] <= 'b0;
- end else if (L1DropEn_S[i] == 1'b1) begin
- L1DropProt_DP[i] <= L1OutProt_D[i] ;
- L1DropMulti_DP[i] <= L1OutMulti_D[i] ;
- L1DropRwType_DP[i] <= L1OutRwType_D[i];
- L1DropUser_DP[i] <= L1OutUser_D[i] ;
- L1DropId_DP[i] <= L1OutId_D[i] ;
- L1DropLen_DP[i] <= L1OutLen_D[i] ;
- L1DropAddr_DP[i] <= L1OutAddr_D[i] ;
- end
- end // always_ff @ (posedge Clk_CI)
- /*
- * L2 input buffer
- *
- * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB.
- */
- always_ff @(posedge Clk_CI) begin : L2_IN_BUF
- if (Rst_RBI == 0) begin
- L2InRwType_DP[i] <= 1'b0;
- L2InUser_DP[i] <= 'b0;
- L2InId_DP[i] <= 'b0;
- L2InLen_DP[i] <= 'b0;
- L2InAddr_DP[i] <= 'b0;
- end else if (L2InEn_S[i] == 1'b1) begin
- L2InRwType_DP[i] <= L1OutRwType_D[i];
- L2InUser_DP[i] <= L1OutUser_D[i] ;
- L2InId_DP[i] <= L1OutId_D[i] ;
- L2InLen_DP[i] <= L1OutLen_D[i] ;
- L2InAddr_DP[i] <= L1OutAddr_D[i] ;
- end
- end // always_ff @ (posedge Clk_CI)
- l2_tlb
- #(
- .N_SETS ( `RAB_L2_N_SETS ),
- )
- u_l2_tlb
- (
- .clk_i ( Clk_CI ),
- .rst_ni ( Rst_RBI ),
- // Config inputs
- .we_i ( L2CfgWE_S[i] ),
- .waddr_i ( L2CfgWAddr_D[i] ),
- .wdata_i ( L2CfgWData_D[i] ),
- // Request input
- .start_i ( L2InEn_S[i] ),
- .busy_o ( L2Busy_S[i] ),
- .rw_type_i ( L2InRwType_DP[i] ),
- .in_addr_i ( L2InAddr_DP[i] ),
- // Response output
- .out_ready_i ( L2OutReady_S[i] ),
- .out_valid_o ( L2OutValid_S[i] ),
- .hit_o ( L2OutHit_SN[i] ),
- .miss_o ( L2OutMiss_SN[i] ),
- .prot_o ( L2OutProt_SN[i] ),
- .multi_o ( L2OutMulti_SN[i] ),
- .cache_coherent_o ( L2OutCC_SN[i] ),
- .out_addr_o ( L2OutAddr_DN[i] )
- );
- /*
- * L2 output buffer
- *
- * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB.
- */
- always_ff @(posedge Clk_CI) begin : L2_OUT_BUF
- if (Rst_RBI == 0) begin
- L2OutRwType_DP[i] <= 1'b0;
- L2OutUser_DP[i] <= 'b0;
- L2OutLen_DP[i] <= 'b0;
- L2OutId_DP[i] <= 'b0;
- L2OutInAddr_DP[i] <= 'b0;
- L2OutHit_SP[i] <= 1'b0;
- L2OutMiss_SP[i] <= 1'b0;
- L2OutProt_SP[i] <= 1'b0;
- L2OutMulti_SP[i] <= 1'b0;
- L2OutCC_SP[i] <= 1'b0;
- L2OutAddr_DP[i] <= 'b0;
- end else if (L2OutEn_S[i] == 1'b1) begin
- L2OutRwType_DP[i] <= L2InRwType_DP[i];
- L2OutUser_DP[i] <= L2InUser_DP[i] ;
- L2OutLen_DP[i] <= L2InLen_DP[i] ;
- L2OutId_DP[i] <= L2InId_DP[i] ;
- L2OutInAddr_DP[i] <= L2InAddr_DP[i] ;
- L2OutHit_SP[i] <= L2OutHit_SN[i] ;
- L2OutMiss_SP[i] <= L2OutMiss_SN[i] ;
- L2OutProt_SP[i] <= L2OutProt_SN[i] ;
- L2OutMulti_SP[i] <= L2OutMulti_SN[i];
- L2OutCC_SP[i] <= L2OutCC_SN[i] ;
- L2OutAddr_DP[i] <= L2OutAddr_DN[i] ;
- end
- end // always_ff @ (posedge Clk_CI)
- // Valid flags of L1_DROP_BUF and L2_OUT_BUF.
- // Non-blocking assignments are required here: L1DropValid_SP/L2OutValid_SP feed
- // combinational logic (e.g. L1DropEn_S, L2OutEn_S) that other always_ff blocks sample
- // on the same clock edge. A blocking update would make the simulation result depend
- // on process evaluation order.
- always_ff @(posedge Clk_CI) begin : BUF_VALID
-   if (Rst_RBI == 0) begin
-     L1DropValid_SP[i] <= 1'b0;
-     L2OutValid_SP[i]  <= 1'b0;
-   end else begin
-     L1DropValid_SP[i] <= L1DropValid_SN[i];
-     L2OutValid_SP[i]  <= L2OutValid_SN[i];
-   end
- end
- always_comb begin : BUF_TO_PREFETCH
- // L1 Drop Buf
- if (L1DropUser_DP[i] == {AXI_USER_WIDTH{1'b1}})
- L1DropPrefetch_S[i] = 1'b1;
- else
- L1DropPrefetch_S[i] = 1'b0;
- // L2 Out Buf
- if (L2OutUser_DP[i] == {AXI_USER_WIDTH{1'b1}})
- L2OutPrefetch_S[i] = 1'b1;
- else
- L2OutPrefetch_S[i] = 1'b0;
- end
- assign l2_cache_coherent[i] = L2OutCC_SP[i];
- assign int_miss[i] = L2Miss_S[i];
- end else begin : L2_TLB_STUB // if (ENABLE_L2TLB[i] == 1)
- assign l1_ar_drop[i] = int_rtrans_drop[i];
- assign l1_r_drop[i] = int_rtrans_drop[i];
- assign l1_xw_drop[i] = int_wtrans_drop[i];
- assign l1_ar_save[i] = 1'b0;
- assign l1_xw_save[i] = 1'b0;
- assign l2_xw_accept[i] = 1'b0;
- assign l2_xr_drop[i] = 1'b0;
- assign l2_xw_drop[i] = 1'b0;
- assign l2_ar_addr[i] = 'b0;
- assign l2_aw_addr[i] = 'b0;
- assign l1_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] :
- int_rtrans_drop[i] ? int_arid[i] :
- '0;
- assign l1_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] :
- int_rtrans_drop[i] ? int_arlen[i] :
- '0;
- assign l1_prefetch_drop[i] = rab_prefetch[i];
- assign l1_hit_drop[i] = ~rab_miss[i];
- assign lx_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] :
- int_rtrans_drop[i] ? int_arid[i] :
- '0;
- assign lx_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] :
- int_rtrans_drop[i] ? int_arlen[i] :
- '0;
- assign lx_prefetch_drop[i] = rab_prefetch[i];
- assign lx_hit_drop[i] = ~rab_miss[i];
- assign l2_cache_coherent[i] = 1'b0;
- assign int_miss[i] = rab_miss[i];
- assign int_prot[i] = rab_prot[i];
- assign int_multi[i] = rab_multi[i];
- // unused signals
- assign L2Miss_S[i] = 1'b0;
- assign L1OutRwType_D[i] = 1'b0;
- assign L1OutProt_D[i] = 1'b0;
- assign L1OutMulti_D[i] = 1'b0;
- assign L1DropRwType_DP[i] = 1'b0;
- assign L1DropUser_DP[i] = 'b0;
- assign L1DropId_DP[i] = 'b0;
- assign L1DropLen_DP[i] = 'b0;
- assign L1DropAddr_DP[i] = 'b0;
- assign L1DropProt_DP[i] = 1'b0;
- assign L1DropMulti_DP[i] = 1'b0;
- assign L1DropEn_S[i] = 1'b0;
- assign L1DropPrefetch_S[i] = 1'b0;
- assign L1DropValid_SN[i] = 1'b0;
- assign L1DropValid_SP[i] = 1'b0;
- assign L2InRwType_DP[i] = 1'b0;
- assign L2InUser_DP[i] = 'b0;
- assign L2InId_DP[i] = 'b0;
- assign L2InLen_DP[i] = 'b0;
- assign L2InAddr_DP[i] = 'b0;
- assign L2InEn_S[i] = 1'b0;
- assign L2OutHit_SN[i] = 1'b0;
- assign L2OutMiss_SN[i] = 1'b0;
- assign L2OutProt_SN[i] = 1'b0;
- assign L2OutMulti_SN[i] = 1'b0;
- assign L2OutCC_SN[i] = 1'b0;
- assign L2OutAddr_DN[i] = 'b0;
- assign L2OutRwType_DP[i] = 1'b0;
- assign L2OutUser_DP[i] = 'b0;
- assign L2OutId_DP[i] = 'b0;
- assign L2OutLen_DP[i] = 'b0;
- assign L2OutInAddr_DP[i] = 'b0;
- assign L2OutHit_SP[i] = 1'b0;
- assign L2OutMiss_SP[i] = 1'b0;
- assign L2OutProt_SP[i] = 1'b0;
- assign L2OutMulti_SP[i] = 1'b0;
- assign L2OutCC_SP[i] = 1'b0;
- assign L2OutAddr_DP[i] = 'b0;
- assign L2OutEn_S[i] = 1'b0;
- assign L2OutPrefetch_S[i] = 1'b0;
- assign L2Busy_S[i] = 1'b0;
- assign L2OutValid_S[i] = 1'b0;
- assign L2OutValid_SN[i] = 1'b0;
- assign L2OutValid_SP[i] = 1'b0;
- assign L2OutReady_S[i] = 1'b0;
- end // !`ifdef ENABLE_L2TLB
- end // for (i = 0; i < N_PORTS; i++)
- endgenerate
-// }}}
-# endmodule
-# // vim: ts=2 sw=2 sts=2 et nosmartindent autoindent foldmethod=marker
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class check_ram(Elaboratable):
- def __init__(self):
- self.clk_i = Signal() # input
- self.rst_ni = Signal() # input
- self.in_addr = Signal(ADDR_WIDTH) # input
- self.rw_type = Signal() # input
- self.ram_we = Signal() # input
- self.port0_addr = Signal(1+ERROR p_expression_25) # input
- self.port1_addr = Signal(1+ERROR p_expression_25) # input
- self.ram_wdata = Signal(RAM_DATA_WIDTH) # input
- self.output_sent = Signal() # input
- self.output_valid = Signal() # input
- self.offset_addr_d = Signal(OFFSET_WIDTH) # input
- self.hit_addr = Signal(1+ERROR p_expression_25) # output
- self.master = Signal() # output
- self.hit = Signal() # output
- self.multi_hit = Signal() # output
- self.prot = Signal() # output
- def elaborate(self, platform=None):
- m = Module()
- return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# //import CfMath::log2;
-# //`define MULTI_HIT_FULL_SET
-# module check_ram
-# //#(
-# // parameter ADDR_WIDTH = 32,
-# // parameter RAM_DATA_WIDTH = 32,
-# // parameter PAGE_SIZE = 4096, // 4kB
-# // parameter SET_WIDTH = 5,
-# // parameter OFFSET_WIDTH = 4
-# // )
-# (
-# input logic clk_i,
-# input logic rst_ni,
-# input logic [ADDR_WIDTH-1:0] in_addr,
-# input logic rw_type, // 1 => write, 0=> read
-# input logic ram_we,
-# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr,
-# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr,
-# input logic [RAM_DATA_WIDTH-1:0] ram_wdata,
-# input logic output_sent,
-# input logic output_valid,
-# input logic [OFFSET_WIDTH-1:0] offset_addr_d,
-# output logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr,
-# output logic master,
-# output logic hit,
-# output logic multi_hit,
-# output logic prot
-# );
-""" #docstring_begin
- localparam IGNORE_LSB = log2(PAGE_SIZE); // 12
- logic [RAM_DATA_WIDTH-1:0] port0_data_o, port1_data_o; // RAM read data outputs
- logic port0_hit, port1_hit; // Ram output matches in_addr
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr_saved, port1_addr_saved;
- // Hit FSM Signals
- typedef enum logic {SEARCH, HIT} hit_state_t;
- hit_state_t hit_SP; // Hit FSM state
- hit_state_t hit_SN; // Hit FSM next state
- // Multi Hit FSM signals
- typedef enum logic[1:0] {NO_HITS, ONE_HIT, MULTI_HIT} multi_state_t;
- multi_state_t multi_SP; // Multi Hit FSM state
- multi_state_t multi_SN; // Multi Hit FSM next state
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_saved;
- logic master_saved;
- //// --------------- Block RAM (Dual Port) -------------- ////
- // The outputs of the BRAMs are only valid if in the previous cycle:
- // 1. the inputs were valid, and
- // 2. the BRAM was not written to.
- // Otherwise, the outputs must be ignored which is controlled by the output_valid signal.
- // This signal is driven by the upper-level L2 TLB module.
- ram_tp_no_change #(
- )
- ram_tp_no_change_0
- (
- .clk ( clk_i ),
- .we ( ram_we ),
- .addr0 ( port0_addr ),
- .addr1 ( port1_addr ),
- .d_i ( ram_wdata ),
- .d0_o ( port0_data_o ),
- .d1_o ( port1_data_o )
- );
- //// Check Ram Outputs
- assign port0_hit = (port0_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port0_data_o[RAM_DATA_WIDTH-1:4]);
- assign port1_hit = (port1_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port1_data_o[RAM_DATA_WIDTH-1:4]);
- //// ----------------------------------------------------- /////
- //// ------------------- Check if Hit ------------------------ ////
- // FSM
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- hit_SP <= SEARCH;
- end else begin
- hit_SP <= hit_SN;
- end
- end
- always_ff @(posedge clk_i, negedge rst_ni) begin
- if (!rst_ni) begin
- port0_addr_saved <= '0;
- port1_addr_saved <= '0;
- end else begin
- port0_addr_saved <= port0_addr;
- port1_addr_saved <= port1_addr;
- end
- end
- always_comb begin
- hit_SN = hit_SP;
- hit = 1'b0;
- hit_addr = 0;
- master = 1'b0;
- unique case(hit_SP)
- if (output_valid)
- if (port0_hit || port1_hit) begin
- hit_SN = HIT;
- hit = 1'b1;
- hit_addr = port0_hit ? {port0_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} :
- port1_hit ? {port1_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} :
- 0;
- master = port0_hit ? port0_data_o[3] :
- port1_hit ? port1_data_o[3] :
- 1'b0;
- end
- HIT : begin
-`ifdef MULTI_HIT_FULL_SET // Since the search continues after the first hit, it needs to be saved to be accessed later.
- hit = 1'b1;
- hit_addr = hit_addr_saved;
- master = master_saved;
- if (output_sent)
- hit_SN = SEARCH;
- end
- default : begin
- hit_SN = SEARCH;
- end
- endcase // case (hit_SP)
- end // always_comb begin
- //// ------------------------------------------- ////
- assign prot = output_valid && port0_hit ? ((~port0_data_o[2] && rw_type) || (~port0_data_o[1] && ~rw_type)) :
- output_valid && port1_hit ? ((~port1_data_o[2] && rw_type) || (~port1_data_o[1] && ~rw_type)) :
- 1'b0;
- //// ------------------- Multi ------------------- ////
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- hit_addr_saved <= 0;
- master_saved <= 1'b0;
- end else if (output_valid) begin
- hit_addr_saved <= hit_addr;
- master_saved <= master;
- end
- end
- // FSM
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- multi_SP <= NO_HITS;
- end else begin
- multi_SP <= multi_SN;
- end
- end
- always_comb begin
- multi_SN = multi_SP;
- multi_hit = 1'b0;
- unique case(multi_SP)
- if(output_valid && (port0_hit && port1_hit)) begin
- multi_SN = MULTI_HIT;
- multi_hit = 1'b1;
- end else if(output_valid && (port0_hit || port1_hit))
- multi_SN = ONE_HIT;
- if(output_valid && (port0_hit || port1_hit)) begin
- multi_SN = MULTI_HIT;
- multi_hit = 1'b1;
- end else if (output_sent)
- multi_SN = NO_HITS;
- MULTI_HIT : begin
- multi_hit = 1'b1;
- if (output_sent)
- multi_SN = NO_HITS;
- end
- endcase // case (multi_SP)
- end // always_comb begin
-`else // !`ifdef MULTI_HIT_FULL_SET
- assign multi_hit = output_valid && port0_hit && port1_hit;
-`endif // !`ifdef MULTI_HIT_FULL_SET
- //// ------------------------------------------- ////
-# endmodule
+++ /dev/null
-class CoreConfig:  # container for fixed configuration constants
- def __init__(self):  # takes no arguments; every field is a hard-coded constant
- self.N_SLICES = 16  # NOTE(review): presumably the number of slices - confirm against users
- self.N_REGS = 4*self.N_SLICES  # four per slice, i.e. 64 with N_SLICES = 16
- self.ADDR_WIDTH_PHYS = 40  # NOTE(review): presumably physical address width in bits - confirm
- self.ADDR_WIDTH_VIRT = 32  # NOTE(review): presumably virtual address width in bits - confirm
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class fsm(Elaboratable):
- def __init__(self):
- self.Clk_CI = Signal() # input
- self.Rst_RBI = Signal() # input
- self.port1_addr_valid_i = Signal() # input
- self.port2_addr_valid_i = Signal() # input
- self.port1_sent_i = Signal() # input
- self.port2_sent_i = Signal() # input
- self.select_i = Signal() # input
- self.no_hit_i = Signal() # input
- self.multi_hit_i = Signal() # input
- self.no_prot_i = Signal() # input
- self.prefetch_i = Signal() # input
- self.out_addr_i = Signal(AXI_M_ADDR_WIDTH) # input
- self.cache_coherent_i = Signal() # input
- self.port1_accept_o = Signal() # output
- self.port1_drop_o = Signal() # output
- self.port1_miss_o = Signal() # output
- self.port2_accept_o = Signal() # output
- self.port2_drop_o = Signal() # output
- self.port2_miss_o = Signal() # output
- self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output
- self.cache_coherent_o = Signal() # output
- self.miss_o = Signal() # output
- self.multi_o = Signal() # output
- self.prot_o = Signal() # output
- self.prefetch_o = Signal() # output
- self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input
- self.in_id_i = Signal(AXI_ID_WIDTH) # input
- self.in_len_i = Signal(8) # input
- self.in_user_i = Signal(AXI_USER_WIDTH) # input
- self.in_addr_o = Signal(AXI_S_ADDR_WIDTH) # output
- self.in_id_o = Signal(AXI_ID_WIDTH) # output
- self.in_len_o = Signal(8) # output
- self.in_user_o = Signal(AXI_USER_WIDTH) # output
- def elaborate(self, platform=None):
- m = Module()
- return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# //`timescale 1ns / 1ps
-# module fsm
-# #(
-# parameter AXI_M_ADDR_WIDTH = 40,
-# parameter AXI_S_ADDR_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 8,
-# parameter AXI_USER_WIDTH = 6
-# )
-# (
-# input logic Clk_CI,
-# input logic Rst_RBI,
-# input logic port1_addr_valid_i,
-# input logic port2_addr_valid_i,
-# input logic port1_sent_i,
-# input logic port2_sent_i,
-# input logic select_i,
-# input logic no_hit_i,
-# input logic multi_hit_i,
-# input logic no_prot_i,
-# input logic prefetch_i,
-# input logic [AXI_M_ADDR_WIDTH-1:0] out_addr_i,
-# input logic cache_coherent_i,
-# output logic port1_accept_o,
-# output logic port1_drop_o,
-# output logic port1_miss_o,
-# output logic port2_accept_o,
-# output logic port2_drop_o,
-# output logic port2_miss_o,
-# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o,
-# output logic cache_coherent_o,
-# output logic miss_o,
-# output logic multi_o,
-# output logic prot_o,
-# output logic prefetch_o,
-# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i,
-# input logic [AXI_ID_WIDTH-1:0] in_id_i,
-# input logic [7:0] in_len_i,
-# input logic [AXI_USER_WIDTH-1:0] in_user_i,
-# output logic [AXI_S_ADDR_WIDTH-1:0] in_addr_o,
-# output logic [AXI_ID_WIDTH-1:0] in_id_o,
-# output logic [7:0] in_len_o,
-# output logic [AXI_USER_WIDTH-1:0] in_user_o
-# );
-""" #docstring_begin
- //-------------Internal Signals----------------------
- typedef enum logic {IDLE, WAIT} state_t;
- logic state_SP; // Present state
- logic state_SN; // Next State
- logic port1_accept_SN;
- logic port1_drop_SN;
- logic port1_miss_SN;
- logic port2_accept_SN;
- logic port2_drop_SN;
- logic port2_miss_SN;
- logic miss_SN;
- logic multi_SN;
- logic prot_SN;
- logic prefetch_SN;
- logic cache_coherent_SN;
- logic [AXI_M_ADDR_WIDTH-1:0] out_addr_DN;
- logic out_reg_en_S;
- //----------FSM comb------------------------------
- always_comb begin: FSM_COMBO
- state_SN = state_SP;
- port1_accept_SN = 1'b0;
- port1_drop_SN = 1'b0;
- port1_miss_SN = 1'b0;
- port2_accept_SN = 1'b0;
- port2_drop_SN = 1'b0;
- port2_miss_SN = 1'b0;
- miss_SN = 1'b0;
- multi_SN = 1'b0;
- prot_SN = 1'b0;
- prefetch_SN = 1'b0;
- cache_coherent_SN = 1'b0;
- out_addr_DN = '0;
- out_reg_en_S = 1'b0; // by default hold register output
- unique case(state_SP)
- IDLE :
- if ( (port1_addr_valid_i & select_i) | (port2_addr_valid_i & ~select_i) ) begin
- out_reg_en_S = 1'b1;
- state_SN = WAIT;
- // Select inputs for output registers
- if (port1_addr_valid_i & select_i) begin
- port1_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
- port1_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
- port1_miss_SN = no_hit_i;
- port2_accept_SN = 1'b0;
- port2_drop_SN = 1'b0;
- port2_miss_SN = 1'b0;
- end else if (port2_addr_valid_i & ~select_i) begin
- port1_accept_SN = 1'b0;
- port1_drop_SN = 1'b0;
- port1_miss_SN = 1'b0;
- port2_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
- port2_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
- port2_miss_SN = no_hit_i;
- end
- miss_SN = port1_miss_SN | port2_miss_SN;
- multi_SN = multi_hit_i;
- prot_SN = ~no_prot_i;
- prefetch_SN = ~no_hit_i & prefetch_i;
- cache_coherent_SN = cache_coherent_i;
- out_addr_DN = out_addr_i;
- end
- WAIT :
- if ( port1_sent_i | port2_sent_i ) begin
- out_reg_en_S = 1'b1; // "clear" the register
- state_SN = IDLE;
- end
- default : begin
- state_SN = IDLE;
- end
- endcase
- end
- //----------FSM seq-------------------------------
- always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: FSM_SEQ
- if (Rst_RBI == 1'b0)
- state_SP <= IDLE;
- else
- state_SP <= state_SN;
- end
- //----------Output seq--------------------------
- // Output registers: cleared on asynchronous active-low reset, otherwise updated only
- // while out_reg_en_S is high. Non-blocking assignments are used so that every
- // register is updated from the values sampled at the clock edge, independent of the
- // evaluation order of other clocked processes reading these outputs.
- always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: OUTPUT_SEQ
-   if (Rst_RBI == 1'b0) begin
-     port1_accept_o   <= 1'b0;
-     port1_drop_o     <= 1'b0;
-     port1_miss_o     <= 1'b0;
-     port2_accept_o   <= 1'b0;
-     port2_drop_o     <= 1'b0;
-     port2_miss_o     <= 1'b0;
-     miss_o           <= 1'b0;
-     multi_o          <= 1'b0;
-     prot_o           <= 1'b0;
-     prefetch_o       <= 1'b0;
-     cache_coherent_o <= 1'b0;
-     out_addr_o       <= '0;
-     in_addr_o        <= '0;
-     in_id_o          <= '0;
-     in_len_o         <= '0;
-     in_user_o        <= '0;
-   end else if (out_reg_en_S == 1'b1) begin
-     port1_accept_o   <= port1_accept_SN;
-     port1_drop_o     <= port1_drop_SN;
-     port1_miss_o     <= port1_miss_SN;
-     port2_accept_o   <= port2_accept_SN;
-     port2_drop_o     <= port2_drop_SN;
-     port2_miss_o     <= port2_miss_SN;
-     miss_o           <= miss_SN;
-     multi_o          <= multi_SN;
-     prot_o           <= prot_SN;
-     prefetch_o       <= prefetch_SN;
-     cache_coherent_o <= cache_coherent_SN;
-     out_addr_o       <= out_addr_DN;
-     in_addr_o        <= in_addr_i;
-     in_id_o          <= in_id_i;
-     in_len_o         <= in_len_i;
-     in_user_o        <= in_user_i;
-   end
- end // block: OUTPUT_SEQ
-# endmodule
+++ /dev/null
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class l2_tlb(Elaboratable):
- def __init__(self):
- self.clk_i = Signal() # input
- self.rst_ni = Signal() # input
- self.we_i = Signal() # input
- self.waddr_i = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.wdata_i = Signal(AXI_LITE_DATA_WIDTH) # input
- self.start_i = Signal() # input
- self.busy_o = Signal() # output
- self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input
- self.rw_type_i = Signal() # input
- self.out_ready_i = Signal() # input
- self.out_valid_o = Signal() # output
- self.hit_o = Signal() # output
- self.miss_o = Signal() # output
- self.prot_o = Signal() # output
- self.multi_o = Signal() # output
- self.cache_coherent_o = Signal() # output
- self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output
- def elaborate(self, platform=None):
- m = Module()
- return m
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# //`include "pulp_soc_defines.sv"
-# ////import CfMath::log2;
-# //`define MULTI_HIT_FULL_SET // Enable full multi hit detection. Always the entire set is searched.
-# //`define MULTI_HIT_CUR_CYCLE // Enable partial multi hit detection. Only multi hits in the same search cycle are detected.
-# //`ifdef MULTI_HIT_FULL_SET
-# // `ifndef MULTI_HIT_CUR_CYCLE
-# // `define MULTI_HIT_CUR_CYCLE
-# // `endif
-# //`endif
-# module l2_tlb
-# //#(
-# // parameter AXI_S_ADDR_WIDTH = 32,
-# // parameter AXI_M_ADDR_WIDTH = 40,
-# // parameter AXI_LITE_DATA_WIDTH = 64,
-# // parameter AXI_LITE_ADDR_WIDTH = 32,
-# // parameter N_SETS = 32,
-# // parameter N_OFFSETS = 4, //per port. There are 2 ports.
-# // parameter PAGE_SIZE = 4096, // 4kB
-# // parameter N_PAR_VA_RAMS = 4,
-# // parameter HIT_OFFSET_STORE_WIDTH = 2 // Num of bits of VA RAM offset stored. This should not be greater than OFFSET_WIDTH
-# // )
-# (
-# input logic clk_i,
-# input logic rst_ni,
-# input logic we_i,
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] waddr_i,
-# input logic [AXI_LITE_DATA_WIDTH-1:0] wdata_i,
-# input logic start_i,
-# output logic busy_o,
-# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i,
-# input logic rw_type_i, //1 => write, 0=> read
-# input logic out_ready_i,
-# output logic out_valid_o,
-# output logic hit_o,
-# output logic miss_o,
-# output logic prot_o,
-# output logic multi_o,
-# output logic cache_coherent_o,
-# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o
-# );
-""" #docstring_begin
- localparam VA_RAM_DEPTH = N_SETS * N_OFFSETS * 2;
- localparam VA_RAM_ADDR_WIDTH = log2(VA_RAM_DEPTH);
- localparam PA_RAM_ADDR_WIDTH = log2(PA_RAM_DEPTH);
- localparam SET_WIDTH = log2(N_SETS);
- localparam OFFSET_WIDTH = log2(N_OFFSETS);
- localparam LL_WIDTH = log2(N_PAR_VA_RAMS);
- localparam IGNORE_LSB = log2(PAGE_SIZE);
- logic [N_PAR_VA_RAMS-1:0] hit, prot, multi_hit, cache_coherent;
- logic [N_PAR_VA_RAMS-1:0] ram_we;
- logic last_search, last_search_next;
- logic first_search, first_search_next;
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] ram_waddr;
- logic [N_PAR_VA_RAMS-1:0][SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr;
- logic pa_ram_we;
- logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr, pa_port0_waddr; // PA RAM read, Write addr;
- logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr_reg_SN, pa_port0_raddr_reg_SP; // registered addresses, needed for WAIT_ON_WRITE;
- logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_addr; // PA RAM addr
- logic [PA_RAM_DATA_WIDTH-1:0] pa_port0_data, pa_data, pa_port0_data_reg; // PA RAM data
- logic pa_ram_store_data_SN, pa_ram_store_data_SP;
- logic hit_top, prot_top, multi_hit_top, first_hit_top;
- logic output_sent;
- int hit_block_num;
- logic searching, search_done;
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, port0_raddr; // VA RAM port0 addr
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr; // VA RAM port1 addr
- logic [OFFSET_WIDTH-1:0] offset_addr, offset_addr_d;
- logic [OFFSET_WIDTH-1:0] offset_start_addr, offset_end_addr;
- logic [SET_WIDTH-1:0] set_num;
- logic va_output_valid;
- logic searching_q;
- genvar z;
- // Search FSM
- typedef enum logic [1:0] {IDLE, SEARCH, DONE} search_state_t;
- search_state_t search_SP; // Present state
- search_state_t search_SN; // Next State
- // Output FSM
- typedef enum logic [1:0] {OUT_IDLE, SEND_OUTPUT, WAIT_ON_WRITE} out_state_t;
- out_state_t out_SP; // Present state
- out_state_t out_SN; // Next State
- logic miss_next;
- logic hit_next;
- logic prot_next;
- logic multi_next;
- logic cache_coherent_next;
- // Generate the VA Block rams and their surrounding logic
- generate
- for (z = 0; z < N_PAR_VA_RAMS; z++) begin : VA_RAMS
- check_ram
- #(
- )
- u_check_ram
- (
- .clk_i ( clk_i ),
- .rst_ni ( rst_ni ),
- .in_addr ( in_addr_i ),
- .rw_type ( rw_type_i ),
- .ram_we ( ram_we[z] ),
- .port0_addr ( port0_addr ),
- .port1_addr ( port1_addr ),
- .ram_wdata ( wdata_i[VA_RAM_DATA_WIDTH-1:0] ),
- .output_sent ( output_sent ),
- .output_valid ( va_output_valid ),
- .offset_addr_d ( offset_addr_d ),
- .hit_addr ( hit_addr[z] ),
- .master ( cache_coherent[z] ),
- .hit ( hit[z] ),
- .multi_hit ( multi_hit[z] ),
- .prot ( prot[z] )
- );
- end // for (z = 0; z < N_PAR_VA_RAMS; z++)
- endgenerate
- ////////////////// ---------------- Control and Address --------------- ////////////////////////
- // FSM
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- search_SP <= IDLE;
- end else begin
- search_SP <= search_SN;
- end
- end
- always_comb begin : SEARCH_FSM
- search_SN = search_SP;
- busy_o = 1'b0;
- searching = 1'b0;
- search_done = 1'b0;
- last_search_next = 1'b0;
- first_search_next = first_search;
- unique case (search_SP)
- IDLE : begin
- if (start_i) begin
- search_SN = SEARCH;
- first_search_next = 1'b1;
- end
- end
- SEARCH : begin
- busy_o = 1'b1;
- // detect last search cycle
- if ( (first_search == 1'b0) && (offset_addr == offset_end_addr) )
- last_search_next = 1'b1;
- // pause search during VA RAM reconfiguration
- if (|ram_we) begin
- searching = 1'b0;
- end else begin
- searching = 1'b1;
- first_search_next = 1'b0;
- end
- if (va_output_valid) begin
- // stop search
- if (last_search | prot_top | multi_hit_top) begin
- if (last_search | prot_top | multi_hit_top | hit_top ) begin
- search_SN = DONE;
- search_done = 1'b1;
- end
- end
- end
- DONE : begin
- busy_o = 1'b1;
- if (out_valid_o & out_ready_i)
- search_SN = IDLE;
- end
- default : begin
- search_SN = IDLE;
- end
- endcase // case (search_SP)
- end // always_comb begin
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- last_search <= 1'b0;
- first_search <= 1'b0;
- end else begin
- last_search <= last_search_next;
- first_search <= first_search_next;
- end
- end
- /*
- * VA RAM address generation
- *
- * The input address and set number, and thus the offset start address, are available in the
- * cycle after the start signal. The buffered offset_addr becomes available one cycle later.
- * During the first search cycle, we therefore directly use offset_start_addr for the lookup.
- */
- assign set_num = in_addr_i[SET_WIDTH+IGNORE_LSB -1 : IGNORE_LSB];
- assign port0_raddr[OFFSET_WIDTH] = 1'b0;
- assign port1_addr [OFFSET_WIDTH] = 1'b1;
- assign port0_raddr[OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr;
- assign port1_addr [OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr;
- assign port0_raddr[SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num;
- assign port1_addr [SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num;
- assign port0_addr = ram_we ? ram_waddr : port0_raddr;
- // The outputs of the BRAMs are only valid if in the previous cycle:
- // 1. the inputs were valid, and
- // 2. the BRAMs were not written to.
- // Otherwise, the outputs must be ignored.
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- searching_q <= 1'b0;
- end else begin
- searching_q <= searching;
- end
- end
- assign va_output_valid = searching_q;
- // Address offset for looking up the VA RAMs
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- offset_addr <= 0;
- end else if (first_search) begin
- offset_addr <= offset_start_addr + 1'b1;
- end else if (searching) begin
- offset_addr <= offset_addr + 1'b1;
- end
- end
- // Delayed address offset for looking up the PA RAM upon a hit in the VA RAMs
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- offset_addr_d <= 0;
- end else if (first_search) begin
- offset_addr_d <= offset_start_addr;
- end else if (searching) begin
- offset_addr_d <= offset_addr_d + 1'b1;
- end
- end
- // Store the offset addr for hit to reduce latency for next search.
- generate
- logic [N_SETS-1:0][HIT_OFFSET_STORE_WIDTH-1:0] hit_offset_addr; // Contains offset addr for previous hit for every SET.
- logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_reg;
- assign offset_start_addr = { hit_offset_addr[set_num] , {{OFFSET_WIDTH-HIT_OFFSET_STORE_WIDTH}{1'b0}} };
- assign offset_end_addr = hit_offset_addr[set_num]-1'b1;
- // Register the hit addr
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- hit_addr_reg <= 0;
- end else if (hit_top) begin
- hit_addr_reg <= hit_addr[hit_block_num];
- end
- end
- // Store hit addr for each set. The next search in the same set will start from the saved addr.
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- hit_offset_addr <= 0;
- end else if (hit_o) begin
- hit_offset_addr[set_num][HIT_OFFSET_STORE_WIDTH-1:0] <= hit_addr_reg[OFFSET_WIDTH-1 : (OFFSET_WIDTH - HIT_OFFSET_STORE_WIDTH)];
- end
- end
-`else // No need to store offset if full multi hit detection is enabled because the entire SET is searched.
- assign offset_start_addr = 0;
- assign offset_end_addr = {OFFSET_WIDTH{1'b1}};
- end else begin // if (HIT_OFFSET_STORE_WIDTH > 0)
- assign offset_start_addr = 0;
- assign offset_end_addr = {OFFSET_WIDTH{1'b1}};
- end
- endgenerate
- assign prot_top = |prot;
- //////////////////////////////////////////////////////////////////////////////////////
- // check for hit, multi hit
- // In case of a multi hit, the hit_block_num indicates the lowest VA RAM with a hit.
- // In case of a multi hit in the same VA RAM, Port 0 is given priority.
- always_comb begin : HIT_CHECK
- hit_top = |hit;
- hit_block_num = 0;
- first_hit_top = 1'b0;
- multi_hit_top = 1'b0;
- for (int i=N_PAR_VA_RAMS-1; i>=0; i--) begin
- if (hit[i] == 1'b1) begin
- if (multi_hit[i] | first_hit_top ) begin
- multi_hit_top = 1'b1;
- end
- first_hit_top = 1'b1;
- hit_block_num = i;
- end
- end // for (int i=0; i<N_PAR_VA_RAMS; i++)
- end // always_comb begin
- ///////////////////// ------------- Outputs ------------ //////////////////////////////////
- //// FSM
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- out_SP <= OUT_IDLE;
- pa_ram_store_data_SP <= 1'b0;
- pa_port0_raddr_reg_SP <= 'b0;
- end else begin
- out_SP <= out_SN;
- pa_ram_store_data_SP <= pa_ram_store_data_SN;
- pa_port0_raddr_reg_SP <= pa_port0_raddr_reg_SN;
- end
- end
- always_comb begin : OUTPUT_FSM
- out_SN = out_SP;
- miss_next = miss_o;
- prot_next = prot_o;
- multi_next = multi_o;
- hit_next = hit_o;
- cache_coherent_next = cache_coherent_o;
- pa_port0_raddr_reg_SN = pa_port0_raddr_reg_SP;
- pa_port0_raddr = 'b0;
- pa_ram_store_data_SN = 1'b0;
- out_valid_o = 1'b0;
- output_sent = 1'b0;
- unique case (out_SP)
- OUT_IDLE : begin
- hit_next = 1'b0;
- miss_next = 1'b0;
- prot_next = 1'b0;
- multi_next = 1'b0;
- cache_coherent_next = 1'b0;
- // abort transaction
- if ((search_done & ~hit_top) | prot_top | multi_hit_top) begin
- if (search_done & ~hit_top) begin
- miss_next = 1'b1;
- end
- if (prot_top) begin
- prot_next = 1'b1;
- hit_next = 1'b1;
- end
- if (multi_hit_top) begin
- multi_next = 1'b1;
- hit_next = 1'b1;
- end
- // read PA RAM
- end else if (search_done & hit_top) begin
- hit_next = 1'b1;
- cache_coherent_next = cache_coherent[hit_block_num];
- pa_port0_raddr = (N_PAR_VA_RAMS * hit_addr[hit_block_num]) + hit_block_num;
- pa_port0_raddr_reg_SN = pa_port0_raddr;
- // read PA RAM now
- if (~pa_ram_we) begin
- pa_ram_store_data_SN = 1'b1;
- // read PA RAM after PA RAM reconfiguration
- end else begin // pa_ram_we
- end
- end
- end
- WAIT_ON_WRITE : begin
- if ( ~pa_ram_we ) begin
- pa_port0_raddr = pa_port0_raddr_reg_SP;
- pa_ram_store_data_SN = 1'b1;
- end
- end
- SEND_OUTPUT : begin
- out_valid_o = 1'b1;
- if (out_ready_i) begin
- out_SN = OUT_IDLE;
- output_sent = 1'b1;
- end
- end
- default : begin
- out_SN = OUT_IDLE;
- end
- endcase // case (out_SP)
- end // always_comb begin
- //// Output signals
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- miss_o <= 1'b0;
- prot_o <= 1'b0;
- multi_o <= 1'b0;
- hit_o <= 1'b0;
- cache_coherent_o <= 1'b0;
- end else begin
- miss_o <= miss_next;
- prot_o <= prot_next;
- multi_o <= multi_next;
- hit_o <= hit_next;
- cache_coherent_o <= cache_coherent_next;
- end
- end
- ///////////////////////////////////////////////////////////////////////////////////////////////////
- ///////////////////// --------------- Physical Address -------------- ////////////////////////////
- /// PA Block RAM
- ram_tp_no_change #(
- )
- pa_ram
- (
- .clk ( clk_i ),
- .we ( pa_ram_we ),
- .addr0 ( pa_port0_addr ),
- .addr1 ( '0 ),
- .d_i ( wdata_i[PA_RAM_DATA_WIDTH-1:0] ),
- .d0_o ( pa_port0_data ),
- .d1_o ( )
- );
- assign out_addr_o[IGNORE_LSB-1:0] = in_addr_i[IGNORE_LSB-1:0];
- assign out_addr_o[AXI_M_ADDR_WIDTH-1:IGNORE_LSB] = pa_data;
- always_ff @(posedge clk_i) begin
- if (rst_ni == 0) begin
- pa_port0_data_reg <= 0;
- end else if (pa_ram_store_data_SP) begin
- pa_port0_data_reg <= pa_port0_data;
- end
- end
- assign pa_data = pa_ram_store_data_SP ? pa_port0_data : pa_port0_data_reg;
-///// Write enable for all block rams
-generate if (LL_WIDTH != 0) begin
- always_comb begin
- var reg[LL_WIDTH:0] para;
- var int para_int;
- for (para = 0; para < N_PAR_VA_RAMS; para=para+1'b1) begin
- para_int = int'(para);
- ram_we[para_int] = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b0) && (waddr_i[LL_WIDTH-1:0] == para);
- end
- end
-end else begin
- assign ram_we[0] = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b0);
-// Addresses are word, not byte addresses
-assign pa_ram_we = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b1); //waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] will be 0 for all VA writes and 1 for all PA writes
-assign ram_waddr = waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH-1:LL_WIDTH];
-assign pa_port0_waddr = waddr_i[PA_RAM_ADDR_WIDTH-1:0];
-assign pa_port0_addr = pa_ram_we ? pa_port0_waddr : pa_port0_raddr;
-# endmodule
-# // vim: ts=3 sw=3 sts=3 et nosmartindent autoindent foldmethod=marker tw=100
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# this file has been generated by sv2nmigen
-# //`include "pulp_soc_defines.sv"
-# ////import CfMath::log2;
-# //`define MY_ARRAY_SUM(MY_ARRAY,ARRAY_SIZE) ( (ARRAY_SIZE==1) ? MY_ARRAY[0] : (ARRAY_SIZE==2) ? MY_ARRAY[0] + MY_ARRAY[1] : (ARRAY_SIZE==3) ? MY_ARRAY[0] + MY_ARRAY[1] + MY_ARRAY[2] : (ARRAY_SIZE==4) ? MY_ARRAY[0] + MY_ARRAY[1] + MY_ARRAY[2] + MY_ARRAY[3] : 0 )
-# module rab_core
-# #(
-# parameter N_PORTS = 3,
-# parameter N_L2_SETS = 32,
-# parameter N_L2_SET_ENTRIES = 32,
-# parameter AXI_DATA_WIDTH = 64,
-# parameter AXI_S_ADDR_WIDTH = 32,
-# parameter AXI_M_ADDR_WIDTH = 40,
-# parameter AXI_LITE_DATA_WIDTH = 64,
-# parameter AXI_LITE_ADDR_WIDTH = 32,
-# parameter AXI_ID_WIDTH = 8,
-# parameter AXI_USER_WIDTH = 6,
-# parameter MH_FIFO_DEPTH = 16
-# )
-# (
-# input logic Clk_CI,
-# input logic Rst_RBI,
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi_awaddr,
-# input logic s_axi_awvalid,
-# output logic s_axi_awready,
-# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi_wdata,
-# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi_wstrb,
-# input logic s_axi_wvalid,
-# output logic s_axi_wready,
-# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi_araddr,
-# input logic s_axi_arvalid,
-# output logic s_axi_arready,
-# input logic s_axi_rready,
-# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi_rdata,
-# output logic [1:0] s_axi_rresp,
-# output logic s_axi_rvalid,
-# output logic [1:0] s_axi_bresp,
-# output logic s_axi_bvalid,
-# input logic s_axi_bready,
-# output logic [N_PORTS-1:0] int_miss,
-# output logic [N_PORTS-1:0] int_prot,
-# output logic [N_PORTS-1:0] int_multi,
-# output logic [N_PORTS-1:0] int_prefetch,
-# output logic int_mhf_full,
-# output logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_axaddr_o,
-# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_axid_o,
-# output logic [N_PORTS-1:0] [7:0] int_axlen_o,
-# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_axuser_o,
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] port1_addr,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] port1_id,
-# input logic [N_PORTS-1:0] [7:0] port1_len,
-# input logic [N_PORTS-1:0] [2:0] port1_size,
-# input logic [N_PORTS-1:0] port1_addr_valid,
-# input logic [N_PORTS-1:0] port1_type,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] port1_user,
-# input logic [N_PORTS-1:0] port1_sent,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] port1_out_addr,
-# output logic [N_PORTS-1:0] port1_cache_coherent,
-# output logic [N_PORTS-1:0] port1_accept,
-# output logic [N_PORTS-1:0] port1_drop,
-# output logic [N_PORTS-1:0] port1_miss,
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] port2_addr,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] port2_id,
-# input logic [N_PORTS-1:0] [7:0] port2_len,
-# input logic [N_PORTS-1:0] [2:0] port2_size,
-# input logic [N_PORTS-1:0] port2_addr_valid,
-# input logic [N_PORTS-1:0] port2_type,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] port2_user,
-# input logic [N_PORTS-1:0] port2_sent,
-# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] port2_out_addr,
-# output logic [N_PORTS-1:0] port2_cache_coherent,
-# output logic [N_PORTS-1:0] port2_accept,
-# output logic [N_PORTS-1:0] port2_drop,
-# output logic [N_PORTS-1:0] port2_miss,
-# input logic [N_PORTS-1:0] miss_l2_i,
-# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] miss_l2_addr_i,
-# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] miss_l2_id_i,
-# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] miss_l2_user_i,
-# output logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] wdata_l2_o,
-# output logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] waddr_l2_o,
-# output logic [N_PORTS-1:0] wren_l2_o
-# );
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class rab_core(Elaboratable):
- def __init__(self):
- self.s_axi_awaddr = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.s_axi_awvalid = Signal() # input
- self.s_axi_awready = Signal() # output
- self.s_axi_wdata = Signal(AXI_LITE_DATA_WIDTH) # input
- self.s_axi_wstrb = Signal(FIXME) # input
- self.s_axi_wvalid = Signal() # input
- self.s_axi_wready = Signal() # output
- self.s_axi_araddr = Signal(AXI_LITE_ADDR_WIDTH) # input
- self.s_axi_arvalid = Signal() # input
- self.s_axi_arready = Signal() # output
- self.s_axi_rready = Signal() # input
- self.s_axi_rdata = Signal(AXI_LITE_DATA_WIDTH) # output
- self.s_axi_rresp = Signal(2) # output
- self.s_axi_rvalid = Signal() # output
- self.s_axi_bresp = Signal(2) # output
- self.s_axi_bvalid = Signal() # output
- self.s_axi_bready = Signal() # input
- self.int_miss = Signal(N_PORTS) # output
- self.int_prot = Signal(N_PORTS) # output
- self.int_multi = Signal(N_PORTS) # output
- self.int_prefetch = Signal(N_PORTS) # output
- self.int_mhf_full = Signal() # output
- self.int_axaddr_o = Signal() # output
- self.int_axid_o = Signal() # output
- self.int_axlen_o = Signal() # output
- self.int_axuser_o = Signal() # output
- self.port1_addr = Signal() # input
- self.port1_id = Signal() # input
- self.port1_len = Signal() # input
- self.port1_size = Signal() # input
- self.port1_addr_valid = Signal(N_PORTS) # input
- self.port1_type = Signal(N_PORTS) # input
- self.port1_user = Signal() # input
- self.port1_sent = Signal(N_PORTS) # input
- self.port1_out_addr = Signal() # output
- self.port1_cache_coherent = Signal(N_PORTS) # output
- self.port1_accept = Signal(N_PORTS) # output
- self.port1_drop = Signal(N_PORTS) # output
- self.port1_miss = Signal(N_PORTS) # output
- self.port2_addr = Signal() # input
- self.port2_id = Signal() # input
- self.port2_len = Signal() # input
- self.port2_size = Signal() # input
- self.port2_addr_valid = Signal(N_PORTS) # input
- self.port2_type = Signal(N_PORTS) # input
- self.port2_user = Signal() # input
- self.port2_sent = Signal(N_PORTS) # input
- self.port2_out_addr = Signal() # output
- self.port2_cache_coherent = Signal(N_PORTS) # output
- self.port2_accept = Signal(N_PORTS) # output
- self.port2_drop = Signal(N_PORTS) # output
- self.port2_miss = Signal(N_PORTS) # output
- self.miss_l2_i = Signal(N_PORTS) # input
- self.miss_l2_addr_i = Signal() # input
- self.miss_l2_id_i = Signal() # input
- self.miss_l2_user_i = Signal() # input
- self.wdata_l2_o = Signal() # output
- self.waddr_l2_o = Signal() # output
- self.wren_l2_o = Signal(N_PORTS) # output
- def elaborate(self, platform=None):
- m = Module()
- return m
- // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗
- // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝
- // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗
- // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║
- // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║
- // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝
- // signals
- localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY;
- localparam integer N_SLICES[N_PORTS-1:0] = `N_SLICES_ARRAY;
- localparam N_SLICES_MAX = `N_SLICES_MAX;
- localparam N_REGS = 4*N_SLICES_TOT + 4;
- localparam AXI_SIZE_WIDTH = log2(AXI_DATA_WIDTH/8);
- localparam PORT_ID_WIDTH = (N_PORTS < 2) ? 1 : log2(N_PORTS);
- logic [N_PORTS-1:0] [15:0] p1_burst_size;
- logic [N_PORTS-1:0] [15:0] p2_burst_size;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p1_align_addr;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p2_align_addr;
- logic [N_PORTS-1:0] [AXI_SIZE_WIDTH-1:0] p1_mask;
- logic [N_PORTS-1:0] [AXI_SIZE_WIDTH-1:0] p2_mask;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p1_max_addr;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p2_max_addr;
- logic [N_PORTS-1:0] p1_prefetch;
- logic [N_PORTS-1:0] p2_prefetch;
- logic [N_PORTS-1:0] int_rw;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_addr_min;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_addr_max;
- logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_id;
- logic [N_PORTS-1:0] [7:0] int_len;
- logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_user;
- logic [N_PORTS-1:0] hit;
- logic [N_PORTS-1:0] prot;
- logic [N_PORTS-1:0] prefetch;
- logic [N_PORTS-1:0] no_hit;
- logic [N_PORTS-1:0] no_prot;
- logic [N_PORTS-1:0] [N_SLICES_MAX-1:0] hit_slices;
- logic [N_PORTS-1:0] [N_SLICES_MAX-1:0] prot_slices;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] out_addr;
- logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] out_addr_reg;
- logic [N_PORTS-1:0] cache_coherent;
- logic [N_PORTS-1:0] cache_coherent_reg;
- logic [N_PORTS-1:0] select;
- reg [N_PORTS-1:0] curr_priority;
- reg [N_PORTS-1:0] multi_hit;
- logic [N_PORTS-1:0] miss_valid_mhf;
- logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] miss_addr_mhf;
- logic [N_PORTS-1:0] [MISS_META_WIDTH-1:0] miss_meta_mhf;
- logic [N_REGS-1:0] [63:0] int_cfg_regs;
- logic [N_PORTS-1:0] [4*N_SLICES_MAX-1:0] [63:0] int_cfg_regs_slices;
- logic L1AllowMultiHit_S;
- genvar z;
- // █████╗ ███████╗███████╗██╗ ██████╗ ███╗ ██╗███╗ ███╗███████╗███╗ ██╗████████╗███████╗
- // ██╔══██╗██╔════╝██╔════╝██║██╔════╝ ████╗ ██║████╗ ████║██╔════╝████╗ ██║╚══██╔══╝██╔════╝
- // ███████║███████╗███████╗██║██║ ███╗██╔██╗ ██║██╔████╔██║█████╗ ██╔██╗ ██║ ██║ ███████╗
- // ██╔══██║╚════██║╚════██║██║██║ ██║██║╚██╗██║██║╚██╔╝██║██╔══╝ ██║╚██╗██║ ██║ ╚════██║
- // ██║ ██║███████║███████║██║╚██████╔╝██║ ╚████║██║ ╚═╝ ██║███████╗██║ ╚████║ ██║ ███████║
- // ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝
- // assignments
- always_comb
- begin : PORT_SELECT
- var integer idx;
- for (idx=0; idx<N_PORTS; idx++) begin
- // select = 1 -> port1 active
- // select = 0 -> port2 active
- select[idx] = (curr_priority[idx] & port1_addr_valid[idx]) | ~port2_addr_valid[idx];
- p1_burst_size[idx] = (port1_len[idx] + 1) << port1_size[idx];
- p2_burst_size[idx] = (port2_len[idx] + 1) << port2_size[idx];
- // align min addr for max addr computation to allow for smart AXI bursts around the 4k boundary
- if (port1_size[idx] == 3'b001)
- p1_mask[idx] = 3'b110;
- else if (port1_size[idx] == 3'b010)
- p1_mask[idx] = 3'b100;
- else if (port1_size[idx] == 3'b011)
- p1_mask[idx] = 3'b000;
- else
- p1_mask[idx] = 3'b111;
- p1_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port1_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH];
- p1_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port1_addr[idx][AXI_SIZE_WIDTH-1:0] & p1_mask[idx];
- if (port2_size[idx] == 3'b001)
- p2_mask[idx] = 3'b110;
- else if (port2_size[idx] == 3'b010)
- p2_mask[idx] = 3'b100;
- else if (port2_size[idx] == 3'b011)
- p2_mask[idx] = 3'b000;
- else
- p2_mask[idx] = 3'b111;
- if (port1_user[idx] == {AXI_USER_WIDTH{1'b1}})
- p1_prefetch[idx] = 1'b1;
- else
- p1_prefetch[idx] = 1'b0;
- if (port2_user[idx] == {AXI_USER_WIDTH{1'b1}})
- p2_prefetch[idx] = 1'b1;
- else
- p2_prefetch[idx] = 1'b0;
- p2_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port2_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH];
- p2_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port2_addr[idx][AXI_SIZE_WIDTH-1:0] & p2_mask[idx];
- p1_max_addr[idx] = p1_align_addr[idx] + p1_burst_size[idx] - 1;
- p2_max_addr[idx] = p2_align_addr[idx] + p2_burst_size[idx] - 1;
- int_addr_min[idx] = select[idx] ? port1_addr[idx] : port2_addr[idx];
- int_addr_max[idx] = select[idx] ? p1_max_addr[idx] : p2_max_addr[idx];
- int_rw[idx] = select[idx] ? port1_type[idx] : port2_type[idx];
- int_id[idx] = select[idx] ? port1_id[idx] : port2_id[idx];
- int_len[idx] = select[idx] ? port1_len[idx] : port2_len[idx];
- int_user[idx] = select[idx] ? port1_user[idx] : port2_user[idx];
- prefetch[idx] = select[idx] ? p1_prefetch[idx] : p2_prefetch[idx];
- hit [idx] = | hit_slices [idx];
- prot[idx] = | prot_slices[idx];
- no_hit [idx] = ~hit [idx];
- no_prot[idx] = ~prot[idx];
- port1_out_addr[idx] = out_addr_reg[idx];
- port2_out_addr[idx] = out_addr_reg[idx];
- port1_cache_coherent[idx] = cache_coherent_reg[idx];
- port2_cache_coherent[idx] = cache_coherent_reg[idx];
- end
- end
- always_comb
- begin
- var integer idx_port, idx_slice;
- var integer reg_num;
- reg_num=0;
- for ( idx_port = 0; idx_port < N_PORTS; idx_port++ ) begin
- for ( idx_slice = 0; idx_slice < 4*N_SLICES[idx_port]; idx_slice++ ) begin
- int_cfg_regs_slices[idx_port][idx_slice] = int_cfg_regs[4+reg_num];
- reg_num++;
- end
- // int_cfg_regs_slices[idx_port][N_SLICES_MAX:N_SLICES[idx_port]] will be dangling
- // Fix to zero. Synthesis will remove these signals.
- // int_cfg_regs_slices[idx_port][4*N_SLICES_MAX-1:4*N_SLICES[idx_port]] = 0;
- end
- end
- always @(posedge Clk_CI or negedge Rst_RBI)
- var integer idx;
- if (Rst_RBI == 1'b0)
- curr_priority = 'h0;
- else begin
- for (idx=0; idx<N_PORTS; idx++) begin
- if (port1_accept[idx] || port1_drop[idx])
- curr_priority[idx] = 1'b1;
- else if (port2_accept[idx] || port2_drop[idx])
- curr_priority[idx] = 1'b0;
- end
- end
- end
- // find port that misses
- logic [PORT_ID_WIDTH-1:0] PortIdx_D; // index of the first missing port
- var integer idx_miss;
- always_comb begin : MHF_PORT_SELECT
- PortIdx_D = 'b0;
- for (idx_miss = 0; idx_miss < N_PORTS; idx_miss++) begin
- if (miss_valid_mhf[idx_miss] == 1'b1) begin
- PortIdx_D = idx_miss;
- break;
- end
- end
- end // always_comb begin
- // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗
- // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝
- // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗
- // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║
- // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝
- // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝
- axi_rab_cfg
- #(
- .N_REGS ( N_REGS ),
- .N_L2_SETS ( N_L2_SETS ),
- .N_FLAGS ( 4 ),
- )
- u_axi_rab_cfg
- (
- .Clk_CI ( Clk_CI ),
- .Rst_RBI ( Rst_RBI ),
- .s_axi_awaddr ( s_axi_awaddr ),
- .s_axi_awvalid ( s_axi_awvalid ),
- .s_axi_wdata ( s_axi_wdata ),
- .s_axi_wstrb ( s_axi_wstrb ),
- .s_axi_wvalid ( s_axi_wvalid ),
- .s_axi_bready ( s_axi_bready ),
- .s_axi_araddr ( s_axi_araddr ),
- .s_axi_arvalid ( s_axi_arvalid ),
- .s_axi_rready ( s_axi_rready ),
- .s_axi_arready ( s_axi_arready ),
- .s_axi_rdata ( s_axi_rdata ),
- .s_axi_rresp ( s_axi_rresp ),
- .s_axi_rvalid ( s_axi_rvalid ),
- .s_axi_wready ( s_axi_wready ),
- .s_axi_bresp ( s_axi_bresp ),
- .s_axi_bvalid ( s_axi_bvalid ),
- .s_axi_awready ( s_axi_awready ),
- .L1Cfg_DO ( int_cfg_regs ),
- .L1AllowMultiHit_SO ( L1AllowMultiHit_S ),
- .MissAddr_DI ( miss_addr_mhf[PortIdx_D] ),
- .MissMeta_DI ( miss_meta_mhf[PortIdx_D] ),
- .Miss_SI ( miss_valid_mhf[PortIdx_D] ),
- .MhFifoFull_SO ( int_mhf_full ),
- .wdata_l2 ( wdata_l2_o ),
- .waddr_l2 ( waddr_l2_o ),
- .wren_l2 ( wren_l2_o )
- );
- generate for (z = 0; z < N_PORTS; z++) begin : MHF_TLB_SELECT
- if (ENABLE_L2TLB[z] == 1) begin // L2 TLB is enabled
- assign miss_valid_mhf[z] = miss_l2_i[z];
- assign miss_addr_mhf[z] = miss_l2_addr_i[z];
- assign miss_meta_mhf[z] = {miss_l2_user_i[z], PortIdx_D, miss_l2_id_i[z]};
- end else begin// L2 TLB is disabled
- assign miss_valid_mhf[z] = int_miss[z];
- assign miss_addr_mhf[z] = int_addr_min[z];
- assign miss_meta_mhf[z] = {int_user[z], PortIdx_D, int_id[z]};
- end
- end
- endgenerate
- // ███████╗██╗ ██╗ ██████╗███████╗ ████████╗ ██████╗ ██████╗
- // ██╔════╝██║ ██║██╔════╝██╔════╝ ╚══██╔══╝██╔═══██╗██╔══██╗
- // ███████╗██║ ██║██║ █████╗ ██║ ██║ ██║██████╔╝
- // ╚════██║██║ ██║██║ ██╔══╝ ██║ ██║ ██║██╔═══╝
- // ███████║███████╗██║╚██████╗███████╗ ██║ ╚██████╔╝██║
- // ╚══════╝╚══════╝╚═╝ ╚═════╝╚══════╝ ╚═╝ ╚═════╝ ╚═╝
- generate for (z = 0; z < N_PORTS; z++) begin : SLICE_TOP_GEN
- slice_top
- #(
- .N_SLICES ( N_SLICES[z] ),
- .N_REGS ( 4*N_SLICES[z] ),
- )
- u_slice_top
- (
- .int_cfg_regs ( int_cfg_regs_slices[z][4*N_SLICES[z]-1:0] ),
- .int_rw ( int_rw[z] ),
- .int_addr_min ( int_addr_min[z] ),
- .int_addr_max ( int_addr_max[z] ),
- .multi_hit_allow ( L1AllowMultiHit_S ),
- .multi_hit ( multi_hit[z] ),
- .prot ( prot_slices[z][N_SLICES[z]-1:0] ),
- .hit ( hit_slices [z][N_SLICES[z]-1:0] ),
- .cache_coherent ( cache_coherent[z] ),
- .out_addr ( out_addr[z] )
- );
- // hit_slices [N_SLICES_MAX-1:N_SLICES_MAX-N_SLICES[z]] will be dangling
- // prot_slices[N_SLICES_MAX-1:N_SLICES_MAX-N_SLICES[z]] will be dangling
- // Fix to zero. Synthesis will remove these signals.
- if ( N_SLICES[z] < N_SLICES_MAX ) begin
- assign hit_slices [z][N_SLICES_MAX-1:N_SLICES[z]] = 0;
- assign prot_slices[z][N_SLICES_MAX-1:N_SLICES[z]] = 0;
- end
- end // for (z = 0; z < N_PORTS; z++)
- endgenerate
- // ███████╗███████╗███╗ ███╗
- // ██╔════╝██╔════╝████╗ ████║
- // █████╗ ███████╗██╔████╔██║
- // ██╔══╝ ╚════██║██║╚██╔╝██║
- // ██║ ███████║██║ ╚═╝ ██║
- // ╚═╝ ╚══════╝╚═╝ ╚═╝
- //
- generate for (z = 0; z < N_PORTS; z++) begin : FSM_GEN
- fsm
- #(
- )
- u_fsm
- (
- .Clk_CI ( Clk_CI ),
- .Rst_RBI ( Rst_RBI ),
- .port1_addr_valid_i ( port1_addr_valid[z] ),
- .port2_addr_valid_i ( port2_addr_valid[z] ),
- .port1_sent_i ( port1_sent[z] ),
- .port2_sent_i ( port2_sent[z] ),
- .select_i ( select[z] ),
- .no_hit_i ( no_hit[z] ),
- .multi_hit_i ( multi_hit[z] ),
- .no_prot_i ( no_prot[z] ),
- .prefetch_i ( prefetch[z] ),
- .out_addr_i ( out_addr[z] ),
- .cache_coherent_i ( cache_coherent[z] ),
- .port1_accept_o ( port1_accept[z] ),
- .port1_drop_o ( port1_drop[z] ),
- .port1_miss_o ( port1_miss[z] ),
- .port2_accept_o ( port2_accept[z] ),
- .port2_drop_o ( port2_drop[z] ),
- .port2_miss_o ( port2_miss[z] ),
- .out_addr_o ( out_addr_reg[z] ),
- .cache_coherent_o ( cache_coherent_reg[z] ),
- .miss_o ( int_miss[z] ),
- .multi_o ( int_multi[z] ),
- .prot_o ( int_prot[z] ),
- .prefetch_o ( int_prefetch[z] ),
- .in_addr_i ( int_addr_min[z] ),
- .in_id_i ( int_id[z] ),
- .in_len_i ( int_len[z] ),
- .in_user_i ( int_user[z] ),
- .in_addr_o ( int_axaddr_o[z] ),
- .in_id_o ( int_axid_o[z] ),
- .in_len_o ( int_axlen_o[z] ),
- .in_user_o ( int_axuser_o[z] )
- );
- end
- endgenerate
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# module rab_slice
-# #(
-# parameter ADDR_WIDTH_PHYS = 40,
-# parameter ADDR_WIDTH_VIRT = 32
-# )
-# (
-# input logic [ADDR_WIDTH_VIRT-1:0] cfg_min,
-# input logic [ADDR_WIDTH_VIRT-1:0] cfg_max,
-# input logic [ADDR_WIDTH_PHYS-1:0] cfg_offset,
-# input logic cfg_wen,
-# input logic cfg_ren,
-# input logic cfg_en,
-# input logic in_trans_type,
-# input logic [ADDR_WIDTH_VIRT-1:0] in_addr_min,
-# input logic [ADDR_WIDTH_VIRT-1:0] in_addr_max,
-# output logic out_hit,
-# output logic out_prot,
-# output logic [ADDR_WIDTH_PHYS-1:0] out_addr
-# );
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-class rab_slice(Elaboratable):
- def __init__(self, params): # pass config object
- # TODO parameters
- self.params = params
- self.cfg_min = Signal(params.ADDR_WIDTH_VIRT) # input
- self.cfg_max = Signal(params.ADDR_WIDTH_VIRT) # input
- self.cfg_offset = Signal(params.ADDR_WIDTH_PHYS) # input
- self.cfg_wen = Signal() # input
- self.cfg_ren = Signal() # input
- self.cfg_en = Signal() # input
- self.in_trans_type = Signal() # input
- self.in_addr_min = Signal(params.ADDR_WIDTH_VIRT) # input
- self.in_addr_max = Signal(params.ADDR_WIDTH_VIRT) # input
- self.out_hit = Signal() # output
- self.out_prot = Signal() # output
- self.out_addr = Signal(params.ADDR_WIDTH_PHYS) # output
- def elaborate(self, platform=None):
- m = Module()
- min_above_min = Signal()
- min_below_max = Signal()
- max_below_max = Signal()
- # assign min_above_min = (in_addr_min >= cfg_min) ? 1'b1 : 1'b0;
- # assign min_below_max = (in_addr_min <= cfg_max) ? 1'b1 : 1'b0;
- # assign max_below_max = (in_addr_max <= cfg_max) ? 1'b1 : 1'b0;
- # assign out_hit = cfg_en & min_above_min & min_below_max & max_below_max;
- # assign out_prot = out_hit & ((in_trans_type & ~cfg_wen) | (~in_trans_type & ~cfg_ren));
- # assign out_addr = in_addr_min - cfg_min + cfg_offset;
- m.d.comb += [
- min_above_min.eq(self.in_addr_min >= self.cfg_min),
- min_below_max.eq(self.in_addr_min <= self.cfg_max),
- max_below_max.eq(self.in_addr_max <= self.cfg_max),
- self.out_hit.eq(self.cfg_en & min_above_min &
- min_below_max & max_below_max),
- self.out_prot.eq(self.out_hit & (
- (self.in_trans_type & ~self.cfg_wen) | (~self.in_trans_type & ~self.cfg_ren))),
- self.out_addr.eq(self.in_addr_min - self.cfg_min + self.cfg_offset)
- ]
- return m
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# /*
-# * ram_tp_no_change
-# *
-# * This code implements a parameterizable two-port memory. Port 0 can read and
-# * write while Port 1 can read only. The Xilinx tools will infer a BRAM with
-# * Port 0 in "no change" mode, i.e., during a write, it retains the last read
-# * value on the output. Port 1 (read-only) is in "write first" mode. Still, it
-# * outputs the old data during the write cycle. Note: Port 1 outputs invalid
-# * data in the cycle after the write when reading the same address.
-# *
-# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
-# */
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen import Memory
-import math
-# module ram_tp_no_change
-# #(
-# )
-# (
-# input clk,
-# input we,
-# input [ADDR_WIDTH-1:0] addr0,
-# input [ADDR_WIDTH-1:0] addr1,
-# input [DATA_WIDTH-1:0] d_i,
-# output [DATA_WIDTH-1:0] d0_o,
-# output [DATA_WIDTH-1:0] d1_o
-# );
-class ram_tp_no_change(Elaboratable):
- def __init__(self):
- self.we = Signal() # input
- self.addr0 = Signal(ADDR_WIDTH) # input
- self.addr1 = Signal(ADDR_WIDTH) # input
- self.d_i = Signal(DATA_WIDTH) # input
- self.d0_o = Signal(DATA_WIDTH) # output
- self.d1_o = Signal(DATA_WIDTH) # output
- DEPTH = int(math.pow(2, ADDR_WIDTH))
- self.ram = Memory(DATA_WIDTH, DEPTH)
- #
- # localparam DEPTH = 2**ADDR_WIDTH;
- #
- # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
- # reg [DATA_WIDTH-1:0] d0;
- # reg [DATA_WIDTH-1:0] d1;
- #
- # always_ff @(posedge clk) begin
- # if(we == 1'b1) begin
- # ram[addr0] <= d_i;
- # end else begin
- # only change data if we==false
- # d0 <= ram[addr0];
- # end
- # d1 <= ram[addr1];
- # end
- #
- # assign d0_o = d0;
- # assign d1_o = d1;
- #
- def elaborate(self, platform=None):
- m = Module()
- m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
- m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
- m.submodules.write_ram = write_ram = self.ram.write_port()
- # write port
- m.d.comb += write_ram.en.eq(self.we)
- m.d.comb += write_ram.addr.eq(self.addr0)
- m.d.comb += write_ram.data.eq(self.d_i)
- # read ports
- m.d.comb += read_ram0.addr.eq(self.addr0)
- m.d.comb += read_ram1.addr.eq(self.addr1)
- with m.If(self.we == 0):
- m.d.sync += self.d0_o.eq(read_ram0.data)
- m.d.sync += self.d1_o.eq(read_ram1.data)
- return m
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# /*
-# * ram_tp_write_first
-# *
-# * This code implements a parameterizable two-port memory. Port 0 can read and
-# * write while Port 1 can read only. Xilinx Vivado will infer a BRAM in
-# * "write first" mode, i.e., upon a read and write to the same address, the
-# * new value is read. Note: Port 1 outputs invalid data in the cycle after
-# * the write when reading the same address.
-# *
-# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
-# */
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-from nmigen import Memory
-import math
-# module ram_tp_write_first
-# #(
-# )
-# (
-# input clk,
-# input we,
-# input [ADDR_WIDTH-1:0] addr0,
-# input [ADDR_WIDTH-1:0] addr1,
-# input [DATA_WIDTH-1:0] d_i,
-# output [DATA_WIDTH-1:0] d0_o,
-# output [DATA_WIDTH-1:0] d1_o
-# );
class ram_tp_write_first(Elaboratable):
    """Two-port RAM translated from the ``ram_tp_write_first`` SV module.

    Port 0 (``addr0``) is used for writing and for reading; port 1
    (``addr1``) is read-only.  Storage is an nmigen ``Memory`` that is
    ``2**addr_width`` words deep and ``data_width`` bits wide.

    Arguments:
    * addr_width: number of address bits of both ports
    * data_width: number of bits per stored word

    The translated code used to reference ``ADDR_WIDTH`` / ``DATA_WIDTH``,
    names that are not defined anywhere in this module, so the class could
    not be instantiated.  They are now constructor arguments with defaults,
    which keeps the zero-argument call ``ram_tp_write_first()`` working.
    NOTE(review): the defaults (10 / 36) are assumed to match the parameter
    defaults of the upstream SystemVerilog module -- confirm.
    """

    def __init__(self, addr_width=10, data_width=36):
        self.addr_width = addr_width
        self.data_width = data_width

        self.we = Signal()  # input
        self.addr0 = Signal(addr_width)  # input
        self.addr1 = Signal(addr_width)  # input
        self.d_i = Signal(data_width)  # input
        self.d0_o = Signal(data_width)  # output
        self.d1_o = Signal(data_width)  # output

        # localparam DEPTH = 2**ADDR_WIDTH  (exact integer, no float pow)
        depth = 1 << addr_width
        self.ram = Memory(data_width, depth)

    # Reference SystemVerilog implementation:
    #
    #    localparam DEPTH = 2**ADDR_WIDTH;
    #
    #    (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
    #                              reg [ADDR_WIDTH-1:0] raddr0;
    #                              reg [ADDR_WIDTH-1:0] raddr1;
    #
    #    always_ff @(posedge clk) begin
    #      if(we == 1'b1) begin
    #        ram[addr0] <= d_i;
    #      end
    #      raddr0 <= addr0;
    #      raddr1 <= addr1;
    #    end
    #
    #    assign d0_o = ram[raddr0];
    #    assign d1_o = ram[raddr1];
    #

    def elaborate(self, platform=None):
        """Create the memory ports and wire them to the module signals."""
        m = Module()
        m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
        m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
        m.submodules.write_ram = write_ram = self.ram.write_port()

        # write port: shares addr0 with the first read port
        m.d.comb += write_ram.en.eq(self.we)
        m.d.comb += write_ram.addr.eq(self.addr0)
        m.d.comb += write_ram.data.eq(self.d_i)

        # read ports
        m.d.comb += read_ram0.addr.eq(self.addr0)
        m.d.comb += read_ram1.addr.eq(self.addr1)
        # NOTE(review): the outputs are registered in the sync domain here.
        # If read_port() is itself synchronous this is one register stage
        # more than the reference Verilog (which only registers the read
        # address) -- confirm the intended latency.
        m.d.sync += self.d0_o.eq(read_ram0.data)
        m.d.sync += self.d1_o.eq(read_ram1.data)
        return m
+++ /dev/null
-# // Copyright 2018 ETH Zurich and University of Bologna.
-# // Copyright and related rights are licensed under the Solderpad Hardware
-# // License, Version 0.51 (the "License"); you may not use this file except in
-# // compliance with the License. You may obtain a copy of the License at
-# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
-# // or agreed to in writing, software, hardware and materials distributed under
-# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
-# // specific language governing permissions and limitations under the License.
-# this file has been generated by sv2nmigen
-from nmigen import Signal, Module, Const, Cat, Elaboratable
-import rab_slice
-import coreconfig
-# module slice_top
-# //#(
-# // parameter N_SLICES = 16,
-# // parameter N_REGS = 4*N_SLICES,
-# // parameter ADDR_WIDTH_PHYS = 40,
-# // parameter ADDR_WIDTH_VIRT = 32
-# // )
-# (
-# input logic [N_REGS-1:0] [63:0] int_cfg_regs,
-# input logic int_rw,
-# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_min,
-# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_max,
-# input logic multi_hit_allow,
-# output logic multi_hit,
-# output logic [N_SLICES-1:0] prot,
-# output logic [N_SLICES-1:0] hit,
-# output logic cache_coherent,
-# output logic [ADDR_WIDTH_PHYS-1:0] out_addr
-# );
class slice_top(Elaboratable):
    """Top level of the RAB slices (partial sv2nmigen translation).

    Instantiates ``N_SLICES`` ``rab_slice`` submodules and performs the
    HIT_CHECK of the SystemVerilog original: the lowest-numbered slice that
    hits is the "first hit"; any further hit raises ``multi_hit`` unless
    ``multi_hit_allow`` is set.

    Still untranslated (see the TODO markers): the ``int_cfg_regs`` input,
    the slice port connections, and therefore the ``out_addr`` /
    ``cache_coherent`` selection.  Until the slices are connected nothing
    in this module drives ``hit`` or ``prot``.
    """

    def __init__(self):
        # FIXME self.int_cfg_regs = Signal() # input
        self.params = coreconfig.CoreConfig()  # rename ?
        self.int_rw = Signal()  # input
        self.int_addr_min = Signal(self.params.ADDR_WIDTH_VIRT)  # input
        self.int_addr_max = Signal(self.params.ADDR_WIDTH_VIRT)  # input
        self.multi_hit_allow = Signal()  # input
        self.multi_hit = Signal()  # output
        self.prot = Signal(self.params.N_SLICES)  # output
        self.hit = Signal(self.params.N_SLICES)  # output
        self.cache_coherent = Signal()  # output
        self.out_addr = Signal(self.params.ADDR_WIDTH_PHYS)  # output

    def elaborate(self, platform=None):
        """Build the slice submodules and the multi-hit detection logic."""
        m = Module()
        n_slices = self.params.N_SLICES
        first_hit = Signal()

        for i in range(n_slices):
            # TODO pass params / core config here
            u_slice = rab_slice.rab_slice(self.params)
            setattr(m.submodules, "u_slice%d" % i, u_slice)
            # TODO set param and connect ports

        # In case of a multi hit, the lowest slice with a hit is selected.
        # always_comb begin : HIT_CHECK -- default values first
        m.d.comb += [
            first_hit.eq(0),
            self.multi_hit.eq(0),
            self.out_addr.eq(0),
            self.cache_coherent.eq(0)]

        # The SystemVerilog loop updates first_hit with blocking assignments
        # and tests it again in the next iteration.  A combinatorial signal
        # cannot read back its own value like that, so "has a lower slice
        # already hit?" is carried as a Python-level expression instead.
        # (The previous translation used m.Elif(first_hit == 1) after
        # m.If(first_hit == 1), which made the first-hit branch unreachable;
        # the reference code has a plain "else" there.)
        lower_hit = Const(0)
        for j in range(n_slices):
            with m.If(self.hit[j]):
                with m.If(lower_hit):
                    with m.If(~self.multi_hit_allow):
                        m.d.comb += self.multi_hit.eq(1)
                with m.Else():
                    m.d.comb += first_hit.eq(1)
                    # only output first slice that was hit
                    # SV self.out_addr.eq(slice_out_addr[ADDR_WIDTH_PHYS*j + : ADDR_WIDTH_PHYS]),
                    # SV self.cache_coherent.eq(int_cfg_regs[4*j+3][3]),
            lower_hit = lower_hit | self.hit[j]
        return m
- # TODO translate generate statement
- logic [ADDR_WIDTH_PHYS*N_SLICES-1:0] slice_out_addr;
- generate
- for ( i=0; i<N_SLICES; i++ )
- begin
- rab_slice
- #(
- )
- u_slice
- (
- .cfg_min ( int_cfg_regs[4*i] [ADDR_WIDTH_VIRT-1:0] ),
- .cfg_max ( int_cfg_regs[4*i+1][ADDR_WIDTH_VIRT-1:0] ),
- .cfg_offset ( int_cfg_regs[4*i+2][ADDR_WIDTH_PHYS-1:0] ),
- .cfg_wen ( int_cfg_regs[4*i+3][2] ),
- .cfg_ren ( int_cfg_regs[4*i+3][1] ),
- .cfg_en ( int_cfg_regs[4*i+3][0] ),
- .in_trans_type ( int_rw ),
- .in_addr_min ( int_addr_min ),
- .in_addr_max ( int_addr_max ),
- .out_addr ( slice_out_addr[ADDR_WIDTH_PHYS*i+ADDR_WIDTH_PHYS-1:ADDR_WIDTH_PHYS*i] ),
- .out_prot ( prot[i] ),
- .out_hit ( hit[i] )
- );
- end
- endgenerate
- // In case of a multi hit, the lowest slice with a hit is selected.
- always_comb begin : HIT_CHECK
- first_hit = 0;
- multi_hit = 0;
- out_addr = '0;
- cache_coherent = 0;
- for (j = 0; j < N_SLICES; j++) begin
- if (hit[j] == 1'b1) begin
- if (first_hit == 1'b1) begin
- if (multi_hit_allow == 1'b0) begin
- multi_hit = 1'b1;
- end
- end else begin
- first_hit = 1'b1;
- out_addr = slice_out_addr[ADDR_WIDTH_PHYS*j +: ADDR_WIDTH_PHYS];
- cache_coherent = int_cfg_regs[4*j+3][3];
- end
- end
- end
- end
-# sv 2 migen: TODO add translate code for generate statements and for loops inside always_comb
+++ /dev/null
-from ram_tp_write_first import ram_tp_write_first
-from nmigen.compat.sim import run_simulation
-import sys
def tbench(dut):
    """Testbench process: write the value N to address N for N in 0..254.

    Write-enable is raised once and left high; each loop iteration sets
    the address and data and then advances the simulation by one clock.
    """
    yield dut.we.eq(1)
    for value in range(255):
        yield dut.addr0.eq(value)
        yield dut.d_i.eq(value)
        yield
# Script entry point: simulate the RAM with the write-sweep testbench and
# dump the waveform to ram_tp_write_first.vcd.
if __name__ == "__main__":
    dut = ram_tp_write_first()
    run_simulation(dut, tbench(dut), vcd_name="ram_tp_write_first.vcd")
    print("ram_tp_write_first Unit Test Success")
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-import sys
-# sys.path.append("../../../TestUtil")
-from slice_top import slice_top
def tbench(dut):
    """Minimal testbench process: advance the simulation by a single clock.

    No stimulus is applied to ``dut``; this only checks that the design
    elaborates and can be simulated.
    """
    yield None
# Script entry point: elaborate slice_top, run the one-cycle testbench and
# dump the waveform to test_slice_top.vcd.
if __name__ == "__main__":
    dut = slice_top()
    run_simulation(dut, tbench(dut), vcd_name="test_slice_top.vcd")
    print("slice_top Unit Test Success")
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Cat, Const, Array, Signal, Elaboratable, Module
-from nmutil.iocontrol import RecordObject
-from math import log
-from functools import reduce
-import operator
class Register(Elaboratable):
    """A single register with any number of read and write ports.

    Ports are created with read_port() / write_port() *before* elaboration.
    Each read port has a ``ren`` enable and a ``data_o`` output, each write
    port a ``wen`` enable and a ``data_i`` input.

    Arguments:
    * width: register width in bits
    * writethru: when True, a read that coincides with a write returns the
      data being written rather than the stored value
    """

    def __init__(self, width, writethru=True):
        self.width = width
        self.writethru = writethru
        self._rdports = []
        self._wrports = []

    def read_port(self, name=None):
        """Create, record and return a new read port (ren, data_o)."""
        port = RecordObject([("ren", 1),
                             ("data_o", self.width)],
                            name=name)
        self._rdports.append(port)
        return port

    def write_port(self, name=None):
        """Create, record and return a new write port (wen, data_i)."""
        port = RecordObject([("wen", 1),
                             ("data_i", self.width)],
                            name=name)
        self._wrports.append(port)
        return port

    def elaborate(self, platform):
        m = Module()
        self.reg = reg = Signal(self.width, name="reg")

        # read ports. has write-through detection (returns data written)
        for rp in self._rdports:
            with m.If(rp.ren):
                if self.writethru:
                    wr_detect = Signal(reset_less=False)
                    m.d.comb += wr_detect.eq(0)
                    for wp in self._wrports:
                        with m.If(wp.wen):
                            m.d.comb += rp.data_o.eq(wp.data_i)
                            m.d.comb += wr_detect.eq(1)
                    # no write in progress: return the stored value
                    with m.If(~wr_detect):
                        m.d.comb += rp.data_o.eq(reg)
                else:
                    m.d.comb += rp.data_o.eq(reg)

        # write ports: an enabled port stores its data on the next clock.
        # With several ports enabled at once the last-created one wins.
        for wp in self._wrports:
            with m.If(wp.wen):
                m.d.sync += reg.eq(wp.data_i)

        return m

    def __iter__(self):
        for p in self._rdports:
            yield from p
        for p in self._wrports:
            yield from p

    def ports(self):
        """Return every signal of every read and write port as a list.

        The list used to be built and then dropped, so callers received
        None; it is now returned.
        """
        return list(self)
def treereduce(tree, attr="data_o"):
    """OR together attribute ``attr`` of every item in ``tree``.

    The list is split in half recursively so that the resulting expression
    is a balanced tree of ``|`` operations rather than a linear chain.

    Arguments:
    * tree: list of objects that each have an attribute named ``attr``;
      anything that is not a list is returned unchanged
    * attr: name of the attribute to combine

    Raises ValueError for an empty list.  Previously an empty list made
    the function recurse until RecursionError, because halving an empty
    list yields two more empty lists.
    """
    #print ("treereduce", tree)
    if not isinstance(tree, list):
        return tree
    if not tree:
        raise ValueError("treereduce requires at least one item")
    if len(tree) == 1:
        return getattr(tree[0], attr)
    # two or more items: combine the reductions of both halves
    split = len(tree) // 2
    return treereduce(tree[:split], attr) | treereduce(tree[split:], attr)
class RegFileArray(Elaboratable):
    """Register file built from an array of individual Register objects.

    There is no address decoder: every port carries one enable bit per
    register (``ren`` / ``wen`` are ``depth`` bits wide) and the caller
    selects registers in unary.  Read data of all registers of a port is
    ORed together.
    """

    def __init__(self, width, depth):
        self.width = width
        self.depth = depth
        self.regs = Array(Register(width) for _ in range(self.depth))
        self._rdports = []
        self._wrports = []

    def read_port(self, name=None):
        """Add a read port to every register and return the combined port."""
        per_reg = Array([self.regs[idx].read_port(name)
                         for idx in range(self.depth)])
        layout = [("ren", self.depth),
                  ("data_o", self.width)]
        combined = RecordObject(layout, name)
        self._rdports.append((per_reg, combined))
        return combined

    def write_port(self, name=None):
        """Add a write port to every register and return the combined port."""
        per_reg = Array([self.regs[idx].write_port(name)
                         for idx in range(self.depth)])
        layout = [("wen", self.depth),
                  ("data_i", self.width)]
        combined = RecordObject(layout)
        self._wrports.append((per_reg, combined))
        return combined

    def _get_en_sig(self, port, typ):
        """Concatenate field ``typ`` of each per-register port, LSB first."""
        return Cat(*[sub[typ] for sub in port])

    def elaborate(self, platform):
        m = Module()
        for idx, reg in enumerate(self.regs):
            setattr(m.submodules, "reg_%d" % idx, reg)

        # read side: fan the unary enable out, OR the data back together
        for per_reg, combined in self._rdports:
            #print (p)
            enables = self._get_en_sig(per_reg, 'ren')
            m.d.comb += enables.eq(combined.ren)
            m.d.comb += combined.data_o.eq(treereduce(list(per_reg)))

        # write side: fan out the unary enable and broadcast the data
        for per_reg, combined in self._wrports:
            enables = self._get_en_sig(per_reg, 'wen')
            m.d.comb += enables.eq(combined.wen)
            for sub in per_reg:
                m.d.comb += sub.data_i.eq(combined.data_i)

        return m

    def __iter__(self):
        for reg in self.regs:
            yield from reg

    def ports(self):
        """Return the port signals of all registers as a list."""
        return list(self)
class RegFile(Elaboratable):
    """Addressed register file with write-through read ports.

    Arguments:
    * width: width of each register in bits
    * depth: number of registers

    Writes to address 0 are ignored.

    The address fields of the ports are sized from ``depth``.  They were
    previously sized from ``width`` (the data width) using a floating-point
    log, which gave the wrong number of address bits whenever width and
    depth differ.
    """

    def __init__(self, width, depth):
        self.width = width
        self.depth = depth
        self._rdports = []
        self._wrports = []

    def _addr_bits(self):
        """Number of bits needed to address ``depth`` registers (min. 1)."""
        return max(1, (self.depth - 1).bit_length())

    def read_port(self):
        """Create, record and return a read port (raddr, ren, data_o)."""
        bsz = self._addr_bits()
        port = RecordObject([("raddr", bsz),
                             ("ren", 1),
                             ("data_o", self.width)])
        self._rdports.append(port)
        return port

    def write_port(self):
        """Create, record and return a write port (waddr, wen, data_i)."""
        bsz = self._addr_bits()
        port = RecordObject([("waddr", bsz),
                             ("wen", 1),
                             ("data_i", self.width)])
        self._wrports.append(port)
        return port

    def elaborate(self, platform):
        m = Module()
        bsz = self._addr_bits()
        regs = Array(Signal(self.width, name="reg") for _ in range(self.depth))

        # read ports. has write-through detection (returns data written)
        for rp in self._rdports:
            wr_detect = Signal(reset_less=False)
            with m.If(rp.ren):
                m.d.comb += wr_detect.eq(0)
                for wp in self._wrports:
                    addrmatch = Signal(reset_less=False)
                    m.d.comb += addrmatch.eq(wp.waddr == rp.raddr)
                    with m.If(wp.wen & addrmatch):
                        m.d.comb += rp.data_o.eq(wp.data_i)
                        m.d.comb += wr_detect.eq(1)
                # no matching write in progress: return the stored value
                with m.If(~wr_detect):
                    m.d.comb += rp.data_o.eq(regs[rp.raddr])

        # write ports, don't allow write to address 0 (ignore it)
        for wp in self._wrports:
            with m.If(wp.wen & (wp.waddr != Const(0, bsz))):
                m.d.sync += regs[wp.waddr].eq(wp.data_i)

        return m

    def __iter__(self):
        yield from self._rdports
        yield from self._wrports

    def ports(self):
        """Yield the individual signals of all ports, flattening records."""
        res = list(self)
        for r in res:
            if isinstance(r, RecordObject):
                yield from r
            else:
                yield r
def regfile_sim(dut, rp, wp):
    """Simulation process for RegFile using one read and one write port.

    Writes 2 to register 1 and reads it back, then writes 6 to register 5
    while reading the same register in the same cycle.  The read-back
    values are printed; the first must be 2 and the one sampled after the
    second write must be 6.
    """
    # write 2 into register 1
    for stmt in (wp.waddr.eq(1), wp.data_i.eq(2), wp.wen.eq(1)):
        yield stmt
    yield

    # stop writing, read register 1 back
    for stmt in (wp.wen.eq(0), rp.ren.eq(1), rp.raddr.eq(1)):
        yield stmt
    yield
    readback = yield rp.data_o
    print (readback)
    assert readback == 2

    # write 6 to register 5 and read register 5 at the same time
    for stmt in (wp.waddr.eq(5), rp.raddr.eq(5), rp.ren.eq(1),
                 wp.wen.eq(1), wp.data_i.eq(6)):
        yield stmt
    readback = yield rp.data_o
    print (readback)
    yield

    # disable both ports, sample the output once more
    for stmt in (wp.wen.eq(0), rp.ren.eq(0)):
        yield stmt
    readback = yield rp.data_o
    print (readback)
    assert readback == 6
    yield

    readback = yield rp.data_o
    print (readback)
def regfile_array_sim(dut, rp1, rp2, wp):
    """Simulation process for RegFileArray (two read ports, one write port).

    Enables are unary, one bit per register.  Writes 2 to register 1 and
    reads it back through rp1, then writes 6 to register 5 while rp1 reads
    register 5 and rp2 reads register 1.  Sampled values are printed; the
    first rp1 read must be 2 and the rp1 value after the second write 6.
    """
    # write 2 into register 1
    for stmt in (wp.data_i.eq(2), wp.wen.eq(1<<1)):
        yield stmt
    yield

    # read register 1 through the first read port
    for stmt in (wp.wen.eq(0), rp1.ren.eq(1<<1)):
        yield stmt
    yield
    sample = yield rp1.data_o
    print (sample)
    assert sample == 2

    # write 6 to register 5; rp1 reads register 5, rp2 reads register 1
    for stmt in (rp1.ren.eq(1<<5), rp2.ren.eq(1<<1),
                 wp.wen.eq(1<<5), wp.data_i.eq(6)):
        yield stmt
    sample = yield rp1.data_o
    print (sample)
    yield

    # disable everything and sample both read ports
    for stmt in (wp.wen.eq(0), rp1.ren.eq(0), rp2.ren.eq(0)):
        yield stmt
    first = yield rp1.data_o
    print (first)
    second = yield rp2.data_o
    print (second)
    assert first == 6
    yield

    sample = yield rp1.data_o
    print (sample)
def test_regfile():
    """Convert RegFile and RegFileArray to RTLIL and simulate each of them.

    Writes test_regfile.il / test_regfile.vcd for the addressed register
    file and test_regfile_array.il / test_regfile_array.vcd for the
    array-based one.
    """
    # --- addressed register file: one read port, one write port
    dut = RegFile(32, 8)
    rp = dut.read_port()
    wp = dut.write_port()
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_regfile.il", "w") as f:
        f.write(il_text)

    run_simulation(dut, regfile_sim(dut, rp, wp), vcd_name='test_regfile.vcd')

    # --- array register file: two read ports, one write port
    dut = RegFileArray(32, 8)
    rp1 = dut.read_port("read1")
    rp2 = dut.read_port("read2")
    wp = dut.write_port("write")
    ports = dut.ports()
    print ("ports", ports)
    il_text = rtlil.convert(dut, ports=ports)
    with open("test_regfile_array.il", "w") as f:
        f.write(il_text)

    run_simulation(dut, regfile_array_sim(dut, rp1, rp2, wp),
                   vcd_name='test_regfile_array.vcd')
# Script entry point: run the register file conversion and simulations.
if __name__ == '__main__':
    test_regfile()
+++ /dev/null
""" Load / Store partial address matcher

Loads and Stores do not need a full match (CAM), they need "good enough"
avoidance.  Around 11 bits on a 64-bit address is "good enough".

The simplest way to use this module is to ignore not only the top bits,
but also the bottom bits as well: in this case (this RV64 processor),
enough to cover a DWORD (64-bit).  That means ignore the bottom 4 bits,
due to the possibility of 64-bit LD/ST being misaligned.

To reiterate: the use of this module is an *optimisation*.  All it has
to do is cover the cases that are *definitely* matches (by checking 11
bits or so), and if a few opportunities for parallel LD/STs are missed
because the top (or bottom) bits weren't checked, so what: all that
happens is: the mis-matched addresses are LD/STd on single-cycles. Big Deal.

However, if we wanted to enhance this algorithm (without using a CAM and
without using expensive comparators) probably the best way to do so would
be to turn the last 16 bits into a byte-level bitmap.  LD/ST on a byte
would have 1 of the 16 bits set.  LD/ST on a DWORD would have 8 of the 16
bits set (offset if the LD/ST was misaligned).  TODO.

> I have used bits <11:6> as they are not translated (4KB pages)
> and larger than a cache line (64 bytes).
> I have used bits <11:4> when the L1 cache was QuadW sized and
> the L2 cache was Line sized.
"""
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Const, Array, Cat, Elaboratable
-from nmutil.latch import latchregister, SRLatch
class PartialAddrMatch(Elaboratable):
    """A partial address matcher

    Holds ``n_adr`` latched addresses of ``bitwid`` bits each and compares
    every latched address against every other one.

    Inputs:
    * addrs_i: array of ``n_adr`` incoming addresses
    * addr_we_i: write-enable for incoming address (not used by the logic
      in this class)
    * addr_en_i: one bit per address, sets the corresponding latch
    * addr_rs_i: one bit per address, resets the corresponding latch

    Outputs:
    * addr_nomatch_a_o: array with one ``n_adr``-bit vector per address,
      computed as ``~(equality bits against the other addresses) & l.q``
    * addr_nomatch_o: one bit per address, set when that address' vector
      equals the latch state and its own latch is set
    """

    def __init__(self, n_adr, bitwid):
        self.n_adr = n_adr
        self.bitwid = bitwid
        # inputs
        self.addrs_i = Array(Signal(bitwid, name="addr") for i in range(n_adr))
        self.addr_we_i = Signal(n_adr)  # write-enable for incoming address
        self.addr_en_i = Signal(n_adr)  # address latched in
        self.addr_rs_i = Signal(n_adr)  # address deactivated

        # output
        self.addr_nomatch_o = Signal(n_adr, name="nomatch_o")
        self.addr_nomatch_a_o = Array(Signal(n_adr, name="nomatch_array_o")
                                      for i in range(n_adr))

    def elaborate(self, platform):
        m = Module()
        return self._elaborate(m, platform)

    def _elaborate(self, m, platform):
        """Add the matcher logic to ``m`` and return it."""
        comb = m.d.comb

        m.submodules.l = l = SRLatch(llen=self.n_adr, sync=False)
        addrs_r = Array(Signal(self.bitwid, name="a_r")
                        for i in range(self.n_adr))

        # latch set/reset
        comb += l.s.eq(self.addr_en_i)
        comb += l.r.eq(self.addr_rs_i)

        # copy in addresses (and "enable" signals)
        for i in range(self.n_adr):
            latchregister(m, self.addrs_i[i], addrs_r[i], l.q[i])

        # is there a clash, yes/no
        matchgrp = []
        for i in range(self.n_adr):
            # one equality bit per other address; never match against self
            match = [Const(0) if i == j else (addrs_r[i] == addrs_r[j])
                     for j in range(self.n_adr)]
            comb += self.addr_nomatch_a_o[i].eq(~Cat(*match) & l.q)
            matchgrp.append(self.addr_nomatch_a_o[i] == l.q)
        comb += self.addr_nomatch_o.eq(Cat(*matchgrp) & l.q)

        return m

    def __iter__(self):
        yield from self.addrs_i
        yield self.addr_we_i
        yield self.addr_en_i
        # addr_rs_i drives the latch reset above, so it has to be exported
        # as a port as well (it used to be missing from this list)
        yield self.addr_rs_i
        yield from self.addr_nomatch_a_o
        yield self.addr_nomatch_o

    def ports(self):
        """Return all input and output signals as a list."""
        return list(self)
def part_addr_sim(dut):
    """Simulation process for PartialAddrMatch.

    The previous stimulus drove ``dest_i``, ``issue_i``, ``src1_i``,
    ``go_rd_i`` and ``go_wr_i``; none of those exist on PartialAddrMatch,
    so the process raised AttributeError on its first statement.  The same
    sequence shape is kept but mapped onto the real ports: latch address 1
    into slot 0, latch the same address into slot 1 (a clash), then release
    slot 0 and slot 1 one after the other.
    """
    def strobe(sig, bits):
        # drive `sig` with `bits` for one clock, then clear it for one clock
        yield sig.eq(bits)
        yield
        yield sig.eq(0)
        yield

    # latch address 1 into slot 0
    yield dut.addrs_i[0].eq(1)
    yield from strobe(dut.addr_en_i, 0b001)
    # latch the same address into slot 1
    yield dut.addrs_i[1].eq(1)
    yield from strobe(dut.addr_en_i, 0b010)
    # release slot 0, then slot 1
    yield from strobe(dut.addr_rs_i, 0b001)
    yield from strobe(dut.addr_rs_i, 0b010)
def test_part_addr():
    """Convert a 3-address, 10-bit PartialAddrMatch to RTLIL and simulate it.

    Writes test_part_addr.il and test_part_addr.vcd.
    """
    dut = PartialAddrMatch(3, 10)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_part_addr.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = part_addr_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_part_addr.vcd')
# Script entry point: run the partial address matcher conversion/simulation.
if __name__ == '__main__':
    test_part_addr()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat, Repl
-from nmutil.latch import SRLatch
-from functools import reduce
-from operator import or_
class DependencyRow(Elaboratable):
    """ implements 11.4.7 mitch alsup dependence cell, p27
        adjusted to be clock-sync'd on rising edge only.
        mitch design (as does 6600) requires alternating rising/falling clock

        One row holds an SR latch vector (one bit per register) for the
        destination and one for each of the ``n_src`` source operands.

        * issue_i HI: dest_i / src_i bits are captured into the latches
        * go_wr_i HI: the destination latches are reset, and dest_rsel_o
          shows the latched destination (qlq) for this cycle
        * go_rd_i HI: the source latches are reset, and src_rsel_o shows
          the latched sources (qlq) for this cycle
        * go_die_i: resets both; one bit per register when cancel_mode is
          set, otherwise a single bit applied to all registers

        dest_fwd_o / src_fwd_o are the latch outputs ANDed with the read /
        write pending vectors (write-after-read / read-after-write checks).
    """

    def __init__(self, n_reg, n_src, cancel_mode=False):
        self.cancel_mode = cancel_mode
        self.n_reg = n_reg
        self.n_src = n_src

        # per-operand signal lists, numbered from 1 to match src1/src2
        src = []
        rsel = []
        fwd = []
        for num in range(1, n_src + 1):
            src.append(Signal(n_reg, name="src%d" % num, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % num,
                               reset_less=True))
            fwd.append(Signal(n_reg, name="src%d_fwd_o" % num,
                              reset_less=True))

        # inputs
        self.dest_i = Signal(n_reg, reset_less=True)  # Dest in (top)
        self.src_i = Array(src)  # operands in (top)
        self.issue_i = Signal(reset_less=True)  # Issue in (top)

        self.rd_pend_i = Signal(n_reg, reset_less=True)  # Read pend in (top)
        self.wr_pend_i = Signal(n_reg, reset_less=True)  # Write pend in (top)
        self.v_rd_rsel_o = Signal(n_reg, reset_less=True)  # Read pend out (bot)
        self.v_wr_rsel_o = Signal(n_reg, reset_less=True)  # Write pend out (bot)

        self.go_wr_i = Signal(reset_less=True)  # Go Write in (left)
        self.go_rd_i = Signal(reset_less=True)  # Go Read in (left)
        if not self.cancel_mode:
            self.go_die_i = Signal(reset_less=True)  # Go Die in (left)
        else:
            self.go_die_i = Signal(n_reg, reset_less=True)  # Go Die in (left)

        # for Register File Select Lines (vertical)
        self.dest_rsel_o = Signal(n_reg, reset_less=True)  # dest reg sel (bot)
        self.src_rsel_o = Array(rsel)  # src reg sel (bot)
        self.src2_rsel_o = Signal(n_reg, reset_less=True)  # src2 reg sel (bot)

        # for Function Unit "forward progress" (horizontal)
        self.dest_fwd_o = Signal(n_reg, reset_less=True)  # dest FU fw (right)
        self.src_fwd_o = Array(fwd)  # src FU fw (right)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        n_reg = self.n_reg
        operands = range(self.n_src)

        m.submodules.dest_c = dest_c = SRLatch(sync=False, llen=n_reg)
        src_c = []
        for idx in operands:
            src_l = SRLatch(sync=False, llen=n_reg)
            setattr(m.submodules, "src%d_c" % (idx + 1), src_l)
            src_c.append(src_l)

        # go_wr / go_rd / issue replicated to one bit per register
        wr_ext = Repl(self.go_wr_i, n_reg)
        rd_ext = Repl(self.go_rd_i, n_reg)
        i_ext = Repl(self.issue_i, n_reg)

        # connect go_rd / go_wr (dest->wr, src->rd)
        wr_die = Signal(n_reg, reset_less=True)
        rd_die = Signal(n_reg, reset_less=True)
        if self.cancel_mode:
            go_die = self.go_die_i
        else:
            go_die = Repl(self.go_die_i, n_reg)
        comb += wr_die.eq(Repl(self.go_wr_i, n_reg) | go_die)
        comb += rd_die.eq(Repl(self.go_rd_i, n_reg) | go_die)
        comb += dest_c.r.eq(wr_die)
        for idx in operands:
            comb += src_c[idx].r.eq(rd_die)

        # connect input reg bit (unary)
        comb += dest_c.s.eq(i_ext & self.dest_i)
        for idx in operands:
            comb += src_c[idx].s.eq(i_ext & self.src_i[idx])

        # connect up hazard checks: read-after-write and write-after-read
        comb += self.dest_fwd_o.eq(dest_c.q & self.rd_pend_i)
        for idx in operands:
            comb += self.src_fwd_o[idx].eq(src_c[idx].q & self.wr_pend_i)

        # connect reg-sel outputs
        comb += self.dest_rsel_o.eq(dest_c.qlq & wr_ext)
        for idx in operands:
            comb += self.src_rsel_o[idx].eq(src_c[idx].qlq & rd_ext)

        # to be accumulated to indicate if register is in use (globally)
        # after ORing, is fed back in to rd_pend_i / wr_pend_i
        src_q = [latch.qlq for latch in src_c]
        comb += self.v_rd_rsel_o.eq(reduce(or_, src_q))
        comb += self.v_wr_rsel_o.eq(dest_c.qlq)

        return m

    def __iter__(self):
        yield self.dest_i
        yield from self.src_i
        yield self.rd_pend_i
        yield self.wr_pend_i
        yield self.issue_i
        yield self.go_wr_i
        yield self.go_rd_i
        yield self.go_die_i
        yield self.dest_rsel_o
        yield from self.src_rsel_o
        yield self.dest_fwd_o
        yield from self.src_fwd_o

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
def dcell_sim(dut):
    """Simulation process for DependencyRow.

    Issues with destination register bit 0, issues again with the first
    source operand set (held for three clocks), then pulses go_rd and go_wr.

    DependencyRow has no ``src1_i`` attribute (its operand inputs are the
    ``src_i`` array), so the first operand is driven through ``src_i[0]``;
    the former ``dut.src1_i`` raised AttributeError.
    """
    def strobe(sig, clocks=1):
        # raise `sig` for `clocks` clock cycles, then lower it for one
        yield sig.eq(1)
        for _ in range(clocks):
            yield
        yield sig.eq(0)
        yield

    yield dut.dest_i.eq(1)
    yield from strobe(dut.issue_i)
    yield dut.src_i[0].eq(1)
    yield from strobe(dut.issue_i, clocks=3)
    yield from strobe(dut.go_rd_i)
    yield from strobe(dut.go_wr_i)
def test_dcell():
    """Convert a 4-register, 2-source DependencyRow (cancel mode) and run it.

    Writes test_drow.il and test_dcell.vcd.
    """
    dut = DependencyRow(4, 2, True)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_drow.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = dcell_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_dcell.vcd')
# Script entry point: run the dependency row conversion and simulation.
if __name__ == '__main__':
    test_dcell()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Array, Const, Elaboratable
-from nmigen.lib.coding import Decoder
-from nmutil.latch import SRLatch, latchregister
-from scoreboard.shadow import Shadow
class FnUnit(Elaboratable):
    """ implements 11.4.8 function unit, p31
        also implements optional shadowing 11.5.1, p55

        shadowing can be used for branches as well as exceptions (interrupts),
        load/store hold (exceptions again), and vector-element predication
        (once the predicate is known, which it may not be at instruction issue)

        Inputs

        * :wid:         register file width
        * :shadow_wid:  number of shadow/fail/good/go_die sets
        * :n_dests:     number of destination regfile(s) (index: rfile_sel_i)
        * :wr_pend:     stored on the instance as ``self.wr_pend``

        notes:

        * dest_i / src1_i / src2_i are in *binary*, whereas...
        * ...g_rd_pend_i / g_wr_pend_i and rd_pend_o / wr_pend_o are UNARY
        * req_rel_i (request release) is the direct equivalent of pipeline
                    "output valid" (valid_o)
        * recover is a local python variable (the Shadow go_die_o output)
        * shadown is likewise taken from the Shadow submodule (shadown_o)
        * rfile_sel_i selects which entry of xx_pend_o / writable_o /
          g_xx_pend_i is used; with a single destination it is Const(0)
    """

    def __init__(self, wid, shadow_wid=0, n_dests=1, wr_pend=False):
        self.reg_width = wid
        self.n_dests = n_dests
        self.shadow_wid = shadow_wid
        self.wr_pend = wr_pend

        # inputs
        if n_dests <= 1:
            self.rfile_sel_i = Const(0)  # no selection.  gets Array[0]
        else:
            self.rfile_sel_i = Signal(max=n_dests, reset_less=True)
        self.dest_i = Signal(max=wid, reset_less=True)  # Dest R# in (top)
        self.src1_i = Signal(max=wid, reset_less=True)  # oper1 R# in (top)
        self.src2_i = Signal(max=wid, reset_less=True)  # oper2 R# in (top)
        self.issue_i = Signal(reset_less=True)  # Issue in (top)

        self.go_wr_i = Signal(reset_less=True)  # Go Write in (left)
        self.go_rd_i = Signal(reset_less=True)  # Go Read in (left)
        self.req_rel_i = Signal(reset_less=True)  # request release (left)

        self.g_xx_pend_i = Array(Signal(wid, reset_less=True, name="g_pend_i")
                                 for i in range(n_dests))  # global rd (right)
        self.g_wr_pend_i = Signal(wid, reset_less=True)  # global wr (right)

        if shadow_wid:
            self.shadow_i = Signal(shadow_wid, reset_less=True)
            self.s_fail_i = Signal(shadow_wid, reset_less=True)
            self.s_good_i = Signal(shadow_wid, reset_less=True)
            self.go_die_o = Signal(reset_less=True)

        # outputs
        self.readable_o = Signal(reset_less=True)  # Readable out (right)
        self.writable_o = Array(Signal(reset_less=True, name="writable_o")
                                for i in range(n_dests))  # writable out (right)
        self.busy_o = Signal(reset_less=True)  # busy out (left)

        self.src1_pend_o = Signal(wid, reset_less=True)  # src1 pending
        self.src2_pend_o = Signal(wid, reset_less=True)  # src2 pending
        self.rd_pend_o = Signal(wid, reset_less=True)  # rd pending (right)
        self.xx_pend_o = Array(Signal(wid, reset_less=True, name="pend_o")
                               for i in range(n_dests))  # wr pending (right)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        m.submodules.rd_l = rd_l = SRLatch(sync=False)
        m.submodules.wr_l = wr_l = SRLatch(sync=False)
        m.submodules.dest_d = dest_d = Decoder(self.reg_width)
        m.submodules.src1_d = src1_d = Decoder(self.reg_width)
        m.submodules.src2_d = src2_d = Decoder(self.reg_width)

        # shadow / recover (optional: shadow_wid > 0)
        m.submodules.shadow = shadow = Shadow(self.shadow_wid)
        if self.shadow_wid:
            comb += [shadow.issue_i.eq(self.issue_i),
                     shadow.s_fail_i.eq(self.s_fail_i),
                     shadow.s_good_i.eq(self.s_good_i),
                     shadow.shadow_i.eq(self.shadow_i)]
        shadown = shadow.shadown_o
        recover = shadow.go_die_o

        # selector: the regfile-specific entries picked by rfile_sel_i
        sel = self.rfile_sel_i
        sel_pend_o = self.xx_pend_o[sel]
        sel_writable_o = self.writable_o[sel]
        sel_g_pend_i = self.g_xx_pend_i[sel]

        # defaults: every array entry and readable_o start out as zero,
        # the assignments further down override the selected ones
        for idx in range(self.n_dests):
            comb += self.xx_pend_o[idx].eq(0)  # initialise all array
            comb += self.writable_o[idx].eq(0)  # to zero
        comb += self.readable_o.eq(0)  # to zero

        # go_wr latch: reset on go_wr HI, set on issue
        comb += [wr_l.s.eq(self.issue_i),
                 wr_l.r.eq(self.go_wr_i | recover)]

        # src1 latch: reset on go_rd HI, set on issue
        comb += [rd_l.s.eq(self.issue_i),
                 rd_l.r.eq(self.go_rd_i | recover)]

        # latch/registers for dest / src1 / src2
        dest_r = Signal(max=self.reg_width, reset_less=True)
        src1_r = Signal(max=self.reg_width, reset_less=True)
        src2_r = Signal(max=self.reg_width, reset_less=True)
        # XXX latch based on *issue* rather than !latch (as in book)
        latchregister(m, self.dest_i, dest_r, self.issue_i)  # wr_l.qn)
        latchregister(m, self.src1_i, src1_r, self.issue_i)  # wr_l.qn)
        latchregister(m, self.src2_i, src2_r, self.issue_i)  # wr_l.qn)

        # dest decoder (use dest reg as input): write-pending out
        comb += [dest_d.i.eq(dest_r),
                 dest_d.n.eq(wr_l.qn),  # decode is inverted
                 self.busy_o.eq(wr_l.q),  # busy if set
                 sel_pend_o.eq(dest_d.o)]

        # src1/src2 decoder (use src1/2 regs as input): read-pending out
        comb += [src1_d.i.eq(src1_r),
                 src1_d.n.eq(rd_l.qn),  # decode is inverted
                 src2_d.i.eq(src2_r),
                 src2_d.n.eq(rd_l.qn),  # decode is inverted
                 self.src1_pend_o.eq(src1_d.o),
                 self.src2_pend_o.eq(src2_d.o),
                 self.rd_pend_o.eq(src1_d.o | src2_d.o)]

        # readable output signal
        g_rd = Signal(self.reg_width, reset_less=True)
        ro = Signal(reset_less=True)
        comb += [g_rd.eq(~self.g_wr_pend_i & self.rd_pend_o),
                 ro.eq(~g_rd.bool()),
                 self.readable_o.eq(ro)]

        # writable output signal
        g_wr_v = Signal(self.reg_width, reset_less=True)
        g_wr = Signal(reset_less=True)
        wo = Signal(reset_less=True)
        comb += [g_wr_v.eq(sel_g_pend_i & sel_pend_o),
                 g_wr.eq(~g_wr_v.bool()),
                 wo.eq(g_wr & rd_l.qn & self.req_rel_i & shadown),
                 sel_writable_o.eq(wo)]

        return m

    def __iter__(self):
        yield self.dest_i
        yield self.src1_i
        yield self.src2_i
        yield self.issue_i
        yield self.go_wr_i
        yield self.go_rd_i
        yield self.req_rel_i
        yield from self.g_xx_pend_i
        yield self.g_wr_pend_i
        yield self.readable_o
        yield from self.writable_o
        yield self.rd_pend_o
        yield from self.xx_pend_o

    def ports(self):
        """Return the signals produced by iterating this object, as a list."""
        return list(self)
-############# ###############
-# --- --- #
-# --- renamed / redirected from base class --- #
-# --- --- #
-# --- below are convenience classes which match the names --- #
-# --- of the various mitch alsup book chapter gate diagrams --- #
-# --- --- #
-############# ###############
class IntFnUnit(FnUnit):
    """FnUnit with integer-regfile aliases (``int_*``) for its signals.

    Each alias refers to the very same signal object as the base-class
    attribute; the signal is additionally renamed to the alias name.
    """

    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid)
        # alias the generic base-class signals
        self.int_rd_pend_o = self.rd_pend_o
        self.int_wr_pend_o = self.xx_pend_o[0]
        self.g_int_wr_pend_i = self.g_wr_pend_i
        self.g_int_rd_pend_i = self.g_xx_pend_i[0]
        self.int_readable_o = self.readable_o
        self.int_writable_o = self.writable_o[0]

        # rename each signal after its alias
        for alias in ("int_rd_pend_o", "int_wr_pend_o",
                      "g_int_rd_pend_i", "g_int_wr_pend_i",
                      "int_readable_o", "int_writable_o"):
            getattr(self, alias).name = alias
class FPFnUnit(FnUnit):
    """FnUnit with floating-point-regfile aliases (``fp_*``) for its signals.

    Each alias refers to the very same signal object as the base-class
    attribute; the signal is additionally renamed to the alias name.
    """

    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid)
        # alias the generic base-class signals
        self.fp_rd_pend_o = self.rd_pend_o
        self.fp_wr_pend_o = self.xx_pend_o[0]
        self.g_fp_wr_pend_i = self.g_wr_pend_i
        self.g_fp_rd_pend_i = self.g_xx_pend_i[0]
        self.fp_writable_o = self.writable_o[0]
        self.fp_readable_o = self.readable_o

        # rename each signal after its alias
        for alias in ("fp_rd_pend_o", "fp_wr_pend_o",
                      "g_fp_rd_pend_i", "g_fp_wr_pend_i",
                      "fp_writable_o", "fp_readable_o"):
            getattr(self, alias).name = alias
class LDFnUnit(FnUnit):
    """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile)
        * when rfile_sel_i == 0, int_wr_pend_o is set
        * when rfile_sel_i == 1, fp_wr_pend_o is set

        The ``int_*`` / ``fp_*`` attributes alias the base-class signals
        (index 0 = integer, index 1 = floating point) and rename them.
    """

    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid, n_dests=2)
        # alias the generic base-class signals
        self.int_rd_pend_o = self.rd_pend_o
        self.int_wr_pend_o = self.xx_pend_o[0]
        self.fp_wr_pend_o = self.xx_pend_o[1]
        self.g_int_wr_pend_i = self.g_wr_pend_i
        self.g_int_rd_pend_i = self.g_xx_pend_i[0]
        self.g_fp_rd_pend_i = self.g_xx_pend_i[1]
        self.int_readable_o = self.readable_o
        self.int_writable_o = self.writable_o[0]
        self.fp_writable_o = self.writable_o[1]

        # rename each signal after its alias
        for alias in ("int_rd_pend_o", "int_wr_pend_o", "fp_wr_pend_o",
                      "g_int_wr_pend_i", "g_int_rd_pend_i", "g_fp_rd_pend_i",
                      "int_readable_o", "int_writable_o", "fp_writable_o"):
            getattr(self, alias).name = alias
class STFnUnit(FnUnit):
    """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile)
        * the base class is constructed with wr_pend=True
        * xx_pend_o[0] is exposed as int2_rd_pend_o (2nd int read-pending)
        * xx_pend_o[1] is exposed as fp_rd_pend_o (FP read-pending)
        * g_wr_pend_i of the base class is replaced by g_xx_pend_i[0]
    """

    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid, n_dests=2, wr_pend=True)
        self.int_rd_pend_o = self.rd_pend_o  # 1st int read-pending vector
        self.int2_rd_pend_o = self.xx_pend_o[0]  # 2nd int read-pending vector
        self.fp_rd_pend_o = self.xx_pend_o[1]  # 1x FP read-pending vector
        # yes overwrite FnUnit base class g_wr_pend_i vector
        self.g_int_wr_pend_i = self.g_wr_pend_i = self.g_xx_pend_i[0]
        self.g_fp_wr_pend_i = self.g_xx_pend_i[1]
        self.int_readable_o = self.readable_o
        self.int_writable_o = self.writable_o[0]
        self.fp_writable_o = self.writable_o[1]

        # rename each signal after its alias
        for alias in ("int_rd_pend_o", "int2_rd_pend_o", "fp_rd_pend_o",
                      "g_int_wr_pend_i", "g_fp_wr_pend_i",
                      "int_readable_o", "int_writable_o", "fp_writable_o"):
            getattr(self, alias).name = alias
-def int_fn_unit_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_int_fn_unit():
- dut = FnUnit(32, 2, 2)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fn_unit.il", "w") as f:
- f.write(vl)
- dut = LDFnUnit(32, 2)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_ld_fn_unit.il", "w") as f:
- f.write(vl)
- dut = STFnUnit(32, 0)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_st_fn_unit.il", "w") as f:
- f.write(vl)
- run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_fn_unit.vcd')
-if __name__ == '__main__':
- test_int_fn_unit()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Const, Elaboratable
-from nmutil.latch import SRLatch
-class FUDependenceCell(Elaboratable):
- """ implements 11.4.7 mitch alsup dependence cell, p27
- """
- def __init__(self, dummy, n_fu=1):
- self.n_fu = n_fu
- self.dummy = Const(~(1<<dummy), n_fu)
- # inputs
- self.rd_pend_i = Signal(n_fu, reset_less=True) # read pend in (left)
- self.wr_pend_i = Signal(n_fu, reset_less=True) # write pend in (left)
- self.issue_i = Signal(n_fu, reset_less=True) # Issue in (top)
- self.go_wr_i = Signal(n_fu, reset_less=True) # Go Write in (left)
- self.go_rd_i = Signal(n_fu, reset_less=True) # Go Read in (left)
- self.go_die_i = Signal(n_fu, reset_less=True) # Go Die in (left)
- # outputs (latched rd/wr wait)
- self.rd_wait_o = Signal(n_fu, reset_less=True) # read wait out (right)
- self.wr_wait_o = Signal(n_fu, reset_less=True) # write wait out (right)
- def elaborate(self, platform):
- m = Module()
- m.submodules.rd_c = rd_c = SRLatch(sync=False, llen=self.n_fu)
- m.submodules.wr_c = wr_c = SRLatch(sync=False, llen=self.n_fu)
- # reset on go HI, set on dest and issue
- m.d.comb += rd_c.s.eq(self.issue_i & self.rd_pend_i)
- m.d.comb += wr_c.s.eq(self.issue_i & self.wr_pend_i)
- # connect go_rd / go_wr
- m.d.comb += wr_c.r.eq(self.go_wr_i | self.go_die_i)
- m.d.comb += rd_c.r.eq(self.go_rd_i | self.go_die_i)
- # connect pend_i
- m.d.comb += rd_c.s.eq(self.issue_i & self.rd_pend_i & self.dummy)
- m.d.comb += wr_c.s.eq(self.issue_i & self.wr_pend_i & self.dummy)
- # connect output
- m.d.comb += self.rd_wait_o.eq(rd_c.qlq & ~self.issue_i)
- m.d.comb += self.wr_wait_o.eq(wr_c.qlq & ~self.issue_i)
- return m
- def __iter__(self):
- yield self.rd_pend_i
- yield self.wr_pend_i
- yield self.issue_i
- yield self.go_wr_i
- yield self.go_rd_i
- yield self.go_die_i
- yield self.rd_wait_o
- yield self.wr_wait_o
- def ports(self):
- return list(self)
-def dcell_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_dcell():
- dut = FUDependenceCell(dummy=0, n_fu=4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fu_dcell.il", "w") as f:
- f.write(vl)
- run_simulation(dut, dcell_sim(dut), vcd_name='test_fu_dcell.vcd')
-if __name__ == '__main__':
- test_dcell()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
-from .fu_dep_cell import FUDependenceCell
-from .fu_picker_vec import FU_Pick_Vec
- 6600 Function Unit Dependency Table Matrix inputs / outputs
- -----------------------------------------------------------
-class FUFUDepMatrix(Elaboratable):
- """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26
- """
- def __init__(self, n_fu_row, n_fu_col):
- self.n_fu_row = n_fu_row # Y (FU row#) ^v
- self.n_fu_col = n_fu_col # X (FU col #) <>
- self.rd_pend_i = Signal(n_fu_row, reset_less=True) # Rd pending (left)
- self.wr_pend_i = Signal(n_fu_row, reset_less=True) # Wr pending (left)
- self.issue_i = Signal(n_fu_col, reset_less=True) # Issue in (top)
- self.go_wr_i = Signal(n_fu_row, reset_less=True) # Go Write in (left)
- self.go_rd_i = Signal(n_fu_row, reset_less=True) # Go Read in (left)
- self.go_die_i = Signal(n_fu_row, reset_less=True) # Go Die in (left)
- # for Function Unit Readable/Writable (horizontal)
- self.readable_o = Signal(n_fu_col, reset_less=True) # readable (bot)
- self.writable_o = Signal(n_fu_col, reset_less=True) # writable (bot)
- def elaborate(self, platform):
- m = Module()
- # ---
- # matrix of dependency cells
- # ---
- dm = Array(FUDependenceCell(f, self.n_fu_col) \
- for f in range(self.n_fu_row))
- for y in range(self.n_fu_row):
- setattr(m.submodules, "dm%d" % y, dm[y])
- # ---
- # array of Function Unit Readable/Writable: row-length, horizontal
- # ---
- fur = Array(FU_Pick_Vec(self.n_fu_row) for r in range(self.n_fu_col))
- for x in range(self.n_fu_col):
- setattr(m.submodules, "fur_x%d" % (x), fur[x])
- # ---
- # connect FU Readable/Writable vector
- # ---
- readable = []
- writable = []
- for y in range(self.n_fu_row):
- fu = fur[y]
- # accumulate Readable/Writable Vector outputs
- readable.append(fu.readable_o)
- writable.append(fu.writable_o)
- # ... and output them from this module (horizontal, width=REGs)
- m.d.comb += self.readable_o.eq(Cat(*readable))
- m.d.comb += self.writable_o.eq(Cat(*writable))
- # ---
- # connect FU Pending
- # ---
- for y in range(self.n_fu_row):
- dc = dm[y]
- fu = fur[y]
- # connect cell reg-select outputs to Reg Vector In
- m.d.comb += [fu.rd_pend_i.eq(dc.rd_wait_o),
- fu.wr_pend_i.eq(dc.wr_wait_o),
- ]
- # ---
- # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i
- # ---
- for x in range(self.n_fu_col):
- issue_i = []
- for y in range(self.n_fu_row):
- dc = dm[y]
- # accumulate cell inputs issue
- issue_i.append(dc.issue_i[x])
- # wire up inputs from module to row cell inputs
- m.d.comb += Cat(*issue_i).eq(self.issue_i)
- # ---
- # connect Matrix go_rd_i/go_wr_i to module readable/writable
- # ---
- for y in range(self.n_fu_row):
- dc = dm[y]
- # wire up inputs from module to row cell inputs
- m.d.comb += [dc.go_rd_i.eq(self.go_rd_i),
- dc.go_wr_i.eq(self.go_wr_i),
- dc.go_die_i.eq(self.go_die_i),
- ]
- # ---
- # connect Matrix pending
- # ---
- for y in range(self.n_fu_row):
- dc = dm[y]
- # wire up inputs from module to row cell inputs
- m.d.comb += [dc.rd_pend_i.eq(self.rd_pend_i),
- dc.wr_pend_i.eq(self.wr_pend_i),
- ]
- return m
- def __iter__(self):
- yield self.rd_pend_i
- yield self.wr_pend_i
- yield self.issue_i
- yield self.go_wr_i
- yield self.go_rd_i
- yield self.readable_o
- yield self.writable_o
- def ports(self):
- return list(self)
-def d_matrix_sim(dut):
- """ XXX TODO
- """
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_fu_fu_matrix():
- dut = FUFUDepMatrix(n_fu_row=3, n_fu_col=4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fu_fu_matrix.il", "w") as f:
- f.write(vl)
- run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_fu_matrix.vcd')
-if __name__ == '__main__':
- test_fu_fu_matrix()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
-from scoreboard.fumem_dep_cell import FUMemDependenceCell
-from scoreboard.fu_mem_picker_vec import FUMem_Pick_Vec
- 6600 Function Unit Dependency Table Matrix inputs / outputs
- -----------------------------------------------------------
-class FUMemDepMatrix(Elaboratable):
- """ implements FU-to-FU Memory Dependency Matrix
- """
- def __init__(self, n_fu_row, n_fu_col):
- self.n_fu_row = n_fu_row # Y (FU row#) ^v
- self.n_fu_col = n_fu_col # X (FU col #) <>
- self.st_pend_i = Signal(n_fu_row, reset_less=True) # Rd pending (left)
- self.ld_pend_i = Signal(n_fu_row, reset_less=True) # Wr pending (left)
- self.issue_i = Signal(n_fu_col, reset_less=True) # Issue in (top)
- self.go_ld_i = Signal(n_fu_row, reset_less=True) # Go Write in (left)
- self.go_st_i = Signal(n_fu_row, reset_less=True) # Go Read in (left)
- self.go_die_i = Signal(n_fu_row, reset_less=True) # Go Die in (left)
- # for Function Unit Readable/Writable (horizontal)
- self.storable_o = Signal(n_fu_col, reset_less=True) # storable (bot)
- self.loadable_o = Signal(n_fu_col, reset_less=True) # loadable (bot)
- def elaborate(self, platform):
- m = Module()
- # ---
- # matrix of dependency cells
- # ---
- dm = Array(FUMemDependenceCell(f, self.n_fu_col) \
- for f in range(self.n_fu_row))
- for y in range(self.n_fu_row):
- setattr(m.submodules, "dm%d" % y, dm[y])
- # ---
- # array of Function Unit Readable/Writable: row-length, horizontal
- # ---
- fur = Array(FUMem_Pick_Vec(self.n_fu_row) for r in range(self.n_fu_col))
- for x in range(self.n_fu_col):
- setattr(m.submodules, "fur_x%d" % (x), fur[x])
- # ---
- # connect FU Readable/Writable vector
- # ---
- storable = []
- loadable = []
- for y in range(self.n_fu_row):
- fu = fur[y]
- # accumulate Readable/Writable Vector outputs
- storable.append(fu.storable_o)
- loadable.append(fu.loadable_o)
- # ... and output them from this module (horizontal, width=REGs)
- m.d.comb += self.storable_o.eq(Cat(*storable))
- m.d.comb += self.loadable_o.eq(Cat(*loadable))
- # ---
- # connect FU Pending
- # ---
- for y in range(self.n_fu_row):
- dc = dm[y]
- fu = fur[y]
- # connect cell reg-select outputs to Reg Vector In
- m.d.comb += [fu.st_pend_i.eq(dc.st_wait_o),
- fu.ld_pend_i.eq(dc.ld_wait_o),
- ]
- # ---
- # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i
- # ---
- for x in range(self.n_fu_col):
- issue_i = []
- for y in range(self.n_fu_row):
- dc = dm[y]
- # accumulate cell inputs issue
- issue_i.append(dc.issue_i[x])
- # wire up inputs from module to row cell inputs
- m.d.comb += Cat(*issue_i).eq(self.issue_i)
- # ---
- # connect Matrix go_st_i/go_ld_i to module storable/loadable
- # ---
- for y in range(self.n_fu_row):
- dc = dm[y]
- # wire up inputs from module to row cell inputs
- m.d.comb += [dc.go_st_i.eq(self.go_st_i),
- dc.go_ld_i.eq(self.go_ld_i),
- dc.go_die_i.eq(self.go_die_i),
- ]
- # ---
- # connect Matrix pending
- # ---
- for y in range(self.n_fu_row):
- dc = dm[y]
- # wire up inputs from module to row cell inputs
- m.d.comb += [dc.st_pend_i.eq(self.st_pend_i),
- dc.ld_pend_i.eq(self.ld_pend_i),
- ]
- return m
- def __iter__(self):
- yield self.st_pend_i
- yield self.ld_pend_i
- yield self.issue_i
- yield self.go_ld_i
- yield self.go_st_i
- yield self.storable_o
- yield self.loadable_o
- def ports(self):
- return list(self)
-def d_matrix_sim(dut):
- """ XXX TODO
- """
- yield dut.ld_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.st_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_st_i.eq(1)
- yield
- yield dut.go_st_i.eq(0)
- yield
- yield dut.go_ld_i.eq(1)
- yield
- yield dut.go_ld_i.eq(0)
- yield
-def test_fu_fu_matrix():
- dut = FUMemDepMatrix(n_fu_row=3, n_fu_col=3)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fu_mem_matrix.il", "w") as f:
- f.write(vl)
- run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_mem_matrix.vcd')
-if __name__ == '__main__':
- test_fu_fu_matrix()
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal, Cat
-class FUMem_Pick_Vec(Elaboratable):
- """ these are allocated per-FU (horizontally),
- and are of length fu_row_n
- """
- def __init__(self, fu_row_n):
- self.fu_row_n = fu_row_n
- self.st_pend_i = Signal(fu_row_n, reset_less=True)
- self.ld_pend_i = Signal(fu_row_n, reset_less=True)
- self.storable_o = Signal(reset_less=True)
- self.loadable_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- # Readable if there are no writes pending
- m.d.comb += self.storable_o.eq(~self.ld_pend_i.bool())
- # Writable if there are no reads pending
- m.d.comb += self.loadable_o.eq(~self.st_pend_i.bool())
- return m
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal, Cat
-class FU_Pick_Vec(Elaboratable):
- """ these are allocated per-FU (horizontally),
- and are of length fu_row_n
- """
- def __init__(self, fu_row_n):
- self.fu_row_n = fu_row_n
- self.rd_pend_i = Signal(fu_row_n, reset_less=True)
- self.wr_pend_i = Signal(fu_row_n, reset_less=True)
- self.readable_o = Signal(reset_less=True)
- self.writable_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- # Readable if there are no writes pending
- m.d.comb += self.readable_o.eq(~self.wr_pend_i.bool())
- # Writable if there are no reads pending
- m.d.comb += self.writable_o.eq(~self.rd_pend_i.bool())
- return m
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat, Repl
-from scoreboard.dependence_cell import DependencyRow
-from scoreboard.fu_wr_pending import FU_RW_Pend
-from scoreboard.reg_select import Reg_Rsv
-from scoreboard.global_pending import GlobalPending
- 6600 Dependency Table Matrix inputs / outputs
- ---------------------------------------------
- d s1 s2 i d s1 s2 i d s1 s2 i d s1 s2 i
- | | | | | | | | | | | | | | | |
- v v v v v v v v v v v v v v v v
- go_rd/go_wr -> dm-r0-fu0 dm-r1-fu0 dm-r2-fu0 dm-r3-fu0 -> wr/rd-pend
- go_rd/go_wr -> dm-r0-fu1 dm-r1-fu1 dm-r2-fu1 dm-r3-fu1 -> wr/rd-pend
- go_rd/go_wr -> dm-r0-fu2 dm-r1-fu2 dm-r2-fu2 dm-r3-fu2 -> wr/rd-pend
- | | | | | | | | | | | |
- v v v v v v v v v v v v
- d s1 s2 d s1 s2 d s1 s2 d s1 s2
- reg sel reg sel reg sel reg sel
-class FURegDepMatrix(Elaboratable):
- """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26
- """
- def __init__(self, n_fu_row, n_reg_col, n_src, cancel=None):
- self.n_src = n_src
- self.n_fu_row = nf = n_fu_row # Y (FUs) ^v
- self.n_reg_col = n_reg = n_reg_col # X (Regs) <>
- # arrays
- src = []
- rsel = []
- for i in range(n_src):
- j = i + 1 # name numbering to match src1/src2
- src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
- rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
- pend = []
- for i in range(nf):
- j = i + 1 # name numbering to match src1/src2
- pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
- self.dest_i = Signal(n_reg_col, reset_less=True) # Dest in (top)
- self.src_i = Array(src) # oper in (top)
- # cancellation array (from Address Matching), ties in with go_die_i
- self.cancel = cancel
- # Register "Global" vectors for determining RaW and WaR hazards
- self.wr_pend_i = Signal(n_reg_col, reset_less=True) # wr pending (top)
- self.rd_pend_i = Signal(n_reg_col, reset_less=True) # rd pending (top)
- self.v_wr_rsel_o = Signal(n_reg_col, reset_less=True) # wr pending (bot)
- self.v_rd_rsel_o = Signal(n_reg_col, reset_less=True) # rd pending (bot)
- self.issue_i = Signal(n_fu_row, reset_less=True) # Issue in (top)
- self.go_wr_i = Signal(n_fu_row, reset_less=True) # Go Write in (left)
- self.go_rd_i = Signal(n_fu_row, reset_less=True) # Go Read in (left)
- self.go_die_i = Signal(n_fu_row, reset_less=True) # Go Die in (left)
- # for Register File Select Lines (horizontal), per-reg
- self.dest_rsel_o = Signal(n_reg_col, reset_less=True) # dest reg (bot)
- self.src_rsel_o = Array(rsel) # src reg (bot)
- # for Function Unit "forward progress" (vertical), per-FU
- self.wr_pend_o = Signal(n_fu_row, reset_less=True) # wr pending (right)
- self.rd_pend_o = Signal(n_fu_row, reset_less=True) # rd pending (right)
- self.rd_src_pend_o = Array(pend) # src1 pending
- def elaborate(self, platform):
- m = Module()
- return self._elaborate(m, platform)
- def _elaborate(self, m, platform):
- # ---
- # matrix of dependency cells
- # ---
- cancel_mode = self.cancel is not None
- dm = Array(DependencyRow(self.n_reg_col, self.n_src, cancel_mode) \
- for r in range(self.n_fu_row))
- for fu in range(self.n_fu_row):
- setattr(m.submodules, "dr_fu%d" % fu, dm[fu])
- # ---
- # array of Function Unit Pending vectors
- # ---
- fupend = Array(FU_RW_Pend(self.n_reg_col, self.n_src) \
- for f in range(self.n_fu_row))
- for fu in range(self.n_fu_row):
- setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu])
- # ---
- # array of Register Reservation vectors
- # ---
- regrsv = Array(Reg_Rsv(self.n_fu_row, self.n_src) \
- for r in range(self.n_reg_col))
- for rn in range(self.n_reg_col):
- setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn])
- # ---
- # connect Function Unit vector
- # ---
- wr_pend = []
- rd_pend = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- fup = fupend[fu]
- dest_fwd_o = []
- for rn in range(self.n_reg_col):
- # accumulate cell fwd outputs for dest/src1/src2
- dest_fwd_o.append(dc.dest_fwd_o[rn])
- # connect cell fwd outputs to FU Vector in [Cat is gooood]
- m.d.comb += [fup.dest_fwd_i.eq(Cat(*dest_fwd_o)),
- ]
- # accumulate FU Vector outputs
- wr_pend.append(fup.reg_wr_pend_o)
- rd_pend.append(fup.reg_rd_pend_o)
- # ... and output them from this module (vertical, width=FUs)
- m.d.comb += self.wr_pend_o.eq(Cat(*wr_pend))
- m.d.comb += self.rd_pend_o.eq(Cat(*rd_pend))
- # same for src
- for i in range(self.n_src):
- rd_src_pend = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- fup = fupend[fu]
- src_fwd_o = []
- for rn in range(self.n_reg_col):
- # accumulate cell fwd outputs for dest/src1/src2
- src_fwd_o.append(dc.src_fwd_o[i][rn])
- # connect cell fwd outputs to FU Vector in [Cat is gooood]
- m.d.comb += [fup.src_fwd_i[i].eq(Cat(*src_fwd_o)),
- ]
- # accumulate FU Vector outputs
- rd_src_pend.append(fup.reg_rd_src_pend_o[i])
- # ... and output them from this module (vertical, width=FUs)
- m.d.comb += self.rd_src_pend_o[i].eq(Cat(*rd_src_pend))
- # ---
- # connect Reg Selection vector
- # ---
- dest_rsel = []
- for rn in range(self.n_reg_col):
- rsv = regrsv[rn]
- dest_rsel_o = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # accumulate cell reg-select outputs dest/src1/src2
- dest_rsel_o.append(dc.dest_rsel_o[rn])
- # connect cell reg-select outputs to Reg Vector In
- m.d.comb += rsv.dest_rsel_i.eq(Cat(*dest_rsel_o)),
- # accumulate Reg-Sel Vector outputs
- dest_rsel.append(rsv.dest_rsel_o)
- # ... and output them from this module (horizontal, width=REGs)
- m.d.comb += self.dest_rsel_o.eq(Cat(*dest_rsel))
- # same for src
- for i in range(self.n_src):
- src_rsel = []
- for rn in range(self.n_reg_col):
- rsv = regrsv[rn]
- src_rsel_o = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # accumulate cell reg-select outputs dest/src1/src2
- src_rsel_o.append(dc.src_rsel_o[i][rn])
- # connect cell reg-select outputs to Reg Vector In
- m.d.comb += rsv.src_rsel_i[i].eq(Cat(*src_rsel_o)),
- # accumulate Reg-Sel Vector outputs
- src_rsel.append(rsv.src_rsel_o[i])
- # ... and output them from this module (horizontal, width=REGs)
- m.d.comb += self.src_rsel_o[i].eq(Cat(*src_rsel))
- # ---
- # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i
- # ---
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # wire up inputs from module to row cell inputs (Cat is gooood)
- m.d.comb += [dc.dest_i.eq(self.dest_i),
- dc.rd_pend_i.eq(self.rd_pend_i),
- dc.wr_pend_i.eq(self.wr_pend_i),
- ]
- # same for src
- for i in range(self.n_src):
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # wire up inputs from module to row cell inputs (Cat is gooood)
- m.d.comb += dc.src_i[i].eq(self.src_i[i])
- # accumulate rsel bits into read/write pending vectors.
- rd_pend_v = []
- wr_pend_v = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- rd_pend_v.append(dc.v_rd_rsel_o)
- wr_pend_v.append(dc.v_wr_rsel_o)
- rd_v = GlobalPending(self.n_reg_col, rd_pend_v)
- wr_v = GlobalPending(self.n_reg_col, wr_pend_v)
- m.submodules.rd_v = rd_v
- m.submodules.wr_v = wr_v
- m.d.comb += self.v_rd_rsel_o.eq(rd_v.g_pend_o)
- m.d.comb += self.v_wr_rsel_o.eq(wr_v.g_pend_o)
- # ---
- # connect Dep issue_i/go_rd_i/go_wr_i to module issue_i/go_rd/go_wr
- # ---
- go_rd_i = []
- go_wr_i = []
- issue_i = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # accumulate cell fwd outputs for dest/src1/src2
- go_rd_i.append(dc.go_rd_i)
- go_wr_i.append(dc.go_wr_i)
- issue_i.append(dc.issue_i)
- # wire up inputs from module to row cell inputs (Cat is gooood)
- m.d.comb += [Cat(*go_rd_i).eq(self.go_rd_i),
- Cat(*go_wr_i).eq(self.go_wr_i),
- Cat(*issue_i).eq(self.issue_i),
- ]
- # ---
- # connect Dep go_die_i
- # ---
- if cancel_mode:
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- go_die = Repl(self.go_die_i[fu], self.n_fu_row)
- go_die = go_die | self.cancel[fu]
- m.d.comb += dc.go_die_i.eq(go_die)
- else:
- go_die_i = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # accumulate cell fwd outputs for dest/src1/src2
- go_die_i.append(dc.go_die_i)
- # wire up inputs from module to row cell inputs (Cat is gooood)
- m.d.comb += Cat(*go_die_i).eq(self.go_die_i)
- return m
- def __iter__(self):
- yield self.dest_i
- yield from self.src_i
- yield self.issue_i
- yield self.go_wr_i
- yield self.go_rd_i
- yield self.go_die_i
- yield self.dest_rsel_o
- yield from self.src_rsel_o
- yield self.wr_pend_o
- yield self.rd_pend_o
- yield self.wr_pend_i
- yield self.rd_pend_i
- yield self.v_wr_rsel_o
- yield self.v_rd_rsel_o
- yield from self.rd_src_pend_o
- def ports(self):
- return list(self)
-def d_matrix_sim(dut):
- """ XXX TODO
- """
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_d_matrix():
- dut = FURegDepMatrix(n_fu_row=3, n_reg_col=4, n_src=2)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fu_reg_matrix.il", "w") as f:
- f.write(vl)
- run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_reg_matrix.vcd')
-if __name__ == '__main__':
- test_d_matrix()
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal, Array
-class FU_RW_Pend(Elaboratable):
- """ these are allocated per-FU (horizontally),
- and are of length reg_count
- """
- def __init__(self, reg_count, n_src):
- self.n_src = n_src
- self.reg_count = reg_count
- self.dest_fwd_i = Signal(reg_count, reset_less=True)
- src = []
- for i in range(n_src):
- j = i + 1 # name numbering to match src1/src2
- src.append(Signal(reg_count, name="src%d" % j, reset_less=True))
- self.src_fwd_i = Array(src)
- self.reg_wr_pend_o = Signal(reset_less=True)
- self.reg_rd_pend_o = Signal(reset_less=True)
- self.reg_rd_src_pend_o = Signal(n_src, reset_less=True)
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.reg_wr_pend_o.eq(self.dest_fwd_i.bool())
- for i in range(self.n_src):
- m.d.comb += self.reg_rd_src_pend_o[i].eq(self.src_fwd_i[i].bool())
- m.d.comb += self.reg_rd_pend_o.eq(self.reg_rd_src_pend_o.bool())
- return m
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Const, Elaboratable
-from nmutil.latch import SRLatch
-class FUMemDependenceCell(Elaboratable):
- """ implements 11.4.7 mitch alsup dependence cell, p27
- """
- def __init__(self, dummy, n_fu=1):
- self.n_fu = n_fu
- self.dummy = Const(~(1<<dummy), n_fu)
- # inputs
- self.st_pend_i = Signal(n_fu, reset_less=True) # read pend in (left)
- self.ld_pend_i = Signal(n_fu, reset_less=True) # write pend in (left)
- self.issue_i = Signal(n_fu, reset_less=True) # Issue in (top)
- self.go_ld_i = Signal(n_fu, reset_less=True) # Go Write in (left)
- self.go_st_i = Signal(n_fu, reset_less=True) # Go Read in (left)
- self.go_die_i = Signal(n_fu, reset_less=True) # Go Die in (left)
- # outputs (latched rd/wr wait)
- self.st_wait_o = Signal(n_fu, reset_less=True) # read wait out (right)
- self.ld_wait_o = Signal(n_fu, reset_less=True) # write wait out (right)
- def elaborate(self, platform):
- m = Module()
- m.submodules.st_c = st_c = SRLatch(sync=False, llen=self.n_fu)
- m.submodules.ld_c = ld_c = SRLatch(sync=False, llen=self.n_fu)
- # reset on go HI, set on dest and issue
- m.d.comb += st_c.s.eq(self.issue_i & self.st_pend_i)
- m.d.comb += ld_c.s.eq(self.issue_i & self.ld_pend_i)
- # connect go_rd / go_wr
- m.d.comb += ld_c.r.eq(self.go_ld_i | self.go_die_i)
- m.d.comb += st_c.r.eq(self.go_st_i | self.go_die_i)
- # connect pend_i
- m.d.comb += st_c.s.eq(self.issue_i & self.st_pend_i & self.dummy)
- m.d.comb += ld_c.s.eq(self.issue_i & self.ld_pend_i & self.dummy)
- # connect output
- m.d.comb += self.st_wait_o.eq(st_c.qlq & ~self.issue_i)
- m.d.comb += self.ld_wait_o.eq(ld_c.qlq & ~self.issue_i)
- return m
- def __iter__(self):
- yield self.st_pend_i
- yield self.ld_pend_i
- yield self.issue_i
- yield self.go_ld_i
- yield self.go_st_i
- yield self.go_die_i
- yield self.st_wait_o
- yield self.ld_wait_o
- def ports(self):
- return list(self)
-def dcell_sim(dut):
- yield dut.ld_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.st_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_st_i.eq(1)
- yield
- yield dut.go_st_i.eq(0)
- yield
- yield dut.go_ld_i.eq(1)
- yield
- yield dut.go_ld_i.eq(0)
- yield
-def test_dcell():
- dut = FUMemDependenceCell(dummy=0, n_fu=4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fumem_dcell.il", "w") as f:
- f.write(vl)
- run_simulation(dut, dcell_sim(dut), vcd_name='test_fumem_dcell.vcd')
-if __name__ == '__main__':
- test_dcell()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Elaboratable
-class GlobalPending(Elaboratable):
- """ implements Global Pending Vector, basically ORs all incoming Function
- Unit vectors together. Can be used for creating Read or Write Global
- Pending. Can be used for INT or FP Global Pending.
- Inputs:
- * :dep: register file depth
- * :fu_vecs: a python list of function unit "pending" vectors, each
- vector being a Signal of width equal to the reg file.
- Notes:
- * the regfile may be Int or FP, this code doesn't care which.
- obviously do not try to put in a mixture of regfiles into fu_vecs.
- * this code also doesn't care if it's used for Read Pending or Write
- pending, it can be used for both: again, obviously, do not try to
- put in a mixture of read *and* write pending vectors in.
- * if some Function Units happen not to be uniform (don't operate
- on a particular register (extremely unusual), they must set a Const
- zero bit in the vector.
- """
- def __init__(self, dep, fu_vecs, sync=False):
- self.reg_dep = dep
- # inputs
- self.fu_vecs = fu_vecs
- self.sync = sync
- for v in fu_vecs:
- assert len(v) == dep, "FU Vector must be same width as regfile"
- self.g_pend_o = Signal(dep, reset_less=True) # global pending vector
- def elaborate(self, platform):
- m = Module()
- pend_l = []
- for i in range(self.reg_dep): # per-register
- vec_bit_l = []
- for v in self.fu_vecs:
- vec_bit_l.append(v[i]) # fu bit for same register
- pend_l.append(Cat(*vec_bit_l).bool()) # OR all bits for same reg
- if self.sync:
- m.d.sync += self.g_pend_o.eq(Cat(*pend_l)) # merge all OR'd bits
- else:
- m.d.comb += self.g_pend_o.eq(Cat(*pend_l)) # merge all OR'd bits
- return m
- def __iter__(self):
- yield from self.fu_vecs
- yield self.g_pend_o
- def ports(self):
- return list(self)
-def g_vec_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_g_vec():
- vecs = []
- for i in range(3):
- vecs.append(Signal(32, name="fu%d" % i))
- dut = GlobalPending(32, vecs)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_global_pending.il", "w") as f:
- f.write(vl)
- run_simulation(dut, g_vec_sim(dut), vcd_name='test_global_pending.vcd')
-if __name__ == '__main__':
- test_g_vec()
+++ /dev/null
-""" Group Picker: to select an instruction that is permitted to read (or write)
- based on the Function Unit expressing a *desire* to read (or write).
- The job of the Group Picker is extremely simple yet extremely important.
- It sits in front of a register file port (read or write) and stops it from
- being corrupted. It's a "port contention selector", basically.
- The way it works is:
- * Function Units need to read from (or write to) the register file,
- in order to get (or store) their operands, so they each have a signal,
- readable (or writable), which "expresses" this need. This is an
- *unary* encoding.
- * The Function Units also have a signal which indicates that they
- are requesting "release" of the register file port (this because
- in the scoreboard, readable/writable can be permanently HI even
- if the FU is idle, whereas the "release" signal is very specifically
- only HI if the read (or write) latch is still active)
- * The Group Picker takes this unary encoding of the desire to read
- (or write) and, on a priority basis, activates one *and only* one
- of those signals, again as an unary output.
- * Due to the way that the Computation Unit works, that signal (Go_Read
- or Go_Write) will fire for one (and only one) cycle, and can be used
- to enable the register file port read (or write) lines. The Go_Read/Wr
- signal basically loops back to the Computation Unit and resets the
- "desire-to-read/write-expressing" latch.
- In theory (and in practice!) the following is possible:
- * Separate src1 and src2 Group Pickers. This would allow instructions
- with only one operand to read to not block up other instructions,
- and it would also allow 3-operand instructions to be interleaved
- with 1 and 2 operand instructions.
- * *Multiple* Group Pickers (multi-issue). This would require
- a corresponding increase in the number of register file ports,
- either 4R2W (or more) or by "striping" the register file into
- split banks (a strategy best deployed on Vector Processors)
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable
-from nmutil.picker import PriorityPicker
-class GroupPicker(Elaboratable):
- """ implements 10.5 mitch alsup group picker, p27
- """
- def __init__(self, wid):
- self.gp_wid = wid
- # inputs
- self.readable_i = Signal(wid, reset_less=True) # readable in (top)
- self.writable_i = Signal(wid, reset_less=True) # writable in (top)
- self.rd_rel_i = Signal(wid, reset_less=True) # go read in (top)
- self.req_rel_i = Signal(wid, reset_less=True) # release request in (top)
- # outputs
- self.go_rd_o = Signal(wid, reset_less=True) # go read (bottom)
- self.go_wr_o = Signal(wid, reset_less=True) # go write (bottom)
- def elaborate(self, platform):
- m = Module()
- m.submodules.rpick = rpick = PriorityPicker(self.gp_wid)
- m.submodules.wpick = wpick = PriorityPicker(self.gp_wid)
- # combine release (output ready signal) with writeable
- m.d.comb += wpick.i.eq(self.writable_i & self.req_rel_i)
- m.d.comb += self.go_wr_o.eq(wpick.o)
- m.d.comb += rpick.i.eq(self.readable_i & self.rd_rel_i)
- m.d.comb += self.go_rd_o.eq(rpick.o)
- return m
- def __iter__(self):
- yield self.readable_i
- yield self.writable_i
- yield self.req_rel_i
- yield self.go_rd_o
- yield self.go_wr_o
- def ports(self):
- return list(self)
-def grp_pick_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.rd_rel_i.eq(1)
- yield
- yield dut.rd_rel_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_grp_pick():
- dut = GroupPicker(4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_grp_pick.il", "w") as f:
- f.write(vl)
- run_simulation(dut, grp_pick_sim(dut), vcd_name='test_grp_pick.vcd')
-if __name__ == '__main__':
- test_grp_pick()
+++ /dev/null
-from math import log
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Array, Const, Repl, Elaboratable
-from nmutil.iocontrol import RecordObject
-from nmutil.nmoperator import eq, shape, cat
-class Instruction(RecordObject):
- def __init__(self, name, wid, opwid):
- RecordObject.__init__(self, name=name)
- self.oper_i = Signal(opwid, reset_less=True)
- self.opim_i = Signal(1, reset_less=True) # src2 is an immediate
- self.imm_i = Signal(wid, reset_less=True)
- self.dest_i = Signal(wid, reset_less=True)
- self.src1_i = Signal(wid, reset_less=True)
- self.src2_i = Signal(wid, reset_less=True)
- @staticmethod
- def nq(n_insns, name, wid, opwid):
- q = []
- for i in range(n_insns):
- q.append(Instruction("%s%d" % (name, i), wid, opwid))
- return Array(q)
-class InstructionQ(Elaboratable):
- """ contains a queue of (part-decoded) instructions.
- output is copied combinatorially from the front of the queue,
- for easy access on the clock cycle. only "n_in" instructions
- are made available this way
- input and shifting occurs on sync.
- """
- # NOTE(review): the output loop in elaborate() iterates over n_out, not
- # n_in as the docstring above says - confirm which is intended.
- def __init__(self, wid, opwid, iqlen, n_in, n_out):
- """ constructor
- Inputs
- * :wid: register file width
- * :opwid: operand width
- * :iqlen: instruction queue length
- * :n_in: max number of instructions allowed "in"
- """
- # NOTE(review): n_out is not described above; it sets the number of
- # data_o records created below.
- self.iqlen = iqlen
- self.reg_width = wid
- self.opwid = opwid
- self.n_in = n_in
- self.n_out = n_out
- # shape of the counters: floor(log2(iqlen))+2 bits, unsigned
- mqbits = (int(log(iqlen) / log(2))+2, False)
- self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
- self.p_ready_o = Signal() # instructions were added
- self.data_i = Instruction.nq(n_in, "data_i", wid, opwid)
- self.data_o = Instruction.nq(n_out, "data_o", wid, opwid)
- self.n_sub_i = Signal(mqbits) # number of instructions to remove
- self.n_sub_o = Signal(mqbits) # number of instructions removed
- # each queue slot is a flat Signal as wide as one Instruction record
- self.qsz = shape(self.data_o[0])[0]
- q = []
- for i in range(iqlen):
- q.append(Signal(self.qsz, name="q%d" % i))
- self.q = Array(q)
- self.qlen_o = Signal(mqbits)
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
- sync = m.d.sync
- iqlen = self.iqlen
- # here mqbits is floor(log2(iqlen)) WITHOUT the +2 used in __init__
- mqbits = int(log(iqlen) / log(2))
- left = Signal((mqbits+2, False))
- spare = Signal((mqbits+2, False))
- qmaxed = Signal()
- # start_q / end_q are mqbits wide, so additions to them are truncated
- # to that width on assignment
- start_q = Signal(mqbits)
- end_q = Signal(mqbits)
- mqlen = Const(iqlen, (len(left), False))
- # NOTE(review): debug print, runs on every elaboration - confirm wanted
- print ("mqlen", mqlen)
- # work out how many can be subtracted from the queue
- with m.If(self.n_sub_i):
- qinmax = Signal()
- comb += qinmax.eq(self.n_sub_i > self.qlen_o)
- with m.If(qinmax):
- comb += self.n_sub_o.eq(self.qlen_o)
- with m.Else():
- comb += self.n_sub_o.eq(self.n_sub_i)
- # work out how many new items are going to be in the queue
- # (the subtraction of n_sub_o is commented out in the next line)
- comb += left.eq(self.qlen_o )#- self.n_sub_o)
- comb += spare.eq(mqlen - self.p_add_i)
- comb += qmaxed.eq(left <= spare)
- comb += self.p_ready_o.eq(qmaxed & (self.p_add_i != 0))
- # put q (flattened) into output
- for i in range(self.n_out):
- opos = Signal(mqbits)
- comb += opos.eq(end_q + i)
- comb += cat(self.data_o[i]).eq(self.q[opos])
- with m.If(self.n_sub_o):
- # ok now the end's moved
- sync += end_q.eq(end_q + self.n_sub_o)
- with m.If(self.p_ready_o):
- # copy in the input... insanely gate-costly... *sigh*...
- for i in range(self.n_in):
- with m.If(self.p_add_i > Const(i, len(self.p_add_i))):
- ipos = Signal(mqbits)
- comb += ipos.eq(start_q + i) # should roll round
- sync += self.q[ipos].eq(cat(self.data_i[i]))
- sync += start_q.eq(start_q + self.p_add_i)
- with m.If(self.p_ready_o):
- # update the queue length
- add2 = Signal(mqbits+1)
- comb += add2.eq(self.qlen_o + self.p_add_i)
- sync += self.qlen_o.eq(add2 - self.n_sub_o)
- with m.Else():
- sync += self.qlen_o.eq(self.qlen_o - self.n_sub_o)
- return m
- # NOTE(review): qlen_o is not yielded below, so ports() omits it -
- # confirm whether that is intentional.
- def __iter__(self):
- yield from self.q
- yield self.p_ready_o
- for o in self.data_i:
- yield from list(o)
- yield self.p_add_i
- for o in self.data_o:
- yield from list(o)
- yield self.n_sub_i
- yield self.n_sub_o
- def ports(self):
- return list(self)
-def instruction_q_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_instruction_q():
- dut = InstructionQ(16, 4, 4, n_in=2, n_out=2)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_instruction_q.il", "w") as f:
- f.write(vl)
- run_simulation(dut, instruction_q_sim(dut),
- vcd_name='test_instruction_q.vcd')
-if __name__ == '__main__':
- test_instruction_q()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Array, Const, Repl, Elaboratable
-from nmigen.lib.coding import Decoder
-from scoreboard.group_picker import PriorityPicker
-class RegDecode(Elaboratable):
- """ decodes registers into unary
- Inputs
- * :wid: register file width
- """
- def __init__(self, wid):
- self.reg_width = wid
- # inputs
- self.enable_i = Signal(reset_less=True) # enable decoders
- self.dest_i = Signal(range(wid), reset_less=True) # Dest R# in
- self.src1_i = Signal(range(wid), reset_less=True) # oper1 R# in
- self.src2_i = Signal(range(wid), reset_less=True) # oper2 R# in
- # outputs
- self.dest_o = Signal(wid, reset_less=True) # Dest unary out
- self.src1_o = Signal(wid, reset_less=True) # oper1 unary out
- self.src2_o = Signal(wid, reset_less=True) # oper2 unary out
- def elaborate(self, platform):
- m = Module()
- m.submodules.dest_d = dest_d = Decoder(self.reg_width)
- m.submodules.src1_d = src1_d = Decoder(self.reg_width)
- m.submodules.src2_d = src2_d = Decoder(self.reg_width)
- # dest decoder: write-pending
- for d, i, o in [(dest_d, self.dest_i, self.dest_o),
- (src1_d, self.src1_i, self.src1_o),
- (src2_d, self.src2_i, self.src2_o)]:
- m.d.comb += d.i.eq(i)
- m.d.comb += d.n.eq(~self.enable_i)
- m.d.comb += o.eq(d.o)
- return m
- def __iter__(self):
- yield self.enable_i
- yield self.dest_i
- yield self.src1_i
- yield self.src2_i
- yield self.dest_o
- yield self.src1_o
- yield self.src2_o
- def ports(self):
- return list(self)
-class IssueUnitGroup(Elaboratable):
- """ Manages a batch of Computation Units all of which can do the same task
- A priority picker will allocate one instruction in this cycle based
- on whether the others are busy.
- insn_i indicates to this module that there is an instruction to be
- issued which this group can handle
- busy_i is a vector of signals that indicate, in this cycle, which
- of the units are currently busy.
- busy_o indicates whether it is "safe to proceed" i.e. whether
- there is a unit here that can *be* issued an instruction
- fn_issue_o indicates, out of the available (non-busy) units,
- which one may be selected
- """
- def __init__(self, n_insns):
- """ Set up inputs and outputs for the Group
- Input Parameters
- * :n_insns: number of instructions in this issue unit.
- """
- self.n_insns = n_insns
- # inputs
- self.insn_i = Signal(reset_less=True, name="insn_i")
- self.busy_i = Signal(n_insns, reset_less=True, name="busy_i")
- # outputs
- self.fn_issue_o = Signal(n_insns, reset_less=True, name="fn_issue_o")
- self.busy_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- if self.n_insns == 0:
- return m
- m.submodules.pick = pick = PriorityPicker(self.n_insns)
- # temporaries
- allissue = Signal(self.n_insns, reset_less=True)
- m.d.comb += allissue.eq(Repl(self.insn_i, self.n_insns))
- # Pick one (and only one) of the units to proceed in this cycle
- m.d.comb += pick.i.eq(~self.busy_i & allissue)
- # "Safe to issue" condition is basically when all units are not busy
- m.d.comb += self.busy_o.eq(~((~self.busy_i).bool()))
- # Picker only raises one signal, therefore it's also the fn_issue
- m.d.comb += self.fn_issue_o.eq(pick.o & Repl(~self.busy_o, self.n_insns))
- return m
- def __iter__(self):
- yield self.insn_i
- yield self.busy_i
- yield self.fn_issue_o
- yield self.g_issue_o
- def ports(self):
- return list(self)
-class IssueUnitArray(Elaboratable):
- """ Convenience module that amalgamates the issue and busy signals
- unit issue_i is to be set externally, at the same time as the
- ALU group oper_i
- """
- def __init__(self, units):
- self.units = units
- self.issue_o = Signal(reset_less=True)
- n_insns = 0
- for u in self.units:
- n_insns += len(u.fn_issue_o)
- self.busy_i = Signal(n_insns, reset_less=True)
- self.fn_issue_o = Signal(n_insns, reset_less=True)
- self.n_insns = n_insns
- def elaborate(self, platform):
- m = Module()
- for i, u in enumerate(self.units):
- setattr(m.submodules, "issue%d" % i, u)
- g_issue_o = []
- busy_i = []
- fn_issue_o = []
- for u in self.units:
- busy_i.append(u.busy_i)
- g_issue_o.append(u.busy_o)
- fn_issue_o.append(u.fn_issue_o)
- m.d.comb += self.issue_o.eq(~(Cat(*g_issue_o).bool()))
- m.d.comb += self.fn_issue_o.eq(Cat(*fn_issue_o))
- m.d.comb += Cat(*busy_i).eq(self.busy_i)
- return m
- def ports(self):
- yield self.busy_i
- yield self.issue_o
- yield self.fn_issue_o
- yield from self.units
-class IssueUnit(Elaboratable):
- """ implements 11.4.14 issue unit, p50
- Inputs
- * :n_insns: number of instructions in this issue unit.
- """
- def __init__(self, n_insns):
- self.n_insns = n_insns
- # inputs
- self.insn_i = Signal(n_insns, reset_less=True, name="insn_i")
- self.busy_i = Signal(n_insns, reset_less=True, name="busy_i")
- # outputs
- self.fn_issue_o = Signal(n_insns, reset_less=True, name="fn_issue_o")
- self.g_issue_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- if self.n_insns == 0:
- return m
- # temporaries
- fu_stall = Signal(reset_less=True)
- ib_l = []
- for i in range(self.n_insns):
- ib_l.append(self.insn_i[i] & self.busy_i[i])
- m.d.comb += fu_stall.eq(Cat(*ib_l).bool())
- m.d.comb += self.g_issue_o.eq(~(fu_stall))
- for i in range(self.n_insns):
- m.d.comb += self.fn_issue_o[i].eq(self.g_issue_o & self.insn_i[i])
- return m
- def __iter__(self):
- yield self.insn_i
- yield self.busy_i
- yield self.fn_issue_o
- yield self.g_issue_o
- def ports(self):
- return list(self)
-class IntFPIssueUnit(Elaboratable):
- def __init__(self, n_int_insns, n_fp_insns):
- self.i = IssueUnit(n_int_insns)
- self.f = IssueUnit(n_fp_insns)
- self.issue_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- m.submodules.intissue = self.i
- m.submodules.fpissue = self.f
- m.d.comb += self.issue_o.eq(self.i.g_issue_o | self.f.g_issue_o)
- return m
- def ports(self):
- yield self.issue_o
- yield from self.i
- yield from self.f
-def issue_unit_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_issue_unit():
- dut = IssueUnitGroup(3)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_issue_unit_group.il", "w") as f:
- f.write(vl)
- dut = IssueUnit(32, 3)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_issue_unit.il", "w") as f:
- f.write(vl)
- dut = IntFPIssueUnit(32, 3, 3)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_intfp_issue_unit.il", "w") as f:
- f.write(vl)
- run_simulation(dut, issue_unit_sim(dut), vcd_name='test_issue_unit.vcd')
-if __name__ == '__main__':
- test_issue_unit()
+++ /dev/null
-""" Mitch Alsup 6600-style LD/ST scoreboard Dependency Cell
-Relevant bugreports:
-* http://bugs.libre-riscv.org/show_bug.cgi?id=81
-"""
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Repl, Elaboratable
-from nmutil.latch import SRLatch
-class LDSTDepCell(Elaboratable):
- """ implements 11.4.12 mitch alsup load/store dependence cell, p45
- """
- def __init__(self, n_ls=1):
- self.n_ls = n_ls
- # inputs
- self.load_h_i = Signal(reset_less=True) # load in (left)
- self.stor_h_i = Signal(reset_less=True) # store in (left)
- self.load_v_i = Signal(n_ls, reset_less=True) # load in (top)
- self.stor_v_i = Signal(n_ls, reset_less=True) # store in (top)
- self.issue_i = Signal(reset_less=True) # Issue in (left)
- self.go_die_i = Signal(reset_less=True) # Issue in (left)
- # load / store hit - basically connect these to go_wr from LD/STCompUnit
- # LD.go_wr -> load_hit_i, ST.go_wr -> stwd_hit_i.
- self.load_hit_i = Signal(n_ls, reset_less=True) # ld hit in (right)
- self.stwd_hit_i = Signal(n_ls, reset_less=True) # st w/ hit in (right)
- # outputs (latched rd/wr pend)
- self.ld_hold_st_o = Signal(reset_less=True) # ld holds st out (l)
- self.st_hold_ld_o = Signal(reset_less=True) # st holds ld out (l)
- def elaborate(self, platform):
- m = Module()
- m.submodules.war_l = war_l = SRLatch(sync=False, llen=self.n_ls) # WaR
- m.submodules.raw_l = raw_l = SRLatch(sync=False, llen=self.n_ls) # RaW
- # temporaries (repeat-extend)
- issue = Repl(self.issue_i, self.n_ls)
- die = Repl(self.go_die_i, self.n_ls)
- # issue & store & load - used for WAR Setting. LD is left, ST is top
- i_s = Signal(reset_less=True)
- i_s_l = Signal(self.n_ls, reset_less=True)
- m.d.comb += i_s.eq(issue & self.stor_h_i) # horizontal single-signal
- m.d.comb += i_s_l.eq(Repl(i_s, self.n_ls) & self.load_v_i) # multi, vert
- # issue & load & store - used for RAW Setting. ST is left, LD is top
- i_l = Signal(reset_less=True)
- i_l_s = Signal(self.n_ls, reset_less=True)
- m.d.comb += i_l.eq(issue & self.load_h_i) # horizontal single-signal
- m.d.comb += i_l_s.eq(Repl(i_l, self.n_ls) & self.stor_v_i) # multi, vert
- # write after read latch: loads block stores
- m.d.comb += war_l.s.eq(i_s_l)
- m.d.comb += war_l.r.eq(die | ~self.load_v_i) # reset on LD
- # read after write latch: stores block loads
- m.d.comb += raw_l.s.eq(i_s_l)
- m.d.comb += raw_l.r.eq(die | ~self.stor_v_i) # reset on ST
- # Hold results (read out horizontally, accumulate in OR fashion)
- m.d.comb += self.ld_hold_st_o.eq((war_l.qn & self.load_hit_i).bool())
- m.d.comb += self.st_hold_ld_o.eq((raw_l.qn & self.stwd_hit_i).bool())
- return m
- def __iter__(self):
- yield self.load_h_i
- yield self.load_v_i
- yield self.stor_h_i
- yield self.stor_h_i
- yield self.issue_i
- yield self.load_hit_i
- yield self.stwd_hit_i
- yield self.ld_hold_st_o
- yield self.st_hold_ld_o
- def ports(self):
- return list(self)
-def dcell_sim(dut):
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_dcell():
- dut = LDSTDepCell()
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_ldst_dcell.il", "w") as f:
- f.write(vl)
- run_simulation(dut, dcell_sim(dut), vcd_name='test_ldst_dcell.vcd')
-if __name__ == '__main__':
- test_dcell()
+++ /dev/null
-""" Mitch Alsup 6600-style LD/ST Memory Scoreboard Matrix (sparse vector)
-6600 LD/ST Dependency Table Matrix inputs / outputs
-Relevant comments (p45-46):
-* If there are no WAR dependencies on a Load instruction with a computed
- address it can assert Bank_Addressable and Translate_Addressable.
-* If there are no RAW dependencies on a Store instruction with both a
- write permission and store data present it can assert Bank_Addressable
-Relevant bugreports:
-* http://bugs.libre-riscv.org/show_bug.cgi?id=81
-* Load Hit (or Store Hit with Data) are asserted by the LD/ST Computation
- Unit when it has data and address ready
-* Asserting the ld_hit_i (or stwd_hit_i) *requires* that the output be
- captured or at least taken into consideration for the next LD/STs
- *right then*. Failure to observe the xx_hold_xx_o *will* result in
- data corruption, as they are *only* asserted if xx_hit_i is asserted
-* The hold signals still have to go through "maybe address clashes"
- detection, they cannot just be used as-is to stop a LD/ST.
-"""
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
-from ldst_dep_cell import LDSTDepCell
-class LDSTDepMatrix(Elaboratable):
- """ implements 11.4.12 mitch alsup LD/ST Dependency Matrix, p46
- actually a sparse matrix along the diagonal.
- load-hold-store and store-hold-load accumulate in a priority-picking
- fashion, ORing together. the OR gate from the dependency cell is
- here.
- """
- def __init__(self, n_ldst):
- self.n_ldst = n_ldst # X and Y (FUs)
- self.ld_pend_i = Signal(n_ldst, reset_less=True) # load pending in
- self.st_pend_i = Signal(n_ldst, reset_less=True) # store pending in
- self.issue_i = Signal(n_ldst, reset_less=True) # Issue in
- self.go_die_i = Signal(n_ldst, reset_less=True) # Die/Reset in
- self.load_hit_i = Signal(n_ldst, reset_less=True) # load hit in
- self.stwd_hit_i = Signal(n_ldst, reset_less=True) # store w/data hit in
- # outputs
- self.ld_hold_st_o = Signal(n_ldst, reset_less=True) # load holds st out
- self.st_hold_ld_o = Signal(n_ldst, reset_less=True) # st holds load out
- def elaborate(self, platform):
- m = Module()
- # ---
- # matrix of dependency cells. actually, LDSTDepCell is a row, now
- # ---
- dm = Array(LDSTDepCell(self.n_ldst) for f in range(self.n_ldst))
- for fu in range(self.n_ldst):
- setattr(m.submodules, "dm_fu%d" % (fu), dm[fu])
- # ---
- # connect Function Unit vector, all horizontal
- # ---
- lhs_l = []
- shl_l = []
- issue_l = []
- go_die_l = []
- lh_l = []
- sh_l = []
- for fu in range(self.n_ldst):
- dc = dm[fu]
- # accumulate load-hold-store / store-hold-load bits (horizontal)
- lhs_l.append(dc.ld_hold_st_o)
- shl_l.append(dc.st_hold_ld_o)
- # accumulate inputs (for Cat'ing later) - TODO: must be a better way
- issue_l.append(dc.issue_i)
- go_die_l.append(dc.go_die_i)
- # load-hit and store-with-data-hit go in vertically (top)
- m.d.comb += [dc.load_hit_i.eq(self.load_hit_i),
- dc.stwd_hit_i.eq(self.stwd_hit_i),
- dc.load_v_i.eq(self.ld_pend_i),
- dc.stor_v_i.eq(self.st_pend_i),
- ]
- # connect cell inputs using Cat(*list_of_stuff)
- m.d.comb += [Cat(*issue_l).eq(self.issue_i),
- Cat(*go_die_l).eq(self.go_die_i),
- ]
- # connect the load-hold-store / store-hold-load OR-accumulated outputs
- m.d.comb += self.ld_hold_st_o.eq(Cat(*lhs_l))
- m.d.comb += self.st_hold_ld_o.eq(Cat(*shl_l))
- # the load/store input also needs to be connected to "top" (vertically)
- for fu in range(self.n_ldst):
- load_h_l = []
- stor_h_l = []
- for fux in range(self.n_ldst):
- dc = dm[fux]
- load_h_l.append(dc.load_h_i)
- stor_h_l.append(dc.stor_h_i)
- m.d.comb += [Cat(*load_h_l).eq(self.ld_pend_i),
- Cat(*stor_h_l).eq(self.st_pend_i),
- ]
- return m
- def __iter__(self):
- yield self.ld_pend_i
- yield self.st_pend_i
- yield self.issue_i
- yield self.go_die_i
- yield self.load_hit_i
- yield self.stwd_hit_i
- yield self.ld_hold_st_o
- yield self.st_hold_ld_o
- def ports(self):
- return list(self)
-def d_matrix_sim(dut):
- """ XXX TODO
- """
- yield dut.dest_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.src1_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_rd_i.eq(1)
- yield
- yield dut.go_rd_i.eq(0)
- yield
- yield dut.go_wr_i.eq(1)
- yield
- yield dut.go_wr_i.eq(0)
- yield
-def test_d_matrix():
- dut = LDSTDepMatrix(n_ldst=4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_ld_st_matrix.il", "w") as f:
- f.write(vl)
- run_simulation(dut, d_matrix_sim(dut), vcd_name='test_ld_st_matrix.vcd')
-if __name__ == '__main__':
- test_d_matrix()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module
-from scoreboard.fu_reg_matrix import FURegDepMatrix
-from scoreboard.addr_match import PartialAddrMatch
-class FUMemMatchMatrix(FURegDepMatrix, PartialAddrMatch):
- """ implement a FU-Regs overload with memory-address matching
- """
- def __init__(self, n_fu, addrbitwid):
- PartialAddrMatch.__init__(self, n_fu, addrbitwid)
- FURegDepMatrix.__init__(self, n_fu, n_fu, 1, self.addr_nomatch_o)
- def elaborate(self, platform):
- m = Module()
- PartialAddrMatch._elaborate(self, m, platform)
- FURegDepMatrix._elaborate(self, m, platform)
- return m
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat, Repl
-from nmutil.latch import SRLatch
-class MemDepRow(Elaboratable):
- """ implements 1st phase Memory Depencency cell
- """
- def __init__(self, n_reg):
- self.n_reg = n_reg
- # inputs
- self.ld_i = Signal(n_reg, reset_less=True) # Dest in (top)
- self.st_i = Signal(n_reg, reset_less=True) # oper1 in (top)
- self.issue_i = Signal(reset_less=True) # Issue in (top)
- self.st_pend_i = Signal(n_reg, reset_less=True) # Read pend in (top)
- self.ld_pend_i = Signal(n_reg, reset_less=True) # Write pend in (top)
- self.v_st_rsel_o = Signal(n_reg, reset_less=True) # Read pend out (bot)
- self.v_ld_rsel_o = Signal(n_reg, reset_less=True) # Write pend out (bot)
- self.go_ld_i = Signal(reset_less=True) # Go Write in (left)
- self.go_st_i = Signal(reset_less=True) # Go Read in (left)
- self.go_die_i = Signal(reset_less=True) # Go Die in (left)
- # for Register File Select Lines (vertical)
- self.ld_rsel_o = Signal(n_reg, reset_less=True) # dest reg sel (bot)
- self.st_rsel_o = Signal(n_reg, reset_less=True) # src1 reg sel (bot)
- # for Function Unit "forward progress" (horizontal)
- self.ld_fwd_o = Signal(n_reg, reset_less=True) # dest FU fw (right)
- self.st_fwd_o = Signal(n_reg, reset_less=True) # src1 FU fw (right)
- def elaborate(self, platform):
- m = Module()
- m.submodules.ld_c = ld_c = SRLatch(sync=False, llen=self.n_reg)
- m.submodules.st_c = st_c = SRLatch(sync=False, llen=self.n_reg)
- # connect go_rd / go_wr (dest->wr, src->rd)
- ld_die = Signal(reset_less=True)
- st_die = Signal(reset_less=True)
- m.d.comb += ld_die.eq(self.go_ld_i | self.go_die_i)
- m.d.comb += st_die.eq(self.go_st_i | self.go_die_i)
- m.d.comb += ld_c.r.eq(Repl(ld_die, self.n_reg))
- m.d.comb += st_c.r.eq(Repl(st_die, self.n_reg))
- # connect input reg bit (unary)
- i_ext = Repl(self.issue_i, self.n_reg)
- m.d.comb += ld_c.s.eq(i_ext & self.ld_i)
- m.d.comb += st_c.s.eq(i_ext & self.st_i)
- # connect up hazard checks: read-after-write and write-after-read
- m.d.comb += self.ld_fwd_o.eq(ld_c.q & self.st_pend_i)
- m.d.comb += self.st_fwd_o.eq(st_c.q & self.ld_pend_i)
- # connect reg-sel outputs
- st_ext = Repl(self.go_st_i, self.n_reg)
- ld_ext = Repl(self.go_ld_i, self.n_reg)
- m.d.comb += self.ld_rsel_o.eq(ld_c.qlq & ld_ext)
- m.d.comb += self.st_rsel_o.eq(st_c.qlq & st_ext)
- # to be accumulated to indicate if register is in use (globally)
- # after ORing, is fed back in to st_pend_i / ld_pend_i
- m.d.comb += self.v_st_rsel_o.eq(st_c.qlq)
- m.d.comb += self.v_ld_rsel_o.eq(ld_c.qlq)
- return m
- def __iter__(self):
- yield self.ld_i
- yield self.st_i
- yield self.st_pend_i
- yield self.ld_pend_i
- yield self.issue_i
- yield self.go_ld_i
- yield self.go_st_i
- yield self.go_die_i
- yield self.v_ld_rsel_o
- yield self.v_st_rsel_o
- yield self.ld_rsel_o
- yield self.st_rsel_o
- yield self.ld_fwd_o
- yield self.st_fwd_o
- def ports(self):
- return list(self)
-def dcell_sim(dut):
- yield dut.ld_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.st_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_st_i.eq(1)
- yield
- yield dut.go_st_i.eq(0)
- yield
- yield dut.go_ld_i.eq(1)
- yield
- yield dut.go_ld_i.eq(0)
- yield
-def test_dcell():
- dut = MemDepRow(4)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_mem_drow.il", "w") as f:
- f.write(vl)
- run_simulation(dut, dcell_sim(dut), vcd_name='test_mem_dcell.vcd')
-if __name__ == '__main__':
- test_dcell()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Elaboratable, Array, Cat
-from scoreboard.mem_dependence_cell import MemDepRow
-from scoreboard.mem_fu_pending import MemFU_Pend
-from scoreboard.mem_select import Mem_Rsv
-from scoreboard.global_pending import GlobalPending
-class MemFUDepMatrix(Elaboratable):
- """ implements 1st phase Memory-to-FU Dependency Matrix
- """
- def __init__(self, n_fu_row, n_reg_col):
- self.n_fu_row = n_fu_row # Y (FUs) ^v
- self.n_reg_col = n_reg_col # X (Regs) <>
- self.ld_i = Signal(n_reg_col, reset_less=True) # LD in (top)
- self.st_i = Signal(n_reg_col, reset_less=True) # ST in (top)
- # Register "Global" vectors for determining RaW and WaR hazards
- self.ld_pend_i = Signal(n_reg_col, reset_less=True) # ld pending (top)
- self.st_pend_i = Signal(n_reg_col, reset_less=True) # st pending (top)
- self.v_ld_rsel_o = Signal(n_reg_col, reset_less=True) # ld pending (bot)
- self.v_st_rsel_o = Signal(n_reg_col, reset_less=True) # st pending (bot)
- self.issue_i = Signal(n_fu_row, reset_less=True) # Issue in (top)
- self.go_ld_i = Signal(n_fu_row, reset_less=True) # Go LOAD in (left)
- self.go_st_i = Signal(n_fu_row, reset_less=True) # Go STOR in (left)
- self.go_die_i = Signal(n_fu_row, reset_less=True) # Go Die in (left)
- # for Register File Select Lines (horizontal), per-reg
- self.ld_rsel_o = Signal(n_reg_col, reset_less=True) # ld reg (bot)
- self.st_rsel_o = Signal(n_reg_col, reset_less=True) # st reg (bot)
- # for Function Unit "forward progress" (vertical), per-FU
- self.ld_pend_o = Signal(n_fu_row, reset_less=True) # ld pending (right)
- self.st_pend_o = Signal(n_fu_row, reset_less=True) # st pending (right)
- def elaborate(self, platform):
- m = Module()
- # ---
- # matrix of dependency cells
- # ---
- dm = Array(MemDepRow(self.n_reg_col) for r in range(self.n_fu_row))
- for fu in range(self.n_fu_row):
- setattr(m.submodules, "dr_fu%d" % fu, dm[fu])
- # ---
- # array of Function Unit Pending vectors
- # ---
- fupend = Array(MemFU_Pend(self.n_reg_col) for f in range(self.n_fu_row))
- for fu in range(self.n_fu_row):
- setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu])
- # ---
- # array of Register Reservation vectors
- # ---
- regrsv = Array(Mem_Rsv(self.n_fu_row) for r in range(self.n_reg_col))
- for rn in range(self.n_reg_col):
- setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn])
- # ---
- # connect Function Unit vector
- # ---
- ld_pend = []
- st_pend = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- fup = fupend[fu]
- ld_fwd_o = []
- st_fwd_o = []
- for rn in range(self.n_reg_col):
- # accumulate cell fwd outputs for dest/src1
- ld_fwd_o.append(dc.ld_fwd_o[rn])
- st_fwd_o.append(dc.st_fwd_o[rn])
- # connect cell fwd outputs to FU Vector in [Cat is gooood]
- m.d.comb += [fup.ld_fwd_i.eq(Cat(*ld_fwd_o)),
- fup.st_fwd_i.eq(Cat(*st_fwd_o)),
- ]
- # accumulate FU Vector outputs
- ld_pend.append(fup.reg_ld_pend_o)
- st_pend.append(fup.reg_st_pend_o)
- # ... and output them from this module (vertical, width=FUs)
- m.d.comb += self.ld_pend_o.eq(Cat(*ld_pend))
- m.d.comb += self.st_pend_o.eq(Cat(*st_pend))
- # ---
- # connect Reg Selection vector
- # ---
- ld_rsel = []
- st_rsel = []
- for rn in range(self.n_reg_col):
- rsv = regrsv[rn]
- ld_rsel_o = []
- st_rsel_o = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # accumulate cell reg-select outputs dest/src1
- ld_rsel_o.append(dc.ld_rsel_o[rn])
- st_rsel_o.append(dc.st_rsel_o[rn])
- # connect cell reg-select outputs to Reg Vector In
- m.d.comb += [rsv.ld_rsel_i.eq(Cat(*ld_rsel_o)),
- rsv.st_rsel_i.eq(Cat(*st_rsel_o)),
- ]
- # accumulate Reg-Sel Vector outputs
- ld_rsel.append(rsv.ld_rsel_o)
- st_rsel.append(rsv.st_rsel_o)
- # ... and output them from this module (horizontal, width=REGs)
- m.d.comb += self.ld_rsel_o.eq(Cat(*ld_rsel))
- m.d.comb += self.st_rsel_o.eq(Cat(*st_rsel))
- # ---
- # connect Dependency Matrix dest/src1/issue to module d/s/s/i
- # ---
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # wire up inputs from module to row cell inputs (Cat is gooood)
- m.d.comb += [dc.ld_i.eq(self.ld_i),
- dc.st_i.eq(self.st_i),
- dc.st_pend_i.eq(self.st_pend_i),
- dc.ld_pend_i.eq(self.ld_pend_i),
- ]
- # accumulate rsel bits into read/write pending vectors.
- st_pend_v = []
- ld_pend_v = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- st_pend_v.append(dc.v_st_rsel_o)
- ld_pend_v.append(dc.v_ld_rsel_o)
- st_v = GlobalPending(self.n_reg_col, st_pend_v)
- ld_v = GlobalPending(self.n_reg_col, ld_pend_v)
- m.submodules.st_v = st_v
- m.submodules.ld_v = ld_v
- m.d.comb += self.v_st_rsel_o.eq(st_v.g_pend_o)
- m.d.comb += self.v_ld_rsel_o.eq(ld_v.g_pend_o)
- # ---
- # connect Dep issue_i/go_st_i/go_ld_i to module issue_i/go_rd/go_wr
- # ---
- go_st_i = []
- go_ld_i = []
- go_die_i = []
- issue_i = []
- for fu in range(self.n_fu_row):
- dc = dm[fu]
- # accumulate cell fwd outputs for dest/src1
- go_st_i.append(dc.go_st_i)
- go_ld_i.append(dc.go_ld_i)
- go_die_i.append(dc.go_die_i)
- issue_i.append(dc.issue_i)
- # wire up inputs from module to row cell inputs (Cat is gooood)
- m.d.comb += [Cat(*go_st_i).eq(self.go_st_i),
- Cat(*go_ld_i).eq(self.go_ld_i),
- Cat(*go_die_i).eq(self.go_die_i),
- Cat(*issue_i).eq(self.issue_i),
- ]
- return m
- def __iter__(self):
- yield self.ld_i
- yield self.st_i
- yield self.issue_i
- yield self.go_ld_i
- yield self.go_st_i
- yield self.go_die_i
- yield self.ld_rsel_o
- yield self.st_rsel_o
- yield self.ld_pend_o
- yield self.st_pend_o
- yield self.ld_pend_i
- yield self.st_pend_i
- yield self.ld_rsel_o
- yield self.st_rsel_o
- def ports(self):
- return list(self)
-def d_matrix_sim(dut):
- """ XXX TODO
- """
- yield dut.ld_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.st_i.eq(1)
- yield dut.issue_i.eq(1)
- yield
- yield dut.issue_i.eq(0)
- yield
- yield dut.go_st_i.eq(1)
- yield
- yield dut.go_st_i.eq(0)
- yield
- yield dut.go_ld_i.eq(1)
- yield
- yield dut.go_ld_i.eq(0)
- yield
-def test_d_matrix():
- dut = MemFUDepMatrix(n_fu_row=3, n_reg_col=3)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_fu_mem_matrix.il", "w") as f:
- f.write(vl)
- run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_mem_matrix.vcd')
-if __name__ == '__main__':
- test_d_matrix()
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal, Cat
-class MemFU_Pend(Elaboratable):
- """ these are allocated per-FU (horizontally),
- and are of length reg_count
- """
- def __init__(self, reg_count):
- self.reg_count = reg_count
- self.ld_fwd_i = Signal(reg_count, reset_less=True)
- self.st_fwd_i = Signal(reg_count, reset_less=True)
- self.reg_ld_pend_o = Signal(reset_less=True)
- self.reg_st_pend_o = Signal(reset_less=True)
- def elaborate(self, platform):
- m = Module()
- m.d.comb += self.reg_ld_pend_o.eq(self.ld_fwd_i.bool())
- m.d.comb += self.reg_st_pend_o.eq(self.st_fwd_i.bool())
- return m
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal
class Mem_Rsv(Elaboratable):
    """ Select reducer, allocated per-Register (vertically); its two
        input vectors are fu_count bits wide.

        Each one-bit output is the ``.bool()`` reduction of the matching
        input vector.
    """
    def __init__(self, fu_count):
        self.fu_count = fu_count
        # per-FU select vectors in
        self.ld_rsel_i = Signal(fu_count, reset_less=True)
        self.st_rsel_i = Signal(fu_count, reset_less=True)
        # single-bit reductions out
        self.ld_rsel_o = Signal(reset_less=True)
        self.st_rsel_o = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        any_ld = self.ld_rsel_i.bool()
        any_st = self.st_rsel_i.bool()
        m.d.comb += [
            self.ld_rsel_o.eq(any_ld),
            self.st_rsel_o.eq(any_st),
        ]
        return m
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Array, Elaboratable
-from scoreboard.fu_fu_matrix import FUFUDepMatrix
-from scoreboard.mdm import FUMemMatchMatrix
class MemFunctionUnits(Elaboratable):
    """ Load/store function units: wires a FUFUDepMatrix and a
        FUMemMatchMatrix together and exposes their LD/ST controls.

        Arguments:
        * :n_ldsts: width of every per-unit signal, and the number of
                    address inputs
        * :addrbitwid: bit width of each address input

        Note: ``st_pend_o`` is only created when ``elaborate`` runs (it
        aliases the match matrix's ``wr_pend_o``), so it is not available
        on a freshly constructed instance.
    """
    def __init__(self, n_ldsts, addrbitwid):
        self.n_ldsts = n_ldsts
        self.bitwid = addrbitwid

        self.st_i = Signal(n_ldsts, reset_less=True) # Dest R# in
        self.ld_i = Signal(n_ldsts, reset_less=True) # oper1 R# in

        self.g_int_ld_pend_o = Signal(n_ldsts, reset_less=True)
        self.g_int_st_pend_o = Signal(n_ldsts, reset_less=True)

        self.st_rsel_o = Signal(n_ldsts, reset_less=True) # dest reg (bot)
        self.ld_rsel_o = Signal(n_ldsts, reset_less=True) # src1 reg (bot)

        self.loadable_o = Signal(n_ldsts, reset_less=True)
        self.storable_o = Signal(n_ldsts, reset_less=True)
        self.addr_nomatch_o = Signal(n_ldsts, reset_less=True)

        self.go_ld_i = Signal(n_ldsts, reset_less=True)
        self.go_st_i = Signal(n_ldsts, reset_less=True)
        self.go_die_i = Signal(n_ldsts, reset_less=True)
        self.fn_issue_i = Signal(n_ldsts, reset_less=True)

        # address matching
        self.addrs_i = Array(Signal(self.bitwid, name="addrs_i%d" % i) \
                             for i in range(n_ldsts))
        self.addr_we_i = Signal(n_ldsts) # write-enable for incoming address
        self.addr_en_i = Signal(n_ldsts) # address latched in
        self.addr_rs_i = Signal(n_ldsts) # address deactivated

        # Note: FURegs st_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_ldsts

        # FU-FU Dep Matrix (square: n_fus x n_fus)
        intfudeps = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = intfudeps
        # FU-to-address match matrix, used here in place of an FU-Reg matrix
        intregdeps = FUMemMatchMatrix(n_fus, self.bitwid)
        m.submodules.intregdeps = intregdeps

        # ok, because we do not know in advance what the AGEN (address gen)
        # is, we have to make a transitive dependency set.  i.e. the LD
        # (or ST) being requested now must depend on ALL prior LDs *AND* STs.
        # these get dropped very rapidly once AGEN is carried out.

        # connect fureg matrix as a mem system
        comb += self.g_int_ld_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_st_pend_o.eq(intregdeps.v_wr_rsel_o)

        # the match matrix's own vertical select outputs are looped back
        # into its pending inputs
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.st_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_rd_i.eq(self.go_ld_i)
        comb += intfudeps.go_wr_i.eq(self.go_st_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.loadable_o.eq(intfudeps.readable_o)
        comb += self.storable_o.eq(intfudeps.writable_o)
        comb += self.addr_nomatch_o.eq(intregdeps.addr_nomatch_o)

        # Connect function issue / arrays, and dest/src1/src2
        comb += intregdeps.dest_i.eq(self.st_i)
        comb += intregdeps.src_i[0].eq(self.ld_i)

        comb += intregdeps.go_rd_i.eq(self.go_ld_i)
        comb += intregdeps.go_wr_i.eq(self.go_st_i)
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        comb += self.st_rsel_o.eq(intregdeps.dest_rsel_o)
        comb += self.ld_rsel_o.eq(intregdeps.src_rsel_o[0])

        # connect address matching: these get connected to the Addr CUs
        for i in range(self.n_ldsts):
            comb += intregdeps.addrs_i[i].eq(self.addrs_i[i])
        comb += intregdeps.addr_we_i.eq(self.addr_we_i)
        comb += intregdeps.addr_en_i.eq(self.addr_en_i)
        comb += intregdeps.addr_rs_i.eq(self.addr_rs_i)

        return m

    def __iter__(self):
        """ Yield the port signals created in __init__.

            ``addr_rs_i`` and ``addr_nomatch_o`` used to be missing from
            this list even though both are wired up in ``elaborate``; they
            are now appended after the previously listed ports, so the
            position of every earlier entry is unchanged.
        """
        yield self.ld_i
        yield self.st_i
        yield self.g_int_st_pend_o
        yield self.g_int_ld_pend_o
        yield self.ld_rsel_o
        yield self.st_rsel_o
        yield self.loadable_o
        yield self.storable_o
        yield self.go_st_i
        yield self.go_ld_i
        yield self.go_die_i
        yield self.fn_issue_i
        yield from self.addrs_i
        yield self.addr_we_i
        yield self.addr_en_i
        yield self.addr_rs_i
        yield self.addr_nomatch_o

    def ports(self):
        """Return the port signals as a list."""
        return list(self)
+++ /dev/null
-from nmigen import Elaboratable, Module, Signal, Array
class Reg_Rsv(Elaboratable):
    """ Select reducer, allocated per-Register (vertically).

        Takes one fu_count-wide destination select vector and n_src
        fu_count-wide source select vectors; each is reduced with
        ``.bool()`` to a single output bit (source results are packed
        into the n_src-bit ``src_rsel_o``).
    """
    def __init__(self, fu_count, n_src):
        self.n_src = n_src
        self.fu_count = fu_count
        # inputs
        self.dest_rsel_i = Signal(fu_count, reset_less=True)
        src_sigs = [Signal(fu_count, name="src_rsel_i", reset_less=True)
                    for _ in range(n_src)]
        self.src_rsel_i = Array(src_sigs)
        # outputs
        self.dest_rsel_o = Signal(reset_less=True)
        self.src_rsel_o = Signal(n_src, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        comb += self.dest_rsel_o.eq(self.dest_rsel_i.bool())
        # bit idx of src_rsel_o reports whether source vector idx is non-zero
        for idx in range(self.n_src):
            any_sel = self.src_rsel_i[idx].bool()
            comb += self.src_rsel_o[idx].eq(any_sel)
        return m
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Array, Const, Elaboratable, Repl
-from nmigen.lib.coding import Decoder
-from scoreboard.shadow_fn import ShadowFn
class ShadowMatrix(Elaboratable):
    """ Matrix of Shadow Functions.  One per FU.

        Inputs
        * :n_fus:      number of Shadow Functions (one per FU)
        * :shadow_wid: number of shadow/fail/good/go_die sets
        * :syncreset:  passed straight through to every ShadowFn

        Notes:

        * Each FU has its own shadow enable/fail/good input vector,
          routed to that FU's Shadow Function.
        * Output is an array of "shadow active" (schroedinger wires: neither
          alive nor dead) and an array of "go die" signals, one per FU.

        * the shadown must be connected to the Computation Unit's
          write release request, preventing it (ANDing) from firing
          (and thus preventing Writable.  this by the way being the
          whole point of having the Shadow Matrix...)

        * go_die_o must be connected to *both* the Computation Unit's
          src-operand and result-operand latch resets, causing both
          of them to reset.

        * go_die_o also needs to be wired into the Dependency and Function
          Unit Matrices by way of over-enabling (ORing) into Go_Read and
          Go_Write, resetting every cell that is required to "die"
    """
    def __init__(self, n_fus, shadow_wid=0, syncreset=False):
        self.syncreset = syncreset
        self.n_fus = n_fus
        self.shadow_wid = shadow_wid

        # inputs: one bit per FU
        self.issue_i = Signal(n_fus, reset_less=True)
        self.reset_i = Signal(n_fus, reset_less=True)
        # inputs: one shadow_wid-wide vector per FU
        self.shadow_i = Array(Signal(shadow_wid, name="sh_i", reset_less=True)
                              for _ in range(n_fus))
        self.s_fail_i = Array(Signal(shadow_wid, name="fl_i", reset_less=True)
                              for _ in range(n_fus))
        self.s_good_i = Array(Signal(shadow_wid, name="gd_i", reset_less=True)
                              for _ in range(n_fus))
        # outputs: one bit per FU
        self.go_die_o = Signal(n_fus, reset_less=True)
        self.shadown_o = Signal(n_fus, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # per-FU single-bit ports, collected so they can be bundled with Cat
        issue_bits, reset_bits, shadown_bits, die_bits = [], [], [], []

        for fu in range(self.n_fus):
            fn = ShadowFn(self.shadow_wid, self.syncreset)
            setattr(m.submodules, "sh%d" % fu, fn)
            # FU number fu gets row fu of each input array
            comb += fn.s_fail_i.eq(self.s_fail_i[fu])
            comb += fn.s_good_i.eq(self.s_good_i[fu])
            # this one is the matrix (shadow enables)
            comb += fn.shadow_i.eq(self.shadow_i[fu])

            issue_bits.append(fn.issue_i)
            reset_bits.append(fn.reset_i)
            shadown_bits.append(fn.shadown_o)
            die_bits.append(fn.go_die_o)

        # fan the issue/reset vectors out, gather the output bits back in
        comb += Cat(*issue_bits).eq(self.issue_i)
        comb += Cat(*reset_bits).eq(self.reset_i)
        comb += self.shadown_o.eq(Cat(*shadown_bits))
        comb += self.go_die_o.eq(Cat(*die_bits))

        return m

    def __iter__(self):
        yield self.issue_i
        yield self.reset_i
        for per_fu in (self.shadow_i, self.s_fail_i, self.s_good_i):
            yield from per_fu
        yield self.go_die_o
        yield self.shadown_o

    def ports(self):
        return [*self]
class BranchSpeculationRecord(Elaboratable):
    """ A record of which function units will be cancelled and which
        allowed to proceed, on a branch.

        Whilst the input is a pair that says whether the instruction is
        under the "success" branch shadow (good_i) or the "fail" shadow
        (fail_i path), when the branch result is known, the "good" path
        must be cancelled if "fail" occurred, and the "fail" path cancelled
        if "good" occurred.

        therefore, use "good|~fail" and "fail|~good" respectively as
        output.

        Arguments:
        * :n_fus: width of good_i / fail_i / match_f_o / match_g_o
    """

    def __init__(self, n_fus):
        self.n_fus = n_fus

        # inputs: record *expected* status
        self.active_i = Signal(reset_less=True)
        self.good_i = Signal(n_fus, reset_less=True)
        self.fail_i = Signal(n_fus, reset_less=True)

        # inputs: status of branch (when result was known)
        self.br_i = Signal(reset_less=True)
        self.br_ok_i = Signal(reset_less=True)

        # outputs: true if the *expected* outcome matched the *actual* outcome
        self.match_f_o = Signal(n_fus, reset_less=True)
        self.match_g_o = Signal(n_fus, reset_less=True)

    def elaborate(self, platform):
        m = Module()

        # registers to record *expected* status
        good_r = Signal(self.n_fus)
        fail_r = Signal(self.n_fus)

        for i in range(self.n_fus):
            # while active, accumulate (OR in) the expected direction
            with m.If(self.active_i):
                m.d.sync += good_r[i].eq(good_r[i] | self.good_i[i])
                m.d.sync += fail_r[i].eq(fail_r[i] | self.fail_i[i])
            with m.If(self.br_i):
                with m.If(good_r[i]):
                    # we expected good, return OK that good was EXPECTED
                    m.d.comb += self.match_g_o[i].eq(self.br_ok_i)
                    m.d.comb += self.match_f_o[i].eq(~self.br_ok_i)
                with m.If(fail_r[i]):
                    # we expected fail, return OK that fail was EXPECTED
                    m.d.comb += self.match_g_o[i].eq(~self.br_ok_i)
                    m.d.comb += self.match_f_o[i].eq(self.br_ok_i)
                # these come after the accumulate above, so the clear wins
                # when active_i and br_i are both set
                m.d.sync += good_r[i].eq(0) # might be set if issue set as well
                m.d.sync += fail_r[i].eq(0) # might be set if issue set as well

        return m

    def __iter__(self):
        """ Yield the port signals created in __init__.

            The previous version yielded br_good_i, br_fail_i, good_o and
            fail_o, none of which exist on this class, so ports() raised
            AttributeError.  The signals actually created are used instead.
        """
        yield self.active_i
        yield self.good_i
        yield self.fail_i
        yield self.br_i
        yield self.br_ok_i
        yield self.match_f_o
        yield self.match_g_o

    def ports(self):
        """Return the port signals as a list."""
        return list(self)
class WaWGrid(Elaboratable):
    """ An NxM grid-selector which raises a 2D bit selected by N and M

        Row f of waw_o is shadow_i ANDed with bit f of fu_i replicated
        to shadow_wid bits.
    """

    def __init__(self, n_fus, shadow_wid):
        self.n_fus = n_fus
        self.shadow_wid = shadow_wid

        self.shadow_i = Signal(shadow_wid, reset_less=True)
        self.fu_i = Signal(n_fus, reset_less=True)

        rows = [Signal(shadow_wid, name="waw_o", reset_less=True)
                for _ in range(n_fus)]
        self.waw_o = Array(rows)

    def elaborate(self, platform):
        m = Module()
        for fu in range(self.n_fus):
            # stretch the single FU bit across the whole row
            fu_mask = Repl(self.fu_i[fu], self.shadow_wid)
            m.d.comb += self.waw_o[fu].eq(fu_mask & self.shadow_i)
        return m
def shadow_sim(dut):
    """ Simulation stimulus: set dest_i then src1_i (each with an issue_i
        pulse, one and three bare yields long respectively), then pulse
        go_rd_i followed by go_wr_i.

        NOTE(review): none of the classes defined alongside this function
        (ShadowMatrix, BranchSpeculationRecord, WaWGrid) create dest_i,
        src1_i, go_rd_i or go_wr_i - confirm which dut this is meant for.
    """
    # (operand set to 1, number of bare yields while issue_i is high)
    for operand, held in (("dest_i", 1), ("src1_i", 3)):
        yield getattr(dut, operand).eq(1)
        yield dut.issue_i.eq(1)
        for _ in range(held):
            yield
        yield dut.issue_i.eq(0)
        yield

    for go in ("go_rd_i", "go_wr_i"):
        for level in (1, 0):
            yield getattr(dut, go).eq(level)
            yield
def test_shadow():
    """ Write RTLIL for a ShadowMatrix(4, 2) and for a
        BranchSpeculationRecord(4), then run shadow_sim against the
        BranchSpeculationRecord, dumping a VCD trace.
    """
    matrix = ShadowMatrix(4, 2)
    matrix_il = rtlil.convert(matrix, ports=matrix.ports())
    with open("test_shadow.il", "w") as il_file:
        il_file.write(matrix_il)

    # only this second device is simulated
    record = BranchSpeculationRecord(4)
    record_il = rtlil.convert(record, ports=record.ports())
    with open("test_branchspecrecord.il", "w") as il_file:
        il_file.write(record_il)

    run_simulation(record, shadow_sim(record), vcd_name='test_shadow.vcd')


if __name__ == '__main__':
    test_shadow()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Signal, Cat, Repl, Const, Elaboratable
-from nmutil.latch import SRLatch
class ShadowFn(Elaboratable):
    """ implements shadowing 11.5.1, p55, just the individual shadow function

        shadowing can be used for branches as well as exceptions (interrupts),
        load/store hold (exceptions again), and vector-element predication
        (once the predicate is known, which it may not be at instruction issue)

        Inputs
        * :slen:      number of shadow/fail/good/go_die sets
        * :syncreset: stored on the instance; both settings currently
                      produce the same combinatorial reset logic

        notes:
        * when slen = 0, go_die_o and shadown_o are Consts (i.e. do nothing)
          and none of the input signals are created
    """
    def __init__(self, slen, syncreset=False):
        self.slen = slen
        self.syncreset = syncreset

        if self.slen:
            # inputs
            self.issue_i = Signal(reset_less=True)
            self.shadow_i = Signal(slen, reset_less=True)
            self.reset_i = Signal(reset_less=True)
            self.s_fail_i = Signal(slen, reset_less=True)
            self.s_good_i = Signal(slen, reset_less=True)
            # outputs
            self.shadown_o = Signal(reset_less=True)
            self.go_die_o = Signal(reset_less=True)
        else:
            # outputs when no shadowing needed
            self.shadown_o = Const(1)
            self.go_die_o = Const(0)

    def elaborate(self, platform):
        m = Module()
        if self.slen == 0:
            # nothing to build; hand back the empty Module (this used to
            # return None, which is not something that can be elaborated)
            return m

        m.submodules.sl = sl = SRLatch(sync=False, llen=self.slen)

        r_ext = Repl(self.reset_i, self.slen)
        reset_r = Signal(self.slen)
        # the syncreset and non-syncreset branches were byte-for-byte
        # identical (both combinatorial), so they are merged here
        m.d.comb += reset_r.eq(self.s_good_i | self.s_fail_i | r_ext)

        i_ext = Repl(self.issue_i, self.slen)
        # set: issued under an enabled shadow that is not being released
        m.d.comb += sl.s.eq(self.shadow_i & i_ext & \
                            ~self.s_good_i & ~reset_r)
        # reset: explicit reset, good/fail result, or issued without shadow
        m.d.comb += sl.r.eq(r_ext | reset_r | self.s_good_i | \
                            (i_ext & ~self.shadow_i))
        # die if any latched shadow reports fail
        m.d.comb += self.go_die_o.eq((sl.qlq & self.s_fail_i).bool())
        # shadown_o is high only when no shadow is latched
        m.d.comb += self.shadown_o.eq(~sl.qlq.bool())

        return m

    def __iter__(self):
        """ Yield the port signals.

            With slen == 0 the inputs do not exist and the outputs are
            Consts, so nothing is yielded (previously AttributeError).
        """
        if not self.slen:
            return
        yield self.issue_i
        yield self.reset_i
        yield self.shadow_i
        yield self.s_fail_i
        yield self.s_good_i
        yield self.shadown_o
        yield self.go_die_o

    def ports(self):
        """Return the port signals as a list."""
        return list(self)
def shadow_fn_unit_sim(dut):
    """ Simulation stimulus: set dest_i then src1_i (each together with an
        issue_i pulse), then pulse go_rd_i and finally go_wr_i.

        NOTE(review): ShadowFn, defined just above, does not create
        dest_i, src1_i, go_rd_i or go_wr_i - confirm the intended dut.
    """
    pulse = (1, 0)

    # first operand: issue_i is high across a single bare yield
    yield dut.dest_i.eq(1)
    for level in pulse:
        yield dut.issue_i.eq(level)
        yield

    # second operand: issue_i is high across three bare yields
    yield dut.src1_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield
    yield
    yield dut.issue_i.eq(0)
    yield

    for level in pulse:
        yield dut.go_rd_i.eq(level)
        yield
    for level in pulse:
        yield dut.go_wr_i.eq(level)
        yield
def test_shadow_fn_unit():
    """ Write the RTLIL for a ShadowFn(4) to test_shadow_fn_unit.il and
        run shadow_fn_unit_sim against it, dumping a VCD trace.
    """
    shadow_fn = ShadowFn(4)
    il_text = rtlil.convert(shadow_fn, ports=shadow_fn.ports())
    with open("test_shadow_fn_unit.il", "w") as il_file:
        il_file.write(il_text)
    run_simulation(shadow_fn, shadow_fn_unit_sim(shadow_fn),
                   vcd_name='test_shadow_fn_unit.vcd')


if __name__ == '__main__':
    test_shadow_fn_unit()
+++ /dev/null
""" testing of InstructionQ
"""
-from copy import deepcopy
-from random import randint
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from scoreboard.instruction_q import InstructionQ
-from nmutil.nmoperator import eq
class IQSim:
    """ Simulation driver for an instruction queue: ``send`` pushes the
        instructions in ``iq`` into the dut, ``rcv`` pulls them out again
        and asserts that they come back in the order they were sent.

        * :dut:   device under test (data_i, p_add_i, p_ready_o, n_sub_i,
                  n_sub_o and data_o are used)
        * :iq:    list of instructions to send
        * :n_in:  upper bound for the (currently overridden) random number
                  of instructions sent at once
        * :n_out: upper bound for the random number of instructions
                  requested at once
    """
    def __init__(self, dut, iq, n_in, n_out):
        self.dut = dut
        self.iq = iq
        self.oq = []        # values read back from data_i, in send order
        self.n_in = n_in
        self.n_out = n_out

    def send(self):
        """ Generator process: feed self.iq into the dut. """
        i = 0
        while i < len(self.iq):
            sendlen = randint(1, self.n_in)
            # the random length is immediately overridden: always send one
            sendlen = 1
            # never run past the end of the list
            sendlen = min(len(self.iq) - i, sendlen)
            print ("sendlen", len(self.iq)-i, sendlen)
            for idx in range(sendlen):
                instr = self.iq[i+idx]
                yield from eq(self.dut.data_i[idx], instr)
                # read data_i back and remember it for rcv() to check
                di = yield self.dut.data_i[idx]#.src1_i
                print ("senddata %d %x" % ((i+idx), di))
                self.oq.append(di)
            yield self.dut.p_add_i.eq(sendlen)
            yield
            # keep stepping until the dut reports p_ready_o
            o_p_ready = yield self.dut.p_ready_o
            while not o_p_ready:
                yield
                o_p_ready = yield self.dut.p_ready_o

            yield self.dut.p_add_i.eq(0)

            print ("send", len(self.iq), i, sendlen)

            # wait random period of time before queueing another value
            for j in range(randint(0, 3)):
                yield

            i += sendlen

        yield self.dut.p_add_i.eq(0)
        yield

        print ("send ended")

        ## wait random period of time before queueing another value
        #for i in range(randint(0, 3)):
        #    yield

        #send_range = randint(0, 3)
        #if send_range == 0:
        #    send = True
        #else:
        #    send = randint(0, send_range) != 0

    def rcv(self):
        """ Generator process: drain the dut and check against self.oq. """
        i = 0
        # three bare yields before the first request
        yield
        yield
        yield
        while i < len(self.iq):
            rcvlen = randint(1, self.n_out)
            #print ("outreq", rcvlen)
            yield self.dut.n_sub_i.eq(rcvlen)
            # n_sub_o is how many entries the dut reports for this request
            n_sub_o = yield self.dut.n_sub_o
            print ("recv", n_sub_o)
            for j in range(n_sub_o):
                r = yield self.dut.data_o[j]#.src1_i
                print ("recvdata %x %s" % (r, repr(self.iq[i+j])))
                assert r == self.oq[i+j]
            yield
            # nothing delivered: retry without clearing the request
            if n_sub_o == 0:
                continue
            yield self.dut.n_sub_i.eq(0)

            i += n_sub_o

        print ("recv ended")
def mk_insns(n_insns, wid, opwid):
    """ Return a list of n_insns random instruction dicts.

        src1_i, src2_i, dest_i and imm_i are drawn from 0..(2**wid)-1,
        oper_i from 0..(2**opwid)-1 and opim_i from 0..1.
    """
    insns = []
    for _ in range(n_insns):
        # the draw order is kept fixed so a seeded run gives the same list
        src1 = randint(0, (1 << wid) - 1)
        use_imm = randint(0, 1)
        src2 = randint(0, (1 << wid) - 1)
        dest = randint(0, (1 << wid) - 1)
        opcode = randint(0, (1 << opwid) - 1)
        immediate = randint(0, (1 << wid) - 1)
        insn = {
            'oper_i': opcode,
            'opim_i': use_imm,
            'imm_i': immediate,
            'dest_i': dest,
            'src1_i': src1,
            'src2_i': src2,
        }
        insns.append(insn)
    return insns
def test_iq():
    """ Build an InstructionQ (8-bit registers, 4-bit opcodes, depth 2,
        one in / one out), write its RTLIL to test_iq.il, then run the
        IQSim receive and send processes over 1000 random instructions.
    """
    wid, opwid = 8, 4
    qlen = 2
    n_in = n_out = 1

    dut = InstructionQ(wid, opwid, qlen, n_in, n_out)
    insns = mk_insns(1000, wid, opwid)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_iq.il", "w") as il_file:
        il_file.write(il_text)

    sim = IQSim(dut, insns, n_in, n_out)
    print (insns)
    processes = [sim.rcv(), sim.send()]
    run_simulation(dut, processes, vcd_name="test_iq.vcd")


if __name__ == '__main__':
    test_iq()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
-from regfile.regfile import RegFileArray, treereduce
-from scoreboard.global_pending import GlobalPending
-from scoreboard.group_picker import GroupPicker
-from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
-from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
-from scoreboard.memfu import MemFunctionUnits
-from nmutil.latch import SRLatch
-from nmutil.nmoperator import eq
-from random import randint, seed
-from copy import deepcopy
-from math import log
class Memory(Elaboratable):
    """ Simple test memory: one read port and one write port onto an
        nmigen memory initialised so that each word holds its own index.

        Arguments:
        * :regwid: data width in bits
        * :addrw:  width of the address input in bits

        The low ``regwid//8`` bits of ``adr`` are dropped to form the word
        index, the same shift that MemSim.ld / MemSim.st apply.
    """
    def __init__(self, regwid, addrw):
        # This class is itself called "Memory", so nmigen's memory
        # primitive has to be imported under another name; referring to
        # plain "Memory" here would recursively construct this class with
        # keyword arguments it does not accept.
        from nmigen import Memory as NMigenMemory

        # integer division: depth feeds range() and ddepth is used as a
        # slice index, so neither may be a float
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # both ports index by the same word address (previously the write
        # port was given the un-shifted address, so a store and a load of
        # the same address touched different words)
        word_adr = self.adr[self.ddepth:] # ignore low bits
        m.d.comb += [
            rdport.addr.eq(word_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(word_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
class MemSim:
    """ Pure-python memory model: a list of words, each initialised to
        its own index.

        * :regwid: word width in bits (stored values are masked to this)
        * :addrw:  address width in bits

        An address is turned into a list index by shifting it right by
        ``regwid//8`` bits.
    """
    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid//8
        n_words = (1<<addrw) // self.ddepth
        self.mem = [word for word in range(n_words)]

    def ld(self, addr):
        """ Return the word that addr falls in. """
        index = addr >> self.ddepth
        return self.mem[index]

    def st(self, addr, data):
        """ Store data, truncated to regwid bits, at addr's word. """
        mask = (1 << self.regwid) - 1
        index = addr >> self.ddepth
        self.mem[index] = data & mask
class Scoreboard(Elaboratable):
    """ Top-level wiring of register files, computation units, function
        unit matrices, group picker, issue units and two shadow matrices
        (one for instruction ordering, one for branch speculation).

        NOTE(review): elaborate() uses CompUnitALUs, CompUnitBR,
        CompUnitsBase and FunctionUnits, which are not among the imports
        visible with this class - confirm where they are defined.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # the FP ports are created but not connected to anything below
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # (as written, the branch shadow is a second, 1-wide ShadowMatrix
        # rather than an extra bit on the first one)
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [ regdecode.dest_i.eq(self.int_dest_i),
                  regdecode.src1_i.eq(self.int_src1_i),
                  regdecode.src2_i.eq(self.int_src2_i),
                  regdecode.enable_i.eq(self.reg_enable_i),
                  self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # the 0x1f mask is five bits wide, matching n_int_alus = 5 above
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        # register-file ports first, then this module's own signals
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        return list(self)
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """ Simulation process: present one integer instruction to the dut
        and wait for it to be issued.

        Bits 2-3 of op select the branch issue group when non-zero and
        the ALU issue group otherwise; bits 0-1 are the operation passed
        to the selected group.
    """
    yield from disable_issue(dut)

    # register numbers
    for port, regnum in ((dut.int_dest_i, dest),
                         (dut.int_src1_i, src1),
                         (dut.int_src2_i, src2)):
        yield port.eq(regnum)

    is_branch = (op & 0b1100) != 0
    if is_branch:
        dut_issue, oper_i, imm_i = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_i, imm_i = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_i.eq(Const(op & 0x3, 2))
    yield imm_i.eq(imm)

    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
def print_reg(dut, rnums):
    """ Simulation process: read each integer register listed in rnums
        and print the numbers and their hex values on a single line.
    """
    hex_values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        hex_values.append("%x" % value)
    labels = ','.join(str(rnum) for rnum in rnums)
    print ("reg %s: %s" % (labels, ','.join(hex_values)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """ Return n_ops random instruction tuples
        (src1, src2, dest, op, opi, imm), each with a trailing (0, 0)
        branch pair appended when shadowing is set.

        Register numbers are 1..dut.n_regs-1, imm is 1..(2**dut.rwid)-1
        and op is 0..max_opnums.
    """
    # extra element(s) tacked onto every tuple
    suffix = ((0, 0),) if shadowing else ()
    ops = []
    for _ in range(n_ops):
        # draw order is kept fixed so a seeded run gives the same list
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1<<dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        # opi is 1 only when the draw from 0..2 comes out as zero
        opi = 1 if randint(0, 2) == 0 else 0
        ops.append((src1, src2, dest, op, opi, imm) + suffix)
    return ops
def scoreboard_sim(dut, alusim):
    """ Simulation process: 50 rounds of "randomise the registers, queue
        a batch of instructions, wait for the dut to drain, compare the
        dut against alusim".

        Only the ``if True:`` batch (15 random ops) is live; the
        ``if False:`` blocks are a disabled catalogue of regression cases.
        Several of them use 4- or 5-element tuples, which would not fit
        the 7-element unpacking in the issue loop if re-enabled as-is.

        NOTE(review): instr_q and wait_for_busy_clear are not defined in
        the code visible with this function - confirm where they live.
    """

    seed(0)

    for i in range(50):

        # set random values in the registers
        # (this inner loop reuses - and overwrites - the round counter i)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            # every entry must be a 7-tuple ending in the branch pair
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                   (i, src1, src2, dest, op, opi, imm))
            # apply the instruction to the reference model, then queue it
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
-def test_scoreboard():
- dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
- alusim = RegSim(16, 8)
- memsim = MemSim(16, 16)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_scoreboard6600.il", "w") as f:
- f.write(vl)
- run_simulation(dut, scoreboard_sim(dut, alusim),
- vcd_name='test_scoreboard6600.vcd')
- #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
- # vcd_name='test_scoreboard6600.vcd')
-def mem_sim(dut):
- yield dut.ld_i.eq(0x1)
- yield dut.fn_issue_i.eq(0x1)
- yield
- yield dut.ld_i.eq(0x0)
- yield dut.st_i.eq(0x3)
- yield dut.fn_issue_i.eq(0x2)
- yield
- yield dut.st_i.eq(0x0)
- yield dut.fn_issue_i.eq(0x0)
- yield
- yield dut.addrs_i[0].eq(0x012)
- yield dut.addrs_i[1].eq(0x012)
- yield dut.addrs_i[2].eq(0x010)
- yield dut.addr_en_i.eq(0x3)
- yield
- yield dut.addr_we_i.eq(0x3)
- yield
- yield dut.go_ld_i.eq(0x1)
- yield
- yield dut.go_ld_i.eq(0x0)
- yield
- yield dut.go_st_i.eq(0x2)
- yield
- yield dut.go_st_i.eq(0x0)
- yield
-def test_mem_fus():
- dut = MemFunctionUnits(3, 11)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_mem_fus.il", "w") as f:
- f.write(vl)
- run_simulation(dut, mem_sim(dut),
- vcd_name='test_mem_fus.vcd')
-if __name__ == '__main__':
- test_mem_fus()
+++ /dev/null
-from nmigen.compat.sim import run_simulation
-from nmigen.cli import verilog, rtlil
-from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
-from regfile.regfile import RegFileArray, treereduce
-from scoreboard.ldst_matrix import LDSTDepMatrix
-from scoreboard.fu_mem_matrix import FUMemDepMatrix
-from scoreboard.global_pending import GlobalPending
-from scoreboard.group_picker import GroupPicker
-from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
-from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
-from nmutil.latch import SRLatch
-from nmutil.nmoperator import eq
-from random import randint, seed
-from copy import deepcopy
-from math import log
-class Memory(Elaboratable):
- def __init__(self, regwid, addrw):
- self.ddepth = regwid/8
- depth = (1<<addrw) / self.ddepth
- self.adr = Signal(addrw)
- self.dat_r = Signal(regwid)
- self.dat_w = Signal(regwid)
- self.we = Signal()
- self.mem = Memory(width=regwid, depth=depth, init=range(0, depth))
- def elaborate(self, platform):
- m = Module()
- m.submodules.rdport = rdport = self.mem.read_port()
- m.submodules.wrport = wrport = self.mem.write_port()
- m.d.comb += [
- rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
- self.dat_r.eq(rdport.data),
- wrport.addr.eq(self.adr),
- wrport.data.eq(self.dat_w),
- wrport.en.eq(self.we),
- ]
- return m
-class MemSim:
- def __init__(self, regwid, addrw):
- self.regwid = regwid
- self.ddepth = regwid//8
- depth = (1<<addrw) // self.ddepth
- self.mem = list(range(0, depth))
- def ld(self, addr):
- return self.mem[addr>>self.ddepth]
- def st(self, addr, data):
- self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
-class MemFunctionUnits(Elaboratable):
- def __init__(self, n_int_alus):
- self.n_int_alus = n_int_alus
- self.ld_i = Signal(n_int_alus, reset_less=True) # Dest R# in
- self.st_i = Signal(n_int_alus, reset_less=True) # oper1 R# in
- self.load_hit_i = Signal(n_int_alus, reset_less=True) # Load Hit
- self.stwd_hit_i = Signal(n_int_alus, reset_less=True) # Store Hit
- #self.g_int_st_pend_o = Signal(n_int_alus, reset_less=True)
- #self.g_int_ld_pend_o = Signal(n_int_alus, reset_less=True)
- #self.ld_rsel_o = Signal(n_int_alus, reset_less=True) # dest reg (bot)
- #self.st_rsel_o = Signal(n_int_alus, reset_less=True) # src1 reg (bot)
- self.req_rel_i = Signal(n_int_alus, reset_less = True)
- self.loadable_o = Signal(n_int_alus, reset_less=True)
- self.storable_o = Signal(n_int_alus, reset_less=True)
- self.go_st_i = Signal(n_int_alus, reset_less=True)
- self.go_ld_i = Signal(n_int_alus, reset_less=True)
- self.go_die_i = Signal(n_int_alus, reset_less=True)
- self.req_rel_o = Signal(n_int_alus, reset_less=True)
- self.fn_issue_i = Signal(n_int_alus, reset_less=True)
- # Note: FURegs ld_pend_o is also outputted from here, for use in WaWGrid
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
- sync = m.d.sync
- n_intfus = self.n_int_alus
- # Integer LD/ST Dep Matrix
- ldstdeps = LDSTDepMatrix(n_intfus)
- m.submodules.ldstdeps = ldstdeps
- # Integer FU-Mem Dep Matrix
- fumemdeps = FUMemDepMatrix(n_intfus, n_intfus)
- m.submodules.fumemdeps = fumemdeps
- #comb += self.g_int_st_pend_o.eq(fumemdeps.v_st_rsel_o)
- #comb += self.g_int_ld_pend_o.eq(fumemdeps.v_ld_rsel_o)
- #comb += fumemdeps.st_pend_i.eq(fumemdeps.v_st_rsel_o)
- #comb += fumemdeps.ld_pend_i.eq(fumemdeps.v_ld_rsel_o)
- #comb += ldstdeps.st_pend_i.eq(fumemdeps.st_pend_o)
- #comb += ldstdeps.ld_pend_i.eq(fumemdeps.ld_pend_o)
- #self.ld_pend_o = fumemdeps.ld_pend_o # also output for use in WaWGrid
- comb += ldstdeps.ld_pend_i.eq(self.ld_i)
- comb += ldstdeps.st_pend_i.eq(self.st_i)
- comb += ldstdeps.issue_i.eq(self.fn_issue_i)
- comb += ldstdeps.load_hit_i.eq(self.load_hit_i)
- comb += ldstdeps.stwd_hit_i.eq(self.stwd_hit_i)
- comb += ldstdeps.go_die_i.eq(self.go_die_i)
- comb += self.storable_o.eq(fumemdeps.storable_o)
- comb += self.loadable_o.eq(fumemdeps.loadable_o)
- comb += fumemdeps.ld_pend_i.eq(ldstdeps.ld_hold_st_o)
- comb += fumemdeps.st_pend_i.eq(ldstdeps.st_hold_ld_o)
- # Connect function issue / arrays, and dest/src1/src2
- comb += fumemdeps.go_st_i.eq(self.stwd_hit_i)
- comb += fumemdeps.go_ld_i.eq(self.load_hit_i)
- comb += fumemdeps.go_die_i.eq(self.go_die_i)
- comb += fumemdeps.issue_i.eq(self.fn_issue_i)
- #comb += self.ld_rsel_o.eq(fumemdeps.ld_rsel_o)
- #comb += self.st_rsel_o.eq(fumemdeps.st_rsel_o)
- return m
- def __iter__(self):
- yield self.ld_i
- yield self.st_i
- #yield self.g_int_st_pend_o
- #yield self.g_int_ld_pend_o
- #yield self.ld_rsel_o
- #yield self.st_rsel_o
- yield self.req_rel_i
- yield self.loadable_o
- yield self.storable_o
- yield self.load_hit_i
- yield self.stwd_hit_i
- yield self.go_st_i
- yield self.go_ld_i
- yield self.go_die_i
- yield self.req_rel_o
- yield self.fn_issue_i
- def ports(self):
- return list(self)
-class Scoreboard(Elaboratable):
- def __init__(self, rwid, n_regs):
- """ Inputs:
- * :rwid: bit width of register file(s) - both FP and INT
- * :n_regs: depth of register file(s) - number of FP and INT regs
- """
- self.rwid = rwid
- self.n_regs = n_regs
- # Register Files
- self.intregs = RegFileArray(rwid, n_regs)
- self.fpregs = RegFileArray(rwid, n_regs)
- # issue q needs to get at these
- self.aluissue = IssueUnitGroup(4)
- self.brissue = IssueUnitGroup(1)
- # and these
- self.alu_oper_i = Signal(4, reset_less=True)
- self.alu_imm_i = Signal(rwid, reset_less=True)
- self.br_oper_i = Signal(4, reset_less=True)
- self.br_imm_i = Signal(rwid, reset_less=True)
- # inputs
- self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
- self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
- self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
- self.reg_enable_i = Signal(reset_less=True) # enable reg decode
- # outputs
- self.issue_o = Signal(reset_less=True) # instruction was accepted
- self.busy_o = Signal(reset_less=True) # at least one CU is busy
- # for branch speculation experiment. branch_direction = 0 if
- # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
- # branch_succ and branch_fail are requests to have the current
- # instruction be dependent on the branch unit "shadow" capability.
- self.branch_succ_i = Signal(reset_less=True)
- self.branch_fail_i = Signal(reset_less=True)
- self.branch_direction_o = Signal(2, reset_less=True)
- def elaborate(self, platform):
- m = Module()
- comb = m.d.comb
- sync = m.d.sync
- m.submodules.intregs = self.intregs
- m.submodules.fpregs = self.fpregs
- # register ports
- int_dest = self.intregs.write_port("dest")
- int_src1 = self.intregs.read_port("src1")
- int_src2 = self.intregs.read_port("src2")
- fp_dest = self.fpregs.write_port("dest")
- fp_src1 = self.fpregs.read_port("src1")
- fp_src2 = self.fpregs.read_port("src2")
- # Int ALUs and Comp Units
- n_int_alus = 5
- cua = CompUnitALUs(self.rwid, 3)
- cub = CompUnitBR(self.rwid, 3)
- m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
- bgt = cub.bgt # get at the branch computation unit
- br1 = cub.br1
- # Int FUs
- m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
- # Count of number of FUs
- n_intfus = n_int_alus
- n_fp_fus = 0 # for now
- # Integer Priority Picker 1: Adder + Subtractor
- intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
- m.submodules.intpick1 = intpick1
- # INT/FP Issue Unit
- regdecode = RegDecode(self.n_regs)
- m.submodules.regdecode = regdecode
- issueunit = IssueUnitArray([self.aluissue, self.brissue])
- m.submodules.issueunit = issueunit
- # Shadow Matrix. currently n_intfus shadows, to be used for
- # write-after-write hazards. NOTE: there is one extra for branches,
- # so the shadow width is increased by 1
- m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
- m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
- # record previous instruction to cast shadow on current instruction
- prev_shadow = Signal(n_intfus)
- # Branch Speculation recorder. tracks the success/fail state as
- # each instruction is issued, so that when the branch occurs the
- # allow/cancel can be issued as appropriate.
- m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
- #---------
- # ok start wiring things together...
- # "now hear de word of de looord... dem bones dem bones dem dryy bones"
- # https://www.youtube.com/watch?v=pYb8Wm6-QfA
- #---------
- #---------
- # Issue Unit is where it starts. set up some in/outs for this module
- #---------
- comb += [ regdecode.dest_i.eq(self.int_dest_i),
- regdecode.src1_i.eq(self.int_src1_i),
- regdecode.src2_i.eq(self.int_src2_i),
- regdecode.enable_i.eq(self.reg_enable_i),
- self.issue_o.eq(issueunit.issue_o)
- ]
- # take these to outside (issue needs them)
- comb += cua.oper_i.eq(self.alu_oper_i)
- comb += cua.imm_i.eq(self.alu_imm_i)
- comb += cub.oper_i.eq(self.br_oper_i)
- comb += cub.imm_i.eq(self.br_imm_i)
- # TODO: issueunit.f (FP)
- # and int function issue / busy arrays, and dest/src1/src2
- comb += intfus.dest_i.eq(regdecode.dest_o)
- comb += intfus.src1_i.eq(regdecode.src1_o)
- comb += intfus.src2_i.eq(regdecode.src2_o)
- fn_issue_o = issueunit.fn_issue_o
- comb += intfus.fn_issue_i.eq(fn_issue_o)
- comb += issueunit.busy_i.eq(cu.busy_o)
- comb += self.busy_o.eq(cu.busy_o.bool())
- #---------
- # merge shadow matrices outputs
- #---------
- # these are explained in ShadowMatrix docstring, and are to be
- # connected to the FUReg and FUFU Matrices, to get them to reset
- anydie = Signal(n_intfus, reset_less=True)
- allshadown = Signal(n_intfus, reset_less=True)
- shreset = Signal(n_intfus, reset_less=True)
- comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
- comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
- comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
- #---------
- # connect fu-fu matrix
- #---------
- # Group Picker... done manually for now.
- go_rd_o = intpick1.go_rd_o
- go_wr_o = intpick1.go_wr_o
- go_rd_i = intfus.go_rd_i
- go_wr_i = intfus.go_wr_i
- go_die_i = intfus.go_die_i
- # NOTE: connect to the shadowed versions so that they can "die" (reset)
- comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
- comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
- comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
- # Connect Picker
- #---------
- comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
- comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
- int_rd_o = intfus.readable_o
- int_wr_o = intfus.writable_o
- comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
- comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
- #---------
- # Shadow Matrix
- #---------
- comb += shadows.issue_i.eq(fn_issue_o)
- #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
- comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
- #---------
- # NOTE; this setup is for the instruction order preservation...
- # connect shadows / go_dies to Computation Units
- comb += cu.shadown_i[0:n_intfus].eq(allshadown)
- comb += cu.go_die_i[0:n_intfus].eq(anydie)
- # ok connect first n_int_fu shadows to busy lines, to create an
- # instruction-order linked-list-like arrangement, using a bit-matrix
- # (instead of e.g. a ring buffer).
- # when written, the shadow can be cancelled (and was good)
- for i in range(n_intfus):
- comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
- # *previous* instruction shadows *current* instruction, and, obviously,
- # if the previous is completed (!busy) don't cast the shadow!
- comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
- for i in range(n_intfus):
- comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
- #---------
- # ... and this is for branch speculation. it uses the extra bit
- # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
- # only needs to set shadow_i, s_fail_i and s_good_i
- # issue captures shadow_i (if enabled)
- comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
- bactive = Signal(reset_less=True)
- comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
- # instruction being issued (fn_issue_o) has a shadow cast by the branch
- with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
- comb += bshadow.issue_i.eq(fn_issue_o)
- for i in range(n_intfus):
- with m.If(fn_issue_o & (Const(1<<i))):
- comb += bshadow.shadow_i[i][0].eq(1)
- # finally, we need an indicator to the test infrastructure as to
- # whether the branch succeeded or failed, plus, link up to the
- # "recorder" of whether the instruction was under shadow or not
- with m.If(br1.issue_i):
- sync += bspec.active_i.eq(1)
- with m.If(self.branch_succ_i):
- comb += bspec.good_i.eq(fn_issue_o & 0x1f)
- with m.If(self.branch_fail_i):
- comb += bspec.fail_i.eq(fn_issue_o & 0x1f)
- # branch is active (TODO: a better signal: this is over-using the
- # go_write signal - actually the branch should not be "writing")
- with m.If(br1.go_wr_i):
- sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
- sync += bspec.active_i.eq(0)
- comb += bspec.br_i.eq(1)
- # branch occurs if data == 1, failed if data == 0
- comb += bspec.br_ok_i.eq(br1.data_o == 1)
- for i in range(n_intfus):
- # *expected* direction of the branch matched against *actual*
- comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
- # ... or it didn't
- comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
- #---------
- # Connect Register File(s)
- #---------
- comb += int_dest.wen.eq(intfus.dest_rsel_o)
- comb += int_src1.ren.eq(intfus.src1_rsel_o)
- comb += int_src2.ren.eq(intfus.src2_rsel_o)
- # connect ALUs to regfule
- comb += int_dest.data_i.eq(cu.data_o)
- comb += cu.src1_i.eq(int_src1.data_o)
- comb += cu.src2_i.eq(int_src2.data_o)
- # connect ALU Computation Units
- comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
- comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
- comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
- return m
- def __iter__(self):
- yield from self.intregs
- yield from self.fpregs
- yield self.int_dest_i
- yield self.int_src1_i
- yield self.int_src2_i
- yield self.issue_o
- yield self.branch_succ_i
- yield self.branch_fail_i
- yield self.branch_direction_o
- def ports(self):
- return list(self)
-def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
- yield from disable_issue(dut)
- yield dut.int_dest_i.eq(dest)
- yield dut.int_src1_i.eq(src1)
- yield dut.int_src2_i.eq(src2)
- if (op & (0x3<<2)) != 0: # branch
- yield dut.brissue.insn_i.eq(1)
- yield dut.br_oper_i.eq(Const(op & 0x3, 2))
- yield dut.br_imm_i.eq(imm)
- dut_issue = dut.brissue
- else:
- yield dut.aluissue.insn_i.eq(1)
- yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
- yield dut.alu_imm_i.eq(imm)
- dut_issue = dut.aluissue
- yield dut.reg_enable_i.eq(1)
- # these indicate that the instruction is to be made shadow-dependent on
- # (either) branch success or branch fail
- yield dut.branch_fail_i.eq(branch_fail)
- yield dut.branch_succ_i.eq(branch_success)
- yield
- yield from wait_for_issue(dut, dut_issue)
-def print_reg(dut, rnums):
- rs = []
- for rnum in rnums:
- reg = yield dut.intregs.regs[rnum].reg
- rs.append("%x" % reg)
- rnums = map(str, rnums)
- print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
-def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
- insts = []
- for i in range(n_ops):
- src1 = randint(1, dut.n_regs-1)
- src2 = randint(1, dut.n_regs-1)
- imm = randint(1, (1<<dut.rwid)-1)
- dest = randint(1, dut.n_regs-1)
- op = randint(0, max_opnums)
- opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
- if shadowing:
- insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
- else:
- insts.append((src1, src2, dest, op, opi, imm))
- return insts
-def scoreboard_sim(dut, alusim):
- seed(0)
- for i in range(50):
- # set random values in the registers
- for i in range(1, dut.n_regs):
- val = randint(0, (1<<alusim.rwidth)-1)
- #val = 31+i*3
- #val = i
- yield dut.intregs.regs[i].reg.eq(val)
- alusim.setval(i, val)
- # create some instructions (some random, some regression tests)
- instrs = []
- if True:
- instrs = create_random_ops(dut, 15, True, 4)
- if False:
- instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )
- if False:
- instrs.append( (7, 3, 2, 4, (0, 0)) )
- instrs.append( (7, 6, 6, 2, (0, 0)) )
- instrs.append( (1, 7, 2, 2, (0, 0)) )
- if False:
- instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
- instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
- instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
- instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
- instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))
- if False:
- instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
- instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
- instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))
- if False:
- instrs.append((5, 6, 2, 1))
- instrs.append((2, 2, 4, 0))
- #instrs.append((2, 2, 3, 1))
- if False:
- instrs.append((2, 1, 2, 3))
- if False:
- instrs.append((2, 6, 2, 1))
- instrs.append((2, 1, 2, 0))
- if False:
- instrs.append((1, 2, 7, 2))
- instrs.append((7, 1, 5, 0))
- instrs.append((4, 4, 1, 1))
- if False:
- instrs.append((5, 6, 2, 2))
- instrs.append((1, 1, 4, 1))
- instrs.append((6, 5, 3, 0))
- if False:
- # Write-after-Write Hazard
- instrs.append( (3, 6, 7, 2) )
- instrs.append( (4, 4, 7, 1) )
- if False:
- # self-read/write-after-write followed by Read-after-Write
- instrs.append((1, 1, 1, 1))
- instrs.append((1, 5, 3, 0))
- if False:
- # Read-after-Write followed by self-read-after-write
- instrs.append((5, 6, 1, 2))
- instrs.append((1, 1, 1, 1))
- if False:
- # self-read-write sandwich
- instrs.append((5, 6, 1, 2))
- instrs.append((1, 1, 1, 1))
- instrs.append((1, 5, 3, 0))
- if False:
- # very weird failure
- instrs.append( (5, 2, 5, 2) )
- instrs.append( (2, 6, 3, 0) )
- instrs.append( (4, 2, 2, 1) )
- if False:
- v1 = 4
- yield dut.intregs.regs[5].reg.eq(v1)
- alusim.setval(5, v1)
- yield dut.intregs.regs[3].reg.eq(5)
- alusim.setval(3, 5)
- instrs.append((5, 3, 3, 4, (0, 0)))
- instrs.append((4, 2, 1, 2, (0, 1)))
- if False:
- v1 = 6
- yield dut.intregs.regs[5].reg.eq(v1)
- alusim.setval(5, v1)
- yield dut.intregs.regs[3].reg.eq(5)
- alusim.setval(3, 5)
- instrs.append((5, 3, 3, 4, (0, 0)))
- instrs.append((4, 2, 1, 2, (1, 0)))
- if False:
- instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
- instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
- instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
- instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
- instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
- instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
- instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
- instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
- instrs.append( (6, 7, 7, 0, 0, (0, 0)) )
- # issue instruction(s), wait for issue to be free before proceeding
- for i, instr in enumerate(instrs):
- src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr
- print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
- (i, src1, src2, dest, op, opi, imm))
- alusim.op(op, opi, imm, src1, src2, dest)
- yield from instr_q(dut, op, opi, imm, src1, src2, dest,
- br_ok, br_fail)
- # wait for all instructions to stop before checking
- while True:
- iqlen = yield dut.qlen_o
- if iqlen == 0:
- break
- yield
- yield
- yield
- yield
- yield
- yield from wait_for_busy_clear(dut)
- # check status
- yield from alusim.check(dut)
- yield from alusim.dump(dut)
-def test_scoreboard():
- dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
- alusim = RegSim(16, 8)
- memsim = MemSim(16, 16)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_scoreboard6600.il", "w") as f:
- f.write(vl)
- run_simulation(dut, scoreboard_sim(dut, alusim),
- vcd_name='test_scoreboard6600.vcd')
- #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
- # vcd_name='test_scoreboard6600.vcd')
-def mem_sim(dut):
- yield dut.ld_i.eq(0x1)
- yield dut.fn_issue_i.eq(0x1)
- yield
- #yield dut.ld_i.eq(0x0)
- yield dut.st_i.eq(0x2)
- yield dut.fn_issue_i.eq(0x2)
- yield
- #yield dut.st_i.eq(0x0)
- yield dut.fn_issue_i.eq(0x0)
- yield
- yield dut.load_hit_i.eq(0x1)
- yield
- yield dut.load_hit_i.eq(0x0)
- yield
- yield dut.stwd_hit_i.eq(0x2)
- yield
- yield dut.stwd_hit_i.eq(0x0)
- yield
-def test_mem_fus():
- dut = MemFunctionUnits(3)
- vl = rtlil.convert(dut, ports=dut.ports())
- with open("test_mem_fus.il", "w") as f:
- f.write(vl)
- run_simulation(dut, mem_sim(dut),
- vcd_name='test_mem_fus.vcd')
-if __name__ == '__main__':
- test_mem_fus()
--- /dev/null
--- /dev/null
+from nmigen import Module, Signal, Elaboratable
+from nmigen.lib.coding import Encoder, PriorityEncoder
+class AddressEncoder(Elaboratable):
+ """Address Encoder
+ The purpose of this module is to take in a vector and
+ encode the bits that are one hot into an address. This module
+ combines both nmigen's Encoder and PriorityEncoder and will state
+ whether the input line has a single bit hot, multiple bits hot,
+ or no bits hot. The output line will always have the lowest value
+ address output.
+ Signals:
+ * i: input vector, `width` bits wide
+ * single_match: high when the input is non-zero and one-hot
+ * multiple_match: high when the input is non-zero and not one-hot
+ * o: address taken from the priority encoder, 0 when nothing matches
+ Usage:
+ The output is valid when either single or multiple match is high.
+ Otherwise output is 0.
+ """
+ def __init__(self, width):
+ """ Arguments:
+ * width: The desired length of the input vector
+ """
+ # Internal
+ self.encoder = Encoder(width)  # used to tell one-hot from not-one-hot
+ self.p_encoder = PriorityEncoder(width)  # supplies the address and the "input is zero" flag
+ # Input
+ self.i = Signal(width)
+ # Output
+ self.single_match = Signal(1)
+ self.multiple_match = Signal(1)
+ self.o = Signal(max=width)  # sized to hold an index below width
+ def elaborate(self, platform=None):  # builds and returns the Module; platform is not used
+ m = Module()
+ # Add internal submodules
+ m.submodules.encoder = self.encoder
+ m.submodules.p_encoder = self.p_encoder
+ m.d.comb += [  # both encoders see the same input vector
+ self.encoder.i.eq(self.i),
+ self.p_encoder.i.eq(self.i)
+ ]
+ # Steps:
+ # 1. check if the input vector is non-zero
+ # 2. if non-zero, check if single match or multiple match
+ # 3. set output line to be lowest value address output
+ # If the priority encoder receives an input of 0
+ # If n is 1 then the output is not valid
+ with m.If(self.p_encoder.n):
+ m.d.comb += [
+ self.single_match.eq(0),
+ self.multiple_match.eq(0),
+ self.o.eq(0)
+ ]
+ # If the priority encoder receives an input > 0
+ with m.Else():
+ # Multiple Match if encoder n is invalid
+ # (input is non-zero here, so an invalid plain encoding means more than one bit is hot)
+ with m.If(self.encoder.n):
+ m.d.comb += [
+ self.single_match.eq(0),
+ self.multiple_match.eq(1)
+ ]
+ # Single Match if encoder n is valid
+ with m.Else():
+ m.d.comb += [
+ self.single_match.eq(1),
+ self.multiple_match.eq(0)
+ ]
+ # Always set output based on priority encoder output
+ m.d.comb += self.o.eq(self.p_encoder.o)
+ return m
--- /dev/null
+from nmigen import Array, Cat, Module, Signal, Elaboratable
+from nmigen.lib.coding import Decoder
+from nmigen.cli import main #, verilog
+from .CamEntry import CamEntry
+from .AddressEncoder import AddressEncoder
+class Cam(Elaboratable):
+ """ Content Addressable Memory (CAM)
+ The purpose of this module is to quickly look up whether an
+ entry exists given a data key.
+ This module will search for the given data in all internal entries
+ and output whether a single or multiple match was found.
+ If a single entry is found its address is returned and single_match
+ is set HIGH. If multiple entries are found the lowest address is
+ returned and multiple_match is set HIGH. If neither single_match nor
+ multiple_match is HIGH this implies no match was found. To write
+ to the CAM set the address bus to the desired entry and set write_enable
+ HIGH. Entry management should be performed one level above this block
+ as lookup is performed within.
+ Notes:
+ The read and write operations take one clock cycle to complete.
+ Currently the read_warning line is present for interfacing but
+ is not necessary for this design. This module is capable of writing
+ in the first cycle, reading on the second, and outputting the correct
+ address on the third.
+ """
+ def __init__(self, data_size, cam_size):
+ """ Arguments:
+ * data_size: (bits) The bit size of the data
+ * cam_size: (number) The number of entries in the CAM
+ """
+ # Internal
+ self.cam_size = cam_size
+ self.encoder = AddressEncoder(cam_size)
+ self.decoder = Decoder(cam_size)
+ self.entry_array = Array(CamEntry(data_size) for x in range(cam_size))
+ # Input
+ self.enable = Signal(1)
+ self.write_enable = Signal(1)
+ self.data_in = Signal(data_size) # The data to be written
+ self.data_mask = Signal(data_size) # mask for ternary writes
+ self.address_in = Signal(max=cam_size) # address of CAM Entry to write
+ # Output
+ self.read_warning = Signal(1) # High when a read interrupts a write
+ self.single_match = Signal(1) # High when there is only one match
+ self.multiple_match = Signal(1) # High when there are at least two matches
+ self.match_address = Signal(max=cam_size) # The lowest address matched
+ def elaborate(self, platform=None):
+ # Builds and returns the Module; platform is not used.
+ m = Module()
+ # AddressEncoder for match types and output address
+ m.submodules.AddressEncoder = self.encoder
+ # Decoder is used to select which entry will be written to
+ m.submodules.Decoder = self.decoder
+ # CamEntry Array Submodules
+ # Note these are added anonymously
+ entry_array = self.entry_array
+ m.submodules += entry_array
+ # Decoder logic
+ m.d.comb += [
+ self.decoder.i.eq(self.address_in),
+ self.decoder.n.eq(0)
+ ]
+ encoder_vector = []
+ with m.If(self.enable):
+ # Set the key value for every CamEntry
+ for index in range(self.cam_size):
+ # Write Operation
+ with m.If(self.write_enable):
+ # command 2 (write) only for the entry the decoder selects,
+ # command 0 (no-op) for every other entry
+ with m.If(self.decoder.o[index]):
+ m.d.comb += entry_array[index].command.eq(2)
+ with m.Else():
+ m.d.comb += entry_array[index].command.eq(0)
+ # Read Operation
+ with m.Else():
+ m.d.comb += entry_array[index].command.eq(1)
+ # Send data input to all entries
+ m.d.comb += entry_array[index].data_in.eq(self.data_in)
+ # Send all entry matches to encoder
+ ematch = entry_array[index].match
+ encoder_vector.append(ematch)
+ # Give input to and accept output from encoder module
+ m.d.comb += [
+ self.encoder.i.eq(Cat(*encoder_vector)),
+ self.single_match.eq(self.encoder.single_match),
+ self.multiple_match.eq(self.encoder.multiple_match),
+ self.match_address.eq(self.encoder.o)
+ ]
+ # If the CAM is not enabled set all outputs to 0
+ with m.Else():
+ m.d.comb += [
+ self.read_warning.eq(0),
+ self.single_match.eq(0),
+ self.multiple_match.eq(0),
+ self.match_address.eq(0)
+ ]
+ return m
+ def ports(self):
+ # List of the externally visible signals of the CAM.
+ # address_in was previously missing, so the write address was not
+ # exposed when this list was used as the top-level port list. It is
+ # appended last so the positions of the existing entries do not shift.
+ return [self.enable, self.write_enable,
+ self.data_in, self.data_mask,
+ self.read_warning, self.single_match,
+ self.multiple_match, self.match_address,
+ self.address_in]
+if __name__ == '__main__':  # run as a script: hand a small CAM to nmigen's command-line driver
+ cam = Cam(4, 4)  # data_size=4 bits, cam_size=4 entries
+ main(cam, ports=cam.ports())
--- /dev/null
+from nmigen import Module, Signal, Elaboratable
+class CamEntry(Elaboratable):
+ """ Content Addressable Memory (CAM) Entry
+ The purpose of this module is to represent an entry within a CAM.
+ This module when given a read command will compare the given data
+ and output whether a match was found or not. When given a write
+ command it will write the given data into internal registers.
+ Both `match` and `data` are driven from the sync domain, so a command
+ presented in one cycle shows its effect after the next clock edge.
+ """
+ def __init__(self, data_size):
+ """ Arguments:
+ * data_size: (bit count) The size of the data
+ """
+ # Input
+ self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset
+ self.data_in = Signal(data_size) # Data input when writing
+ # Output
+ self.match = Signal(1) # Result of the internal/input key comparison
+ self.data = Signal(data_size)  # the stored key
+ def elaborate(self, platform=None):  # builds and returns the Module; platform is not used
+ m = Module()
+ with m.Switch(self.command):
+ with m.Case("00"):  # NA: clear the match flag, stored data untouched
+ m.d.sync += self.match.eq(0)
+ with m.Case("01"):  # Read: compare the stored data against data_in
+ with m.If(self.data == self.data_in):
+ m.d.sync += self.match.eq(1)
+ with m.Else():
+ m.d.sync += self.match.eq(0)
+ with m.Case("10"):  # Write: store data_in and clear the match flag
+ m.d.sync += [
+ self.data.eq(self.data_in),
+ self.match.eq(0)
+ ]
+ with m.Case():  # default, i.e. the remaining value 11 (Reset): clear match and stored data
+ m.d.sync += [
+ self.match.eq(0),
+ self.data.eq(0)
+ ]
+ return m
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen.cli import verilog, rtlil
+class LFSRPolynomial(set):
+ """ implements a polynomial for use in LFSR
+ The polynomial is stored as the set of its exponents. Exponent 0 (the
+ constant term) is always present.
+ """
+ def __init__(self, exponents=()):
+ """ Arguments:
+ * exponents: iterable of non-negative ints (list, tuple, set,
+ generator or another LFSRPolynomial). Zero is added if missing.
+ Validation uses assert, so a bad exponent raises AssertionError
+ (carrying the TypeError/ValueError as its message) and the checks
+ are skipped when Python runs with -O.
+ """
+ # materialise first: a one-shot iterable (e.g. a generator) would
+ # otherwise be used up by the validation loop below, leaving nothing
+ # to store and silently producing the polynomial {0}
+ exponents = tuple(exponents)
+ for e in exponents:
+ assert isinstance(e, int), TypeError("%s must be an int" % repr(e))
+ assert (e >= 0), ValueError("%d must not be negative" % e)
+ set.__init__(self, set(exponents).union({0})) # must contain zero
+ @property
+ def max_exponent(self):
+ """ the largest exponent, i.e. the degree of the polynomial """
+ return max(self) # derived from set, so this returns the max exponent
+ @property
+ def exponents(self):
+ """ the exponents as a list, highest first """
+ exponents = list(self) # get elements of set as a list
+ exponents.sort(reverse=True)
+ return exponents
+ def __str__(self):
+ """ human-readable form, e.g. "x^3 + x^2 + 1" """
+ expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2)
+ retval = map(lambda i: expd[min(i,2)].format(i), self.exponents)
+ return " + ".join(retval)
+ def __repr__(self):
+ return "LFSRPolynomial(%s)" % self.exponents
+# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa
+LFSR_POLY_2 = LFSRPolynomial([2, 1, 0])  # x^2 + x + 1
+LFSR_POLY_3 = LFSRPolynomial([3, 2, 0])  # x^3 + x^2 + 1
+LFSR_POLY_4 = LFSRPolynomial([4, 3, 0])  # x^4 + x^3 + 1
+LFSR_POLY_5 = LFSRPolynomial([5, 3, 0])  # x^5 + x^3 + 1
+LFSR_POLY_6 = LFSRPolynomial([6, 5, 0])  # x^6 + x^5 + 1
+LFSR_POLY_7 = LFSRPolynomial([7, 6, 0])  # x^7 + x^6 + 1
+LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0])  # x^8 + x^6 + x^5 + x^4 + 1
+LFSR_POLY_9 = LFSRPolynomial([9, 5, 0])  # x^9 + x^5 + 1
+LFSR_POLY_10 = LFSRPolynomial([10, 7, 0])  # x^10 + x^7 + 1
+LFSR_POLY_11 = LFSRPolynomial([11, 9, 0])  # x^11 + x^9 + 1
+LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0])  # x^12 + x^11 + x^10 + x^4 + 1
+LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0])  # x^13 + x^12 + x^11 + x^8 + 1
+LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0])  # x^14 + x^13 + x^12 + x^2 + 1
+LFSR_POLY_15 = LFSRPolynomial([15, 14, 0])  # x^15 + x^14 + 1
+LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0])  # x^16 + x^15 + x^13 + x^4 + 1
+LFSR_POLY_17 = LFSRPolynomial([17, 14, 0])  # x^17 + x^14 + 1
+LFSR_POLY_18 = LFSRPolynomial([18, 11, 0])  # x^18 + x^11 + 1
+LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0])  # x^19 + x^18 + x^17 + x^14 + 1
+LFSR_POLY_20 = LFSRPolynomial([20, 17, 0])  # x^20 + x^17 + 1
+LFSR_POLY_21 = LFSRPolynomial([21, 19, 0])  # x^21 + x^19 + 1
+LFSR_POLY_22 = LFSRPolynomial([22, 21, 0])  # x^22 + x^21 + 1
+LFSR_POLY_23 = LFSRPolynomial([23, 18, 0])  # x^23 + x^18 + 1
+LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0])  # x^24 + x^23 + x^22 + x^17 + 1
+class LFSR(LFSRPolynomial, Elaboratable):
+ """ implements a Linear Feedback Shift Register
+ """
+ def __init__(self, polynomial):
+ """ Inputs:
+ ------
+ :polynomial: the polynomial to feedback on. may be a LFSRPolynomial
+ instance or an iterable of ints (list/tuple/generator)
+ :enable: enable (set LO to disable. NOTE: defaults to HI)
+ Outputs:
+ -------
+ :state: the LFSR state. bitwidth is taken from the polynomial
+ maximum exponent.
+ Note: if an LFSRPolynomial is passed in as the input, because
+ LFSRPolynomial is derived from set() it's ok:
+ LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p)
+ """
+ LFSRPolynomial.__init__(self, polynomial)
+ self.state = Signal(self.max_exponent, reset=1)
+ self.enable = Signal(reset=1)
+ def elaborate(self, platform):
+ m = Module()
+ # do absolutely nothing if the polynomial is empty (always has a zero)
+ if self.max_exponent <= 1:
+ return m
+ # create XOR-bunch, select bits from state based on exponent
+ feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain)
+ for exponent in self:
+ if exponent > 0: # don't have to skip, saves CPU cycles though
+ feedback ^= self.state[exponent - 1]
+ # if enabled, shift-and-feedback
+ with m.If(self.enable):
+ # shift up lower bits by Cat'ing in a new bit zero (feedback)
+ newstate = Cat(feedback, self.state[:-1])
+ m.d.sync += self.state.eq(newstate)
+ return m
+# example: Poly24
+if __name__ == '__main__':
+ p24 = rtlil.convert(LFSR(LFSR_POLY_24))
+ with open("lfsr2_p24.il", "w") as f:
+ f.write(p24)
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from nmigen import Module
+from typing import Iterable, Optional, Iterator, Any, Union
+from typing_extensions import final
+class LFSRPolynomial(set):
+ def __init__(self, exponents: Iterable[int] = ()):
+ def elements() -> Iterable[int]: ...
+ @property
+ def exponents(self) -> list[int]: ...
+ def __str__(self) -> str: ...
+ def __repr__(self) -> str: ...
+# Type stub for the LFSR hardware block.
+# NOTE(review): an LFSR implementation visible nearby derives from
+# LFSRPolynomial and Elaboratable and exposes `state`/`enable` signals but no
+# `width` property -- confirm this stub is in sync with it
+class LFSR:
+ def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ...
+ @property
+ def width(self) -> int: ...
+ def elaborate(self, platform: Any) -> Module: ...
--- /dev/null
+ python3 Cam.py generate -t v > Cam.v
--- /dev/null
+from nmigen import Cat, Memory, Module, Signal, Elaboratable
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+class MemorySet(Elaboratable):
+ # One way of a set-associative cache: a Memory addressed by set number
+ # whose words carry an active flag, the data and the tag, plus a
+ # combinatorial tag comparison on the word being read.
+ def __init__(self, data_size, tag_size, set_count, active):
+ # Arguments:
+ # * data_size: width in bits of the data field
+ # * tag_size: width in bits of the tag field
+ # * set_count: depth of the memory (number of sets)
+ # * active: bit index of the active flag inside a memory word
+ self.active = active
+ input_size = tag_size + data_size # Size of the input data
+ memory_width = input_size + 1 # The width of the cache memory
+ # NOTE(review): repeats the assignment already made above
+ self.active = active
+ self.data_size = data_size
+ self.tag_size = tag_size
+ # XXX TODO, use rd-enable and wr-enable?
+ self.mem = Memory(memory_width, set_count)
+ self.r = self.mem.read_port()
+ self.w = self.mem.write_port()
+ # inputs (address)
+ self.cset = Signal(max=set_count) # The set to be checked
+ self.tag = Signal(tag_size) # The tag to find
+ self.data_i = Signal(data_size) # Incoming data
+ # outputs
+ self.valid = Signal()
+ self.data_o = Signal(data_size) # Outgoing data (excludes tag)
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.mem = self.mem
+ m.submodules.r = self.r
+ m.submodules.w = self.w
+ # temporaries
+ active_bit = Signal()
+ tag_valid = Signal()
+ # field boundaries inside a memory word: data follows the active
+ # bit, the tag follows the data
+ data_start = self.active + 1
+ data_end = data_start + self.data_size
+ tag_start = data_end
+ tag_end = tag_start + self.tag_size
+ # connect the read port address to the set/entry
+ read_port = self.r
+ m.d.comb += read_port.addr.eq(self.cset)
+ # Pull out active bit from data
+ data = read_port.data
+ m.d.comb += active_bit.eq(data[self.active])
+ # Validate given tag vs stored tag
+ tag = data[tag_start:tag_end]
+ m.d.comb += tag_valid.eq(self.tag == tag)
+ # An entry is only valid if the tags match AND
+ # is marked as a valid entry
+ m.d.comb += self.valid.eq(tag_valid & active_bit)
+ # output data: TODO, check rd-enable?
+ m.d.comb += self.data_o.eq(data[data_start:data_end])
+ # connect the write port addr to the set/entry (only if write enabled)
+ # (which is only done on a match, see SAC.write_entry below)
+ write_port = self.w
+ with m.If(write_port.en):
+ m.d.comb += write_port.addr.eq(self.cset)
+ # written word, LSB first: constant 1 (flag), data, tag
+ # NOTE(review): the flag is written at bit 0, which lines up with the
+ # read-side slicing above only when `active` is 0 -- confirm
+ m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag))
+ return m
--- /dev/null
+from nmigen import Module, Signal, Elaboratable
+from nmigen.cli import main
+from TLB.PteEntry import PteEntry
+class PermissionValidator(Elaboratable):
+ """ The purpose of this Module is to check the Permissions of a given PTE
+ against the requested access permissions.
+ This module will either validate (by setting the valid bit HIGH)
+ the request or find a permission fault and invalidate (by setting
+ the valid bit LOW) the request
+ """
+ def __init__(self, asid_size, pte_size):
+ """ Arguments:
+ * asid_size: (bit count) The size of the asid to be processed
+ * pte_size: (bit count) The size of the pte to be processed
+ Return:
+ * valid HIGH when permissions are correct
+ """
+ # Internal
+ self.pte_entry = PteEntry(asid_size, pte_size)
+ # Input
+ self.data = Signal(asid_size + pte_size);
+ self.xwr = Signal(3) # Execute, Write, Read
+ self.super_mode = Signal(1) # Supervisor Mode
+ self.super_access = Signal(1) # Supervisor Access
+ self.asid = Signal(15) # Address Space IDentifier (ASID)
+ # Output
+ self.valid = Signal(1) # Denotes if the permissions are correct
+ def elaborate(self, platform=None):
+ m = Module()
+ m.submodules.pte_entry = self.pte_entry
+ m.d.comb += self.pte_entry.i.eq(self.data)
+ # Check if the entry is valid
+ with m.If(self.pte_entry.v):
+ # ASID match or Global Permission
+ # Note that the MSB bound is exclusive
+ with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g):
+ # Check Execute, Write, Read (XWR) Permissions
+ with m.If(self.pte_entry.xwr == self.xwr):
+ # Supervisor Logic
+ with m.If(self.super_mode):
+ # Valid if entry is not in user mode or supervisor
+ # has Supervisor User Memory (SUM) access via the
+ # SUM bit in the sstatus register
+ m.d.comb += self.valid.eq((~self.pte_entry.u) \
+ | self.super_access)
+ # User logic
+ with m.Else():
+ # Valid if the entry is in user mode only
+ m.d.comb += self.valid.eq(self.pte_entry.u)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ with m.Else():
+ m.d.comb += self.valid.eq(0)
+ return m
--- /dev/null
+from nmigen import Module, Signal, Elaboratable
+from nmigen.cli import main
+class PteEntry(Elaboratable):
+ """ The purpose of this Module is to centralize the parsing of Page
+ Table Entries (PTE) into one module to prevent common mistakes
+ and duplication of code. The control bits are parsed out for
+ ease of use.
+ This module parses according to the standard PTE given by the
+ Volume II: RISC-V Privileged Architectures V1.10 Pg 60.
+ The Address Space IDentifier (ASID) is appended to the MSB of the input
+ and is parsed out as such.
+ An valid input Signal would be:
+ Bits:[78-64][63-0]
+ The output PTE value will include the control bits.
+ """
+ def __init__(self, asid_size, pte_size):
+ """ Arguments:
+ * asid_size: (bit count) The size of the asid to be processed
+ * pte_size: (bit count) The size of the pte to be processed
+ Return:
+ * d The Dirty bit from the PTE portion of i
+ * a The Accessed bit from the PTE portion of i
+ * g The Global bit from the PTE portion of i
+ * u The User Mode bit from the PTE portion of i
+ * xwr The Execute/Write/Read bit from the PTE portion of i
+ * v The Valid bit from the PTE portion of i
+ * asid The asid portion of i
+ * pte The pte portion of i
+ """
+ # Internal
+ self.asid_start = pte_size
+ self.asid_end = pte_size + asid_size
+ # Input
+ self.i = Signal(asid_size + pte_size)
+ # Output
+ self.d = Signal(1) # Dirty bit (From pte)
+ self.a = Signal(1) # Accessed bit (From pte)
+ self.g = Signal(1) # Global Access (From pte)
+ self.u = Signal(1) # User Mode (From pte)
+ self.xwr = Signal(3) # Execute Read Write (From pte)
+ self.v = Signal(1) # Valid (From pte)
+ self.asid = Signal(asid_size) # Associated Address Space IDentifier
+ self.pte = Signal(pte_size) # Full Page Table Entry
+ def elaborate(self, platform=None):
+ m = Module()
+ # Pull out all control bites from PTE
+ m.d.comb += [
+ self.d.eq(self.i[7]),
+ self.a.eq(self.i[6]),
+ self.g.eq(self.i[5]),
+ self.u.eq(self.i[4]),
+ self.xwr.eq(self.i[1:4]),
+ self.v.eq(self.i[0])
+ ]
+ m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end])
+ m.d.comb += self.pte.eq(self.i[0:self.asid_start])
+ return m
--- /dev/null
+Online simulator of 4-way set-associative cache:
+Python simulator of a N-way set-associative cache:
+from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable
+from nmigen.compat.genlib import fsm
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+from .AddressEncoder import AddressEncoder
+from .MemorySet import MemorySet
+# TODO: use a LFSR that advances continuously and picking the bottom
+# few bits from it to select which cache line to replace, instead of PLRU
+# http://bugs.libre-riscv.org/show_bug.cgi?id=71
+from .ariane.plru import PLRU
+from .LFSR import LFSR, LFSR_POLY_24
+SA_NA = "00" # no action (none)
+SA_RD = "01" # read
+SA_WR = "10" # write
+class SetAssociativeCache(Elaboratable):
+ """ Set Associative Cache Memory
+ The purpose of this module is to generate a memory cache given the
+ constraints passed in. This will create a n-way set associative cache.
+ It is expected for the SV TLB that the VMA will provide the set number
+ while the ASID provides the tag (still to be decided).
+ """
+ def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False):
+ """ Arguments
+ * tag_size (bits): The bit count of the tag
+ * data_size (bits): The bit count of the data to be stored
+ * set_count (number): The number of sets/entries in the cache
+ * way_count (number): The number of slots a data can be stored
+ in one set
+ * lfsr: if set, use an LFSR for (pseudo-randomly) selecting
+ set/entry to write to. otherwise, use a PLRU
+ """
+ # Internals
+ self.lfsr_mode = lfsr
+ self.way_count = way_count # The number of slots in one set
+ self.tag_size = tag_size # The bit count of the tag
+ self.data_size = data_size # The bit count of the data to be stored
+ # set up Memory array
+ # one MemorySet per way, each set_count entries deep
+ self.mem_array = Array() # memory array
+ for i in range(way_count):
+ ms = MemorySet(data_size, tag_size, set_count, active=0)
+ self.mem_array.append(ms)
+ # Finds valid entries
+ self.encoder = AddressEncoder(way_count)
+ # setup PLRU or LFSR
+ # only one of self.lfsr / self.plru (+ self.plru_array) gets created
+ if lfsr:
+ # LFSR mode
+ self.lfsr = LFSR(LFSR_POLY_24)
+ else:
+ # PLRU mode
+ self.plru = PLRU(way_count) # One block to handle plru calculations
+ self.plru_array = Array() # PLRU data on each set
+ for i in range(set_count):
+ name="plru%d" % i
+ self.plru_array.append(Signal(self.plru.TLBSZ, name=name))
+ # Input
+ self.enable = Signal(1) # Whether the cache is enabled
+ self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX)
+ self.cset = Signal(max=set_count) # The set to be checked
+ self.tag = Signal(tag_size) # The tag to find
+ self.data_i = Signal(data_size) # The input data
+ # Output
+ self.ready = Signal(1) # 0 => Processing 1 => Ready for commands
+ self.hit = Signal(1) # Tag matched one way in the given set
+ self.multiple_hit = Signal(1) # Tag matched many ways in the given set
+ self.data_o = Signal(data_size) # The data linked to the matched tag
+ def check_tags(self, m):
+ """ Validate the tags in the selected set. The per-way valid flags
+ are fed to the encoder; on a hit the matching way's data is
+ output, the PLRU is accessed (PLRU mode only) and the FSM moves
+ to FINISHED_READ. We only advance to next state if a single
+ hit is found; otherwise data_o is driven to zero.
+ Must be called from inside an FSM state because it sets m.next.
+ """
+ # Vector to store way valid results
+ # A zero denotes a way is invalid
+ valid_vector = []
+ # Loop through memory to prep read/write ports and set valid_vector
+ for i in range(self.way_count):
+ valid_vector.append(self.mem_array[i].valid)
+ # Pass encoder the valid vector
+ m.d.comb += self.encoder.i.eq(Cat(*valid_vector))
+ # Only one entry should be marked
+ # This is due to already verifying the tags
+ # matched and the valid bit is high
+ with m.If(self.hit):
+ m.next = "FINISHED_READ"
+ # Pull out data from the read port
+ data = self.mem_array[self.encoder.o].data_o
+ m.d.comb += self.data_o.eq(data)
+ if not self.lfsr_mode:
+ self.access_plru(m)
+ # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k
+ with m.Elif(self.multiple_hit):
+ # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
+ m.d.comb += self.data_o.eq(0)
+ # No tag matches means no data
+ with m.Else():
+ # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck
+ m.d.comb += self.data_o.eq(0)
+ def access_plru(self, m):
+ """ An entry was accessed and the plru tree must now be updated
+ """
+ # Pull out the set's entry being edited
+ plru_entry = self.plru_array[self.cset]
+ m.d.comb += [
+ # Set the plru data to the current state
+ self.plru.plru_tree.eq(plru_entry),
+ # Set that the cache was accessed
+ self.plru.lu_access_i.eq(1)
+ ]
+ def read(self, m):
+ """ Go through the read process of the cache.
+ This takes two cycles to complete. First it checks for a valid tag
+ and secondly it updates the LRU values.
+ """
+ # fsm_read is not referenced again inside this method
+ with m.FSM() as fsm_read:
+ with m.State("READY"):
+ m.d.comb += self.ready.eq(0)
+ # check_tags will set the state if the conditions are met
+ self.check_tags(m)
+ with m.State("FINISHED_READ"):
+ m.next = "READY"
+ m.d.comb += self.ready.eq(1)
+ # PLRU mode only: store the updated tree back into this set's register
+ if not self.lfsr_mode:
+ plru_tree_o = self.plru.plru_tree_o
+ m.d.sync += self.plru_array[self.cset].eq(plru_tree_o)
+ def write_entry(self, m):
+ if not self.lfsr_mode:
+ m.d.comb += [# set cset (mem address) into PLRU
+ self.plru.plru_tree.eq(self.plru_array[self.cset]),
+ # and connect plru to encoder for write
+ self.encoder.i.eq(self.plru.replace_en_o)
+ ]
+ write_port = self.mem_array[self.encoder.o].w
+ else:
+ # use the LFSR to generate a random(ish) one of the mem array
+ lfsr_output = Signal(max=self.way_count)
+ lfsr_random = Signal(max=self.way_count)
+ m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits
+ # address too big, limit to range of array
+ m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count,
+ lfsr_output - self.way_count,
+ lfsr_output))
+ write_port = self.mem_array[lfsr_random].w
+ # then if there is a match from the encoder, enable the selected write
+ with m.If(self.encoder.single_match):
+ m.d.comb += write_port.en.eq(1)
+ def write(self, m):
+ """ Go through the write process of the cache.
+ This takes two cycles to complete. First it writes the entry,
+ and secondly it updates the PLRU (in plru mode)
+ """
+ # fsm_write is not referenced again inside this method
+ with m.FSM() as fsm_write:
+ with m.State("READY"):
+ m.d.comb += self.ready.eq(0)
+ self.write_entry(m)
+ # unlike the read FSM, this always advances after one cycle
+ m.next ="FINISHED_WRITE"
+ with m.State("FINISHED_WRITE"):
+ m.d.comb += self.ready.eq(1)
+ # PLRU mode only: store the updated tree back into this set's register
+ if not self.lfsr_mode:
+ plru_entry = self.plru_array[self.cset]
+ m.d.sync += plru_entry.eq(self.plru.plru_tree_o)
+ m.next = "READY"
+ def elaborate(self, platform=None):
+ # Build the module: register submodules, wire the shared inputs to
+ # every way, then dispatch on `command` while `enable` is high.
+ m = Module()
+ # ----
+ # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array
+ # ----
+ m.submodules.AddressEncoder = self.encoder
+ if self.lfsr_mode:
+ m.submodules.LFSR = self.lfsr
+ else:
+ m.submodules.PLRU = self.plru
+ # ways are registered as mem0, mem1, ...
+ for i, mem in enumerate(self.mem_array):
+ setattr(m.submodules, "mem%d" % i, mem)
+ # ----
+ # select mode: PLRU connect to encoder, LFSR do... something
+ # ----
+ if not self.lfsr_mode:
+ # Set what entry was hit
+ m.d.comb += self.plru.lu_hit.eq(self.encoder.o)
+ else:
+ # enable LFSR
+ m.d.comb += self.lfsr.enable.eq(self.enable)
+ # ----
+ # connect hit/multiple hit to encoder output
+ # ----
+ m.d.comb += [
+ self.hit.eq(self.encoder.single_match),
+ self.multiple_hit.eq(self.encoder.multiple_match),
+ ]
+ # ----
+ # connect incoming data/tag/cset(addr) to mem_array
+ # ----
+ for mem in self.mem_array:
+ write_port = mem.w
+ m.d.comb += [mem.cset.eq(self.cset),
+ mem.tag.eq(self.tag),
+ mem.data_i.eq(self.data_i),
+ write_port.en.eq(0), # default: disable write
+ ]
+ # ----
+ # Commands: READ/WRITE/TODO
+ # ----
+ # SA_NA ("00") and "11" have no case here
+ with m.If(self.enable):
+ with m.Switch(self.command):
+ # Search all sets at a particular tag
+ with m.Case(SA_RD):
+ self.read(m)
+ with m.Case(SA_WR):
+ self.write(m)
+ # Maybe catch multiple tags write here?
+ # TODO
+ # TODO: invalidate/flush, flush-all?
+ return m
+ def ports(self):
+ return [self.enable, self.command, self.cset, self.tag, self.data_i,
+ self.ready, self.hit, self.multiple_hit, self.data_o]
+if __name__ == '__main__':
+ sac = SetAssociativeCache(4, 8, 4, 6)
+ vl = rtlil.convert(sac, ports=sac.ports())
+ with open("SetAssociativeCache.il", "w") as f:
+ f.write(vl)
+ sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True)
+ vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports())
+ with open("SetAssociativeCacheLFSR.il", "w") as f:
+ f.write(vl)
--- /dev/null
+""" TLB Module
+ The expected form of the data is:
+ * Item (Bits)
+ * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0)
+"""
+from nmigen import Memory, Module, Signal, Cat, Elaboratable
+from nmigen.cli import main
+from .PermissionValidator import PermissionValidator
+from .Cam import Cam
+class TLB(Elaboratable):
+ def __init__(self, asid_size, vma_size, pte_size, L1_size):
+ """ Arguments
+ * asid_size: Address Space IDentifier (ASID) typically 15 bits
+ * vma_size: Virtual Memory Address (VMA) typically 36 bits
+ * pte_size: Page Table Entry (PTE) typically 64 bits
+ * L1_size: number of entries in the L1 CAM and L1 memory
+ Notes:
+ These arguments should represent the largest possible size
+ defined by the MODE settings. See
+ Volume II: RISC-V Privileged Architectures V1.10 Page 57
+ """
+ # Internal
+ # NOTE(review): not read anywhere in the methods shown for this class
+ self.state = 0
+ # L1 Cache Modules
+ # the CAM matches on the VMA; the memory holds ASID + PTE per entry
+ self.cam_L1 = Cam(vma_size, L1_size)
+ self.mem_L1 = Memory(asid_size + pte_size, L1_size)
+ # Permission Validator
+ self.perm_validator = PermissionValidator(asid_size, pte_size)
+ # Inputs
+ self.supermode = Signal(1) # Supervisor Mode
+ self.super_access = Signal(1) # Supervisor Access
+ self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
+ self.xwr = Signal(3) # Execute, Write, Read
+ self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
+ self.address_L1 = Signal(max=L1_size)
+ self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
+ self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
+ self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
+ # Outputs
+ self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
+ self.perm_valid = Signal(1) # Denotes if the permissions are correct
+ self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
+ def search(self, m, read_L1, write_L1):
+ """ searches the TLB
+ Arguments:
+ * m: the Module being built
+ * read_L1: read port of the L1 memory
+ * write_L1: write port of the L1 memory (held disabled here)
+ """
+ m.d.comb += [
+ write_L1.en.eq(0),
+ self.cam_L1.write_enable.eq(0),
+ self.cam_L1.data_in.eq(self.vma)
+ ]
+ # Match found in L1 CAM
+ # a multiple match is treated as a match as well
+ match_found = Signal(reset_less=True)
+ m.d.comb += match_found.eq(self.cam_L1.single_match
+ | self.cam_L1.multiple_match)
+ with m.If(match_found):
+ # Memory shortcut variables
+ mem_address = self.cam_L1.match_address
+ # Memory Logic
+ m.d.comb += read_L1.addr.eq(mem_address)
+ # Permission Validator Logic
+ m.d.comb += [
+ self.hit.eq(1),
+ # Set permission validator data to the correct
+ # register file data according to CAM match
+ # address
+ self.perm_validator.data.eq(read_L1.data),
+ # Execute, Read, Write
+ self.perm_validator.xwr.eq(self.xwr),
+ # Supervisor Mode
+ self.perm_validator.super_mode.eq(self.supermode),
+ # Supervisor Access
+ self.perm_validator.super_access.eq(self.super_access),
+ # Address Space IDentifier (ASID)
+ self.perm_validator.asid.eq(self.asid),
+ # Output result of permission validation
+ self.perm_valid.eq(self.perm_validator.valid)
+ ]
+ # Only output PTE if permissions are valid
+ with m.If(self.perm_validator.valid):
+ # XXX TODO - dummy for now
+ # reg_data is not driven in this method, so pte_out does not yet
+ # carry the PTE read from the L1 memory
+ reg_data = Signal.like(self.pte_out)
+ m.d.comb += [
+ self.pte_out.eq(reg_data)
+ ]
+ with m.Else():
+ m.d.comb += [
+ self.pte_out.eq(0)
+ ]
+ # Miss Logic
+ with m.Else():
+ m.d.comb += [
+ self.hit.eq(0),
+ self.perm_valid.eq(0),
+ self.pte_out.eq(0)
+ ]
+ def write_l1(self, m, read_L1, write_L1):
+ """ writes to the L1 cache
+ Stores ASID + PTE at address_L1 in the L1 memory and presents
+ the VMA to the CAM with its write enable set. read_L1 is not
+ used here.
+ """
+ # Memory_L1 Logic
+ m.d.comb += [
+ write_L1.en.eq(1),
+ write_L1.addr.eq(self.address_L1),
+ # The Cat places arguments from LSB -> MSB
+ write_L1.data.eq(Cat(self.pte_in, self.asid))
+ ]
+ # CAM_L1 Logic
+ m.d.comb += [
+ self.cam_L1.write_enable.eq(1),
+ self.cam_L1.data_in.eq(self.vma), #data_in is sent to all entries
+ # self.cam_L1.address_in.eq(todo) # a CAM entry needs to be selected
+ ]
+ def elaborate(self, platform):
+ # Build the module: register the CAM, the L1 memory ports and the
+ # permission validator, then dispatch on `command` whenever `mode`
+ # is non-zero.
+ m = Module()
+ # Add submodules
+ # Submodules for L1 Cache
+ m.submodules.cam_L1 = self.cam_L1
+ m.submodules.read_L1 = read_L1 = self.mem_L1.read_port()
+ m.submodules.write_L1 = write_L1 = self.mem_L1.write_port()
+ # Permission Validator Submodule
+ # NOTE(review): submodule name is spelled "perm_valididator"
+ m.submodules.perm_valididator = self.perm_validator
+ # When MODE specifies translation
+ # TODO add in different bit length handling ie prefix 0s
+ tlb_enable = Signal(reset_less=True)
+ m.d.comb += tlb_enable.eq(self.mode != 0)
+ with m.If(tlb_enable):
+ m.d.comb += [
+ self.cam_L1.enable.eq(1)
+ ]
+ with m.Switch(self.command):
+ # Search
+ with m.Case("01"):
+ self.search(m, read_L1, write_L1)
+ # Write L1
+ # Expected that the miss will be handled in software
+ with m.Case("10"):
+ self.write_l1(m, read_L1, write_L1)
+ # TODO
+ #with m.Case("11"):
+ # When disabled
+ with m.Else():
+ m.d.comb += [
+ self.cam_L1.enable.eq(0),
+ # XXX TODO - self.reg_file.enable.eq(0),
+ self.hit.eq(0),
+ self.perm_valid.eq(0), # XXX TODO, check this
+ self.pte_out.eq(0)
+ ]
+ return m
+if __name__ == '__main__':
+ tlb = TLB(15, 36, 64, 4)
+ main(tlb, ports=[ tlb.supermode, tlb.super_access, tlb.command,
+ tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid,
+ tlb.vma, tlb.pte_in,
+ tlb.hit, tlb.perm_valid, tlb.pte_out,
+ ] + tlb.cam_L1.ports())
--- /dev/null
+#include <cstdint>
+#include <iostream>
+#include <cmath>
+#define NWAY 4
+#define NLINE 256
+#define HIT 0
+#define MISS 1
+#define MS 1000
+Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing
+Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt
+four-way set associative - three bits
+ each bit represents one branch point in a binary decision tree; let 1
+ represent that the left side has been referenced more recently than the
+ right side, and 0 vice-versa
+ are all 4 lines valid?
+ / \
+ yes no, use an invalid line
+ |
+ |
+ |
+ bit_0 == 0? state | replace ref to | next state
+ / \ ------+-------- -------+-----------
+ y n 00x | line_0 line_0 | 11_
+ / \ 01x | line_1 line_1 | 10_
+ bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
+ / \ / \ 1x1 | line_3 line_3 | 0_0
+ y n y n
+ / \ / \ ('x' means ('_' means unchanged)
+ line_0 line_1 line_2 line_3 don't care)
+ 8-way set associative - 7 = 1+2+4 bits
+16-way set associative - 15 = 1+2+4+8 bits
+32-way set associative - 31 = 1+2+4+8+16 bits
+64-way set associative - 63 = 1+2+4+8+16+32 bits
+using namespace std;
+struct AddressField {
+ uint64_t wd_idx : 2;//Unused
+ uint64_t offset : 4;//Unused
+ uint64_t index : 8;//NLINE = 256 = 2^8
+ uint64_t tag : 50;
+union Address {
+ uint32_t* p;
+ AddressField fields;
+struct Cell {
+ bool v;
+ uint64_t tag;
+ Cell() : v(false), tag(0) {}
+ // True when this cell is valid and its stored tag equals the given tag.
+ bool isHit(uint64_t tag) {
+ return v && (tag == this->tag);
+ }
+ // Fill the cell for `address`: store the address's tag field and mark the
+ // cell valid. The pointer is only decoded through the Address union.
+ void fetch(uint32_t* address) {
+ Address addr;
+ addr.p = address;
+ // cleared on the local copy only; tag is the only field kept
+ addr.fields.offset = 0;
+ addr.fields.wd_idx = 0;
+ tag = addr.fields.tag;
+ v = true;
+ }
+ostream& operator<<(ostream & out, const Cell& cell) {
+ out << " v:" << cell.v << " tag:" << hex << cell.tag;
+ return out;
+struct Block {
+ Cell cell[NWAY];
+ uint32_t state;
+ uint64_t *mask;//Mask the state to get accurate value for specified 1 bit.
+ uint64_t *value;
+ uint64_t *next_value;
+ // Build the per-way lookup tables for the configured NWAY (4 or 8):
+ //   mask[i]       - state bits that matter for way i
+ //   value[i]      - masked state for which way i is picked as the victim
+ //   next_value[i] - bits OR-ed into the state after a hit on way i
+ // NOTE(review): the arrays come from new[] and no matching delete[] is
+ // visible here -- confirm whether that is intentional for this simulator
+ Block() : state(0) {
+ switch (NWAY) {
+ case 4:
+ mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101};
+ value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101};
+ next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000};
+ break;
+ case 8:
+ mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001,
+ 0b1010001};
+ value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000,
+ 0b1010001};
+ next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000,
+ 0b0000001, 0b0000000};
+ break;
+ //TODO - more NWAY goes here.
+ default:
+ // the three table pointers are left unassigned on this path
+ std::cout << "Error definition NWAY = " << NWAY << std::endl;
+ }
+ }
+ // Look for a way whose cell hits on `tag`. On a hit the way index is
+ // written to *pway and pway itself is returned; otherwise NULL.
+ uint32_t *getByTag(uint64_t tag, uint32_t *pway) {
+ for (int i = 0; i < NWAY; ++i) {
+ if (cell[i].isHit(tag)) {
+ *pway = i;
+ return pway;
+ }
+ }
+ return NULL;
+ }
+ // Miss path: pick the first way whose masked state equals its value[]
+ // entry, flip that way's masked state bits, load `address` into the way
+ // and log the transition. Way 0 is used if no entry matches.
+ void setLRU(uint32_t *address) {
+ int way = 0;
+ uint32_t st = state; // state before the update, for the log line
+ for (int i = 0; i < NWAY; ++i) {
+ if ((state & mask[i]) == value[i]) {
+ state ^= mask[i];
+ way = i;
+ break;
+ }
+ }
+ cell[way].fetch(address);
+ cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl;
+ }
+ // Look up `address` by its tag field; *pway receives the way on a hit.
+ // Returns NULL on a miss and a non-NULL pointer on a hit.
+ // NOTE(review): the hit pointer is derived from pway plus the offset
+ // field, not from cached data; the caller visible here only compares it
+ // with NULL -- confirm it is never dereferenced
+ uint32_t *get(uint32_t *address, uint32_t *pway) {
+ Address addr;
+ addr.p = address;
+ uint32_t *d = getByTag(addr.fields.tag, pway);
+ if (d != NULL) {
+ return &d[addr.fields.offset];
+ }
+ return d;
+ }
+ // Access `address` in this block. On a hit the way's masked state bits
+ // are cleared and replaced by next_value[way], and HIT is returned; on a
+ // miss the line is filled through setLRU() and MISS is returned.
+ int set(uint32_t *address) {
+ uint32_t way = 0;
+ uint32_t *p = get(address, &way);
+ if (p != NULL) {
+ printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state);
+ state &= ~mask[way];
+ printf("%X --> ", state);
+ state |= next_value[way];
+ printf("%X\n", state);
+ // *p = *address; //skip since address is fake.
+ return HIT;
+ } else {
+ setLRU(address);
+ return MISS;
+ }
+ }
+ostream& operator<<(ostream & out, const Block& block) {
+ out << "state:" << block.state << " ";
+ for (int i = 0; i<NWAY; i++) {
+ out << block.cell[i];
+ }
+ return out;
+struct Cache {
+ Block block[NLINE];
+ uint32_t count[2];
+ Cache() { count[HIT] = 0; count[MISS] = 0; }
+ // Route one access to the block selected by the address's index field
+ // and count the result under count[HIT] or count[MISS].
+ void access(uint32_t* address) {
+ Address addr;
+ addr.p = address;
+ Block& b = block[addr.fields.index];
+ ++count[b.set(address)];
+ }
+ostream& operator<<(ostream & out, const Cache& cache) {
+ out << "\n==Summary==\n\tHit: " << cache.count[HIT] << " Miss: " << cache.count[MISS] << std::endl;
+ for (int i = 0; i < NLINE; i++) {
+ out << cache.block[i] << endl;
+ }
+ return out;
+Cache cache;
+void multiply(uint32_t* m1, uint32_t* m2, uint32_t* res)
+ int x, i, j;
+ for (i = 0; i < MS; i++) {
+ for (j = 0; j < MS; j++) {
+ cache.access(res + i*MS +j);
+ for (x = 0; x < MS; x++) {
+ cache.access(m1 + i*MS + x);
+ cache.access(m2 + x*MS + j);
+ cache.access(res + i*MS +j);
+ // res[i][j] += m1[i][x] * m2[x][j];
+ cache.access(res + i*MS +j);
+ }
+ }
+ }
+int main()
+ uint32_t* m1 = (uint32_t*) 0xFACE00A000000000LL; // fake virtual address; don’t access it
+ uint32_t* m2 = (uint32_t*) 0xFACE00B000000000LL; // fake virtual address; don’t access it
+ uint32_t* res = (uint32_t*) 0xFACE00C000000000LL; // fake virtual address; don’t access it
+ multiply(m1, m2, res);
+ cout << cache << endl;
+ return 0;
--- /dev/null
+from nmigen import Const
+# Exception cause codes as 64-bit constants.
+# NOTE(review): the numbering looks like the RISC-V privileged-spec cause
+# codes (0, 10 and 14 are not defined here) -- confirm against the spec
+INSTR_ACCESS_FAULT = Const(1, 64)
+ILLEGAL_INSTR = Const(2, 64)
+BREAKPOINT = Const(3, 64)
+LD_ADDR_MISALIGNED = Const(4, 64)
+LD_ACCESS_FAULT = Const(5, 64)
+ST_ADDR_MISALIGNED = Const(6, 64)
+ST_ACCESS_FAULT = Const(7, 64)
+ENV_CALL_UMODE = Const(8, 64) # environment call from user mode
+ENV_CALL_SMODE = Const(9, 64) # environment call from supervisor mode
+ENV_CALL_MMODE = Const(11, 64) # environment call from machine mode
+INSTR_PAGE_FAULT = Const(12, 64) # Instruction page fault
+LOAD_PAGE_FAULT = Const(13, 64) # Load page fault
+STORE_PAGE_FAULT = Const(15, 64) # Store page fault
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+# Author: Florian Zaruba, ETH Zurich
+# Date: 12.11.2017
+# Description: Handles cache misses.
+from nmigen.lib.coding import Encoder, PriorityEncoder
+# --------------
+# MISS Handler
+# --------------
+import ariane_pkg::*;
+import std_cache_pkg::*;
+unsigned NR_PORTS = 3
+class MissReq(RecordObject):
+ def __init__(self, name=None):
+ Record.__init__(self, name)
+ self.valid = Signal()
+ self.addr = Signal(64)
+ self.be = Signal(8)
+ self.size = Signal(2)
+ self.we = Signal()
+ self.wdata = Signal(64)
+ bypass = Signal()
+# One cache line: tag, data and the valid/dirty state bits.
+# Widths come from DCACHE_TAG_WIDTH / DCACHE_LINE_WIDTH, defined elsewhere.
+class CacheLine:
+ def __init__(self):
+ self.tag = Signal(DCACHE_TAG_WIDTH) # tag array
+ self.data = Signal(DCACHE_LINE_WIDTH) # data array
+ self.valid = Signal() # state array
+ self.dirty = Signal() # state array
+# cache line byte enable
+class CLBE:
+ def __init__(self):
+ self.tag = Signal(DCACHE_TAG_WIDTH+7)//8) # byte enable into tag array
+ self.data = Signal(DCACHE_LINE_WIDTH+7)//8) # byte enable data array
+ # bit enable into state array (valid for a pair of dirty/valid bits)
+ self.vldrty = Signal(DCACHE_SET_ASSOC)
+ } cl_be_t;
+ # FSM states
+ enum logic [3:0] {
+ IDLE, # 0
+ FLUSH, # 2
+ MISS, # 7
+ MISS_REPL, # 9
+ INIT, # B
+ } state_d, state_q;
+class MissHandler(Elaboratable):
+ def __init__(self, NR_PORTS):
+ self.pwid = pwid = ceil(log(NR_PORTS) / log(2))
+ self.flush_i = Signal() # flush request
+ self.flush_ack_o = Signal() # acknowledge successful flush
+ self.miss_o = Signal()
+ self.busy_i = Signal() # dcache is busy with something
+ # Bypass or miss
+ self.miss_req_i = Array(MissReq(name="missreq") for i in range(NR_PORTS))
+ # Bypass handling
+ self.bypass_gnt_o = Signal(NR_PORTS)
+ self.bypass_valid_o = Signal(NR_PORTS)
+ self.bypass_data_o = Array(Signal(name="bdata_o", 64) \
+ for i in range(NR_PORTS))
+ # AXI port
+ output ariane_axi::req_t axi_bypass_o,
+ input ariane_axi::resp_t axi_bypass_i,
+ # Miss handling (~> cacheline refill)
+ self.miss_gnt_o = Signal(NR_PORTS)
+ self.active_serving_o = Signal(NR_PORTS)
+ self.critical_word_o = Signal(64)
+ self.critical_word_valid_o = Signal()
+ output ariane_axi::req_t axi_data_o,
+ input ariane_axi::resp_t axi_data_i,
+ self.mshr_addr_i = Array(Signal(name="bdata_o", 56) \
+ for i in range(NR_PORTS))
+ self.mshr_addr_matches_o = Signal(NR_PORTS)
+ self.mshr_index_matches_o = Signal(NR_PORTS)
+ # AMO
+ self.amo_req_i = AMOReq()
+ self.amo_resp_o = AMOResp()
+ # Port to SRAMs, for refill and eviction
+ self.req_o = Signal(DCACHE_SET_ASSOC)
+ self.addr_o = Signal(DCACHE_INDEX_WIDTH) # address into cache array
+ self.data_o = CacheLine()
+ self.be_o = CLBE()
+ self.data_i = Array(CacheLine() \
+ for i in range(DCACHE_SET_ASSOC))
+ self.we_o = Signal()
+ def elaborate(self, platform):
+ # Describes the miss handler logic: the cache-management FSM (miss,
+ # refill, write-back, flush, init and AMO sequencing), the MSHR alias
+ # check, the bypass arbiter hookup and the per-port unpacking of
+ # miss_req_i.
+ # NOTE(review): this method is only partially ported from SystemVerilog.
+ # Register/struct declarations, `if (...) begin`, the `?:` operator,
+ # `{...}` concatenation and the module instantiations (axi_adapter,
+ # lfsr_8bit, amo_alu) are still SV syntax, and several .eq( calls are
+ # missing their closing parenthesis, so this does not parse as Python.
+ # NOTE(review): `m`, `comb` and `sync` are used but never created here,
+ # nothing is returned, and ports are referenced without `self.`
+ # (e.g. data_i, req_o). NR_PORTS is used both bare and as
+ # self.NR_PORTS, and `wid` is not defined in this method.
+ # Registers
+ mshr_t mshr_d, mshr_q;
+ logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q;
+ logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q;
+ # cache line to evict
+ cache_line_t evict_cl_d, evict_cl_q;
+ logic serve_amo_d, serve_amo_q;
+ # Request from one FSM
+ miss_req_valid = Signal(self.NR_PORTS)
+ miss_req_bypass = Signal(self.NR_PORTS)
+ # NOTE(review): Signal(name=..., <width>) below passes a positional
+ # argument after a keyword argument, which Python rejects
+ miss_req_addr = Array(Signal(name="miss_req_addr", 64) \
+ for i in range(NR_PORTS))
+ miss_req_wdata = Array(Signal(name="miss_req_wdata", 64) \
+ for i in range(NR_PORTS))
+ miss_req_we = Signal(self.NR_PORTS)
+ miss_req_be = Array(Signal(name="miss_req_be", 8) \
+ for i in range(NR_PORTS))
+ miss_req_size = Array(Signal(name="miss_req_size", 2) \
+ for i in range(NR_PORTS))
+ # Cache Line Refill <-> AXI
+ req_fsm_miss_valid = Signal()
+ req_fsm_miss_addr = Signal(64)
+ req_fsm_miss_wdata = Signal(DCACHE_LINE_WIDTH)
+ req_fsm_miss_we = Signal()
+ req_fsm_miss_be = Signal(DCACHE_LINE_WIDTH//8)
+ ariane_axi::ad_req_t req_fsm_miss_req;
+ req_fsm_miss_size = Signal(2)
+ gnt_miss_fsm = Signal()
+ valid_miss_fsm = Signal()
+ # number of 64-bit words in one cache line
+ nmiss = DCACHE_LINE_WIDTH//64
+ data_miss_fsm = Array(Signal(name="data_miss_fsm", 64) \
+ for i in range(nmiss))
+ # Cache Management <-> LFSR
+ lfsr_enable = Signal()
+ lfsr_oh = Signal(DCACHE_SET_ASSOC)
+ lfsr_bin = Signal($clog2(DCACHE_SET_ASSOC-1))
+ # AMOs
+ ariane_pkg::amo_t amo_op;
+ amo_operand_a = Signal(64)
+ amo_operand_b = Signal(64)
+ amo_result_o = Signal(64)
+ struct packed {
+ logic [63:3] address;
+ logic valid;
+ } reservation_d, reservation_q;
+ # ------------------------------
+ # Cache Management
+ # ------------------------------
+ # per way: evict_way marks lines that are valid and dirty,
+ # valid_way marks lines that are valid
+ evict_way = Signal(DCACHE_SET_ASSOC)
+ valid_way = Signal(DCACHE_SET_ASSOC)
+ # NOTE(review): the loop header below has an unbalanced '('
+ for (i in range(DCACHE_SET_ASSOC):
+ comb += evict_way[i].eq(data_i[i].valid & data_i[i].dirty)
+ comb += valid_way[i].eq(data_i[i].valid)
+ # ----------------------
+ # Default Assignments
+ # ----------------------
+ # NOTE(review): the `x_d = x_q` lines below are plain Python (or SV)
+ # assignments, not nmigen .eq() statements
+ # to AXI refill
+ req_fsm_miss_req = ariane_axi::CACHE_LINE_REQ;
+ req_fsm_miss_size = Const(0b11, 2)
+ # core
+ serve_amo_d = serve_amo_q;
+ # --------------------------------
+ # Flush and Miss operation
+ # --------------------------------
+ state_d = state_q;
+ cnt_d = cnt_q;
+ evict_way_d = evict_way_q;
+ evict_cl_d = evict_cl_q;
+ mshr_d = mshr_q;
+ # communicate to the requester which unit we are currently serving
+ active_serving_o[mshr_q.id] = mshr_q.valid;
+ # AMOs
+ # silence the unit when not used
+ amo_op = amo_req_i.amo_op;
+ reservation_d = reservation_q;
+ # NOTE(review): states are opened with m.Case inside m.FSM below;
+ # nmigen's FSM construct declares states with m.State - confirm
+ # against the nmigen version in use
+ with m.FSM() as state_q:
+ with m.Case("IDLE"):
+ # lowest priority are AMOs, wait until everything else
+ # is served before going for the AMOs
+ with m.If (amo_req_i.req & ~busy_i):
+ # 1. Flush the cache
+ with m.If(~serve_amo_q):
+ m.next = "FLUSH_REQ_STATUS"
+ serve_amo_d.eq(0b1
+ cnt_d.eq(0
+ # 2. Do the AMO
+ with m.Else():
+ m.next = "AMO_LOAD"
+ serve_amo_d.eq(0b0
+ # check if we want to flush and can flush
+ # e.g.: we are not busy anymore
+ # TODO: Check that the busy flag is indeed needed
+ with m.If (flush_i & ~busy_i):
+ m.next = "FLUSH_REQ_STATUS"
+ cnt_d = 0
+ # check if one of the state machines missed
+ for i in range(NR_PORTS):
+ # here comes the refill portion of code
+ with m.If (miss_req_valid[i] & ~miss_req_bypass[i]):
+ m.next = "MISS"
+ # we are taking another request so don't
+ # take the AMO
+ serve_amo_d = 0b0;
+ # save to MSHR
+ comb += [ mshr_d.valid.eq(0b1),
+ mshr_d.we.eq(miss_req_we[i]),
+ mshr_d.id.eq(i),
+ mshr_d.addr.eq(miss_req_addr[i][0:wid]),
+ mshr_d.wdata.eq(miss_req_wdata[i]),
+ mshr_d.be.eq(miss_req_be[i]),
+ ]
+ # NOTE(review): this `break` ends the Python loop during
+ # elaboration, on its first iteration, so only port 0 is
+ # described; the SV `break` was a run-time priority select
+ break
+ # ~> we missed on the cache
+ with m.Case("MISS"):
+ # 1. Check if there is an empty cache-line
+ # 2. If not -> evict one
+ comb += req_o.eq(1)
+ sync += addr_o.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH]
+ m.next = "MISS_REPL"
+ comb += miss_o.eq(1)
+ # ~> second miss cycle
+ with m.Case("MISS_REPL"):
+ # if all are valid we need to evict one,
+ # pseudo random from LFSR
+ with m.If(~(~valid_way).bool()):
+ comb += lfsr_enable.eq(0b1)
+ comb += evict_way_d.eq(lfsr_oh)
+ # do we need to write back the cache line?
+ with m.If(data_i[lfsr_bin].dirty):
+ state_d = WB_CACHELINE_MISS;
+ comb += evict_cl_d.tag.eq(data_i[lfsr_bin].tag)
+ comb += evict_cl_d.data.eq(data_i[lfsr_bin].data)
+ comb += cnt_d.eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
+ # no - we can request a cache line now
+ with m.Else():
+ m.next = "REQ_CACHELINE"
+ # we have at least one free way
+ with m.Else():
+ # get victim cache-line by looking for the
+ # first non-valid bit
+ comb += evict_way_d.eq(get_victim_cl(~valid_way)
+ m.next = "REQ_CACHELINE"
+ # ~> we can just load the cache-line,
+ # the way is stored in evict_way_q
+ with m.Case("REQ_CACHELINE"):
+ comb += req_fsm_miss_valid .eq(1)
+ sync += req_fsm_miss_addr .eq(mshr_q.addr)
+ with m.If (gnt_miss_fsm):
+ m.next = "SAVE_CACHELINE"
+ comb += miss_gnt_o[mshr_q.id].eq(1)
+ # ~> replace the cacheline
+ with m.Case("SAVE_CACHELINE"):
+ # calculate cacheline offset
+ automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset;
+ sync += cl_offset.eq(mshr_q.addr[3:DCACHE_BYTE_OFFSET] << 6)
+ # we've got a valid response from refill unit
+ with m.If (valid_miss_fsm):
+ sync += addr_o .eq(mshr_q.addr[:DCACHE_INDEX_WIDTH])
+ sync += req_o .eq(evict_way_q)
+ comb += we_o .eq(1)
+ comb += be_o .eq(1)
+ sync += be_o.vldrty .eq(evict_way_q)
+ sync += data_o.tag .eq(mshr_q.addr[DCACHE_INDEX_WIDTH:wid]
+ comb += data_o.data .eq(data_miss_fsm)
+ comb += data_o.valid.eq(1)
+ comb += data_o.dirty.eq(0)
+ # is this a write?
+ with m.If (mshr_q.we):
+ # Yes, so save the updated data now
+ for i in range(8):
+ # check if we really want to write
+ # the corresponding byte
+ with m.If (mshr_q.be[i]):
+ sync += data_o.data[(cl_offset + i*8) +: 8].eq(mshr_q.wdata[i];
+ # it's immediately dirty if we write
+ comb += data_o.dirty.eq(1)
+ # reset MSHR
+ comb += mshr_d.valid.eq(0)
+ # go back to idle
+ m.next = 'IDLE'
+ # ------------------------------
+ # Write Back Operation
+ # ------------------------------
+ # ~> evict a cache line from way saved in evict_way_q
+ # NOTE(review): the first Case below has no body; presumably both
+ # labels shared one branch in the SV original - confirm
+ with m.Case("WB_CACHELINE_FLUSH"):
+ with m.Case("WB_CACHELINE_MISS"):
+ comb += req_fsm_miss_valid .eq(0b1)
+ sync += req_fsm_miss_addr .eq({evict_cl_q.tag, cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], {{DCACHE_BYTE_OFFSET}{0b0}}};
+ comb += req_fsm_miss_be .eq(1)
+ comb += req_fsm_miss_we .eq(0b1)
+ sync += req_fsm_miss_wdata .eq(evict_cl_q.data;
+ # we've got a grant --> this is timing critical, think about it
+ if (gnt_miss_fsm) begin
+ # write status array
+ sync += addr_o .eq(cnt_q)
+ comb += req_o .eq(0b1)
+ comb += we_o .eq(0b1)
+ comb += data_o.valid.eq(INVALIDATE_ON_FLUSH ? 0b0 : 0b1)
+ # invalidate
+ sync += be_o.vldrty.eq(evict_way_q)
+ # go back to handling the miss or flushing,
+ # depending on where we came from
+ with m.If(state_q == WB_CACHELINE_MISS):
+ m.next = "MISS"
+ with m.Else():
+ m.next = "FLUSH_REQ_STATUS"
+ # ------------------------------
+ # Flushing & Initialization
+ # ------------------------------
+ # ~> make another request to check the same
+ # cache-line if there are still some valid entries
+ with m.Case("FLUSH_REQ_STATUS"):
+ comb += req_o .eq(1)
+ sync += addr_o .eq(cnt_q)
+ m.next = "FLUSHING"
+ with m.Case("FLUSHING"):
+ # this has priority
+ # at least one of the cache lines is dirty
+ # NOTE(review): the comment above describes "any bit set", but
+ # `~evict_way` is a bitwise inversion - confirm the intended test
+ with m.If(~evict_way):
+ # evict cache line, look for the first
+ # cache-line which is dirty
+ comb += evict_way_d.eq(get_victim_cl(evict_way))
+ comb += evict_cl_d .eq(data_i[one_hot_to_bin(evict_way)])
+ # not dirty ~> increment and continue
+ with m.Else():
+ # increment and re-request
+ sync += cnt_d.eq(cnt_q + (1 << DCACHE_BYTE_OFFSET))
+ m.next = "FLUSH_REQ_STATUS"
+ # NOTE(review): no end-of-flush condition is visible before the
+ # lines below, and this state assigns m.next twice - confirm
+ # the intended guard
+ sync += addr_o .eq(cnt_q)
+ comb += req_o .eq(1)
+ comb += be_o.vldrty.eq(INVALIDATE_ON_FLUSH ? 1 : 0)
+ comb += we_o .eq(1)
+ # finished with flushing operation, go back to idle
+ # only acknowledge if the flush wasn't
+ # triggered by an atomic
+ sync += flush_ack_o.eq(~serve_amo_q)
+ m.next = "IDLE"
+ # ~> only called after reset
+ with m.Case("INIT"):
+ # initialize status array
+ sync += addr_o.eq(cnt_q)
+ comb += req_o .eq(1)
+ comb += we_o .eq(1)
+ # only write the dirty array
+ comb += be_o.vldrty.eq(1)
+ sync += cnt_d .eq(cnt_q + (1 << DCACHE_BYTE_OFFSET))
+ # finished initialization
+ m.next = "IDLE"
+ # ----------------------
+ # AMOs
+ # ----------------------
+ # TODO(zarubaf) Move this closer to memory
+ # ~> we are here because we need to do the AMO,
+ # the cache is clean at this point
+ # start by executing the load
+ with m.Case("AMO_LOAD"):
+ comb += req_fsm_miss_valid.eq(1)
+ # address is in operand a
+ comb += req_fsm_miss_addr.eq(amo_req_i.operand_a)
+ comb += req_fsm_miss_req.eq(ariane_axi::SINGLE_REQ)
+ comb += req_fsm_miss_size.eq(amo_req_i.size)
+ # the request has been granted
+ with m.If(gnt_miss_fsm):
+ m.next = "AMO_SAVE_LOAD"
+ # save the load value
+ with m.Case("AMO_SAVE_LOAD"):
+ with m.If (valid_miss_fsm):
+ # we are only concerned about the lower 64-bit
+ comb += mshr_d.wdata.eq(data_miss_fsm[0])
+ m.next = "AMO_STORE"
+ # and do the store
+ with m.Case("AMO_STORE"):
+ load_data = Signal(64)
+ # re-align load data
+ comb += load_data.eq(data_align(amo_req_i.operand_a[:3],
+ mshr_q.wdata))
+ # Sign-extend for word operation
+ with m.If (amo_req_i.size == 0b10):
+ comb += amo_operand_a.eq(sext32(load_data[:32]))
+ comb += amo_operand_b.eq(sext32(amo_req_i.operand_b[:32]))
+ with m.Else():
+ comb += amo_operand_a.eq(load_data)
+ comb += amo_operand_b.eq(amo_req_i.operand_b)
+ # we do not need a store request for load reserved
+ # or a failing store conditional
+ # we can bail-out without making any further requests
+ with m.If ((amo_req_i.amo_op == AMO_LR) | \
+ ((amo_req_i.amo_op == AMO_SC) & \
+ ((reservation_q.valid & \
+ (reservation_q.address != \
+ amo_req_i.operand_a[3:64])) | \
+ ~reservation_q.valid))):
+ comb += req_fsm_miss_valid.eq(0)
+ m.next = "IDLE"
+ comb += amo_resp_o.ack.eq(1)
+ # write-back the result
+ comb += amo_resp_o.result.eq(amo_operand_a)
+ # we know that the SC failed
+ with m.If (amo_req_i.amo_op == AMO_SC):
+ comb += amo_resp_o.result.eq(1)
+ # also clear the reservation
+ comb += reservation_d.valid.eq(0)
+ with m.Else():
+ comb += req_fsm_miss_valid.eq(1)
+ comb += req_fsm_miss_we .eq(1)
+ comb += req_fsm_miss_req .eq(ariane_axi::SINGLE_REQ)
+ comb += req_fsm_miss_size.eq(amo_req_i.size)
+ comb += req_fsm_miss_addr.eq(amo_req_i.operand_a)
+ comb += req_fsm_miss_wdata.eq(
+ data_align(amo_req_i.operand_a[0:3], amo_result_o))
+ comb += req_fsm_miss_be.eq(
+ be_gen(amo_req_i.operand_a[0:3], amo_req_i.size))
+ # place a reservation on the memory
+ with m.If (amo_req_i.amo_op == AMO_LR):
+ comb += reservation_d.address.eq(amo_req_i.operand_a[3:64])
+ comb += reservation_d.valid.eq(1)
+ # the request is valid or we didn't need to go for another store
+ with m.If (valid_miss_fsm):
+ m.next = "IDLE"
+ comb += amo_resp_o.ack.eq(1)
+ # write-back the result
+ comb += amo_resp_o.result.eq(amo_operand_a;
+ if (amo_req_i.amo_op == AMO_SC) begin
+ comb += amo_resp_o.result.eq(0)
+ # An SC must fail if there is another SC
+ # (to any address) between the LR and the SC in
+ # program order (even to the same address).
+ # in any case destroy the reservation
+ comb += reservation_d.valid.eq(0)
+ # check MSHR for aliasing
+ comb += mshr_addr_matches_o .eq(0)
+ # NOTE(review): .eq() below is called without an argument
+ comb += mshr_index_matches_o.eq()
+ for i in range(NR_PORTS):
+ # check mshr for potential matching of other units,
+ # exclude the unit currently being served
+ with m.If (mshr_q.valid & \
+ (mshr_addr_i[i][DCACHE_BYTE_OFFSET:56] == \
+ mshr_q.addr[DCACHE_BYTE_OFFSET:56])):
+ comb += mshr_addr_matches_o[i].eq(1)
+ # same as previous, but checking only the index
+ # NOTE(review): the condition below is cut off (line continuation
+ # without a comparison or closing parenthesis)
+ with m.If (mshr_q.valid & \
+ mshr_index_matches_o[i].eq(1)
+ # --------------------
+ # Sequential Process
+ # --------------------
+ """
+ #pragma translate_off
+ `ifndef VERILATOR
+ # assert that cache only hits on one way
+ assert property (
+ @(posedge clk_i) $onehot0(evict_way_q)) else $warning("Evict-way should be one-hot encoded");
+ `endif
+ #pragma translate_on
+ """
+ # ----------------------
+ # Bypass Arbiter
+ # ----------------------
+ # Connection Arbiter <-> AXI
+ req_fsm_bypass_valid = Signal()
+ req_fsm_bypass_addr = Signal(64)
+ req_fsm_bypass_wdata = Signal(64)
+ req_fsm_bypass_we = Signal()
+ req_fsm_bypass_be = Signal(8)
+ req_fsm_bypass_size = Signal(2)
+ gnt_bypass_fsm = Signal()
+ valid_bypass_fsm = Signal()
+ data_bypass_fsm = Signal(64)
+ logic [$clog2(NR_PORTS)-1:0] id_fsm_bypass;
+ logic [3:0] id_bypass_fsm;
+ logic [3:0] gnt_id_bypass_fsm;
+ # NOTE(review): the arbiter instance is not registered in m.submodules,
+ # and its *_o signals appear on the left of .eq() below (assigned
+ # to rather than read from) - confirm the intended direction
+ i_bypass_arbiter = ib = AXIArbiter( NR_PORTS, 64)
+ comb += [
+ # Master Side
+ ib.data_req_i .eq( miss_req_valid & miss_req_bypass ),
+ ib.address_i .eq( miss_req_addr ),
+ ib.data_wdata_i .eq( miss_req_wdata ),
+ ib.data_we_i .eq( miss_req_we ),
+ ib.data_be_i .eq( miss_req_be ),
+ ib.data_size_i .eq( miss_req_size ),
+ ib.data_gnt_o .eq( bypass_gnt_o ),
+ ib.data_rvalid_o .eq( bypass_valid_o ),
+ ib.data_rdata_o .eq( bypass_data_o ),
+ # Slave Side
+ ib.id_i .eq( id_bypass_fsm[$clog2(NR_PORTS)-1:0] ),
+ ib.id_o .eq( id_fsm_bypass ),
+ ib.gnt_id_i .eq( gnt_id_bypass_fsm[$clog2(NR_PORTS)-1:0] ),
+ ib.address_o .eq( req_fsm_bypass_addr ),
+ ib.data_wdata_o .eq( req_fsm_bypass_wdata ),
+ ib.data_req_o .eq( req_fsm_bypass_valid ),
+ ib.data_we_o .eq( req_fsm_bypass_we ),
+ ib.data_be_o .eq( req_fsm_bypass_be ),
+ ib.data_size_o .eq( req_fsm_bypass_size ),
+ ib.data_gnt_i .eq( gnt_bypass_fsm ),
+ ib.data_rvalid_i .eq( valid_bypass_fsm ),
+ ib.data_rdata_i .eq( data_bypass_fsm ),
+ ]
+ # NOTE(review): the three instantiations below (axi_adapter x2,
+ # lfsr_8bit) and amo_alu further down are unported SystemVerilog
+ axi_adapter #(
+ .DATA_WIDTH ( 64 ),
+ .AXI_ID_WIDTH ( 4 ),
+ ) i_bypass_axi_adapter (
+ .clk_i,
+ .rst_ni,
+ .req_i ( req_fsm_bypass_valid ),
+ .type_i ( ariane_axi::SINGLE_REQ ),
+ .gnt_o ( gnt_bypass_fsm ),
+ .addr_i ( req_fsm_bypass_addr ),
+ .we_i ( req_fsm_bypass_we ),
+ .wdata_i ( req_fsm_bypass_wdata ),
+ .be_i ( req_fsm_bypass_be ),
+ .size_i ( req_fsm_bypass_size ),
+ .id_i ( Cat(id_fsm_bypass, 0, 0) ),
+ .valid_o ( valid_bypass_fsm ),
+ .rdata_o ( data_bypass_fsm ),
+ .gnt_id_o ( gnt_id_bypass_fsm ),
+ .id_o ( id_bypass_fsm ),
+ .critical_word_o ( ), # not used for single requests
+ .critical_word_valid_o ( ), # not used for single requests
+ .axi_req_o ( axi_bypass_o ),
+ .axi_resp_i ( axi_bypass_i )
+ );
+ # ----------------------
+ # Cache Line AXI Refill
+ # ----------------------
+ axi_adapter #(
+ .AXI_ID_WIDTH ( 4 ),
+ ) i_miss_axi_adapter (
+ .clk_i,
+ .rst_ni,
+ .req_i ( req_fsm_miss_valid ),
+ .type_i ( req_fsm_miss_req ),
+ .gnt_o ( gnt_miss_fsm ),
+ .addr_i ( req_fsm_miss_addr ),
+ .we_i ( req_fsm_miss_we ),
+ .wdata_i ( req_fsm_miss_wdata ),
+ .be_i ( req_fsm_miss_be ),
+ .size_i ( req_fsm_miss_size ),
+ .id_i ( Const(0b1100, 4) ),
+ .gnt_id_o ( ), # open
+ .valid_o ( valid_miss_fsm ),
+ .rdata_o ( data_miss_fsm ),
+ .id_o ( ),
+ .critical_word_o,
+ .critical_word_valid_o,
+ .axi_req_o ( axi_data_o ),
+ .axi_resp_i ( axi_data_i )
+ );
+ # -----------------
+ # Replacement LFSR
+ # -----------------
+ lfsr_8bit #(.WIDTH (DCACHE_SET_ASSOC)) i_lfsr (
+ .en_i ( lfsr_enable ),
+ .refill_way_oh ( lfsr_oh ),
+ .refill_way_bin ( lfsr_bin ),
+ .*
+ );
+ # -----------------
+ # -----------------
+ amo_alu i_amo_alu (
+ .amo_op_i ( amo_op ),
+ .amo_operand_a_i ( amo_operand_a ),
+ .amo_operand_b_i ( amo_operand_b ),
+ .amo_result_o ( amo_result_o )
+ );
+ # -----------------
+ # Struct Split
+ # -----------------
+ # copy each field of every incoming miss request into the per-port
+ # miss_req_* signals declared at the top of this method
+ for i in range(NR_PORTS):
+ miss_req = MissReq()
+ comb += miss_req.eq(miss_req_i[i]);
+ comb += miss_req_valid [i] .eq(miss_req.valid)
+ comb += miss_req_bypass [i] .eq(miss_req.bypass)
+ comb += miss_req_addr [i] .eq(miss_req.addr)
+ comb += miss_req_wdata [i] .eq(miss_req.wdata)
+ comb += miss_req_we [i] .eq(miss_req.we)
+ comb += miss_req_be [i] .eq(miss_req.be)
+ comb += miss_req_size [i] .eq(miss_req.size)
+ # --------------
+ # AXI Arbiter
+ # --------------
+ #
+ # Description: Arbitrates access to AXI refill/bypass
+ #
class AXIArbiter(Elaboratable):
    """Arbitrates access of several master ports to one AXI refill/bypass port.

    While idle, the lowest-numbered requesting master is selected, granted
    immediately and passed straight through to the slave port; its request
    is also saved in registers.  The arbiter then keeps the request asserted
    from those registers until data_rvalid_i arrives, reports the response
    to the master that was served and returns to idle.

    id_i, gnt_id_i and data_gnt_i are declared for interface compatibility
    but are not used by the logic.
    """

    def __init__(self, NR_PORTS=3, DATA_WIDTH=64):
        """Arguments:
        * NR_PORTS: number of master (requesting) ports
        * DATA_WIDTH: width in bits of the slave-side data bus
        """
        # elaborate() needs both parameters
        self.NR_PORTS = NR_PORTS
        self.DATA_WIDTH = DATA_WIDTH
        # width of a port index, ceil(log2(NR_PORTS)), computed on integers
        # so float rounding in log() cannot push the result one bit too high
        self.pwid = pwid = (NR_PORTS - 1).bit_length()
        # one byte-enable bit per data byte; integer division, because a
        # Signal width must be an int
        be_width = DATA_WIDTH // 8

        # master ports
        self.data_req_i = Signal(NR_PORTS)
        self.address_i = Array(Signal(64, name="address_i")
                               for i in range(NR_PORTS))
        self.data_wdata_i = Array(Signal(64, name="data_wdata_i")
                                  for i in range(NR_PORTS))
        self.data_we_i = Signal(NR_PORTS)
        self.data_be_i = Array(Signal(be_width, name="data_be_i")
                               for i in range(NR_PORTS))
        self.data_size_i = Array(Signal(2, name="data_size_i")
                                 for i in range(NR_PORTS))
        self.data_gnt_o = Signal(NR_PORTS)
        self.data_rvalid_o = Signal(NR_PORTS)
        self.data_rdata_o = Array(Signal(64, name="data_rdata_o")
                                  for i in range(NR_PORTS))

        # slave port
        self.id_i = Signal(pwid)
        self.id_o = Signal(pwid)
        self.gnt_id_i = Signal(pwid)
        self.data_req_o = Signal()
        self.address_o = Signal(64)
        self.data_wdata_o = Signal(DATA_WIDTH)
        self.data_we_o = Signal()
        self.data_be_o = Signal(be_width)
        self.data_size_o = Signal(2)
        self.data_gnt_i = Signal()
        self.data_rvalid_i = Signal()
        self.data_rdata_i = Signal(DATA_WIDTH)

    def elaborate(self, platform):
        """Build and return the arbiter logic as an nmigen Module."""
        # imported locally: the file-level import list is not visible from
        # this block and the previous code never created a Module
        from nmigen import Module

        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # registered copy of the request being served (the SystemVerilog
        # original kept these fields in one packed struct, req_q)
        req_id = Signal(self.pwid, name="req_id")
        req_address = Signal(64, name="req_address")
        req_data = Signal(64, name="req_data")
        req_size = Signal(2, name="req_size")
        req_be = Signal(self.DATA_WIDTH // 8, name="req_be")
        req_we = Signal(name="req_we")

        # select one request (priority-based: lowest asserted bit wins);
        # pp.n is high when no master is requesting
        m.submodules.pp = pp = PriorityEncoder(self.NR_PORTS)
        request_index = Signal(self.pwid)
        comb += pp.i.eq(self.data_req_i)
        comb += request_index.eq(pp.o)
        # write-enable bit of the selected master (shift-then-slice works
        # for a run-time index on every nmigen version)
        selected_we = (self.data_we_i >> request_index)[0]

        # request port defaults: drive the slave from the saved request.
        # data_req_o, data_gnt_o and data_rvalid_o default to 0 (their
        # reset value) unless assigned in the FSM below.
        comb += [
            self.address_o.eq(req_address),
            self.data_wdata_o.eq(req_data),
            self.data_be_o.eq(req_be),
            self.data_size_o.eq(req_size),
            self.data_we_o.eq(req_we),
            self.id_o.eq(req_id),
        ]

        # read port: only the master recorded in req_id sees the read data,
        # every other element stays at 0
        for port, rdata in enumerate(self.data_rdata_o):
            with m.If(req_id == port):
                comb += rdata.eq(self.data_rdata_i)

        with m.FSM():
            with m.State("IDLE"):
                # pass the selected master straight through so the slave
                # sees the request in the same cycle
                comb += [
                    self.address_o.eq(self.address_i[request_index]),
                    self.data_wdata_o.eq(self.data_wdata_i[request_index]),
                    self.data_be_o.eq(self.data_be_i[request_index]),
                    self.data_size_o.eq(self.data_size_i[request_index]),
                    self.data_we_o.eq(selected_we),
                    self.id_o.eq(request_index),
                ]
                # wait for incoming requests
                with m.If(~pp.n):
                    comb += self.data_req_o.eq(1)
                    # grant exactly the selected master
                    for port in range(self.NR_PORTS):
                        with m.If(request_index == port):
                            comb += self.data_gnt_o[port].eq(1)
                    # save the request
                    sync += [
                        req_address.eq(self.address_i[request_index]),
                        req_id.eq(request_index),
                        req_data.eq(self.data_wdata_i[request_index]),
                        req_size.eq(self.data_size_i[request_index]),
                        req_be.eq(self.data_be_i[request_index]),
                        req_we.eq(selected_we),
                    ]
                    m.next = "SERVING"

            with m.State("SERVING"):
                comb += self.data_req_o.eq(1)
                with m.If(self.data_rvalid_i):
                    # report the response to the master that was served
                    for port in range(self.NR_PORTS):
                        with m.If(req_id == port):
                            comb += self.data_rvalid_o[port].eq(1)
                    m.next = "IDLE"

        # ------------
        # Assertions (SystemVerilog original, not yet ported)
        # ------------
        # #pragma translate_off
        # `ifndef VERILATOR
        # make sure that we eventually get an rvalid after we received a grant
        # assert property (@(posedge clk_i) data_gnt_i |-> ##[1:$] data_rvalid_i )
        #     else begin $error("There was a grant without a rvalid"); $stop(); end
        # assert that there is no grant without a request
        # assert property (@(negedge clk_i) data_gnt_i |-> data_req_o)
        #     else begin $error("There was a grant without a request."); $stop(); end
        # assert that the address does not contain X when request is sent
        # assert property ( @(posedge clk_i) (data_req_o) |-> (!$isunknown(address_o)) )
        #     else begin $error("address contains X when request is set"); $stop(); end
        # #pragma translate_on

        return m
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+# Author: Florian Zaruba, ETH Zurich
+# Date: 19/04/2017
+# Description: Memory Management Unit for Ariane, contains TLB and
+# address translation unit. SV48 as defined in
+# Volume II: RISC-V Privileged Architectures V1.10 Page 63
+import ariane_pkg::*;
+from nmigen import Const, Signal, Cat, Module, Mux
+from nmigen.cli import verilog, rtlil
+from ptw import DCacheReqI, DCacheReqO, TLBUpdate, PTE, PTW
+from tlb import TLB
+from exceptcause import (INSTR_ACCESS_FAULT, INSTR_PAGE_FAULT,
+PRIV_LVL_M = Const(0b11, 2)
+PRIV_LVL_S = Const(0b01, 2)
+PRIV_LVL_U = Const(0b00, 2)
class RVException:
    """Exception record: a cause, an accompanying value and a valid flag."""

    def __init__(self):
        # cause of exception
        self.cause = Signal(64)
        # more info of causing exception (e.g.: instruction causing it),
        # address of LD/ST fault
        self.tval = Signal(64)
        self.valid = Signal()

    def __iter__(self):
        """Yield the member signals in the order cause, tval, valid."""
        yield from (self.cause, self.tval, self.valid)

    def ports(self):
        """Return the member signals as a list, in iteration order."""
        return [*self]

    def eq(self, inp):
        """Return the list of assignments copying every signal of inp.

        Signals are paired positionally through ports() on both objects.
        """
        return [dst.eq(src) for dst, src in zip(self.ports(), inp.ports())]
class ICacheReqI:
    """Fetch response bundle: valid flag, physical address and exception."""

    def __init__(self):
        # address translation valid
        self.fetch_valid = Signal()
        # physical address in
        self.fetch_paddr = Signal(64)
        # exception occurred during fetch
        self.fetch_exception = RVException()

    def __iter__(self):
        """Yield fetch_valid, fetch_paddr, then every item of fetch_exception."""
        yield from (self.fetch_valid, self.fetch_paddr)
        yield from self.fetch_exception

    def ports(self):
        """Return all member signals as a flat list, in iteration order."""
        return [*self]
class ICacheReqO:
    """Fetch request bundle: request flag and virtual address."""

    def __init__(self):
        # address translation request
        self.fetch_req = Signal()
        # virtual address out
        self.fetch_vaddr = Signal(64)

    def __iter__(self):
        """Yield fetch_req, then fetch_vaddr."""
        yield from (self.fetch_req, self.fetch_vaddr)

    def ports(self):
        """Return both member signals as a list, in iteration order."""
        return [*self]
+class MMU:
+ def __init__(self, instr_tlb_entries = 4,
+ data_tlb_entries = 4,
+ asid_width = 1):
+ self.instr_tlb_entries = instr_tlb_entries
+ self.data_tlb_entries = data_tlb_entries
+ self.asid_width = asid_width
+ self.flush_i = Signal()
+ self.enable_translation_i = Signal()
+ self.en_ld_st_translation_i = Signal() # enable VM translation for LD/ST
+ # IF interface
+ self.icache_areq_i = ICacheReqO()
+ self.icache_areq_o = ICacheReqI()
+ # LSU interface
+ # this is a more minimalistic interface because the actual addressing
+ # logic is handled in the LSU as we distinguish load and stores,
+ # what we do here is simple address translation
+ self.misaligned_ex_i = RVException()
+ self.lsu_req_i = Signal() # request address translation
+ self.lsu_vaddr_i = Signal(64) # virtual address in
+ self.lsu_is_store_i = Signal() # the translation is requested by a store
+ # if we need to walk the page table we can't grant in the same cycle
+ # Cycle 0
+ self.lsu_dtlb_hit_o = Signal() # sent in the same cycle as the request
+ # if translation hits in the DTLB
+ # Cycle 1
+ self.lsu_valid_o = Signal() # translation is valid
+ self.lsu_paddr_o = Signal(64) # translated address
+ self.lsu_exception_o = RVException() # addr translate threw exception
+ # General control signals
+ self.priv_lvl_i = Signal(2)
+ self.ld_st_priv_lvl_i = Signal(2)
+ self.sum_i = Signal()
+ self.mxr_i = Signal()
+ # input logic flag_mprv_i,
+ self.satp_ppn_i = Signal(44)
+ self.asid_i = Signal(self.asid_width)
+ self.flush_tlb_i = Signal()
+ # Performance counters
+ self.itlb_miss_o = Signal()
+ self.dtlb_miss_o = Signal()
+ # PTW memory interface
+ self.req_port_i = DCacheReqO()
+ self.req_port_o = DCacheReqI()
+ def elaborate(self, platform):
+ m = Module()
+ iaccess_err = Signal() # insufficient priv to access instr page
+ daccess_err = Signal() # insufficient priv to access data page
+ ptw_active = Signal() # PTW is currently walking a page table
+ walking_instr = Signal() # PTW is walking because of an ITLB miss
+ ptw_error = Signal() # PTW threw an exception
+ update_vaddr = Signal(48) # guessed
+ uaddr64 = Cat(update_vaddr, Const(0, 25)) # extend to 64bit with zeros
+ update_ptw_itlb = TLBUpdate(self.asid_width)
+ update_ptw_dtlb = TLBUpdate(self.asid_width)
+ itlb_lu_access = Signal()
+ itlb_content = PTE()
+ itlb_is_2M = Signal()
+ itlb_is_1G = Signal()
+ itlb_is_512G = Signal()
+ itlb_lu_hit = Signal()
+ dtlb_lu_access = Signal()
+ dtlb_content = PTE()
+ dtlb_is_2M = Signal()
+ dtlb_is_1G = Signal()
+ dtlb_is_512G = Signal()
+ dtlb_lu_hit = Signal()
+ # Assignments
+ m.d.comb += [itlb_lu_access.eq(self.icache_areq_i.fetch_req),
+ dtlb_lu_access.eq(self.lsu_req_i)
+ ]
+ # ITLB
+ m.submodules.i_tlb = i_tlb = TLB(self.instr_tlb_entries,
+ self.asid_width)
+ m.d.comb += [i_tlb.flush_i.eq(self.flush_tlb_i),
+ i_tlb.update_i.eq(update_ptw_itlb),
+ i_tlb.lu_access_i.eq(itlb_lu_access),
+ i_tlb.lu_asid_i.eq(self.asid_i),
+ i_tlb.lu_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
+ itlb_content.eq(i_tlb.lu_content_o),
+ itlb_is_2M.eq(i_tlb.lu_is_2M_o),
+ itlb_is_1G.eq(i_tlb.lu_is_1G_o),
+ itlb_is_512G.eq(i_tlb.lu_is_512G_o),
+ itlb_lu_hit.eq(i_tlb.lu_hit_o),
+ ]
+ # DTLB
+ m.submodules.d_tlb = d_tlb = TLB(self.data_tlb_entries,
+ self.asid_width)
+ m.d.comb += [d_tlb.flush_i.eq(self.flush_tlb_i),
+ d_tlb.update_i.eq(update_ptw_dtlb),
+ d_tlb.lu_access_i.eq(dtlb_lu_access),
+ d_tlb.lu_asid_i.eq(self.asid_i),
+ d_tlb.lu_vaddr_i.eq(self.lsu_vaddr_i),
+ dtlb_content.eq(d_tlb.lu_content_o),
+ dtlb_is_2M.eq(d_tlb.lu_is_2M_o),
+ dtlb_is_1G.eq(d_tlb.lu_is_1G_o),
+ dtlb_is_512G.eq(d_tlb.lu_is_512G_o),
+ dtlb_lu_hit.eq(d_tlb.lu_hit_o),
+ ]
+ # PTW
+ m.submodules.ptw = ptw = PTW(self.asid_width)
+ m.d.comb += [ptw_active.eq(ptw.ptw_active_o),
+ walking_instr.eq(ptw.walking_instr_o),
+ ptw_error.eq(ptw.ptw_error_o),
+ ptw.enable_translation_i.eq(self.enable_translation_i),
+ update_vaddr.eq(ptw.update_vaddr_o),
+ update_ptw_itlb.eq(ptw.itlb_update_o),
+ update_ptw_dtlb.eq(ptw.dtlb_update_o),
+ ptw.itlb_access_i.eq(itlb_lu_access),
+ ptw.itlb_hit_i.eq(itlb_lu_hit),
+ ptw.itlb_vaddr_i.eq(self.icache_areq_i.fetch_vaddr),
+ ptw.dtlb_access_i.eq(dtlb_lu_access),
+ ptw.dtlb_hit_i.eq(dtlb_lu_hit),
+ ptw.dtlb_vaddr_i.eq(self.lsu_vaddr_i),
+ ptw.req_port_i.eq(self.req_port_i),
+ self.req_port_o.eq(ptw.req_port_o),
+ ]
+ # ila_1 i_ila_1 (
+ # .clk(clk_i), # input wire clk
+ # .probe0({req_port_o.address_tag, req_port_o.address_index}),
+ # .probe1(req_port_o.data_req), # input wire [63:0] probe1
+ # .probe2(req_port_i.data_gnt), # input wire [0:0] probe2
+ # .probe3(req_port_i.data_rdata), # input wire [0:0] probe3
+ # .probe4(req_port_i.data_rvalid), # input wire [0:0] probe4
+ # .probe5(ptw_error), # input wire [1:0] probe5
+ # .probe6(update_vaddr), # input wire [0:0] probe6
+ # .probe7(update_ptw_itlb.valid), # input wire [0:0] probe7
+ # .probe8(update_ptw_dtlb.valid), # input wire [0:0] probe8
+ # .probe9(dtlb_lu_access), # input wire [0:0] probe9
+ # .probe10(lsu_vaddr_i), # input wire [0:0] probe10
+ # .probe11(dtlb_lu_hit), # input wire [0:0] probe11
+ # .probe12(itlb_lu_access), # input wire [0:0] probe12
+ # .probe13(icache_areq_i.fetch_vaddr), # input wire [0:0] probe13
+ # .probe14(itlb_lu_hit) # input wire [0:0] probe13
+ # );
+ #-----------------------
+ # Instruction Interface
+ #-----------------------
+ # The instruction interface is a simple request response interface
+ # MMU disabled: just pass through
+ m.d.comb += [self.icache_areq_o.fetch_valid.eq(
+ self.icache_areq_i.fetch_req),
+ # play through in case we disabled address translation
+ self.icache_areq_o.fetch_paddr.eq(
+ self.icache_areq_i.fetch_vaddr)
+ ]
+ # two potential exception sources:
+ # 1. HPTW threw an exception -> signal with a page fault exception
+ # 2. We got an access error because of insufficient permissions ->
+ # throw an access exception
+ m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0)
+ # Check whether we are allowed to access this memory region
+ # from a fetch perspective
+ # PLATEN TODO: use PermissionValidator instead [we like modules]
+ m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \
+ (((self.priv_lvl_i == PRIV_LVL_U) & \
+ ~itlb_content.u) | \
+ ((self.priv_lvl_i == PRIV_LVL_S) & \
+ itlb_content.u)))
+ # MMU enabled: address from TLB, request delayed until hit.
+ # Error when TLB hit and no access right or TLB hit and
+ # translated address not valid (e.g. AXI decode error),
+ # or when PTW performs walk due to ITLB miss and raises
+ # an error.
+ with m.If (self.enable_translation_i):
+ # we work with SV48, so if VM is enabled, check that
+ # all bits [47:38] are equal
+ with m.If (self.icache_areq_i.fetch_req & \
+ ~(((~self.icache_areq_i.fetch_vaddr[47:64]) == 0) | \
+ (self.icache_areq_i.fetch_vaddr[47:64]) == 0)):
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
+ fe.tval.eq(self.icache_areq_i.fetch_vaddr),
+ fe.valid.eq(1)
+ ]
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(0)
+ # 4K page
+ paddr = Signal.like(self.icache_areq_o.fetch_paddr)
+ paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12],
+ itlb_content.ppn)
+ m.d.comb += paddr.eq(paddr4k)
+ # Mega page
+ with m.If(itlb_is_2M):
+ m.d.comb += paddr[12:21].eq(
+ self.icache_areq_i.fetch_vaddr[12:21])
+ # Giga page
+ with m.If(itlb_is_1G):
+ m.d.comb += paddr[12:30].eq(
+ self.icache_areq_i.fetch_vaddr[12:30])
+ m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
+ # Tera page
+ with m.If(itlb_is_512G):
+ m.d.comb += paddr[12:39].eq(
+ self.icache_areq_i.fetch_vaddr[12:39])
+ m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr)
+ # ---------
+ # ITLB Hit
+ # --------
+ # if we hit the ITLB output the request signal immediately
+ with m.If(itlb_lu_hit):
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(
+ self.icache_areq_i.fetch_req)
+ # we got an access error
+ with m.If (iaccess_err):
+ # throw a page fault
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT),
+ fe.tval.eq(self.icache_areq_i.fetch_vaddr),
+ fe.valid.eq(1)
+ ]
+ # ---------
+ # ITLB Miss
+ # ---------
+ # watch out for exceptions happening during walking the page table
+ with m.Elif(ptw_active & walking_instr):
+ m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error)
+ fe = self.icache_areq_o.fetch_exception
+ m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT),
+ fe.tval.eq(uaddr64),
+ fe.valid.eq(1)
+ ]
+ #-----------------------
+ # Data Interface
+ #-----------------------
+ lsu_vaddr = Signal(64)
+ dtlb_pte = PTE()
+ misaligned_ex = RVException()
+ lsu_req = Signal()
+ lsu_is_store = Signal()
+ dtlb_hit = Signal()
+ #dtlb_is_2M = Signal()
+ #dtlb_is_1G = Signal()
+ #dtlb_is_512 = Signal()
+ # check if we need to do translation or if we are always
+ # ready (e.g.: we are not translating anything)
+ m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i,
+ dtlb_lu_hit, 1))
+ # The data interface is simpler and only consists of a
+ # request/response interface
+ m.d.comb += [
+ # save request and DTLB response
+ lsu_vaddr.eq(self.lsu_vaddr_i),
+ lsu_req.eq(self.lsu_req_i),
+ misaligned_ex.eq(self.misaligned_ex_i),
+ dtlb_pte.eq(dtlb_content),
+ dtlb_hit.eq(dtlb_lu_hit),
+ lsu_is_store.eq(self.lsu_is_store_i),
+ #dtlb_is_2M.eq(dtlb_is_2M),
+ #dtlb_is_1G.eq(dtlb_is_1G),
+ ##dtlb_is_512.eq(self.dtlb_is_512G) #????
+ ]
+ m.d.sync += [
+ self.lsu_paddr_o.eq(lsu_vaddr),
+ self.lsu_valid_o.eq(lsu_req),
+ self.lsu_exception_o.eq(misaligned_ex),
+ ]
+ sverr = Signal()
+ usrerr = Signal()
+ m.d.comb += [
+ # mute misaligned exceptions if there is no request
+ # otherwise they will throw accidental exceptions
+ misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i),
+ # SUM is not set and we are trying to access a user
+ # page in supervisor mode
+ sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \
+ dtlb_pte.u),
+ # this is not a user page but we are in user mode and
+ # trying to access it
+ usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u),
+ # Check if the User flag is set, then we may only
+ # access it in supervisor mode if SUM is enabled
+ daccess_err.eq(sverr | usrerr),
+ ]
+ # translation is enabled and no misaligned exception occurred
+ with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid):
+ m.d.comb += lsu_req.eq(0)
+ # 4K page
+ paddr = Signal.like(lsu_vaddr)
+ paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn)
+ m.d.comb += paddr.eq(paddr4k)
+ # Mega page
+ with m.If(dtlb_is_2M):
+ m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21])
+ # Giga page
+ with m.If(dtlb_is_1G):
+ m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30])
+ m.d.sync += self.lsu_paddr_o.eq(paddr)
+ # TODO platen tera_page
+ # ---------
+ # DTLB Hit
+ # --------
+ with m.If(dtlb_hit & lsu_req):
+ m.d.comb += lsu_req.eq(1)
+ # this is a store
+ with m.If (lsu_is_store):
+ # check if the page is write-able and
+ # we are not violating privileges
+ # also check if the dirty flag is set
+ with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d):
+ le = self.lsu_exception_o
+ m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
+ le.tval.eq(lsu_vaddr),
+ le.valid.eq(1)
+ ]
+ # this is a load, check for sufficient access
+ # privileges - throw a page fault if necessary
+ with m.Elif(daccess_err):
+ le = self.lsu_exception_o
+ m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
+ le.tval.eq(lsu_vaddr),
+ le.valid.eq(1)
+ ]
+ # ---------
+ # DTLB Miss
+ # ---------
+ # watch out for exceptions
+ with m.Elif (ptw_active & ~walking_instr):
+ # page table walker threw an exception
+ with m.If (ptw_error):
+ # an error makes the translation valid
+ m.d.comb += lsu_req.eq(1)
+ # the page table walker can only throw page faults
+ with m.If (lsu_is_store):
+ le = self.lsu_exception_o
+ m.d.sync += [le.cause.eq(STORE_PAGE_FAULT),
+ le.tval.eq(uaddr64),
+ le.valid.eq(1)
+ ]
+ with m.Else():
+ m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT),
+ le.tval.eq(uaddr64),
+ le.valid.eq(1)
+ ]
+ return m
+ def ports(self):
+ return [self.flush_i, self.enable_translation_i,
+ self.en_ld_st_translation_i,
+ self.lsu_req_i,
+ self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o,
+ self.lsu_valid_o, self.lsu_paddr_o,
+ self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i,
+ self.satp_ppn_i, self.asid_i, self.flush_tlb_i,
+ self.itlb_miss_o, self.dtlb_miss_o] + \
+ self.icache_areq_i.ports() + self.icache_areq_o.ports() + \
+ self.req_port_i.ports() + self.req_port_o.ports() + \
+ self.misaligned_ex_i.ports() + self.lsu_exception_o.ports()
+if __name__ == '__main__':
+ mmu = MMU()
+ vl = rtlil.convert(mmu, ports=mmu.ports())
+ with open("test_mmu.il", "w") as f:
+ f.write(vl)
--- /dev/null
+two-way set associative - one bit
+ indicates which line of the two has been referenced more recently
+four-way set associative - three bits
+ each bit represents one branch point in a binary decision tree; let 1
+ represent that the left side has been referenced more recently than the
+ right side, and 0 vice-versa
+ are all 4 lines valid?
+ / \
+ yes no, use an invalid line
+ |
+ |
+ |
+ bit_0 == 0? state | replace ref to | next state
+ / \ ------+-------- -------+-----------
+ y n 00x | line_0 line_0 | 11_
+ / \ 01x | line_1 line_1 | 10_
+ bit_1 == 0? bit_2 == 0? 1x0 | line_2 line_2 | 0_1
+ / \ / \ 1x1 | line_3 line_3 | 0_0
+ y n y n
+ / \ / \ ('x' means ('_' means unchanged)
+ line_0 line_1 line_2 line_3 don't care)
+ (see Figure 3-7, p. 3-18, in Intel Embedded Pentium Processor Family Dev.
+ Manual, 1998, http://www.intel.com/design/intarch/manuals/273204.htm)
+note that there is a 6-bit encoding for true LRU for four-way set associative
+ bit 0: bank[1] more recently used than bank[0]
+ bit 1: bank[2] more recently used than bank[0]
+ bit 2: bank[2] more recently used than bank[1]
+ bit 3: bank[3] more recently used than bank[0]
+ bit 4: bank[3] more recently used than bank[1]
+ bit 5: bank[3] more recently used than bank[2]
+ this results in 24 valid bit patterns within the 64 possible bit patterns
+ (4! possible valid traces for bank references)
+ e.g., a trace of 0 1 2 3, where 0 is LRU and 3 is MRU, is encoded as 111111
+ you can implement a state machine with a 256x6 ROM (6-bit state encoding
+ appended with a 2-bit bank reference input will yield a new 6-bit state),
+ and you can implement an LRU bank indicator with a 64x2 ROM
--- /dev/null
+from nmigen import Signal, Module, Cat, Const
+from nmigen.hdl.ir import Elaboratable
+from math import log2
+class PLRU(Elaboratable):
+ """ PLRU - Pseudo Least Recently Used Replacement
+ PLRU-tree indexing:
+ lvl0 0
+ / \
+ / \
+ lvl1 1 2
+ / \ / \
+ lvl2 3 4 5 6
+ / \ /\/\ /\
+ ... ... ... ...
+ """
+ def __init__(self, entries):
+ self.entries = entries
+ self.lu_hit = Signal(entries)
+ self.replace_en_o = Signal(entries)
+ self.lu_access_i = Signal()
+ # Tree (bit per entry)
+ self.TLBSZ = 2*(self.entries-1)
+ self.plru_tree = Signal(self.TLBSZ)
+ self.plru_tree_o = Signal(self.TLBSZ)
+ def elaborate(self, platform=None):
+ m = Module()
+ # Just predefine which nodes will be set/cleared
+ # E.g. for a TLB with 8 entries, the for-loop is semantically
+ # equivalent to the following pseudo-code:
+ # unique case (1'b1)
+ # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
+ # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
+ # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
+ # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
+ # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
+ # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
+ # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
+ # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
+ # default: begin /* No hit */ end
+ # endcase
+ LOG_TLB = int(log2(self.entries))
+ print(LOG_TLB)
+ for i in range(self.entries):
+ # we got a hit so update the pointer as it was least recently used
+ hit = Signal(reset_less=True)
+ m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
+ with m.If(hit):
+ # Set the nodes to the values we would expect
+ for lvl in range(LOG_TLB):
+ idx_base = (1<<lvl)-1
+ # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+ shift = LOG_TLB - lvl;
+ new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
+ plru_idx = idx_base + (i >> shift)
+ print ("plru", i, lvl, hex(idx_base),
+ plru_idx, shift, new_idx)
+ m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)
+ # Decode tree to write enable signals
+ # Next for-loop basically creates the following logic for e.g.
+ # an 8 entry TLB (note: pseudo-code obviously):
+ # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
+ # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
+ # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
+ # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
+ # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
+ # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
+ # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
+ # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
+ # For each entry traverse the tree. If every tree-node matches
+ # the corresponding bit of the entry's index, this is
+ # the next entry to replace.
+ replace = []
+ for i in range(self.entries):
+ en = []
+ for lvl in range(LOG_TLB):
+ idx_base = (1<<lvl)-1
+ # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
+ shift = LOG_TLB - lvl;
+ new_idx = (i >> (shift-1)) & 1;
+ plru_idx = idx_base + (i>>shift)
+ plru = Signal(reset_less=True,
+ name="plru-%d-%d-%d" % (i, lvl, plru_idx))
+ m.d.comb += plru.eq(self.plru_tree[plru_idx])
+ # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
+ if new_idx:
+ en.append(~plru) # yes inverted (using bool())
+ else:
+ en.append(plru) # yes inverted (using bool())
+ print ("plru", i, en)
+ # boolean logic manipulation:
+ # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
+ replace.append(~Cat(*en).bool())
+ m.d.comb += self.replace_en_o.eq(Cat(*replace))
+ return m
+ def ports(self):
+ return [self.entries, self.lu_hit, self.replace_en_o,
+ self.lu_access_i, self.plru_tree, self.plru_tree_o]
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 24.4.2017
+# Description: Hardware-PTW
+/* verilator lint_off WIDTH */
+import ariane_pkg::*;
+see linux kernel source:
+* "arch/riscv/include/asm/page.h"
+* "arch/riscv/include/asm/mmu_context.h"
+* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET)
+from nmigen import Const, Signal, Cat, Module, Elaboratable
+from nmigen.hdl.ast import ArrayProxy
+from nmigen.cli import verilog, rtlil
+from math import log2
+CONFIG_L1D_SIZE = 32*1024
+class DCacheReqI:
+ def __init__(self):
+ self.address_index = Signal(DCACHE_INDEX_WIDTH)
+ self.address_tag = Signal(DCACHE_TAG_WIDTH)
+ self.data_wdata = Signal(64)
+ self.data_req = Signal()
+ self.data_we = Signal()
+ self.data_be = Signal(8)
+ self.data_size = Signal(2)
+ self.kill_req = Signal()
+ self.tag_valid = Signal()
+ def eq(self, inp):
+ res = []
+ for (o, i) in zip(self.ports(), inp.ports()):
+ res.append(o.eq(i))
+ return res
+ def ports(self):
+ return [self.address_index, self.address_tag,
+ self.data_wdata, self.data_req,
+ self.data_we, self.data_be, self.data_size,
+ self.kill_req, self.tag_valid,
+ ]
+class DCacheReqO:
+ def __init__(self):
+ self.data_gnt = Signal()
+ self.data_rvalid = Signal()
+ self.data_rdata = Signal(64) # actually in PTE object format
+ def eq(self, inp):
+ res = []
+ for (o, i) in zip(self.ports(), inp.ports()):
+ res.append(o.eq(i))
+ return res
+ def ports(self):
+ return [self.data_gnt, self.data_rvalid, self.data_rdata]
+class PTE: #(RecordObject):
+ def __init__(self):
+ self.v = Signal()
+ self.r = Signal()
+ self.w = Signal()
+ self.x = Signal()
+ self.u = Signal()
+ self.g = Signal()
+ self.a = Signal()
+ self.d = Signal()
+ self.rsw = Signal(2)
+ self.ppn = Signal(44)
+ self.reserved = Signal(10)
+ def flatten(self):
+ return Cat(*self.ports())
+ def eq(self, x):
+ if isinstance(x, ArrayProxy):
+ res = []
+ for o in self.ports():
+ i = getattr(x, o.name)
+ res.append(i)
+ x = Cat(*res)
+ else:
+ x = x.flatten()
+ return self.flatten().eq(x)
+ def __iter__(self):
+ """ order is critical so that flatten creates LSB to MSB
+ """
+ yield self.v
+ yield self.r
+ yield self.w
+ yield self.x
+ yield self.u
+ yield self.g
+ yield self.a
+ yield self.d
+ yield self.rsw
+ yield self.ppn
+ yield self.reserved
+ def ports(self):
+ return list(self)
+class TLBUpdate:
+ def __init__(self, asid_width):
+ self.valid = Signal() # valid flag
+ self.is_2M = Signal()
+ self.is_1G = Signal()
+ self.is_512G = Signal()
+ self.vpn = Signal(36)
+ self.asid = Signal(asid_width)
+ self.content = PTE()
+ def flatten(self):
+ return Cat(*self.ports())
+ def eq(self, x):
+ return self.flatten().eq(x.flatten())
+ def ports(self):
+ return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \
+ self.content.ports()
+# SV48 defines four levels of page tables
+# Each level is a 2-bit constant; PTW.elaborate compares its 2-bit ptw_lvl
+# register against them, and PTW.idle / PTW.lookup assign them to it.
+LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1
+LVL2 = Const(1, 2)
+LVL3 = Const(2, 2)
+LVL4 = Const(3, 2)
+class PTW(Elaboratable):
+ def __init__(self, asid_width=8):
+ # Declares the interface of the page table walker.
+ # asid_width: width in bits of asid_i and of the ASID field in both
+ # TLB update bundles.
+ # Suffix convention used below: _i marks inputs, _o marks outputs.
+ self.asid_width = asid_width
+ self.flush_i = Signal() # flush everything, we need to do this because
+ # actually everything we do is speculative at this stage
+ # e.g.: there could be a CSR instruction that changes everything
+ self.ptw_active_o = Signal(reset=1) # active if not IDLE
+ self.walking_instr_o = Signal() # set when walking for TLB
+ self.ptw_error_o = Signal() # set when an error occurred
+ self.enable_translation_i = Signal() # CSRs indicate to enable SV48
+ self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st
+ self.lsu_is_store_i = Signal() # translation triggered by store
+ # PTW memory interface
+ # (req_port_i carries the cache's grant/valid/data response,
+ # req_port_o carries the request driven by the walker)
+ self.req_port_i = DCacheReqO()
+ self.req_port_o = DCacheReqI()
+ # to TLBs, update logic
+ self.itlb_update_o = TLBUpdate(asid_width)
+ self.dtlb_update_o = TLBUpdate(asid_width)
+ # NOTE(review): elaborate() assigns the 64-bit vaddr register to this
+ # 48-bit output, so the upper 16 bits are dropped - confirm intended.
+ self.update_vaddr_o = Signal(48)
+ self.asid_i = Signal(self.asid_width)
+ # from TLBs
+ # did we miss?
+ self.itlb_access_i = Signal()
+ self.itlb_hit_i = Signal()
+ self.itlb_vaddr_i = Signal(64)
+ self.dtlb_access_i = Signal()
+ self.dtlb_hit_i = Signal()
+ self.dtlb_vaddr_i = Signal(64)
+ # from CSR file
+ self.satp_ppn_i = Signal(44) # ppn from satp
+ self.mxr_i = Signal()
+ # Performance counters
+ # (driven in the IDLE state from the access/hit inputs above)
+ self.itlb_miss_o = Signal()
+ self.dtlb_miss_o = Signal()
+ def ports(self):
+ return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
+ ]
+ return [
+ self.enable_translation_i, self.en_ld_st_translation_i,
+ self.lsu_is_store_i, self.req_port_i, self.req_port_o,
+ self.update_vaddr_o,
+ self.asid_i,
+ self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
+ self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
+ self.satp_ppn_i, self.mxr_i,
+ self.itlb_miss_o, self.dtlb_miss_o
+ ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports()
+ def elaborate(self, platform):
+ m = Module()
+ # input registers
+ data_rvalid = Signal()
+ data_rdata = Signal(64)
+ # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
+ # is spec'd in 64-bit binary-format: better to spec as Record?
+ pte = PTE()
+ m.d.comb += pte.flatten().eq(data_rdata)
+ # SV48 defines four levels of page tables
+ ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above)
+ ptw_lvl1 = Signal()
+ ptw_lvl2 = Signal()
+ ptw_lvl3 = Signal()
+ ptw_lvl4 = Signal()
+ m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
+ ptw_lvl2.eq(ptw_lvl == LVL2),
+ ptw_lvl3.eq(ptw_lvl == LVL3),
+ ptw_lvl4.eq(ptw_lvl == LVL4)
+ ]
+ # is this an instruction page table walk?
+ is_instr_ptw = Signal()
+ global_mapping = Signal()
+ # latched tag signal
+ tag_valid = Signal()
+ # register the ASID
+ tlb_update_asid = Signal(self.asid_width)
+ # register VPN we need to walk, SV48 defines a 48 bit virtual addr
+ vaddr = Signal(64)
+ # 4 byte aligned physical pointer
+ ptw_pptr = Signal(56)
+ m.d.sync += [
+ # Assignments
+ self.update_vaddr_o.eq(vaddr),
+ self.walking_instr_o.eq(is_instr_ptw),
+ # directly output the correct physical address
+ self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
+ self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
+ # we are never going to kill this request
+ self.req_port_o.kill_req.eq(0), # XXX assign comb?
+ # we are never going to write with the HPTW
+ self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
+ # -----------
+ # TLB Update
+ # -----------
+ self.itlb_update_o.vpn.eq(vaddr[12:48]),
+ self.dtlb_update_o.vpn.eq(vaddr[12:48]),
+ # update the correct page table level
+ self.itlb_update_o.is_2M.eq(ptw_lvl3),
+ self.itlb_update_o.is_1G.eq(ptw_lvl2),
+ self.itlb_update_o.is_512G.eq(ptw_lvl1),
+ self.dtlb_update_o.is_2M.eq(ptw_lvl3),
+ self.dtlb_update_o.is_1G.eq(ptw_lvl2),
+ self.dtlb_update_o.is_512G.eq(ptw_lvl1),
+ # output the correct ASID
+ self.itlb_update_o.asid.eq(tlb_update_asid),
+ self.dtlb_update_o.asid.eq(tlb_update_asid),
+ # set the global mapping bit
+ self.itlb_update_o.content.eq(pte),
+ self.itlb_update_o.content.g.eq(global_mapping),
+ self.dtlb_update_o.content.eq(pte),
+ self.dtlb_update_o.content.g.eq(global_mapping),
+ self.req_port_o.tag_valid.eq(tag_valid),
+ ]
+ #-------------------
+ # Page table walker #needs update
+ #-------------------
+ # A virtual address va is translated into a physical address pa as
+ # follows:
+ # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv48,
+ # PAGESIZE=2^12 and LEVELS=4.)
+ # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE.
+ # (For Sv32, PTESIZE=4.)
+ # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
+ # access exception.
+ # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
+ # step 5. Otherwise, this PTE is a pointer to the next level of
+ # the page table.
+ # Let i=i-1. If i < 0, stop and raise an access exception.
+ # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2.
+ # 5. A leaf PTE has been found. Determine if the requested memory
+ # access is allowed by the pte.r, pte.w, and pte.x bits. If not,
+ # stop and raise an access exception. Otherwise, the translation is
+ # successful. Set pte.a to 1, and, if the memory access is a
+ # store, set pte.d to 1.
+ # The translated physical address is given as follows:
+ # - pa.pgoff = va.pgoff.
+ # - If i > 0, then this is a superpage translation and
+ # pa.ppn[i-1:0] = va.vpn[i-1:0].
+ # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
+ # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned
+ # superpage stop and raise a page-fault exception.
+ m.d.sync += tag_valid.eq(0)
+ # default assignments
+ m.d.comb += [
+ # PTW memory interface
+ self.req_port_o.data_req.eq(0),
+ self.req_port_o.data_be.eq(Const(0xFF, 8)),
+ self.req_port_o.data_size.eq(Const(0b11, 2)),
+ self.req_port_o.data_we.eq(0),
+ self.ptw_error_o.eq(0),
+ self.itlb_update_o.valid.eq(0),
+ self.dtlb_update_o.valid.eq(0),
+ self.itlb_miss_o.eq(0),
+ self.dtlb_miss_o.eq(0),
+ ]
+ # ------------
+ # State Machine
+ # ------------
+ with m.FSM() as fsm:
+ with m.State("IDLE"):
+ self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
+ ptw_pptr, vaddr, tlb_update_asid)
+ with m.State("WAIT_GRANT"):
+ self.grant(m, tag_valid, data_rvalid)
+ with m.State("PTE_LOOKUP"):
+ # we wait for the valid signal
+ with m.If(data_rvalid):
+ self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4,
+ data_rvalid, global_mapping,
+ is_instr_ptw, ptw_pptr)
+ # Propagate error to MMU/LSU
+ with m.State("PROPAGATE_ERROR"):
+ m.next = "IDLE"
+ m.d.comb += self.ptw_error_o.eq(1)
+ # wait for the rvalid before going back to IDLE
+ with m.State("WAIT_RVALID"):
+ with m.If(data_rvalid):
+ m.next = "IDLE"
+ m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
+ data_rvalid.eq(self.req_port_i.data_rvalid)
+ ]
+ return m
+ def set_grant_state(self, m):
+ # should we have flushed before we got an rvalid,
+ # wait for it until going back to IDLE
+ with m.If(self.flush_i):
+ with m.If (self.req_port_i.data_gnt):
+ m.next = "WAIT_RVALID"
+ with m.Else():
+ m.next = "IDLE"
+ with m.Else():
+ m.next = "WAIT_GRANT"
+ def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
+ ptw_pptr, vaddr, tlb_update_asid):
+ # by default we start with the top-most page table
+ m.d.sync += [is_instr_ptw.eq(0),
+ ptw_lvl.eq(LVL1),
+ global_mapping.eq(0),
+ self.ptw_active_o.eq(0), # deactive (IDLE)
+ ]
+ # work out itlb/dtlb miss
+ m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
+ self.itlb_access_i & \
+ ~self.itlb_hit_i & \
+ ~self.dtlb_access_i)
+ m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
+ self.dtlb_access_i & \
+ ~self.dtlb_hit_i)
+ # we got an ITLB miss?
+ with m.If(self.itlb_miss_o):
+ pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:48],
+ self.satp_ppn_i)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ is_instr_ptw.eq(1),
+ vaddr.eq(self.itlb_vaddr_i),
+ tlb_update_asid.eq(self.asid_i),
+ ]
+ self.set_grant_state(m)
+ # we got a DTLB miss?
+ with m.Elif(self.dtlb_miss_o):
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:48],
+ self.satp_ppn_i)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ vaddr.eq(self.dtlb_vaddr_i),
+ tlb_update_asid.eq(self.asid_i),
+ ]
+ self.set_grant_state(m)
+ def grant(self, m, tag_valid, data_rvalid):
+ # we've got a data WAIT_GRANT so tell the
+ # cache that the tag is valid
+ # send a request out
+ m.d.comb += self.req_port_o.data_req.eq(1)
+ # wait for the WAIT_GRANT
+ with m.If(self.req_port_i.data_gnt):
+ # send the tag valid signal one cycle later
+ m.d.sync += tag_valid.eq(1)
+ # should we have flushed before we got an rvalid,
+ # wait for it until going back to IDLE
+ with m.If(self.flush_i):
+ with m.If (~data_rvalid):
+ m.next = "WAIT_RVALID"
+ with m.Else():
+ m.next = "IDLE"
+ with m.Else():
+ m.next = "PTE_LOOKUP"
+ def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, ptw_lvl4,
+ data_rvalid, global_mapping,
+ is_instr_ptw, ptw_pptr):
+ # temporaries
+ pte_rx = Signal(reset_less=True)
+ pte_exe = Signal(reset_less=True)
+ pte_inv = Signal(reset_less=True)
+ pte_a = Signal(reset_less=True)
+ st_wd = Signal(reset_less=True)
+ m.d.comb += [pte_rx.eq(pte.r | pte.x),
+ pte_exe.eq(~pte.x | ~pte.a),
+ pte_inv.eq(~pte.v | (~pte.r & pte.w)),
+ pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
+ st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]
+ l1err = Signal(reset_less=True)
+ l2err = Signal(reset_less=True)
+ l3err = Signal(reset_less=True)
+ m.d.comb += [l3err.eq((ptw_lvl3) & pte.ppn[0:9] != Const(0,0)),
+ l2err.eq((ptw_lvl2) & pte.ppn[0:18] != Const(0, 18)),
+ l1err.eq((ptw_lvl1) & pte.ppn[0:27] != Const(0, 27))]
+ # check if the global mapping bit is set
+ with m.If (pte.g):
+ m.d.sync += global_mapping.eq(1)
+ m.next = "IDLE"
+ # -------------
+ # Invalid PTE
+ # -------------
+ # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
+ # stop and raise a page-fault exception.
+ with m.If (pte_inv):
+ m.next = "PROPAGATE_ERROR"
+ # -----------
+ # Valid PTE
+ # -----------
+ # it is a valid PTE
+ # if pte.r = 1 or pte.x = 1 it is a valid PTE
+ with m.Elif (pte_rx):
+ # Valid translation found (either 1G, 2M or 4K)
+ with m.If(is_instr_ptw):
+ # ------------
+ # Update ITLB
+ # ------------
+ # If page not executable, we can directly raise error.
+ # This doesn't put a useless entry into the TLB.
+ # The same idea applies to the access flag since we let
+ # the access flag be managed by SW.
+ with m.If (pte_exe):
+ m.next = "IDLE"
+ with m.Else():
+ m.d.comb += self.itlb_update_o.valid.eq(1)
+ with m.Else():
+ # ------------
+ # Update DTLB
+ # ------------
+ # Check if the access flag has been set, otherwise
+ # throw page-fault and let software handle those bits.
+ # If page not readable (there are no write-only pages)
+ # directly raise an error. This doesn't put a useless
+ # entry into the TLB.
+ with m.If(pte_a):
+ m.d.comb += self.dtlb_update_o.valid.eq(1)
+ with m.Else():
+ m.next = "PROPAGATE_ERROR"
+ # Request is a store: perform additional checks
+ # If the request was a store and the page not
+ # write-able, raise an error
+ # the same applies if the dirty flag is not set
+ with m.If (st_wd):
+ m.d.comb += self.dtlb_update_o.valid.eq(0)
+ m.next = "PROPAGATE_ERROR"
+ # check if the ppn is correctly aligned: Case (6)
+ with m.If(l1err | l2err | l3err):
+ m.next = "PROPAGATE_ERROR"
+ m.d.comb += [self.dtlb_update_o.valid.eq(0),
+ self.itlb_update_o.valid.eq(0)]
+ # this is a pointer to the next TLB level
+ with m.Else():
+ # pointer to next level of page table
+ with m.If (ptw_lvl1):
+ # we are in the second level now
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39], pte.ppn)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ ptw_lvl.eq(LVL2)
+ ]
+ with m.If(ptw_lvl2):
+ # here we received a pointer to the third level
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ ptw_lvl.eq(LVL3)
+ ]
+ with m.If(ptw_lvl3): #guess: shift page levels by one
+ # here we received a pointer to the fourth level
+ # the last one is near the page offset
+ pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn)
+ m.d.sync += [ptw_pptr.eq(pptr),
+ ptw_lvl.eq(LVL4)
+ ]
+ self.set_grant_state(m)
+ with m.If (ptw_lvl4):
+ # Should already be the last level
+ # page table => Error
+ m.d.sync += ptw_lvl.eq(LVL4)
+ m.next = "PROPAGATE_ERROR"
+if __name__ == '__main__':
+ ptw = PTW()
+ vl = rtlil.convert(ptw, ports=ptw.ports())
+ with open("test_ptw.il", "w") as f:
+ f.write(vl)
--- /dev/null
+import sys
+from TLB.ariane.plru import PLRU
+from nmigen.compat.sim import run_simulation
+def tbench(dut):
+ yield
+if __name__ == "__main__":
+ dut = PLRU(4)
+ run_simulation(dut, tbench(dut), vcd_name="test_plru.vcd")
+ print("PLRU Unit Test Success")
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from TLB.ariane.ptw import PTW, PTE
+# unit was changed, test needs to be changed
+def tbench(dut):
+ addr = 0x8000000
+ #pte = PTE()
+ #yield pte.v.eq(1)
+ #yield pte.r.eq(1)
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield dut.req_port_i.data_rvalid.eq(1)
+ yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten())
+ # data lookup
+ yield dut.en_ld_st_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000000)
+ yield
+ yield
+ yield
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x200000)
+ yield
+ yield
+ yield
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000011)
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+ # data lookup, PTW levels 1-2-3
+ addr = 0x4000000
+ yield dut.dtlb_vaddr_i.eq(addr)
+ yield dut.mxr_i.eq(0x1)
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield dut.req_port_i.data_rvalid.eq(1)
+ yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten())
+ yield dut.en_ld_st_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(addr)
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.dtlb_access_i.eq(1)
+ yield dut.dtlb_hit_i.eq(0)
+ yield dut.dtlb_vaddr_i.eq(0x400000011)
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+ yield
+ yield
+ # instruction lookup
+ yield dut.en_ld_st_translation_i.eq(0)
+ yield dut.enable_translation_i.eq(1)
+ yield dut.asid_i.eq(1)
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x800000)
+ yield
+ yield
+ yield
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x200000)
+ yield
+ yield
+ yield
+ yield dut.req_port_i.data_gnt.eq(0)
+ yield dut.itlb_access_i.eq(1)
+ yield dut.itlb_hit_i.eq(0)
+ yield dut.itlb_vaddr_i.eq(0x800011)
+ yield
+ yield dut.req_port_i.data_gnt.eq(1)
+ yield
+ yield
+ yield
+def test_ptw():
+ dut = PTW()
+ run_simulation(dut, tbench(dut), vcd_name="test_ptw.vcd")
+ print("PTW Unit Test Success")
+if __name__ == "__main__":
+ test_ptw()
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from TLB.ariane.tlb import TLB
+def set_vaddr(addr):
+ yield dut.lu_vaddr_i.eq(addr)
+ yield dut.update_i.vpn.eq(addr>>12)
+def tbench(dut):
+ yield dut.lu_access_i.eq(1)
+ yield dut.lu_asid_i.eq(1)
+ yield dut.update_i.valid.eq(1)
+ yield dut.update_i.is_1G.eq(0)
+ yield dut.update_i.is_2M.eq(0)
+ yield dut.update_i.asid.eq(1)
+ yield dut.update_i.content.ppn.eq(0)
+ yield dut.update_i.content.rsw.eq(0)
+ yield dut.update_i.content.r.eq(1)
+ yield
+ addr = 0x80000
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x90001
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x28000000
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x28000001
+ yield from set_vaddr(addr)
+ addr = 0x28000001
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x1000040000
+ yield from set_vaddr(addr)
+ yield
+ addr = 0x1000040001
+ yield from set_vaddr(addr)
+ yield
+ yield dut.update_i.is_1G.eq(1)
+ addr = 0x2040000
+ yield from set_vaddr(addr)
+ yield
+ yield dut.update_i.is_1G.eq(1)
+ addr = 0x2040001
+ yield from set_vaddr(addr)
+ yield
+ yield
+if __name__ == "__main__":
+ dut = TLB()
+ run_simulation(dut, tbench(dut), vcd_name="test_tlb.vcd")
+ print("TLB Unit Test Success")
--- /dev/null
+import sys
+from nmigen.compat.sim import run_simulation
+from TLB.ariane.tlb_content import TLBContent
+from TestUtil.test_helper import assert_op, assert_eq
+def update(dut,a,t,g,m):
+ yield dut.replace_en_i.eq(1)
+ yield dut.update_i.valid.eq(1)
+ yield dut.update_i.is_512G.eq(t)
+ yield dut.update_i.is_1G.eq(g)
+ yield dut.update_i.is_2M.eq(m)
+ yield dut.update_i.vpn.eq(a)
+ yield
+ yield
+def check_hit(dut,hit,pagesize):
+ hit_d = yield dut.lu_hit_o
+ assert_eq("hit", hit_d, hit)
+ if(hit):
+ if(pagesize=="t"):
+ hitp = yield dut.lu_is_512G_o
+ assert_eq("lu_is_512G_o", hitp, 1)
+ elif(pagesize=="g"):
+ hitp = yield dut.lu_is_1G_o
+ assert_eq("lu_is_1G_o", hitp, 1)
+ elif(pagesize=="m"):
+ hitp = yield dut.lu_is_2M_o
+ assert_eq("lu_is_2M_o", hitp, 1)
+def addr(a,b,c,d):
+ return a | b << 9 | c << 18 | d << 27
+def tbench(dut):
+ yield dut.vpn0.eq(0x0A)
+ yield dut.vpn1.eq(0x0B)
+ yield dut.vpn2.eq(0x0C)
+ yield dut.vpn3.eq(0x0D)
+ yield from update(dut,addr(0xFF,0xFF,0xFF,0x0D),1,0,0)
+ yield from check_hit(dut,1,"t")
+ yield from update(dut,addr(0xFF,0xFF,0x0C,0x0D),0,1,0)
+ yield from check_hit(dut,1,"g")
+ yield from update(dut,addr(0xFF,0x0B,0x0C,0x0D),0,0,1)
+ yield from check_hit(dut,1,"m")
+ yield from update(dut,addr(0x0A,0x0B,0x0C,0x0D),0,0,0)
+ yield from check_hit(dut,1,"")
+ yield from update(dut,addr(0xAA,0xBB,0xCC,0xDD),0,0,0)
+ yield from check_hit(dut,0,"miss")
+if __name__ == "__main__":
+ dut = TLBContent(4,4)
+ #
+ run_simulation(dut, tbench(dut), vcd_name="test_tlb_content.vcd")
+ print("TLBContent Unit Test Success")
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+# Author: David Schaffenrath, TU Graz
+# Author: Florian Zaruba, ETH Zurich
+# Date: 21.4.2017
+# Description: Translation Lookaside Buffer, SV48
+# fully set-associative
+Implementation in c++:
+Text description:
+Online simulator:
+from math import log2
+from nmigen import Signal, Module, Cat, Const, Array, Elaboratable
+from nmigen.cli import verilog, rtlil
+from nmigen.lib.coding import Encoder
+from TLB.ariane.ptw import TLBUpdate, PTE, ASID_WIDTH
+from TLB.ariane.plru import PLRU
+from TLB.ariane.tlb_content import TLBContent
+class TLB(Elaboratable):
+ def __init__(self, tlb_entries=8, asid_width=8):
+ # Declares the interface of the TLB.
+ # tlb_entries: number of TLBContent entries built by elaborate()
+ # asid_width: width in bits of lu_asid_i and of the update ASID
+ # Suffix convention used below: _i marks inputs, _o marks outputs.
+ self.tlb_entries = tlb_entries
+ self.asid_width = asid_width
+ self.flush_i = Signal() # Flush signal
+ # Lookup signals
+ self.lu_access_i = Signal()
+ self.lu_asid_i = Signal(self.asid_width)
+ self.lu_vaddr_i = Signal(64)
+ self.lu_content_o = PTE()
+ self.lu_is_2M_o = Signal()
+ self.lu_is_1G_o = Signal()
+ self.lu_is_512G_o = Signal()
+ self.lu_hit_o = Signal()
+ # Update TLB
+ # pte_width is the bit length of the flattened PTE; elaborate() passes
+ # it to every TLBContent
+ self.pte_width = len(self.lu_content_o.flatten())
+ self.update_i = TLBUpdate(asid_width)
+ def elaborate(self, platform):
+ m = Module()
+ vpn3 = Signal(9) #FIXME unused signal
+ vpn2 = Signal(9)
+ vpn1 = Signal(9)
+ vpn0 = Signal(9)
+ #-------------
+ # Translation
+ #-------------
+ # SV48 defines four levels of page tables
+ m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
+ vpn1.eq(self.lu_vaddr_i[21:30]),
+ vpn2.eq(self.lu_vaddr_i[30:39]),
+ vpn3.eq(self.lu_vaddr_i[39:48]), ### FIXME
+ ]
+ tc = []
+ for i in range(self.tlb_entries):
+ tlc = TLBContent(self.pte_width, self.asid_width)
+ setattr(m.submodules, "tc%d" % i, tlc)
+ tc.append(tlc)
+ # connect inputs
+ tlc.update_i = self.update_i # saves a lot of graphviz links
+ m.d.comb += [tlc.vpn0.eq(vpn0),
+ tlc.vpn1.eq(vpn1),
+ tlc.vpn2.eq(vpn2),
+ # TODO 4th
+ tlc.flush_i.eq(self.flush_i),
+ #tlc.update_i.eq(self.update_i),
+ tlc.lu_asid_i.eq(self.lu_asid_i)]
+ tc = Array(tc)
+ #--------------
+ # Select hit
+ #--------------
+ # use Encoder to select hit index
+ # XXX TODO: assert that there's only one valid entry (one lu_hit)
+ hitsel = Encoder(self.tlb_entries)
+ m.submodules.hitsel = hitsel
+ hits = []
+ for i in range(self.tlb_entries):
+ hits.append(tc[i].lu_hit_o)
+ m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
+ idx = hitsel.o
+ active = Signal(reset_less=True)
+ m.d.comb += active.eq(~hitsel.n)
+ with m.If(active):
+ # active hit, send selected as output
+ m.d.comb += [ self.lu_is_512G_o.eq(tc[idx].lu_is_512G_o),
+ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
+ self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
+ self.lu_hit_o.eq(1),
+ self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
+ ]
+ #--------------
+ # PLRU.
+ #--------------
+ p = PLRU(self.tlb_entries)
+ plru_tree = Signal(p.TLBSZ)
+ m.submodules.plru = p
+ # connect PLRU inputs/outputs
+ # XXX TODO: assert that there's only one valid entry (one replace_en)
+ en = []
+ for i in range(self.tlb_entries):
+ en.append(tc[i].replace_en_i)
+ m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
+ p.lu_hit.eq(hitsel.i),
+ p.lu_access_i.eq(self.lu_access_i),
+ p.plru_tree.eq(plru_tree)]
+ m.d.sync += plru_tree.eq(p.plru_tree_o)
+ #--------------
+ # Sanity checks
+ #--------------
+ assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
+ "TLB size must be a multiple of 2 and greater than 1"
+ assert (self.asid_width >= 1), \
+ "ASID width must be at least 1"
+ return m
+ """
+ # Just for checking
+ function int countSetBits(logic[self.tlb_entries-1:0] vector);
+ automatic int count = 0;
+ foreach (vector[idx]) begin
+ count += vector[idx];
+ end
+ return count;
+ endfunction
+ assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
+ else $error("More then one hit in TLB!"); $stop(); end
+ assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
+ else $error("More then one TLB entry selected for next replace!");
+ """
+ def ports(self):
+ return [self.flush_i, self.lu_access_i,
+ self.lu_asid_i, self.lu_vaddr_i,
+ self.lu_is_2M_o, self.lu_1G_o, self.lu_is_512G_o, self.lu_hit_o
+ ] + self.lu_content_o.ports() + self.update_i.ports()
if __name__ == '__main__':
    # Convert a default-sized TLB to RTLIL and save it next to the script.
    dut = TLB()
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_tlb.il", "w") as il_file:
        il_file.write(rtlil_text)
--- /dev/null
+from nmigen import Signal, Module, Cat, Const, Elaboratable
+from TLB.ariane.ptw import TLBUpdate, PTE
class TLBEntry:
    """Tag fields of a single TLB entry.

    Holds the ASID, the four 9-bit SV48 VPN fields, the page-size flags
    and the valid bit as individual signals.

    Arguments:
        asid_width: width in bits of the ``asid`` signal
    """

    def __init__(self, asid_width):
        self.asid = Signal(asid_width, name="ent_asid")
        # SV48 defines four levels of page tables
        self.vpn0 = Signal(9, name="ent_vpn0")
        self.vpn1 = Signal(9, name="ent_vpn1")
        self.vpn2 = Signal(9, name="ent_vpn2")
        self.vpn3 = Signal(9, name="ent_vpn3")
        self.is_2M = Signal(name="ent_is_2M")
        self.is_1G = Signal(name="ent_is_1G")
        self.is_512G = Signal(name="ent_is_512G")
        self.valid = Signal(name="ent_valid")

    def flatten(self):
        """Return every field concatenated into one value."""
        return Cat(*self.ports())

    def eq(self, x):
        """Return an assignment copying all fields of entry ``x``."""
        return self.flatten().eq(x.flatten())

    def ports(self):
        """Return all fields of the entry as a list of signals."""
        # vpn3 and is_512G are included so that flatten()/eq() cover the
        # complete entry rather than silently dropping those two fields.
        return [self.asid, self.vpn0, self.vpn1, self.vpn2, self.vpn3,
                self.is_2M, self.is_1G, self.is_512G, self.valid]
class TLBContent(Elaboratable):
    """One TLB entry: stored tags and content, lookup compare, update/flush.

    Arguments:
        pte_width: width in bits of the stored content / ``lu_content_o``
        asid_width: width in bits of the ASID signals
    """

    def __init__(self, pte_width, asid_width):
        self.asid_width = asid_width
        self.pte_width = pte_width
        self.flush_i = Signal()  # Flush signal
        # Update TLB
        self.update_i = TLBUpdate(asid_width)
        self.vpn3 = Signal(9)
        self.vpn2 = Signal(9)
        self.vpn1 = Signal(9)
        self.vpn0 = Signal(9)
        self.replace_en_i = Signal()  # replace the following entry,
                                      # set by replacement strategy
        # Lookup signals
        self.lu_asid_i = Signal(asid_width)
        self.lu_content_o = Signal(pte_width)
        self.lu_is_512G_o = Signal()
        self.lu_is_2M_o = Signal()
        self.lu_is_1G_o = Signal()
        self.lu_hit_o = Signal()

    def elaborate(self, platform):
        """Construct and return the nmigen Module for this entry."""
        m = Module()

        tags = TLBEntry(self.asid_width)
        content = Signal(self.pte_width)

        # Defaults: no hit unless one of the branches below overrides them.
        m.d.comb += [self.lu_hit_o.eq(0),
                     self.lu_is_512G_o.eq(0),
                     self.lu_is_2M_o.eq(0),
                     self.lu_is_1G_o.eq(0)]

        # temporaries for lookup
        asid_ok = Signal(reset_less=True)
        vpn3_ok = Signal(reset_less=True)
        vpn2_ok = Signal(reset_less=True)
        vpn1_ok = Signal(reset_less=True)
        vpn0_ok = Signal(reset_less=True)
        vpn0_or_2M = Signal(reset_less=True)

        m.d.comb += [
            # compare asid and vpn*
            asid_ok.eq(tags.asid == self.lu_asid_i),
            vpn3_ok.eq(tags.vpn3 == self.vpn3),
            vpn2_ok.eq(tags.vpn2 == self.vpn2),
            vpn1_ok.eq(tags.vpn1 == self.vpn1),
            vpn0_ok.eq(tags.vpn0 == self.vpn0),
            vpn0_or_2M.eq(tags.is_2M | vpn0_ok)
        ]

        with m.If(asid_ok & tags.valid):
            # first level, only vpn3 needs to match
            with m.If(tags.is_512G & vpn3_ok):
                m.d.comb += [self.lu_content_o.eq(content),
                             self.lu_is_512G_o.eq(1),
                             self.lu_hit_o.eq(1),
                             ]
            # second level, vpn2 and vpn3 need to match
            with m.Elif(tags.is_1G & vpn2_ok & vpn3_ok):
                m.d.comb += [self.lu_content_o.eq(content),
                             self.lu_is_1G_o.eq(1),
                             self.lu_hit_o.eq(1),
                             ]
            # not a giga page hit nor a tera page hit so check further
            # NOTE(review): this branch does not require vpn2/vpn3 to
            # match -- confirm whether that is intended for SV48.
            with m.Elif(vpn1_ok):
                # this could be a 2 mega page hit or a 4 kB hit
                # output accordingly
                with m.If(vpn0_or_2M):
                    m.d.comb += [self.lu_content_o.eq(content),
                                 self.lu_is_2M_o.eq(tags.is_2M),
                                 self.lu_hit_o.eq(1),
                                 ]

        # ------------------
        # Update or Flush
        # ------------------

        # temporaries
        replace_valid = Signal(reset_less=True)
        m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i)

        # flush
        with m.If(self.flush_i):
            # invalidate (flush) conditions: all if zero or just this ASID.
            # Both comparisons are parenthesised: "|" binds tighter than
            # "==" in Python, so without them the ASID would be compared
            # against the OR of the constant and the second comparison.
            with m.If((self.lu_asid_i == Const(0, self.asid_width)) |
                      (self.lu_asid_i == tags.asid)):
                m.d.sync += tags.valid.eq(0)

        # normal replacement
        with m.Elif(replace_valid):
            m.d.sync += [  # update tag array
                tags.asid.eq(self.update_i.asid),
                tags.vpn3.eq(self.update_i.vpn[27:36]),
                tags.vpn2.eq(self.update_i.vpn[18:27]),
                tags.vpn1.eq(self.update_i.vpn[9:18]),
                tags.vpn0.eq(self.update_i.vpn[0:9]),
                tags.is_512G.eq(self.update_i.is_512G),
                tags.is_1G.eq(self.update_i.is_1G),
                tags.is_2M.eq(self.update_i.is_2M),
                tags.valid.eq(1),
                # and content as well
                content.eq(self.update_i.content.flatten())
            ]
        return m

    def ports(self):
        """Return the list of externally visible signals of the entry."""
        return [self.flush_i,
                self.lu_asid_i,
                self.lu_is_2M_o, self.lu_is_1G_o, self.lu_is_512G_o,
                self.lu_hit_o,
                ] + self.update_i.content.ports() + self.update_i.ports()
--- /dev/null
+# SPDX-License-Identifier: LGPL-2.1-or-later
+# See Notices.txt for copyright information
+from TLB.LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3
+from nmigen.back.pysim import Simulator, Delay, Tick
+import unittest
class TestLFSR(unittest.TestCase):
    """Tests for LFSRPolynomial formatting and the LFSR_POLY_3 sequence."""

    def test_poly(self):
        """repr() and str() of polynomials built from exponent lists."""
        expectations = (
            # (constructor args, expected repr, expected str)
            ((), "LFSRPolynomial([0])", "1"),
            (([1],), "LFSRPolynomial([1, 0])", "x + 1"),
            (([0, 1],), "LFSRPolynomial([1, 0])", "x + 1"),
            (([1, 2],), "LFSRPolynomial([2, 1, 0])", "x^2 + x + 1"),
            (([2],), "LFSRPolynomial([2, 0])", "x^2 + 1"),
        )
        for args, expected_repr, expected_str in expectations:
            poly = LFSRPolynomial(*args)
            self.assertEqual(repr(poly), expected_repr)
            self.assertEqual(str(poly), expected_str)
        self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1")

    def test_lfsr_3(self):
        """Simulate the LFSR: it holds while disabled, then steps."""
        dut = LFSR(LFSR_POLY_3)
        with Simulator(dut,
                       vcd_file=open("Waveforms/test_LFSR2.vcd", "w"),
                       gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"),
                       traces=[dut.state, dut.enable]) as sim:
            sim.add_clock(1e-6, 0.25e-6)
            settle = Delay(1e-7)

            def async_process():
                # While disabled the state is expected to stay at 0x1.
                yield dut.enable.eq(0)
                for _ in range(2):
                    yield Tick()
                    self.assertEqual((yield dut.state), 0x1)
                # Once enabled, one new state is expected per tick.
                yield dut.enable.eq(1)
                for expected in (0x2, 0x5, 0x3, 0x7, 0x6, 0x4, 0x1):
                    yield Tick()
                    yield settle
                    self.assertEqual((yield dut.state), expected)
                yield Tick()

            sim.add_process(async_process)
            sim.run()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from TLB.AddressEncoder import AddressEncoder
+from TestUtil.test_helper import assert_eq, assert_ne, assert_op
def set_encoder(dut, i):
    """Set the AddressEncoder input, then yield once more.

    Arguments:
        dut: The AddressEncoder being tested
        i (Input): The array of single bits to be written
    """
    drive_input = dut.i.eq(i)
    yield drive_input
    yield
def check_single_match(dut, sm, op):
    """Check the single match output of the AddressEncoder.

    Arguments:
        dut: The AddressEncoder being tested
        sm (Single Match): The expected match result
        op (Operation): (0 => ==), (1 => !=)
    """
    observed = yield dut.single_match
    assert_op("Single Match", observed, sm, op)
def check_multiple_match(dut, mm, op):
    """Check the multiple match output of the AddressEncoder.

    Arguments:
        dut: The AddressEncoder being tested
        mm (Multiple Match): The expected match result
        op (Operation): (0 => ==), (1 => !=)
    """
    observed = yield dut.multiple_match
    assert_op("Multiple Match", observed, mm, op)
def check_output(dut, o, op):
    """Check the address output of the AddressEncoder.

    Arguments:
        dut: The AddressEncoder being tested
        o (Output): The expected output
        op (Operation): (0 => ==), (1 => !=)
    """
    observed = yield dut.o
    assert_op("Output", observed, o, op)
def check_all(dut, sm, mm, o, sm_op, mm_op, o_op):
    """Check single match, multiple match and output, in that order.

    Arguments:
        dut: The AddressEncoder being tested
        sm (Single Match): The expected single match result
        mm (Multiple Match): The expected multiple match result
        o (Output): The expected output
        sm_op (Operation): single match assertion (0 => ==), (1 => !=)
        mm_op (Operation): multiple match assertion (0 => ==), (1 => !=)
        o_op (Operation): output assertion (0 => ==), (1 => !=)
    """
    checks = (
        (check_single_match, sm, sm_op),
        (check_multiple_match, mm, mm_op),
        (check_output, o, o_op),
    )
    for checker, expected, op in checks:
        yield from checker(dut, expected, op)
def tbench(dut):
    """Drive the AddressEncoder through a fixed list of input vectors.

    Each vector is applied with set_encoder and then all three outputs
    are compared for equality with the expected values.
    """
    vectors = (
        # (input, single_match, multiple_match, output)
        (0b000, 0, 0, 0),  # invalid input
        (0b001, 1, 0, 0),  # single bit
        (0b100, 1, 0, 2),  # another single bit
        # multiple match: the lowest set bit is expected, address 0
        (0b101, 0, 1, 0),
        # another multiple match: the lowest set bit is address 1
        (0b110, 0, 1, 1),
    )
    for in_val, single_match, multiple_match, output in vectors:
        yield from set_encoder(dut, in_val)
        yield from check_all(dut, single_match, multiple_match, output,
                             0, 0, 0)
def test_addr():
    """Simulate a 4-wide AddressEncoder with tbench and write a VCD file."""
    encoder = AddressEncoder(4)
    run_simulation(encoder, tbench(encoder),
                   vcd_name="Waveforms/test_address_encoder.vcd")
    print("AddressEncoder Unit Test Success")


if __name__ == "__main__":
    # Allow running this test file directly as a script.
    test_addr()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from TLB.Cam import Cam
+from TestUtil.test_helper import assert_eq, assert_ne, assert_op
def set_cam(dut, e, we, a, d):
    """Set the Cam inputs, then yield once more.

    Arguments:
        dut: The Cam being tested
        e (Enable): Whether the block is going to be enabled
        we (Write Enable): Whether the Cam will write on the next cycle
        a (Address): Where the data will be written if write enable is high
        d (Data): Either what we are looking for or will write to the address
    """
    assignments = (
        (dut.enable, e),
        (dut.write_enable, we),
        (dut.address_in, a),
        (dut.data_in, d),
    )
    for signal, value in assignments:
        yield signal.eq(value)
    yield
def check_multiple_match(dut, mm, op):
    """Check the multiple match output of the Cam.

    Arguments:
        dut: The Cam being tested
        mm (Multiple Match): The expected match result
        op (Operation): (0 => ==), (1 => !=)
    """
    observed = yield dut.multiple_match
    assert_op("Multiple Match", observed, mm, op)
def check_single_match(dut, sm, op):
    """Check the single match output of the Cam.

    Arguments:
        dut: The Cam being tested
        sm (Single Match): The expected match result
        op (Operation): (0 => ==), (1 => !=)
    """
    observed = yield dut.single_match
    assert_op("Single Match", observed, sm, op)
def check_match_address(dut, ma, op):
    """Check the match address output of the Cam.

    Arguments:
        dut: The Cam being tested
        ma (Match Address): The expected address output
        op (Operation): (0 => ==), (1 => !=)
    """
    observed = yield dut.match_address
    assert_op("Match Address", observed, ma, op)
def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op):
    """Check multiple match, single match and match address, in that order.

    Arguments:
        dut: The Cam being tested
        mm (Multiple Match): The expected multiple match result
        sm (Single Match): The expected single match result
        ma (Match Address): The expected address output
        mm_op (Operation): multiple match assertion (0 => ==), (1 => !=)
        sm_op (Operation): single match assertion (0 => ==), (1 => !=)
        ma_op (Operation): address assertion (0 => ==), (1 => !=)
    """
    checks = (
        (check_multiple_match, mm, mm_op),
        (check_single_match, sm, sm_op),
        (check_match_address, ma, ma_op),
    )
    for checker, expected, op in checks:
        yield from checker(dut, expected, op)
def tbench(dut):
    """Exercise the Cam with a fixed sequence of writes and searches.

    set_cam arguments are (enable, write_enable, address, data); every
    check compares for equality (op 0).
    """
    # NA: enable is low
    yield from set_cam(dut, 0, 0, 0, 0)
    yield
    yield from check_single_match(dut, 0, 0)

    # Read Miss Multiple
    # Note that the default starting entry data bits are all 0
    yield from set_cam(dut, 1, 0, 0, 0)
    yield
    yield from check_multiple_match(dut, 1, 0)

    # Read Miss
    # Note that the default starting entry data bits are all 0
    yield from set_cam(dut, 1, 0, 0, 1)
    yield
    yield from check_single_match(dut, 0, 0)

    # Write Entry 0
    yield from set_cam(dut, 1, 1, 0, 4)
    yield
    yield from check_single_match(dut, 0, 0)

    # Read Hit Entry 0, then Search Hit (the same stimulus applied twice)
    for _ in range(2):
        yield from set_cam(dut, 1, 0, 0, 4)
        yield
        yield from check_all(dut, 0, 1, 0, 0, 0, 0)

    # Search Miss
    yield from set_cam(dut, 1, 0, 0, 5)
    yield
    yield from check_single_match(dut, 0, 0)

    # Multiple Match test
    # Write Entry 1, then Entry 2 with the same data as Entry 1
    for entry in (1, 2):
        yield from set_cam(dut, 1, 1, entry, 5)
        yield
        yield from check_single_match(dut, 0, 0)

    # Read Hit Data 5: two entries hold it; address 1 is expected
    yield from set_cam(dut, 1, 0, 1, 5)
    yield
    yield from check_all(dut, 1, 0, 1, 0, 0, 0)

    # Verify read_warning is not caused
    # Write Entry 0
    yield from set_cam(dut, 1, 1, 0, 7)
    # Note there is no yield we immediately attempt to read in the next cycle

    # Read Hit Data 7
    yield from set_cam(dut, 1, 0, 0, 7)
    yield
    yield from check_single_match(dut, 1, 0)

    yield
def test_cam():
    """Simulate a Cam(4, 4) with tbench and write a VCD waveform."""
    cam = Cam(4, 4)
    run_simulation(cam, tbench(cam), vcd_name="Waveforms/test_cam.vcd")
    print("Cam Unit Test Success")


if __name__ == "__main__":
    # Allow running this test file directly as a script.
    test_cam()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from TestUtil.test_helper import assert_eq, assert_ne, assert_op
+from TLB.CamEntry import CamEntry
def set_cam_entry(dut, c, d):
    """Apply a command and data to the CamEntry, then reset both lines.

    Arguments:
        dut: The CamEntry being tested
        c (command): NA (0), Read (1), Write (2), Reserve (3)
        d (data): The data to be set
    """
    # First pass writes the desired values, second pass resets all lines.
    for command, data in ((c, d), (0, 0)):
        yield dut.command.eq(command)
        yield dut.data_in.eq(data)
        yield
def check_data(dut, d, op):
    """Check the data state of the CAM entry.

    Arguments:
        dut: The CamEntry being tested
        d (Data): The expected data
        op (Operation): (0 => ==), (1 => !=)
    """
    observed = yield dut.data
    assert_op("Data", observed, d, op)
def check_match(dut, m, op):
    """Check the match state of the CAM entry.

    Arguments:
        dut: The CamEntry being tested
        m (Match): The expected match
        op (Operation): (0 => ==), (1 => !=)
    """
    observed = yield dut.match
    assert_op("Match", observed, m, op)
def check_all(dut, d, m, d_op, m_op):
    """Check the data and then the match state of the CAM entry.

    Arguments:
        dut: The CamEntry being tested
        d (data): The expected data
        m (match): The expected match
        d_op (Operation): data assertion (0 => ==), (1 => !=)
        m_op (Operation): match assertion (0 => ==), (1 => !=)
    """
    for checker, expected, op in ((check_data, d, d_op),
                                  (check_match, m, m_op)):
        yield from checker(dut, expected, op)
def tbench(dut):
    """Put the CamEntry module through its paces.

    Writes and then reads various data values, checking the stored data
    and the match output after each command.
    """
    steps = (
        # (command, data, match, data op, extra cycle before checking)
        (2, 1, 0, 0, False),  # write
        (1, 2, 0, 1, False),  # read miss: data must NOT equal 2
        (1, 1, 1, 0, False),  # read hit
        (2, 5, 0, 0, True),   # overwrite
        (1, 5, 1, 0, False),  # read hit
        (3, 0, 0, 0, False),  # reset
    )
    for command, data, match, data_op, extra_cycle in steps:
        yield from set_cam_entry(dut, command, data)
        if extra_cycle:
            yield
        yield from check_all(dut, data, match, data_op, 0)

    # Extra clock cycle for waveform
    yield
def test_camentry():
    """Simulate a CamEntry(4) with tbench and write a VCD waveform."""
    entry = CamEntry(4)
    run_simulation(entry, tbench(entry),
                   vcd_name="Waveforms/test_cam_entry.vcd")
    print("CamEntry Unit Test Success")


if __name__ == "__main__":
    # Allow running this test file directly as a script.
    test_camentry()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from TLB.PermissionValidator import PermissionValidator
+from TestUtil.test_helper import assert_op
def set_validator(dut, d, xwr, sm, sa, asid):
    """Set every PermissionValidator input, then yield once more.

    Arguments:
        dut: The PermissionValidator being tested
        d: value for ``data``
        xwr: value for ``xwr``
        sm: value for ``super_mode``
        sa: value for ``super_access``
        asid: value for ``asid``
    """
    assignments = (
        (dut.data, d),
        (dut.xwr, xwr),
        (dut.super_mode, sm),
        (dut.super_access, sa),
        (dut.asid, asid),
    )
    for signal, value in assignments:
        yield signal.eq(value)
    yield
def check_valid(dut, v, op):
    """Compare the ``valid`` output with ``v``; op (0 => ==), (1 => !=)."""
    observed = yield dut.valid
    assert_op("Valid", observed, v, op)
def tbench(dut):
    """Run the PermissionValidator through a table of entries.

    80 bits are represented in each data word; ignore the MSB as it will
    be truncated. ASID is the first 4 hex values (bits 64 - 78). Every
    case makes sure the valid bit is enabled in the entry.
    """
    cases = (
        # (data, asid, super_mode, super_access, xwr, valid)
        # user mode entry valid: global bit, matching ASID
        (0x7FFF0000000000000031, 0x7FFF, 0, 0, 0, 1),
        # user mode entry valid: global bit, nonmatching ASID
        (0x7FFF0000000000000031, 0x7FF6, 0, 0, 0, 1),
        # user mode entry invalid: global bit, nonmatching ASID
        (0x7FFF0000000000000021, 0x7FF6, 0, 0, 0, 0),
        # user mode entry valid
        (0x7FFF0000000000000011, 0x7FFF, 0, 0, 0, 1),
        # user mode entry invalid
        (0x7FFF0000000000000011, 0x7FF6, 0, 0, 0, 0),
        # supervisor mode entry valid: the entry is NOT in user mode
        (0x7FFF0000000000000001, 0x7FFF, 1, 0, 0, 1),
        # supervisor mode entry invalid: the entry is in user mode
        (0x7FFF0000000000000011, 0x7FFF, 1, 0, 0, 0),
        # supervisor mode entry valid: NOT in user mode, with access
        (0x7FFF0000000000000001, 0x7FFF, 1, 1, 0, 1),
        # supervisor mode entry valid: in user mode, with access
        (0x7FFF0000000000000011, 0x7FFF, 1, 1, 0, 1),
    )
    for data, asid, super_mode, super_access, xwr, valid in cases:
        yield from set_validator(dut, data, xwr, super_mode, super_access,
                                 asid)
        yield from check_valid(dut, valid, 0)
def test_permv():
    """Simulate a PermissionValidator(15, 64) with tbench; write a VCD."""
    validator = PermissionValidator(15, 64)
    run_simulation(validator, tbench(validator),
                   vcd_name="Waveforms/test_permission_validator.vcd")
    print("PermissionValidator Unit Test Success")


if __name__ == "__main__":
    # Allow running this test file directly as a script.
    test_permv()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from TLB.PteEntry import PteEntry
+from TestUtil.test_helper import assert_op
def set_entry(dut, i):
    """Set the PteEntry input ``i``, then yield once more."""
    drive_input = dut.i.eq(i)
    yield drive_input
    yield
def check_dirty(dut, d, op):
    """Compare the ``d`` output with ``d``; op (0 => ==), (1 => !=)."""
    observed = yield dut.d
    assert_op("Dirty", observed, d, op)
def check_accessed(dut, a, op):
    """Compare the ``a`` output with ``a``; op (0 => ==), (1 => !=)."""
    observed = yield dut.a
    assert_op("Accessed", observed, a, op)
def check_global(dut, o, op):
    """Compare the ``g`` output with ``o``; op (0 => ==), (1 => !=)."""
    observed = yield dut.g
    assert_op("Global", observed, o, op)
def check_user(dut, o, op):
    """Compare the ``u`` output with ``o``; op (0 => ==), (1 => !=)."""
    observed = yield dut.u
    assert_op("User Mode", observed, o, op)
def check_xwr(dut, o, op):
    """Compare the ``xwr`` output with ``o``; op (0 => ==), (1 => !=)."""
    observed = yield dut.xwr
    assert_op("XWR", observed, o, op)
def check_asid(dut, o, op):
    """Compare the ``asid`` output with ``o``; op (0 => ==), (1 => !=)."""
    observed = yield dut.asid
    assert_op("ASID", observed, o, op)
def check_pte(dut, o, op):
    """Compare the ``pte`` output with ``o``; op (0 => ==), (1 => !=)."""
    out = yield dut.pte
    # Labelled "PTE" so a failure here is not reported as an ASID mismatch.
    assert_op("PTE", out, o, op)
def check_valid(dut, v, op):
    """Compare the ``v`` output with ``v``; op (0 => ==), (1 => !=)."""
    observed = yield dut.v
    assert_op("Valid", observed, v, op)
def check_all(dut, d, a, g, u, xwr, v, asid, pte):
    """Check every PteEntry output for equality with the expected value.

    The checks run in this order: dirty, accessed, global, user, xwr,
    asid, pte, valid.
    """
    checks = (
        (check_dirty, d),
        (check_accessed, a),
        (check_global, g),
        (check_user, u),
        (check_xwr, xwr),
        (check_asid, asid),
        (check_pte, pte),
        (check_valid, v),
    )
    for checker, expected in checks:
        yield from checker(dut, expected, 0)
def tbench(dut):
    """Feed three entries to the PteEntry and check the decoded fields.

    80 bits are represented in each input word; ignore the MSB as it will
    be truncated. ASID is the first 4 hex values (bits 64 - 78).
    """
    cases = (
        # (i, dirty, access, glob, user, xwr, valid, asid, pte)
        (0x7FFF0000000000000031, 0, 0, 1, 1, 0, 1,
         0x7FFF, 0x0000000000000031),
        (0x0FFF00000000000000FF, 1, 1, 1, 1, 7, 1,
         0x0FFF, 0x00000000000000FF),
        (0x0721000000001100001F, 0, 0, 0, 1, 7, 1,
         0x0721, 0x000000001100001F),
    )
    for i, dirty, access, glob, user, xwr, valid, asid, pte in cases:
        yield from set_entry(dut, i)
        yield from check_all(dut, dirty, access, glob, user, xwr, valid,
                             asid, pte)
    yield
def test_pteentry():
    """Simulate a PteEntry(15, 64) with tbench and write a VCD waveform."""
    entry = PteEntry(15, 64)
    run_simulation(entry, tbench(entry),
                   vcd_name="Waveforms/test_pte_entry.vcd")
    print("PteEntry Unit Test Success")


if __name__ == "__main__":
    # Allow running this test file directly as a script.
    test_pteentry()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from TLB.SetAssociativeCache import SetAssociativeCache
+from TestUtil.test_helper import assert_eq, assert_ne, assert_op
def set_sac(dut, e, c, s, t, d):
    """Set the SetAssociativeCache inputs, then yield once more.

    Arguments:
        dut: The SetAssociativeCache being tested
        e: value for ``enable``
        c: value for ``command``
        s: value for ``cset``
        t: value for ``tag``
        d: value for ``data_i``
    """
    assignments = (
        (dut.enable, e),
        (dut.command, c),
        (dut.cset, s),
        (dut.tag, t),
        (dut.data_i, d),
    )
    for signal, value in assignments:
        yield signal.eq(value)
    yield
def tbench(dut):
    """Issue two command-2 operations to cache set 1 with different tags.

    No outputs are checked; the run only produces a waveform.
    """
    enable, command, cset = 1, 2, 1
    for tag, data in ((2, 3), (5, 8)):
        yield from set_sac(dut, enable, command, cset, tag, data)
        yield
def test_assoc_cache():
    """Simulate a SetAssociativeCache(4, 4, 4, 4) and write a VCD file."""
    cache = SetAssociativeCache(4, 4, 4, 4)
    run_simulation(cache, tbench(cache),
                   vcd_name="Waveforms/test_set_associative_cache.vcd")
    print("Set Associative Cache Unit Test Success")


if __name__ == "__main__":
    # Allow running this test file directly as a script.
    test_assoc_cache()
--- /dev/null
+#import tracemalloc
+from nmigen.compat.sim import run_simulation
+from TLB.TLB import TLB
+from TestUtil.test_helper import assert_op, assert_eq
+#self.supermode = Signal(1) # Supervisor Mode
+#self.super_access = Signal(1) # Supervisor Access
+#self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2
+#self.xwr = Signal(3) # Execute, Write, Read
+#self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64
+#self.address_L1 = Signal(max=L1_size)
+#self.asid = Signal(asid_size) # Address Space IDentifier (ASID)
+#self.vma = Signal(vma_size) # Virtual Memory Address (VMA)
+#self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE)
+#self.hit = Signal(1) # Denotes if the VMA had a mapped PTE
+#self.perm_valid = Signal(1) # Denotes if the permissions are correct
+#self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA
def check_hit(dut, d):
    """Read the ``hit`` output of the TLB.

    The comparison against ``d`` is currently disabled, so the value is
    only read and nothing is asserted.

    Arguments:
        dut: The TLB being tested
        d: The expected hit value (unused while the check is disabled)
    """
    observed_hit = yield dut.hit
    #assert_eq("hit", observed_hit, d)
def test_command(dut, cmd, xwr, cycles):
    """Set ``command`` and ``xwr``, then yield ``cycles`` times.

    Arguments:
        dut: The TLB being tested
        cmd: value for ``command``
        xwr: value for ``xwr``
        cycles: number of bare yields issued after setting the inputs
    """
    yield dut.command.eq(cmd)
    yield dut.xwr.eq(xwr)
    for _ in range(cycles):
        yield
# Command encoding of the TLB under test: 10 = Write L1. The name was
# used without being defined anywhere in this file.
COMMAND_WRITE_L1 = 2


def test_write_L1(dut, vma, address_L1, asid, pte_in):
    """Set up an L1 write and issue the Write L1 command for 2 cycles.

    Arguments:
        dut: The TLB being tested
        vma: value for ``vma``
        address_L1: value for ``address_L1``
        asid: value for ``asid``
        pte_in: value for ``pte_in``
    """
    yield dut.address_L1.eq(address_L1)
    yield dut.asid.eq(asid)
    yield dut.vma.eq(vma)
    yield dut.pte_in.eq(pte_in)
    yield from test_command(dut, COMMAND_WRITE_L1, 7, 2)
# Command encoding of the TLB under test: 01 = Search. The name was used
# without being defined anywhere in this file.
COMMAND_READ = 1


def test_search(dut, vma, found):
    """Issue a search for ``vma`` for 1 cycle, then read the hit output.

    Arguments:
        dut: The TLB being tested
        vma: value for ``vma``
        found: expected hit value, passed on to check_hit
    """
    yield dut.vma.eq(vma)
    yield from test_command(dut, COMMAND_READ, 7, 1)
    yield from check_hit(dut, found)
def zero(dut):
    """Set the mode, address, ASID, VMA and PTE inputs of the TLB to 0."""
    cleared_inputs = (
        "supermode",
        "super_access",
        "mode",
        "address_L1",
        "asid",
        "vma",
        "pte_in",
    )
    for name in cleared_inputs:
        yield getattr(dut, name).eq(0)
def tbench(dut):
    """Write one L1 entry, then search for it and for an unmapped VMA."""
    mapped_vma = 0xFEEDFACE
    unmapped_vma = 0xFACEFEED

    yield from zero(dut)
    yield dut.mode.eq(0xF)  # enable TLB

    # test hit
    yield from test_write_L1(dut, mapped_vma, 0, 0xFFFF, 0xF0F0)
    for vma, found in ((mapped_vma, 1), (unmapped_vma, 0)):
        yield from test_search(dut, vma, found)
def test_tlb():
    """Simulate a TLB(15, 36, 64, 8) with tbench and write a VCD file."""
    tlb = TLB(15, 36, 64, 8)
    run_simulation(tlb, tbench(tlb), vcd_name="Waveforms/test_tlb.vcd")
    print("TLB Unit Test Success")


if __name__ == "__main__":
    # Allow running this test file directly as a script.
    test_tlb()
--- /dev/null
def assert_op(pre, o, e, op):
    """Verify the given values using the selected comparison.

    Arguments:
        pre (Prefix): Appended to the front of the assert statement
        o (Output): The output result
        e (Expected): The expected value
        op (Operation): (0 => ==), anything else => !=
    """
    checker = assert_eq if op == 0 else assert_ne
    checker(pre, o, e)
def assert_eq(p, o, e):
    """Verify that the output equals the expected value.

    Arguments:
        p (Prefix): Appended to the front of the assert statement
        o (Output): The output result
        e (Expected): The expected value
    """
    assert o == e, p + f" Output {o!s} Expected {e!s}"
def assert_ne(p, o, e):
    """Verify that the output differs from the given value.

    Arguments:
        p (Prefix): Appended to the front of the assert statement
        o (Output): The output result
        e (Expected): The value the output must not equal
    """
    assert o != e, p + f" Output {o!s} Not Expecting {e!s}"
--- /dev/null
+"""Cascading Power ISA Decoder
+This module uses CSV tables in a hierarchical/peer cascading fashion,
+to create a multi-level instruction decoder by recognising appropriate
+patterns. The output is a flattened (1-level) series of fields suitable
+for a simple RISC engine.
+This is based on Anton Blanchard's excellent microwatt work:
+The basic principle is that the python code does the heavy lifting
+(reading the CSV files, constructing the hierarchy), creating the HDL
+AST with for-loops generating switch-case statements.
+PowerDecoder takes a *list* of CSV files with an associated bit-range
+that it is requested to match against the "opcode" row of the CSV file.
+This pattern can be either an integer, a binary number, *or* a wildcard
+nmigen Case pattern of the form "001--1-100".
+Subdecoders are *additional* cases with further decoding. The "pattern"
+argument is specified as one of the Case statements (a peer of the opcode
+row in the CSV file), and thus further fields of the opcode may be decoded
+giving increasing levels of detail.
+Top Level:
+ [ (extra.csv: bit-fields entire 32-bit range
+ opcode -> matches
+ 000000---------------01000000000 -> ILLEGAL instruction
+ 01100000000000000000000000000000 -> SIM_CONFIG instruction
+ ................................ ->
+ ),
+ (major.csv: first 6 bits ONLY
+ opcode -> matches
+ 001100 -> ALU,OP_ADD (add)
+ 001101 -> ALU,OP_ADD (another type of add)
+ ...... -> ...
+ ...... -> ...
+ subdecoders:
+ 001011 this must match *MAJOR*.CSV
+ [ (minor_19.csv: bits 21 through 30 inclusive:
+ opcode -> matches
+ 0b0000000000 -> ALU,OP_MCRF
+ ............ -> ....
+ ),
+ (minor_19_00000.csv: bits 21 through 25 inclusive:
+ opcode -> matches
+ 0b00010 -> ALU,add_pcis
+ )
+ ]
+ ),
+ ]
+from nmigen import Module, Elaboratable, Signal
+from nmigen.cli import rtlil
+from power_enums import (Function, Form, InternalOp, In1Sel, In2Sel, In3Sel,
+ OutSel, RC, LdstLen, CryIn, get_csv, single_bit_flags,
+ get_signal_name, default_values)
+from collections import namedtuple
+from power_fields import DecodeFields
+from power_fieldsn import SigDecode, SignalBitRange
+Subdecoder = namedtuple("Subdecoder", ["pattern", "opcodes", "opint",
+ "bitsel", "suffix", "subdecoders"])
class PowerOp:
    """PowerOp: spec for execution.  op type (ADD etc.) reg specs etc.

    Holds one signal per enum-valued decode-table column, plus one
    single-bit signal for every entry of ``single_bit_flags``.
    """

    def __init__(self):
        self.function_unit = Signal(Function, reset_less=True)
        self.internal_op = Signal(InternalOp, reset_less=True)
        self.form = Signal(Form, reset_less=True)
        self.in1_sel = Signal(In1Sel, reset_less=True)
        self.in2_sel = Signal(In2Sel, reset_less=True)
        self.in3_sel = Signal(In3Sel, reset_less=True)
        self.out_sel = Signal(OutSel, reset_less=True)
        self.ldst_len = Signal(LdstLen, reset_less=True)
        self.rc_sel = Signal(RC, reset_less=True)
        self.cry_in = Signal(CryIn, reset_less=True)
        # flag signals are attached with setattr, so the name is passed
        # explicitly
        for bit in single_bit_flags:
            name = get_signal_name(bit)
            setattr(self, name, Signal(reset_less=True, name=name))

    def _eq(self, row=None):
        """Return the assignments that load this op from a table row.

        row: dict keyed by decode-table column name; when None the
             module-level ``default_values`` are used instead.
        """
        if row is None:
            row = default_values
        res = [self.function_unit.eq(Function[row['unit']]),
               self.form.eq(Form[row['form']]),
               self.internal_op.eq(InternalOp[row['internal op']]),
               self.in1_sel.eq(In1Sel[row['in1']]),
               self.in2_sel.eq(In2Sel[row['in2']]),
               self.in3_sel.eq(In3Sel[row['in3']]),
               self.out_sel.eq(OutSel[row['out']]),
               self.ldst_len.eq(LdstLen[row['ldst len']]),
               self.rc_sel.eq(RC[row['rc']]),
               self.cry_in.eq(CryIn[row['cry in']]),
               ]
        # a flag column missing from the row is treated as 0
        for bit in single_bit_flags:
            sig = getattr(self, get_signal_name(bit))
            res.append(sig.eq(int(row.get(bit, 0))))
        return res

    def eq(self, otherop):
        """Return the assignments copying every field from ``otherop``."""
        res = [self.function_unit.eq(otherop.function_unit),
               self.form.eq(otherop.form),
               self.internal_op.eq(otherop.internal_op),
               self.in1_sel.eq(otherop.in1_sel),
               self.in2_sel.eq(otherop.in2_sel),
               self.in3_sel.eq(otherop.in3_sel),
               self.out_sel.eq(otherop.out_sel),
               self.rc_sel.eq(otherop.rc_sel),
               self.ldst_len.eq(otherop.ldst_len),
               self.cry_in.eq(otherop.cry_in)]
        for bit in single_bit_flags:
            name = get_signal_name(bit)
            res.append(getattr(self, name).eq(getattr(otherop, name)))
        return res

    def ports(self):
        """Return every signal of the op (enum fields, then flags)."""
        regular = [self.function_unit,
                   self.in1_sel,
                   self.in2_sel,
                   self.in3_sel,
                   self.out_sel,
                   self.ldst_len,
                   self.rc_sel,
                   self.internal_op,
                   self.form,
                   # cry_in is driven by _eq()/eq() like the other fields,
                   # so it is exposed too (it was previously left out)
                   self.cry_in]
        single_bit_ports = [getattr(self, get_signal_name(x))
                            for x in single_bit_flags]
        return regular + single_bit_ports
class PowerDecoder(Elaboratable):
    """PowerDecoder - decodes an incoming opcode into the type of operation

    Arguments:
        width: number of bits of the ``opcode_in`` signal
        dec: a Subdecoder, or a list of Subdecoders, describing the tables
    """

    def __init__(self, width, dec):
        if not isinstance(dec, list):
            dec = [dec]
        # A suffix as wide as (or wider than) the opcode cannot split
        # anything, so it is dropped.  Subdecoder is an immutable
        # namedtuple: assigning d.suffix raises AttributeError, hence a
        # replaced copy is stored instead.
        self.dec = [d._replace(suffix=None)
                    if d.suffix is not None and d.suffix >= width else d
                    for d in dec]
        self.opcode_in = Signal(width, reset_less=True)

        self.op = PowerOp()
        self.width = width

    def suffix_mask(self, d):
        """Return a mask covering the low ``d.suffix`` bits."""
        return ((1 << d.suffix) - 1)

    def divide_opcodes(self, d):
        """Group the rows of ``d`` by the low ``d.suffix`` opcode bits.

        Returns a dict mapping each suffix value to a list of row copies
        whose 'opcode' has had the suffix bits shifted out.
        """
        divided = {}
        mask = self.suffix_mask(d)
        print("mask", hex(mask))
        for row in d.opcodes:
            opcode = row['opcode']
            if d.opint and '-' not in opcode:
                opcode = int(opcode, 0)
            key = opcode & mask
            opcode = opcode >> d.suffix
            r = row.copy()
            r['opcode'] = opcode
            divided.setdefault(key, []).append(r)
        return divided

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # note: default opcode is "illegal" as this is a combinatorial block

        # go through the list of CSV decoders first
        for d in self.dec:
            opcode_switch = Signal(d.bitsel[1] - d.bitsel[0],
                                   reset_less=True)
            comb += opcode_switch.eq(self.opcode_in[d.bitsel[0]:d.bitsel[1]])
            if d.suffix:
                # two-level decode: switch on the suffix bits, and hand
                # the remaining bits to one sub-decoder per suffix value
                opcodes = self.divide_opcodes(d)
                opc_in = Signal(d.suffix, reset_less=True)
                comb += opc_in.eq(opcode_switch[:d.suffix])
                with m.Switch(opc_in):
                    for key, row in opcodes.items():
                        bitsel = (d.suffix+d.bitsel[0], d.bitsel[1])
                        sd = Subdecoder(pattern=None, opcodes=row,
                                        bitsel=bitsel, suffix=None,
                                        opint=False, subdecoders=[])
                        # same width as this decoder (was hard-coded 32),
                        # matching handle_subdecoders()
                        subdecoder = PowerDecoder(width=self.width, dec=sd)
                        setattr(m.submodules, "dec_sub%d" % key, subdecoder)
                        comb += subdecoder.opcode_in.eq(self.opcode_in)
                        with m.Case(key):
                            comb += self.op.eq(subdecoder.op)
            else:
                # TODO: arguments, here (all of them) need to be a list.
                # a for-loop around the *list* of decoder args.
                with m.Switch(opcode_switch):
                    self.handle_subdecoders(m, d)
                    for row in d.opcodes:
                        opcode = row['opcode']
                        if d.opint and '-' not in opcode:
                            opcode = int(opcode, 0)
                        # rows without a functional unit are skipped
                        if not row['unit']:
                            continue
                        with m.Case(opcode):
                            comb += self.op._eq(row)
        return m

    def handle_subdecoders(self, m, d):
        """Add one sub-decoder (and its Case) per entry of d.subdecoders.

        Must be called inside the enclosing ``m.Switch`` block.
        """
        for dec in d.subdecoders:
            subdecoder = PowerDecoder(self.width, dec)
            if isinstance(dec, list):  # XXX HACK: take first pattern
                dec = dec[0]
            setattr(m.submodules, "dec%d" % dec.pattern, subdecoder)
            m.d.comb += subdecoder.opcode_in.eq(self.opcode_in)
            with m.Case(dec.pattern):
                m.d.comb += self.op.eq(subdecoder.op)

    def ports(self):
        """Return the opcode input followed by the decoded op signals."""
        return [self.opcode_in] + self.op.ports()
class TopPowerDecoder(PowerDecoder, DecodeFields):
    """Top-level decoder: a PowerDecoder that also exposes the instruction
    fields (FormX, RA, RB, ...) as bit-ranges of its own opcode_in signal.
    """

    def __init__(self, width, dec):
        # each base class is initialised explicitly; opcode_in must exist
        # before it is handed to the field decoder
        PowerDecoder.__init__(self, width, dec)
        DecodeFields.__init__(self, bitkls=SignalBitRange,
                              bitargs=[self.opcode_in])
        self.create_specs()
def create_pdecode():
    """Build the top-level decoder from the major, minor and extra CSVs."""

    def minor(pattern, csvname, bitsel, suffix=None):
        # every minor table is integer-keyed and has no sub-decoders
        return Subdecoder(pattern=pattern, opcodes=get_csv(csvname),
                          opint=True, bitsel=bitsel, suffix=suffix,
                          subdecoders=[])

    # minor 19 has extra patterns
    m19 = [minor(19, "minor_19.csv", (1, 11)),
           minor(19, "minor_19_00000.csv", (1, 6))]

    # minor opcodes.
    pminor = [
        m19,
        minor(30, "minor_30.csv", (1, 6)),
        minor(31, "minor_31.csv", (1, 11), suffix=0b00101),
        minor(58, "minor_58.csv", (0, 2)),
        minor(62, "minor_62.csv", (0, 2)),
    ]

    # top level: extra merged with major
    major = Subdecoder(pattern=None, opint=True,
                       opcodes=get_csv("major.csv"),
                       bitsel=(26, 32), suffix=None, subdecoders=pminor)
    extra = Subdecoder(pattern=None, opint=False,
                       opcodes=get_csv("extra.csv"),
                       bitsel=(0, 32), suffix=None, subdecoders=[])

    return TopPowerDecoder(32, [major, extra])
+if __name__ == '__main__':
+ pdecode = create_pdecode()
+ vl = rtlil.convert(pdecode, ports=pdecode.ports())
+ with open("decoder.il", "w") as f:
+ f.write(vl)
--- /dev/null
+"""Power ISA Decoder second stage
+based on Anton Blanchard microwatt decode2.vhdl
+from nmigen import Module, Elaboratable, Signal, Mux, Const
+from nmigen.cli import rtlil
+from power_decoder import create_pdecode
+from power_enums import (InternalOp, CryIn, Function, LdstLen,
+ In1Sel, In2Sel, In3Sel, OutSel, SPR, RC)
class DecodeA(Elaboratable):
    """DecodeA from instruction

    decodes register RA, whether immediate-zero, implicit and
    explicit CSRs
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In1Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, name="reg_a")
        self.immz_out = Signal(reset_less=True)
        self.spr_out = Data(10, "spr_a")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # The RA field is latched into its own signal.  The zero test must
        # look at the decoded field, not at reg_out.data: reg_out.data is
        # only driven inside the If below, so testing it there made the
        # condition depend on its own result.
        ra = Signal(5, reset_less=True)
        comb += ra.eq(self.dec.RA[0:-1])

        # select Register A field
        with m.If((self.sel_in == In1Sel.RA) |
                  ((self.sel_in == In1Sel.RA_OR_ZERO) &
                   (ra != Const(0, 5)))):
            comb += self.reg_out.data.eq(ra)
            comb += self.reg_out.ok.eq(1)

        # zero immediate requested
        with m.If((self.sel_in == In1Sel.RA_OR_ZERO) &
                  (ra == Const(0, 5))):
            comb += self.immz_out.eq(1)

        # decode SPR1 based on instruction type
        op = self.dec.op
        # BC or BCREG: potential implicit register (CTR)
        with m.If((op.internal_op == InternalOp.OP_BC) |
                  (op.internal_op == InternalOp.OP_BCREG)):
            with m.If(~self.dec.BO[2]):  # 3.0B p38 BO2=0, use CTR reg
                comb += self.spr_out.data.eq(SPR.CTR)  # constant: CTR
                comb += self.spr_out.ok.eq(1)
        # MFSPR or MTSPR: move-from / move-to SPRs
        with m.If((op.internal_op == InternalOp.OP_MFSPR) |
                  (op.internal_op == InternalOp.OP_MTSPR)):
            comb += self.spr_out.data.eq(self.dec.SPR[0:-1])  # SPR field, XFX
            comb += self.spr_out.ok.eq(1)

        return m
class Data:
    """A value signal paired with a one-bit "ok" qualifier signal."""

    def __init__(self, width, name):
        self.data = Signal(width, name=name, reset_less=True)
        self.ok = Signal(name="%s_ok" % name, reset_less=True)

    def eq(self, rhs):
        """Return the assignments copying data and ok from ``rhs``."""
        pairs = ((self.data, rhs.data), (self.ok, rhs.ok))
        return [dst.eq(src) for dst, src in pairs]

    def ports(self):
        """Return both signals, data first."""
        return [self.data, self.ok]
class DecodeB(Elaboratable):
    """DecodeB from instruction

    decodes register RB, different forms of immediate (signed, unsigned),
    and implicit SPRs
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In2Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_b")
        self.imm_out = Data(64, "imm_b")
        self.spr_out = Data(10, "spr_b")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        dec = self.dec

        # immediate source for each selector value
        immediates = (
            (In2Sel.CONST_UI, dec.UI[0:-1]),
            (In2Sel.CONST_SI, dec.SI[0:-1]),  # TODO: sign-extend here?
            # the "HI" forms place the 16-bit immediate in the upper
            # halfword, i.e. shifted left by 16 (previously 4)
            (In2Sel.CONST_UI_HI, dec.UI[0:-1] << 16),
            (In2Sel.CONST_SI_HI, dec.SI[0:-1] << 16),  # TODO: sign-extend?
            (In2Sel.CONST_LI, dec.LI[0:-1] << 2),
            (In2Sel.CONST_BD, dec.BD[0:-1] << 2),
            (In2Sel.CONST_DS, dec.DS[0:-1] << 2),
            (In2Sel.CONST_M1, ~Const(0, 64)),  # all 1s
            (In2Sel.CONST_SH, dec.sh[0:-1]),
            (In2Sel.CONST_SH32, dec.SH32[0:-1]),
        )

        # select Register B field
        with m.Switch(self.sel_in):
            with m.Case(In2Sel.RB):
                comb += self.reg_out.data.eq(dec.RB[0:-1])
                comb += self.reg_out.ok.eq(1)
            for sel, value in immediates:
                with m.Case(sel):
                    comb += self.imm_out.data.eq(value)
                    comb += self.imm_out.ok.eq(1)

        # decode SPR2 based on instruction type
        op = dec.op
        # BCREG implicitly uses CTR or LR for 2nd reg
        with m.If(op.internal_op == InternalOp.OP_BCREG):
            with m.If(dec.FormXL.XO[9]):  # 3.0B p38 top bit of XO
                comb += self.spr_out.data.eq(SPR.CTR)
            with m.Else():
                comb += self.spr_out.data.eq(SPR.LR)
            comb += self.spr_out.ok.eq(1)

        return m
class DecodeC(Elaboratable):
    """DecodeC from instruction

    decodes register RC
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(In3Sel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_c")

    def elaborate(self, platform):
        m = Module()

        # select Register C field: only the RS selector produces a register
        with m.If(self.sel_in == In3Sel.RS):
            m.d.comb += [
                self.reg_out.data.eq(self.dec.RS[0:-1]),
                self.reg_out.ok.eq(1),
            ]

        return m
class DecodeOut(Elaboratable):
    """DecodeOut from instruction

    decodes output register RA, RT or SPR
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(OutSel, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.reg_out = Data(5, "reg_o")
        self.spr_out = Data(10, "spr_o")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        reg, spr = self.reg_out, self.spr_out

        # select Register out field
        with m.Switch(self.sel_in):
            with m.Case(OutSel.RT):
                comb += [reg.data.eq(self.dec.RT[0:-1]), reg.ok.eq(1)]
            with m.Case(OutSel.RA):
                comb += [reg.data.eq(self.dec.RA[0:-1]), reg.ok.eq(1)]
            with m.Case(OutSel.SPR):
                # SPR number comes from the XFX form
                comb += [spr.data.eq(self.dec.SPR[0:-1]), spr.ok.eq(1)]

        return m
class DecodeRC(Elaboratable):
    """DecodeRc from instruction

    decodes Record bit Rc
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(RC, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.rc_out = Data(1, "rc")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        out = self.rc_out

        # select Record bit out field
        with m.Switch(self.sel_in):
            with m.Case(RC.RC):
                # taken from the instruction's Rc field
                comb += [out.data.eq(self.dec.Rc[0:-1]), out.ok.eq(1)]
            with m.Case(RC.ONE):
                comb += [out.data.eq(1), out.ok.eq(1)]
            with m.Case(RC.NONE):
                comb += [out.data.eq(0), out.ok.eq(1)]

        return m
class DecodeOE(Elaboratable):
    """DecodeOE from instruction

    decodes OE field: uses RC decode detection which might not be good

    -- For now, use "rc" in the decode table to decide whether oe exists.
    -- This is not entirely correct architecturally: For mulhd and
    -- mulhdu, the OE field is reserved. It remains to be seen what an
    -- actual POWER9 does if we set it on those instructions, for now we
    -- test that further down when assigning to the multiplier oe input.
    """

    def __init__(self, dec):
        self.dec = dec
        self.sel_in = Signal(RC, reset_less=True)
        self.insn_in = Signal(32, reset_less=True)
        self.oe_out = Data(1, "oe")

    def elaborate(self, platform):
        m = Module()
        out = self.oe_out

        # select OE bit out field (only when the table says Rc is decoded)
        with m.Switch(self.sel_in):
            with m.Case(RC.RC):
                m.d.comb += [out.data.eq(self.dec.OE[0:-1]), out.ok.eq(1)]

        return m
class XerBits:
    """Bundle of five single-bit signals: ca, ca32, ov, ov32 and so."""

    _FIELDS = ("ca", "ca32", "ov", "ov32", "so")

    def __init__(self):
        # kept as plain assignments, one per field
        self.ca = Signal(reset_less=True)
        self.ca32 = Signal(reset_less=True)
        self.ov = Signal(reset_less=True)
        self.ov32 = Signal(reset_less=True)
        self.so = Signal(reset_less=True)

    def ports(self):
        """Return the five signals in declaration order."""
        return [getattr(self, field) for field in self._FIELDS]
class Decode2ToExecute1Type:
    """Record of signals produced by the second decode stage."""

    def __init__(self):
        self.valid = Signal(reset_less=True)
        self.insn_type = Signal(InternalOp, reset_less=True)
        self.nia = Signal(64, reset_less=True)

        # register / immediate / SPR selections, each with an "ok" flag
        self.write_reg = Data(5, name="rego")
        self.read_reg1 = Data(5, name="reg1")
        self.read_reg2 = Data(5, name="reg2")
        self.read_reg3 = Data(5, name="reg3")
        self.imm_data = Data(64, name="imm")
        self.write_spr = Data(10, name="spro")
        self.read_spr1 = Data(10, name="spr1")
        self.read_spr2 = Data(10, name="spr2")
        #self.read_data1 = Signal(64, reset_less=True)
        #self.read_data2 = Signal(64, reset_less=True)
        #self.read_data3 = Signal(64, reset_less=True)
        #self.cr = Signal(32, reset_less=True) # NO: this is from the CR SPR
        #self.xerc = XerBits() # NO: this is from the XER SPR

        # per-instruction flags
        self.lk = Signal(reset_less=True)
        self.rc = Data(1, "rc")
        self.oe = Data(1, "oe")
        self.invert_a = Signal(reset_less=True)
        self.invert_out = Signal(reset_less=True)
        self.input_carry = Signal(CryIn, reset_less=True)
        self.output_carry = Signal(reset_less=True)
        self.input_cr = Signal(reset_less=True)
        self.output_cr = Signal(reset_less=True)
        self.is_32bit = Signal(reset_less=True)
        self.is_signed = Signal(reset_less=True)
        self.insn = Signal(32, reset_less=True)
        self.data_len = Signal(4, reset_less=True)  # bytes
        self.byte_reverse = Signal(reset_less=True)
        self.sign_extend = Signal(reset_less=True)  # do we need this?
        self.update = Signal(reset_less=True)  # is this an update instruction?

    def ports(self):
        """Return the plain signals followed by each Data bundle's ports."""
        res = [self.valid, self.insn_type, self.nia,
               #self.read_data1, self.read_data2, self.read_data3,
               #self.cr,
               self.lk,
               self.invert_a, self.invert_out,
               self.input_carry, self.output_carry,
               self.input_cr, self.output_cr,
               self.is_32bit, self.is_signed,
               self.insn,
               self.data_len, self.byte_reverse, self.sign_extend,
               self.update]
        bundles = (self.oe, self.rc,
                   self.write_spr, self.read_spr1, self.read_spr2,
                   self.write_reg,
                   self.read_reg1, self.read_reg2, self.read_reg3,
                   self.imm_data)
        for bundle in bundles:
            res = res + bundle.ports()
        # + self.xerc.ports()
        return res
class PowerDecode2(Elaboratable):
    """Second decode stage: turns the first-stage op plus the raw
    instruction into the Decode2ToExecute1Type record ``e``.
    """

    def __init__(self, dec):
        self.dec = dec
        self.e = Decode2ToExecute1Type()

    def ports(self):
        return self.dec.ports() + self.e.ports()

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        dec, e = self.dec, self.e
        op = dec.op

        # set up submodule decoders
        m.submodules.dec = dec
        m.submodules.dec_a = dec_a = DecodeA(dec)
        m.submodules.dec_b = dec_b = DecodeB(dec)
        m.submodules.dec_c = dec_c = DecodeC(dec)
        m.submodules.dec_o = dec_o = DecodeOut(dec)
        m.submodules.dec_rc = dec_rc = DecodeRC(dec)
        m.submodules.dec_oe = dec_oe = DecodeOE(dec)

        # copy instruction through...
        insn_sinks = [e.insn] + [sub.insn_in for sub in
                                 (dec_a, dec_b, dec_c, dec_o,
                                  dec_rc, dec_oe)]
        for sink in insn_sinks:
            comb += sink.eq(dec.opcode_in)

        # ...and subdecoders' input fields
        selectors = (
            (dec_a, op.in1_sel),
            (dec_b, op.in2_sel),
            (dec_c, op.in3_sel),
            (dec_o, op.out_sel),
            (dec_rc, op.rc_sel),
            (dec_oe, op.rc_sel),  # XXX should be OE sel
        )
        for sub, sel in selectors:
            comb += sub.sel_in.eq(sel)

        # decode LD/ST length (in bytes)
        byte_counts = ((LdstLen.is1B, 1), (LdstLen.is2B, 2),
                       (LdstLen.is4B, 4), (LdstLen.is8B, 8))
        with m.Switch(op.ldst_len):
            for length, nbytes in byte_counts:
                with m.Case(length):
                    comb += e.data_len.eq(nbytes)

        #comb += self.e.nia.eq(self.dec.nia) # XXX TODO

        # an op with no functional unit is reported as illegal
        itype = Mux(op.function_unit == Function.NONE,
                    InternalOp.OP_ILLEGAL,
                    op.internal_op)
        comb += e.insn_type.eq(itype)

        # registers a, b, c and out, the immediate, rc/oe, then SPRs
        bundles = (
            (e.read_reg1, dec_a.reg_out),
            (e.read_reg2, dec_b.reg_out),
            (e.read_reg3, dec_c.reg_out),
            (e.write_reg, dec_o.reg_out),
            (e.imm_data, dec_b.imm_out),
            (e.rc, dec_rc.rc_out),
            (e.oe, dec_oe.oe_out),
            (e.read_spr1, dec_a.spr_out),
            (e.read_spr2, dec_b.spr_out),
            (e.write_spr, dec_o.spr_out),
        )
        for dst, src in bundles:
            comb += dst.eq(src)

        # decoded/selected instruction flags
        comb += [
            e.invert_a.eq(op.inv_a),
            e.invert_out.eq(op.inv_out),
            e.input_carry.eq(op.cry_in),
            e.output_carry.eq(op.cry_out),
            e.is_32bit.eq(op.is_32b),
            e.is_signed.eq(op.sgn),
        ]
        with m.If(op.lk):
            comb += e.lk.eq(dec.LK[0:-1])  # XXX TODO: accessor
        comb += [
            e.byte_reverse.eq(op.br),
            e.sign_extend.eq(op.sgn_ext),
            e.update.eq(op.upd),
            e.input_cr.eq(op.cr_in),
            e.output_cr.eq(op.cr_out),
        ]

        return m
+if __name__ == '__main__':
+ pdecode = create_pdecode()
+ dec2 = PowerDecode2(pdecode)
+ vl = rtlil.convert(dec2, ports=dec2.ports() + pdecode.ports())
+ with open("dec2.il", "w") as f:
+ f.write(vl)
--- /dev/null
+from enum import Enum, unique
+import csv
+import os
+import requests
def get_csv(name):
    """Return the rows of a decode table as a list of dicts.

    The CSV is looked up next to this module; when it is missing it is
    downloaded once and saved there for later calls.

    name: file name of the table, e.g. "major.csv"

    Raises requests.HTTPError if the download is answered with an error
    status, so an error page is never cached as if it were a table.
    """
    file_dir = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(file_dir, name)
    if not os.path.isfile(file_path):
        url = 'https://libre-riscv.org/openpower/isatables/' + name
        # bounded wait: without a timeout a stalled server hangs the build
        r = requests.get(url, allow_redirects=True, timeout=30)
        r.raise_for_status()
        with open(file_path, 'w') as outfile:
            outfile.write(r.content.decode("utf-8"))
    # newline='' lets the csv module do its own line-ending handling
    with open(file_path, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        return list(reader)
+# names of the fields in the tables that don't correspond to an enum
+single_bit_flags = ['CR in', 'CR out', 'inv A', 'inv out',
+ 'cry out', 'BR', 'sgn ext', 'upd', 'rsrv', '32b',
+ 'sgn', 'lk', 'sgl pipe']
+# default values for fields in the table
+default_values = {'unit': "NONE", 'internal op': "OP_ILLEGAL",
+ 'in1': "RA", 'in2': 'NONE', 'in3': 'NONE', 'out': 'NONE',
+ 'ldst len': 'NONE',
+ 'rc' : 'NONE', 'cry in' : 'ZERO', 'form': 'NONE'}
def get_signal_name(name):
    """Turn a table column title into an attribute-style signal name.

    Titles starting with a digit get an "is_" prefix; the result is
    lower-cased and spaces become underscores.
    """
    prefix = "is_" if name[0].isdigit() else ""
    return (prefix + name).lower().replace(' ', '_')
@unique
class Function(Enum):
    """Values of the 'unit' column of the decode tables.

    @unique makes an accidentally duplicated encoding an error instead of
    a silent alias.
    """
    NONE = 0
    ALU = 1
    LDST = 2
@unique
class Form(Enum):
    """Values of the 'form' column of the decode tables.

    @unique makes an accidentally duplicated encoding an error instead of
    a silent alias.
    """
    NONE = 0
    I = 1
    B = 2
    SC = 3
    D = 4
    DS = 5
    DQ = 6
    DX = 7
    X = 8
    XL = 9
    XFX = 10
    XFL = 11
    XX1 = 12
    XX2 = 13
    XX3 = 14
    XX4 = 15
    XS = 16
    XO = 17
    A = 18
    M = 19
    MD = 20
    MDS = 21
    VA = 22
    VC = 23
    VX = 24
    EVX = 25
    EVS = 26
    Z22 = 27
    Z23 = 28
@unique
class InternalOp(Enum):
    """Values of the 'internal op' column of the decode tables.

    OP_ILLEGAL is the default op (see ``default_values``) and what
    PowerDecode2 reports for instructions without a functional unit.  It
    is encoded as 0, matching the decoder's "default opcode is illegal"
    note.
    NOTE(review): values 3, 32 and 40 are unassigned in this listing -
    confirm no table row needs them.
    """
    OP_ILLEGAL = 0
    OP_NOP = 1
    OP_ADD = 2
    OP_AND = 4
    OP_ATTN = 5
    OP_B = 6
    OP_BC = 7
    OP_BCREG = 8
    OP_BPERM = 9
    OP_CMP = 10
    OP_CMPB = 11
    OP_CMPEQB = 12
    OP_CMPRB = 13
    OP_CNTZ = 14
    OP_CRAND = 15
    OP_CRANDC = 16
    OP_CREQV = 17
    OP_CRNAND = 18
    OP_CRNOR = 19
    OP_CROR = 20
    OP_CRORC = 21
    OP_CRXOR = 22
    OP_DARN = 23
    OP_DCBF = 24
    OP_DCBST = 25
    OP_DCBT = 26
    OP_DCBTST = 27
    OP_DCBZ = 28
    OP_DIV = 29
    OP_DIVE = 30
    OP_EXTS = 31
    OP_ICBI = 33
    OP_ICBT = 34
    OP_ISEL = 35
    OP_ISYNC = 36
    OP_LOAD = 37
    OP_STORE = 38
    OP_MADDHD = 39
    OP_MADDLD = 41
    OP_MCRF = 42
    OP_MCRXR = 43
    OP_MCRXRX = 44
    OP_MFCR = 45
    OP_MFSPR = 46
    OP_MOD = 47
    OP_MTCRF = 48
    OP_MTSPR = 49
    OP_MUL_L64 = 50
    OP_MUL_H64 = 51
    OP_MUL_H32 = 52
    OP_OR = 53
    OP_POPCNT = 54
    OP_PRTY = 55
    OP_RLC = 56
    OP_RLCL = 57
    OP_RLCR = 58
    OP_SETB = 59
    OP_SHL = 60
    OP_SHR = 61
    OP_SYNC = 62
    OP_TD = 63
    OP_TDI = 64
    OP_TW = 65
    OP_TWI = 66
    OP_XOR = 67
@unique
class In1Sel(Enum):
    """Values of the 'in1' column: selector for the first operand.

    @unique makes an accidentally duplicated encoding an error instead of
    a silent alias.
    """
    RA = 0
    RA_OR_ZERO = 1
    NONE = 2
    SPR = 3
@unique
class In2Sel(Enum):
    """Values of the 'in2' column: selector for the second operand.

    CONST_UI_HI and CONST_SI_HI are switched on by DecodeB; they take the
    two encodings (4 and 5) that were missing from the sequence.
    """
    NONE = 0
    RB = 1
    CONST_UI = 2
    CONST_SI = 3
    CONST_UI_HI = 4
    CONST_SI_HI = 5
    CONST_LI = 6
    CONST_BD = 7
    CONST_DS = 8
    CONST_M1 = 9
    CONST_SH = 10
    CONST_SH32 = 11
    SPR = 12
@unique
class In3Sel(Enum):
    """Values of the 'in3' column: selector for the third operand.

    @unique makes an accidentally duplicated encoding an error instead of
    a silent alias.
    """
    NONE = 0
    RS = 1
@unique
class OutSel(Enum):
    """Values of the 'out' column: selector for the result destination.

    @unique makes an accidentally duplicated encoding an error instead of
    a silent alias.
    """
    NONE = 0
    RT = 1
    RA = 2
    SPR = 3
@unique
class LdstLen(Enum):
    """Values of the 'ldst len' column: load/store access size.

    The member value is an encoding, not a byte count; PowerDecode2 maps
    is1B/is2B/is4B/is8B to 1, 2, 4 and 8 bytes.
    """
    NONE = 0
    is1B = 1
    is2B = 2
    is4B = 3
    is8B = 4
@unique
class RC(Enum):
    """Values of the 'rc' column: how the record bit is obtained.

    DecodeRC outputs 0 for NONE, 1 for ONE, and the instruction's Rc field
    for RC.
    """
    NONE = 0
    ONE = 1
    RC = 2
@unique
class CryIn(Enum):
    """Values of the 'cry in' column: selector for the carry input.

    @unique makes an accidentally duplicated encoding an error instead of
    a silent alias.
    """
    ZERO = 0
    ONE = 1
    CA = 2
@unique
class SPR(Enum):
    """Special-purpose register identifiers.

    The decoders drive these values onto their 10-bit spr_out signals
    (CTR and LR are used for the implicit branch registers).  @unique
    guards against two names sharing one number.
    """
    XER = 1
    LR = 8
    CTR = 9
    TB = 268
    SRR0 = 26
    SRR1 = 27
    HSRR0 = 314
    HSRR1 = 315
    SPRG0 = 272
    SPRG1 = 273
    SPRG2 = 274
    SPRG3 = 275
    SPRG3U = 259
    HSPRG0 = 304
    HSPRG1 = 305
--- /dev/null
+from collections import OrderedDict, namedtuple
class BitRange(OrderedDict):
    """BitRange: remaps from straight indices (0,1,2..) to bit numbers

    Indexing with an integer returns the bit number stored for that index;
    indexing with a slice returns the list of bit numbers for that range.
    """

    def __getitem__(self, subscript):
        if isinstance(subscript, slice):
            # slice over the stored bit numbers, in insertion order
            return list(self.values())[subscript]
        # go straight to the dict lookup: self[subscript] would call this
        # method again and recurse until the stack overflows
        return OrderedDict.__getitem__(self, subscript)
def decode_instructions(form):
    """Group field descriptions by the instruction formats that use them.

    Lines are accumulated until a line starting with "Formats" is seen;
    the first accumulated line is then recorded under every format named
    on that line (comma separated, after the last ':').

    Returns a dict mapping format name to a list of field lines.
    """
    by_format = {}
    pending = []
    for raw in form:
        text = raw.strip()
        if not text.startswith("Formats"):
            pending.append(text)
            continue
        names = text.split(":")[-1].replace(" ", "").split(",")
        for name in names:
            by_format.setdefault(name, []).append(pending[0])
        pending = []
    return by_format
def decode_form_header(hdr):
    """Map character offsets in a '|'-separated header to bit numbers.

    For every non-empty column whose first character is a digit, the
    leading number is stored under the column's character offset.  Empty
    columns do not advance the offset; all others advance it by their
    length plus one.
    """
    columns = {}
    offset = 0
    pieces = hdr.strip().split("|")
    print (pieces)
    for piece in pieces:
        if not piece:
            continue
        if piece[0].isdigit():
            columns[offset] = int(piece.strip().split(' ')[0])
        offset += len(piece) + 1
    return columns
def find_unique(d, key):
    """Return ``key`` if it is unused in ``d``, else the first free
    ``key_1``, ``key_2``, ... variant.
    """
    candidate = key
    suffix = 0
    while candidate in d:
        suffix += 1
        candidate = "%s_%d" % (key, suffix)
    return candidate
def decode_line(header, line):
    """Decode one '|'-separated form line into {fieldname: (start, end)}.

    header: dict from character offset to bit number, as produced by
            decode_form_header
    line:   the form line; empty or '/'-prefixed columns are reserved
            gaps, which close the preceding field but add no entry

    A field ends where the next column starts; the last field of the line
    ends at bit 32.  A line with no named field yields an empty dict.
    """
    line = line.strip()
    res = {}
    count = 0
    print ("line", line)
    prev_fieldname = None
    for f in line.split("|"):
        if not f:
            continue
        end = count + len(f) + 1
        fieldname = f.strip()
        if not fieldname or fieldname.startswith('/'):
            # reserved gap: close the open field at this column
            if prev_fieldname is not None:
                res[prev_fieldname] = (res[prev_fieldname], header[count])
                prev_fieldname = None
            count = end
            continue
        bitstart = header[count]
        if prev_fieldname is not None:
            res[prev_fieldname] = (res[prev_fieldname], bitstart)
        # start bit only for now; turned into (start, end) once the next
        # column (or the end of the line) is known
        res[fieldname] = bitstart
        count = end
        prev_fieldname = fieldname
    # Close the final field.  Skipped when nothing is open (empty line, or
    # the line ended in a gap): previously this stored a bogus None key,
    # or failed because bitstart was never assigned.
    if prev_fieldname is not None:
        res[prev_fieldname] = (bitstart, 32)
    return res
def decode_form(form):
    """Merge the decoded lines of one form into a single field table.

    form[0] is the header line, the remaining entries are layout lines.
    A field name seen again with a different bit range is stored under
    ``name_1``, ``name_2``, ...; a repeat of the base range, or of the
    most recent alternate range, is ignored.
    """
    header = decode_form_header(form[0])
    print ("header", header)
    decoded = []
    for line in form[1:]:
        entry = decode_line(header, line)
        if entry:
            decoded.append(entry)

    fields = {}
    alt_count = {}
    for entry in decoded:
        for name, (start, end) in entry.items():
            span = (start, end)
            if name not in fields:
                fields[name] = span
                continue
            if fields[name] == span:
                continue  # already in and matching for this Form
            if name in alt_count:
                latest = "%s_%d" % (name, alt_count[name])
                if fields[latest] == span:
                    continue
            alt_count[name] = alt_count.get(name, 0) + 1
            fields["%s_%d" % (name, alt_count[name])] = span
    return fields
class DecodeFields:
    """Reads the instruction-field definitions and exposes them per Form.

    Arguments:
        bitkls:  class used to hold one field's index-to-bit mapping
        bitargs: positional arguments passed to ``bitkls``
        fname:   path of the text file holding the definitions
    """

    def __init__(self, bitkls=BitRange, bitargs=(), fname="fields.txt"):
        self.bitkls = bitkls
        self.bitargs = bitargs
        self.fname = fname

    def create_specs(self):
        """Parse the file and create the FormXX and shortcut attributes."""
        self.forms, self.instrs = self.decode_fields()
        self.form_names = forms = self.instrs.keys()
        for form in forms:
            fields = self.instrs[form]
            fk = fields.keys()
            Fields = namedtuple("Fields", fk)
            instr = Fields(**fields)
            setattr(self, "Form%s" % form, instr)
        # now add in some commonly-used fields (should be done automatically)
        # note that these should only be ones which are the same on all Forms
        # note: these are from microwatt insn_helpers.vhdl
        self.RS = self.FormX.RS
        self.RT = self.FormX.RT
        self.RA = self.FormX.RA
        self.RB = self.FormX.RB
        self.SI = self.FormD.SI
        self.UI = self.FormD.UI
        self.L = self.FormD.L
        self.SH32 = self.FormM.SH
        self.sh = self.FormMD.sh
        self.MB32 = self.FormM.MB
        self.ME32 = self.FormM.ME
        self.LI = self.FormI.LI
        self.LK = self.FormI.LK
        self.AA = self.FormB.AA
        self.Rc = self.FormX.Rc
        # OE is its own XO-form field (this used to alias FormXO.Rc)
        self.OE = self.FormXO.OE
        self.BD = self.FormB.BD
        self.BF = self.FormX.BF
        self.CR = self.FormXL.XO  # used by further mcrf decoding
        self.BB = self.FormXL.BB
        self.BA = self.FormXL.BA
        self.BT = self.FormXL.BT
        self.FXM = self.FormXFX.FXM
        self.BO = self.FormXL.BO
        self.BI = self.FormXL.BI
        self.BH = self.FormXL.BH
        self.D = self.FormD.D
        self.DS = self.FormDS.DS
        self.TO = self.FormX.TO
        self.BC = self.FormA.BC
        self.SH = self.FormX.SH
        self.ME = self.FormM.ME
        self.MB = self.FormM.MB
        self.SPR = self.FormXFX.SPR

    def decode_fields(self):
        """Split the file into '#'-headed sections and decode the fields.

        Returns (res, inst): ``res`` is currently always empty (form
        decoding is disabled); ``inst`` maps each format name to its
        decoded fields.
        """
        with open(self.fname) as f:
            txt = f.readlines()
        forms = {}
        reading_data = False
        for l in txt:
            print ("line", l)
            l = l.strip()
            if len(l) == 0:
                continue
            if reading_data:
                if l[0] == '#':
                    reading_data = False
                else:
                    forms[heading].append(l)
            if not reading_data:
                assert l[0] == '#'
                heading = l[1:].strip()
                #if heading.startswith('1.6.28'): # skip instr fields for now
                    #break
                # a section is keyed by the last word of its heading
                heading = heading.split(' ')[-1]
                print ("heading", heading)
                reading_data = True
                forms[heading] = []

        res = {}
        inst = {}

        for hdr, form in forms.items():
            print ("heading", hdr)
            # test this section's own heading: the parse loop's leftover
            # `heading` only ever names the last section of the file
            if hdr == 'Fields':
                by_format = decode_instructions(form)
                for fmt, field in by_format.items():
                    inst[fmt] = self.decode_instruction_fields(field)
            #else:
            #    res[hdr] = decode_form(form)
        return res, inst

    def decode_instruction_fields(self, fields):
        """Turn "NAME (a,b:c,...)" lines into {name: bit mapping}.

        Each comma-separated item is one bit number or an inclusive
        ``start:end`` range; bits are stored under consecutive indices
        starting at 0.  Repeated names are made unique with find_unique.
        """
        res = {}
        for field in fields:
            f, spec = field.strip().split(" ")
            d = self.bitkls(*self.bitargs)
            idx = 0
            # spec[1:-1] drops the surrounding brackets
            for s in spec[1:-1].split(","):
                s = s.split(':')
                if len(s) == 1:
                    d[idx] = int(s[0])
                    idx += 1
                else:
                    start = int(s[0])
                    end = int(s[1])
                    while start <= end:
                        d[idx] = start
                        idx += 1
                        start += 1
            f = f.replace(",", "_")
            unique = find_unique(res, f)
            res[unique] = d

        return res
+if __name__ == '__main__':
+ dec = DecodeFields()
+ dec.create_specs()
+ forms, instrs = dec.forms, dec.instrs
+ for hdr, form in forms.items():
+ print ()
+ print (hdr)
+ for k, v in form.items():
+ #print ("line", l)
+ #for k, v in l.items():
+ print ("%s: %d-%d" % (k, v[0], v[1]))
+ for form, field in instrs.items():
+ print ()
+ print (form)
+ for f, vals in field.items():
+ print (" ", f, vals)
+ print (dec.FormX)
+ print (dec.FormX.A)
+ print (dir(dec.FormX))
+ print (dec.FormX._fields)
--- /dev/null
+from collections import OrderedDict
+from power_fields import DecodeFields, BitRange
+from nmigen import Module, Elaboratable, Signal, Cat
+from nmigen.cli import rtlil
class SignalBitRange(BitRange):
    """BitRange whose lookups return bits of an attached signal.

    Arguments:
        signal: the signal that the stored bit numbers refer to
    """

    def __init__(self, signal):
        BitRange.__init__(self)
        self.signal = signal

    def __getitem__(self, subs):
        """Return the signal bit for an index, or a Cat of bits for a slice.

        Negative slice bounds are NOT Python-style: a bound b < 0 becomes
        len(self) - b - 1, so a stop of -1 means "up to and including the
        last entry" and [0:-1] selects every bit of the field.
        """
        # *sigh* field numberings are bit-inverted.  PowerISA 3.0B section 1.3.2
        width = self.signal.shape()[0]
        if isinstance(subs, slice):
            start, stop, step = subs.start, subs.stop, subs.step
            if step is None:
                step = 1
            if start is None:
                start = 0
            if stop is None:
                stop = -1
            if start < 0:
                start = len(self) - start - 1
            if stop < 0:
                stop = len(self) - stop - 1
            res = []
            for t in range(start, stop, step):
                k = OrderedDict.__getitem__(self, t)
                res.append(self.signal[width-k-1])
            return Cat(*res)
        k = OrderedDict.__getitem__(self, subs)
        return self.signal[width-k-1]
class SigDecode(Elaboratable):
    """Small demonstration module wiring a few decoded instruction fields
    (FormX.S, FormX.SH, FormDQ.SX_S) to output signals.

    Arguments:
        width: number of bits of the ``opcode_in`` signal
    """

    def __init__(self, width):
        self.opcode_in = Signal(width, reset_less=False)
        self.df = DecodeFields(SignalBitRange, [self.opcode_in])
        self.df.create_specs()
        # each output is as wide as the field it mirrors
        self.x_s = Signal(len(self.df.FormX.S), reset_less=True)
        self.x_sh = Signal(len(self.df.FormX.SH), reset_less=True)
        self.dq_xs_s = Signal(len(self.df.FormDQ.SX_S), reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        comb += self.x_s.eq(self.df.FormX.S[0])
        comb += self.x_sh.eq(self.df.FormX.SH[0:-1])
        comb += self.dq_xs_s.eq(self.df.FormDQ.SX_S[0:-1])
        return m

    def ports(self):
        # dq_xs_s is driven in elaborate() like the other two outputs, so
        # it is exposed too (it was previously left out)
        return [self.opcode_in, self.x_s, self.x_sh, self.dq_xs_s]
def create_sigdecode():
    """Return a SigDecode for a 32-bit opcode."""
    return SigDecode(32)
+if __name__ == '__main__':
+ sigdecode = create_sigdecode()
+ vl = rtlil.convert(sigdecode, ports=sigdecode.ports())
+ with open("decoder.il", "w") as f:
+ f.write(vl)
--- /dev/null
+from nmigen import Module, Signal
+from nmigen.back.pysim import Simulator, Delay
+from nmigen.test.utils import FHDLTestCase
+from nmigen.cli import rtlil
+import sys
+import os
+import unittest
+from power_decoder import (PowerDecoder, pdecode)
+from power_enums import (Function, InternalOp, In1Sel, In2Sel, In3Sel,
+ OutSel, RC, LdstLen, CryIn, single_bit_flags,
+ get_signal_name, get_csv)
+class DecoderTestCase(FHDLTestCase):
    def run_tst(self, bitsel, csvname, minor=None, suffix=None, opint=True):
        """Simulate the decoder against every row of one CSV table.

        Arguments:
            bitsel:  (start, end) bit range of the opcode that receives
                     the row's opcode value
            csvname: name of the CSV table to check
            minor:   optional (value, (start, end)) pair; the value is
                     driven onto that bit range as well
            suffix:  not used by the current body (only mentioned in the
                     commented-out construction below)
            opint:   when False, the row's opcode is treated as a pattern
                     and '-' is replaced with '0' before conversion

        Writes <csvname without extension>.vcd and .gtkw.
        """
        m = Module()
        comb = m.d.comb
        opcode = Signal(32)
        function_unit = Signal(Function)
        internal_op = Signal(InternalOp)
        in1_sel = Signal(In1Sel)
        in2_sel = Signal(In2Sel)
        in3_sel = Signal(In3Sel)
        out_sel = Signal(OutSel)
        rc_sel = Signal(RC)
        ldst_len = Signal(LdstLen)
        cry_in = Signal(CryIn)

        # opcodes = get_csv(csvname)
        # m.submodules.dut = dut = PowerDecoder(32, opcodes, bitsel=bitsel,
        #                                       opint=opint, suffix=suffix)
        # the module-level pdecode instance is the device under test
        m.submodules.dut = dut = pdecode
        # mirror the decoder outputs onto local signals
        comb += [dut.opcode_in.eq(opcode),
                 function_unit.eq(dut.op.function_unit),
                 in1_sel.eq(dut.op.in1_sel),
                 in2_sel.eq(dut.op.in2_sel),
                 in3_sel.eq(dut.op.in3_sel),
                 out_sel.eq(dut.op.out_sel),
                 rc_sel.eq(dut.op.rc_sel),
                 ldst_len.eq(dut.op.ldst_len),
                 cry_in.eq(dut.op.cry_in),
                 internal_op.eq(dut.op.internal_op)]

        sim = Simulator(m)
        opcodes = get_csv(csvname)

        def process():
            for row in opcodes:
                # rows without a functional unit are not checked
                if not row['unit']:
                    continue
                op = row['opcode']
                if not opint:  # HACK: convert 001---10 to 0b00100010
                    op = "0b" + op.replace('-', '0')
                print ("opint", opint, row['opcode'], op)
                print(row)
                # clear the whole opcode, then drive the selected bits
                yield opcode.eq(0)
                yield opcode[bitsel[0]:bitsel[1]].eq(int(op, 0))
                if minor:
                    print(minor)
                    minorbits = minor[1]
                    yield opcode[minorbits[0]:minorbits[1]].eq(minor[0])
                yield Delay(1e-6)
                # (signal, enum class, CSV column name)
                signals = [(function_unit, Function, 'unit'),
                           (internal_op, InternalOp, 'internal op'),
                           (in1_sel, In1Sel, 'in1'),
                           (in2_sel, In2Sel, 'in2'),
                           (in3_sel, In3Sel, 'in3'),
                           (out_sel, OutSel, 'out'),
                           (rc_sel, RC, 'rc'),
                           (cry_in, CryIn, 'cry in'),
                           (ldst_len, LdstLen, 'ldst len')]
                for sig, enm, name in signals:
                    result = yield sig
                    expected = enm[row[name]]
                    msg = f"{sig.name} == {enm(result)}, expected: {expected}"
                    self.assertEqual(enm(result), expected, msg)
                # single-bit flags are compared as plain integers
                for bit in single_bit_flags:
                    sig = getattr(dut.op, get_signal_name(bit))
                    result = yield sig
                    expected = int(row[bit])
                    msg = f"{sig.name} == {result}, expected: {expected}"
                    self.assertEqual(expected, result, msg)
        sim.add_process(process)
        prefix = os.path.splitext(csvname)[0]
        with sim.write_vcd("%s.vcd" % prefix, "%s.gtkw" % prefix, traces=[
                opcode, function_unit, internal_op,
                in1_sel, in2_sel]):
            sim.run()
+ def generate_ilang(self):
+ vl = rtlil.convert(pdecode, ports=pdecode.ports())
+ with open("decoder.il", "w") as f:
+ f.write(vl)
+ def test_major(self):
+ self.run_tst((26, 32), "major.csv")
+ self.generate_ilang()
+ def test_minor_19(self):
+ self.run_tst((1, 11), "minor_19.csv", minor=(19, (26, 32)),
+ suffix=(0, 5))
+ # def test_minor_19_00000(self):
+ # self.run_tst((1, 11), "minor_19_00000.csv")
+ def test_minor_30(self):
+ self.run_tst((1, 5), "minor_30.csv", minor=(30, (26, 32)))
+ def test_minor_31(self):
+ self.run_tst((1, 11), "minor_31.csv", minor=(31, (26, 32)))
+ def test_minor_58(self):
+ self.run_tst((0, 2), "minor_58.csv", minor=(58, (26, 32)))
+ def test_minor_62(self):
+ self.run_tst((0, 2), "minor_62.csv", minor=(62, (26, 32)))
+ # #def test_minor_31_prefix(self):
+ # # self.run_tst(10, "minor_31.csv", suffix=(5, 10))
+ # def test_extra(self):
+ # self.run_tst(32, "extra.csv", opint=False)
+ # self.generate_ilang(32, "extra.csv", opint=False)
+if __name__ == "__main__":
+ unittest.main()
--- /dev/null
+from nmigen import Elaboratable, Signal, Module, Const, Mux
+from nmigen.cli import main
+from nmigen.cli import verilog, rtlil
+import operator
+class Adder(Elaboratable):
+ def __init__(self, width):
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+ def elaborate(self, platform):
+ m = Module()
+ m.d.comb += self.o.eq(self.a + self.b)
+ return m
+class Subtractor(Elaboratable):
+ def __init__(self, width):
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+ def elaborate(self, platform):
+ m = Module()
+ m.d.comb += self.o.eq(self.a - self.b)
+ return m
+class Multiplier(Elaboratable):
+ def __init__(self, width):
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+ def elaborate(self, platform):
+ m = Module()
+ m.d.comb += self.o.eq(self.a * self.b)
+ return m
+class Shifter(Elaboratable):
+ def __init__(self, width):
+ self.width = width
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+ def elaborate(self, platform):
+ m = Module()
+ btrunc = Signal(self.width)
+ m.d.comb += btrunc.eq(self.b & Const((1<<self.width)-1))
+ m.d.comb += self.o.eq(self.a >> btrunc)
+ return m
+class ALU(Elaboratable):
+ def __init__(self, width):
+ self.p_valid_i = Signal()
+ self.p_ready_o = Signal()
+ self.n_ready_i = Signal()
+ self.n_valid_o = Signal()
+ self.counter = Signal(4)
+ self.op = Signal(2)
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+ self.width = width
+ def elaborate(self, platform):
+ m = Module()
+ add = Adder(self.width)
+ sub = Subtractor(self.width)
+ mul = Multiplier(self.width)
+ shf = Shifter(self.width)
+ m.submodules.add = add
+ m.submodules.sub = sub
+ m.submodules.mul = mul
+ m.submodules.shf = shf
+ for mod in [add, sub, mul, shf]:
+ m.d.comb += [
+ mod.a.eq(self.a),
+ mod.b.eq(self.b),
+ ]
+ go_now = Signal(reset_less=True) # testing no-delay ALU
+ with m.If(self.p_valid_i):
+ # input is valid. next check, if we already said "ready" or not
+ with m.If(~self.p_ready_o):
+ # we didn't say "ready" yet, so say so and initialise
+ m.d.sync += self.p_ready_o.eq(1)
+ # as this is a "fake" pipeline, just grab the output right now
+ with m.Switch(self.op):
+ for i, mod in enumerate([add, sub, mul, shf]):
+ with m.Case(i):
+ m.d.sync += self.o.eq(mod.o)
+ with m.If(self.op == 2): # MUL, to take 5 instructions
+ m.d.sync += self.counter.eq(5)
+ with m.Elif(self.op == 3): # SHIFT to take 7
+ m.d.sync += self.counter.eq(7)
+ with m.Elif(self.op == 1): # SUB to take 1, straight away
+ m.d.sync += self.counter.eq(1)
+ m.d.comb += go_now.eq(1)
+ with m.Else(): # ADD to take 2
+ m.d.sync += self.counter.eq(2)
+ with m.Else():
+ # input says no longer valid, so drop ready as well.
+ # a "proper" ALU would have had to sync in the opcode and a/b ops
+ m.d.sync += self.p_ready_o.eq(0)
+ # ok so the counter's running: when it gets to 1, fire the output
+ with m.If((self.counter == 1) | go_now):
+ # set the output as valid if the recipient is ready for it
+ m.d.sync += self.n_valid_o.eq(1)
+ with m.If(self.n_ready_i & self.n_valid_o):
+ m.d.sync += self.n_valid_o.eq(0)
+ # recipient said it was ready: reset back to known-good.
+ m.d.sync += self.counter.eq(0) # reset the counter
+ m.d.sync += self.o.eq(0) # clear the output for tidiness sake
+ # countdown to 1 (transition from 1 to 0 only on acknowledgement)
+ with m.If(self.counter > 1):
+ m.d.sync += self.counter.eq(self.counter - 1)
+ return m
+ def __iter__(self):
+ yield self.op
+ yield self.a
+ yield self.b
+ yield self.o
+ def ports(self):
+ return list(self)
+class BranchOp(Elaboratable):
+ def __init__(self, width, op):
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+ self.op = op
+ def elaborate(self, platform):
+ m = Module()
+ m.d.comb += self.o.eq(Mux(self.op(self.a, self.b), 1, 0))
+ return m
+class BranchALU(Elaboratable):
+ def __init__(self, width):
+ self.p_valid_i = Signal()
+ self.p_ready_o = Signal()
+ self.n_ready_i = Signal()
+ self.n_valid_o = Signal()
+ self.counter = Signal(4)
+ self.op = Signal(2)
+ self.a = Signal(width)
+ self.b = Signal(width)
+ self.o = Signal(width)
+ self.width = width
+ def elaborate(self, platform):
+ m = Module()
+ bgt = BranchOp(self.width, operator.gt)
+ blt = BranchOp(self.width, operator.lt)
+ beq = BranchOp(self.width, operator.eq)
+ bne = BranchOp(self.width, operator.ne)
+ m.submodules.bgt = bgt
+ m.submodules.blt = blt
+ m.submodules.beq = beq
+ m.submodules.bne = bne
+ for mod in [bgt, blt, beq, bne]:
+ m.d.comb += [
+ mod.a.eq(self.a),
+ mod.b.eq(self.b),
+ ]
+ go_now = Signal(reset_less=True) # testing no-delay ALU
+ with m.If(self.p_valid_i):
+ # input is valid. next check, if we already said "ready" or not
+ with m.If(~self.p_ready_o):
+ # we didn't say "ready" yet, so say so and initialise
+ m.d.sync += self.p_ready_o.eq(1)
+ # as this is a "fake" pipeline, just grab the output right now
+ with m.Switch(self.op):
+ for i, mod in enumerate([bgt, blt, beq, bne]):
+ with m.Case(i):
+ m.d.sync += self.o.eq(mod.o)
+ m.d.sync += self.counter.eq(5) # branch to take 5 cycles (fake)
+ #m.d.comb += go_now.eq(1)
+ with m.Else():
+ # input says no longer valid, so drop ready as well.
+ # a "proper" ALU would have had to sync in the opcode and a/b ops
+ m.d.sync += self.p_ready_o.eq(0)
+ # ok so the counter's running: when it gets to 1, fire the output
+ with m.If((self.counter == 1) | go_now):
+ # set the output as valid if the recipient is ready for it
+ m.d.sync += self.n_valid_o.eq(1)
+ with m.If(self.n_ready_i & self.n_valid_o):
+ m.d.sync += self.n_valid_o.eq(0)
+ # recipient said it was ready: reset back to known-good.
+ m.d.sync += self.counter.eq(0) # reset the counter
+ m.d.sync += self.o.eq(0) # clear the output for tidiness sake
+ # countdown to 1 (transition from 1 to 0 only on acknowledgement)
+ with m.If(self.counter > 1):
+ m.d.sync += self.counter.eq(self.counter - 1)
+ return m
+ def __iter__(self):
+ yield self.op
+ yield self.a
+ yield self.b
+ yield self.o
+ def ports(self):
+ return list(self)
+if __name__ == "__main__":
+ alu = ALU(width=16)
+ vl = rtlil.convert(alu, ports=alu.ports())
+ with open("test_alu.il", "w") as f:
+ f.write(vl)
+ alu = BranchALU(width=16)
+ vl = rtlil.convert(alu, ports=alu.ports())
+ with open("test_branch_alu.il", "w") as f:
+ f.write(vl)
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Mux, Elaboratable
+from nmutil.latch import SRLatch, latchregister
+""" Computation Unit (aka "ALU Manager").
+ This module runs a "revolving door" set of three latches, based on
+ * Issue
+ * Go_Read
+ * Go_Write
+ where one of them cannot be set on any given cycle.
+ (Note however that opc_l has been inverted (and qn used), due to SRLatch
+ default reset state being "0" rather than "1")
+ * When issue is first raised, a busy signal is sent out.
+ The src1 and src2 registers and the operand can be latched in
+ at this point
+ * Read request is set, which is acknowledged through the Scoreboard
+ to the priority picker, which generates (one and only one) Go_Read
+ at a time. One of those will (eventually) be this Computation Unit.
+ * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
+ src1/src2/operand in place), and the ALU is told to proceed.
+ * As this is currently a "demo" unit, a countdown timer is activated
+ to simulate an ALU "pipeline", which activates "write request release",
+ and the ALU's output is captured into a temporary register.
+ * Write request release will go through a similar process as Read request,
+ resulting (eventually) in Go_Write being asserted.
+ * When Go_Write is asserted, two things happen: (1) the data in the temp
+ register is placed combinatorially onto the output, and (2) the
+ req_l latch is cleared, busy is dropped, and the Comp Unit is back
+ through its revolving door to do another task.
+ Notes on oper_i:
+ * bits[0:2] are for the ALU: add=0, sub=1, mul=2, shift=3
+ * bit[2] is the immediate flag (bit[2]=1 == immediate mode)
+ """
+class ComputationUnitNoDelay(Elaboratable):
+ def __init__(self, rwid, opwid, alu):
+ self.opwid = opwid
+ self.rwid = rwid
+ self.alu = alu
+ self.counter = Signal(4)
+ self.go_rd_i = Signal(reset_less=True) # go read in
+ self.go_wr_i = Signal(reset_less=True) # go write in
+ self.issue_i = Signal(reset_less=True) # fn issue in
+ self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
+ self.go_die_i = Signal() # go die (reset)
+ self.oper_i = Signal(opwid, reset_less=True) # opcode in
+ self.imm_i = Signal(rwid, reset_less=True) # immediate in
+ self.src1_i = Signal(rwid, reset_less=True) # oper1 in
+ self.src2_i = Signal(rwid, reset_less=True) # oper2 in
+ self.busy_o = Signal(reset_less=True) # fn busy out
+ self.data_o = Signal(rwid, reset_less=True) # Dest out
+ self.rd_rel_o = Signal(reset_less=True) # release src1/src2 request
+ self.req_rel_o = Signal(reset_less=True) # release request out (valid_o)
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.alu = self.alu
+ m.submodules.src_l = src_l = SRLatch(sync=False)
+ m.submodules.opc_l = opc_l = SRLatch(sync=False)
+ m.submodules.req_l = req_l = SRLatch(sync=False)
+ # shadow/go_die
+ reset_w = Signal(reset_less=True)
+ reset_r = Signal(reset_less=True)
+ m.d.comb += reset_w.eq(self.go_wr_i | self.go_die_i)
+ m.d.comb += reset_r.eq(self.go_rd_i | self.go_die_i)
+ # This is fascinating and very important to observe that this
+ # is in effect a "3-way revolving door". At no time may all 3
+ # latches be set at the same time.
+ # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
+ m.d.sync += opc_l.s.eq(self.issue_i) # XXX NOTE: INVERTED FROM book!
+ m.d.sync += opc_l.r.eq(reset_w) # XXX NOTE: INVERTED FROM book!
+ # src operand latch (not using go_wr_i)
+ m.d.sync += src_l.s.eq(self.issue_i)
+ m.d.sync += src_l.r.eq(reset_r)
+ # dest operand latch (not using issue_i)
+ m.d.sync += req_l.s.eq(self.go_rd_i)
+ m.d.sync += req_l.r.eq(reset_w)
+ # create a latch/register for the operand
+ oper_r = Signal(self.opwid+1, reset_less=True) # opcode reg
+ latchregister(m, self.oper_i, oper_r, self.issue_i)
+ # and one for the output from the ALU
+ data_r = Signal(self.rwid, reset_less=True) # Dest register
+ latchregister(m, self.alu.o, data_r, req_l.q)
+ # get the top 2 bits for the ALU
+ m.d.comb += self.alu.op.eq(oper_r[0:2])
+ # 3rd bit is whether this is an immediate or not
+ op_is_imm = Signal(reset_less=True)
+ m.d.comb += op_is_imm.eq(oper_r[2])
+ # select immediate if opcode says so. however also change the latch
+ # to trigger *from* the opcode latch instead.
+ src2_or_imm = Signal(self.rwid, reset_less=True)
+ src_sel = Signal(reset_less=True)
+ m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.qn, src_l.q))
+ m.d.comb += src2_or_imm.eq(Mux(op_is_imm, self.imm_i, self.src2_i))
+ # create a latch/register for src1/src2
+ latchregister(m, self.src1_i, self.alu.a, src_l.q)
+ latchregister(m, src2_or_imm, self.alu.b, src_sel)
+ # -----
+ # outputs
+ # -----
+ # all request signals gated by busy_o. prevents picker problems
+ busy_o = self.busy_o
+ m.d.comb += busy_o.eq(opc_l.q) # busy out
+ m.d.comb += self.rd_rel_o.eq(src_l.q & busy_o) # src1/src2 req rel
+ # on a go_read, tell the ALU we're accepting data.
+ # NOTE: this spells TROUBLE if the ALU isn't ready!
+ # go_read is only valid for one clock!
+ with m.If(self.go_rd_i): # src operands ready, GO!
+ with m.If(~self.alu.p_ready_o): # no ACK yet
+ m.d.comb += self.alu.p_valid_i.eq(1) # so indicate valid
+ # only proceed if ALU says its output is valid
+ with m.If(self.alu.n_valid_o):
+ # when ALU ready, write req release out. waits for shadow
+ m.d.comb += self.req_rel_o.eq(req_l.q & busy_o & self.shadown_i)
+ # when output latch is ready, and ALU says ready, accept ALU output
+ with m.If(self.req_rel_o):
+ m.d.comb += self.alu.n_ready_i.eq(1) # tells ALU "thanks got it"
+ # output the data from the latch on go_write
+ with m.If(self.go_wr_i):
+ m.d.comb += self.data_o.eq(data_r)
+ return m
+ def __iter__(self):
+ yield self.go_rd_i
+ yield self.go_wr_i
+ yield self.issue_i
+ yield self.shadown_i
+ yield self.go_die_i
+ yield self.oper_i
+ yield self.imm_i
+ yield self.src1_i
+ yield self.src2_i
+ yield self.busy_o
+ yield self.rd_rel_o
+ yield self.req_rel_o
+ yield self.data_o
+ def ports(self):
+ return list(self)
+def scoreboard_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_scoreboard():
+ from alu_hier import ALU
+ alu = ALU(16)
+ dut = ComputationUnitNoDelay(16, 8, alu)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_compalu.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, scoreboard_sim(dut), vcd_name='test_compalu.vcd')
+if __name__ == '__main__':
+ test_scoreboard()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Mux, Cat, Elaboratable
+from nmutil.latch import SRLatch, latchregister
+""" LOAD / STORE Computation Unit. Also capable of doing ADD and ADD immediate
+ This module runs a "revolving door" set of four latches, based on
+ * Issue
+ * Go_Read
+ * Go_Addr
+ * Go_Write *OR* Go_Store
+ (Note that opc_l has been inverted (and qn used), due to SRLatch
+ default reset state being "0" rather than "1")
+ """
+ # internal opcodes. hypothetically this could do more combinations.
+ # bit meanings, as decoded from the latched opcode in LDSTCompUnit.elaborate:
+ # * bit 0: 0 = ADD , 1 = SUB
+ # * bit 1: 0 = src2, 1 = IMM (selects imm_i instead of src2_i as 2nd operand)
+ # * bit 2: 1 = LD
+ # * bit 3: 1 = ST
+ # NOTE(review): the trailing comments below follow the bit table, but the
+ # constant names look swapped against it (the *I names have the IMM bit
+ # clear; LDST_OP_ST sets the LD bit and LDST_OP_LD sets the ST bit).
+ # Confirm against callers before relying on the names.
+ LDST_OP_ADDI = 0b0000 # plain ADD (src1 + src2): IMM bit clear
+ LDST_OP_SUBI = 0b0001 # plain SUB (src1 - src2): IMM bit clear
+ LDST_OP_ADD = 0b0010 # immed ADD (imm + src1): IMM bit set
+ LDST_OP_SUB = 0b0011 # immed SUB (imm - src1): IMM bit set
+ LDST_OP_ST = 0b0110 # immed ADD plus LD op (bit 2). ADD result is address
+ LDST_OP_LD = 0b1010 # immed ADD plus ST op (bit 3). ADD result is address
+class LDSTCompUnit(Elaboratable):
+ """ LOAD / STORE / ADD / SUB Computation Unit
+ Inputs
+ ------
+ * :rwid: register width
+ * :alu: an ALU module
+ * :mem: a Memory Module (read-write capable)
+ Control Signals (In)
+ --------------------
+ * :issue_i: LD/ST is being "issued".
+ * :isalu_i: ADD/SUB is being "issued" (aka issue_alu_i)
+ * :shadown_i: Inverted-shadow is being held (stops STORE *and* WRITE)
+ * :go_rd_i: read is being actioned (latches in src regs)
+ * :go_ad_i: address is being actioned (triggers actual mem LD)
+ * :go_st_i: store is being actioned (triggers actual mem STORE)
+ * :go_die_i: resets the unit back to "wait for issue"
+ """
+ def __init__(self, rwid, opwid, alu, mem):
+ self.opwid = opwid
+ self.rwid = rwid
+ self.alu = alu
+ self.mem = mem
+ self.counter = Signal(4)
+ self.go_rd_i = Signal(reset_less=True) # go read in
+ self.go_ad_i = Signal(reset_less=True) # go address in
+ self.go_wr_i = Signal(reset_less=True) # go write in
+ self.go_st_i = Signal(reset_less=True) # go store in
+ self.issue_i = Signal(reset_less=True) # fn issue in
+ self.isalu_i = Signal(reset_less=True) # fn issue as ALU in
+ self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
+ self.go_die_i = Signal() # go die (reset)
+ self.oper_i = Signal(opwid, reset_less=True) # opcode in
+ self.imm_i = Signal(rwid, reset_less=True) # immediate in
+ self.src1_i = Signal(rwid, reset_less=True) # oper1 in
+ self.src2_i = Signal(rwid, reset_less=True) # oper2 in
+ self.busy_o = Signal(reset_less=True) # fn busy out
+ self.rd_rel_o = Signal(reset_less=True) # request src1/src2
+ self.adr_rel_o = Signal(reset_less=True) # request address (from mem)
+ self.sto_rel_o = Signal(reset_less=True) # request store (to mem)
+ self.req_rel_o = Signal(reset_less=True) # request write (result)
+ self.data_o = Signal(rwid, reset_less=True) # Dest out (LD or ALU)
+ self.addr_o = Signal(rwid, reset_less=True) # Address out (LD or ST)
+ # hmm... TODO... move these to outside of LDSTCompUnit
+ self.load_mem_o = Signal(reset_less=True) # activate memory LOAD
+ self.stwd_mem_o = Signal(reset_less=True) # activate memory STORE
+ self.ld_o = Signal(reset_less=True) # operation is a LD
+ self.st_o = Signal(reset_less=True) # operation is a ST
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ sync = m.d.sync
+ m.submodules.alu = self.alu
+ m.submodules.src_l = src_l = SRLatch(sync=False)
+ m.submodules.opc_l = opc_l = SRLatch(sync=False)
+ m.submodules.adr_l = adr_l = SRLatch(sync=False)
+ m.submodules.req_l = req_l = SRLatch(sync=False)
+ m.submodules.sto_l = sto_l = SRLatch(sync=False)
+ # shadow/go_die
+ reset_b = Signal(reset_less=True)
+ reset_w = Signal(reset_less=True)
+ reset_a = Signal(reset_less=True)
+ reset_s = Signal(reset_less=True)
+ reset_r = Signal(reset_less=True)
+ comb += reset_b.eq(self.go_st_i | self.go_wr_i | self.go_die_i)
+ comb += reset_w.eq(self.go_wr_i | self.go_die_i)
+ comb += reset_s.eq(self.go_st_i | self.go_die_i)
+ comb += reset_r.eq(self.go_rd_i | self.go_die_i)
+ # this one is slightly different, issue_alu_i selects go_wr_i)
+ a_sel = Mux(self.isalu_i, self.go_wr_i, self.go_ad_i)
+ comb += reset_a.eq(a_sel| self.go_die_i)
+ # opcode decode
+ op_alu = Signal(reset_less=True)
+ op_is_ld = Signal(reset_less=True)
+ op_is_st = Signal(reset_less=True)
+ op_ldst = Signal(reset_less=True)
+ op_is_imm = Signal(reset_less=True)
+ # select immediate or src2 reg to add
+ src2_or_imm = Signal(self.rwid, reset_less=True)
+ src_sel = Signal(reset_less=True)
+ # issue can be either issue_i or issue_alu_i (isalu_i)
+ issue_i = Signal(reset_less=True)
+ comb += issue_i.eq(self.issue_i | self.isalu_i)
+ # Ripple-down the latches, each one set cancels the previous.
+ # NOTE: use sync to stop combinatorial loops.
+ # opcode latch - inverted so that busy resets to 0
+ sync += opc_l.s.eq(issue_i) # XXX NOTE: INVERTED FROM book!
+ sync += opc_l.r.eq(reset_b) # XXX NOTE: INVERTED FROM book!
+ # src operand latch
+ sync += src_l.s.eq(issue_i)
+ sync += src_l.r.eq(reset_r)
+ # addr latch
+ sync += adr_l.s.eq(self.go_rd_i)
+ sync += adr_l.r.eq(reset_a)
+ # dest operand latch
+ sync += req_l.s.eq(self.go_ad_i)
+ sync += req_l.r.eq(reset_w)
+ # store latch
+ sync += sto_l.s.eq(self.go_ad_i)
+ sync += sto_l.r.eq(reset_s)
+ # outputs: busy and release signals
+ busy_o = self.busy_o
+ comb += self.busy_o.eq(opc_l.q) # busy out
+ comb += self.rd_rel_o.eq(src_l.q & busy_o) # src1/src2 req rel
+ comb += self.sto_rel_o.eq(sto_l.q & busy_o & self.shadown_i & op_is_st)
+ # request release enabled based on if op is a LD/ST or a plain ALU
+ # if op is an ADD/SUB or a LD, req_rel activates.
+ wr_q = Signal(reset_less=True)
+ comb += wr_q.eq(req_l.q & (~op_ldst | op_is_ld))
+ alulatch = Signal(reset_less=True)
+ comb += alulatch.eq((op_ldst & self.adr_rel_o) | \
+ (~op_ldst & self.req_rel_o))
+ # only proceed if ALU says its output is valid
+ with m.If(self.alu.n_valid_o):
+ # write req release out. waits until shadow is dropped.
+ comb += self.req_rel_o.eq(wr_q & busy_o & self.shadown_i)
+ # address release only happens on LD/ST, and is shadowed.
+ comb += self.adr_rel_o.eq(adr_l.q & op_ldst & busy_o & \
+ self.shadown_i)
+ # when output latch is ready, and ALU says ready, accept ALU output
+ with m.If(self.req_rel_o):
+ m.d.comb += self.alu.n_ready_i.eq(1) # tells ALU "thanks got it"
+ # select immediate if opcode says so. however also change the latch
+ # to trigger *from* the opcode latch instead.
+ comb += src_sel.eq(Mux(op_is_imm, opc_l.qn, src_l.q))
+ comb += src2_or_imm.eq(Mux(op_is_imm, self.imm_i, self.src2_i))
+ # create a latch/register for src1/src2 (include immediate select)
+ latchregister(m, self.src1_i, self.alu.a, src_l.q)
+ latchregister(m, src2_or_imm, self.alu.b, src_sel)
+ # create a latch/register for the operand
+ oper_r = Signal(self.opwid, reset_less=True) # Dest register
+ latchregister(m, self.oper_i, oper_r, self.issue_i)
+ alu_op = Cat(op_alu, 0, op_is_imm) # using alu_hier, here.
+ comb += self.alu.op.eq(alu_op)
+ # and one for the output from the ALU
+ data_r = Signal(self.rwid, reset_less=True) # Dest register
+ latchregister(m, self.alu.o, data_r, alulatch)
+ # decode bits of operand (latched)
+ comb += op_alu.eq(oper_r[0])
+ comb += op_is_imm.eq(oper_r[1])
+ comb += op_is_ld.eq(oper_r[2])
+ comb += op_is_st.eq(oper_r[3])
+ comb += op_ldst.eq(op_is_ld | op_is_st)
+ comb += self.load_mem_o.eq(op_is_ld & self.go_ad_i)
+ comb += self.stwd_mem_o.eq(op_is_st & self.go_st_i)
+ comb += self.ld_o.eq(op_is_ld)
+ comb += self.st_o.eq(op_is_st)
+ # on a go_read, tell the ALU we're accepting data.
+ # NOTE: this spells TROUBLE if the ALU isn't ready!
+ # go_read is only valid for one clock!
+ with m.If(self.go_rd_i): # src operands ready, GO!
+ with m.If(~self.alu.p_ready_o): # no ACK yet
+ m.d.comb += self.alu.p_valid_i.eq(1) # so indicate valid
+ # put the register directly onto the output bus on a go_write
+ with m.If(self.go_wr_i):
+ comb += self.data_o.eq(data_r)
+ # put the register directly onto the address bus
+ with m.If(self.go_ad_i):
+ comb += self.addr_o.eq(data_r)
+ return m
+ def __iter__(self):
+ yield self.go_rd_i
+ yield self.go_ad_i
+ yield self.go_wr_i
+ yield self.go_st_i
+ yield self.issue_i
+ yield self.isalu_i
+ yield self.shadown_i
+ yield self.go_die_i
+ yield self.oper_i
+ yield self.imm_i
+ yield self.src1_i
+ yield self.src2_i
+ yield self.busy_o
+ yield self.rd_rel_o
+ yield self.adr_rel_o
+ yield self.sto_rel_o
+ yield self.req_rel_o
+ yield self.data_o
+ yield self.load_mem_o
+ yield self.stwd_mem_o
+ def ports(self):
+ return list(self)
+def scoreboard_sim(dut):
+ yield dut.dest_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.src1_i.eq(1)
+ yield dut.issue_i.eq(1)
+ yield
+ yield
+ yield
+ yield dut.issue_i.eq(0)
+ yield
+ yield dut.go_read_i.eq(1)
+ yield
+ yield dut.go_read_i.eq(0)
+ yield
+ yield dut.go_write_i.eq(1)
+ yield
+ yield dut.go_write_i.eq(0)
+ yield
+def test_scoreboard():
+ from alu_hier import ALU
+ alu = ALU(16)
+ mem = alu # fake
+ dut = LDSTCompUnit(16, 4, alu, mem)
+ vl = rtlil.convert(dut, ports=dut.ports())
+ with open("test_ldst_comp.il", "w") as f:
+ f.write(vl)
+ run_simulation(dut, scoreboard_sim(dut), vcd_name='test_ldst_comp.vcd')
+if __name__ == '__main__':
+ test_scoreboard()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
+from regfile.regfile import RegFileArray, treereduce
+from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit
+from scoreboard.fu_fu_matrix import FUFUDepMatrix
+from scoreboard.fu_reg_matrix import FURegDepMatrix
+from scoreboard.global_pending import GlobalPending
+from scoreboard.group_picker import GroupPicker
+from scoreboard.issue_unit import IntFPIssueUnit, RegDecode
+from compalu import ComputationUnitNoDelay
+from alu_hier import ALU
+from nmutil.latch import SRLatch
+from random import randint
+class Scoreboard(Elaboratable):
+ def __init__(self, rwid, n_regs):
+ """ Inputs:
+ * :rwid: bit width of register file(s) - both FP and INT
+ * :n_regs: depth of register file(s) - number of FP and INT regs
+ """
+ self.rwid = rwid
+ self.n_regs = n_regs
+ # Register Files
+ self.intregs = RegFileArray(rwid, n_regs)
+ self.fpregs = RegFileArray(rwid, n_regs)
+ # inputs
+ self.int_store_i = Signal(reset_less=True) # instruction is a store
+ self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
+ self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
+ self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
+ self.issue_o = Signal(reset_less=True) # instruction was accepted
+ def elaborate(self, platform):
+ m = Module()
+ m.submodules.intregs = self.intregs
+ m.submodules.fpregs = self.fpregs
+ # register ports
+ int_dest = self.intregs.write_port("dest")
+ int_src1 = self.intregs.read_port("src1")
+ int_src2 = self.intregs.read_port("src2")
+ fp_dest = self.fpregs.write_port("dest")
+ fp_src1 = self.fpregs.read_port("src1")
+ fp_src2 = self.fpregs.read_port("src2")
+ # Int ALUs
+ add = ALU(self.rwid)
+ sub = ALU(self.rwid)
+ m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
+ m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
+ int_alus = [comp1, comp2]
+ m.d.comb += comp1.oper_i.eq(Const(0)) # temporary/experiment: op=add
+ m.d.comb += comp2.oper_i.eq(Const(1)) # temporary/experiment: op=sub
+ # Int FUs
+ if_l = []
+ int_src1_pend_v = []
+ int_src2_pend_v = []
+ int_rd_pend_v = []
+ int_wr_pend_v = []
+ for i, a in enumerate(int_alus):
+ # set up Integer Function Unit, add to module (and python list)
+ fu = IntFnUnit(self.n_regs, shadow_wid=0)
+ setattr(m.submodules, "intfu%d" % i, fu)
+ if_l.append(fu)
+ # collate the read/write pending vectors (to go into global pending)
+ int_src1_pend_v.append(fu.src1_pend_o)
+ int_src2_pend_v.append(fu.src2_pend_o)
+ int_rd_pend_v.append(fu.int_rd_pend_o)
+ int_wr_pend_v.append(fu.int_wr_pend_o)
+ int_fus = Array(if_l)
+ # Count of number of FUs
+ n_int_fus = len(if_l)
+ n_fp_fus = 0 # for now
+ n_fus = n_int_fus + n_fp_fus # plus FP FUs
+ # XXX replaced by array of FUs? *FnUnit
+ # # Integer FU-FU Dep Matrix
+ # m.submodules.intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
+ # Integer FU-Reg Dep Matrix
+ # intregdeps = FURegDepMatrix(self.n_regs, n_int_fus)
+ # m.submodules.intregdeps = intregdeps
+ # Integer Priority Picker 1: Adder + Subtractor
+ intpick1 = GroupPicker(2) # picks between add and sub
+ m.submodules.intpick1 = intpick1
+ # Global Pending Vectors (INT and FP)
+ # NOTE: number of vectors is NOT same as number of FUs.
+ g_int_src1_pend_v = GlobalPending(self.n_regs, int_src1_pend_v)
+ g_int_src2_pend_v = GlobalPending(self.n_regs, int_src2_pend_v)
+ g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v, True)
+ g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v, True)
+ m.submodules.g_int_src1_pend_v = g_int_src1_pend_v
+ m.submodules.g_int_src2_pend_v = g_int_src2_pend_v
+ m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
+ m.submodules.g_int_wr_pend_v = g_int_wr_pend_v
+ # INT/FP Issue Unit
+ regdecode = RegDecode(self.n_regs)
+ m.submodules.regdecode = regdecode
+ issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
+ m.submodules.issueunit = issueunit
+ # FU-FU Dependency Matrices
+ intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
+ m.submodules.intfudeps = intfudeps
+ #---------
+ # ok start wiring things together...
+ # "now hear de word of de looord... dem bones dem bones dem dryy bones"
+ # https://www.youtube.com/watch?v=pYb8Wm6-QfA
+ #---------
+ #---------
+ # Issue Unit is where it starts. set up some in/outs for this module
+ #---------
+ m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
+ regdecode.dest_i.eq(self.int_dest_i),
+ regdecode.src1_i.eq(self.int_src1_i),
+ regdecode.src2_i.eq(self.int_src2_i),
+ regdecode.enable_i.eq(1),
+ self.issue_o.eq(issueunit.issue_o),
+ issueunit.i.dest_i.eq(regdecode.dest_o),
+ ]
+ self.int_insn_i = issueunit.i.insn_i # enabled by instruction decode
+ # connect global rd/wr pending vectors
+ m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
+ # TODO: issueunit.f (FP)
+ # and int function issue / busy arrays, and dest/src1/src2
+ fn_issue_l = []
+ fn_busy_l = []
+ for i, fu in enumerate(if_l):
+ fn_issue_l.append(fu.issue_i)
+ fn_busy_l.append(fu.busy_o)
+ m.d.sync += fu.issue_i.eq(issueunit.i.fn_issue_o[i])
+ m.d.sync += fu.dest_i.eq(self.int_dest_i)
+ m.d.sync += fu.src1_i.eq(self.int_src1_i)
+ m.d.sync += fu.src2_i.eq(self.int_src2_i)
+ # XXX sync, so as to stop a simulation infinite loop
+ m.d.comb += issueunit.i.busy_i[i].eq(fu.busy_o)
+ #---------
+ # connect Function Units
+ #---------
+ # Group Picker... done manually for now. TODO: cat array of pick sigs
+ m.d.comb += if_l[0].go_rd_i.eq(intpick1.go_rd_o[0]) # add rd
+ m.d.comb += if_l[0].go_wr_i.eq(intpick1.go_wr_o[0]) # add wr
+ m.d.comb += if_l[1].go_rd_i.eq(intpick1.go_rd_o[1]) # subtract rd
+ m.d.comb += if_l[1].go_wr_i.eq(intpick1.go_wr_o[1]) # subtract wr
+ # create read-pending FU-FU vectors
+ intfu_rd_pend_v = Signal(n_int_fus, reset_less = True)
+ intfu_wr_pend_v = Signal(n_int_fus, reset_less = True)
+ for i in range(n_int_fus):
+ #m.d.comb += intfu_rd_pend_v[i].eq(if_l[i].int_rd_pend_o.bool())
+ #m.d.comb += intfu_wr_pend_v[i].eq(if_l[i].int_wr_pend_o.bool())
+ m.d.comb += intfu_rd_pend_v[i].eq(if_l[i].int_readable_o)
+ m.d.comb += intfu_wr_pend_v[i].eq(if_l[i].int_writable_o)
+ # Connect INT Fn Unit global wr/rd pending
+ for fu in if_l:
+ m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
+ m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)
+ # Connect FU-FU Matrix, NOTE: FN Units readable/writable considered
+ # to be unit "read-pending / write-pending"
+ m.d.comb += intfudeps.rd_pend_i.eq(intfu_rd_pend_v)
+ m.d.comb += intfudeps.wr_pend_i.eq(intfu_wr_pend_v)
+ m.d.comb += intfudeps.issue_i.eq(issueunit.i.fn_issue_o)
+ for i in range(n_int_fus):
+ m.d.comb += intfudeps.go_rd_i[i].eq(intpick1.go_rd_o[i])
+ m.d.comb += intfudeps.go_wr_i[i].eq(intpick1.go_wr_o[i])
+ # Connect Picker (note connection to FU-FU)
+ #---------
+ readable_o = intfudeps.readable_o
+ writable_o = intfudeps.writable_o
+ m.d.comb += intpick1.rd_rel_i[0].eq(int_alus[0].rd_rel_o)
+ m.d.comb += intpick1.rd_rel_i[1].eq(int_alus[1].rd_rel_o)
+ m.d.comb += intpick1.req_rel_i[0].eq(int_alus[0].req_rel_o)
+ m.d.comb += intpick1.req_rel_i[1].eq(int_alus[1].req_rel_o)
+ m.d.comb += intpick1.readable_i[0].eq(readable_o[0]) # add rd
+ m.d.comb += intpick1.writable_i[0].eq(writable_o[0]) # add wr
+ m.d.comb += intpick1.readable_i[1].eq(readable_o[1]) # sub rd
+ m.d.comb += intpick1.writable_i[1].eq(writable_o[1]) # sub wr
+ #---------
+ # Connect Register File(s)
+ #---------
+ #with m.If(if_l[0].go_wr_i | if_l[1].go_wr_i):
+ m.d.sync += int_dest.wen.eq(g_int_wr_pend_v.g_pend_o)
+ #with m.If(intpick1.go_rd_o):
+ #with m.If(if_l[0].go_rd_i | if_l[1].go_rd_i):
+ m.d.sync += int_src1.ren.eq(g_int_src1_pend_v.g_pend_o)
+ m.d.sync += int_src2.ren.eq(g_int_src2_pend_v.g_pend_o)
+ # merge (OR) all integer FU / ALU outputs to a single value
+ # bit of a hack: treereduce needs a list with an item named "dest_o"
+ dest_o = treereduce(int_alus)
+ m.d.sync += int_dest.data_i.eq(dest_o)
+ # connect ALUs
+ for i, alu in enumerate(int_alus):
+ m.d.comb += alu.go_rd_i.eq(intpick1.go_rd_o[i])
+ m.d.comb += alu.go_wr_i.eq(intpick1.go_wr_o[i])
+ m.d.comb += alu.issue_i.eq(fn_issue_l[i])
+ #m.d.comb += fn_busy_l[i].eq(alu.busy_o) # XXX ignore, use fnissue
+ m.d.comb += alu.src1_i.eq(int_src1.data_o)
+ m.d.comb += alu.src2_i.eq(int_src2.data_o)
+ m.d.comb += if_l[i].req_rel_i.eq(alu.req_rel_o) # pipe out ready
+ return m
+ def __iter__(self):
+ yield from self.intregs
+ yield from self.fpregs
+ yield self.int_store_i
+ yield self.int_dest_i
+ yield self.int_src1_i
+ yield self.int_src2_i
+ yield self.issue_o
+ #yield from self.int_src1
+ #yield from self.int_dest
+ #yield from self.int_src1
+ #yield from self.int_src2
+ #yield from self.fp_dest
+ #yield from self.fp_src1
+ #yield from self.fp_src2
+ def ports(self):
+ return list(self)
# opcodes understood by RegSim.op (also used to index dut.int_insn_i)
IADD = 0
ISUB = 1


class RegSim:
    """Software reference model of the integer register file.

    Keeps a plain list of register values and applies the same operations
    that the test bench issues to the hardware, so that the two can be
    compared afterwards with ``check`` / ``dump``.
    """

    def __init__(self, rwidth, nregs):
        """ Arguments:

            * rwidth: register width in bits (results are truncated to it)
            * nregs:  number of registers; all start at zero
        """
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, src1, src2, dest):
        """Apply ``op`` to registers ``src1`` and ``src2``, store in ``dest``.

        ``src1``, ``src2`` and ``dest`` are register *numbers*.  The result
        is masked to ``rwidth`` bits.

        Raises ValueError for an opcode other than IADD / ISUB.  (Previously
        an unknown opcode fell through both branches and failed with an
        UnboundLocalError on the result variable.)
        """
        lhs = self.regs[src1]
        rhs = self.regs[src2]
        if op == IADD:
            result = lhs + rhs
        elif op == ISUB:
            result = lhs - rhs
        else:
            raise ValueError("unsupported opcode %r" % (op,))
        self.regs[dest] = result & ((1 << self.rwidth) - 1)

    def setval(self, dest, val):
        """Overwrite register number ``dest`` with ``val`` (not masked)."""
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation generator: print expected vs. actual for every register.

        Each register is read by yielding ``dut.intregs.regs[i].reg``; the
        value sent back into the generator is taken as the hardware value.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation generator: fail on the first register that differs.

        On a mismatch the offending register is printed, followed by a full
        ``dump``, and AssertionError is raised.  The exception is raised
        explicitly (rather than via ``assert False``) so that the check is
        not silently disabled when Python runs with ``-O``.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
def int_instr(dut, alusim, op, src1, src2, dest):
    """Simulation generator: present one integer instruction to the DUT.

    Clears every bit of ``dut.int_insn_i``, sets the destination and source
    register numbers, raises the ``int_insn_i`` bit selected by ``op`` and
    finally applies the same operation to the software model ``alusim``.
    """
    insn_bits = dut.int_insn_i
    # deassert every instruction-select bit before choosing the new one
    for bit_no in range(len(insn_bits)):
        yield insn_bits[bit_no].eq(0)
    # register numbers: destination first, then the two sources
    for port, regnum in ((dut.int_dest_i, dest),
                         (dut.int_src1_i, src1),
                         (dut.int_src2_i, src2)):
        yield port.eq(regnum)
    yield insn_bits[op].eq(1)
    # keep the reference model in step with what was just issued
    alusim.op(op, src1, src2, dest)
def print_reg(dut, rnums):
    """Simulation generator: print the current value of selected registers.

    Each register in ``rnums`` is read by yielding
    ``dut.intregs.regs[n].reg``; the value sent back into the generator is
    formatted as hexadecimal.  One line is printed, listing the register
    numbers followed by their values.
    """
    hex_values = []
    for regnum in rnums:
        current = yield dut.intregs.regs[regnum].reg
        hex_values.append("%x" % current)
    labels = ','.join(str(regnum) for regnum in rnums)
    print("reg %s: %s" % (labels, ','.join(hex_values)))
+def scoreboard_sim(dut, alusim):
# Simulation generator that drives the scoreboard `dut`, mirrors every
# operation into the software model `alusim`, and finally compares the two
# with alusim.check() / alusim.dump().
#
# NOTE(review): indentation was lost in this dump, so the exact extent of the
# `if False:` / `if True:` / `while True:` bodies below cannot be confirmed
# from here.  The comments describe only what the statements themselves show.
+ yield dut.int_store_i.eq(0)
# preload registers 1 .. n_regs-1 with their own index, in both the DUT
# register file and the software model (register 0 is left untouched)
+ for i in range(1, dut.n_regs):
+ yield dut.intregs.regs[i].reg.eq(i)
+ alusim.setval(i, i)
# `if False:` -- a fixed three-instruction sequence (ADD, ADD, SUB) that is
# currently disabled
+ if False:
+ yield from int_instr(dut, alusim, IADD, 4, 3, 5)
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield from int_instr(dut, alusim, IADD, 5, 2, 5)
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield from int_instr(dut, alusim, ISUB, 5, 1, 3)
+ yield from print_reg(dut, [3,4,5])
+ yield
+ for i in range(len(dut.int_insn_i)):
+ yield dut.int_insn_i[i].eq(0)
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield from alusim.check(dut)
# two instructions are generated: operands are first picked at random ...
+ for i in range(2):
+ src1 = randint(1, dut.n_regs-1)
+ src2 = randint(1, dut.n_regs-1)
# NOTE(review): the unconditional `break` straight after picking `dest` leaves
# the loop at once, so the following `dest not in [src1, src2]` test is never
# reached -- presumably a debugging shortcut; confirm before relying on it.
+ while True:
+ dest = randint(1, dut.n_regs-1)
+ break
+ if dest not in [src1, src2]:
+ break
+ op = randint(0, 1)
# ... then optionally overridden by hard-coded values: the `if False:` set is
# disabled, the `if True:` set is the one in effect
+ if False:
+ if i % 2 == 0:
+ src1 = 6
+ src2 = 6
+ dest = 1
+ else:
+ src1 = 1
+ src2 = 7
+ dest = 2
+ #src1 = 2
+ #src2 = 3
+ #dest = 2
+ op = i
+ if True:
+ if i == 0:
+ src1 = 2
+ src2 = 3
+ dest = 3
+ else:
+ src1 = 5
+ src2 = 3
+ dest = 4
+ #op = (i+1) % 2
+ op = i
+ print ("random %d: %d %d %d %d\n" % (i, op, src1, src2, dest))
+ yield from int_instr(dut, alusim, op, src1, src2, dest)
+ yield from print_reg(dut, [3,4,5])
# poll dut.issue_o after each bare `yield`; once it reads non-zero, every
# int_insn_i bit is cleared and the loop is left, otherwise "busy" is printed.
# (a bare `yield` hands control back to the simulator -- presumably advancing
# one clock; confirm against run_simulation)
+ while True:
+ yield
+ issue_o = yield dut.issue_o
+ if issue_o:
+ yield from print_reg(dut, [3,4,5])
+ for i in range(len(dut.int_insn_i)):
+ yield dut.int_insn_i[i].eq(0)
+ break
+ print ("busy",)
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield
+ yield
# further bare yields interleaved with register printouts, before the final
# comparison
+ yield
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield from print_reg(dut, [3,4,5])
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
+ yield
# compare every DUT register against the software model, then print them all
+ yield from alusim.check(dut)
+ yield from alusim.dump(dut)
def explore_groups(dut):
    """Print the LHS signal groups found in the DUT's top-level statements.

    Elaborates ``dut``, converts the result to a ``Fragment`` (the converted
    fragment itself is not used afterwards) and runs nmigen's
    ``LHSGroupAnalyzer`` over the ``_statements`` of the object returned by
    ``elaborate``.
    """
    from nmigen.hdl.ir import Fragment
    from nmigen.hdl.xfrm import LHSGroupAnalyzer

    elaborated = dut.elaborate(platform=None)
    # result intentionally discarded, exactly as in the original
    Fragment.get(elaborated, platform=None)
    analyzer = LHSGroupAnalyzer()
    print(analyzer(elaborated._statements))
def test_scoreboard():
    """Build a 16-bit, 8-register Scoreboard, dump its RTLIL and simulate it.

    Writes ``test_scoreboard.il`` (RTLIL) and ``test_scoreboard.vcd``
    (simulation trace) into the current directory.
    """
    dut = Scoreboard(16, 8)
    alusim = RegSim(16, 8)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard.vcd')


if __name__ == '__main__':
    test_scoreboard()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen.hdl.ast import unsigned
+from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
+from regfile.regfile import RegFileArray, treereduce
+from scoreboard.fu_fu_matrix import FUFUDepMatrix
+from scoreboard.fu_reg_matrix import FURegDepMatrix
+from scoreboard.global_pending import GlobalPending
+from scoreboard.group_picker import GroupPicker
+from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
+from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
+from scoreboard.instruction_q import Instruction, InstructionQ
+from scoreboard.memfu import MemFunctionUnits
+from compalu import ComputationUnitNoDelay
+from compldst import LDSTCompUnit
+from alu_hier import ALU, BranchALU
+from nmutil.latch import SRLatch
+from nmutil.nmoperator import eq
+from random import randint, seed
+from copy import deepcopy
+from math import log
class TestMemory(Elaboratable):
    """Small nmigen ``Memory`` with one read port and one write port.

    The memory holds ``(1 << addrw) // ddepth`` words of ``regwid`` bits and
    is initialised so that each word contains its own index.
    """

    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: width of each memory word, in bits
            * :addrw:  address width; sets the number of words
        """
        self.ddepth = 1  # regwid //8
        n_words = (1 << addrw) // self.ddepth
        self.mem = Memory(width=regwid, depth=n_words, init=range(n_words))

    def elaborate(self, platform):
        """Create the read and write ports and register them as submodules.

        The ports are also stored on the object as ``rdport`` / ``wrport``.
        """
        m = Module()

        rdport = self.mem.read_port()
        m.submodules.rdport = rdport
        self.rdport = rdport

        wrport = self.mem.write_port()
        m.submodules.wrport = wrport
        self.wrport = wrport

        return m
class MemSim:
    """Plain-Python model of the test memory.

    Holds ``(1 << addrw) // ddepth`` words, each initialised to its own
    index.  Stored data is truncated to ``regwid`` bits.

    NOTE(review): ``ddepth`` is used as a divisor when sizing the memory but
    as a shift count when indexing it in ``ld`` / ``st`` -- confirm that this
    is intended.
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = 1  # regwid//8
        n_words = (1 << addrw) // self.ddepth
        self.mem = [*range(n_words)]

    def ld(self, addr):
        """Return the word at index ``addr >> ddepth``."""
        index = addr >> self.ddepth
        return self.mem[index]

    def st(self, addr, data):
        """Store ``data``, masked to ``regwid`` bits, at ``addr >> ddepth``."""
        index = addr >> self.ddepth
        word_mask = (1 << self.regwid) - 1
        self.mem[index] = data & word_mask
+class CompUnitsBase(Elaboratable):
+ """ Computation Unit Base class.
+ Amazingly, this class works recursively. It's supposed to just
+ look after some ALUs (that can handle the same operations),
+ grouping them together, however it turns out that the same code
+ can also group *groups* of Computation Units together as well.
+ Basically it was intended just to concatenate the ALU's issue,
+ go_rd etc. signals together, which start out as bits and become
+ sequences. Turns out that the same trick works just as well
+ on Computation Units!
+ So this class may be used recursively to present a top-level
+ sequential concatenation of all the signals in and out of
+ ALUs, whilst at the same time making it convenient to group
+ ALUs together.
+ At the lower level, the intent is that groups of (identical)
+ ALUs may be passed the same operation. Even beyond that,
+ the intent is that that group of (identical) ALUs actually
+ share the *same pipeline* and as such become a "Concurrent
+ Computation Unit" as defined by Mitch Alsup (see section
+ """
# NOTE(review): the docstring above ends mid-sentence ("see section"); the
# reference it was pointing at is not present in this file.
+ def __init__(self, rwid, units, ldstmode=False):
+ """ Inputs:
+ * :rwid: bit width of register file(s) - both FP and INT
+ * :units: sequence of ALUs (or CompUnitsBase derivatives)
+ """
# ldstmode: when true, the additional load/store control signals (go_ad_i,
# go_st_i, ld_o, st_o, adr_rel_o, ...) and addr_o are created as well.
+ self.units = units
+ self.ldstmode = ldstmode
+ self.rwid = rwid
+ self.rwid = rwid
# NOTE(review): the line above repeats the previous assignment; it has no
# additional effect.
# Width of every per-unit vector: if the first entry of `units` is itself a
# CompUnitsBase, the widths of all sub-groups are summed; otherwise it is
# simply len(units).  Only units[0] is inspected to make this decision.
+ if units and isinstance(units[0], CompUnitsBase):
+ self.n_units = 0
+ for u in self.units:
+ self.n_units += u.n_units
+ else:
+ self.n_units = len(units)
+ n_units = self.n_units
+ # inputs
+ self.issue_i = Signal(n_units, reset_less=True)
+ self.go_rd_i = Signal(n_units, reset_less=True)
+ self.go_wr_i = Signal(n_units, reset_less=True)
+ self.shadown_i = Signal(n_units, reset_less=True)
+ self.go_die_i = Signal(n_units, reset_less=True)
+ if ldstmode:
+ self.go_ad_i = Signal(n_units, reset_less=True)
+ self.go_st_i = Signal(n_units, reset_less=True)
+ # outputs
+ self.busy_o = Signal(n_units, reset_less=True)
+ self.rd_rel_o = Signal(n_units, reset_less=True)
+ self.req_rel_o = Signal(n_units, reset_less=True)
+ if ldstmode:
+ self.ld_o = Signal(n_units, reset_less=True) # op is LD
+ self.st_o = Signal(n_units, reset_less=True) # op is ST
+ self.adr_rel_o = Signal(n_units, reset_less=True)
+ self.sto_rel_o = Signal(n_units, reset_less=True)
+ self.req_rel_o = Signal(n_units, reset_less=True)
# NOTE(review): req_rel_o was already created a few lines above; in ldstmode
# the line above replaces it with a second Signal built with the same
# arguments.
+ self.load_mem_o = Signal(n_units, reset_less=True)
+ self.stwd_mem_o = Signal(n_units, reset_less=True)
+ self.addr_o = Signal(rwid, reset_less=True)
+ # in/out register data (note: not register#, actual data)
+ self.data_o = Signal(rwid, reset_less=True)
+ self.src1_i = Signal(rwid, reset_less=True)
+ self.src2_i = Signal(rwid, reset_less=True)
+ # input operand
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
# every unit becomes a submodule named comp0, comp1, ...
+ for i, alu in enumerate(self.units):
+ setattr(m.submodules, "comp%d" % i, alu)
# collect each unit's control signals into per-kind lists so that they can be
# concatenated (Cat) to or from this object's vectors below
+ go_rd_l = []
+ go_wr_l = []
+ issue_l = []
+ busy_l = []
+ req_rel_l = []
+ rd_rel_l = []
+ shadow_l = []
+ godie_l = []
+ for alu in self.units:
+ req_rel_l.append(alu.req_rel_o)
+ rd_rel_l.append(alu.rd_rel_o)
+ shadow_l.append(alu.shadown_i)
+ godie_l.append(alu.go_die_i)
+ go_wr_l.append(alu.go_wr_i)
+ go_rd_l.append(alu.go_rd_i)
+ issue_l.append(alu.issue_i)
+ busy_l.append(alu.busy_o)
# unit outputs -> this object's output vectors
+ comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
+ comb += self.req_rel_o.eq(Cat(*req_rel_l))
+ comb += self.busy_o.eq(Cat(*busy_l))
# this object's input vectors -> unit inputs (Cat is the assignment target)
+ comb += Cat(*godie_l).eq(self.go_die_i)
+ comb += Cat(*shadow_l).eq(self.shadown_i)
+ comb += Cat(*go_wr_l).eq(self.go_wr_i)
+ comb += Cat(*go_rd_l).eq(self.go_rd_i)
+ comb += Cat(*issue_l).eq(self.issue_i)
+ # connect data register input/output
+ # merge (OR) all integer FU / ALU outputs to a single value
# NOTE(review): with indentation lost it cannot be told from here whether the
# `if self.ldstmode:` below is nested inside `if self.units:` or follows it.
+ if self.units:
+ data_o = treereduce(self.units, "data_o")
+ comb += self.data_o.eq(data_o)
+ if self.ldstmode:
+ addr_o = treereduce(self.units, "addr_o")
+ comb += self.addr_o.eq(addr_o)
# both source operands are handed to every unit
+ for i, alu in enumerate(self.units):
+ comb += alu.src1_i.eq(self.src1_i)
+ comb += alu.src2_i.eq(self.src2_i)
# everything below this point only applies in load/store mode
+ if not self.ldstmode:
+ return m
+ ldmem_l = []
+ stmem_l = []
+ go_ad_l = []
+ go_st_l = []
+ ld_l = []
+ st_l = []
+ adr_rel_l = []
+ sto_rel_l = []
+ for alu in self.units:
+ ld_l.append(alu.ld_o)
+ st_l.append(alu.st_o)
+ adr_rel_l.append(alu.adr_rel_o)
+ sto_rel_l.append(alu.sto_rel_o)
+ ldmem_l.append(alu.load_mem_o)
+ stmem_l.append(alu.stwd_mem_o)
+ go_ad_l.append(alu.go_ad_i)
+ go_st_l.append(alu.go_st_i)
# same pattern as above: unit outputs are concatenated into this object's
# vectors, and this object's go_ad_i / go_st_i are split across the units
+ comb += self.ld_o.eq(Cat(*ld_l))
+ comb += self.st_o.eq(Cat(*st_l))
+ comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
+ comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
+ comb += self.load_mem_o.eq(Cat(*ldmem_l))
+ comb += self.stwd_mem_o.eq(Cat(*stmem_l))
+ comb += Cat(*go_ad_l).eq(self.go_ad_i)
+ comb += Cat(*go_st_l).eq(self.go_st_i)
+ return m
class CompUnitLDSTs(CompUnitsBase):
    """Group of ``n_ldsts`` load/store Computation Units sharing one operation.

    Each unit is an ``LDSTCompUnit`` wrapped around its own ``ALU``; the
    group is registered with ``CompUnitsBase`` in LD/ST mode.
    """

    def __init__(self, rwid, opwid, n_ldsts, mem):
        """ Inputs:

            * :rwid:    bit width of register file(s) - both FP and INT
            * :opwid:   operand bit width
            * :n_ldsts: number of LD/ST units to create
            * :mem:     passed unchanged to every LDSTCompUnit
        """
        self.opwid = opwid

        # operation and immediate, shared by every unit in the group
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # one integer ALU per LD/ST unit
        self.alus = [ALU(rwid) for _ in range(n_ldsts)]

        aluopwid = 4  # see compldst.py for "internal" opcode
        ldst_units = [LDSTCompUnit(rwid, aluopwid, alu, mem)
                      for alu in self.alus]

        CompUnitsBase.__init__(self, rwid, ldst_units, ldstmode=True)

    def elaborate(self, platform):
        """Extend the base wiring with the shared operation and immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # every unit receives the same operation (its lower 4 bits) and
        # immediate, and has isalu_i tied to 0
        for unit in self.units:
            comb += [
                unit.oper_i[0:4].eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
                unit.isalu_i.eq(0),
            ]

        return m
class CompUnitALUs(CompUnitsBase):
    """Group of ``n_alus`` integer Computation Units sharing one operation.

    Each unit is a ``ComputationUnitNoDelay`` wrapped around its own ``ALU``.
    """

    def __init__(self, rwid, opwid, n_alus):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :opwid:  operand bit width
            * :n_alus: number of ALU computation units to create
        """
        self.opwid = opwid

        # operation and immediate, shared by every unit in the group
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # build all the ALUs first, then wrap each in a computation unit
        int_alus = [ALU(rwid) for _ in range(n_alus)]

        aluopwid = 3  # extra bit for immediate mode
        comp_units = [ComputationUnitNoDelay(rwid, aluopwid, alu)
                      for alu in int_alus]

        CompUnitsBase.__init__(self, rwid, comp_units)

    def elaborate(self, platform):
        """Extend the base wiring with the shared operation and immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # every unit receives the same operation (lower 3 bits only) and
        # the same immediate
        for unit in self.units:
            comb += [
                unit.oper_i[0:3].eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
            ]

        return m
class CompUnitBR(CompUnitsBase):
    """Single-unit group holding the branch Computation Unit.

    The branch ALU is kept as ``self.bgt`` and its computation unit as
    ``self.br1`` so that callers can reach them directly (the original
    notes this is so a shadow unit can be created for the branch).
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width
        """
        self.opwid = opwid

        # operation and immediate inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU, wrapped in its Computation Unit
        aluopwid = 3  # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)

        CompUnitsBase.__init__(self, rwid, [self.br1])

    def elaborate(self, platform):
        """Extend the base wiring with the shared operation and immediate."""
        m = CompUnitsBase.elaborate(self, platform)
        comb = m.d.comb

        # hand the same operation and immediate to all units
        for unit in self.units:
            comb += [
                unit.oper_i.eq(self.oper_i),
                unit.imm_i.eq(self.imm_i),
            ]

        return m
class FunctionUnits(Elaboratable):
    """Wrapper around one FU-FU and one FU-Reg dependency matrix.

    Instantiates a ``FUFUDepMatrix`` and a ``FURegDepMatrix`` (with two
    source operands), cross-connects their pending vectors and exposes the
    combined interface as attributes of this object.

    Once ``elaborate`` has run, ``wr_pend_o`` is additionally available: it
    is the FU-Reg matrix's own ``wr_pend_o``, exported for use in WaWGrid.
    """

    def __init__(self, n_regs, n_int_alus):
        """ Inputs:

            * :n_regs:     width of the per-register vectors
            * :n_int_alus: width of the per-Function-Unit vectors
        """
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # per-register vectors, in
        self.dest_i = Signal(n_regs, reset_less=True)  # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True)  # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True)  # oper2 R# in

        # per-register vectors, out: global read / write pending
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # per-register vectors, out: register selects
        self.dest_rsel_o = Signal(n_regs, reset_less=True)  # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True)  # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True)  # src2 reg (bot)

        # per-Function-Unit vectors, out
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        # per-Function-Unit vectors, in
        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here (it is set in
        # elaborate), for use in WaWGrid

    def elaborate(self, platform):
        """Instantiate both dependency matrices and wire them together."""
        m = Module()
        comb = m.d.comb
        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fufu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fufu

        # Integer FU-Reg Dep Matrix (two source operands)
        fureg = FURegDepMatrix(n_fus, self.n_regs, 2)
        m.submodules.intregdeps = fureg

        # register-select vectors: exported as the global pending vectors
        # and fed back into the FU-Reg matrix; the FU-Reg pending outputs
        # then drive the FU-FU matrix
        comb += [
            self.g_int_rd_pend_o.eq(fureg.v_rd_rsel_o),
            self.g_int_wr_pend_o.eq(fureg.v_wr_rsel_o),
            fureg.rd_pend_i.eq(fureg.v_rd_rsel_o),
            fureg.wr_pend_i.eq(fureg.v_wr_rsel_o),
            fufu.rd_pend_i.eq(fureg.rd_pend_o),
            fufu.wr_pend_i.eq(fureg.wr_pend_o),
        ]

        self.wr_pend_o = fureg.wr_pend_o  # also output for use in WaWGrid

        # FU-FU matrix: issue / go / die in, readable / writable out
        comb += [
            fufu.issue_i.eq(self.fn_issue_i),
            fufu.go_rd_i.eq(self.go_rd_i),
            fufu.go_wr_i.eq(self.go_wr_i),
            fufu.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(fufu.readable_o),
            self.writable_o.eq(fufu.writable_o),
        ]

        # FU-Reg matrix: dest/src1/src2 and issue / go / die in,
        # register selects out
        comb += [
            fureg.dest_i.eq(self.dest_i),
            fureg.src_i[0].eq(self.src1_i),
            fureg.src_i[1].eq(self.src2_i),
            fureg.go_rd_i.eq(self.go_rd_i),
            fureg.go_wr_i.eq(self.go_wr_i),
            fureg.go_die_i.eq(self.go_die_i),
            fureg.issue_i.eq(self.fn_issue_i),
            self.dest_rsel_o.eq(fureg.dest_rsel_o),
            self.src1_rsel_o.eq(fureg.src_rsel_o[0]),
            self.src2_rsel_o.eq(fureg.src_rsel_o[1]),
        ]

        return m
+class Scoreboard(Elaboratable):
# Top-level scoreboard: instantiates the register files, the computation
# units (ALU, LD/ST and branch groups), the dependency matrices
# (FunctionUnits, MemFunctionUnits), the priority pickers, the issue unit,
# the shadow matrices and the branch-speculation recorder, and wires them
# together in elaborate().
+ def __init__(self, rwid, n_regs):
+ """ Inputs:
+ * :rwid: bit width of register file(s) - both FP and INT
+ * :n_regs: depth of register file(s) - number of FP and INT regs
+ """
+ self.rwid = rwid
+ self.n_regs = n_regs
+ # Register Files
+ self.intregs = RegFileArray(rwid, n_regs)
+ self.fpregs = RegFileArray(rwid, n_regs)
+ # Memory (test for now)
+ self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
+ # issue q needs to get at these
+ self.aluissue = IssueUnitGroup(2)
+ self.lsissue = IssueUnitGroup(2)
+ self.brissue = IssueUnitGroup(1)
+ # and these
+ self.alu_oper_i = Signal(4, reset_less=True)
+ self.alu_imm_i = Signal(rwid, reset_less=True)
+ self.br_oper_i = Signal(4, reset_less=True)
+ self.br_imm_i = Signal(rwid, reset_less=True)
+ self.ls_oper_i = Signal(4, reset_less=True)
+ self.ls_imm_i = Signal(rwid, reset_less=True)
+ # inputs
+ self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
+ self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
+ self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
+ self.reg_enable_i = Signal(reset_less=True) # enable reg decode
+ # outputs
+ self.issue_o = Signal(reset_less=True) # instruction was accepted
+ self.busy_o = Signal(reset_less=True) # at least one CU is busy
+ # for branch speculation experiment. branch_direction = 0 if
+ # the branch hasn't been met yet. 1 indicates "success", 2 is "fail"
+ # branch_succ and branch_fail are requests to have the current
+ # instruction be dependent on the branch unit "shadow" capability.
+ self.branch_succ_i = Signal(reset_less=True)
+ self.branch_fail_i = Signal(reset_less=True)
+ self.branch_direction_o = Signal(2, reset_less=True)
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ sync = m.d.sync
+ m.submodules.intregs = self.intregs
+ m.submodules.fpregs = self.fpregs
+ m.submodules.mem = mem = self.mem
+ # register ports
+ int_dest = self.intregs.write_port("dest")
+ int_src1 = self.intregs.read_port("src1")
+ int_src2 = self.intregs.read_port("src2")
+ fp_dest = self.fpregs.write_port("dest")
+ fp_src1 = self.fpregs.read_port("src1")
+ fp_src2 = self.fpregs.read_port("src2")
# NOTE: fp_dest / fp_src1 / fp_src2 are created here but are not referenced
# again anywhere in this method.
+ # Int ALUs and BR ALUs
+ n_int_alus = 5
# NOTE(review): hard-coded; presumably 2 ALU + 2 LD/ST + 1 branch, matching
# the three IssueUnitGroup sizes created in __init__ -- confirm.
+ cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
+ cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
+ # LDST Comp Units
+ n_ldsts = 2
+ cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
# NOTE: None is passed as the memory argument; the local `mem` (self.mem)
# bound above is not handed to the LD/ST units here.
+ # Comp Units
+ m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
# CompUnitsBase is used here to group the three groups; the order of the list
# (ALU, LD/ST, branch) fixes the bit order of cu's concatenated vectors.
+ bgt = cub.bgt # get at the branch computation unit
+ br1 = cub.br1
+ # Int FUs
+ m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
+ # Memory FUs
+ m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
+ # Memory Priority Picker 1: one gateway per memory port
+ mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
+ m.submodules.mempick1 = mempick1
# NOTE: mempick1 is only instantiated; none of its ports are connected in
# this method.
+ # Count of number of FUs
+ n_intfus = n_int_alus
+ n_fp_fus = 0 # for now
+ # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
+ intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
+ m.submodules.intpick1 = intpick1
+ # INT/FP Issue Unit
+ regdecode = RegDecode(self.n_regs)
+ m.submodules.regdecode = regdecode
+ issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
+ m.submodules.issueunit = issueunit
+ # Shadow Matrix. currently n_intfus shadows, to be used for
+ # write-after-write hazards. NOTE: there is one extra for branches,
+ # so the shadow width is increased by 1
# NOTE(review): as written, the branch shadow is a separate ShadowMatrix
# (`bshadow`, shadow width 1) rather than an extra column on `shadows`.
+ m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
+ m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
+ # record previous instruction to cast shadow on current instruction
+ prev_shadow = Signal(n_intfus)
+ # Branch Speculation recorder. tracks the success/fail state as
+ # each instruction is issued, so that when the branch occurs the
+ # allow/cancel can be issued as appropriate.
+ m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
+ #---------
+ # ok start wiring things together...
+ # "now hear de word of de looord... dem bones dem bones dem dryy bones"
+ # https://www.youtube.com/watch?v=pYb8Wm6-QfA
+ #---------
+ #---------
+ # Issue Unit is where it starts. set up some in/outs for this module
+ #---------
+ comb += [ regdecode.dest_i.eq(self.int_dest_i),
+ regdecode.src1_i.eq(self.int_src1_i),
+ regdecode.src2_i.eq(self.int_src2_i),
+ regdecode.enable_i.eq(self.reg_enable_i),
+ self.issue_o.eq(issueunit.issue_o)
+ ]
+ # take these to outside (issue needs them)
+ comb += cua.oper_i.eq(self.alu_oper_i)
+ comb += cua.imm_i.eq(self.alu_imm_i)
+ comb += cub.oper_i.eq(self.br_oper_i)
+ comb += cub.imm_i.eq(self.br_imm_i)
+ comb += cul.oper_i.eq(self.ls_oper_i)
+ comb += cul.imm_i.eq(self.ls_imm_i)
+ # TODO: issueunit.f (FP)
+ # and int function issue / busy arrays, and dest/src1/src2
+ comb += intfus.dest_i.eq(regdecode.dest_o)
+ comb += intfus.src1_i.eq(regdecode.src1_o)
+ comb += intfus.src2_i.eq(regdecode.src2_o)
+ fn_issue_o = issueunit.fn_issue_o
+ comb += intfus.fn_issue_i.eq(fn_issue_o)
+ comb += issueunit.busy_i.eq(cu.busy_o)
+ comb += self.busy_o.eq(cu.busy_o.bool())
+ #---------
+ # Memory Function Unit
+ #---------
# reset_b is registered (sync), so it follows go_st / go_wr / go_die of the
# LD/ST group one clock later
+ reset_b = Signal(cul.n_units, reset_less=True)
+ sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
+ comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
+ comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
+ comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
+ # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
+ # in a transitive fashion). This cycle activates based on LDSTCompUnit
+ # issue_i. multi-issue gets a bit more complex but not a lot.
+ prior_ldsts = Signal(cul.n_units, reset_less=True)
+ sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
+ with m.If(self.ls_oper_i[2]): # LD bit of operand
+ comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
+ with m.If(self.ls_oper_i[3]): # ST bit of operand
+ comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
+ # TODO: adr_rel_o needs to go into L1 Cache. for now,
+ # just immediately activate go_adr
+ comb += cul.go_ad_i.eq(cul.adr_rel_o)
+ # connect up address data
# NOTE: units 0 and 1 are indexed explicitly, so this relies on the LD/ST
# group having (at least) two units -- cf. n_ldsts = 2 above.
+ comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
+ comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
+ # connect loadable / storable to go_ld/go_st.
+ # XXX should only be done when the memory ld/st has actually happened!
+ go_st_i = Signal(cul.n_units, reset_less=True)
+ go_ld_i = Signal(cul.n_units, reset_less=True)
+ comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
+ cul.req_rel_o & cul.ld_o)
+ comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
+ cul.sto_rel_o & cul.st_o)
+ comb += memfus.go_ld_i.eq(go_ld_i)
+ comb += memfus.go_st_i.eq(go_st_i)
+ #comb += cul.go_wr_i.eq(go_ld_i)
+ comb += cul.go_st_i.eq(go_st_i)
+ #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
+ #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
+ #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
+ #---------
+ # merge shadow matrices outputs
+ #---------
+ # these are explained in ShadowMatrix docstring, and are to be
+ # connected to the FUReg and FUFU Matrices, to get them to reset
+ anydie = Signal(n_intfus, reset_less=True)
+ allshadown = Signal(n_intfus, reset_less=True)
+ shreset = Signal(n_intfus, reset_less=True)
+ comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
+ comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
+ comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
+ #---------
+ # connect fu-fu matrix
+ #---------
+ # Group Picker... done manually for now.
+ go_rd_o = intpick1.go_rd_o
+ go_wr_o = intpick1.go_wr_o
+ go_rd_i = intfus.go_rd_i
+ go_wr_i = intfus.go_wr_i
+ go_die_i = intfus.go_die_i
+ # NOTE: connect to the shadowed versions so that they can "die" (reset)
+ comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
+ comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
+ comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
+ # Connect Picker
+ #---------
+ comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
+ comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
+ int_rd_o = intfus.readable_o
+ int_wr_o = intfus.writable_o
+ comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
+ comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
+ #---------
+ # Shadow Matrix
+ #---------
+ comb += shadows.issue_i.eq(fn_issue_o)
+ #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
+ comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
# (the commented-out line above is textually identical to the live line that
# follows it)
+ #---------
+ # NOTE; this setup is for the instruction order preservation...
+ # connect shadows / go_dies to Computation Units
+ comb += cu.shadown_i[0:n_intfus].eq(allshadown)
+ comb += cu.go_die_i[0:n_intfus].eq(anydie)
+ # ok connect first n_int_fu shadows to busy lines, to create an
+ # instruction-order linked-list-like arrangement, using a bit-matrix
+ # (instead of e.g. a ring buffer).
+ # when written, the shadow can be cancelled (and was good)
+ for i in range(n_intfus):
+ comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
+ # *previous* instruction shadows *current* instruction, and, obviously,
+ # if the previous is completed (!busy) don't cast the shadow!
+ comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
+ for i in range(n_intfus):
+ comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
+ #---------
+ # ... and this is for branch speculation. it uses the extra bit
+ # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
+ # only needs to set shadow_i, s_fail_i and s_good_i
+ # issue captures shadow_i (if enabled)
+ comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
+ bactive = Signal(reset_less=True)
+ comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
+ # instruction being issued (fn_issue_o) has a shadow cast by the branch
+ with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
+ comb += bshadow.issue_i.eq(fn_issue_o)
+ for i in range(n_intfus):
+ with m.If(fn_issue_o & (Const(1<<i))):
+ comb += bshadow.shadow_i[i][0].eq(1)
+ # finally, we need an indicator to the test infrastructure as to
+ # whether the branch succeeded or failed, plus, link up to the
+ # "recorder" of whether the instruction was under shadow or not
+ with m.If(br1.issue_i):
+ sync += bspec.active_i.eq(1)
# 0x1f is a 5-bit mask, i.e. as wide as n_int_alus = 5 set near the top of
# this method; the two values have to be kept in step by hand.
+ with m.If(self.branch_succ_i):
+ comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
+ with m.If(self.branch_fail_i):
+ comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
+ # branch is active (TODO: a better signal: this is over-using the
+ # go_write signal - actually the branch should not be "writing")
# branch_direction_o is loaded with br1.data_o + 1 (cf. the encoding described
# in __init__: 0 = not met yet, 1 and 2 = outcome)
+ with m.If(br1.go_wr_i):
+ sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
+ sync += bspec.active_i.eq(0)
+ comb += bspec.br_i.eq(1)
+ # branch occurs if data == 1, failed if data == 0
+ comb += bspec.br_ok_i.eq(br1.data_o == 1)
+ for i in range(n_intfus):
+ # *expected* direction of the branch matched against *actual*
+ comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
+ # ... or it didn't
+ comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
+ #---------
+ # Connect Register File(s)
+ #---------
+ comb += int_dest.wen.eq(intfus.dest_rsel_o)
+ comb += int_src1.ren.eq(intfus.src1_rsel_o)
+ comb += int_src2.ren.eq(intfus.src2_rsel_o)
+ # connect ALUs to regfule
+ comb += int_dest.data_i.eq(cu.data_o)
+ comb += cu.src1_i.eq(int_src1.data_o)
+ comb += cu.src2_i.eq(int_src2.data_o)
+ # connect ALU Computation Units
# (the same three connections appear, commented out, at the end of the
# Memory Function Unit section above)
+ comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
+ comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
+ comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
+ return m
+ def __iter__(self):
# Yields the register-file signals plus a subset of this module's own ports.
# NOTE: the oper/imm inputs, reg_enable_i and busy_o declared in __init__ are
# not yielded here.
+ yield from self.intregs
+ yield from self.fpregs
+ yield self.int_dest_i
+ yield self.int_src1_i
+ yield self.int_src2_i
+ yield self.issue_o
+ yield self.branch_succ_i
+ yield self.branch_fail_i
+ yield self.branch_direction_o
+ def ports(self):
# list form of __iter__
+ return list(self)
+class IssueToScoreboard(Elaboratable):
# Couples an InstructionQ to a Scoreboard: the instruction at the head of the
# queue is decoded into one of three function-unit groups (branch, LD/ST,
# ALU) and presented to the scoreboard; one entry is popped from the queue
# when the selected group reports a non-zero fn_issue_o.
+ def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
# All six arguments are stored; qlen, n_in, n_out, rwid and opwid are passed
# on to InstructionQ, and rwid and n_regs to Scoreboard, in elaborate().
+ self.qlen = qlen
+ self.n_in = n_in
+ self.n_out = n_out
+ self.rwid = rwid
+ self.opw = opwid
+ self.n_regs = n_regs
# shape of the queue-length counters: unsigned, int(log2(qlen)) + 2 bits
+ mqbits = unsigned(int(log(qlen) / log(2))+2)
+ self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
+ self.p_ready_o = Signal() # instructions were added
+ self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
+ self.busy_o = Signal(reset_less=True) # at least one CU is busy
+ self.qlen_o = Signal(mqbits, reset_less=True)
+ def elaborate(self, platform):
+ m = Module()
+ comb = m.d.comb
+ sync = m.d.sync
+ iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
+ sc = Scoreboard(self.rwid, self.n_regs)
+ m.submodules.iq = iq
+ m.submodules.sc = sc
+ # get at the regfile for testing
# NOTE: self.intregs therefore only exists after elaborate() has been called
+ self.intregs = sc.intregs
+ # and the "busy" signal and instruction queue length
+ comb += self.busy_o.eq(sc.busy_o)
+ comb += self.qlen_o.eq(iq.qlen_o)
+ # link up instruction queue
+ comb += iq.p_add_i.eq(self.p_add_i)
+ comb += self.p_ready_o.eq(iq.p_ready_o)
+ for i in range(self.n_in):
+ comb += eq(iq.data_i[i], self.data_i[i])
+ # take instruction and process it. note that it's possible to
+ # "inspect" the queue contents *without* actually removing the
+ # items. items are only removed when the
# NOTE(review): the sentence above breaks off mid-way in the original.
+ # in "waiting" state
+ wait_issue_br = Signal()
+ wait_issue_alu = Signal()
+ wait_issue_ls = Signal()
# NOTE(review): indentation was lost in this dump, so the nesting of the
# m.If blocks below cannot be confirmed from here.  Each of the three cases
# requests a pop of one queue entry (n_sub_i = 1) when its wait flag is set,
# the matching issue group shows a non-zero fn_issue_o, and the queue is not
# empty.
+ with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
+ # set instruction pop length to 1 if the unit accepted
+ with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
+ with m.If(iq.qlen_o != 0):
+ comb += iq.n_sub_i.eq(1)
+ with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
+ with m.If(iq.qlen_o != 0):
+ comb += iq.n_sub_i.eq(1)
+ with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
+ with m.If(iq.qlen_o != 0):
+ comb += iq.n_sub_i.eq(1)
+ # see if some instruction(s) are here. note that this is
+ # "inspecting" the in-place queue. note also that on the
+ # cycle following "waiting" for fn_issue_o to be set, the
+ # "resetting" done above (insn_i=0) could be re-ASSERTed.
+ with m.If(iq.qlen_o != 0):
+ # get the operands and operation
+ imm = iq.data_o[0].imm_i
+ dest = iq.data_o[0].dest_i
+ src1 = iq.data_o[0].src1_i
+ src2 = iq.data_o[0].src2_i
+ op = iq.data_o[0].oper_i
+ opi = iq.data_o[0].opim_i # immediate set
+ # set the src/dest regs
+ comb += sc.int_dest_i.eq(dest)
+ comb += sc.int_src1_i.eq(src1)
+ comb += sc.int_src2_i.eq(src2)
+ comb += sc.reg_enable_i.eq(1) # enable the regfile
+ # choose a Function-Unit-Group
# decode priority: opcode bits 2-3 non-zero -> branch; otherwise bits 4-5
# non-zero -> LD/ST; otherwise ALU
+ with m.If((op & (0x3<<2)) != 0): # branch
+ comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
+ comb += sc.br_imm_i.eq(imm)
+ comb += sc.brissue.insn_i.eq(1)
+ comb += wait_issue_br.eq(1)
+ with m.Elif((op & (0x3<<4)) != 0): # ld/st
+ # see compldst.py
+ # bit 0: ADD/SUB
+ # bit 1: immed
+ # bit 4: LD
+ # bit 5: ST
# the Cat below packs these into ls_oper_i as: bit 0 = op[0], bit 1 = opi[0],
# bits 2-3 = op[4:6] -- Scoreboard tests ls_oper_i[2] / [3] as its LD / ST bits
+ comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
+ comb += sc.ls_imm_i.eq(imm)
+ comb += sc.lsissue.insn_i.eq(1)
+ comb += wait_issue_ls.eq(1)
+ with m.Else(): # alu
+ comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
+ comb += sc.alu_imm_i.eq(imm)
+ comb += sc.aluissue.insn_i.eq(1)
+ comb += wait_issue_alu.eq(1)
+ # these indicate that the instruction is to be made
+ # shadow-dependent on
+ # (either) branch success or branch fail
+ #yield sc.branch_fail_i.eq(branch_fail)
+ #yield sc.branch_succ_i.eq(branch_success)
# NOTE: the two lines above are commented out, so sc.branch_fail_i and
# sc.branch_succ_i are not driven anywhere in this method.
+ return m
+ def __iter__(self):
# yields p_ready_o, then every field of every data_i entry, then p_add_i
# (busy_o and qlen_o are not included)
+ yield self.p_ready_o
+ for o in self.data_i:
+ yield from list(o)
+ yield self.p_add_i
+ def ports(self):
# list form of __iter__
+ return list(self)
# Operation codes understood by RegSim.op
IADD = 0
ISUB = 1
IMUL = 2
ISHF = 3
IBGT = 4
IBLT = 5
IBEQ = 6
IBNE = 7


class RegSim:
    """Pure-Python reference model of a register file plus simple ALU.

    Used by the simulations to predict what the hardware register file
    should contain after a sequence of operations.
    """

    # opcode -> function(src1, src2, mask) giving the *unmasked* result
    _OPS = {
        IADD: lambda a, b, mask: a + b,
        ISUB: lambda a, b, mask: a - b,
        IMUL: lambda a, b, mask: a * b,
        ISHF: lambda a, b, mask: a >> (b & mask),
        IBGT: lambda a, b, mask: int(a > b),
        IBLT: lambda a, b, mask: int(a < b),
        IBEQ: lambda a, b, mask: int(a == b),
        IBNE: lambda a, b, mask: int(a != b),
    }

    def __init__(self, rwidth, nregs):
        """Create ``nregs`` zeroed registers, each ``rwidth`` bits wide."""
        self.rwidth = rwidth
        self.regs = [0] * nregs

    def op(self, op, op_imm, imm, src1, src2, dest):
        """Execute one operation and store the result in register ``dest``.

        * op: one of the I* opcodes above
        * op_imm: when true, ``imm`` is used as the second operand instead
          of register ``src2``
        * src1, src2, dest: register numbers

        Returns the result masked to ``rwidth`` bits.  Any other opcode
        (LD/ST, not modelled yet) returns 0 and leaves the registers
        untouched.
        """
        maxbits = (1 << self.rwidth) - 1
        src1 = self.regs[src1] & maxbits
        if op_imm:
            src2 = imm
        else:
            src2 = self.regs[src2] & maxbits
        fn = self._OPS.get(op)
        if fn is None:
            return 0  # LD/ST TODO
        val = fn(src1, src2, maxbits) & maxbits
        self.setval(dest, val)
        return val

    def setval(self, dest, val):
        """Store ``val`` in register ``dest`` (and log it)."""
        print ("sim setval", dest, hex(val))
        self.regs[dest] = val

    def dump(self, dut):
        """Simulation process: print expected vs. actual for every register.

        Yields ``dut.intregs.regs[i].reg`` for each register and expects
        the simulator to send back its current value.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            okstr = "OK" if reg == val else "!ok"
            print("reg %d expected %x received %x %s" % (i, val, reg, okstr))

    def check(self, dut):
        """Simulation process: verify every register against the model.

        On the first mismatch the whole register file is dumped and an
        AssertionError is raised.  The error is raised explicitly rather
        than via ``assert`` so the check still fires under ``python -O``.
        """
        for i, val in enumerate(self.regs):
            reg = yield dut.intregs.regs[i].reg
            if reg != val:
                print("reg %d expected %x received %x\n" % (i, val, reg))
                yield from self.dump(dut)
                raise AssertionError(
                    "reg %d expected %x received %x" % (i, val, reg))
def instr_q(dut, op, op_imm, imm, src1, src2, dest,
            branch_success, branch_fail):
    """Simulation process: push one instruction into the dut's queue.

    The instruction is driven onto ``dut.data_i[0]``, ``p_add_i`` is set to
    the number of instructions offered, and the process then clocks until
    ``p_ready_o`` is seen high before dropping ``p_add_i`` back to zero.

    ``branch_success`` and ``branch_fail`` are accepted but not used here.
    """
    batch = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
              'src1_i': src1, 'src2_i': src2}]
    count = 1

    for slot in range(count):
        yield from eq(dut.data_i[slot], batch[slot])
        raw = yield dut.data_i[slot]
        print ("senddata %d %x" % (slot, raw))

    yield dut.p_add_i.eq(count)

    # always advance at least one clock, then keep going until accepted
    while True:
        yield
        accepted = yield dut.p_ready_o
        if accepted:
            break

    yield dut.p_add_i.eq(0)
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation process: issue one instruction directly to a scoreboard.

    Clears all issue requests, sets up the register numbers, asserts
    ``insn_i`` on the branch unit (when op bit 2 or 3 is set) or on the ALU
    unit otherwise, marks the shadow dependency and then waits for the
    chosen unit to report ``fn_issue_o``.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & (0x3 << 2)) != 0
    if is_branch:
        dut_issue = dut.brissue
        yield dut_issue.insn_i.eq(1)
        yield dut.br_oper_i.eq(Const(op & 0x3, 2))
        yield dut.br_imm_i.eq(imm)
    else:
        dut_issue = dut.aluissue
        yield dut_issue.insn_i.eq(1)
        yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
        yield dut.alu_imm_i.eq(imm)
    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
def print_reg(dut, rnums):
    """Simulation process: print the hex contents of the given registers.

    * rnums: iterable of register numbers.  It is materialised up front
      because it is walked twice (once to read, once to label the output);
      a one-shot iterator would otherwise print an empty label list.
    """
    rnums = list(rnums)
    values = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        values.append("%x" % reg)
    labels = map(str, rnums)
    print ("reg %s: %s" % (','.join(labels), ','.join(values)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; with ``shadowing``
    a trailing ``(0, 0)`` shadow pair is appended.  Register numbers are in
    ``1..dut.n_regs-1``, ``imm`` in ``1..2**dut.rwid-1`` and ``op`` in
    ``0..max_opnums``.  ``opi`` is 1 when a random draw from 0..2 comes
    out as zero, otherwise 0.
    """
    ops = []
    for _ in range(n_ops):
        # NOTE: the order of the randint() calls determines the sequence
        # produced for a given seed, so it must not be changed.
        src1 = randint(1, dut.n_regs-1)
        src2 = randint(1, dut.n_regs-1)
        imm = randint(1, (1<<dut.rwid)-1)
        dest = randint(1, dut.n_regs-1)
        op = randint(0, max_opnums)
        opi = int(randint(0, 2) == 0)

        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry = entry + ((0, 0),)
        ops.append(entry)
    return ops
def wait_for_busy_clear(dut):
    """Simulation process: clock until ``dut.busy_o`` reads as false.

    Prints "busy" once for every cycle spent waiting.
    """
    while (yield dut.busy_o):
        print ("busy",)
        yield
def disable_issue(dut):
    """Simulation process: de-assert ``insn_i`` on the ALU, branch and
    load/store issue units of ``dut`` (in that order)."""
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
def wait_for_issue(dut, dut_issue):
    """Simulation process: clock until ``dut_issue.fn_issue_o`` is set.

    Prints "busy" for every cycle spent waiting.  Once the unit has issued,
    all issue requests are cleared and the register file enable is dropped.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        #yield from print_reg(dut, [1,2,3])
        yield
        #yield from print_reg(dut, [1,2,3])

    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
def scoreboard_branch_sim(dut, alusim):
    """Simulation process: exercise branch shadowing on a scoreboard.

    Instruction tuples are ``(src1, src2, dest, op, (shadow_on, shadow_off))``.
    For a branch (``op >= 4``) the ``dest`` slot instead carries a
    ``(branch_ok, branch_fail)`` pair of instruction lists; both lists are
    queued to the hardware (marked with opposite shadows) while the software
    model only runs the list selected by the branch result.

    NOTE(review): this drives scoreboard-level signals (``int_dest_i``,
    ``branch_direction_o`` ...) via int_instr, so ``dut`` is presumably a
    Scoreboard rather than an IssueToScoreboard -- confirm before enabling.
    """
    iseed = 3

    for _round in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops(..., shadowing=True) returns
            # 7-element tuples, which do not match the 5-element unpacking
            # below; this path needs adapting before it can be enabled.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the software model consumes its own copy of the instruction tree
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: no shadow needed any more
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # these tuples carry no immediate: pass imm=0 so the call
            # matches int_instr(dut, op, imm, src1, src2, dest, ...)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # no immediate operand: op_imm=0, imm=0, matching
            # RegSim.op(op, op_imm, imm, src1, src2, dest)
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
def scoreboard_sim(dut, alusim):
    """Simulation process: run regression instructions through the queue.

    Registers are loaded with seeded random values (mirrored into
    ``alusim``), each enabled instruction is applied to the software model
    and queued to the hardware via instr_q, and once the queue has drained
    and the dut is no longer busy the register file is checked against the
    model.

    Instruction tuples are
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.  The disabled
    ``if False`` groups are older regression vectors, several of which
    still use earlier, shorter tuple layouts.
    """
    seed(0)

    for _round in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False: # LD/ST test (with immediate)
            instrs += [(1, 2, 0, 0x10, 1, 1, (0, 0))]
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if True:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if True:
            instrs += [(7, 3, 2, 4, 0, 0, (0, 0)),
                       (7, 6, 6, 2, 0, 0, (0, 0)),
                       (1, 7, 2, 2, 0, 0, (0, 0))]

        if True:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (idx, src1, src2, dest, op, opi, imm))
            # update the software model first, then queue to the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first let the instruction queue drain ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... then allow a few extra clocks before polling busy_o
        for _settle in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
def test_scoreboard():
    """Build an IssueToScoreboard, write its RTLIL to
    ``test_scoreboard6600.il`` and run scoreboard_sim against it, dumping
    waveforms to ``test_scoreboard6600.vcd``."""
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # created but not used by scoreboard_sim yet

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #               vcd_name='test_scoreboard6600.vcd')


if __name__ == '__main__':
    test_scoreboard()
--- /dev/null
+# Copyright 2018 ETH Zurich and University of Bologna.
+# Copyright and related rights are licensed under the Solderpad Hardware
+# License, Version 0.51 (the "License"); you may not use this file except in
+# compliance with the License. You may obtain a copy of the License at
+# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# or agreed to in writing, software, hardware and materials distributed under
+# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+# module axi4_ar_buffer
+# #(
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid,
+# input logic [31:0] s_axi4_araddr,
+# input logic s_axi4_arvalid,
+# output logic s_axi4_arready,
+# input logic [7:0] s_axi4_arlen,
+# input logic [2:0] s_axi4_arsize,
+# input logic [1:0] s_axi4_arburst,
+# input logic s_axi4_arlock,
+# input logic [2:0] s_axi4_arprot,
+# input logic [3:0] s_axi4_arcache,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser,
+# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid,
+# output logic [31:0] m_axi4_araddr,
+# output logic m_axi4_arvalid,
+# input logic m_axi4_arready,
+# output logic [7:0] m_axi4_arlen,
+# output logic [2:0] m_axi4_arsize,
+# output logic [1:0] m_axi4_arburst,
+# output logic m_axi4_arlock,
+# output logic [2:0] m_axi4_arprot,
+# output logic [3:0] m_axi4_arcache,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser
+# );
class axi4_ar_buffer(Elaboratable):
    """AXI4 read-address (AR) channel buffer.

    Partial conversion of the SystemVerilog module ``axi4_ar_buffer`` quoted
    above: the slave-side AR fields are packed into a single ``data_in``
    word and the master-side fields are unpacked from ``data_out`` using the
    same bit layout.

    TODO: the ``axi_buffer_rab`` instance that sits between ``data_in`` and
    ``data_out`` (and drives ``m_axi4_arvalid`` / ``s_axi4_arready``) has
    not been converted yet, so ``data_out`` is currently undriven.
    """

    def __init__(self, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
        """Arguments (defaults taken from the SystemVerilog parameters):

        * AXI_ID_WIDTH: width of the arid signals
        * AXI_USER_WIDTH: width of the aruser signals
        """
        self.AXI_ID_WIDTH = AXI_ID_WIDTH
        self.AXI_USER_WIDTH = AXI_USER_WIDTH

        # self.axi4_aclk = Signal() # input
        # self.axi4_arstn = Signal() # input
        self.s_axi4_arid = Signal(AXI_ID_WIDTH)  # input
        self.s_axi4_araddr = Signal(32)  # input
        self.s_axi4_arvalid = Signal()  # input
        self.s_axi4_arready = Signal()  # output
        self.s_axi4_arlen = Signal(8)  # input
        self.s_axi4_arsize = Signal(3)  # input
        self.s_axi4_arburst = Signal(2)  # input
        self.s_axi4_arlock = Signal()  # input
        self.s_axi4_arprot = Signal(3)  # input
        self.s_axi4_arcache = Signal(4)  # input
        self.s_axi4_aruser = Signal(AXI_USER_WIDTH)  # input
        self.m_axi4_arid = Signal(AXI_ID_WIDTH)  # output
        self.m_axi4_araddr = Signal(32)  # output
        self.m_axi4_arvalid = Signal()  # output
        self.m_axi4_arready = Signal()  # input
        self.m_axi4_arlen = Signal(8)  # output
        self.m_axi4_arsize = Signal(3)  # output
        self.m_axi4_arburst = Signal(2)  # output
        self.m_axi4_arlock = Signal()  # output
        self.m_axi4_arprot = Signal(3)  # output
        self.m_axi4_arcache = Signal(4)  # output
        self.m_axi4_aruser = Signal(AXI_USER_WIDTH)  # output

    def elaborate(self, platform=None):
        m = Module()

        # TODO use record types here
        # field order, least-significant first, as in the SystemVerilog:
        # cache[3:0] prot[6:4] lock[7] burst[9:8] size[12:10] len[20:13]
        # addr[52:21] id[52+ID:53] user[52+ID+USER:53+ID]
        s_fields = [self.s_axi4_arcache, self.s_axi4_arprot,
                    self.s_axi4_arlock, self.s_axi4_arburst,
                    self.s_axi4_arsize, self.s_axi4_arlen,
                    self.s_axi4_araddr, self.s_axi4_arid,
                    self.s_axi4_aruser]
        m_fields = [self.m_axi4_arcache, self.m_axi4_arprot,
                    self.m_axi4_arlock, self.m_axi4_arburst,
                    self.m_axi4_arsize, self.m_axi4_arlen,
                    self.m_axi4_araddr, self.m_axi4_arid,
                    self.m_axi4_aruser]

        # wire [AXI_ID_WIDTH+AXI_USER_WIDTH+52:0] data_in, data_out
        width = self.AXI_ID_WIDTH + self.AXI_USER_WIDTH + 53
        data_in = Signal(width)
        data_out = Signal(width)

        m.d.comb += data_in.eq(Cat(*s_fields))

        offset = 0
        for field in m_fields:
            m.d.comb += field.eq(data_out[offset:offset + len(field)])
            offset += len(field)

        # TODO convert axi_buffer_rab.sv and instantiate it here
        # (BUFFER_DEPTH=4): valid_in=s_axi4_arvalid, data_in=data_in,
        # ready_out=s_axi4_arready, valid_out=m_axi4_arvalid,
        # data_out=data_out, ready_in=m_axi4_arready

        return m
+# TODO convert axi_buffer_rab.sv
+# axi_buffer_rab
+# #(
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_buffer
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# .valid_out ( m_axi4_arvalid ),
+# .data_out ( data_out ),
+# .ready_in ( m_axi4_arready ),
+# .valid_in ( s_axi4_arvalid ),
+# .data_in ( data_in ),
+# .ready_out ( s_axi4_arready )
+# );
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
class axi4_ar_sender(Elaboratable):
    """AXI4 read-address (AR) channel sender.

    Conversion of the SystemVerilog module ``axi4_ar_sender`` quoted below.
    A request translated by L1 is either forwarded to the master port
    (``l1_accept_i``), dropped (``l1_drop_i``) or, when ENABLE_L2TLB is 1,
    saved (``l1_save_i``) so that it can be sent later with the L2
    translated address (``l2_accept_i``) or dropped (``l2_drop_i``).

    The registers use the nmigen ``sync`` domain; ``axi4_aclk`` and
    ``axi4_arstn`` are kept as attributes for interface compatibility but
    are not connected here.
    """

    def __init__(self, AXI_ADDR_WIDTH=40, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4,
                 ENABLE_L2TLB=0):
        """Arguments (defaults taken from the SystemVerilog parameters):

        * AXI_ADDR_WIDTH: width of the araddr signals
        * AXI_ID_WIDTH: width of the arid signals
        * AXI_USER_WIDTH: width of the aruser signals
        * ENABLE_L2TLB: 1 to build the L2 save/resend logic
        """
        self.ENABLE_L2TLB = ENABLE_L2TLB

        self.axi4_aclk = Signal()  # input
        self.axi4_arstn = Signal()  # input
        self.l1_done_o = Signal()  # output
        self.l1_accept_i = Signal()  # input
        self.l1_drop_i = Signal()  # input
        self.l1_save_i = Signal()  # input
        self.l2_done_o = Signal()  # output
        self.l2_accept_i = Signal()  # input
        self.l2_drop_i = Signal()  # input
        self.l2_sending_o = Signal()  # output
        self.l1_araddr_i = Signal(AXI_ADDR_WIDTH)  # input
        self.l2_araddr_i = Signal(AXI_ADDR_WIDTH)  # input
        self.s_axi4_arid = Signal(AXI_ID_WIDTH)  # input
        self.s_axi4_arvalid = Signal()  # input
        self.s_axi4_arready = Signal()  # output
        self.s_axi4_arlen = Signal(8)  # input
        self.s_axi4_arsize = Signal(3)  # input
        self.s_axi4_arburst = Signal(2)  # input
        self.s_axi4_arlock = Signal()  # input
        self.s_axi4_arprot = Signal(3)  # input
        self.s_axi4_arcache = Signal(4)  # input
        self.s_axi4_aruser = Signal(AXI_USER_WIDTH)  # input
        self.m_axi4_arid = Signal(AXI_ID_WIDTH)  # output
        self.m_axi4_araddr = Signal(AXI_ADDR_WIDTH)  # output
        self.m_axi4_arvalid = Signal()  # output
        self.m_axi4_arready = Signal()  # input
        self.m_axi4_arlen = Signal(8)  # output
        self.m_axi4_arsize = Signal(3)  # output
        self.m_axi4_arburst = Signal(2)  # output
        self.m_axi4_arlock = Signal()  # output
        self.m_axi4_arprot = Signal(3)  # output
        self.m_axi4_arcache = Signal(4)  # output
        self.m_axi4_aruser = Signal(AXI_USER_WIDTH)  # output

    def elaborate(self, platform=None):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        l1_save = Signal()
        # resets to 1 (slot available), as in the SystemVerilog
        l2_available_q = Signal(reset=1)

        comb += l1_save.eq(self.l1_save_i & l2_available_q)
        comb += self.l1_done_o.eq(self.s_axi4_arvalid & self.s_axi4_arready)

        # if 1: accept and forward a transaction translated by L1
        #    2: drop or save request (if L2 slot not occupied already)
        comb += self.m_axi4_arvalid.eq(
            (self.s_axi4_arvalid & self.l1_accept_i) | self.l2_sending_o)
        comb += self.s_axi4_arready.eq(
            (self.m_axi4_arvalid & self.m_axi4_arready & ~self.l2_sending_o) |
            (self.s_axi4_arvalid & (self.l1_drop_i | l1_save)))

        # (name, slave-side input, master-side output), address excluded
        fields = [
            ("aruser", self.s_axi4_aruser, self.m_axi4_aruser),
            ("arcache", self.s_axi4_arcache, self.m_axi4_arcache),
            ("arprot", self.s_axi4_arprot, self.m_axi4_arprot),
            ("arlock", self.s_axi4_arlock, self.m_axi4_arlock),
            ("arburst", self.s_axi4_arburst, self.m_axi4_arburst),
            ("arsize", self.s_axi4_arsize, self.m_axi4_arsize),
            ("arlen", self.s_axi4_arlen, self.m_axi4_arlen),
            ("arid", self.s_axi4_arid, self.m_axi4_arid),
        ]

        if self.ENABLE_L2TLB == 1:
            l2_sent = Signal()

            for name, s_sig, m_sig in fields:
                # Buffer AXI signals in case of L1 miss
                l2_sig = Signal(len(s_sig), name="l2_axi4_" + name)
                with m.If(l1_save):
                    sync += l2_sig.eq(s_sig)
                # send the buffered copy while the L2 request is going out
                with m.If(self.l2_sending_o):
                    comb += m_sig.eq(l2_sig)
                with m.Else():
                    comb += m_sig.eq(s_sig)

            with m.If(self.l2_sending_o):
                comb += self.m_axi4_araddr.eq(self.l2_araddr_i)
            with m.Else():
                comb += self.m_axi4_araddr.eq(self.l1_araddr_i)

            # signal that an l1_save_i can be accepted
            with m.If(l2_sent | self.l2_drop_i):
                sync += l2_available_q.eq(1)
            with m.Elif(l1_save):
                sync += l2_available_q.eq(0)

            comb += self.l2_sending_o.eq(self.l2_accept_i & ~l2_available_q)
            comb += l2_sent.eq(self.l2_sending_o & self.m_axi4_arvalid &
                               self.m_axi4_arready)

            # if 1: having sent out a transaction translated by L2
            #    2: drop request (L2 slot is available again)
            comb += self.l2_done_o.eq(l2_sent | self.l2_drop_i)
        else:
            # no L2 TLB: pass everything straight through
            for _name, s_sig, m_sig in fields:
                comb += m_sig.eq(s_sig)
            comb += self.m_axi4_araddr.eq(self.l1_araddr_i)
            comb += self.l2_sending_o.eq(0)
            comb += l2_available_q.eq(0)
            comb += self.l2_done_o.eq(0)

        return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# module axi4_ar_sender
+# #(
+# parameter AXI_ADDR_WIDTH = 40,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4,
+# parameter ENABLE_L2TLB = 0
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# output logic l1_done_o,
+# input logic l1_accept_i,
+# input logic l1_drop_i,
+# input logic l1_save_i,
+# output logic l2_done_o,
+# input logic l2_accept_i,
+# input logic l2_drop_i,
+# output logic l2_sending_o,
+# input logic [AXI_ADDR_WIDTH-1:0] l1_araddr_i,
+# input logic [AXI_ADDR_WIDTH-1:0] l2_araddr_i,
+# input logic [AXI_ID_WIDTH-1:0] s_axi4_arid,
+# input logic s_axi4_arvalid,
+# output logic s_axi4_arready,
+# input logic [7:0] s_axi4_arlen,
+# input logic [2:0] s_axi4_arsize,
+# input logic [1:0] s_axi4_arburst,
+# input logic s_axi4_arlock,
+# input logic [2:0] s_axi4_arprot,
+# input logic [3:0] s_axi4_arcache,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_aruser,
+# output logic [AXI_ID_WIDTH-1:0] m_axi4_arid,
+# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_araddr,
+# output logic m_axi4_arvalid,
+# input logic m_axi4_arready,
+# output logic [7:0] m_axi4_arlen,
+# output logic [2:0] m_axi4_arsize,
+# output logic [1:0] m_axi4_arburst,
+# output logic m_axi4_arlock,
+# output logic [2:0] m_axi4_arprot,
+# output logic [3:0] m_axi4_arcache,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_aruser
+# );
+# logic l1_save;
+# logic l2_sent;
+# logic l2_available_q;
+# assign l1_save = l1_save_i & l2_available_q;
+# assign l1_done_o = s_axi4_arvalid & s_axi4_arready ;
+# // if 1: accept and forward a transaction translated by L1
+# // 2: drop or save request (if L2 slot not occupied already)
+# assign m_axi4_arvalid = (s_axi4_arvalid & l1_accept_i) |
+# l2_sending_o;
+# assign s_axi4_arready = (m_axi4_arvalid & m_axi4_arready & ~l2_sending_o) |
+# (s_axi4_arvalid & (l1_drop_i | l1_save));
+# generate
+# if (ENABLE_L2TLB == 1) begin
+# logic [AXI_USER_WIDTH-1:0] l2_axi4_aruser ;
+# logic [3:0] l2_axi4_arcache ;
+# logic [3:0] l2_axi4_arregion;
+# logic [3:0] l2_axi4_arqos ;
+# logic [2:0] l2_axi4_arprot ;
+# logic l2_axi4_arlock ;
+# logic [1:0] l2_axi4_arburst ;
+# logic [2:0] l2_axi4_arsize ;
+# logic [7:0] l2_axi4_arlen ;
+# logic [AXI_ID_WIDTH-1:0] l2_axi4_arid ;
+# assign m_axi4_aruser = l2_sending_o ? l2_axi4_aruser : s_axi4_aruser;
+# assign m_axi4_arcache = l2_sending_o ? l2_axi4_arcache : s_axi4_arcache;
+# assign m_axi4_arprot = l2_sending_o ? l2_axi4_arprot : s_axi4_arprot;
+# assign m_axi4_arlock = l2_sending_o ? l2_axi4_arlock : s_axi4_arlock;
+# assign m_axi4_arburst = l2_sending_o ? l2_axi4_arburst : s_axi4_arburst;
+# assign m_axi4_arsize = l2_sending_o ? l2_axi4_arsize : s_axi4_arsize;
+# assign m_axi4_arlen = l2_sending_o ? l2_axi4_arlen : s_axi4_arlen;
+# assign m_axi4_araddr = l2_sending_o ? l2_araddr_i : l1_araddr_i;
+# assign m_axi4_arid = l2_sending_o ? l2_axi4_arid : s_axi4_arid;
+# // Buffer AXI signals in case of L1 miss
+# always @(posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# l2_axi4_aruser <= 'b0;
+# l2_axi4_arcache <= 'b0;
+# l2_axi4_arprot <= 'b0;
+# l2_axi4_arlock <= 1'b0;
+# l2_axi4_arburst <= 'b0;
+# l2_axi4_arsize <= 'b0;
+# l2_axi4_arlen <= 'b0;
+# l2_axi4_arid <= 'b0;
+# end else if (l1_save) begin
+# l2_axi4_aruser <= s_axi4_aruser;
+# l2_axi4_arcache <= s_axi4_arcache;
+# l2_axi4_arprot <= s_axi4_arprot;
+# l2_axi4_arlock <= s_axi4_arlock;
+# l2_axi4_arburst <= s_axi4_arburst;
+# l2_axi4_arsize <= s_axi4_arsize;
+# l2_axi4_arlen <= s_axi4_arlen;
+# l2_axi4_arid <= s_axi4_arid;
+# end
+# end
+# // signal that an l1_save_i can be accepted
+# always @(posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# l2_available_q <= 1'b1;
+# end else if (l2_sent | l2_drop_i) begin
+# l2_available_q <= 1'b1;
+# end else if (l1_save) begin
+# l2_available_q <= 1'b0;
+# end
+# end
+# assign l2_sending_o = l2_accept_i & ~l2_available_q;
+# assign l2_sent = l2_sending_o & m_axi4_arvalid & m_axi4_arready;
+# // if 1: having sent out a transaction translated by L2
+# // 2: drop request (L2 slot is available again)
+# assign l2_done_o = l2_sent | l2_drop_i;
+# end else begin // !`ifdef ENABLE_L2TLB
+# assign m_axi4_aruser = s_axi4_aruser;
+# assign m_axi4_arcache = s_axi4_arcache;
+# assign m_axi4_arprot = s_axi4_arprot;
+# assign m_axi4_arlock = s_axi4_arlock;
+# assign m_axi4_arburst = s_axi4_arburst;
+# assign m_axi4_arsize = s_axi4_arsize;
+# assign m_axi4_arlen = s_axi4_arlen;
+# assign m_axi4_araddr = l1_araddr_i;
+# assign m_axi4_arid = s_axi4_arid;
+# assign l2_sending_o = 1'b0;
+# assign l2_available_q = 1'b0;
+# assign l2_done_o = 1'b0;
+# end // else: !if(ENABLE_L2TLB == 1)
+# endgenerate
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
class axi4_aw_buffer(Elaboratable):
    """AXI4 write-address (AW) channel buffer.

    Partial conversion of the SystemVerilog module ``axi4_aw_buffer`` quoted
    below: the slave-side AW fields are packed into a single ``data_in``
    word and the master-side fields are unpacked from ``data_out`` using the
    same bit layout.

    TODO: the ``axi_buffer_rab`` instance that sits between ``data_in`` and
    ``data_out`` (and drives ``m_axi4_awvalid`` / ``s_axi4_awready``) has
    not been converted yet, so ``data_out`` is currently undriven.
    ``axi4_aclk`` and ``axi4_arstn`` are kept as attributes but are not
    connected here.
    """

    def __init__(self, AXI_ID_WIDTH=4, AXI_USER_WIDTH=4):
        """Arguments (defaults taken from the SystemVerilog parameters):

        * AXI_ID_WIDTH: width of the awid signals
        * AXI_USER_WIDTH: width of the awuser signals
        """
        self.AXI_ID_WIDTH = AXI_ID_WIDTH
        self.AXI_USER_WIDTH = AXI_USER_WIDTH

        self.axi4_aclk = Signal()  # input
        self.axi4_arstn = Signal()  # input
        self.s_axi4_awid = Signal(AXI_ID_WIDTH)  # input
        self.s_axi4_awaddr = Signal(32)  # input
        self.s_axi4_awvalid = Signal()  # input
        self.s_axi4_awready = Signal()  # output
        self.s_axi4_awlen = Signal(8)  # input
        self.s_axi4_awsize = Signal(3)  # input
        self.s_axi4_awburst = Signal(2)  # input
        self.s_axi4_awlock = Signal()  # input
        self.s_axi4_awprot = Signal(3)  # input
        self.s_axi4_awcache = Signal(4)  # input
        self.s_axi4_awregion = Signal(4)  # input
        self.s_axi4_awqos = Signal(4)  # input
        self.s_axi4_awuser = Signal(AXI_USER_WIDTH)  # input
        self.m_axi4_awid = Signal(AXI_ID_WIDTH)  # output
        self.m_axi4_awaddr = Signal(32)  # output
        self.m_axi4_awvalid = Signal()  # output
        self.m_axi4_awready = Signal()  # input
        self.m_axi4_awlen = Signal(8)  # output
        self.m_axi4_awsize = Signal(3)  # output
        self.m_axi4_awburst = Signal(2)  # output
        self.m_axi4_awlock = Signal()  # output
        self.m_axi4_awprot = Signal(3)  # output
        self.m_axi4_awcache = Signal(4)  # output
        self.m_axi4_awregion = Signal(4)  # output
        self.m_axi4_awqos = Signal(4)  # output
        self.m_axi4_awuser = Signal(AXI_USER_WIDTH)  # output

    def elaborate(self, platform=None):
        m = Module()

        # field order, least-significant first, as in the SystemVerilog:
        # cache[3:0] prot[6:4] lock[7] burst[9:8] size[12:10] len[20:13]
        # addr[52:21] region[56:53] qos[60:57] id[60+ID:61]
        # user[60+ID+USER:61+ID]
        s_fields = [self.s_axi4_awcache, self.s_axi4_awprot,
                    self.s_axi4_awlock, self.s_axi4_awburst,
                    self.s_axi4_awsize, self.s_axi4_awlen,
                    self.s_axi4_awaddr, self.s_axi4_awregion,
                    self.s_axi4_awqos, self.s_axi4_awid,
                    self.s_axi4_awuser]
        m_fields = [self.m_axi4_awcache, self.m_axi4_awprot,
                    self.m_axi4_awlock, self.m_axi4_awburst,
                    self.m_axi4_awsize, self.m_axi4_awlen,
                    self.m_axi4_awaddr, self.m_axi4_awregion,
                    self.m_axi4_awqos, self.m_axi4_awid,
                    self.m_axi4_awuser]

        # wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_in, data_out
        width = self.AXI_USER_WIDTH + self.AXI_ID_WIDTH + 61
        data_in = Signal(width)
        data_out = Signal(width)

        m.d.comb += data_in.eq(Cat(*s_fields))

        offset = 0
        for field in m_fields:
            m.d.comb += field.eq(data_out[offset:offset + len(field)])
            offset += len(field)

        # TODO convert axi_buffer_rab.sv and instantiate it here
        # (BUFFER_DEPTH=4): valid_in=s_axi4_awvalid, data_in=data_in,
        # ready_out=s_axi4_awready, valid_out=m_axi4_awvalid,
        # data_out=data_out, ready_in=m_axi4_awready

        return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# module axi4_aw_buffer
+# #(
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid,
+# input logic [31:0] s_axi4_awaddr,
+# input logic s_axi4_awvalid,
+# output logic s_axi4_awready,
+# input logic [7:0] s_axi4_awlen,
+# input logic [2:0] s_axi4_awsize,
+# input logic [1:0] s_axi4_awburst,
+# input logic s_axi4_awlock,
+# input logic [2:0] s_axi4_awprot,
+# input logic [3:0] s_axi4_awcache,
+# input logic [3:0] s_axi4_awregion,
+# input logic [3:0] s_axi4_awqos,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser,
+# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid,
+# output logic [31:0] m_axi4_awaddr,
+# output logic m_axi4_awvalid,
+# input logic m_axi4_awready,
+# output logic [7:0] m_axi4_awlen,
+# output logic [2:0] m_axi4_awsize,
+# output logic [1:0] m_axi4_awburst,
+# output logic m_axi4_awlock,
+# output logic [2:0] m_axi4_awprot,
+# output logic [3:0] m_axi4_awcache,
+# output logic [3:0] m_axi4_awregion,
+# output logic [3:0] m_axi4_awqos,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser
+# );
+# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_in;
+# wire [AXI_USER_WIDTH+AXI_ID_WIDTH+60:0] data_out;
+# assign data_in [3:0] = s_axi4_awcache;
+# assign data_in [6:4] = s_axi4_awprot;
+# assign data_in [7] = s_axi4_awlock;
+# assign data_in [9:8] = s_axi4_awburst;
+# assign data_in [12:10] = s_axi4_awsize;
+# assign data_in [20:13] = s_axi4_awlen;
+# assign data_in [52:21] = s_axi4_awaddr;
+# assign data_in [56:53] = s_axi4_awregion;
+# assign data_in [60:57] = s_axi4_awqos;
+# assign data_in [60+AXI_ID_WIDTH:61] = s_axi4_awid;
+# assign data_in [60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH] = s_axi4_awuser;
+# assign m_axi4_awcache = data_out[3:0];
+# assign m_axi4_awprot = data_out[6:4];
+# assign m_axi4_awlock = data_out[7];
+# assign m_axi4_awburst = data_out[9:8];
+# assign m_axi4_awsize = data_out[12:10];
+# assign m_axi4_awlen = data_out[20:13];
+# assign m_axi4_awaddr = data_out[52:21];
+# assign m_axi4_awregion = data_out[56:53];
+# assign m_axi4_awqos = data_out[60:57];
+# assign m_axi4_awid = data_out[60+AXI_ID_WIDTH:61];
+# assign m_axi4_awuser = data_out[60+AXI_ID_WIDTH+AXI_USER_WIDTH:61+AXI_ID_WIDTH];
+# axi_buffer_rab
+# #(
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_buffer
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# .valid_out ( m_axi4_awvalid ),
+# .data_out ( data_out ),
+# .ready_in ( m_axi4_awready ),
+# .valid_in ( s_axi4_awvalid ),
+# .data_in ( data_in ),
+# .ready_out ( s_axi4_awready )
+# );
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi4_aw_sender(Elaboratable):
+ def __init__(self):
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.l1_done_o = Signal() # output
+ self.l1_accept_i = Signal() # input
+ self.l1_drop_i = Signal() # input
+ self.l1_save_i = Signal() # input
+ self.l2_done_o = Signal() # output
+ self.l2_accept_i = Signal() # input
+ self.l2_drop_i = Signal() # input
+ self.l2_sending_o = Signal() # output
+ self.l1_awaddr_i = Signal(AXI_ADDR_WIDTH) # input
+ self.l2_awaddr_i = Signal(AXI_ADDR_WIDTH) # input
+ self.s_axi4_awid = Signal(AXI_ID_WIDTH) # input
+ self.s_axi4_awvalid = Signal() # input
+ self.s_axi4_awready = Signal() # output
+ self.s_axi4_awlen = Signal(8) # input
+ self.s_axi4_awsize = Signal(3) # input
+ self.s_axi4_awburst = Signal(2) # input
+ self.s_axi4_awlock = Signal() # input
+ self.s_axi4_awprot = Signal(3) # input
+ self.s_axi4_awcache = Signal(4) # input
+ self.s_axi4_awregion = Signal(4) # input
+ self.s_axi4_awqos = Signal(4) # input
+ self.s_axi4_awuser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_awid = Signal(AXI_ID_WIDTH) # output
+ self.m_axi4_awaddr = Signal(AXI_ADDR_WIDTH) # output
+ self.m_axi4_awvalid = Signal() # output
+ self.m_axi4_awready = Signal() # input
+ self.m_axi4_awlen = Signal(8) # output
+ self.m_axi4_awsize = Signal(3) # output
+ self.m_axi4_awburst = Signal(2) # output
+ self.m_axi4_awlock = Signal() # output
+ self.m_axi4_awprot = Signal(3) # output
+ self.m_axi4_awcache = Signal(4) # output
+ self.m_axi4_awregion = Signal(4) # output
+ self.m_axi4_awqos = Signal(4) # output
+ self.m_axi4_awuser = Signal(AXI_USER_WIDTH) # output
+ def elaborate(self, platform=None):
+ m = Module()
+ m.d.comb += self.l1_save.eq(self.None)
+ m.d.comb += self.l1_done_o.eq(self.None)
+ m.d.comb += self.m_axi4_awvalid.eq(self.None)
+ m.d.comb += self.s_axi4_awready.eq(self.None)
+ m.d.comb += self.m_axi4_awuser.eq(self.None)
+ m.d.comb += self.m_axi4_awcache.eq(self.None)
+ m.d.comb += self.m_axi4_awregion.eq(self.None)
+ m.d.comb += self.m_axi4_awqos.eq(self.None)
+ m.d.comb += self.m_axi4_awprot.eq(self.None)
+ m.d.comb += self.m_axi4_awlock.eq(self.None)
+ m.d.comb += self.m_axi4_awburst.eq(self.None)
+ m.d.comb += self.m_axi4_awsize.eq(self.None)
+ m.d.comb += self.m_axi4_awlen.eq(self.None)
+ m.d.comb += self.m_axi4_awaddr.eq(self.None)
+ m.d.comb += self.m_axi4_awid.eq(self.None)
+ m.d.comb += self.l2_sending_o.eq(self.None)
+ m.d.comb += self.l2_sent.eq(self.None)
+ m.d.comb += self.l2_done_o.eq(self.None)
+ m.d.comb += self.m_axi4_awuser.eq(self.s_axi4_awuser)
+ m.d.comb += self.m_axi4_awcache.eq(self.s_axi4_awcache)
+ m.d.comb += self.m_axi4_awregion.eq(self.s_axi4_awregion)
+ m.d.comb += self.m_axi4_awqos.eq(self.s_axi4_awqos)
+ m.d.comb += self.m_axi4_awprot.eq(self.s_axi4_awprot)
+ m.d.comb += self.m_axi4_awlock.eq(self.s_axi4_awlock)
+ m.d.comb += self.m_axi4_awburst.eq(self.s_axi4_awburst)
+ m.d.comb += self.m_axi4_awsize.eq(self.s_axi4_awsize)
+ m.d.comb += self.m_axi4_awlen.eq(self.s_axi4_awlen)
+ m.d.comb += self.m_axi4_awaddr.eq(self.l1_awaddr_i)
+ m.d.comb += self.m_axi4_awid.eq(self.s_axi4_awid)
+ m.d.comb += self.l2_sending_o.eq(self.1: 'b0)
+ m.d.comb += self.l2_available_q.eq(self.1: 'b0)
+ m.d.comb += self.l2_done_o.eq(self.1: 'b0)
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# module axi4_aw_sender
+# #(
+# parameter AXI_ADDR_WIDTH = 40,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4,
+# parameter ENABLE_L2TLB = 0
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# output logic l1_done_o,
+# input logic l1_accept_i,
+# input logic l1_drop_i,
+# input logic l1_save_i,
+# output logic l2_done_o,
+# input logic l2_accept_i,
+# input logic l2_drop_i,
+# output logic l2_sending_o,
+# input logic [AXI_ADDR_WIDTH-1:0] l1_awaddr_i,
+# input logic [AXI_ADDR_WIDTH-1:0] l2_awaddr_i,
+# input logic [AXI_ID_WIDTH-1:0] s_axi4_awid,
+# input logic s_axi4_awvalid,
+# output logic s_axi4_awready,
+# input logic [7:0] s_axi4_awlen,
+# input logic [2:0] s_axi4_awsize,
+# input logic [1:0] s_axi4_awburst,
+# input logic s_axi4_awlock,
+# input logic [2:0] s_axi4_awprot,
+# input logic [3:0] s_axi4_awcache,
+# input logic [3:0] s_axi4_awregion,
+# input logic [3:0] s_axi4_awqos,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_awuser,
+# output logic [AXI_ID_WIDTH-1:0] m_axi4_awid,
+# output logic [AXI_ADDR_WIDTH-1:0] m_axi4_awaddr,
+# output logic m_axi4_awvalid,
+# input logic m_axi4_awready,
+# output logic [7:0] m_axi4_awlen,
+# output logic [2:0] m_axi4_awsize,
+# output logic [1:0] m_axi4_awburst,
+# output logic m_axi4_awlock,
+# output logic [2:0] m_axi4_awprot,
+# output logic [3:0] m_axi4_awcache,
+# output logic [3:0] m_axi4_awregion,
+# output logic [3:0] m_axi4_awqos,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_awuser
+# );
+# logic l1_save;
+# logic l2_sent;
+# logic l2_available_q;
+# assign l1_save = l1_save_i & l2_available_q;
+# assign l1_done_o = s_axi4_awvalid & s_axi4_awready ;
+# // if 1: accept and forward a transaction translated by L1
+# // 2: drop or save request (if L2 slot not occupied already)
+# assign m_axi4_awvalid = (s_axi4_awvalid & l1_accept_i) |
+# l2_sending_o;
+# assign s_axi4_awready = (m_axi4_awvalid & m_axi4_awready & ~l2_sending_o) |
+# (s_axi4_awvalid & (l1_drop_i | l1_save));
+# generate
+# if (ENABLE_L2TLB == 1) begin
+# logic [AXI_USER_WIDTH-1:0] l2_axi4_awuser ;
+# logic [3:0] l2_axi4_awcache ;
+# logic [3:0] l2_axi4_awregion;
+# logic [3:0] l2_axi4_awqos ;
+# logic [2:0] l2_axi4_awprot ;
+# logic l2_axi4_awlock ;
+# logic [1:0] l2_axi4_awburst ;
+# logic [2:0] l2_axi4_awsize ;
+# logic [7:0] l2_axi4_awlen ;
+# logic [AXI_ID_WIDTH-1:0] l2_axi4_awid ;
+# assign m_axi4_awuser = l2_sending_o ? l2_axi4_awuser : s_axi4_awuser;
+# assign m_axi4_awcache = l2_sending_o ? l2_axi4_awcache : s_axi4_awcache;
+# assign m_axi4_awregion = l2_sending_o ? l2_axi4_awregion : s_axi4_awregion;
+# assign m_axi4_awqos = l2_sending_o ? l2_axi4_awqos : s_axi4_awqos;
+# assign m_axi4_awprot = l2_sending_o ? l2_axi4_awprot : s_axi4_awprot;
+# assign m_axi4_awlock = l2_sending_o ? l2_axi4_awlock : s_axi4_awlock;
+# assign m_axi4_awburst = l2_sending_o ? l2_axi4_awburst : s_axi4_awburst;
+# assign m_axi4_awsize = l2_sending_o ? l2_axi4_awsize : s_axi4_awsize;
+# assign m_axi4_awlen = l2_sending_o ? l2_axi4_awlen : s_axi4_awlen;
+# assign m_axi4_awaddr = l2_sending_o ? l2_awaddr_i : l1_awaddr_i;
+# assign m_axi4_awid = l2_sending_o ? l2_axi4_awid : s_axi4_awid;
+# // buffer AXI signals in case of L1 miss
+# always @(posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# l2_axi4_awuser <= 'b0;
+# l2_axi4_awcache <= 'b0;
+# l2_axi4_awregion <= 'b0;
+# l2_axi4_awqos <= 'b0;
+# l2_axi4_awprot <= 'b0;
+# l2_axi4_awlock <= 1'b0;
+# l2_axi4_awburst <= 'b0;
+# l2_axi4_awsize <= 'b0;
+# l2_axi4_awlen <= 'b0;
+# l2_axi4_awid <= 'b0;
+# end else if (l1_save) begin
+# l2_axi4_awuser <= s_axi4_awuser;
+# l2_axi4_awcache <= s_axi4_awcache;
+# l2_axi4_awregion <= s_axi4_awregion;
+# l2_axi4_awqos <= s_axi4_awqos;
+# l2_axi4_awprot <= s_axi4_awprot;
+# l2_axi4_awlock <= s_axi4_awlock;
+# l2_axi4_awburst <= s_axi4_awburst;
+# l2_axi4_awsize <= s_axi4_awsize;
+# l2_axi4_awlen <= s_axi4_awlen;
+# l2_axi4_awid <= s_axi4_awid;
+# end
+# end
+# // signal that an l1_save_i can be accepted
+# always @(posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# l2_available_q <= 1'b1;
+# end else if (l2_sent | l2_drop_i) begin
+# l2_available_q <= 1'b1;
+# end else if (l1_save) begin
+# l2_available_q <= 1'b0;
+# end
+# end
+# assign l2_sending_o = l2_accept_i & ~l2_available_q;
+# assign l2_sent = l2_sending_o & m_axi4_awvalid & m_axi4_awready;
+# // if 1: having sent out a transaction translated by L2
+# // 2: drop request (L2 slot is available again)
+# assign l2_done_o = l2_sent | l2_drop_i;
+# end else begin // !`ifdef ENABLE_L2TLB
+# assign m_axi4_awuser = s_axi4_awuser;
+# assign m_axi4_awcache = s_axi4_awcache;
+# assign m_axi4_awregion = s_axi4_awregion;
+# assign m_axi4_awqos = s_axi4_awqos;
+# assign m_axi4_awprot = s_axi4_awprot;
+# assign m_axi4_awlock = s_axi4_awlock;
+# assign m_axi4_awburst = s_axi4_awburst;
+# assign m_axi4_awsize = s_axi4_awsize;
+# assign m_axi4_awlen = s_axi4_awlen;
+# assign m_axi4_awaddr = l1_awaddr_i;
+# assign m_axi4_awid = s_axi4_awid;
+# assign l2_sending_o = 1'b0;
+# assign l2_available_q = 1'b0;
+# assign l2_done_o = 1'b0;
+# end // !`ifdef ENABLE_L2TLB
+# endgenerate
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi4_b_buffer(Elaboratable):
+ def __init__(self):
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output
+ self.s_axi4_bresp = Signal(2) # output
+ self.s_axi4_bvalid = Signal() # output
+ self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output
+ self.s_axi4_bready = Signal() # input
+ self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input
+ self.m_axi4_bresp = Signal(2) # input
+ self.m_axi4_bvalid = Signal() # input
+ self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_bready = Signal() # output
+ def elaborate(self, platform=None):
+ m = Module()
+ m.d.comb += self.None.eq(self.m_axi4_bresp)
+ m.d.comb += self.None.eq(self.m_axi4_bid)
+ m.d.comb += self.None.eq(self.m_axi4_buser)
+ m.d.comb += self.s_axi4_buser.eq(self.None)
+ m.d.comb += self.s_axi4_bid.eq(self.None)
+ m.d.comb += self.s_axi4_bresp.eq(self.None)
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# module axi4_b_buffer
+# #(
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid,
+# output logic [1:0] s_axi4_bresp,
+# output logic s_axi4_bvalid,
+# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser,
+# input logic s_axi4_bready,
+# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid,
+# input logic [1:0] m_axi4_bresp,
+# input logic m_axi4_bvalid,
+# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser,
+# output logic m_axi4_bready
+# );
+# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_in;
+# wire [AXI_ID_WIDTH+AXI_USER_WIDTH+1:0] data_out;
+# assign data_in [1:0] = m_axi4_bresp;
+# assign data_in [AXI_ID_WIDTH+1:2] = m_axi4_bid;
+# assign data_in[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2] = m_axi4_buser;
+# assign s_axi4_buser = data_out[AXI_ID_WIDTH+AXI_USER_WIDTH+1:AXI_ID_WIDTH+2];
+# assign s_axi4_bid = data_out[AXI_ID_WIDTH+1:2];
+# assign s_axi4_bresp = data_out[1:0];
+# axi_buffer_rab
+# #(
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_buffer
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# .valid_out( s_axi4_bvalid ),
+# .data_out ( data_out ),
+# .ready_in ( s_axi4_bready ),
+# .valid_in ( m_axi4_bvalid ),
+# .data_in ( data_in ),
+# .ready_out( m_axi4_bready )
+# );
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi4_b_sender(Elaboratable):
+ def __init__(self):
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.drop_i = Signal() # input
+ self.done_o = Signal() # output
+ self.id_i = Signal(AXI_ID_WIDTH) # input
+ self.prefetch_i = Signal() # input
+ self.hit_i = Signal() # input
+ self.s_axi4_bid = Signal(AXI_ID_WIDTH) # output
+ self.s_axi4_bresp = Signal(2) # output
+ self.s_axi4_bvalid = Signal() # output
+ self.s_axi4_buser = Signal(AXI_USER_WIDTH) # output
+ self.s_axi4_bready = Signal() # input
+ self.m_axi4_bid = Signal(AXI_ID_WIDTH) # input
+ self.m_axi4_bresp = Signal(2) # input
+ self.m_axi4_bvalid = Signal() # input
+ self.m_axi4_buser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_bready = Signal() # output
+ def elaborate(self, platform=None):
+ m = Module()
+ m.d.comb += self.fifo_push.eq(self.None)
+ m.d.comb += self.done_o.eq(self.fifo_push)
+ m.d.comb += self.fifo_pop.eq(self.None)
+ m.d.comb += self.s_axi4_buser.eq(self.None)
+ m.d.comb += self.s_axi4_bid.eq(self.None)
+ m.d.comb += self.s_axi4_bresp.eq(self.None)
+ m.d.comb += self.s_axi4_bvalid.eq(self.None)
+ m.d.comb += self.m_axi4_bready.eq(self.None)
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# module axi4_b_sender
+# #(
+# parameter AXI_ID_WIDTH = 10,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# input logic drop_i,
+# output logic done_o,
+# input logic [AXI_ID_WIDTH-1:0] id_i,
+# input logic prefetch_i,
+# input logic hit_i,
+# output logic [AXI_ID_WIDTH-1:0] s_axi4_bid,
+# output logic [1:0] s_axi4_bresp,
+# output logic s_axi4_bvalid,
+# output logic [AXI_USER_WIDTH-1:0] s_axi4_buser,
+# input logic s_axi4_bready,
+# input logic [AXI_ID_WIDTH-1:0] m_axi4_bid,
+# input logic [1:0] m_axi4_bresp,
+# input logic m_axi4_bvalid,
+# input logic [AXI_USER_WIDTH-1:0] m_axi4_buser,
+# output logic m_axi4_bready
+# );
+# logic fifo_valid;
+# logic fifo_pop;
+# logic fifo_push;
+# logic fifo_ready;
+# logic [AXI_ID_WIDTH-1:0] id;
+# logic prefetch;
+# logic hit;
+# logic dropping;
+# axi_buffer_rab
+# #(
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_fifo
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# // Pop
+# .data_out ( {prefetch, hit, id} ),
+# .valid_out ( fifo_valid ),
+# .ready_in ( fifo_pop ),
+# // Push
+# .valid_in ( fifo_push ),
+# .data_in ( {prefetch_i, hit_i, id_i} ),
+# .ready_out ( fifo_ready )
+# );
+# assign fifo_push = drop_i & fifo_ready;
+# assign done_o = fifo_push;
+# assign fifo_pop = dropping & s_axi4_bready;
+# always @ (posedge axi4_aclk or negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# dropping <= 1'b0;
+# end else begin
+# if (fifo_valid && ~dropping)
+# dropping <= 1'b1;
+# else if (fifo_pop)
+# dropping <= 1'b0;
+# end
+# end
+# assign s_axi4_buser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_buser;
+# assign s_axi4_bid = dropping ? id : m_axi4_bid;
+# assign s_axi4_bresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, multi, prot
+# (dropping & prefetch ) ? 2'b10 : // prefetch miss
+# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot
+# (dropping ) ? 2'b10 : // non-prefetch miss
+# m_axi4_bresp;
+# assign s_axi4_bvalid = dropping | m_axi4_bvalid;
+# assign m_axi4_bready = ~dropping & s_axi4_bready;
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi4_r_buffer(Elaboratable):
+ def __init__(self):
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output
+ self.s_axi4_rresp = Signal(2) # output
+ self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output
+ self.s_axi4_rlast = Signal() # output
+ self.s_axi4_rvalid = Signal() # output
+ self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output
+ self.s_axi4_rready = Signal() # input
+ self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input
+ self.m_axi4_rresp = Signal(2) # input
+ self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input
+ self.m_axi4_rlast = Signal() # input
+ self.m_axi4_rvalid = Signal() # input
+ self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_rready = Signal() # output
+ def elaborate(self, platform=None):
+ m = Module()
+ m.d.comb += self.None.eq(self.m_axi4_rresp)
+ m.d.comb += self.None.eq(self.m_axi4_rlast)
+ m.d.comb += self.None.eq(self.m_axi4_rid)
+ m.d.comb += self.None.eq(self.m_axi4_rdata)
+ m.d.comb += self.None.eq(self.m_axi4_ruser)
+ m.d.comb += self.s_axi4_rresp.eq(self.None)
+ m.d.comb += self.s_axi4_rlast.eq(self.None)
+ m.d.comb += self.s_axi4_rid.eq(self.None)
+ m.d.comb += self.s_axi4_rdata.eq(self.None)
+ m.d.comb += self.s_axi4_ruser.eq(self.None)
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# module axi4_r_buffer
+# #(
+# parameter AXI_DATA_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid,
+# output logic [1:0] s_axi4_rresp,
+# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
+# output logic s_axi4_rlast,
+# output logic s_axi4_rvalid,
+# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser,
+# input logic s_axi4_rready,
+# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid,
+# input logic [1:0] m_axi4_rresp,
+# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata,
+# input logic m_axi4_rlast,
+# input logic m_axi4_rvalid,
+# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser,
+# output logic m_axi4_rready
+# );
+# wire [AXI_DATA_WIDTH+AXI_ID_WIDTH+AXI_USER_WIDTH+3-1:0] data_out;
+# localparam ID_START = 3;
+# localparam ID_END = AXI_ID_WIDTH-1 + ID_START;
+# localparam DATA_START = ID_END + 1;
+# localparam USER_START = DATA_END + 1;
+# assign data_in [1:0] = m_axi4_rresp;
+# assign data_in [2] = m_axi4_rlast;
+# assign data_in [ID_END:ID_START] = m_axi4_rid;
+# assign data_in[DATA_END:DATA_START] = m_axi4_rdata;
+# assign data_in[USER_END:USER_START] = m_axi4_ruser;
+# assign s_axi4_rresp = data_out [1:0];
+# assign s_axi4_rlast = data_out [2];
+# assign s_axi4_rid = data_out [ID_END:ID_START];
+# assign s_axi4_rdata = data_out[DATA_END:DATA_START];
+# assign s_axi4_ruser = data_out[USER_END:USER_START];
+# axi_buffer_rab
+# #(
+# .BUFFER_DEPTH ( 4 )
+# )
+# u_buffer
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# // Pop
+# .valid_out ( s_axi4_rvalid ),
+# .data_out ( data_out ),
+# .ready_in ( s_axi4_rready ),
+# // Push
+# .valid_in ( m_axi4_rvalid ),
+# .data_in ( data_in ),
+# .ready_out ( m_axi4_rready )
+# );
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi4_r_sender(Elaboratable):
+ def __init__(self):
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.drop_i = Signal() # input
+ self.drop_len_i = Signal(8) # input
+ self.done_o = Signal() # output
+ self.id_i = Signal(AXI_ID_WIDTH) # input
+ self.prefetch_i = Signal() # input
+ self.hit_i = Signal() # input
+ self.s_axi4_rid = Signal(AXI_ID_WIDTH) # output
+ self.s_axi4_rresp = Signal(2) # output
+ self.s_axi4_rdata = Signal(AXI_DATA_WIDTH) # output
+ self.s_axi4_rlast = Signal() # output
+ self.s_axi4_rvalid = Signal() # output
+ self.s_axi4_ruser = Signal(AXI_USER_WIDTH) # output
+ self.s_axi4_rready = Signal() # input
+ self.m_axi4_rid = Signal(AXI_ID_WIDTH) # input
+ self.m_axi4_rresp = Signal(2) # input
+ self.m_axi4_rdata = Signal(AXI_DATA_WIDTH) # input
+ self.m_axi4_rlast = Signal() # input
+ self.m_axi4_rvalid = Signal() # input
+ self.m_axi4_ruser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_rready = Signal() # output
+ def elaborate(self, platform=None):
+ m = Module()
+ m.d.comb += self.fifo_push.eq(self.None)
+ m.d.comb += self.done_o.eq(self.fifo_push)
+ m.d.comb += self.s_axi4_rdata.eq(self.m_axi4_rdata)
+ m.d.comb += self.s_axi4_ruser.eq(self.None)
+ m.d.comb += self.s_axi4_rid.eq(self.None)
+ m.d.comb += self.s_axi4_rresp.eq(self.None)
+ m.d.comb += self.s_axi4_rvalid.eq(self.None)
+ m.d.comb += self.m_axi4_rready.eq(self.None)
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# //import CfMath::log2;
+# module axi4_r_sender
+# #(
+# parameter AXI_DATA_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# input logic drop_i,
+# input logic [7:0] drop_len_i,
+# output logic done_o,
+# input logic [AXI_ID_WIDTH-1:0] id_i,
+# input logic prefetch_i,
+# input logic hit_i,
+# output logic [AXI_ID_WIDTH-1:0] s_axi4_rid,
+# output logic [1:0] s_axi4_rresp,
+# output logic [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
+# output logic s_axi4_rlast,
+# output logic s_axi4_rvalid,
+# output logic [AXI_USER_WIDTH-1:0] s_axi4_ruser,
+# input logic s_axi4_rready,
+# input logic [AXI_ID_WIDTH-1:0] m_axi4_rid,
+# input logic [1:0] m_axi4_rresp,
+# input logic [AXI_DATA_WIDTH-1:0] m_axi4_rdata,
+# input logic m_axi4_rlast,
+# input logic m_axi4_rvalid,
+# input logic [AXI_USER_WIDTH-1:0] m_axi4_ruser,
+# output logic m_axi4_rready
+# );
+# localparam BUFFER_DEPTH = 16;
+# logic fifo_valid;
+# logic fifo_pop;
+# logic fifo_push;
+# logic fifo_ready;
+# logic [AXI_ID_WIDTH-1:0] id;
+# logic [7:0] len;
+# logic prefetch;
+# logic hit;
+# logic dropping;
+# enum logic [1:0] { FORWARDING, DROPPING }
+# state_d, state_q;
+# logic burst_ongoing_d, burst_ongoing_q;
+# logic [7:0] drop_cnt_d, drop_cnt_q;
+# axi_buffer_rab
+# #(
+# )
+# u_fifo
+# (
+# .clk ( axi4_aclk ),
+# .rstn ( axi4_arstn ),
+# // Pop
+# .data_out ( {prefetch, hit, id, len} ),
+# .valid_out ( fifo_valid ),
+# .ready_in ( fifo_pop ),
+# // Push
+# .valid_in ( fifo_push ),
+# .data_in ( {prefetch_i, hit_i, id_i, drop_len_i} ),
+# .ready_out ( fifo_ready )
+# );
+# assign fifo_push = drop_i & fifo_ready;
+# assign done_o = fifo_push;
+# always_comb begin
+# burst_ongoing_d = burst_ongoing_q;
+# drop_cnt_d = drop_cnt_q;
+# dropping = 1'b0;
+# s_axi4_rlast = 1'b0;
+# fifo_pop = 1'b0;
+# state_d = state_q;
+# case (state_q)
+# FORWARDING: begin
+# s_axi4_rlast = m_axi4_rlast;
+# // Remember whether there is currently a burst ongoing.
+# if (m_axi4_rvalid && m_axi4_rready) begin
+# if (m_axi4_rlast) begin
+# burst_ongoing_d = 1'b0;
+# end else begin
+# burst_ongoing_d = 1'b1;
+# end
+# end
+# // If there is no burst ongoing and the FIFO has a drop request ready, process it.
+# if (!burst_ongoing_d && fifo_valid) begin
+# drop_cnt_d = len;
+# state_d = DROPPING;
+# end
+# end
+# DROPPING: begin
+# dropping = 1'b1;
+# s_axi4_rlast = (drop_cnt_q == '0);
+# // Handshake on slave interface
+# if (s_axi4_rready) begin
+# drop_cnt_d -= 1;
+# if (drop_cnt_q == '0) begin
+# drop_cnt_d = '0;
+# fifo_pop = 1'b1;
+# state_d = FORWARDING;
+# end
+# end
+# end
+# default: begin
+# state_d = FORWARDING;
+# end
+# endcase
+# end
+# assign s_axi4_rdata = m_axi4_rdata;
+# assign s_axi4_ruser = dropping ? {AXI_USER_WIDTH{1'b0}} : m_axi4_ruser;
+# assign s_axi4_rid = dropping ? id : m_axi4_rid;
+# assign s_axi4_rresp = (dropping & prefetch & hit) ? 2'b00 : // prefetch hit, multi, prot
+# (dropping & prefetch ) ? 2'b10 : // prefetch miss
+# (dropping & hit) ? 2'b10 : // non-prefetch multi, prot
+# (dropping ) ? 2'b10 : // non-prefetch miss
+# m_axi4_rresp;
+# assign s_axi4_rvalid = dropping | m_axi4_rvalid;
+# assign m_axi4_rready = ~dropping & s_axi4_rready;
+# always_ff @(posedge axi4_aclk, negedge axi4_arstn) begin
+# if (axi4_arstn == 1'b0) begin
+# burst_ongoing_q <= 1'b0;
+# drop_cnt_q <= 'b0;
+# state_q <= FORWARDING;
+# end else begin
+# burst_ongoing_q <= burst_ongoing_d;
+# drop_cnt_q <= drop_cnt_d;
+# state_q <= state_d;
+# end
+# end
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi4_w_buffer(Elaboratable):
+ def __init__(self):
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.l1_done_o = Signal() # output
+ self.l1_accept_i = Signal() # input
+ self.l1_save_i = Signal() # input
+ self.l1_drop_i = Signal() # input
+ self.l1_master_i = Signal() # input
+ self.l1_id_i = Signal(AXI_ID_WIDTH) # input
+ self.l1_len_i = Signal(8) # input
+ self.l1_prefetch_i = Signal() # input
+ self.l1_hit_i = Signal() # input
+ self.l2_done_o = Signal() # output
+ self.l2_accept_i = Signal() # input
+ self.l2_drop_i = Signal() # input
+ self.l2_master_i = Signal() # input
+ self.l2_id_i = Signal(AXI_ID_WIDTH) # input
+ self.l2_len_i = Signal(8) # input
+ self.l2_prefetch_i = Signal() # input
+ self.l2_hit_i = Signal() # input
+ self.master_select_o = Signal() # output
+ self.input_stall_o = Signal() # output
+ self.output_stall_o = Signal() # output
+ self.b_drop_o = Signal() # output
+ self.b_done_i = Signal() # input
+ self.id_o = Signal(AXI_ID_WIDTH) # output
+ self.prefetch_o = Signal() # output
+ self.hit_o = Signal() # output
+ self.s_axi4_wdata = Signal(AXI_DATA_WIDTH) # input
+ self.s_axi4_wvalid = Signal() # input
+ self.s_axi4_wready = Signal() # output
+ self.s_axi4_wstrb = Signal(1+ERROR p_expression_25) # input
+ self.s_axi4_wlast = Signal() # input
+ self.s_axi4_wuser = Signal(AXI_USER_WIDTH) # input
+ self.m_axi4_wdata = Signal(AXI_DATA_WIDTH) # output
+ self.m_axi4_wvalid = Signal() # output
+ self.m_axi4_wready = Signal() # input
+ self.m_axi4_wstrb = Signal(1+ERROR p_expression_25) # output
+ self.m_axi4_wlast = Signal() # output
+ self.m_axi4_wuser = Signal(AXI_USER_WIDTH) # output
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+# //import CfMath::log2;
+# module axi4_w_buffer
+# #(
+# parameter AXI_DATA_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 4,
+# parameter AXI_USER_WIDTH = 4,
+# parameter ENABLE_L2TLB = 0,
+# parameter HUM_BUFFER_DEPTH = 16
+# )
+# (
+# input logic axi4_aclk,
+# input logic axi4_arstn,
+# // L1 & L2 interfaces
+# output logic l1_done_o,
+# input logic l1_accept_i,
+# input logic l1_save_i,
+# input logic l1_drop_i,
+# input logic l1_master_i,
+# input logic [AXI_ID_WIDTH-1:0] l1_id_i,
+# input logic [7:0] l1_len_i,
+# input logic l1_prefetch_i,
+# input logic l1_hit_i,
+# output logic l2_done_o,
+# input logic l2_accept_i,
+# input logic l2_drop_i,
+# input logic l2_master_i,
+# input logic [AXI_ID_WIDTH-1:0] l2_id_i,
+# input logic [7:0] l2_len_i,
+# input logic l2_prefetch_i,
+# input logic l2_hit_i,
+# output logic master_select_o,
+# output logic input_stall_o,
+# output logic output_stall_o,
+# // B sender interface
+# output logic b_drop_o,
+# input logic b_done_i,
+# output logic [AXI_ID_WIDTH-1:0] id_o,
+# output logic prefetch_o,
+# output logic hit_o,
+# // AXI W channel interfaces
+# input logic [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
+# input logic s_axi4_wvalid,
+# output logic s_axi4_wready,
+# input logic [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
+# input logic s_axi4_wlast,
+# input logic [AXI_USER_WIDTH-1:0] s_axi4_wuser,
+# output logic [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
+# output logic m_axi4_wvalid,
+# input logic m_axi4_wready,
+# output logic [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
+# output logic m_axi4_wlast,
+# output logic [AXI_USER_WIDTH-1:0] m_axi4_wuser
+# );
+ localparam INPUT_BUFFER_DEPTH = 4;
+ localparam L1_FIFO_DEPTH = 8;
+ localparam L2_FIFO_DEPTH = 4;
+ logic [AXI_DATA_WIDTH-1:0] axi4_wdata;
+ logic axi4_wvalid;
+ logic axi4_wready;
+ logic [AXI_DATA_WIDTH/8-1:0] axi4_wstrb;
+ logic axi4_wlast;
+ logic [AXI_USER_WIDTH-1:0] axi4_wuser;
+ logic l1_fifo_valid_out;
+ logic l1_fifo_ready_in;
+ logic l1_fifo_valid_in;
+ logic l1_fifo_ready_out;
+ logic l1_req;
+ logic l1_accept_cur, l1_save_cur, l1_drop_cur;
+ logic l1_master_cur;
+ logic [AXI_ID_WIDTH-1:0] l1_id_cur;
+ logic [7:0] l1_len_cur;
+ logic l1_hit_cur, l1_prefetch_cur;
+ logic l1_save_in, l1_save_out;
+ logic [log2(L1_FIFO_DEPTH)-1:0] n_l1_save_SP;
+ logic l2_fifo_valid_out;
+ logic l2_fifo_ready_in;
+ logic l2_fifo_valid_in;
+ logic l2_fifo_ready_out;
+ logic l2_req;
+ logic l2_accept_cur, l2_drop_cur;
+ logic l2_master_cur;
+ logic [AXI_ID_WIDTH-1:0] l2_id_cur;
+ logic [7:0] l2_len_cur;
+ logic l2_hit_cur, l2_prefetch_cur;
+ logic fifo_select, fifo_select_SN, fifo_select_SP;
+ logic w_done;
+ logic b_drop_set;
+ // HUM buffer signals
+ logic hum_buf_ready_out;
+ logic hum_buf_valid_in;
+ logic hum_buf_ready_in;
+ logic hum_buf_valid_out;
+ logic hum_buf_underfull;
+ logic [AXI_DATA_WIDTH-1:0] hum_buf_wdata;
+ logic [AXI_DATA_WIDTH/8-1:0] hum_buf_wstrb;
+ logic hum_buf_wlast;
+ logic [AXI_USER_WIDTH-1:0] hum_buf_wuser;
+ logic hum_buf_drop_req_SN, hum_buf_drop_req_SP;
+ logic [7:0] hum_buf_drop_len_SN, hum_buf_drop_len_SP;
+ logic hum_buf_almost_full;
+ logic stop_store;
+ logic wlast_in, wlast_out;
+ logic signed [3:0] n_wlast_SN, n_wlast_SP;
+ logic block_forwarding;
+ // Search FSM
+ typedef enum logic [3:0] {STORE, BYPASS,
+ hum_buf_state_t;
+ hum_buf_state_t hum_buf_SP; // Present state
+ hum_buf_state_tbg hum_buf_SN; // Next State
+ axi_buffer_rab
+ #(
+ )
+ u_input_buf
+ (
+ .clk ( axi4_aclk ),
+ .rstn ( axi4_arstn ),
+ // Push
+ .data_in ( {s_axi4_wuser, s_axi4_wstrb, s_axi4_wdata, s_axi4_wlast} ),
+ .valid_in ( s_axi4_wvalid ),
+ .ready_out ( s_axi4_wready ),
+ // Pop
+ .data_out ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ),
+ .valid_out ( axi4_wvalid ),
+ .ready_in ( axi4_wready )
+ );
+ axi_buffer_rab
+ #(
+ )
+ u_l1_fifo
+ (
+ .clk ( axi4_aclk ),
+ .rstn ( axi4_arstn ),
+ // Push
+ .data_in ( {l1_prefetch_i, l1_hit_i, l1_id_i, l1_len_i, l1_master_i, l1_accept_i, l1_save_i, l1_drop_i} ),
+ .valid_in ( l1_fifo_valid_in ),
+ .ready_out ( l1_fifo_ready_out ),
+ // Pop
+ .data_out ( {l1_prefetch_cur, l1_hit_cur, l1_id_cur, l1_len_cur, l1_master_cur, l1_accept_cur, l1_save_cur, l1_drop_cur} ),
+ .valid_out ( l1_fifo_valid_out ),
+ .ready_in ( l1_fifo_ready_in )
+ );
+ // Push upon receiving new requests from the TLB.
+ assign l1_req = l1_accept_i | l1_save_i | l1_drop_i;
+ assign l1_fifo_valid_in = l1_req & l1_fifo_ready_out;
+ // Signal handshake
+ assign l1_done_o = l1_fifo_valid_in;
+ assign l2_done_o = l2_fifo_valid_in;
+ // Stall AW input of L1 TLB
+ assign input_stall_o = ~(l1_fifo_ready_out & l2_fifo_ready_out);
+ // Interface b_drop signals + handshake
+ always_comb begin
+ if (fifo_select == 1'b0) begin
+ prefetch_o = l1_prefetch_cur;
+ hit_o = l1_hit_cur;
+ id_o = l1_id_cur;
+ l1_fifo_ready_in = w_done | b_done_i;
+ l2_fifo_ready_in = 1'b0;
+ end else begin
+ prefetch_o = l2_prefetch_cur;
+ hit_o = l2_hit_cur;
+ id_o = l2_id_cur;
+ l1_fifo_ready_in = 1'b0;
+ l2_fifo_ready_in = w_done | b_done_i;
+ end
+ end
+ // Detect when an L1 transaction save request enters or exits the L1 FIFO.
+ assign l1_save_in = l1_fifo_valid_in & l1_save_i;
+ assign l1_save_out = l1_fifo_ready_in & l1_save_cur;
+ // Count the number of L1 transactions to save in the L1 FIFO.
+ always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
+ if (axi4_arstn == 0) begin
+ n_l1_save_SP <= '0;
+ end else if (l1_save_in ^ l1_save_out) begin
+ if (l1_save_in) begin
+ n_l1_save_SP <= n_l1_save_SP + 1'b1;
+ end else if (l1_save_out) begin
+ n_l1_save_SP <= n_l1_save_SP - 1'b1;
+ end
+ end
+ end
+ // Stall forwarding of AW L1 hits if:
+ // 1. The HUM buffer does not allow to be bypassed.
+ // 2. There are multiple L1 save requests in the FIFO, i.e., multiple L2 outputs pending.
+ assign output_stall_o = (n_l1_save_SP > 1) || (block_forwarding == 1'b1);
+ generate
+ if (ENABLE_L2TLB == 1) begin : HUM_BUFFER
+ axi_buffer_rab_bram
+ #(
+ )
+ u_hum_buf
+ (
+ .clk ( axi4_aclk ),
+ .rstn ( axi4_arstn ),
+ // Push
+ .data_in ( {axi4_wuser, axi4_wstrb, axi4_wdata, axi4_wlast} ),
+ .valid_in ( hum_buf_valid_in ),
+ .ready_out ( hum_buf_ready_out ),
+ // Pop
+ .data_out ( {hum_buf_wuser, hum_buf_wstrb, hum_buf_wdata, hum_buf_wlast} ),
+ .valid_out ( hum_buf_valid_out ),
+ .ready_in ( hum_buf_ready_in ),
+ // Clear
+ .almost_full ( hum_buf_almost_full ),
+ .underfull ( hum_buf_underfull ),
+ .drop_req ( hum_buf_drop_req_SP ),
+ .drop_len ( hum_buf_drop_len_SP )
+ );
+ axi_buffer_rab
+ #(
+ )
+ u_l2_fifo
+ (
+ .clk ( axi4_aclk ),
+ .rstn ( axi4_arstn ),
+ // Push
+ .data_in ( {l2_prefetch_i, l2_hit_i, l2_id_i, l2_len_i, l2_master_i, l2_accept_i, l2_drop_i} ),
+ .valid_in ( l2_fifo_valid_in ),
+ .ready_out ( l2_fifo_ready_out ),
+ // Pop
+ .data_out ( {l2_prefetch_cur, l2_hit_cur, l2_id_cur, l2_len_cur, l2_master_cur, l2_accept_cur, l2_drop_cur} ),
+ .valid_out ( l2_fifo_valid_out ),
+ .ready_in ( l2_fifo_ready_in )
+ );
+ // Push upon receiving new result from TLB.
+ assign l2_req = l2_accept_i | l2_drop_i;
+ assign l2_fifo_valid_in = l2_req & l2_fifo_ready_out;
+ assign wlast_in = axi4_wlast & hum_buf_valid_in & hum_buf_ready_out;
+ assign wlast_out = hum_buf_wlast & hum_buf_valid_out & hum_buf_ready_in;
+ always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
+ if (axi4_arstn == 0) begin
+ fifo_select_SP <= 1'b0;
+ hum_buf_drop_len_SP <= 'b0;
+ hum_buf_drop_req_SP <= 1'b0;
+ hum_buf_SP <= STORE;
+ n_wlast_SP <= 'b0;
+ end else begin
+ fifo_select_SP <= fifo_select_SN;
+ hum_buf_drop_len_SP <= hum_buf_drop_len_SN;
+ hum_buf_drop_req_SP <= hum_buf_drop_req_SN;
+ hum_buf_SP <= hum_buf_SN;
+ n_wlast_SP <= n_wlast_SN;
+ end
+ end
+ always_comb begin
+ n_wlast_SN = n_wlast_SP;
+ if (hum_buf_drop_req_SP) begin // Happens exactly once per burst to be dropped.
+ n_wlast_SN -= 1;
+ end
+ if (wlast_in) begin
+ n_wlast_SN += 1;
+ end
+ if (wlast_out) begin
+ n_wlast_SN -= 1;
+ end
+ end
+ always_comb begin : HUM_BUFFER_FSM
+ hum_buf_SN = hum_buf_SP;
+ m_axi4_wlast = 1'b0;
+ m_axi4_wdata = 'b0;
+ m_axi4_wstrb = 'b0;
+ m_axi4_wuser = 'b0;
+ m_axi4_wvalid = 1'b0;
+ axi4_wready = 1'b0;
+ hum_buf_valid_in = 1'b0;
+ hum_buf_ready_in = 1'b0;
+ hum_buf_drop_req_SN = hum_buf_drop_req_SP;
+ hum_buf_drop_len_SN = hum_buf_drop_len_SP;
+ master_select_o = 1'b0;
+ w_done = 1'b0; // read from FIFO without handshake with B sender
+ b_drop_o = 1'b0; // send data from FIFO to B sender (with handshake)
+ fifo_select = 1'b0;
+ fifo_select_SN = fifo_select_SP;
+ stop_store = 1'b0;
+ block_forwarding = 1'b0;
+ unique case (hum_buf_SP)
+ STORE : begin
+ // Simply store the data in the buffer.
+ hum_buf_valid_in = axi4_wvalid & hum_buf_ready_out;
+ axi4_wready = hum_buf_ready_out;
+ // We have got a full burst in the HUM buffer, thus stop storing.
+ if (wlast_in & !hum_buf_underfull | (n_wlast_SP > $signed(0))) begin
+ hum_buf_SN = WAIT_L1_BYPASS_YES;
+ // The buffer is full, thus wait for decision.
+ end else if (~hum_buf_ready_out) begin
+ hum_buf_SN = WAIT_L1_BYPASS_NO;
+ end
+ // Avoid the forwarding of L1 hits until we know whether we can bypass.
+ if (l1_fifo_valid_out & l1_save_cur) begin
+ block_forwarding = 1'b1;
+ end
+ end
+ WAIT_L1_BYPASS_YES : begin
+ // Wait for orders from L1 TLB.
+ if (l1_fifo_valid_out) begin
+ // L1 hit - forward data from buffer
+ if (l1_accept_cur) begin
+ m_axi4_wlast = hum_buf_wlast;
+ m_axi4_wdata = hum_buf_wdata;
+ m_axi4_wstrb = hum_buf_wstrb;
+ m_axi4_wuser = hum_buf_wuser;
+ m_axi4_wvalid = hum_buf_valid_out;
+ hum_buf_ready_in = m_axi4_wready;
+ master_select_o = l1_master_cur;
+ // Detect last data beat.
+ if (wlast_out) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ hum_buf_SN = STORE;
+ end
+ // L1 miss - wait for L2
+ end else if (l1_save_cur) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ hum_buf_SN = WAIT_L2_BYPASS_YES;
+ // L1 prefetch, prot, multi - drop data
+ end else if (l1_drop_cur) begin
+ fifo_select_SN = 1'b0; // L1
+ hum_buf_drop_req_SN = 1'b1;
+ hum_buf_drop_len_SN = l1_len_cur;
+ hum_buf_SN = FLUSH;
+ end
+ end
+ end
+ WAIT_L2_BYPASS_YES : begin
+ // Wait for orders from L2 TLB.
+ if (l2_fifo_valid_out) begin
+ // L2 hit - forward data from buffer
+ if (l2_accept_cur) begin
+ m_axi4_wlast = hum_buf_wlast;
+ m_axi4_wdata = hum_buf_wdata;
+ m_axi4_wstrb = hum_buf_wstrb;
+ m_axi4_wuser = hum_buf_wuser;
+ m_axi4_wvalid = hum_buf_valid_out;
+ hum_buf_ready_in = m_axi4_wready;
+ master_select_o = l2_master_cur;
+ // Detect last data beat.
+ if (wlast_out) begin
+ fifo_select = 1'b1;
+ w_done = 1'b1;
+ hum_buf_SN = STORE;
+ end
+ // L2 miss/prefetch hit
+ end else if (l2_drop_cur) begin
+ fifo_select_SN = 1'b1; // L2
+ hum_buf_drop_req_SN = 1'b1;
+ hum_buf_drop_len_SN = l2_len_cur;
+ hum_buf_SN = FLUSH;
+ end
+ // While we wait for orders from L2 TLB, we can still drop and accept L1 transactions.
+ end else if (l1_fifo_valid_out) begin
+ // L1 hit
+ if (l1_accept_cur) begin
+ hum_buf_SN = BYPASS;
+ // L1 prefetch/prot/multi
+ end else if (l1_drop_cur) begin
+ hum_buf_SN = DISCARD;
+ end
+ end
+ end
+ FLUSH : begin
+ // Clear HUM buffer flush request.
+ hum_buf_drop_req_SN = 1'b0;
+ // perform handshake with B sender
+ fifo_select = fifo_select_SP;
+ b_drop_o = 1'b1;
+ if (b_done_i) begin
+ hum_buf_SN = STORE;
+ end
+ end
+ BYPASS : begin
+ // Forward one full transaction from input buffer.
+ m_axi4_wlast = axi4_wlast;
+ m_axi4_wdata = axi4_wdata;
+ m_axi4_wstrb = axi4_wstrb;
+ m_axi4_wuser = axi4_wuser;
+ m_axi4_wvalid = axi4_wvalid;
+ axi4_wready = m_axi4_wready;
+ master_select_o = l1_master_cur;
+ // We have got a full transaction.
+ if (axi4_wlast & axi4_wready & axi4_wvalid) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ hum_buf_SN = WAIT_L2_BYPASS_YES;
+ end
+ end
+ DISCARD : begin
+ // Discard one full transaction from input buffer.
+ axi4_wready = 1'b1;
+ // We have got a full transaction.
+ if (axi4_wlast & axi4_wready & axi4_wvalid) begin
+ // Try to perform handshake with B sender.
+ fifo_select = 1'b0;
+ b_drop_o = 1'b1;
+ // We cannot wait here due to axi4_wready.
+ if (b_done_i) begin
+ hum_buf_SN = WAIT_L2_BYPASS_YES;
+ end else begin
+ hum_buf_SN = DISCARD_FINISH;
+ end
+ end
+ end
+ // Perform handshake with B sender.
+ fifo_select = 1'b0;
+ b_drop_o = 1'b1;
+ if (b_done_i) begin
+ hum_buf_SN = WAIT_L2_BYPASS_YES;
+ end
+ end
+ WAIT_L1_BYPASS_NO : begin
+ // Do not allow the forwarding of L1 hits.
+ block_forwarding = 1'b1;
+ // Wait for orders from L1 TLB.
+ if (l1_fifo_valid_out) begin
+ // L1 hit - forward data from/through HUM buffer and refill the buffer
+ if (l1_accept_cur) begin
+ // Forward data from HUM buffer.
+ m_axi4_wlast = hum_buf_wlast;
+ m_axi4_wdata = hum_buf_wdata;
+ m_axi4_wstrb = hum_buf_wstrb;
+ m_axi4_wuser = hum_buf_wuser;
+ m_axi4_wvalid = hum_buf_valid_out;
+ hum_buf_ready_in = m_axi4_wready;
+ master_select_o = l1_master_cur;
+ // Refill the HUM buffer. Stop when buffer full.
+ stop_store = ~hum_buf_ready_out;
+ hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ;
+ axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out;
+ // Detect last data beat.
+ if (wlast_out) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ if (~hum_buf_ready_out | hum_buf_almost_full) begin
+ hum_buf_SN = WAIT_L1_BYPASS_NO;
+ end else begin
+ hum_buf_SN = STORE;
+ end
+ end
+ // Allow the forwarding of L1 hits.
+ block_forwarding = 1'b0;
+ // L1 miss - wait for L2
+ end else if (l1_save_cur) begin
+ fifo_select = 1'b0;
+ w_done = 1'b1;
+ hum_buf_SN = WAIT_L2_BYPASS_NO;
+ // L1 prefetch, prot, multi - drop data
+ end else if (l1_drop_cur) begin
+ fifo_select_SN = 1'b0; // L1
+ hum_buf_drop_req_SN = 1'b1;
+ hum_buf_drop_len_SN = l1_len_cur;
+ hum_buf_SN = FLUSH;
+ // Allow the forwarding of L1 hits.
+ block_forwarding = 1'b0;
+ end
+ end
+ end
+ WAIT_L2_BYPASS_NO : begin
+ // Do not allow the forwarding of L1 hits.
+ block_forwarding = 1'b1;
+ // Wait for orders from L2 TLB.
+ if (l2_fifo_valid_out) begin
+ // L2 hit - forward first part from HUM buffer, rest from input buffer
+ if (l2_accept_cur) begin
+ // Forward data from HUM buffer.
+ m_axi4_wlast = hum_buf_wlast;
+ m_axi4_wdata = hum_buf_wdata;
+ m_axi4_wstrb = hum_buf_wstrb;
+ m_axi4_wuser = hum_buf_wuser;
+ m_axi4_wvalid = hum_buf_valid_out;
+ hum_buf_ready_in = m_axi4_wready;
+ master_select_o = l2_master_cur;
+ // Refill the HUM buffer. Stop when buffer full.
+ stop_store = ~hum_buf_ready_out;
+ hum_buf_valid_in = stop_store ? 1'b0 : axi4_wvalid ;
+ axi4_wready = stop_store ? 1'b0 : hum_buf_ready_out;
+ // Detect last data beat.
+ if (wlast_out) begin
+ fifo_select = 1'b1;
+ w_done = 1'b1;
+ if (~hum_buf_ready_out | hum_buf_almost_full) begin
+ hum_buf_SN = WAIT_L1_BYPASS_NO;
+ end else begin
+ hum_buf_SN = STORE;
+ end
+ end
+ // Allow the forwarding of L1 hits.
+ block_forwarding = 1'b0;
+ // L2 miss/prefetch hit - drop data
+ end else if (l2_drop_cur) begin
+ fifo_select_SN = 1'b1; // L2
+ hum_buf_drop_req_SN = 1'b1;
+ hum_buf_drop_len_SN = l2_len_cur;
+ hum_buf_SN = FLUSH;
+ // Allow the forwarding of L1 hits.
+ block_forwarding = 1'b0;
+ end
+ end
+ end
+ default: begin
+ hum_buf_SN = STORE;
+ end
+ endcase // hum_buf_SP
+ assign b_drop_set = 1'b0;
+ end else begin // HUM_BUFFER
+ // register to perform the handshake with B sender
+ always_ff @(posedge axi4_aclk or negedge axi4_arstn) begin
+ if (axi4_arstn == 0) begin
+ b_drop_o <= 1'b0;
+ end else if (b_done_i) begin
+ b_drop_o <= 1'b0;
+ end else if (b_drop_set) begin
+ b_drop_o <= 1'b1;;
+ end
+ end
+ always_comb begin : OUTPUT_CTRL
+ fifo_select = 1'b0;
+ w_done = 1'b0;
+ b_drop_set = 1'b0;
+ m_axi4_wlast = 1'b0;
+ m_axi4_wdata = 'b0;
+ m_axi4_wstrb = 'b0;
+ m_axi4_wuser = 'b0;
+ m_axi4_wvalid = 1'b0;
+ axi4_wready = 1'b0;
+ if (l1_fifo_valid_out) begin
+ // forward data
+ if (l1_accept_cur) begin
+ m_axi4_wlast = axi4_wlast;
+ m_axi4_wdata = axi4_wdata;
+ m_axi4_wstrb = axi4_wstrb;
+ m_axi4_wuser = axi4_wuser;
+ m_axi4_wvalid = axi4_wvalid;
+ axi4_wready = m_axi4_wready;
+ // Simply pop from FIFO upon last data beat.
+ w_done = axi4_wlast & axi4_wvalid & axi4_wready;
+ // discard entire burst
+ end else if (b_drop_o == 1'b0) begin
+ axi4_wready = 1'b1;
+ // Simply pop from FIFO upon last data beat. Perform handshake with B sender.
+ if (axi4_wlast & axi4_wvalid & axi4_wready)
+ b_drop_set = 1'b1;
+ end
+ end
+ end // OUTPUT_CTRL
+ assign master_select_o = l1_master_cur;
+ assign l2_fifo_ready_out = 1'b1;
+ assign block_forwarding = 1'b0;
+ // unused signals
+ assign hum_buf_ready_out = 1'b0;
+ assign hum_buf_valid_in = 1'b0;
+ assign hum_buf_ready_in = 1'b0;
+ assign hum_buf_valid_out = 1'b0;
+ assign hum_buf_wdata = 'b0;
+ assign hum_buf_wstrb = 'b0;
+ assign hum_buf_wlast = 1'b0;
+ assign hum_buf_wuser = 'b0;
+ assign hum_buf_drop_len_SN = 'b0;
+ assign hum_buf_drop_req_SN = 1'b0;
+ assign hum_buf_almost_full = 1'b0;
+ assign l2_fifo_valid_in = 1'b0;
+ assign l2_fifo_valid_out = 1'b0;
+ assign l2_prefetch_cur = 1'b0;
+ assign l2_hit_cur = 1'b0;
+ assign l2_id_cur = 'b0;
+ assign l2_len_cur = 'b0;
+ assign l2_master_cur = 1'b0;
+ assign l2_accept_cur = 1'b0;
+ assign l2_drop_cur = 1'b0;
+ assign l2_req = 1'b0;
+ assign fifo_select_SN = 1'b0;
+ assign fifo_select_SP = 1'b0;
+ assign stop_store = 1'b0;
+ assign n_wlast_SP = 'b0;
+ assign wlast_in = 1'b0;
+ assign wlast_out = 1'b0;
+ end // HUM_BUFFER
+ endgenerate
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi4_w_sender(Elaboratable):
+ def __init__(self):
+ self.axi4_aclk = Signal() # input
+ self.axi4_arstn = Signal() # input
+ self.s_axi4_wdata = Signal() # input
+ self.s_axi4_wvalid = Signal() # input
+ self.s_axi4_wready = Signal() # output
+ self.s_axi4_wstrb = Signal() # input
+ self.s_axi4_wlast = Signal() # input
+ self.s_axi4_wuser = Signal() # input
+ self.m_axi4_wdata = Signal() # output
+ self.m_axi4_wvalid = Signal() # output
+ self.m_axi4_wready = Signal() # input
+ self.m_axi4_wstrb = Signal() # output
+ self.m_axi4_wlast = Signal() # output
+ self.m_axi4_wuser = Signal() # output
+ def elaborate(self, platform=None):
+ m = Module()
+ m.d.comb += self.m_axi4_wdata.eq(self.s_axi4_wdata)
+ m.d.comb += self.m_axi4_wstrb.eq(self.s_axi4_wstrb)
+ m.d.comb += self.m_axi4_wlast.eq(self.s_axi4_wlast)
+ m.d.comb += self.m_axi4_wuser.eq(self.s_axi4_wuser)
+ m.d.comb += self.m_axi4_wvalid.eq(self.s_axi4_wvalid)
+ m.d.comb += self.s_axi4_wready.eq(self.m_axi4_wready)
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# module axi4_w_sender
+# #(
+# parameter AXI_DATA_WIDTH = 32,
+# parameter AXI_USER_WIDTH = 2
+# )
+# (
+# input axi4_aclk,
+# input axi4_arstn,
+# input [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
+# input s_axi4_wvalid,
+# output s_axi4_wready,
+# input [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
+# input s_axi4_wlast,
+# input [AXI_USER_WIDTH-1:0] s_axi4_wuser,
+# output [AXI_DATA_WIDTH-1:0] m_axi4_wdata,
+# output m_axi4_wvalid,
+# input m_axi4_wready,
+# output [AXI_DATA_WIDTH/8-1:0] m_axi4_wstrb,
+# output m_axi4_wlast,
+# output [AXI_USER_WIDTH-1:0] m_axi4_wuser
+# );
+# assign m_axi4_wdata = s_axi4_wdata;
+# assign m_axi4_wstrb = s_axi4_wstrb;
+# assign m_axi4_wlast = s_axi4_wlast;
+# assign m_axi4_wuser = s_axi4_wuser;
+# assign m_axi4_wvalid = s_axi4_wvalid;
+# assign s_axi4_wready = m_axi4_wready;
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi_buffer_rab(Elaboratable):
+ def __init__(self):
+ self.clk = Signal() # input
+ self.rstn = Signal() # input
+ self.data_out = Signal(DATA_WIDTH) # output
+ self.valid_out = Signal() # output
+ self.ready_in = Signal() # input
+ self.valid_in = Signal() # input
+ self.data_in = Signal(DATA_WIDTH) # input
+ self.ready_out = Signal() # output
+ def elaborate(self, platform=None):
+ m = Module()
+ m.d.comb += self.full.eq(self.None)
+ m.d.comb += self.data_out.eq(self.None)
+ m.d.comb += self.valid_out.eq(self.None)
+ m.d.comb += self.ready_out.eq(self.None)
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# //import CfMath::log2;
+# module axi_buffer_rab
+# //#(
+# // parameter DATA_WIDTH,
+# // parameter BUFFER_DEPTH
+# //)
+# (
+# input logic clk,
+# input logic rstn,
+# // Downstream port
+# output logic [DATA_WIDTH-1:0] data_out,
+# output logic valid_out,
+# input logic ready_in,
+# // Upstream port
+# input logic valid_in,
+# input logic [DATA_WIDTH-1:0] data_in,
+# output logic ready_out
+# );
+# localparam integer LOG_BUFFER_DEPTH = log2(BUFFER_DEPTH);
+# // Internal data structures
+# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_in; // location to which we last wrote
+# reg [LOG_BUFFER_DEPTH - 1 : 0] pointer_out; // location from which we last sent
+# reg [LOG_BUFFER_DEPTH : 0] elements; // number of elements in the buffer
+# reg [DATA_WIDTH - 1 : 0] buffer [BUFFER_DEPTH - 1 : 0];
+# wire full;
+# integer loop1;
+# assign full = (elements == BUFFER_DEPTH);
+# always @(posedge clk or negedge rstn)
+# begin: elements_sequential
+# if (rstn == 1'b0)
+# elements <= 0;
+# else
+# begin
+# // ------------------
+# // Are we filling up?
+# // ------------------
+# // One out, none in
+# if (ready_in && valid_out && (!valid_in || full))
+# elements <= elements - 1;
+# // None out, one in
+# else if ((!valid_out || !ready_in) && valid_in && !full)
+# elements <= elements + 1;
+# // Else, either one out and one in, or none out and none in - stays unchanged
+# end
+# end
+# always @(posedge clk or negedge rstn)
+# begin: buffers_sequential
+# if (rstn == 1'b0)
+# begin
+# for (loop1 = 0 ; loop1 < BUFFER_DEPTH ; loop1 = loop1 + 1)
+# buffer[loop1] <= 0;
+# end
+# else
+# begin
+# // Update the memory
+# if (valid_in && !full)
+# buffer[pointer_in] <= data_in;
+# end
+# end
+# always @(posedge clk or negedge rstn)
+# begin: sequential
+# if (rstn == 1'b0)
+# begin
+# pointer_out <= 0;
+# pointer_in <= 0;
+# end
+# else
+# begin
+# // ------------------------------------
+# // Check what to do with the input side
+# // ------------------------------------
+# // We have some input, increase by 1 the input pointer
+# if (valid_in && !full)
+# begin
+# if (pointer_in == $unsigned(BUFFER_DEPTH - 1))
+# pointer_in <= 0;
+# else
+# pointer_in <= pointer_in + 1;
+# end
+# // Else we don't have any input, the input pointer stays the same
+# // -------------------------------------
+# // Check what to do with the output side
+# // -------------------------------------
+# // We had pushed one flit out, we can try to go for the next one
+# if (ready_in && valid_out)
+# begin
+# if (pointer_out == $unsigned(BUFFER_DEPTH - 1))
+# pointer_out <= 0;
+# else
+# pointer_out <= pointer_out + 1;
+# end
+# // Else stay on the same output location
+# end
+# end
+# // Update output ports
+# assign data_out = buffer[pointer_out];
+# assign valid_out = (elements != 0);
+# assign ready_out = ~full;
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi_buffer_rab_bram(Elaboratable):
+ def __init__(self):
+ self.clk = Signal() # input
+ self.rstn = Signal() # input
+ self.data_out = Signal(DATA_WIDTH) # output
+ self.valid_out = Signal() # output
+ self.ready_in = Signal() # input
+ self.valid_in = Signal() # input
+ self.data_in = Signal(DATA_WIDTH) # input
+ self.ready_out = Signal() # output
+ self.almost_full = Signal() # output
+ self.underfull = Signal() # output
+ self.drop_req = Signal() # input
+ self.drop_len = Signal(8) # input
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# ////import CfMath::log2;
+# module axi_buffer_rab_bram
+# //#(
+# // parameter DATA_WIDTH,
+# // parameter BUFFER_DEPTH
+# // )
+# (
+# input logic clk,
+# input logic rstn,
+# // Downstream port
+# output logic [DATA_WIDTH-1:0] data_out,
+# output logic valid_out,
+# input logic ready_in,
+# // Upstream port
+# input logic valid_in,
+# input logic [DATA_WIDTH-1:0] data_in,
+# output logic ready_out,
+# // Status and drop control
+# output logic almost_full,
+# output logic underfull,
+# input logic drop_req,
+# // Number of items to drop. As for AXI lengths, counting starts at zero, i.e., `drop_len == 0`
+# // and `drop_req` means drop one item.
+# input logic [7:0] drop_len
+# );
+""" #docstring_begin
+ // The BRAM needs to be in "write-first" mode for first-word fall-through FIFO behavior.
+ // To still push and pop simultaneously if the buffer is full, we internally increase the
+ // buffer depth by 1.
+ /**
+ * Internal data structures
+ */
+ // Location to which we last wrote
+ logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_in_d, ptr_in_q;
+ // Location from which we last sent
+ logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_d, ptr_out_q;
+ // Required for fall-through behavior on the first word
+ logic [ACT_LOG_BUFFER_DEPTH-1:0] ptr_out_bram;
+ // Number of elements in the buffer. Can be negative if elements that have been dropped have not
+ // yet been written.
+ logic signed [ACT_LOG_BUFFER_DEPTH:0] n_elems_d, n_elems_q;
+ logic [DATA_WIDTH-1:0] data_out_bram, data_out_q;
+ logic valid_out_q;
+ logic full;
+ assign almost_full = (n_elems_q == BUFFER_DEPTH-1);
+ assign full = (n_elems_q == BUFFER_DEPTH);
+ always_ff @(posedge clk, negedge rstn) begin
+ if (~rstn) begin
+ n_elems_q <= '0;
+ ptr_in_q <= '0;
+ ptr_out_q <= '0;
+ end else begin
+ n_elems_q <= n_elems_d;
+ ptr_in_q <= ptr_in_d;
+ ptr_out_q <= ptr_out_d;
+ end
+ end
+ // Update the number of elements.
+ always_comb begin
+ n_elems_d = n_elems_q;
+ if (drop_req) begin
+ n_elems_d -= (drop_len + 1);
+ end
+ if (valid_in && ready_out) begin
+ n_elems_d += 1;
+ end
+ if (valid_out && ready_in) begin
+ n_elems_d -= 1;
+ end
+ end
+ // Update the output pointer.
+ always_comb begin
+ ptr_out_d = ptr_out_q;
+ if (drop_req) begin
+ if ((ptr_out_q + drop_len + 1) > (ACT_BUFFER_DEPTH - 1)) begin
+ ptr_out_d = drop_len + 1 - (ACT_BUFFER_DEPTH - ptr_out_q);
+ end else begin
+ ptr_out_d += (drop_len + 1);
+ end
+ end
+ if (valid_out && ready_in) begin
+ if (ptr_out_d == (ACT_BUFFER_DEPTH - 1)) begin
+ ptr_out_d = '0;
+ end else begin
+ ptr_out_d += 1;
+ end
+ end
+ end
+ // The BRAM has a read latency of one cycle, so apply the new address one cycle earlier for
+ // first-word fall-through FIFO behavior.
+ //assign ptr_out_bram = (ptr_out_q == (ACT_BUFFER_DEPTH-1)) ? '0 : (ptr_out_q + 1);
+ assign ptr_out_bram = ptr_out_d;
+ // Update the input pointer.
+ always_comb begin
+ ptr_in_d = ptr_in_q;
+ if (valid_in && ready_out) begin
+ if (ptr_in_d == (ACT_BUFFER_DEPTH - 1)) begin
+ ptr_in_d = '0;
+ end else begin
+ ptr_in_d += 1;
+ end
+ end
+ end
+ // Update output ports.
+ assign valid_out = (n_elems_q > $signed(0));
+ assign underfull = (n_elems_q < $signed(0));
+ assign ready_out = ~full;
+ ram_tp_write_first #(
+ )
+ ram_tp_write_first_0
+ (
+ .clk ( clk ),
+ .we ( valid_in & ~full ),
+ .addr0 ( ptr_in_q ),
+ .addr1 ( ptr_out_bram ),
+ .d_i ( data_in ),
+ .d0_o ( ),
+ .d1_o ( data_out_bram )
+ );
+ // When reading from/writing to the same address on both ports ("Write-Read Collision"),
+ // the data on the read port is invalid (during the write cycle). In this implementation,
+ // this can happen only when the buffer is empty. Thus, we forward the data from a
+ // register in this case.
+ always @(posedge clk) begin
+ if (rstn == 1'b0) begin
+ data_out_q <= 'b0;
+ end else if ( (ptr_out_bram == ptr_in_q) && (valid_in && !full) ) begin
+ data_out_q <= data_in;
+ end
+ end
+ always @(posedge clk) begin
+ if (rstn == 1'b0) begin
+ valid_out_q <= 'b0;
+ end else begin
+ valid_out_q <= valid_out;
+ end
+ end
+ // Drive output data
+ always_comb begin
+ if (valid_out && !valid_out_q) begin // We have just written to an empty FIFO
+ data_out = data_out_q;
+ end else begin
+ data_out = data_out_bram;
+ end
+ end
+""" #docstring_end
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi_rab_cfg(Elaboratable):
+ def __init__(self):
+ self.Clk_CI = Signal() # input
+ self.Rst_RBI = Signal() # input
+ self.s_axi_awaddr = Signal(AXI_ADDR_WIDTH) # input
+ self.s_axi_awvalid = Signal() # input
+ self.s_axi_awready = Signal() # output
+ self.s_axi_wdata = Signal() # input
+ self.s_axi_wstrb = Signal(1+ERROR p_expression_25) # input
+ self.s_axi_wvalid = Signal() # input
+ self.s_axi_wready = Signal() # output
+ self.s_axi_bresp = Signal(2) # output
+ self.s_axi_bvalid = Signal() # output
+ self.s_axi_bready = Signal() # input
+ self.s_axi_araddr = Signal(AXI_ADDR_WIDTH) # input
+ self.s_axi_arvalid = Signal() # input
+ self.s_axi_arready = Signal() # output
+ self.s_axi_rdata = Signal(AXI_DATA_WIDTH) # output
+ self.s_axi_rresp = Signal(2) # output
+ self.s_axi_rvalid = Signal() # output
+ self.s_axi_rready = Signal() # input
+ self.L1Cfg_DO = Signal() # output
+ self.L1AllowMultiHit_SO = Signal() # output
+ self.MissAddr_DI = Signal(ADDR_WIDTH_VIRT) # input
+ self.MissMeta_DI = Signal(MISS_META_WIDTH) # input
+ self.Miss_SI = Signal() # input
+ self.MhFifoFull_SO = Signal() # output
+ self.wdata_l2 = Signal() # output
+ self.waddr_l2 = Signal() # output
+ self.wren_l2 = Signal(N_PORTS) # output
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# // --=========================================================================--
+# //
+# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗
+# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝
+# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗
+# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║
+# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝
+# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝
+# //
+# //
+# // Author: Pirmin Vogel - vogelpi@iis.ee.ethz.ch
+# //
+# // Purpose : AXI4-Lite configuration and miss handling interface for RAB
+# //
+# // --=========================================================================--
+# //import CfMath::log2;
+# module axi_rab_cfg
+# #(
+# parameter N_PORTS = 3,
+# parameter N_REGS = 196,
+# parameter N_L2_SETS = 32,
+# parameter N_L2_SET_ENTRIES= 32,
+# parameter ADDR_WIDTH_PHYS = 40,
+# parameter ADDR_WIDTH_VIRT = 32,
+# parameter N_FLAGS = 4,
+# parameter AXI_DATA_WIDTH = 64,
+# parameter AXI_ADDR_WIDTH = 32,
+# parameter MISS_META_WIDTH = 10, // <= FIFO_WIDTH
+# parameter MH_FIFO_DEPTH = 16
+# )
+# (
+# input logic Clk_CI,
+# input logic Rst_RBI,
+# // AXI Lite interface
+# input logic [AXI_ADDR_WIDTH-1:0] s_axi_awaddr,
+# input logic s_axi_awvalid,
+# output logic s_axi_awready,
+# input logic [AXI_DATA_WIDTH/8-1:0][7:0] s_axi_wdata,
+# input logic [AXI_DATA_WIDTH/8-1:0] s_axi_wstrb,
+# input logic s_axi_wvalid,
+# output logic s_axi_wready,
+# output logic [1:0] s_axi_bresp,
+# output logic s_axi_bvalid,
+# input logic s_axi_bready,
+# input logic [AXI_ADDR_WIDTH-1:0] s_axi_araddr,
+# input logic s_axi_arvalid,
+# output logic s_axi_arready,
+# output logic [AXI_DATA_WIDTH-1:0] s_axi_rdata,
+# output logic [1:0] s_axi_rresp,
+# output logic s_axi_rvalid,
+# input logic s_axi_rready,
+# // Slice configuration
+# output logic [N_REGS-1:0][63:0] L1Cfg_DO,
+# output logic L1AllowMultiHit_SO,
+# // Miss handling
+# input logic [ADDR_WIDTH_VIRT-1:0] MissAddr_DI,
+# input logic [MISS_META_WIDTH-1:0] MissMeta_DI,
+# input logic Miss_SI,
+# output logic MhFifoFull_SO,
+# // L2 TLB
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] wdata_l2,
+# output logic [N_PORTS-1:0] [AXI_ADDR_WIDTH-1:0] waddr_l2,
+# output logic [N_PORTS-1:0] wren_l2
+# );
+""" #docstring_begin
+ localparam ADDR_LSB = log2(64/8); // 64 even if the AXI Lite interface is 32,
+ // because RAB slices are 64 bit wide.
+ localparam ADDR_MSB = log2(N_REGS)+ADDR_LSB-1;
+ localparam L2SINGLE_AMAP_SIZE = 16'h4000; // Maximum 2048 TLB entries in L2
+ localparam integer N_L2_ENTRIES = N_L2_SETS * N_L2_SET_ENTRIES;
+ localparam logic [AXI_ADDR_WIDTH-1:0] L2_VA_MAX_ADDR = (N_L2_ENTRIES-1) << 2;
+ logic [AXI_DATA_WIDTH/8-1:0][7:0] L1Cfg_DP[N_REGS]; // [Byte][Bit]
+ genvar j;
+ // █████╗ ██╗ ██╗██╗██╗ ██╗ ██╗ ██╗████████╗███████╗
+ // ██╔══██╗╚██╗██╔╝██║██║ ██║ ██║ ██║╚══██╔══╝██╔════╝
+ // ███████║ ╚███╔╝ ██║███████║█████╗██║ ██║ ██║ █████╗
+ // ██╔══██║ ██╔██╗ ██║╚════██║╚════╝██║ ██║ ██║ ██╔══╝
+ // ██║ ██║██╔╝ ██╗██║ ██║ ███████╗██║ ██║ ███████╗
+ // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚═╝ ╚══════╝
+ //
+ logic [AXI_ADDR_WIDTH-1:0] awaddr_reg;
+ logic awaddr_done_rise;
+ logic awaddr_done_reg;
+ logic awaddr_done_reg_dly;
+ logic [AXI_DATA_WIDTH/8-1:0][7:0] wdata_reg;
+ logic [AXI_DATA_WIDTH/8-1:0] wstrb_reg;
+ logic wdata_done_rise;
+ logic wdata_done_reg;
+ logic wdata_done_reg_dly;
+ logic wresp_done_reg;
+ logic wresp_running_reg;
+ logic [AXI_ADDR_WIDTH-1:0] araddr_reg;
+ logic araddr_done_reg;
+ logic [AXI_DATA_WIDTH-1:0] rdata_reg;
+ logic rresp_done_reg;
+ logic rresp_running_reg;
+ logic awready;
+ logic wready;
+ logic bvalid;
+ logic arready;
+ logic rvalid;
+ logic wren;
+ logic wren_l1;
+ assign wren = ( wdata_done_rise & awaddr_done_reg ) | ( awaddr_done_rise & wdata_done_reg );
+ assign wdata_done_rise = wdata_done_reg & ~wdata_done_reg_dly;
+ assign awaddr_done_rise = awaddr_done_reg & ~awaddr_done_reg_dly;
+ // reg_dly
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ wdata_done_reg_dly <= 1'b0;
+ awaddr_done_reg_dly <= 1'b0;
+ end
+ else
+ begin
+ wdata_done_reg_dly <= wdata_done_reg;
+ awaddr_done_reg_dly <= awaddr_done_reg;
+ end
+ end
+ // AW Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ awaddr_done_reg <= 1'b0;
+ awaddr_reg <= '0;
+ awready <= 1'b1;
+ end
+ else
+ begin
+ if (awready && s_axi_awvalid)
+ begin
+ awready <= 1'b0;
+ awaddr_done_reg <= 1'b1;
+ awaddr_reg <= s_axi_awaddr;
+ end
+ else if (awaddr_done_reg && wresp_done_reg)
+ begin
+ awready <= 1'b1;
+ awaddr_done_reg <= 1'b0;
+ end
+ end
+ end
+ // W Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ wdata_done_reg <= 1'b0;
+ wready <= 1'b1;
+ wdata_reg <= '0;
+ wstrb_reg <= '0;
+ end
+ else
+ begin
+ if (wready && s_axi_wvalid)
+ begin
+ wready <= 1'b0;
+ wdata_done_reg <= 1'b1;
+ wdata_reg <= s_axi_wdata;
+ wstrb_reg <= s_axi_wstrb;
+ end
+ else if (wdata_done_reg && wresp_done_reg)
+ begin
+ wready <= 1'b1;
+ wdata_done_reg <= 1'b0;
+ end
+ end
+ end
+ // B Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ bvalid <= 1'b0;
+ wresp_done_reg <= 1'b0;
+ wresp_running_reg <= 1'b0;
+ end
+ else
+ begin
+ if (awaddr_done_reg && wdata_done_reg && !wresp_done_reg)
+ begin
+ if (!wresp_running_reg)
+ begin
+ bvalid <= 1'b1;
+ wresp_running_reg <= 1'b1;
+ end
+ else if (s_axi_bready)
+ begin
+ bvalid <= 1'b0;
+ wresp_done_reg <= 1'b1;
+ wresp_running_reg <= 1'b0;
+ end
+ end
+ else
+ begin
+ bvalid <= 1'b0;
+ wresp_done_reg <= 1'b0;
+ wresp_running_reg <= 1'b0;
+ end
+ end
+ end
+ // AR Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ araddr_done_reg <= 1'b0;
+ arready <= 1'b1;
+ araddr_reg <= '0;
+ end
+ else
+ begin
+ if (arready && s_axi_arvalid)
+ begin
+ arready <= 1'b0;
+ araddr_done_reg <= 1'b1;
+ araddr_reg <= s_axi_araddr;
+ end
+ else if (araddr_done_reg && rresp_done_reg)
+ begin
+ arready <= 1'b1;
+ araddr_done_reg <= 1'b0;
+ end
+ end
+ end
+ // R Channel
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ begin
+ if (!Rst_RBI)
+ begin
+ rresp_done_reg <= 1'b0;
+ rvalid <= 1'b0;
+ rresp_running_reg <= 1'b0;
+ end
+ else
+ begin
+ if (araddr_done_reg && !rresp_done_reg)
+ begin
+ if (!rresp_running_reg)
+ begin
+ rvalid <= 1'b1;
+ rresp_running_reg <= 1'b1;
+ end
+ else if (s_axi_rready)
+ begin
+ rvalid <= 1'b0;
+ rresp_done_reg <= 1'b1;
+ rresp_running_reg <= 1'b0;
+ end
+ end
+ else
+ begin
+ rvalid <= 1'b0;
+ rresp_done_reg <= 1'b0;
+ rresp_running_reg <= 1'b0;
+ end
+ end
+ end
+ // ██╗ ██╗ ██████╗███████╗ ██████╗ ██████╗ ███████╗ ██████╗
+ // ██║ ███║ ██╔════╝██╔════╝██╔════╝ ██╔══██╗██╔════╝██╔════╝
+ // ██║ ╚██║ ██║ █████╗ ██║ ███╗ ██████╔╝█████╗ ██║ ███╗
+ // ██║ ██║ ██║ ██╔══╝ ██║ ██║ ██╔══██╗██╔══╝ ██║ ██║
+ // ███████╗██║ ╚██████╗██║ ╚██████╔╝ ██║ ██║███████╗╚██████╔╝
+ // ╚══════╝╚═╝ ╚═════╝╚═╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝ ╚═════╝
+ //
+ assign wren_l1 = wren && (awaddr_reg < L2SINGLE_AMAP_SIZE);
+ always @( posedge Clk_CI or negedge Rst_RBI )
+ begin
+ var integer idx_reg, idx_byte;
+ if ( Rst_RBI == 1'b0 )
+ begin
+ for ( idx_reg = 0; idx_reg < N_REGS; idx_reg++ )
+ L1Cfg_DP[idx_reg] <= '0;
+ end
+ else if ( wren_l1 )
+ begin
+ if ( awaddr_reg[ADDR_LSB+1] == 1'b0 ) begin // VIRT_ADDR
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
+ if ( (idx_byte < ADDR_WIDTH_VIRT/8) ) begin
+ if ( wstrb_reg[idx_byte] ) begin
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte];
+ end
+ end
+ else begin // Let synthesizer optimize away unused registers.
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
+ end
+ end
+ end
+ else if ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b10 ) begin // PHYS_ADDR
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
+ if ( (idx_byte < ADDR_WIDTH_PHYS/8) ) begin
+ if ( wstrb_reg[idx_byte] ) begin
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte];
+ end
+ end
+ else begin // Let synthesizer optimize away unused registers.
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
+ end
+ end
+ end
+ else begin // ( awaddr_reg[ADDR_LSB+1:ADDR_LSB] == 2'b11 ) // FLAGS
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ ) begin
+ if ( (idx_byte < 1) ) begin
+ if ( wstrb_reg[idx_byte] ) begin
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= wdata_reg[idx_byte] & { {{8-N_FLAGS}{1'b0}}, {{N_FLAGS}{1'b1}} };
+ end
+ end
+ else begin // Let synthesizer optimize away unused registers.
+ L1Cfg_DP[awaddr_reg[ADDR_MSB:ADDR_LSB]][idx_byte] <= '0;
+ end
+ end
+ end
+ end
+ end // always @ ( posedge Clk_CI or negedge Rst_RBI )
+ generate
+ // Mask unused bits -> Synthesizer should optimize away unused registers
+ for( j=0; j<N_REGS; j++ ) begin
+ if ( j[1] == 1'b0 ) // VIRT_ADDR
+ assign L1Cfg_DO[j] = { {{64-ADDR_WIDTH_VIRT}{1'b0}},{ADDR_WIDTH_VIRT{1'b1}} } & L1Cfg_DP[j];
+ else if ( j[1:0] == 2'b10 ) // PHYS_ADDR
+ assign L1Cfg_DO[j] = { {{64-ADDR_WIDTH_PHYS}{1'b0}},{ADDR_WIDTH_PHYS{1'b1}} } & L1Cfg_DP[j];
+ else // if ( j[1:0] == 2'b11 ) // FLAGS
+ assign L1Cfg_DO[j] = { {{64-N_FLAGS}{1'b0}},{N_FLAGS{1'b1}} } & L1Cfg_DP[j];
+ end
+ endgenerate
+ always_comb
+ begin
+ if ( araddr_reg[ADDR_LSB-1] == 1'b1 ) // read upper 32 bit, for debugging over 32-bit interface
+ rdata_reg = { {32'h00000000},{L1Cfg_DO[araddr_reg[ADDR_MSB:ADDR_LSB]][63:32]} };
+ else
+ rdata_reg = L1Cfg_DO[araddr_reg[ADDR_MSB:ADDR_LSB]];
+ end
+ assign s_axi_awready = awready;
+ assign s_axi_wready = wready;
+ assign s_axi_bresp = 2'b00;
+ assign s_axi_bvalid = bvalid;
+ assign s_axi_arready = arready;
+ assign s_axi_rresp = 2'b00;
+ assign s_axi_rvalid = rvalid;
+ // ██╗ ██████╗ ██████╗███████╗ ██████╗
+ // ██║ ╚════██╗ ██╔════╝██╔════╝██╔════╝
+ // ██║ █████╔╝ ██║ █████╗ ██║ ███╗
+ // ██║ ██╔═══╝ ██║ ██╔══╝ ██║ ██║
+ // ███████╗███████╗ ╚██████╗██║ ╚██████╔╝
+ // ╚══════╝╚══════╝ ╚═════╝╚═╝ ╚═════╝
+ //
+ logic [N_PORTS-1:0] l2_addr_is_in_va_rams;
+ logic [N_PORTS-1:0] upper_word_is_written;
+ logic [N_PORTS-1:0] lower_word_is_written;
+ generate
+ for( j=0; j< N_PORTS; j++)
+ begin
+ if (AXI_DATA_WIDTH == 64) begin
+ assign l2_addr_is_in_va_rams[j] = (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg[log2(L2SINGLE_AMAP_SIZE)-1:0] <= L2_VA_MAX_ADDR);
+ assign upper_word_is_written[j] = (wstrb_reg[7:4] != 4'b0000);
+ assign lower_word_is_written[j] = (wstrb_reg[3:0] != 4'b0000);
+ end else begin
+ assign l2_addr_is_in_va_rams[j] = 1'b0;
+ assign upper_word_is_written[j] = 1'b0;
+ assign lower_word_is_written[j] = 1'b0;
+ end
+ always @( posedge Clk_CI or negedge Rst_RBI ) begin
+ var integer idx_byte, off_byte;
+ if ( Rst_RBI == 1'b0 )
+ begin
+ wren_l2[j] <= 1'b0;
+ wdata_l2[j] <= '0;
+ end
+ else if (wren)
+ begin
+ if ( (awaddr_reg >= (j+1)*L2SINGLE_AMAP_SIZE) && (awaddr_reg < (j+2)*L2SINGLE_AMAP_SIZE) && (|wstrb_reg) )
+ wren_l2[j] <= 1'b1;
+ if (AXI_DATA_WIDTH == 32) begin
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8; idx_byte++ )
+ wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte] & {8{wstrb_reg[idx_byte]}};
+ end
+ else if (AXI_DATA_WIDTH == 64) begin
+ if (lower_word_is_written[j] == 1'b1)
+ off_byte = 0;
+ else
+ off_byte = 4;
+ // always put the payload in the lower word and set upper word to 0
+ for ( idx_byte = 0; idx_byte < AXI_DATA_WIDTH/8/2; idx_byte++ )
+ wdata_l2[j][idx_byte*8 +: 8] <= wdata_reg[idx_byte+off_byte] & {8{wstrb_reg[idx_byte+off_byte]}};
+ wdata_l2[j][AXI_DATA_WIDTH-1:AXI_DATA_WIDTH/2] <= 'b0;
+ end
+ // pragma translate_off
+ else
+ $fatal(1, "Unsupported AXI_DATA_WIDTH!");
+ // pragma translate_on
+ end
+ else
+ wren_l2[j] <= '0;
+ end // always @ ( posedge Clk_CI or negedge Rst_RBI )
+ // Properly align the 32-bit word address when writing from 64-bit interface:
+ // Depending on the system, the incoming address is (non-)aligned to the 64-bit
+ // word when writing the upper 32-bit word.
+ always_comb begin
+ waddr_l2[j] = (awaddr_reg -(j+1)*L2SINGLE_AMAP_SIZE)/4;
+ if (wren_l2[j]) begin
+ if (AXI_DATA_WIDTH == 64) begin
+ if (upper_word_is_written[j] == 1'b1) begin
+ // address must be non-aligned
+ waddr_l2[j][0] = 1'b1;
+ end
+ end
+ // pragma translate_off
+ else if (AXI_DATA_WIDTH != 32) begin
+ $fatal(1, "Unsupported AXI_DATA_WIDTH!");
+ end
+ // pragma translate_on
+ end
+ end
+ // Assert that only one 32-bit word is ever written at a time to VA RAMs on 64-bit data
+ // systems.
+ // pragma translate_off
+ always_ff @ (posedge Clk_CI) begin
+ if (AXI_DATA_WIDTH == 64) begin
+ if (l2_addr_is_in_va_rams[j]) begin
+ if (upper_word_is_written[j]) begin
+ assert (!lower_word_is_written[j])
+ else $error("Unsupported write across two 32-bit words to VA RAMs!");
+ end
+ else if (lower_word_is_written[j]) begin
+ assert (!upper_word_is_written[j])
+ else $error("Unsupported write across two 32-bit words to VA RAMs!");
+ end
+ end
+ end
+ end
+ // pragma translate_on
+ end // for (j=0; j< N_PORTS; j++)
+ endgenerate
+ // ███╗ ███╗██╗ ██╗ ███████╗██╗███████╗ ██████╗ ███████╗
+ // ████╗ ████║██║ ██║ ██╔════╝██║██╔════╝██╔═══██╗██╔════╝
+ // ██╔████╔██║███████║ █████╗ ██║█████╗ ██║ ██║███████╗
+ // ██║╚██╔╝██║██╔══██║ ██╔══╝ ██║██╔══╝ ██║ ██║╚════██║
+ // ██║ ╚═╝ ██║██║ ██║ ██║ ██║██║ ╚██████╔╝███████║
+ // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝ ╚═╝╚═╝ ╚═════╝ ╚══════╝
+ //
+ logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDin_D;
+ logic AddrFifoWen_S;
+ logic AddrFifoRen_S;
+ logic [ADDR_WIDTH_VIRT-1:0] AddrFifoDout_D;
+ logic AddrFifoFull_S;
+ logic AddrFifoEmpty_S;
+ logic AddrFifoEmpty_SB;
+ logic AddrFifoFull_SB;
+ logic [MISS_META_WIDTH-1:0] MetaFifoDin_D;
+ logic MetaFifoWen_S;
+ logic MetaFifoRen_S;
+ logic [MISS_META_WIDTH-1:0] MetaFifoDout_D;
+ logic MetaFifoFull_S;
+ logic MetaFifoEmpty_S;
+ logic MetaFifoEmpty_SB;
+ logic MetaFifoFull_SB;
+ logic FifosDisabled_S;
+ logic ConfRegWen_S;
+ logic [1:0] ConfReg_DN;
+ logic [1:0] ConfReg_DP;
+ logic [AXI_DATA_WIDTH-1:0] wdata_reg_vec;
+ assign FifosDisabled_S = ConfReg_DP[0];
+ assign L1AllowMultiHit_SO = ConfReg_DP[1];
+ assign AddrFifoEmpty_S = ~AddrFifoEmpty_SB;
+ assign MetaFifoEmpty_S = ~MetaFifoEmpty_SB;
+ assign AddrFifoFull_S = ~AddrFifoFull_SB;
+ assign MetaFifoFull_S = ~MetaFifoFull_SB;
+ assign MhFifoFull_SO = (AddrFifoWen_S & AddrFifoFull_S) | (MetaFifoWen_S & MetaFifoFull_S);
+ generate
+ for ( j=0; j<AXI_DATA_WIDTH/8; j++ )
+ assign wdata_reg_vec[(j+1)*8-1:j*8] = wdata_reg[j];
+ endgenerate
+ // write address FIFO
+ always_comb
+ begin
+ AddrFifoWen_S = 1'b0;
+ AddrFifoDin_D = 'b0;
+ if ( (Miss_SI == 1'b1) && (FifosDisabled_S == 1'b0) ) // register a new miss
+ begin
+ AddrFifoWen_S = 1'b1;
+ AddrFifoDin_D = MissAddr_DI;
+ end
+ else if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 'b0) && (FifosDisabled_S == 1'b0)) // write request from AXI interface
+ begin
+ AddrFifoWen_S = 1'b1;
+ AddrFifoDin_D = wdata_reg_vec[ADDR_WIDTH_VIRT-1:0];
+ end
+ end
+ // write meta FIFO
+ always_comb
+ begin
+ MetaFifoWen_S = 1'b0;
+ MetaFifoDin_D = 'b0;
+ if ( (Miss_SI == 1'b1) && (FifosDisabled_S == 1'b0) ) // register a new miss
+ begin
+ MetaFifoWen_S = 1'b1;
+ MetaFifoDin_D[MISS_META_WIDTH-1:0] = MissMeta_DI;
+ end
+ else if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 4'h8) && (FifosDisabled_S == 1'b0) ) // write request from AXI interface
+ begin
+ MetaFifoWen_S = 1'b1;
+ MetaFifoDin_D = wdata_reg_vec[MISS_META_WIDTH-1:0];
+ end
+ end
+ // write configuration register
+ always_comb
+ begin
+ ConfRegWen_S = 1'b0;
+ ConfReg_DN = 1'b0;
+ if ( (wren_l1 == 1'b1) && (awaddr_reg[ADDR_MSB:0] == 8'h10) ) // write request from AXI interface
+ begin
+ ConfRegWen_S = 1'b1;
+ ConfReg_DN = wdata_reg_vec[$high(ConfReg_DN):0];
+ end
+ end
+ // AXI read data
+ always_comb
+ begin
+ s_axi_rdata = rdata_reg; // read L1 config
+ AddrFifoRen_S = 1'b0;
+ MetaFifoRen_S = 1'b0;
+ if ( rvalid == 1'b1 )
+ begin
+ // read address FIFO
+ if ( araddr_reg[ADDR_MSB:0] == 'b0 )
+ begin
+ s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
+ s_axi_rdata[ADDR_WIDTH_VIRT-1:0] = AddrFifoDout_D;
+ if ( AddrFifoEmpty_S == 1'b0 )
+ AddrFifoRen_S = 1'b1;
+ end
+ // read meta FIFO
+ else if ( araddr_reg[ADDR_MSB:0] == 4'h8 )
+ begin
+ s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
+ s_axi_rdata[31] = MetaFifoEmpty_S;
+ s_axi_rdata[MISS_META_WIDTH-1:0] = MetaFifoDout_D;
+ if ( MetaFifoEmpty_S == 1'b0 )
+ MetaFifoRen_S = 1'b1;
+ end
+ // read configuration register
+ else if ( araddr_reg[ADDR_MSB:0] == 8'h10 )
+ begin
+ s_axi_rdata = {AXI_DATA_WIDTH{1'b0}};
+ s_axi_rdata[$high(ConfReg_DP):0] = ConfReg_DP;
+ end
+ end // if ( rvalid == 1'b1 )
+ end // always_comb begin
+ // configuration register
+ always_ff @(posedge Clk_CI or negedge Rst_RBI) begin
+ if (Rst_RBI == 1'b0)
+ begin
+ ConfReg_DP <= 'b0;
+ end
+ else if (ConfRegWen_S == 1'b1)
+ begin
+ ConfReg_DP <= ConfReg_DN;
+ end
+ end
+ generic_fifo
+ #(
+ )
+ fifo_addr_i
+ (
+ .clk ( Clk_CI ),
+ .rst_n ( Rst_RBI ),
+ .data_i ( AddrFifoDin_D ),
+ .valid_i ( AddrFifoWen_S & AddrFifoFull_SB ),
+ .grant_o ( AddrFifoFull_SB ),
+ .data_o ( AddrFifoDout_D ),
+ .valid_o ( AddrFifoEmpty_SB ),
+ .grant_i ( AddrFifoRen_S ),
+ .test_mode_i ( 1'b0 )
+ );
+ generic_fifo
+ #(
+ )
+ fifo_meta_i
+ (
+ .clk ( Clk_CI ),
+ .rst_n ( Rst_RBI ),
+ .data_i ( MetaFifoDin_D ),
+ .valid_i ( MetaFifoWen_S & MetaFifoFull_SB ),
+ .grant_o ( MetaFifoFull_SB ),
+ .data_o ( MetaFifoDout_D ),
+ .valid_o ( MetaFifoEmpty_SB ),
+ .grant_i ( MetaFifoRen_S ),
+ .test_mode_i ( 1'b0 )
+ );
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class axi_rab_top(Elaboratable):
+ def __init__(self):
+ self.Clk_CI = Signal() # input
+ self.NonGatedClk_CI = Signal() # input
+ self.Rst_RBI = Signal() # input
+ self.s_axi4_awid = Signal() # input
+ self.s_axi4_awaddr = Signal() # input
+ self.s_axi4_awvalid = Signal(N_PORTS) # input
+ self.s_axi4_awready = Signal(N_PORTS) # output
+ self.s_axi4_awlen = Signal() # input
+ self.s_axi4_awsize = Signal() # input
+ self.s_axi4_awburst = Signal() # input
+ self.s_axi4_awlock = Signal(N_PORTS) # input
+ self.s_axi4_awprot = Signal() # input
+ self.s_axi4_awcache = Signal() # input
+ self.s_axi4_awregion = Signal() # input
+ self.s_axi4_awqos = Signal() # input
+ self.s_axi4_awuser = Signal() # input
+ self.s_axi4_wdata = Signal() # input
+ self.s_axi4_wvalid = Signal(N_PORTS) # input
+ self.s_axi4_wready = Signal(N_PORTS) # output
+ self.s_axi4_wstrb = Signal() # input
+ self.s_axi4_wlast = Signal(N_PORTS) # input
+ self.s_axi4_wuser = Signal() # input
+ self.s_axi4_bid = Signal() # output
+ self.s_axi4_bresp = Signal() # output
+ self.s_axi4_bvalid = Signal(N_PORTS) # output
+ self.s_axi4_buser = Signal() # output
+ self.s_axi4_bready = Signal(N_PORTS) # input
+ self.s_axi4_arid = Signal() # input
+ self.s_axi4_araddr = Signal() # input
+ self.s_axi4_arvalid = Signal(N_PORTS) # input
+ self.s_axi4_arready = Signal(N_PORTS) # output
+ self.s_axi4_arlen = Signal() # input
+ self.s_axi4_arsize = Signal() # input
+ self.s_axi4_arburst = Signal() # input
+ self.s_axi4_arlock = Signal(N_PORTS) # input
+ self.s_axi4_arprot = Signal() # input
+ self.s_axi4_arcache = Signal() # input
+ self.s_axi4_aruser = Signal() # input
+ self.s_axi4_rid = Signal() # output
+ self.s_axi4_rdata = Signal() # output
+ self.s_axi4_rresp = Signal() # output
+ self.s_axi4_rvalid = Signal(N_PORTS) # output
+ self.s_axi4_rready = Signal(N_PORTS) # input
+ self.s_axi4_rlast = Signal(N_PORTS) # output
+ self.s_axi4_ruser = Signal() # output
+ self.m0_axi4_awid = Signal() # output
+ self.m0_axi4_awaddr = Signal() # output
+ self.m0_axi4_awvalid = Signal(N_PORTS) # output
+ self.m0_axi4_awready = Signal(N_PORTS) # input
+ self.m0_axi4_awlen = Signal() # output
+ self.m0_axi4_awsize = Signal() # output
+ self.m0_axi4_awburst = Signal() # output
+ self.m0_axi4_awlock = Signal(N_PORTS) # output
+ self.m0_axi4_awprot = Signal() # output
+ self.m0_axi4_awcache = Signal() # output
+ self.m0_axi4_awregion = Signal() # output
+ self.m0_axi4_awqos = Signal() # output
+ self.m0_axi4_awuser = Signal() # output
+ self.m0_axi4_wdata = Signal() # output
+ self.m0_axi4_wvalid = Signal(N_PORTS) # output
+ self.m0_axi4_wready = Signal(N_PORTS) # input
+ self.m0_axi4_wstrb = Signal() # output
+ self.m0_axi4_wlast = Signal(N_PORTS) # output
+ self.m0_axi4_wuser = Signal() # output
+ self.m0_axi4_bid = Signal() # input
+ self.m0_axi4_bresp = Signal() # input
+ self.m0_axi4_bvalid = Signal(N_PORTS) # input
+ self.m0_axi4_buser = Signal() # input
+ self.m0_axi4_bready = Signal(N_PORTS) # output
+ self.m0_axi4_arid = Signal() # output
+ self.m0_axi4_araddr = Signal() # output
+ self.m0_axi4_arvalid = Signal(N_PORTS) # output
+ self.m0_axi4_arready = Signal(N_PORTS) # input
+ self.m0_axi4_arlen = Signal() # output
+ self.m0_axi4_arsize = Signal() # output
+ self.m0_axi4_arburst = Signal() # output
+ self.m0_axi4_arlock = Signal(N_PORTS) # output
+ self.m0_axi4_arprot = Signal() # output
+ self.m0_axi4_arcache = Signal() # output
+ self.m0_axi4_aruser = Signal() # output
+ self.m0_axi4_rid = Signal() # input
+ self.m0_axi4_rdata = Signal() # input
+ self.m0_axi4_rresp = Signal() # input
+ self.m0_axi4_rvalid = Signal(N_PORTS) # input
+ self.m0_axi4_rready = Signal(N_PORTS) # output
+ self.m0_axi4_rlast = Signal(N_PORTS) # input
+ self.m0_axi4_ruser = Signal() # input
+ self.m1_axi4_awid = Signal() # output
+ self.m1_axi4_awaddr = Signal() # output
+ self.m1_axi4_awvalid = Signal(N_PORTS) # output
+ self.m1_axi4_awready = Signal(N_PORTS) # input
+ self.m1_axi4_awlen = Signal() # output
+ self.m1_axi4_awsize = Signal() # output
+ self.m1_axi4_awburst = Signal() # output
+ self.m1_axi4_awlock = Signal(N_PORTS) # output
+ self.m1_axi4_awprot = Signal() # output
+ self.m1_axi4_awcache = Signal() # output
+ self.m1_axi4_awregion = Signal() # output
+ self.m1_axi4_awqos = Signal() # output
+ self.m1_axi4_awuser = Signal() # output
+ self.m1_axi4_wdata = Signal() # output
+ self.m1_axi4_wvalid = Signal(N_PORTS) # output
+ self.m1_axi4_wready = Signal(N_PORTS) # input
+ self.m1_axi4_wstrb = Signal() # output
+ self.m1_axi4_wlast = Signal(N_PORTS) # output
+ self.m1_axi4_wuser = Signal() # output
+ self.m1_axi4_bid = Signal() # input
+ self.m1_axi4_bresp = Signal() # input
+ self.m1_axi4_bvalid = Signal(N_PORTS) # input
+ self.m1_axi4_buser = Signal() # input
+ self.m1_axi4_bready = Signal(N_PORTS) # output
+ self.m1_axi4_arid = Signal() # output
+ self.m1_axi4_araddr = Signal() # output
+ self.m1_axi4_arvalid = Signal(N_PORTS) # output
+ self.m1_axi4_arready = Signal(N_PORTS) # input
+ self.m1_axi4_arlen = Signal() # output
+ self.m1_axi4_arsize = Signal() # output
+ self.m1_axi4_arburst = Signal() # output
+ self.m1_axi4_arlock = Signal(N_PORTS) # output
+ self.m1_axi4_arprot = Signal() # output
+ self.m1_axi4_arcache = Signal() # output
+ self.m1_axi4_aruser = Signal() # output
+ self.m1_axi4_rid = Signal() # input
+ self.m1_axi4_rdata = Signal() # input
+ self.m1_axi4_rresp = Signal() # input
+ self.m1_axi4_rvalid = Signal(N_PORTS) # input
+ self.m1_axi4_rready = Signal(N_PORTS) # output
+ self.m1_axi4_rlast = Signal(N_PORTS) # input
+ self.m1_axi4_ruser = Signal() # input
+ self.s_axi4lite_awaddr = Signal(AXI_LITE_ADDR_WIDTH) # input
+ self.s_axi4lite_awvalid = Signal() # input
+ self.s_axi4lite_awready = Signal() # output
+ self.s_axi4lite_wdata = Signal(AXI_LITE_DATA_WIDTH) # input
+ self.s_axi4lite_wvalid = Signal() # input
+ self.s_axi4lite_wready = Signal() # output
+ self.s_axi4lite_wstrb = Signal(1+ERROR p_expression_25) # input
+ self.s_axi4lite_bresp = Signal(2) # output
+ self.s_axi4lite_bvalid = Signal() # output
+ self.s_axi4lite_bready = Signal() # input
+ self.s_axi4lite_araddr = Signal(AXI_LITE_ADDR_WIDTH) # input
+ self.s_axi4lite_arvalid = Signal() # input
+ self.s_axi4lite_arready = Signal() # output
+ self.s_axi4lite_rdata = Signal(AXI_LITE_DATA_WIDTH) # output
+ self.s_axi4lite_rresp = Signal(2) # output
+ self.s_axi4lite_rvalid = Signal() # output
+ self.s_axi4lite_rready = Signal() # input
+ self.int_miss = Signal(N_PORTS) # output
+ self.int_multi = Signal(N_PORTS) # output
+ self.int_prot = Signal(N_PORTS) # output
+ self.int_mhf_full = Signal() # output
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# // --=========================================================================--
+# //
+# // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ████████╗ ██████╗ ██████╗
+# // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ╚══██╔══╝██╔═══██╗██╔══██╗
+# // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝
+# // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔═══╝
+# // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ██║ ╚██████╔╝██║
+# // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═════╝ ╚═╝
+# //
+# // --=========================================================================--
+# /*
+# * axi_rab_top
+# *
+# * The remapping address block (RAB) performs address translation for AXI
+# * transactions arriving at the input port and forwards them to different
+# * downstream AXI ports.
+# *
+# * The five axi channels are each buffered on the input side using a FIFO,
+# * described in axi4_XX_buffer. The RAB lookup result is merged into the
+# * AXI transaction via the axi4_XX_sender instances, which manage upstream
+# * error signaling for failed lookups.
+# *
+# * Address translation is performed based on data stored in up to two
+# * translation lookaside buffers (TLBs), which are private per RAB port (each
+# * of which has two AXI master ports and one AXI slave port). These TLBs
+# * are managed in software through the AXI-Lite interface.
+# *
+# * If ACP is enabled, the `cache_coherent` flag in the TLBs is used to
+# * multiplex between the two ports. If ACP is disabled, only the first master
+# * port is used. In this case, the `cache_coherent` flag is used to set the
+# * AxCACHE signals of the AXI bus accordingly.
+# *
+# * Authors:
+# * Antonio Pullini <pullinia@iis.ee.ethz.ch>
+# * Conrad Burchert <bconrad@ethz.ch>
+# * Maheshwara Sharma <msharma@student.ethz.ch>
+# * Andreas Kurth <akurth@iis.ee.ethz.ch>
+# * Johannes Weinbuch <jweinbuch@student.ethz.ch>
+# * Pirmin Vogel <vogelpi@iis.ee.ethz.ch>
+# */
+# //`include "pulp_soc_defines.sv"
+# ////import CfMath::log2;
+# module axi_rab_top
+# // Parameters {{{
+# #(
+# parameter N_PORTS = 2,
+# parameter N_L2_SETS = 32,
+# parameter N_L2_SET_ENTRIES = 32,
+# parameter AXI_DATA_WIDTH = 64,
+# parameter AXI_S_ADDR_WIDTH = 32,
+# parameter AXI_M_ADDR_WIDTH = 40,
+# parameter AXI_LITE_DATA_WIDTH = 64,
+# parameter AXI_LITE_ADDR_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 10,
+# parameter AXI_USER_WIDTH = 6,
+# parameter MH_FIFO_DEPTH = 16
+# )
+# // }}}
+# // Ports {{{
+# (
+# input logic Clk_CI, // This clock may be gated.
+# input logic NonGatedClk_CI,
+# input logic Rst_RBI,
+# // For every slave port there are two master ports. The master
+# // port to use can be set using the master_select flag of the protection
+# // bits of a slice
+# // AXI4 Slave {{{
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_awid,
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_awaddr,
+# input logic [N_PORTS-1:0] s_axi4_awvalid,
+# output logic [N_PORTS-1:0] s_axi4_awready,
+# input logic [N_PORTS-1:0] [7:0] s_axi4_awlen,
+# input logic [N_PORTS-1:0] [2:0] s_axi4_awsize,
+# input logic [N_PORTS-1:0] [1:0] s_axi4_awburst,
+# input logic [N_PORTS-1:0] s_axi4_awlock,
+# input logic [N_PORTS-1:0] [2:0] s_axi4_awprot,
+# input logic [N_PORTS-1:0] [3:0] s_axi4_awcache,
+# input logic [N_PORTS-1:0] [3:0] s_axi4_awregion,
+# input logic [N_PORTS-1:0] [3:0] s_axi4_awqos,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_awuser,
+# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_wdata,
+# input logic [N_PORTS-1:0] s_axi4_wvalid,
+# output logic [N_PORTS-1:0] s_axi4_wready,
+# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] s_axi4_wstrb,
+# input logic [N_PORTS-1:0] s_axi4_wlast,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_wuser,
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_bid,
+# output logic [N_PORTS-1:0] [1:0] s_axi4_bresp,
+# output logic [N_PORTS-1:0] s_axi4_bvalid,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_buser,
+# input logic [N_PORTS-1:0] s_axi4_bready,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_arid,
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] s_axi4_araddr,
+# input logic [N_PORTS-1:0] s_axi4_arvalid,
+# output logic [N_PORTS-1:0] s_axi4_arready,
+# input logic [N_PORTS-1:0] [7:0] s_axi4_arlen,
+# input logic [N_PORTS-1:0] [2:0] s_axi4_arsize,
+# input logic [N_PORTS-1:0] [1:0] s_axi4_arburst,
+# input logic [N_PORTS-1:0] s_axi4_arlock,
+# input logic [N_PORTS-1:0] [2:0] s_axi4_arprot,
+# input logic [N_PORTS-1:0] [3:0] s_axi4_arcache,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_aruser,
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] s_axi4_rid,
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] s_axi4_rdata,
+# output logic [N_PORTS-1:0] [1:0] s_axi4_rresp,
+# output logic [N_PORTS-1:0] s_axi4_rvalid,
+# input logic [N_PORTS-1:0] s_axi4_rready,
+# output logic [N_PORTS-1:0] s_axi4_rlast,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] s_axi4_ruser,
+# // }}}
+# // AXI4 Master 0 {{{
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_awid,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_awaddr,
+# output logic [N_PORTS-1:0] m0_axi4_awvalid,
+# input logic [N_PORTS-1:0] m0_axi4_awready,
+# output logic [N_PORTS-1:0] [7:0] m0_axi4_awlen,
+# output logic [N_PORTS-1:0] [2:0] m0_axi4_awsize,
+# output logic [N_PORTS-1:0] [1:0] m0_axi4_awburst,
+# output logic [N_PORTS-1:0] m0_axi4_awlock,
+# output logic [N_PORTS-1:0] [2:0] m0_axi4_awprot,
+# output logic [N_PORTS-1:0] [3:0] m0_axi4_awcache,
+# output logic [N_PORTS-1:0] [3:0] m0_axi4_awregion,
+# output logic [N_PORTS-1:0] [3:0] m0_axi4_awqos,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_awuser,
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_wdata,
+# output logic [N_PORTS-1:0] m0_axi4_wvalid,
+# input logic [N_PORTS-1:0] m0_axi4_wready,
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m0_axi4_wstrb,
+# output logic [N_PORTS-1:0] m0_axi4_wlast,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_wuser,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_bid,
+# input logic [N_PORTS-1:0] [1:0] m0_axi4_bresp,
+# input logic [N_PORTS-1:0] m0_axi4_bvalid,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_buser,
+# output logic [N_PORTS-1:0] m0_axi4_bready,
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_arid,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m0_axi4_araddr,
+# output logic [N_PORTS-1:0] m0_axi4_arvalid,
+# input logic [N_PORTS-1:0] m0_axi4_arready,
+# output logic [N_PORTS-1:0] [7:0] m0_axi4_arlen,
+# output logic [N_PORTS-1:0] [2:0] m0_axi4_arsize,
+# output logic [N_PORTS-1:0] [1:0] m0_axi4_arburst,
+# output logic [N_PORTS-1:0] m0_axi4_arlock,
+# output logic [N_PORTS-1:0] [2:0] m0_axi4_arprot,
+# output logic [N_PORTS-1:0] [3:0] m0_axi4_arcache,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_aruser,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m0_axi4_rid,
+# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m0_axi4_rdata,
+# input logic [N_PORTS-1:0] [1:0] m0_axi4_rresp,
+# input logic [N_PORTS-1:0] m0_axi4_rvalid,
+# output logic [N_PORTS-1:0] m0_axi4_rready,
+# input logic [N_PORTS-1:0] m0_axi4_rlast,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m0_axi4_ruser,
+# // }}}
+# // AXI4 Master 1 {{{
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_awid,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_awaddr,
+# output logic [N_PORTS-1:0] m1_axi4_awvalid,
+# input logic [N_PORTS-1:0] m1_axi4_awready,
+# output logic [N_PORTS-1:0] [7:0] m1_axi4_awlen,
+# output logic [N_PORTS-1:0] [2:0] m1_axi4_awsize,
+# output logic [N_PORTS-1:0] [1:0] m1_axi4_awburst,
+# output logic [N_PORTS-1:0] m1_axi4_awlock,
+# output logic [N_PORTS-1:0] [2:0] m1_axi4_awprot,
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_awcache,
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_awregion,
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_awqos,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_awuser,
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_wdata,
+# output logic [N_PORTS-1:0] m1_axi4_wvalid,
+# input logic [N_PORTS-1:0] m1_axi4_wready,
+# output logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] m1_axi4_wstrb,
+# output logic [N_PORTS-1:0] m1_axi4_wlast,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_wuser,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_bid,
+# input logic [N_PORTS-1:0] [1:0] m1_axi4_bresp,
+# input logic [N_PORTS-1:0] m1_axi4_bvalid,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_buser,
+# output logic [N_PORTS-1:0] m1_axi4_bready,
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_arid,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] m1_axi4_araddr,
+# output logic [N_PORTS-1:0] m1_axi4_arvalid,
+# input logic [N_PORTS-1:0] m1_axi4_arready,
+# output logic [N_PORTS-1:0] [7:0] m1_axi4_arlen,
+# output logic [N_PORTS-1:0] [2:0] m1_axi4_arsize,
+# output logic [N_PORTS-1:0] [1:0] m1_axi4_arburst,
+# output logic [N_PORTS-1:0] m1_axi4_arlock,
+# output logic [N_PORTS-1:0] [2:0] m1_axi4_arprot,
+# output logic [N_PORTS-1:0] [3:0] m1_axi4_arcache,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_aruser,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] m1_axi4_rid,
+# input logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] m1_axi4_rdata,
+# input logic [N_PORTS-1:0] [1:0] m1_axi4_rresp,
+# input logic [N_PORTS-1:0] m1_axi4_rvalid,
+# output logic [N_PORTS-1:0] m1_axi4_rready,
+# input logic [N_PORTS-1:0] m1_axi4_rlast,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] m1_axi4_ruser,
+# // }}}
+# // AXI 4 Lite Slave (Configuration Interface) {{{
+# // AXI4-Lite port to setup the rab slices
+# // use this to program the configuration registers
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_awaddr,
+# input logic s_axi4lite_awvalid,
+# output logic s_axi4lite_awready,
+# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_wdata,
+# input logic s_axi4lite_wvalid,
+# output logic s_axi4lite_wready,
+# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi4lite_wstrb,
+# output logic [1:0] s_axi4lite_bresp,
+# output logic s_axi4lite_bvalid,
+# input logic s_axi4lite_bready,
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi4lite_araddr,
+# input logic s_axi4lite_arvalid,
+# output logic s_axi4lite_arready,
+# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi4lite_rdata,
+# output logic [1:0] s_axi4lite_rresp,
+# output logic s_axi4lite_rvalid,
+# input logic s_axi4lite_rready,
+# // }}}
+# // BRAMs {{{
+# //`ifdef RAB_AX_LOG_EN
+# // BramPort.Slave ArBram_PS,
+# // BramPort.Slave AwBram_PS,
+# //`endif
+# // }}}
+# // Logger Control {{{
+# //`ifdef RAB_AX_LOG_EN
+# // input logic LogEn_SI,
+# // input logic ArLogClr_SI,
+# // input logic AwLogClr_SI,
+# // output logic ArLogRdy_SO,
+# // output logic AwLogRdy_SO,
+# //`endif
+# // }}}
+# // Interrupt Outputs {{{
+# // Interrupt lines to handle misses, collisions of slices/multiple hits,
+# // protection faults and overflow of the miss handling fifo
+# //`ifdef RAB_AX_LOG_EN
+# // output logic int_ar_log_full,
+# // output logic int_aw_log_full,
+# //`endif
+# output logic [N_PORTS-1:0] int_miss,
+# output logic [N_PORTS-1:0] int_multi,
+# output logic [N_PORTS-1:0] int_prot,
+# output logic int_mhf_full
+# // }}}
+# );
+ // }}}
+ // Signals {{{
+ // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗
+ // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝
+ // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗
+ // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║
+ // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║
+ // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝
+ //
+ // Internal AXI4 lines, these connect buffers on the slave side to the rab core and
+ // multiplexers which switch between the two master outputs
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_awid;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_awaddr;
+ logic [N_PORTS-1:0] int_awvalid;
+ logic [N_PORTS-1:0] int_awready;
+ logic [N_PORTS-1:0] [7:0] int_awlen;
+ logic [N_PORTS-1:0] [2:0] int_awsize;
+ logic [N_PORTS-1:0] [1:0] int_awburst;
+ logic [N_PORTS-1:0] int_awlock;
+ logic [N_PORTS-1:0] [2:0] int_awprot;
+ logic [N_PORTS-1:0] [3:0] int_awcache;
+ logic [N_PORTS-1:0] [3:0] int_awregion;
+ logic [N_PORTS-1:0] [3:0] int_awqos;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_awuser;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_wdata;
+ logic [N_PORTS-1:0] int_wvalid;
+ logic [N_PORTS-1:0] int_wready;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH/8-1:0] int_wstrb;
+ logic [N_PORTS-1:0] int_wlast;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_wuser;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_bid;
+ logic [N_PORTS-1:0] [1:0] int_bresp;
+ logic [N_PORTS-1:0] int_bvalid;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_buser;
+ logic [N_PORTS-1:0] int_bready;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_arid;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_araddr;
+ logic [N_PORTS-1:0] int_arvalid;
+ logic [N_PORTS-1:0] int_arready;
+ logic [N_PORTS-1:0] [7:0] int_arlen;
+ logic [N_PORTS-1:0] [2:0] int_arsize;
+ logic [N_PORTS-1:0] [1:0] int_arburst;
+ logic [N_PORTS-1:0] int_arlock;
+ logic [N_PORTS-1:0] [2:0] int_arprot;
+ logic [N_PORTS-1:0] [3:0] int_arcache;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_aruser;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_rid;
+ logic [N_PORTS-1:0] [1:0] int_rresp;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_rdata;
+ logic [N_PORTS-1:0] int_rlast;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_ruser;
+ logic [N_PORTS-1:0] int_rvalid;
+ logic [N_PORTS-1:0] int_rready;
+ // rab_core outputs
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_wtrans_addr;
+ logic [N_PORTS-1:0] int_wtrans_accept;
+ logic [N_PORTS-1:0] int_wtrans_drop;
+ logic [N_PORTS-1:0] int_wtrans_miss;
+ logic [N_PORTS-1:0] int_wtrans_sent;
+ logic [N_PORTS-1:0] int_wtrans_cache_coherent;
+ logic [N_PORTS-1:0] int_wmaster_select;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] int_rtrans_addr;
+ logic [N_PORTS-1:0] int_rtrans_accept;
+ logic [N_PORTS-1:0] int_rtrans_drop;
+ logic [N_PORTS-1:0] int_rtrans_miss;
+ logic [N_PORTS-1:0] int_rtrans_sent;
+ logic [N_PORTS-1:0] int_rtrans_cache_coherent;
+ logic [N_PORTS-1:0] int_rmaster_select;
+ logic [N_PORTS-1:0] w_master_select;
+ // Internal master0 AXI4 lines. These connect the first master port to the
+ // multiplexers
+ // For channels read address, write address and write data the other lines
+ // are ignored if valid is not set, therefore we only need to multiplex those
+ logic [N_PORTS-1:0] int_m0_awvalid;
+ logic [N_PORTS-1:0] int_m0_awready;
+ logic [N_PORTS-1:0] int_m0_wvalid;
+ logic [N_PORTS-1:0] int_m0_wready;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_bid;
+ logic [N_PORTS-1:0] [1:0] int_m0_bresp;
+ logic [N_PORTS-1:0] int_m0_bvalid;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_buser;
+ logic [N_PORTS-1:0] int_m0_bready;
+ logic [N_PORTS-1:0] int_m0_arvalid;
+ logic [N_PORTS-1:0] int_m0_arready;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m0_rid;
+ logic [N_PORTS-1:0] [1:0] int_m0_rresp;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m0_rdata;
+ logic [N_PORTS-1:0] int_m0_rlast;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m0_ruser;
+ logic [N_PORTS-1:0] int_m0_rready;
+ logic [N_PORTS-1:0] int_m0_rvalid;
+ logic [N_PORTS-1:0] l1_m0_ar_accept;
+ logic [N_PORTS-1:0] l1_m0_ar_drop;
+ logic [N_PORTS-1:0] l1_m0_ar_save;
+ logic [N_PORTS-1:0] l1_m0_ar_done;
+ logic [N_PORTS-1:0] l2_m0_ar_accept;
+ logic [N_PORTS-1:0] l2_m0_ar_drop;
+ logic [N_PORTS-1:0] l2_m0_ar_done;
+ logic [N_PORTS-1:0] l2_m0_ar_sending;
+ logic [N_PORTS-1:0] l1_m0_aw_accept;
+ logic [N_PORTS-1:0] l1_m0_aw_drop;
+ logic [N_PORTS-1:0] l1_m0_aw_save;
+ logic [N_PORTS-1:0] l1_m0_aw_done;
+ logic [N_PORTS-1:0] l2_m0_aw_accept;
+ logic [N_PORTS-1:0] l2_m0_aw_drop;
+ logic [N_PORTS-1:0] l2_m0_aw_done;
+ logic [N_PORTS-1:0] l2_m0_aw_sending;
+ // Internal master1 AXI4 lines. These connect the second master port to the
+ // multiplexers
+ // For channels read address, write address and write data the other lines
+ // are ignored if valid is not set, therefore we only need to multiplex those
+ logic [N_PORTS-1:0] int_m1_awvalid;
+ logic [N_PORTS-1:0] int_m1_awready;
+ logic [N_PORTS-1:0] int_m1_wvalid;
+ logic [N_PORTS-1:0] int_m1_wready;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_bid;
+ logic [N_PORTS-1:0] [1:0] int_m1_bresp;
+ logic [N_PORTS-1:0] int_m1_bvalid;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_buser;
+ logic [N_PORTS-1:0] int_m1_bready;
+ logic [N_PORTS-1:0] int_m1_arvalid;
+ logic [N_PORTS-1:0] int_m1_arready;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_m1_rid;
+ logic [N_PORTS-1:0] [1:0] int_m1_rresp;
+ logic [N_PORTS-1:0] [AXI_DATA_WIDTH-1:0] int_m1_rdata;
+ logic [N_PORTS-1:0] int_m1_rlast;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_m1_ruser;
+ logic [N_PORTS-1:0] int_m1_rvalid;
+ logic [N_PORTS-1:0] int_m1_rready;
+ logic [N_PORTS-1:0] l1_m1_ar_accept;
+ logic [N_PORTS-1:0] l1_m1_ar_drop;
+ logic [N_PORTS-1:0] l1_m1_ar_save;
+ logic [N_PORTS-1:0] l1_m1_ar_done;
+ logic [N_PORTS-1:0] l2_m1_ar_accept;
+ logic [N_PORTS-1:0] l2_m1_ar_drop;
+ logic [N_PORTS-1:0] l2_m1_ar_done;
+ logic [N_PORTS-1:0] l1_m1_aw_accept;
+ logic [N_PORTS-1:0] l1_m1_aw_drop;
+ logic [N_PORTS-1:0] l1_m1_aw_save;
+ logic [N_PORTS-1:0] l1_m1_aw_done;
+ logic [N_PORTS-1:0] l2_m1_aw_accept;
+ logic [N_PORTS-1:0] l2_m1_aw_drop;
+ logic [N_PORTS-1:0] l2_m1_aw_done;
+ // L1 outputs
+ logic [N_PORTS-1:0] rab_miss; // L1 RAB miss
+ logic [N_PORTS-1:0] rab_prot;
+ logic [N_PORTS-1:0] rab_multi;
+ logic [N_PORTS-1:0] rab_prefetch;
+ //
+ // Signals used to support L2 TLB
+ //
+ // L2 RAM configuration signals
+ logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] L2CfgWData_D;
+ logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] L2CfgWAddr_D;
+ logic [N_PORTS-1:0] L2CfgWE_S;
+ // L1 output and drop Buffer
+ logic [N_PORTS-1:0] L1OutRwType_D, L1DropRwType_DP;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L1OutUser_D, L1DropUser_DP;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L1OutId_D, L1DropId_DP;
+ logic [N_PORTS-1:0] [7:0] L1OutLen_D, L1DropLen_DP;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L1OutAddr_D, L1DropAddr_DP;
+ logic [N_PORTS-1:0] L1OutProt_D, L1DropProt_DP;
+ logic [N_PORTS-1:0] L1OutMulti_D, L1DropMulti_DP;
+ logic [N_PORTS-1:0] L1DropEn_S;
+ logic [N_PORTS-1:0] L1DropPrefetch_S;
+ logic [N_PORTS-1:0] L1DropValid_SN, L1DropValid_SP;
+ // L2 input Buffer
+ logic [N_PORTS-1:0] L2InRwType_DP;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2InUser_DP;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2InId_DP;
+ logic [N_PORTS-1:0] [7:0] L2InLen_DP;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2InAddr_DP;
+ logic [N_PORTS-1:0] L2InEn_S;
+ // L2 output Buffer
+ logic [N_PORTS-1:0] L2OutRwType_DP;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] L2OutUser_DP;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] L2OutId_DP;
+ logic [N_PORTS-1:0] [7:0] L2OutLen_DP;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] L2OutInAddr_DP;
+ logic [N_PORTS-1:0] L2OutHit_SN, L2OutHit_SP;
+ logic [N_PORTS-1:0] L2OutMiss_SN, L2OutMiss_SP;
+ logic [N_PORTS-1:0] L2OutProt_SN, L2OutProt_SP;
+ logic [N_PORTS-1:0] L2OutMulti_SN, L2OutMulti_SP;
+ logic [N_PORTS-1:0] L2OutCC_SN, L2OutCC_SP;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] L2OutAddr_DN, L2OutAddr_DP;
+ logic [N_PORTS-1:0] L2OutValid_SN, L2OutValid_SP;
+ logic [N_PORTS-1:0] L2OutPrefetch_S;
+ logic [N_PORTS-1:0] L2OutReady_S;
+ logic [N_PORTS-1:0] L2OutEn_S;
+ // L2 outputs
+ logic [N_PORTS-1:0] L2Busy_S;
+ logic [N_PORTS-1:0] L2OutValid_S;
+ logic [N_PORTS-1:0] L2Miss_S;
+ // Signals for interfacing the AXI modules
+ logic [N_PORTS-1:0] l1_ar_accept;
+ logic [N_PORTS-1:0] l1_aw_accept;
+ logic [N_PORTS-1:0] l1_w_accept;
+ logic [N_PORTS-1:0] l1_xw_accept;
+ logic [N_PORTS-1:0] l1_ar_drop;
+ logic [N_PORTS-1:0] l1_aw_drop;
+ logic [N_PORTS-1:0] l1_w_drop;
+ logic [N_PORTS-1:0] l1_xw_drop;
+ logic [N_PORTS-1:0] l1_ar_save;
+ logic [N_PORTS-1:0] l1_aw_save;
+ logic [N_PORTS-1:0] l1_w_save;
+ logic [N_PORTS-1:0] l1_xw_save;
+ logic [N_PORTS-1:0] l1_ar_done;
+ logic [N_PORTS-1:0] l1_r_done;
+ logic [N_PORTS-1:0] l1_r_drop;
+ logic [N_PORTS-1:0] lx_r_drop;
+ logic [N_PORTS-1:0] lx_r_done;
+ logic [N_PORTS-1:0] l1_aw_done;
+ logic [N_PORTS-1:0] l1_w_done;
+ logic [N_PORTS-1:0] l1_xw_done;
+ logic [N_PORTS-1:0] l1_aw_done_SP;
+ logic [N_PORTS-1:0] l1_w_done_SP;
+ logic [N_PORTS-1:0] l2_ar_accept;
+ logic [N_PORTS-1:0] l2_aw_accept;
+ logic [N_PORTS-1:0] l2_w_accept;
+ logic [N_PORTS-1:0] l2_xw_accept;
+ logic [N_PORTS-1:0] l2_ar_drop;
+ logic [N_PORTS-1:0] l2_r_drop;
+ logic [N_PORTS-1:0] l2_xr_drop;
+ logic [N_PORTS-1:0] l2_aw_drop;
+ logic [N_PORTS-1:0] l2_w_drop;
+ logic [N_PORTS-1:0] l2_xw_drop;
+ logic [N_PORTS-1:0] l2_aw_done;
+ logic [N_PORTS-1:0] l2_w_done;
+ logic [N_PORTS-1:0] l2_xw_done;
+ logic [N_PORTS-1:0] l2_aw_done_SP;
+ logic [N_PORTS-1:0] l2_w_done_SP;
+ logic [N_PORTS-1:0] l2_ar_done;
+ logic [N_PORTS-1:0] l2_r_done;
+ logic [N_PORTS-1:0] l2_xr_done;
+ logic [N_PORTS-1:0] l2_ar_done_SP;
+ logic [N_PORTS-1:0] l2_r_done_SP;
+ logic [N_PORTS-1:0] l1_mx_aw_done;
+ logic [N_PORTS-1:0] l1_mx_ar_done;
+ logic [N_PORTS-1:0] l1_m0_aw_done_SP;
+ logic [N_PORTS-1:0] l1_m0_ar_done_SP;
+ logic [N_PORTS-1:0] l1_m1_aw_done_SP;
+ logic [N_PORTS-1:0] l1_m1_ar_done_SP;
+ logic [N_PORTS-1:0] l2_mx_aw_done;
+ logic [N_PORTS-1:0] l2_mx_ar_done;
+ logic [N_PORTS-1:0] l2_m0_aw_done_SP;
+ logic [N_PORTS-1:0] l2_m0_ar_done_SP;
+ logic [N_PORTS-1:0] l2_m1_aw_done_SP;
+ logic [N_PORTS-1:0] l2_m1_ar_done_SP;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] l1_id_drop, lx_id_drop, b_id_drop;
+ logic [N_PORTS-1:0] [7:0] l1_len_drop, lx_len_drop;
+ logic [N_PORTS-1:0] l1_prefetch_drop, lx_prefetch_drop, b_prefetch_drop;
+ logic [N_PORTS-1:0] l1_hit_drop, lx_hit_drop, b_hit_drop;
+ logic [N_PORTS-1:0] b_drop;
+ logic [N_PORTS-1:0] b_done;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_aw_addr;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] l2_ar_addr;
+ logic [N_PORTS-1:0] l2_cache_coherent;
+ logic [N_PORTS-1:0] l2_master_select;
+ logic [N_PORTS-1:0] aw_in_stall;
+ logic [N_PORTS-1:0] aw_out_stall;
+ genvar i;
+ typedef enum logic {IDLE, BUSY} r_resp_mux_ctrl_state_t;
+ r_resp_mux_ctrl_state_t [N_PORTS-1:0] RRespMuxCtrl_SN, RRespMuxCtrl_SP;
+ logic [N_PORTS-1:0] RRespSel_SN, RRespSel_SP;
+ logic [N_PORTS-1:0] RRespBurst_S;
+ logic [N_PORTS-1:0] RRespSelIm_S;
+ // }}}
+ // Local parameters {{{
+ // Enable L2 for select ports
+ localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY;
+ // L2TLB parameters
+ localparam integer HUM_BUFFER_DEPTH = (N_L2_SET_ENTRIES/2/`RAB_L2_N_PAR_VA_RAMS)+13;
+ // }}}
+ // Derive `master_select` from cache coherency flag. {{{
+ // With EN_ACP defined, each per-port master select vector follows the matching
+ // cache-coherency flag vector (L1 write, L1 read, L2). Without EN_ACP, all three
+ // select vectors are tied to '0.
+ `ifdef EN_ACP
+ assign int_wmaster_select = int_wtrans_cache_coherent;
+ assign int_rmaster_select = int_rtrans_cache_coherent;
+ assign l2_master_select = l2_cache_coherent;
+ `else
+ assign int_wmaster_select = '0;
+ assign int_rmaster_select = '0;
+ assign l2_master_select = '0;
+ `endif
+ // }}}
+ // Buf and Send {{{
+ // ██████╗ ██╗ ██╗███████╗ ██╗ ███████╗███████╗███╗ ██╗██████╗
+ // ██╔══██╗██║ ██║██╔════╝ ██║ ██╔════╝██╔════╝████╗ ██║██╔══██╗
+ // ██████╔╝██║ ██║█████╗ ████████╗ ███████╗█████╗ ██╔██╗ ██║██║ ██║
+ // ██╔══██╗██║ ██║██╔══╝ ██╔═██╔═╝ ╚════██║██╔══╝ ██║╚██╗██║██║ ██║
+ // ██████╔╝╚██████╔╝██║ ██████║ ███████║███████╗██║ ╚████║██████╔╝
+ // ╚═════╝ ╚═════╝ ╚═╝ ╚═════╝ ╚══════╝╚══════╝╚═╝ ╚═══╝╚═════╝
+ //
+ logic[N_PORTS-1:0] m0_write_is_burst, m0_read_is_burst;
+ logic[N_PORTS-1:0] m1_write_is_burst, m1_read_is_burst;
+ generate for (i = 0; i < N_PORTS; i++) begin : BUF_AND_SEND
+ // Write Address channel (aw) {{{
+ /*
+ * write address channel (aw)
+ *
+ * ██╗ ██╗██████╗ ██╗████████╗███████╗ █████╗ ██████╗ ██████╗ ██████╗
+ * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗
+ * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ███████║██║ ██║██║ ██║██████╔╝
+ * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██║██║ ██║██║ ██║██╔══██╗
+ * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║██████╔╝██████╔╝██║ ██║
+ * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
+ *
+ */
+ axi4_aw_buffer
+ #(
+ )
+ u_aw_buffer
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_awid ( s_axi4_awid[i] ),
+ .s_axi4_awaddr ( s_axi4_awaddr[i] ),
+ .s_axi4_awvalid ( s_axi4_awvalid[i] ),
+ .s_axi4_awready ( s_axi4_awready[i] ),
+ .s_axi4_awlen ( s_axi4_awlen[i] ),
+ .s_axi4_awsize ( s_axi4_awsize[i] ),
+ .s_axi4_awburst ( s_axi4_awburst[i] ),
+ .s_axi4_awlock ( s_axi4_awlock[i] ),
+ .s_axi4_awprot ( s_axi4_awprot[i] ),
+ .s_axi4_awcache ( s_axi4_awcache[i] ),
+ .s_axi4_awregion ( s_axi4_awregion[i] ),
+ .s_axi4_awqos ( s_axi4_awqos[i] ),
+ .s_axi4_awuser ( s_axi4_awuser[i] ),
+ .m_axi4_awid ( int_awid[i] ),
+ .m_axi4_awaddr ( int_awaddr[i] ),
+ .m_axi4_awvalid ( int_awvalid[i] ),
+ .m_axi4_awready ( int_awready[i] ),
+ .m_axi4_awlen ( int_awlen[i] ),
+ .m_axi4_awsize ( int_awsize[i] ),
+ .m_axi4_awburst ( int_awburst[i] ),
+ .m_axi4_awlock ( int_awlock[i] ),
+ .m_axi4_awprot ( int_awprot[i] ),
+ .m_axi4_awcache ( int_awcache[i] ),
+ .m_axi4_awregion ( int_awregion[i] ),
+ .m_axi4_awqos ( int_awqos[i] ),
+ .m_axi4_awuser ( int_awuser[i] )
+ );
+ axi4_aw_sender
+ #(
+ )
+ u_aw_sender_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .l1_done_o ( l1_m0_aw_done[i] ),
+ .l1_accept_i ( l1_m0_aw_accept[i] ),
+ .l1_drop_i ( l1_m0_aw_drop[i] ),
+ .l1_save_i ( l1_m0_aw_save[i] ),
+ .l2_done_o ( l2_m0_aw_done[i] ),
+ .l2_accept_i ( l2_m0_aw_accept[i] ),
+ .l2_drop_i ( l2_m0_aw_drop[i] ),
+ .l2_sending_o ( l2_m0_aw_sending[i] ),
+ .l1_awaddr_i ( int_wtrans_addr[i] ),
+ .l2_awaddr_i ( l2_aw_addr[i] ),
+ .s_axi4_awid ( int_awid[i] ),
+ .s_axi4_awvalid ( int_m0_awvalid[i] ),
+ .s_axi4_awready ( int_m0_awready[i] ),
+ .s_axi4_awlen ( int_awlen[i] ),
+ .s_axi4_awsize ( int_awsize[i] ),
+ .s_axi4_awburst ( int_awburst[i] ),
+ .s_axi4_awlock ( int_awlock[i] ),
+ .s_axi4_awprot ( int_awprot[i] ),
+ .s_axi4_awcache ( int_awcache[i] ),
+ .s_axi4_awregion ( int_awregion[i] ),
+ .s_axi4_awqos ( int_awqos[i] ),
+ .s_axi4_awuser ( int_awuser[i] ),
+ .m_axi4_awid ( m0_axi4_awid[i] ),
+ .m_axi4_awaddr ( m0_axi4_awaddr[i] ),
+ .m_axi4_awvalid ( m0_axi4_awvalid[i] ),
+ .m_axi4_awready ( m0_axi4_awready[i] ),
+ .m_axi4_awlen ( m0_axi4_awlen[i] ),
+ .m_axi4_awsize ( m0_axi4_awsize[i] ),
+ .m_axi4_awburst ( m0_axi4_awburst[i] ),
+ .m_axi4_awlock ( m0_axi4_awlock[i] ),
+ .m_axi4_awprot ( m0_axi4_awprot[i] ),
+ .m_axi4_awcache ( ),
+ .m_axi4_awregion ( m0_axi4_awregion[i] ),
+ .m_axi4_awqos ( m0_axi4_awqos[i] ),
+ .m_axi4_awuser ( m0_axi4_awuser[i] )
+ );
+ // AWCACHE of master 0 is driven here rather than by u_aw_sender_m0, whose m_axi4_awcache
+ // output is left unconnected. Without EN_ACP the value depends on burstiness and cache
+ // coherence; with EN_ACP defined it is the static value 4'b0011.
+ // A write counts as a burst when AWLEN is non-zero and AWBURST is not 2'b00.
+ assign m0_write_is_burst[i] = (m0_axi4_awlen[i] != {8{1'b0}}) && (m0_axi4_awburst[i] != 2'b00);
+ `ifndef EN_ACP
+ always_comb begin
+ // Coherent if the L2 path is currently sending a coherent request, or the L1
+ // translation result is flagged coherent.
+ if ( (l2_m0_aw_sending[i] & l2_cache_coherent[i]) | int_wtrans_cache_coherent[i]) begin
+ if (m0_write_is_burst[i]) begin
+ m0_axi4_awcache[i] = 4'b0111;
+ end else begin
+ m0_axi4_awcache[i] = 4'b1111;
+ end
+ end else begin
+ // Non-coherent transactions use the same value as the static EN_ACP case.
+ m0_axi4_awcache[i] = 4'b0011;
+ end
+ end
+ `else
+ assign m0_axi4_awcache[i] = 4'b0011;
+ `endif
+ axi4_aw_sender
+ #(
+ )
+ u_aw_sender_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .l1_accept_i ( l1_m1_aw_accept[i] ),
+ .l1_drop_i ( l1_m1_aw_drop[i] ),
+ .l1_save_i ( l1_m1_aw_save[i] ),
+ .l1_done_o ( l1_m1_aw_done[i] ),
+ .l2_accept_i ( l2_m1_aw_accept[i] ),
+ .l2_drop_i ( l2_m1_aw_drop[i] ),
+ .l2_done_o ( l2_m1_aw_done[i] ),
+ .l2_sending_o ( ), // just helps to set axcache
+ .l1_awaddr_i ( int_wtrans_addr[i] ),
+ .l2_awaddr_i ( l2_aw_addr[i] ),
+ .s_axi4_awid ( int_awid[i] ),
+ .s_axi4_awvalid ( int_m1_awvalid[i] ),
+ .s_axi4_awready ( int_m1_awready[i] ),
+ .s_axi4_awlen ( int_awlen[i] ),
+ .s_axi4_awsize ( int_awsize[i] ),
+ .s_axi4_awburst ( int_awburst[i] ),
+ .s_axi4_awlock ( int_awlock[i] ),
+ .s_axi4_awprot ( int_awprot[i] ),
+ .s_axi4_awcache ( int_awcache[i] ),
+ .s_axi4_awregion ( int_awregion[i] ),
+ .s_axi4_awqos ( int_awqos[i] ),
+ .s_axi4_awuser ( int_awuser[i] ),
+ .m_axi4_awid ( m1_axi4_awid[i] ),
+ .m_axi4_awaddr ( m1_axi4_awaddr[i] ),
+ .m_axi4_awvalid ( m1_axi4_awvalid[i] ),
+ .m_axi4_awready ( m1_axi4_awready[i] ),
+ .m_axi4_awlen ( m1_axi4_awlen[i] ),
+ .m_axi4_awsize ( m1_axi4_awsize[i] ),
+ .m_axi4_awburst ( m1_axi4_awburst[i] ),
+ .m_axi4_awlock ( m1_axi4_awlock[i] ),
+ .m_axi4_awprot ( m1_axi4_awprot[i] ),
+ .m_axi4_awcache ( ),
+ .m_axi4_awregion ( m1_axi4_awregion[i] ),
+ .m_axi4_awqos ( m1_axi4_awqos[i] ),
+ .m_axi4_awuser ( m1_axi4_awuser[i] )
+ );
+ // AWCACHE of master 1 is driven here rather than by u_aw_sender_m1, whose m_axi4_awcache
+ // output is left unconnected. With EN_ACP defined the value depends only on burstiness;
+ // without EN_ACP it is the static value 4'b0011.
+ // A write counts as a burst when AWLEN is non-zero and AWBURST is not 2'b00.
+ assign m1_write_is_burst[i] = (m1_axi4_awlen[i] != {8{1'b0}}) && (m1_axi4_awburst[i] != 2'b00);
+ `ifdef EN_ACP
+ always_comb begin
+ // NOTE(review): the burst value here (4'b1011) differs from the one used for master 0
+ // write bursts (4'b0111) - confirm this is intended.
+ if (m1_write_is_burst[i]) begin
+ m1_axi4_awcache[i] = 4'b1011;
+ end else begin
+ m1_axi4_awcache[i] = 4'b1111;
+ end
+ end
+ `else
+ assign m1_axi4_awcache[i] = 4'b0011;
+ `endif
+ // }}}
+ // Write Data channel (w) {{{
+ /*
+ * write data channel (w)
+ *
+ * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ █████╗ ████████╗ █████╗
+ * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔══██╗╚══██╔══╝██╔══██╗
+ * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██║ ██║███████║ ██║ ███████║
+ * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██║ ██║██╔══██║ ██║ ██╔══██║
+ * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██████╔╝██║ ██║ ██║ ██║ ██║
+ * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚═╝
+ *
+ */
+ axi4_w_buffer
+ #(
+ )
+ u_w_buffer
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ // L1 interface
+ .l1_done_o ( l1_w_done[i] ),
+ .l1_accept_i ( l1_w_accept[i] ),
+ .l1_save_i ( l1_w_save[i] ),
+ .l1_drop_i ( l1_w_drop[i] ),
+ .l1_master_i ( int_wmaster_select[i] ),
+ .l1_id_i ( l1_id_drop[i] ),
+ .l1_len_i ( l1_len_drop[i] ),
+ .l1_prefetch_i ( l1_prefetch_drop[i] ),
+ .l1_hit_i ( l1_hit_drop[i] ),
+ // L2 interface
+ .l2_done_o ( l2_w_done[i] ),
+ .l2_accept_i ( l2_w_accept[i] ),
+ .l2_drop_i ( l2_w_drop[i] ),
+ .l2_master_i ( l2_master_select[i] ),
+ .l2_id_i ( lx_id_drop[i] ),
+ .l2_len_i ( lx_len_drop[i] ),
+ .l2_prefetch_i ( lx_prefetch_drop[i] ),
+ .l2_hit_i ( lx_hit_drop[i] ),
+ // Top-level control outputs
+ .master_select_o ( w_master_select[i] ),
+ .input_stall_o ( aw_in_stall[i] ), // stall L1 AW input if request buffers full
+ .output_stall_o ( aw_out_stall[i] ), // stall L1 AW hit forwarding if bypass not possible
+ // B sender interface
+ .b_drop_o ( b_drop[i] ),
+ .b_done_i ( b_done[i] ),
+ .id_o ( b_id_drop[i] ),
+ .prefetch_o ( b_prefetch_drop[i] ),
+ .hit_o ( b_hit_drop[i] ),
+ // AXI W channel interfaces
+ .s_axi4_wdata ( s_axi4_wdata[i] ),
+ .s_axi4_wvalid ( s_axi4_wvalid[i] ),
+ .s_axi4_wready ( s_axi4_wready[i] ),
+ .s_axi4_wstrb ( s_axi4_wstrb[i] ),
+ .s_axi4_wlast ( s_axi4_wlast[i] ),
+ .s_axi4_wuser ( s_axi4_wuser[i] ),
+ .m_axi4_wdata ( int_wdata[i] ),
+ .m_axi4_wvalid ( int_wvalid[i] ),
+ .m_axi4_wready ( int_wready[i] ),
+ .m_axi4_wstrb ( int_wstrb[i] ),
+ .m_axi4_wlast ( int_wlast[i] ),
+ .m_axi4_wuser ( int_wuser[i] )
+ );
+ axi4_w_sender
+ #(
+ )
+ u_w_sender_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_wdata ( int_wdata[i] ),
+ .s_axi4_wvalid ( int_m0_wvalid[i] ),
+ .s_axi4_wready ( int_m0_wready[i] ),
+ .s_axi4_wstrb ( int_wstrb[i] ),
+ .s_axi4_wlast ( int_wlast[i] ),
+ .s_axi4_wuser ( int_wuser[i] ),
+ .m_axi4_wdata ( m0_axi4_wdata[i] ),
+ .m_axi4_wvalid ( m0_axi4_wvalid[i] ),
+ .m_axi4_wready ( m0_axi4_wready[i] ),
+ .m_axi4_wstrb ( m0_axi4_wstrb[i] ),
+ .m_axi4_wlast ( m0_axi4_wlast[i] ),
+ .m_axi4_wuser ( m0_axi4_wuser[i] )
+ );
+ axi4_w_sender
+ #(
+ )
+ u_w_sender_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_wdata ( int_wdata[i] ),
+ .s_axi4_wvalid ( int_m1_wvalid[i] ),
+ .s_axi4_wready ( int_m1_wready[i] ),
+ .s_axi4_wstrb ( int_wstrb[i] ),
+ .s_axi4_wlast ( int_wlast[i] ),
+ .s_axi4_wuser ( int_wuser[i] ),
+ .m_axi4_wdata ( m1_axi4_wdata[i] ),
+ .m_axi4_wvalid ( m1_axi4_wvalid[i] ),
+ .m_axi4_wready ( m1_axi4_wready[i] ),
+ .m_axi4_wstrb ( m1_axi4_wstrb[i] ),
+ .m_axi4_wlast ( m1_axi4_wlast[i] ),
+ .m_axi4_wuser ( m1_axi4_wuser[i] )
+ );
+ /*
+ * Multiplexer to switch between the two output master ports on the write data (w) channel
+ *
+ * w_master_select[i] (an output of u_w_buffer) picks which W sender sees int_wvalid[i] and
+ * whose ready is returned on int_wready[i]. The valid of the other sender is held low.
+ */
+ always_comb begin
+ /* Only one output can be selected at any time */
+ if (w_master_select[i] == 1'b0) begin
+ // Master 0 selected: forward valid to m0 and take ready from m0.
+ int_m0_wvalid[i] = int_wvalid[i];
+ int_m1_wvalid[i] = 1'b0;
+ int_wready[i] = int_m0_wready[i];
+ end else begin
+ // Master 1 selected: forward valid to m1 and take ready from m1.
+ int_m0_wvalid[i] = 1'b0;
+ int_m1_wvalid[i] = int_wvalid[i];
+ int_wready[i] = int_m1_wready[i];
+ end
+ end
+ // }}}
+ // Write Response channel (b) {{{
+ /*
+ * write response channel (b)
+ *
+ * ██╗ ██╗██████╗ ██╗████████╗███████╗ ██████╗ ███████╗███████╗██████╗
+ * ██║ ██║██╔══██╗██║╚══██╔══╝██╔════╝ ██╔══██╗██╔════╝██╔════╝██╔══██╗
+ * ██║ █╗ ██║██████╔╝██║ ██║ █████╗ ██████╔╝█████╗ ███████╗██████╔╝
+ * ██║███╗██║██╔══██╗██║ ██║ ██╔══╝ ██╔══██╗██╔══╝ ╚════██║██╔═══╝
+ * ╚███╔███╔╝██║ ██║██║ ██║ ███████╗ ██║ ██║███████╗███████║██║
+ * ╚══╝╚══╝ ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝
+ *
+ */
+ axi4_b_buffer
+ #(
+ )
+ u_b_buffer_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_bid ( int_m0_bid[i] ),
+ .s_axi4_bresp ( int_m0_bresp[i] ),
+ .s_axi4_bvalid ( int_m0_bvalid[i] ),
+ .s_axi4_buser ( int_m0_buser[i] ),
+ .s_axi4_bready ( int_m0_bready[i] ),
+ .m_axi4_bid ( m0_axi4_bid[i] ),
+ .m_axi4_bresp ( m0_axi4_bresp[i] ),
+ .m_axi4_bvalid ( m0_axi4_bvalid[i] ),
+ .m_axi4_buser ( m0_axi4_buser[i] ),
+ .m_axi4_bready ( m0_axi4_bready[i] )
+ );
+ axi4_b_buffer
+ #(
+ )
+ u_b_buffer_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_bid ( int_m1_bid[i] ),
+ .s_axi4_bresp ( int_m1_bresp[i] ),
+ .s_axi4_bvalid ( int_m1_bvalid[i] ),
+ .s_axi4_buser ( int_m1_buser[i] ),
+ .s_axi4_bready ( int_m1_bready[i] ),
+ .m_axi4_bid ( m1_axi4_bid[i] ),
+ .m_axi4_bresp ( m1_axi4_bresp[i] ),
+ .m_axi4_bvalid ( m1_axi4_bvalid[i] ),
+ .m_axi4_buser ( m1_axi4_buser[i] ),
+ .m_axi4_bready ( m1_axi4_bready[i] )
+ );
+ axi4_b_sender
+ #(
+ )
+ u_b_sender
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .drop_i ( b_drop[i] ),
+ .done_o ( b_done[i] ),
+ .id_i ( b_id_drop[i] ),
+ .prefetch_i ( b_prefetch_drop[i] ),
+ .hit_i ( b_hit_drop[i] ),
+ .s_axi4_bid ( s_axi4_bid[i] ),
+ .s_axi4_bresp ( s_axi4_bresp[i] ),
+ .s_axi4_bvalid ( s_axi4_bvalid[i] ),
+ .s_axi4_buser ( s_axi4_buser[i] ),
+ .s_axi4_bready ( s_axi4_bready[i] ),
+ .m_axi4_bid ( int_bid[i] ),
+ .m_axi4_bresp ( int_bresp[i] ),
+ .m_axi4_bvalid ( int_bvalid[i] ),
+ .m_axi4_buser ( int_buser[i] ),
+ .m_axi4_bready ( int_bready[i] )
+ );
+ /*
+ * Multiplexer to switch between the two output master ports on the write response (b) channel
+ *
+ * The selected master's id, resp, user and valid are forwarded to the int_b* lines and it
+ * receives int_bready[i]; the other master's bready is held low.
+ */
+ always_comb begin
+ /* Output 1 always gets priority, so if it has something to send connect
+ it and let output 0 wait using bready = 0 */
+ if (int_m1_bvalid[i] == 1'b1) begin
+ int_m0_bready[i] = 1'b0;
+ int_m1_bready[i] = int_bready[i];
+ int_bid[i] = int_m1_bid[i];
+ int_bresp[i] = int_m1_bresp[i];
+ int_buser[i] = int_m1_buser[i];
+ int_bvalid[i] = int_m1_bvalid[i];
+ end else begin
+ // Master 1 has no response pending: connect master 0 instead.
+ int_m0_bready[i] = int_bready[i];
+ int_m1_bready[i] = 1'b0;
+ int_bid[i] = int_m0_bid[i];
+ int_bresp[i] = int_m0_bresp[i];
+ int_buser[i] = int_m0_buser[i];
+ int_bvalid[i] = int_m0_bvalid[i];
+ end
+ end
+ // }}}
+ // Read Address channel (ar) {{{
+ /*
+ * read address channel (ar)
+ *
+ * ██████╗ ███████╗ █████╗ ██████╗ █████╗ ██████╗ ██████╗ ██████╗
+ * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔══██╗██╔══██╗██╔══██╗
+ * ██████╔╝█████╗ ███████║██║ ██║ ███████║██║ ██║██║ ██║██████╔╝
+ * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██║██║ ██║██║ ██║██╔══██╗
+ * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║██████╔╝██████╔╝██║ ██║
+ * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═╝ ╚═╝
+ *
+ */
+ axi4_ar_buffer
+ #(
+ )
+ u_ar_buffer
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_arid ( s_axi4_arid[i] ),
+ .s_axi4_araddr ( s_axi4_araddr[i] ),
+ .s_axi4_arvalid ( s_axi4_arvalid[i] ),
+ .s_axi4_arready ( s_axi4_arready[i] ),
+ .s_axi4_arlen ( s_axi4_arlen[i] ),
+ .s_axi4_arsize ( s_axi4_arsize[i] ),
+ .s_axi4_arburst ( s_axi4_arburst[i] ),
+ .s_axi4_arlock ( s_axi4_arlock[i] ),
+ .s_axi4_arprot ( s_axi4_arprot[i] ),
+ .s_axi4_arcache ( s_axi4_arcache[i] ),
+ .s_axi4_aruser ( s_axi4_aruser[i] ),
+ .m_axi4_arid ( int_arid[i] ),
+ .m_axi4_araddr ( int_araddr[i] ),
+ .m_axi4_arvalid ( int_arvalid[i] ),
+ .m_axi4_arready ( int_arready[i] ),
+ .m_axi4_arlen ( int_arlen[i] ),
+ .m_axi4_arsize ( int_arsize[i] ),
+ .m_axi4_arburst ( int_arburst[i] ),
+ .m_axi4_arlock ( int_arlock[i] ),
+ .m_axi4_arprot ( int_arprot[i] ),
+ .m_axi4_arcache ( int_arcache[i] ),
+ .m_axi4_aruser ( int_aruser[i] )
+ );
+ axi4_ar_sender
+ #(
+ )
+ u_ar_sender_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .l1_done_o ( l1_m0_ar_done[i] ),
+ .l1_accept_i ( l1_m0_ar_accept[i] ),
+ .l1_drop_i ( l1_m0_ar_drop[i] ),
+ .l1_save_i ( l1_m0_ar_save[i] ),
+ .l2_done_o ( l2_m0_ar_done[i] ),
+ .l2_accept_i ( l2_m0_ar_accept[i] ),
+ .l2_drop_i ( l2_m0_ar_drop[i] ),
+ .l2_sending_o ( l2_m0_ar_sending[i] ),
+ .l1_araddr_i ( int_rtrans_addr[i] ),
+ .l2_araddr_i ( l2_ar_addr[i] ),
+ .s_axi4_arid ( int_arid[i] ),
+ .s_axi4_arvalid ( int_m0_arvalid[i] ),
+ .s_axi4_arready ( int_m0_arready[i] ),
+ .s_axi4_arlen ( int_arlen[i] ),
+ .s_axi4_arsize ( int_arsize[i] ),
+ .s_axi4_arburst ( int_arburst[i] ),
+ .s_axi4_arlock ( int_arlock[i] ),
+ .s_axi4_arprot ( int_arprot[i] ),
+ .s_axi4_arcache ( int_arcache[i] ),
+ .s_axi4_aruser ( int_aruser[i] ),
+ .m_axi4_arid ( m0_axi4_arid[i] ),
+ .m_axi4_araddr ( m0_axi4_araddr[i] ),
+ .m_axi4_arvalid ( m0_axi4_arvalid[i] ),
+ .m_axi4_arready ( m0_axi4_arready[i] ),
+ .m_axi4_arlen ( m0_axi4_arlen[i] ),
+ .m_axi4_arsize ( m0_axi4_arsize[i] ),
+ .m_axi4_arburst ( m0_axi4_arburst[i] ),
+ .m_axi4_arlock ( m0_axi4_arlock[i] ),
+ .m_axi4_arprot ( m0_axi4_arprot[i] ),
+ .m_axi4_arcache ( ),
+ .m_axi4_aruser ( m0_axi4_aruser[i] )
+ );
+ // ARCACHE of master 0 is driven here rather than by u_ar_sender_m0, whose m_axi4_arcache
+ // output is left unconnected. Without EN_ACP the value depends on burstiness and cache
+ // coherence; with EN_ACP defined it is the static value 4'b0011.
+ // A read counts as a burst when ARLEN is non-zero and ARBURST is not 2'b00.
+ assign m0_read_is_burst[i] = (m0_axi4_arlen[i] != {8{1'b0}}) && (m0_axi4_arburst[i] != 2'b00);
+ `ifndef EN_ACP
+ always_comb begin
+ // Coherent if the L2 path is currently sending a coherent request, or the L1
+ // translation result is flagged coherent.
+ if ( (l2_m0_ar_sending[i] & l2_cache_coherent[i]) | int_rtrans_cache_coherent[i]) begin
+ if (m0_read_is_burst[i]) begin
+ m0_axi4_arcache[i] = 4'b1011;
+ end else begin
+ m0_axi4_arcache[i] = 4'b1111;
+ end
+ end else begin
+ // Non-coherent transactions use the same value as the static EN_ACP case.
+ m0_axi4_arcache[i] = 4'b0011;
+ end
+ end
+ `else
+ assign m0_axi4_arcache[i] = 4'b0011;
+ `endif
+ axi4_ar_sender
+ #(
+ )
+ u_ar_sender_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .l1_done_o ( l1_m1_ar_done[i] ),
+ .l1_accept_i ( l1_m1_ar_accept[i] ),
+ .l1_drop_i ( l1_m1_ar_drop[i] ),
+ .l1_save_i ( l1_m1_ar_save[i] ),
+ .l2_done_o ( l2_m1_ar_done[i] ),
+ .l2_accept_i ( l2_m1_ar_accept[i] ),
+ .l2_drop_i ( l2_m1_ar_drop[i] ),
+ .l2_sending_o ( ), // just helps to set axcache
+ .l1_araddr_i ( int_rtrans_addr[i] ),
+ .l2_araddr_i ( l2_ar_addr[i] ),
+ .s_axi4_arid ( int_arid[i] ),
+ .s_axi4_arvalid ( int_m1_arvalid[i] ),
+ .s_axi4_arready ( int_m1_arready[i] ),
+ .s_axi4_arlen ( int_arlen[i] ),
+ .s_axi4_arsize ( int_arsize[i] ),
+ .s_axi4_arburst ( int_arburst[i] ),
+ .s_axi4_arlock ( int_arlock[i] ),
+ .s_axi4_arprot ( int_arprot[i] ),
+ .s_axi4_arcache ( int_arcache[i] ),
+ .s_axi4_aruser ( int_aruser[i] ),
+ .m_axi4_arid ( m1_axi4_arid[i] ),
+ .m_axi4_araddr ( m1_axi4_araddr[i] ),
+ .m_axi4_arvalid ( m1_axi4_arvalid[i] ),
+ .m_axi4_arready ( m1_axi4_arready[i] ),
+ .m_axi4_arlen ( m1_axi4_arlen[i] ),
+ .m_axi4_arsize ( m1_axi4_arsize[i] ),
+ .m_axi4_arburst ( m1_axi4_arburst[i] ),
+ .m_axi4_arlock ( m1_axi4_arlock[i] ),
+ .m_axi4_arprot ( m1_axi4_arprot[i] ),
+ .m_axi4_arcache ( ),
+ .m_axi4_aruser ( m1_axi4_aruser[i] )
+ );
+ // ARCACHE of master 1 is driven here rather than by u_ar_sender_m1, whose m_axi4_arcache
+ // output is left unconnected. With EN_ACP defined the value depends only on burstiness;
+ // without EN_ACP it is the static value 4'b0011.
+ // A read counts as a burst when ARLEN is non-zero and ARBURST is not 2'b00.
+ assign m1_read_is_burst[i] = (m1_axi4_arlen[i] != {8{1'b0}}) && (m1_axi4_arburst[i] != 2'b00);
+ `ifdef EN_ACP
+ always_comb begin
+ if (m1_read_is_burst[i]) begin
+ m1_axi4_arcache[i] = 4'b1011;
+ end else begin
+ m1_axi4_arcache[i] = 4'b1111;
+ end
+ end
+ `else
+ assign m1_axi4_arcache[i] = 4'b0011;
+ `endif
+ // }}}
+ // Read Response channel (r) {{{
+ /*
+ * read response channel (r)
+ *
+ * ██████╗ ███████╗ █████╗ ██████╗ ██████╗ ███████╗███████╗██████╗
+ * ██╔══██╗██╔════╝██╔══██╗██╔══██╗ ██╔══██╗██╔════╝██╔════╝██╔══██╗
+ * ██████╔╝█████╗ ███████║██║ ██║ ██████╔╝█████╗ ███████╗██████╔╝
+ * ██╔══██╗██╔══╝ ██╔══██║██║ ██║ ██╔══██╗██╔══╝ ╚════██║██╔═══╝
+ * ██║ ██║███████╗██║ ██║██████╔╝ ██║ ██║███████╗███████║██║
+ * ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝
+ *
+ */
+ axi4_r_buffer
+ #(
+ )
+ u_r_buffer_m0
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_rid ( int_m0_rid[i] ),
+ .s_axi4_rresp ( int_m0_rresp[i] ),
+ .s_axi4_rdata ( int_m0_rdata[i] ),
+ .s_axi4_rlast ( int_m0_rlast[i] ),
+ .s_axi4_rvalid ( int_m0_rvalid[i] ),
+ .s_axi4_ruser ( int_m0_ruser[i] ),
+ .s_axi4_rready ( int_m0_rready[i] ),
+ .m_axi4_rid ( m0_axi4_rid[i] ),
+ .m_axi4_rresp ( m0_axi4_rresp[i] ),
+ .m_axi4_rdata ( m0_axi4_rdata[i] ),
+ .m_axi4_rlast ( m0_axi4_rlast[i] ),
+ .m_axi4_rvalid ( m0_axi4_rvalid[i] ),
+ .m_axi4_ruser ( m0_axi4_ruser[i] ),
+ .m_axi4_rready ( m0_axi4_rready[i] )
+ );
+ axi4_r_buffer
+ #(
+ )
+ u_r_buffer_m1
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .s_axi4_rid ( int_m1_rid[i] ),
+ .s_axi4_rresp ( int_m1_rresp[i] ),
+ .s_axi4_rdata ( int_m1_rdata[i] ),
+ .s_axi4_rlast ( int_m1_rlast[i] ),
+ .s_axi4_rvalid ( int_m1_rvalid[i] ),
+ .s_axi4_ruser ( int_m1_ruser[i] ),
+ .s_axi4_rready ( int_m1_rready[i] ),
+ .m_axi4_rid ( m1_axi4_rid[i] ),
+ .m_axi4_rresp ( m1_axi4_rresp[i] ),
+ .m_axi4_rdata ( m1_axi4_rdata[i] ),
+ .m_axi4_rlast ( m1_axi4_rlast[i] ),
+ .m_axi4_rvalid ( m1_axi4_rvalid[i] ),
+ .m_axi4_ruser ( m1_axi4_ruser[i] ),
+ .m_axi4_rready ( m1_axi4_rready[i] )
+ );
+ axi4_r_sender
+ #(
+ )
+ u_r_sender
+ (
+ .axi4_aclk ( Clk_CI ),
+ .axi4_arstn ( Rst_RBI ),
+ .drop_i ( lx_r_drop[i] ),
+ .drop_len_i ( lx_len_drop[i] ),
+ .done_o ( lx_r_done[i] ),
+ .id_i ( lx_id_drop[i] ),
+ .prefetch_i ( lx_prefetch_drop[i] ),
+ .hit_i ( lx_hit_drop[i] ),
+ .s_axi4_rid ( s_axi4_rid[i] ),
+ .s_axi4_rresp ( s_axi4_rresp[i] ),
+ .s_axi4_rdata ( s_axi4_rdata[i] ),
+ .s_axi4_rlast ( s_axi4_rlast[i] ),
+ .s_axi4_rvalid ( s_axi4_rvalid[i] ),
+ .s_axi4_ruser ( s_axi4_ruser[i] ),
+ .s_axi4_rready ( s_axi4_rready[i] ),
+ .m_axi4_rid ( int_rid[i] ),
+ .m_axi4_rresp ( int_rresp[i] ),
+ .m_axi4_rdata ( int_rdata[i] ),
+ .m_axi4_rlast ( int_rlast[i] ),
+ .m_axi4_rvalid ( int_rvalid[i] ),
+ .m_axi4_ruser ( int_ruser[i] ),
+ .m_axi4_rready ( int_rready[i] )
+ );
+ /*
+ * Multiplexer to switch between the two output master ports on the read response(r) channel
+ *
+ * Do not perform read burst interleaving as the DMA does not support it. This means we can only
+ * switch between the two masters upon sending rlast or when idle.
+ *
+ * However, if the downstream already performs burst interleaving, this cannot be undone here.
+ * Also, the downstream may interleave a burst response with a single-beat transaction. In this
+ * case, the FSM below falls out of the burst mode. To avoid it performing burst interleaving
+ * after such an event, it gives priority to the master which received the last burst in case
+ * both have a burst ready (rvalid).
+ *
+ * Order of priority:
+ * 1. Ongoing burst transaction
+ * 2. Single-beat transaction on Master 1.
+ * 3. Single-beat transaction on Master 0.
+ * 4. Burst transaction on master that received the last burst.
+ */
+ // Select signal
+ always_ff @(posedge Clk_CI) begin
+ if (Rst_RBI == 0) begin
+ RRespSel_SP[i] <= 1'b0;
+ end else begin
+ RRespSel_SP[i] <= RRespSel_SN[i];
+ end
+ end
+ // FSM
+ // Combinational next-state and select logic of the R response mux for port i.
+ // Drives RRespMuxCtrl_SN (next FSM state), RRespSel_SN (next registered select),
+ // RRespBurst_S (high while in BUSY) and RRespSelIm_S (immediate select, 1 = Master 1).
+ always_comb begin : RRespMuxFsm
+ // defaults: hold state and registered select, not in a burst, immediate select = Master 0
+ RRespMuxCtrl_SN[i] = RRespMuxCtrl_SP[i];
+ RRespSel_SN[i] = RRespSel_SP[i];
+ RRespBurst_S[i] = 1'b0;
+ RRespSelIm_S[i] = 1'b0;
+ unique case (RRespMuxCtrl_SP[i])
+ IDLE: begin
+ // immediately forward single-beat transactions
+ // (Master 1 is checked first; state and registered select stay unchanged)
+ if (int_m1_rvalid[i] && int_m1_rlast[i])
+ RRespSelIm_S[i] = 1'b1;
+ else if (int_m0_rvalid[i] && int_m0_rlast[i])
+ RRespSelIm_S[i] = 1'b0;
+ // bursts - they also start immediately
+ // (reached only when no valid master presents rlast)
+ else if (int_m1_rvalid[i] || int_m0_rvalid[i]) begin
+ RRespMuxCtrl_SN[i] = BUSY;
+ // in case both are ready, continue with the master that had the last burst
+ if (int_m1_rvalid[i] && int_m0_rvalid[i]) begin
+ RRespSel_SN[i] = RRespSel_SP[i];
+ RRespSelIm_S[i] = RRespSel_SP[i];
+ end else if (int_m1_rvalid[i]) begin
+ RRespSel_SN[i] = 1'b1;
+ RRespSelIm_S[i] = 1'b1;
+ end else begin
+ RRespSel_SN[i] = 1'b0;
+ RRespSelIm_S[i] = 1'b0;
+ end
+ end
+ end
+ BUSY: begin
+ // RRespBurst_S makes the multiplexer use the registered select RRespSel_SP
+ RRespBurst_S[i] = 1'b1;
+ // detect last handshake of currently ongoing transfer
+ if (int_rvalid[i] && int_rready[i] && int_rlast[i])
+ RRespMuxCtrl_SN[i] = IDLE;
+ end
+ // any other state encoding falls back to IDLE
+ default: begin
+ RRespMuxCtrl_SN[i] = IDLE;
+ end
+ endcase
+ end
+ // FSM state
+ always_ff @(posedge Clk_CI) begin
+ if (Rst_RBI == 0) begin
+ RRespMuxCtrl_SP[i] <= IDLE;
+ end else begin
+ RRespMuxCtrl_SP[i] <= RRespMuxCtrl_SN[i];
+ end
+ end
+ // Actual multiplexer
+ always_comb begin
+ if ( (RRespBurst_S[i] && RRespSel_SP[i]) || (!RRespBurst_S[i] && RRespSelIm_S[i]) ) begin
+ int_m0_rready[i] = 1'b0;
+ int_m1_rready[i] = int_rready[i];
+ int_rid[i] = int_m1_rid[i];
+ int_rresp[i] = int_m1_rresp[i];
+ int_rdata[i] = int_m1_rdata[i];
+ int_rlast[i] = int_m1_rlast[i];
+ int_ruser[i] = int_m1_ruser[i];
+ int_rvalid[i] = int_m1_rvalid[i];
+ end else begin
+ int_m0_rready[i] = int_rready[i];
+ int_m1_rready[i] = 1'b0;
+ int_rid[i] = int_m0_rid[i];
+ int_rresp[i] = int_m0_rresp[i];
+ int_rdata[i] = int_m0_rdata[i];
+ int_rlast[i] = int_m0_rlast[i];
+ int_ruser[i] = int_m0_ruser[i];
+ int_rvalid[i] = int_m0_rvalid[i];
+ end
+ end
+ end // BUF & SEND
+ // }}}
+ endgenerate // BUF & SEND }}}
+ // Log {{{
+`ifdef RAB_AX_LOG_EN
+ // AW and AR request loggers, instantiated only when RAB_AX_LOG_EN is defined.
+ // Both observe index 1 of the slave-side s_axi4_aw*/s_axi4_ar* signals.
+ // NOTE(review): the parameter lists are empty in this listing - confirm against the RTL.
+ AxiBramLogger
+ #(
+ )
+ u_aw_logger
+ (
+ .Clk_CI ( NonGatedClk_CI ),
+ .TimestampClk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .AxiValid_SI ( s_axi4_awvalid[1] ),
+ .AxiReady_SI ( s_axi4_awready[1] ),
+ .AxiId_DI ( s_axi4_awid[1] ),
+ .AxiAddr_DI ( s_axi4_awaddr[1] ),
+ .AxiLen_DI ( s_axi4_awlen[1] ),
+ .Clear_SI ( AwLogClr_SI ),
+ .LogEn_SI ( LogEn_SI ),
+ .Full_SO ( int_aw_log_full ),
+ .Ready_SO ( AwLogRdy_SO ),
+ .Bram_PS ( AwBram_PS )
+ );
+ AxiBramLogger
+ #(
+ )
+ u_ar_logger
+ (
+ .Clk_CI ( NonGatedClk_CI ),
+ .TimestampClk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .AxiValid_SI ( s_axi4_arvalid[1] ),
+ .AxiReady_SI ( s_axi4_arready[1] ),
+ .AxiId_DI ( s_axi4_arid[1] ),
+ .AxiAddr_DI ( s_axi4_araddr[1] ),
+ .AxiLen_DI ( s_axi4_arlen[1] ),
+ .Clear_SI ( ArLogClr_SI ),
+ .LogEn_SI ( LogEn_SI ),
+ .Full_SO ( int_ar_log_full ),
+ .Ready_SO ( ArLogRdy_SO ),
+ .Bram_PS ( ArBram_PS )
+ );
+ // close the conditional region so that the logic below is always compiled
+`endif
+ // }}}
+ // RAB Core {{{
+ // ██████╗ █████╗ ██████╗ ██████╗ ██████╗ ██████╗ ███████╗
+ // ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔═══██╗██╔══██╗██╔════╝
+ // ██████╔╝███████║██████╔╝ ██║ ██║ ██║██████╔╝█████╗
+ // ██╔══██╗██╔══██║██╔══██╗ ██║ ██║ ██║██╔══██╗██╔══╝
+ // ██║ ██║██║ ██║██████╔╝ ╚██████╗╚██████╔╝██║ ██║███████╗
+ // ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝ ╚═════╝ ╚═╝ ╚═╝╚══════╝
+ //
+ /*
+ * rab_core
+ *
+ * The rab core translates addresses. It has two ports, which can be used
+ * independently, however they will compete for time internally, as lookups
+ * are serialized.
+ *
+ * The port type (port1_type / port2_type) is read (0) or write (1) and is used to check the
+ * protection flags. If they do not match, an interrupt is raised on the int_prot line.
+ */
+ rab_core
+ #(
+ // No trailing comma after the last parameter override (a dangling ',' before ')' is
+ // a syntax error).
+ // NOTE(review): only N_L2_SETS is overridden in this listing - confirm against the RTL
+ // whether further parameter overrides are missing here.
+ .N_L2_SETS ( N_L2_SETS )
+ )
+ u_rab_core
+ (
+ .Clk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ // Config IF
+ .s_axi_awaddr ( s_axi4lite_awaddr ),
+ .s_axi_awvalid ( s_axi4lite_awvalid ),
+ .s_axi_awready ( s_axi4lite_awready ),
+ .s_axi_wdata ( s_axi4lite_wdata ),
+ .s_axi_wstrb ( s_axi4lite_wstrb ),
+ .s_axi_wvalid ( s_axi4lite_wvalid ),
+ .s_axi_wready ( s_axi4lite_wready ),
+ .s_axi_bresp ( s_axi4lite_bresp ),
+ .s_axi_bvalid ( s_axi4lite_bvalid ),
+ .s_axi_bready ( s_axi4lite_bready ),
+ .s_axi_araddr ( s_axi4lite_araddr ),
+ .s_axi_arvalid ( s_axi4lite_arvalid ),
+ .s_axi_arready ( s_axi4lite_arready ),
+ .s_axi_rready ( s_axi4lite_rready ),
+ .s_axi_rdata ( s_axi4lite_rdata ),
+ .s_axi_rresp ( s_axi4lite_rresp ),
+ .s_axi_rvalid ( s_axi4lite_rvalid ),
+ // L1 miss info outputs -> L2 TLB arbitration
+ .int_miss ( rab_miss ),
+ .int_multi ( rab_multi ),
+ .int_prot ( rab_prot ),
+ .int_prefetch ( rab_prefetch ),
+ .int_mhf_full ( int_mhf_full ),
+ // L1 transaction info outputs -> L2 TLB arbitration
+ .int_axaddr_o ( L1OutAddr_D ),
+ .int_axid_o ( L1OutId_D ),
+ .int_axlen_o ( L1OutLen_D ),
+ .int_axuser_o ( L1OutUser_D ),
+ // Write Req IF (port1_type is tied to all ones = write)
+ .port1_addr ( int_awaddr ),
+ .port1_id ( int_awid ),
+ .port1_len ( int_awlen ),
+ .port1_size ( int_awsize ),
+ .port1_addr_valid ( int_awvalid & ~aw_in_stall ), // avoid the FSM accepting new AW requests
+ .port1_type ( {N_PORTS{1'b1}} ),
+ .port1_user ( int_awuser ),
+ .port1_sent ( int_wtrans_sent ), // signal done to L1 FSM
+ .port1_out_addr ( int_wtrans_addr ),
+ .port1_cache_coherent ( int_wtrans_cache_coherent ),
+ .port1_accept ( int_wtrans_accept ),
+ .port1_drop ( int_wtrans_drop ),
+ .port1_miss ( int_wtrans_miss ),
+ // Read Req IF (port2_type is tied to all zeros = read)
+ .port2_addr ( int_araddr ),
+ .port2_id ( int_arid ),
+ .port2_len ( int_arlen ),
+ .port2_size ( int_arsize ),
+ .port2_addr_valid ( int_arvalid ),
+ .port2_type ( {N_PORTS{1'b0}} ),
+ .port2_user ( int_aruser ),
+ .port2_sent ( int_rtrans_sent ), // signal done to L1 FSM
+ .port2_out_addr ( int_rtrans_addr ),
+ .port2_cache_coherent ( int_rtrans_cache_coherent ),
+ .port2_accept ( int_rtrans_accept ),
+ .port2_drop ( int_rtrans_drop ),
+ .port2_miss ( int_rtrans_miss ),
+ // L2 miss info inputs -> axi_rab_cfg
+ .miss_l2_i ( L2Miss_S ),
+ .miss_l2_addr_i ( L2OutInAddr_DP ),
+ .miss_l2_id_i ( L2OutId_DP ),
+ .miss_l2_user_i ( L2OutUser_DP ),
+ // L2 config outputs
+ .wdata_l2_o ( L2CfgWData_D ),
+ .waddr_l2_o ( L2CfgWAddr_D ),
+ .wren_l2_o ( L2CfgWE_S )
+ );
+ // }}}
+ // AX SPLITS {{{
+ // █████╗ ██╗ ██╗ ███████╗██████╗ ██╗ ██╗████████╗
+ // ██╔══██╗╚██╗██╔╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝
+ // ███████║ ╚███╔╝ ███████╗██████╔╝██║ ██║ ██║
+ // ██╔══██║ ██╔██╗ ╚════██║██╔═══╝ ██║ ██║ ██║
+ // ██║ ██║██╔╝ ██╗ ███████║██║ ███████╗██║ ██║
+ // ╚═╝ ╚═╝╚═╝ ╚═╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝
+ //
+ /**
+ * Multiplex the two output master ports of the Read Address and Write Address (AR/AW) channels.
+ *
+ * Use the `int_xmaster_select` signal to route the signals to either Master 0 (to memory) or
+ * Master 1 (to ACP). In case of an L1 miss: Route the signals to both masters. They shall be
+ * saved until the L2 outputs are available.
+ */
+ generate for (i = 0; i < N_PORTS; i++) begin : AX_SPLIT
+ /*
+ * When accepting L1 transactions, we must just do so on the selected master. Drop requests must
+ * be performed on any one of the two masters. Save requests must be performed by both masters.
+ */
+ always_comb begin : AW_L1_SPLIT
+ // TLB handshake
+ l1_m0_aw_accept[i] = 1'b0;
+ l1_m1_aw_accept[i] = 1'b0;
+ l1_m0_aw_drop[i] = 1'b0;
+ l1_m1_aw_drop[i] = 1'b0;
+ l1_m0_aw_save[i] = 1'b0;
+ l1_m1_aw_save[i] = 1'b0;
+ l1_mx_aw_done[i] = 1'b0;
+ // AXI sender input handshake
+ int_m0_awvalid[i] = 1'b0;
+ int_m1_awvalid[i] = 1'b0;
+ int_awready[i] = 1'b0;
+ // accept on selected master only
+ if (l1_aw_accept[i]) begin
+ if (int_wmaster_select[i]) begin
+ l1_m1_aw_accept[i] = 1'b1;
+ l1_mx_aw_done[i] = l1_m1_aw_done[i];
+ int_m1_awvalid[i] = int_awvalid[i];
+ int_awready[i] = int_m1_awready[i];
+ end else begin
+ l1_m0_aw_accept[i] = 1'b1;
+ l1_mx_aw_done[i] = l1_m0_aw_done[i];
+ int_m0_awvalid[i] = int_awvalid[i];
+ int_awready[i] = int_m0_awready[i];
+ end
+ // drop on Master 0 only
+ end else if (l1_aw_drop[i]) begin
+ l1_m0_aw_drop[i] = 1'b1;
+ l1_mx_aw_done[i] = l1_m0_aw_done[i];
+ int_m0_awvalid[i] = int_awvalid[i];
+ int_awready[i] = l1_m0_aw_done[i];
+ // save on both masters
+ end else if (l1_aw_save[i]) begin
+ // split save
+ l1_m0_aw_save[i] = ~l1_m0_aw_done_SP[i];
+ l1_m1_aw_save[i] = ~l1_m1_aw_done_SP[i];
+ // combine done
+ l1_mx_aw_done[i] = l1_m0_aw_done_SP[i] & l1_m1_aw_done_SP[i];
+ int_m0_awvalid[i] = int_awvalid[i];
+ int_m1_awvalid[i] = int_awvalid[i];
+ int_awready[i] = l1_mx_aw_done[i];
+ end
+ end
+ // signal back to handshake splitter
+ assign l1_aw_done[i] = l1_mx_aw_done[i];
+ always_ff @(posedge Clk_CI) begin : L1_MX_AW_DONE_REG
+ if (Rst_RBI == 0) begin
+ l1_m0_aw_done_SP[i] <= 1'b0;
+ l1_m1_aw_done_SP[i] <= 1'b0;
+ end else if (l1_mx_aw_done[i]) begin
+ l1_m0_aw_done_SP[i] <= 1'b0;
+ l1_m1_aw_done_SP[i] <= 1'b0;
+ end else begin
+ l1_m0_aw_done_SP[i] <= l1_m0_aw_done_SP[i] | l1_m0_aw_done[i];
+ l1_m1_aw_done_SP[i] <= l1_m1_aw_done_SP[i] | l1_m1_aw_done[i];
+ end
+ end
+ /*
+ * When accepting L2 transactions, we must drop the corresponding transaction from the other
+ * master to make it available again for save requests from L1_DROP_SAVE.
+ */
+ // Split the L2 AW accept/drop request of port i onto the two master-side AW paths.
+ // On accept, the selected master (l2_master_select: 1 = Master 1) gets the accept and the
+ // other master gets a drop; on drop, both masters get a drop. Each request is masked by
+ // the corresponding l2_mX_aw_done_SP flag once that master has completed its handshake.
+ always_comb begin : AW_L2_SPLIT
+ l2_m0_aw_accept[i] = 1'b0;
+ l2_m1_aw_accept[i] = 1'b0;
+ l2_m0_aw_drop[i] = 1'b0;
+ l2_m1_aw_drop[i] = 1'b0;
+ // de-assert request signals individually upon handshakes
+ if (l2_aw_accept[i]) begin
+ if (l2_master_select[i]) begin
+ l2_m1_aw_accept[i] = ~l2_m1_aw_done_SP[i];
+ l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i];
+ end else begin
+ l2_m0_aw_accept[i] = ~l2_m0_aw_done_SP[i];
+ l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i];
+ end
+ // explicit drop guard, same structure as AR_L2_SPLIT; without a drop request the
+ // defaults above already hold both drop outputs at 0
+ end else if (l2_aw_drop[i]) begin
+ l2_m0_aw_drop[i] = ~l2_m0_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0;
+ l2_m1_aw_drop[i] = ~l2_m1_aw_done_SP[i] ? l2_aw_drop[i] : 1'b0;
+ end
+ // combine done
+ l2_mx_aw_done[i] = l2_m0_aw_done_SP[i] & l2_m1_aw_done_SP[i];
+ l2_aw_done[i] = l2_mx_aw_done[i];
+ end
+ always_ff @(posedge Clk_CI) begin : L2_MX_AW_DONE_REG
+ if (Rst_RBI == 0) begin
+ l2_m0_aw_done_SP[i] <= 1'b0;
+ l2_m1_aw_done_SP[i] <= 1'b0;
+ end else if (l2_mx_aw_done[i]) begin
+ l2_m0_aw_done_SP[i] <= 1'b0;
+ l2_m1_aw_done_SP[i] <= 1'b0;
+ end else begin
+ l2_m0_aw_done_SP[i] <= l2_m0_aw_done_SP[i] | l2_m0_aw_done[i];
+ l2_m1_aw_done_SP[i] <= l2_m1_aw_done_SP[i] | l2_m1_aw_done[i];
+ end
+ end
+ /*
+ * When accepting L1 transactions, we must just do so on the selected master. Drop requests must
+ * be performed on any one of the two masters. Save requests must be performed by both masters.
+ */
+ always_comb begin : AR_L1_SPLIT
+ // TLB handshake
+ l1_m0_ar_accept[i] = 1'b0;
+ l1_m1_ar_accept[i] = 1'b0;
+ l1_m0_ar_drop[i] = 1'b0;
+ l1_m1_ar_drop[i] = 1'b0;
+ l1_m0_ar_save[i] = 1'b0;
+ l1_m1_ar_save[i] = 1'b0;
+ l1_mx_ar_done[i] = 1'b0;
+ // AXI sender input handshake
+ int_m0_arvalid[i] = 1'b0;
+ int_m1_arvalid[i] = 1'b0;
+ int_arready[i] = 1'b0;
+ // accept on selected master only
+ if (l1_ar_accept[i]) begin
+ if (int_rmaster_select[i]) begin
+ l1_m1_ar_accept[i] = 1'b1;
+ l1_mx_ar_done[i] = l1_m1_ar_done[i];
+ int_m1_arvalid[i] = int_arvalid[i];
+ int_arready[i] = int_m1_arready[i];
+ end else begin
+ l1_m0_ar_accept[i] = 1'b1;
+ l1_mx_ar_done[i] = l1_m0_ar_done[i];
+ int_m0_arvalid[i] = int_arvalid[i];
+ int_arready[i] = int_m0_arready[i];
+ end
+ // drop on Master 0 only
+ end else if (l1_ar_drop[i]) begin
+ l1_m0_ar_drop[i] = 1'b1;
+ l1_mx_ar_done[i] = l1_m0_ar_done[i];
+ int_m0_arvalid[i] = int_arvalid[i];
+ int_arready[i] = l1_m0_ar_done[i];
+ // save on both masters
+ end else if (l1_ar_save[i]) begin
+ // split save
+ l1_m0_ar_save[i] = ~l1_m0_ar_done_SP[i];
+ l1_m1_ar_save[i] = ~l1_m1_ar_done_SP[i];
+ // combine done
+ l1_mx_ar_done[i] = l1_m0_ar_done_SP[i] & l1_m1_ar_done_SP[i];
+ int_m0_arvalid[i] = int_arvalid[i];
+ int_m1_arvalid[i] = int_arvalid[i];
+ int_arready[i] = l1_mx_ar_done[i];
+ end
+ end
+ // signal back to handshake splitter
+ assign l1_ar_done[i] = l1_mx_ar_done[i];
+ always_ff @(posedge Clk_CI) begin : L1_MX_AR_DONE_REG
+ if (Rst_RBI == 0) begin
+ l1_m0_ar_done_SP[i] <= 1'b0;
+ l1_m1_ar_done_SP[i] <= 1'b0;
+ end else if (l1_mx_ar_done[i]) begin
+ l1_m0_ar_done_SP[i] <= 1'b0;
+ l1_m1_ar_done_SP[i] <= 1'b0;
+ end else begin
+ l1_m0_ar_done_SP[i] <= l1_m0_ar_done_SP[i] | l1_m0_ar_done[i];
+ l1_m1_ar_done_SP[i] <= l1_m1_ar_done_SP[i] | l1_m1_ar_done[i];
+ end
+ end
+ /*
+ * When accepting L2 transactions, we must drop the corresponding transaction from the other
+ * master to make it available again for save requests from L1_DROP_SAVE.
+ */
+ always_comb begin : AR_L2_SPLIT
+ l2_m0_ar_accept[i] = 1'b0;
+ l2_m1_ar_accept[i] = 1'b0;
+ l2_m0_ar_drop[i] = 1'b0;
+ l2_m1_ar_drop[i] = 1'b0;
+ // de-assert request signals individually upon handshakes
+ if (l2_ar_accept[i]) begin
+ if (l2_master_select[i]) begin
+ l2_m1_ar_accept[i] = ~l2_m1_ar_done_SP[i];
+ l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i];
+ end else begin
+ l2_m0_ar_accept[i] = ~l2_m0_ar_done_SP[i];
+ l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i];
+ end
+ end else if (l2_ar_drop[i]) begin
+ l2_m0_ar_drop[i] = ~l2_m0_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0;
+ l2_m1_ar_drop[i] = ~l2_m1_ar_done_SP[i] ? l2_ar_drop[i] : 1'b0;
+ end
+ // combine done
+ l2_mx_ar_done[i] = l2_m0_ar_done_SP[i] & l2_m1_ar_done_SP[i];
+ l2_ar_done[i] = l2_mx_ar_done[i];
+ end
+ always_ff @(posedge Clk_CI) begin : L2_MX_AR_DONE_REG
+ if (Rst_RBI == 0) begin
+ l2_m0_ar_done_SP[i] <= 1'b0;
+ l2_m1_ar_done_SP[i] <= 1'b0;
+ end else if (l2_mx_ar_done[i]) begin
+ l2_m0_ar_done_SP[i] <= 1'b0;
+ l2_m1_ar_done_SP[i] <= 1'b0;
+ end else begin
+ l2_m0_ar_done_SP[i] <= l2_m0_ar_done_SP[i] | l2_m0_ar_done[i];
+ l2_m1_ar_done_SP[i] <= l2_m1_ar_done_SP[i] | l2_m1_ar_done[i];
+ end
+ end
+ end // AX_SPLIT
+ endgenerate // AX_SPLIT
+ // }}}
+ // ██╗ ██╗███████╗ ███████╗██████╗ ██╗ ██╗████████╗
+ // ██║ ██║██╔════╝ ██╔════╝██╔══██╗██║ ██║╚══██╔══╝
+ // ███████║███████╗ ███████╗██████╔╝██║ ██║ ██║
+ // ██╔══██║╚════██║ ╚════██║██╔═══╝ ██║ ██║ ██║
+ // ██║ ██║███████║ ███████║██║ ███████╗██║ ██║
+ // ╚═╝ ╚═╝╚══════╝ ╚══════╝╚═╝ ╚══════╝╚═╝ ╚═╝
+ //
+ /*
+ * We need to perform combined handshakes with multiple AXI modules
+ * upon transactions drops, accepts, saves etc. from two TLBs.
+ */
+ generate for (i = 0; i < N_PORTS; i++) begin : HANDSHAKE_SPLIT
+ // Per-port handshake splitting. Pattern used throughout this block: every sub-handshake
+ // has a *_done_SP flag that latches its done pulse; the request towards a sub-module is
+ // masked once its flag is set, the combined done is the AND of the flags, and the flags
+ // are cleared in the cycle after the combined done is asserted.
+ assign l1_xw_accept[i] = int_wtrans_accept[i] & ~aw_out_stall[i];
+ assign int_wtrans_sent[i] = l1_xw_done[i];
+ assign l1_ar_accept[i] = int_rtrans_accept[i];
+ assign int_rtrans_sent[i] = l1_ar_done[i];
+ /*
+ * L1 AW sender + W buffer handshake split
+ */
+ // forward
+ assign l1_aw_accept[i] = l1_xw_accept[i] & ~l1_aw_done_SP[i];
+ assign l1_w_accept[i] = l1_xw_accept[i] & ~l1_w_done_SP[i];
+ assign l1_aw_save[i] = l1_xw_save[i] & ~l1_aw_done_SP[i];
+ assign l1_w_save[i] = l1_xw_save[i] & ~l1_w_done_SP[i];
+ assign l1_aw_drop[i] = l1_xw_drop[i] & ~l1_aw_done_SP[i];
+ assign l1_w_drop[i] = l1_xw_drop[i] & ~l1_w_done_SP[i];
+ // backward
+ assign l1_xw_done[i] = l1_aw_done_SP[i] & l1_w_done_SP[i];
+ always_ff @(posedge Clk_CI) begin : L1_XW_HS_SPLIT
+ if (Rst_RBI == 0) begin
+ l1_aw_done_SP[i] <= 1'b0;
+ l1_w_done_SP[i] <= 1'b0;
+ end else if (l1_xw_done[i]) begin
+ l1_aw_done_SP[i] <= 1'b0;
+ l1_w_done_SP[i] <= 1'b0;
+ end else begin
+ l1_aw_done_SP[i] <= l1_aw_done_SP[i] | l1_aw_done[i];
+ l1_w_done_SP[i] <= l1_w_done_SP[i] | l1_w_done[i];
+ end
+ end
+ if (ENABLE_L2TLB[i] == 1) begin : L2_HS_SPLIT
+ /*
+ * L1 AR sender + R sender handshake split
+ *
+ * AR and R do not need to be strictly in sync. We thus use separate handshakes.
+ * But the handshake signals for the R sender are multiplexed with those for
+ * the L2. However, L2_ACCEPT_DROP_SAVE has always higher priority.
+ */
+ assign lx_r_drop[i] = l2_r_drop[i] | l1_r_drop[i];
+ // the R sender's done is routed back to whichever side owns the drop (L2 first)
+ assign l1_r_done[i] = l2_r_drop[i] ? 1'b0 : lx_r_done[i];
+ assign l2_r_done[i] = l2_r_drop[i] ? lx_r_done[i] : 1'b0;
+ /*
+ * L2 AW sender + W buffer handshake split
+ */
+ // forward
+ assign l2_aw_accept[i] = l2_xw_accept[i] & ~l2_aw_done_SP[i];
+ assign l2_w_accept[i] = l2_xw_accept[i] & ~l2_w_done_SP[i];
+ assign l2_aw_drop[i] = l2_xw_drop[i] & ~l2_aw_done_SP[i];
+ assign l2_w_drop[i] = l2_xw_drop[i] & ~l2_w_done_SP[i];
+ // backward
+ assign l2_xw_done[i] = l2_aw_done_SP[i] & l2_w_done_SP[i];
+ always_ff @(posedge Clk_CI) begin : L2_XW_HS_SPLIT
+ if (Rst_RBI == 0) begin
+ l2_aw_done_SP[i] <= 1'b0;
+ l2_w_done_SP[i] <= 1'b0;
+ end else if (l2_xw_done[i]) begin
+ l2_aw_done_SP[i] <= 1'b0;
+ l2_w_done_SP[i] <= 1'b0;
+ end else begin
+ l2_aw_done_SP[i] <= l2_aw_done_SP[i] | l2_aw_done[i];
+ l2_w_done_SP[i] <= l2_w_done_SP[i] | l2_w_done[i];
+ end
+ end
+ /*
+ * L2 AR + R sender handshake split
+ */
+ // forward
+ assign l2_ar_drop[i] = l2_xr_drop[i] & ~l2_ar_done_SP[i];
+ assign l2_r_drop[i] = l2_xr_drop[i] & ~l2_r_done_SP[i];
+ // backward - make sure to always clear L2_XR_HS_SPLIT
+ // (without an active drop, the AR done flag alone is enough to clear the flags)
+ always_comb begin
+ if (l2_xr_drop[i]) begin
+ l2_xr_done[i] = l2_ar_done_SP[i] & l2_r_done_SP[i];
+ end else begin
+ l2_xr_done[i] = l2_ar_done_SP[i];
+ end
+ end
+ always_ff @(posedge Clk_CI) begin : L2_XR_HS_SPLIT
+ if (Rst_RBI == 0) begin
+ l2_ar_done_SP[i] <= 1'b0;
+ l2_r_done_SP[i] <= 1'b0;
+ end else if (l2_xr_done[i]) begin
+ l2_ar_done_SP[i] <= 1'b0;
+ l2_r_done_SP[i] <= 1'b0;
+ end else begin
+ l2_ar_done_SP[i] <= l2_ar_done_SP[i] | l2_ar_done[i];
+ l2_r_done_SP[i] <= l2_r_done_SP[i] | l2_r_done[i];
+ end
+ end
+ end else begin // if (ENABLE_L2TLB[i] == 1)
+ // no L2 TLB on this port: the R sender serves L1 only, all L2 handshakes are tied off
+ assign lx_r_drop[i] = l1_r_drop[i];
+ assign l1_r_done[i] = lx_r_done[i];
+ assign l2_aw_accept[i] = 1'b0;
+ assign l2_w_accept[i] = 1'b0;
+ assign l2_aw_drop[i] = 1'b0;
+ assign l2_w_drop[i] = 1'b0;
+ assign l2_xw_done[i] = 1'b0;
+ assign l2_aw_done_SP[i] = 1'b0;
+ assign l2_w_done_SP[i] = 1'b0;
+ assign l2_ar_accept[i] = 1'b0;
+ assign l2_ar_drop[i] = 1'b0;
+ assign l2_r_drop[i] = 1'b0;
+ assign l2_xr_done[i] = 1'b0;
+ assign l2_r_done[i] = 1'b0;
+ assign l2_ar_done_SP[i] = 1'b0;
+ assign l2_r_done_SP[i] = 1'b0;
+ end // if (ENABLE_L2TLB[i] == 1)
+ end // HANDSHAKE_SPLIT
+ endgenerate // HANDSHAKE_SPLIT
+ // }}}
+ // L2 TLB {{{
+ // ██╗ ██████╗ ████████╗██╗ ██████╗
+ // ██║ ╚════██╗ ╚══██╔══╝██║ ██╔══██╗
+ // ██║ █████╔╝ ██║ ██║ ██████╔╝
+ // ██║ ██╔═══╝ ██║ ██║ ██╔══██╗
+ // ███████╗███████╗ ██║ ███████╗██████╔╝
+ // ╚══════╝╚══════╝ ╚═╝ ╚══════╝╚═════╝
+ //
+ /*
+ * l2_tlb
+ *
+ * The L2 TLB translates addresses upon misses in the L1 TLB (rab_core).
+ *
+ * It supports one ongoing translation at a time. If an L1 miss occurs while the L2 is busy,
+ * the L1 is stalled until the L2 is available again.
+ *
+ */
+ generate for (i = 0; i < N_PORTS; i++) begin : L2_TLB
+ if (ENABLE_L2TLB[i] == 1) begin : L2_TLB
+ /*
+ * L1 output selector
+ */
+ assign L1OutRwType_D[i] = int_wtrans_drop[i] ? 1'b1 : 1'b0;
+ assign L1OutProt_D[i] = rab_prot[i];
+ assign L1OutMulti_D[i] = rab_multi[i];
+ /*
+ * L1 output control + L1_DROP_BUF, L2_IN_BUF management
+ *
+ * Forward the L1 drop request to AR/AW sender modules if
+ * 1. the transactions needs to be dropped (L1 multi, prot, prefetch), or
+ * 2. if a lookup in the L2 TLB is required (L1 miss) and the input buffer is not full.
+ *
+ * The AR/AW senders do not support more than 1 outstanding L1 miss. The push back towards
+ * the upstream is realized by not accepting the save request (saving the L1 transaction)
+ * in the senders as long as the L2 TLB is busy or has valid output. This ultimately
+ * blocks the L1 TLB.
+ *
+ * Together with the AW drop/save, we also perform the W drop/save as AW and W need to
+ * absolutely remain in order. In contrast, the R drop is performed separately in
+ * L2_ACCEPT_DROP_SAVE.
+ */
+ always_comb begin : L1_DROP_SAVE
+ l1_ar_drop[i] = 1'b0;
+ l1_ar_save[i] = 1'b0;
+ l1_xw_drop[i] = 1'b0;
+ l1_xw_save[i] = 1'b0;
+ l1_id_drop[i] = L1OutId_D[i];
+ l1_len_drop[i] = L1OutLen_D[i];
+ l1_prefetch_drop[i] = rab_prefetch[i];
+ l1_hit_drop[i] = 1'b1; // there are no drops for L1 misses
+ L1DropEn_S[i] = 1'b0;
+ L2InEn_S[i] = 1'b0;
+ if ( rab_prot[i] | rab_multi[i] | rab_prefetch[i] ) begin
+ // 1. Drop
+ l1_ar_drop[i] = int_rtrans_drop[i] & ~L1DropValid_SP[i];
+ l1_xw_drop[i] = int_wtrans_drop[i] & ~L1DropValid_SP[i];
+ // Store to L1_DROP_BUF upon handshake
+ L1DropEn_S[i] = (l1_ar_drop[i] & l1_ar_done[i]) |
+ (l1_xw_drop[i] & l1_xw_done[i]);
+ end else if ( rab_miss[i] ) begin
+ // 2. Save - Make sure L2 is really available.
+ l1_ar_save[i] = int_rtrans_drop[i] & ~L2Busy_S[i];
+ l1_xw_save[i] = int_wtrans_drop[i] & ~L2Busy_S[i];
+ // Store to L2_IN_BUF upon handshake - triggers the L2 TLB
+ L2InEn_S[i] = (l1_ar_save[i] & l1_ar_done[i]) |
+ (l1_xw_save[i] & l1_xw_done[i]);
+ end
+ end
+ /*
+ * L2 output control + L2_OUT_BUF management + R/B sender control + W buffer control
+ *
+ * Perform L1 R transaction drops unless the L2 output buffer holds valid data. The AXI specs
+ * require the B response to be sent only after consuming/discarding the corresponding data
+ * in the W channel. Thus, we only send L2 drop request to the W buffer here. The drop
+ * request to the B sender is then sent by the W buffer autonomously.
+ *
+ * L1 AW/W drop requests are managed by L1_DROP_SAVE.
+ */
+ // Combinational control for port i: issues L2 accept/drop requests from L2_OUT_BUF,
+ // issues the L1 R-sender drop from L1_DROP_BUF, computes the next valid flags of both
+ // buffers (L1DropValid_SN, L2OutValid_SN) and raises int_prot / int_multi / L2Miss_S.
+ always_comb begin : L2_ACCEPT_DROP_SAVE
+ // defaults: no requests, no interrupts, buffers keep their valid state
+ l2_ar_addr[i] = 'b0;
+ l2_aw_addr[i] = 'b0;
+ l2_ar_accept[i] = 1'b0;
+ l2_xr_drop[i] = 1'b0;
+ l2_xw_accept[i] = 1'b0;
+ l2_xw_drop[i] = 1'b0;
+ l1_r_drop[i] = 1'b0;
+ lx_id_drop[i] = 'b0;
+ lx_len_drop[i] = 'b0;
+ lx_prefetch_drop[i] = 1'b0;
+ lx_hit_drop[i] = 1'b0;
+ // L1_DROP_BUF becomes valid when L1_DROP_SAVE stores into it (L1DropEn_S)
+ L1DropValid_SN[i] = L1DropValid_SP[i] | L1DropEn_S[i];
+ L2OutValid_SN[i] = L2OutValid_SP[i];
+ L2OutReady_S[i] = 1'b0;
+ L2OutEn_S[i] = 1'b0;
+ L2Miss_S[i] = 1'b0;
+ int_multi[i] = 1'b0;
+ int_prot[i] = 1'b0;
+ if (L2OutValid_SP[i] == 1'b0) begin
+ // Drop L1 from R senders
+ if (L1DropValid_SP[i] == 1'b1) begin
+ // Only perform the R sender drop here.
+ if (~L1DropRwType_DP[i]) begin
+ l1_r_drop[i] = 1'b1;
+ lx_id_drop[i] = L1DropId_DP[i];
+ lx_len_drop[i] = L1DropLen_DP[i];
+ lx_prefetch_drop[i] = L1DropPrefetch_S[i];
+ lx_hit_drop[i] = 1'b1; // there are no drops for L1 misses
+ // Invalidate L1_DROP_BUF upon handshake
+ if ( l1_r_drop[i] & l1_r_done[i] ) begin
+ L1DropValid_SN[i] = 1'b0;
+ int_prot[i] = L1DropProt_DP[i];
+ int_multi[i] = L1DropMulti_DP[i];
+ end
+ end else begin
+ // Invalidate L1_DROP_BUF
+ // (write-type entry: no R sender handshake is awaited here)
+ L1DropValid_SN[i] = 1'b0;
+ int_prot[i] = L1DropProt_DP[i];
+ int_multi[i] = L1DropMulti_DP[i];
+ end
+ end
+ end else begin // L2_OUT_BUF has valid data
+ // clean L2 hit (no prefetch, prot or multi): accept the saved transaction
+ if ( L2OutHit_SP[i] & ~(L2OutPrefetch_S[i] | L2OutProt_SP[i] | L2OutMulti_SP[i]) ) begin
+ l2_ar_addr[i] = L2OutAddr_DP[i];
+ l2_aw_addr[i] = L2OutAddr_DP[i];
+ l2_ar_accept[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1;
+ l2_xw_accept[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0;
+ // Invalidate L2_OUT_BUF upon handshake
+ L2OutValid_SN[i] = ~( (l2_ar_accept[i] & l2_ar_done[i]) |
+ (l2_xw_accept[i] & l2_xw_done[i]) );
+ end else begin
+ // everything else (L2 miss, prefetch, prot or multi): drop the saved transaction
+ lx_id_drop[i] = L2OutId_DP[i];
+ lx_len_drop[i] = L2OutLen_DP[i];
+ lx_prefetch_drop[i] = L2OutPrefetch_S[i];
+ lx_hit_drop[i] = L2OutHit_SP[i];
+ // The l2_xr_drop will also perform the handshake with the R sender
+ l2_xr_drop[i] = L2OutRwType_DP[i] ? 1'b0 : 1'b1;
+ l2_xw_drop[i] = L2OutRwType_DP[i] ? 1'b1 : 1'b0;
+ // Invalidate L2_OUT_BUF upon handshake
+ if ( (l2_xr_drop[i] & l2_xr_done[i]) | (l2_xw_drop[i] & l2_xw_done[i]) ) begin
+ L2OutValid_SN[i] = 1'b0;
+ L2Miss_S[i] = ~L2OutHit_SP[i];
+ int_prot[i] = L2OutProt_SP[i];
+ int_multi[i] = L2OutMulti_SP[i];
+ end
+ end
+ end
+ // Only accept new L2 output after ongoing drops have finished.
+ // (each drop request must equal its done: either idle, or completing in this cycle)
+ if ( (l2_xr_drop[i] == l2_xr_done[i]) &
+ (l2_xw_drop[i] == l2_xw_done[i]) &
+ (l1_r_drop[i] == l1_r_done[i] ) ) begin
+ // Store to L2_OUT_BUF upon handshake with L2 TLB module
+ if ( (L2OutValid_SP[i] == 1'b0) && (L2OutValid_S[i] == 1'b1) ) begin
+ L2OutValid_SN[i] = 1'b1;
+ L2OutReady_S[i] = 1'b1;
+ L2OutEn_S[i] = 1'b1;
+ end
+ end
+ end
+ /*
+ * L1 drop buffer
+ *
+ * Used in case of multi, prot and prefetch hits in the L1 TLB.
+ */
+ always_ff @(posedge Clk_CI) begin : L1_DROP_BUF
+ if (Rst_RBI == 0) begin
+ L1DropProt_DP[i] <= 1'b0;
+ L1DropMulti_DP[i] <= 1'b0;
+ L1DropRwType_DP[i] <= 1'b0;
+ L1DropUser_DP[i] <= 'b0;
+ L1DropId_DP[i] <= 'b0;
+ L1DropLen_DP[i] <= 'b0;
+ L1DropAddr_DP[i] <= 'b0;
+ end else if (L1DropEn_S[i] == 1'b1) begin
+ L1DropProt_DP[i] <= L1OutProt_D[i] ;
+ L1DropMulti_DP[i] <= L1OutMulti_D[i] ;
+ L1DropRwType_DP[i] <= L1OutRwType_D[i];
+ L1DropUser_DP[i] <= L1OutUser_D[i] ;
+ L1DropId_DP[i] <= L1OutId_D[i] ;
+ L1DropLen_DP[i] <= L1OutLen_D[i] ;
+ L1DropAddr_DP[i] <= L1OutAddr_D[i] ;
+ end
+ end // always_ff @ (posedge Clk_CI)
+ /*
+ * L2 input buffer
+ *
+ * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB.
+ */
+ always_ff @(posedge Clk_CI) begin : L2_IN_BUF
+ if (Rst_RBI == 0) begin
+ L2InRwType_DP[i] <= 1'b0;
+ L2InUser_DP[i] <= 'b0;
+ L2InId_DP[i] <= 'b0;
+ L2InLen_DP[i] <= 'b0;
+ L2InAddr_DP[i] <= 'b0;
+ end else if (L2InEn_S[i] == 1'b1) begin
+ L2InRwType_DP[i] <= L1OutRwType_D[i];
+ L2InUser_DP[i] <= L1OutUser_D[i] ;
+ L2InId_DP[i] <= L1OutId_D[i] ;
+ L2InLen_DP[i] <= L1OutLen_D[i] ;
+ L2InAddr_DP[i] <= L1OutAddr_D[i] ;
+ end
+ end // always_ff @ (posedge Clk_CI)
+ l2_tlb
+ #(
+ // No trailing comma after the last parameter override (a dangling ',' before ')' is
+ // a syntax error).
+ // NOTE(review): only N_SETS is overridden in this listing - confirm against the RTL
+ // whether further parameter overrides are missing here.
+ .N_SETS ( `RAB_L2_N_SETS )
+ )
+ u_l2_tlb
+ (
+ .clk_i ( Clk_CI ),
+ .rst_ni ( Rst_RBI ),
+ // Config inputs
+ .we_i ( L2CfgWE_S[i] ),
+ .waddr_i ( L2CfgWAddr_D[i] ),
+ .wdata_i ( L2CfgWData_D[i] ),
+ // Request input (started by the same strobe that fills L2_IN_BUF)
+ .start_i ( L2InEn_S[i] ),
+ .busy_o ( L2Busy_S[i] ),
+ .rw_type_i ( L2InRwType_DP[i] ),
+ .in_addr_i ( L2InAddr_DP[i] ),
+ // Response output
+ .out_ready_i ( L2OutReady_S[i] ),
+ .out_valid_o ( L2OutValid_S[i] ),
+ .hit_o ( L2OutHit_SN[i] ),
+ .miss_o ( L2OutMiss_SN[i] ),
+ .prot_o ( L2OutProt_SN[i] ),
+ .multi_o ( L2OutMulti_SN[i] ),
+ .cache_coherent_o ( L2OutCC_SN[i] ),
+ .out_addr_o ( L2OutAddr_DN[i] )
+ );
+ /*
+ * L2 output buffer
+ *
+ * Make sure there are no combinational paths between L1 TLB/inputs and L2 TLB.
+ */
+ always_ff @(posedge Clk_CI) begin : L2_OUT_BUF
+ if (Rst_RBI == 0) begin
+ L2OutRwType_DP[i] <= 1'b0;
+ L2OutUser_DP[i] <= 'b0;
+ L2OutLen_DP[i] <= 'b0;
+ L2OutId_DP[i] <= 'b0;
+ L2OutInAddr_DP[i] <= 'b0;
+ L2OutHit_SP[i] <= 1'b0;
+ L2OutMiss_SP[i] <= 1'b0;
+ L2OutProt_SP[i] <= 1'b0;
+ L2OutMulti_SP[i] <= 1'b0;
+ L2OutCC_SP[i] <= 1'b0;
+ L2OutAddr_DP[i] <= 'b0;
+ end else if (L2OutEn_S[i] == 1'b1) begin
+ L2OutRwType_DP[i] <= L2InRwType_DP[i];
+ L2OutUser_DP[i] <= L2InUser_DP[i] ;
+ L2OutLen_DP[i] <= L2InLen_DP[i] ;
+ L2OutId_DP[i] <= L2InId_DP[i] ;
+ L2OutInAddr_DP[i] <= L2InAddr_DP[i] ;
+ L2OutHit_SP[i] <= L2OutHit_SN[i] ;
+ L2OutMiss_SP[i] <= L2OutMiss_SN[i] ;
+ L2OutProt_SP[i] <= L2OutProt_SN[i] ;
+ L2OutMulti_SP[i] <= L2OutMulti_SN[i];
+ L2OutCC_SP[i] <= L2OutCC_SN[i] ;
+ L2OutAddr_DP[i] <= L2OutAddr_DN[i] ;
+ end
+ end // always_ff @ (posedge Clk_CI)
+ // Valid flags of L1_DROP_BUF and L2_OUT_BUF for port i; next values come from
+ // L2_ACCEPT_DROP_SAVE. Non-blocking assignments are used, like every other register in
+ // this module, so that processes reading the *_SP flags on the same clock edge see the
+ // old value regardless of simulator scheduling order.
+ always_ff @(posedge Clk_CI) begin : BUF_VALID
+ if (Rst_RBI == 0) begin
+ L1DropValid_SP[i] <= 1'b0;
+ L2OutValid_SP[i] <= 1'b0;
+ end else begin
+ L1DropValid_SP[i] <= L1DropValid_SN[i];
+ L2OutValid_SP[i] <= L2OutValid_SN[i];
+ end
+ end
+ always_comb begin : BUF_TO_PREFETCH
+ // L1 Drop Buf
+ if (L1DropUser_DP[i] == {AXI_USER_WIDTH{1'b1}})
+ L1DropPrefetch_S[i] = 1'b1;
+ else
+ L1DropPrefetch_S[i] = 1'b0;
+ // L2 Out Buf
+ if (L2OutUser_DP[i] == {AXI_USER_WIDTH{1'b1}})
+ L2OutPrefetch_S[i] = 1'b1;
+ else
+ L2OutPrefetch_S[i] = 1'b0;
+ end
+ assign l2_cache_coherent[i] = L2OutCC_SP[i];
+ assign int_miss[i] = L2Miss_S[i];
+ end else begin : L2_TLB_STUB // if (ENABLE_L2TLB[i] == 1)
+ assign l1_ar_drop[i] = int_rtrans_drop[i];
+ assign l1_r_drop[i] = int_rtrans_drop[i];
+ assign l1_xw_drop[i] = int_wtrans_drop[i];
+ assign l1_ar_save[i] = 1'b0;
+ assign l1_xw_save[i] = 1'b0;
+ assign l2_xw_accept[i] = 1'b0;
+ assign l2_xr_drop[i] = 1'b0;
+ assign l2_xw_drop[i] = 1'b0;
+ assign l2_ar_addr[i] = 'b0;
+ assign l2_aw_addr[i] = 'b0;
+ assign l1_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] :
+ int_rtrans_drop[i] ? int_arid[i] :
+ '0;
+ assign l1_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] :
+ int_rtrans_drop[i] ? int_arlen[i] :
+ '0;
+ assign l1_prefetch_drop[i] = rab_prefetch[i];
+ assign l1_hit_drop[i] = ~rab_miss[i];
+ assign lx_id_drop[i] = int_wtrans_drop[i] ? int_awid[i] :
+ int_rtrans_drop[i] ? int_arid[i] :
+ '0;
+ assign lx_len_drop[i] = int_wtrans_drop[i] ? int_awlen[i] :
+ int_rtrans_drop[i] ? int_arlen[i] :
+ '0;
+ assign lx_prefetch_drop[i] = rab_prefetch[i];
+ assign lx_hit_drop[i] = ~rab_miss[i];
+ assign l2_cache_coherent[i] = 1'b0;
+ assign int_miss[i] = rab_miss[i];
+ assign int_prot[i] = rab_prot[i];
+ assign int_multi[i] = rab_multi[i];
+ // unused signals
+ assign L2Miss_S[i] = 1'b0;
+ assign L1OutRwType_D[i] = 1'b0;
+ assign L1OutProt_D[i] = 1'b0;
+ assign L1OutMulti_D[i] = 1'b0;
+ assign L1DropRwType_DP[i] = 1'b0;
+ assign L1DropUser_DP[i] = 'b0;
+ assign L1DropId_DP[i] = 'b0;
+ assign L1DropLen_DP[i] = 'b0;
+ assign L1DropAddr_DP[i] = 'b0;
+ assign L1DropProt_DP[i] = 1'b0;
+ assign L1DropMulti_DP[i] = 1'b0;
+ assign L1DropEn_S[i] = 1'b0;
+ assign L1DropPrefetch_S[i] = 1'b0;
+ assign L1DropValid_SN[i] = 1'b0;
+ assign L1DropValid_SP[i] = 1'b0;
+ assign L2InRwType_DP[i] = 1'b0;
+ assign L2InUser_DP[i] = 'b0;
+ assign L2InId_DP[i] = 'b0;
+ assign L2InLen_DP[i] = 'b0;
+ assign L2InAddr_DP[i] = 'b0;
+ assign L2InEn_S[i] = 1'b0;
+ assign L2OutHit_SN[i] = 1'b0;
+ assign L2OutMiss_SN[i] = 1'b0;
+ assign L2OutProt_SN[i] = 1'b0;
+ assign L2OutMulti_SN[i] = 1'b0;
+ assign L2OutCC_SN[i] = 1'b0;
+ assign L2OutAddr_DN[i] = 'b0;
+ assign L2OutRwType_DP[i] = 1'b0;
+ assign L2OutUser_DP[i] = 'b0;
+ assign L2OutId_DP[i] = 'b0;
+ assign L2OutLen_DP[i] = 'b0;
+ assign L2OutInAddr_DP[i] = 'b0;
+ assign L2OutHit_SP[i] = 1'b0;
+ assign L2OutMiss_SP[i] = 1'b0;
+ assign L2OutProt_SP[i] = 1'b0;
+ assign L2OutMulti_SP[i] = 1'b0;
+ assign L2OutCC_SP[i] = 1'b0;
+ assign L2OutAddr_DP[i] = 'b0;
+ assign L2OutEn_S[i] = 1'b0;
+ assign L2OutPrefetch_S[i] = 1'b0;
+ assign L2Busy_S[i] = 1'b0;
+ assign L2OutValid_S[i] = 1'b0;
+ assign L2OutValid_SN[i] = 1'b0;
+ assign L2OutValid_SP[i] = 1'b0;
+ assign L2OutReady_S[i] = 1'b0;
+ end // !`ifdef ENABLE_L2TLB
+ end // for (i = 0; i < N_PORTS; i++)
+ endgenerate
+// }}}
+# endmodule
+# // vim: ts=2 sw=2 sts=2 et nosmartindent autoindent foldmethod=marker
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class check_ram(Elaboratable):
+ def __init__(self):
+ self.clk_i = Signal() # input
+ self.rst_ni = Signal() # input
+ self.in_addr = Signal(ADDR_WIDTH) # input
+ self.rw_type = Signal() # input
+ self.ram_we = Signal() # input
+ self.port0_addr = Signal(1+ERROR p_expression_25) # input
+ self.port1_addr = Signal(1+ERROR p_expression_25) # input
+ self.ram_wdata = Signal(RAM_DATA_WIDTH) # input
+ self.output_sent = Signal() # input
+ self.output_valid = Signal() # input
+ self.offset_addr_d = Signal(OFFSET_WIDTH) # input
+ self.hit_addr = Signal(1+ERROR p_expression_25) # output
+ self.master = Signal() # output
+ self.hit = Signal() # output
+ self.multi_hit = Signal() # output
+ self.prot = Signal() # output
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# //import CfMath::log2;
+# //`define MULTI_HIT_FULL_SET
+# module check_ram
+# //#(
+# // parameter ADDR_WIDTH = 32,
+# // parameter RAM_DATA_WIDTH = 32,
+# // parameter PAGE_SIZE = 4096, // 4kB
+# // parameter SET_WIDTH = 5,
+# // parameter OFFSET_WIDTH = 4
+# // )
+# (
+# input logic clk_i,
+# input logic rst_ni,
+# input logic [ADDR_WIDTH-1:0] in_addr,
+# input logic rw_type, // 1 => write, 0=> read
+# input logic ram_we,
+# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr,
+# input logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr,
+# input logic [RAM_DATA_WIDTH-1:0] ram_wdata,
+# input logic output_sent,
+# input logic output_valid,
+# input logic [OFFSET_WIDTH-1:0] offset_addr_d,
+# output logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr,
+# output logic master,
+# output logic hit,
+# output logic multi_hit,
+# output logic prot
+# );
+""" #docstring_begin
+ localparam IGNORE_LSB = log2(PAGE_SIZE); // 12
+ logic [RAM_DATA_WIDTH-1:0] port0_data_o, port1_data_o; // RAM read data outputs
+ logic port0_hit, port1_hit; // Ram output matches in_addr
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr_saved, port1_addr_saved;
+ // Hit FSM Signals
+ typedef enum logic {SEARCH, HIT} hit_state_t;
+ hit_state_t hit_SP; // Hit FSM state
+ hit_state_t hit_SN; // Hit FSM next state
+ // Multi Hit FSM signals
+ typedef enum logic[1:0] {NO_HITS, ONE_HIT, MULTI_HIT} multi_state_t;
+ multi_state_t multi_SP; // Multi Hit FSM state
+ multi_state_t multi_SN; // Multi Hit FSM next state
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_saved;
+ logic master_saved;
+ //// --------------- Block RAM (Dual Port) -------------- ////
+ // The outputs of the BRAMs are only valid if in the previous cycle:
+ // 1. the inputs were valid, and
+ // 2. the BRAM was not written to.
+ // Otherwise, the outputs must be ignored which is controlled by the output_valid signal.
+ // This signal is driven by the upper-level L2 TLB module.
+ ram_tp_no_change #(
+ )
+ ram_tp_no_change_0
+ (
+ .clk ( clk_i ),
+ .we ( ram_we ),
+ .addr0 ( port0_addr ),
+ .addr1 ( port1_addr ),
+ .d_i ( ram_wdata ),
+ .d0_o ( port0_data_o ),
+ .d1_o ( port1_data_o )
+ );
+ //// Check Ram Outputs
+ assign port0_hit = (port0_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port0_data_o[RAM_DATA_WIDTH-1:4]);
+ assign port1_hit = (port1_data_o[0] == 1'b1) && (in_addr[ADDR_WIDTH-1: IGNORE_LSB] == port1_data_o[RAM_DATA_WIDTH-1:4]);
+ //// ----------------------------------------------------- /////
+ //// ------------------- Check if Hit ------------------------ ////
+ // FSM
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ hit_SP <= SEARCH;
+ end else begin
+ hit_SP <= hit_SN;
+ end
+ end
+ always_ff @(posedge clk_i, negedge rst_ni) begin
+ if (!rst_ni) begin
+ port0_addr_saved <= '0;
+ port1_addr_saved <= '0;
+ end else begin
+ port0_addr_saved <= port0_addr;
+ port1_addr_saved <= port1_addr;
+ end
+ end
+ always_comb begin
+ hit_SN = hit_SP;
+ hit = 1'b0;
+ hit_addr = 0;
+ master = 1'b0;
+ unique case(hit_SP)
+ if (output_valid)
+ if (port0_hit || port1_hit) begin
+ hit_SN = HIT;
+ hit = 1'b1;
+ hit_addr = port0_hit ? {port0_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} :
+ port1_hit ? {port1_addr_saved[SET_WIDTH+OFFSET_WIDTH:OFFSET_WIDTH], offset_addr_d} :
+ 0;
+ master = port0_hit ? port0_data_o[3] :
+ port1_hit ? port1_data_o[3] :
+ 1'b0;
+ end
+ HIT : begin
+`ifdef MULTI_HIT_FULL_SET // Since the search continues after the first hit, it needs to be saved to be accessed later.
+ hit = 1'b1;
+ hit_addr = hit_addr_saved;
+ master = master_saved;
+ if (output_sent)
+ hit_SN = SEARCH;
+ end
+ default : begin
+ hit_SN = SEARCH;
+ end
+ endcase // case (hit_SP)
+ end // always_comb begin
+ //// ------------------------------------------- ////
+ assign prot = output_valid && port0_hit ? ((~port0_data_o[2] && rw_type) || (~port0_data_o[1] && ~rw_type)) :
+ output_valid && port1_hit ? ((~port1_data_o[2] && rw_type) || (~port1_data_o[1] && ~rw_type)) :
+ 1'b0;
+ //// ------------------- Multi ------------------- ////
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ hit_addr_saved <= 0;
+ master_saved <= 1'b0;
+ end else if (output_valid) begin
+ hit_addr_saved <= hit_addr;
+ master_saved <= master;
+ end
+ end
+ // FSM
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ multi_SP <= NO_HITS;
+ end else begin
+ multi_SP <= multi_SN;
+ end
+ end
+ always_comb begin
+ multi_SN = multi_SP;
+ multi_hit = 1'b0;
+ unique case(multi_SP)
+ if(output_valid && (port0_hit && port1_hit)) begin
+ multi_SN = MULTI_HIT;
+ multi_hit = 1'b1;
+ end else if(output_valid && (port0_hit || port1_hit))
+ multi_SN = ONE_HIT;
+ if(output_valid && (port0_hit || port1_hit)) begin
+ multi_SN = MULTI_HIT;
+ multi_hit = 1'b1;
+ end else if (output_sent)
+ multi_SN = NO_HITS;
+ MULTI_HIT : begin
+ multi_hit = 1'b1;
+ if (output_sent)
+ multi_SN = NO_HITS;
+ end
+ endcase // case (multi_SP)
+ end // always_comb begin
+`else // !`ifdef MULTI_HIT_FULL_SET
+ assign multi_hit = output_valid && port0_hit && port1_hit;
+`endif // !`ifdef MULTI_HIT_FULL_SET
+ //// ------------------------------------------- ////
+# endmodule
--- /dev/null
+class CoreConfig:
+ def __init__(self):
+ self.N_SLICES = 16
+ self.N_REGS = 4*self.N_SLICES
+ self.ADDR_WIDTH_PHYS = 40
+ self.ADDR_WIDTH_VIRT = 32
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class fsm(Elaboratable):
+ def __init__(self):
+ self.Clk_CI = Signal() # input
+ self.Rst_RBI = Signal() # input
+ self.port1_addr_valid_i = Signal() # input
+ self.port2_addr_valid_i = Signal() # input
+ self.port1_sent_i = Signal() # input
+ self.port2_sent_i = Signal() # input
+ self.select_i = Signal() # input
+ self.no_hit_i = Signal() # input
+ self.multi_hit_i = Signal() # input
+ self.no_prot_i = Signal() # input
+ self.prefetch_i = Signal() # input
+ self.out_addr_i = Signal(AXI_M_ADDR_WIDTH) # input
+ self.cache_coherent_i = Signal() # input
+ self.port1_accept_o = Signal() # output
+ self.port1_drop_o = Signal() # output
+ self.port1_miss_o = Signal() # output
+ self.port2_accept_o = Signal() # output
+ self.port2_drop_o = Signal() # output
+ self.port2_miss_o = Signal() # output
+ self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output
+ self.cache_coherent_o = Signal() # output
+ self.miss_o = Signal() # output
+ self.multi_o = Signal() # output
+ self.prot_o = Signal() # output
+ self.prefetch_o = Signal() # output
+ self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input
+ self.in_id_i = Signal(AXI_ID_WIDTH) # input
+ self.in_len_i = Signal(8) # input
+ self.in_user_i = Signal(AXI_USER_WIDTH) # input
+ self.in_addr_o = Signal(AXI_S_ADDR_WIDTH) # output
+ self.in_id_o = Signal(AXI_ID_WIDTH) # output
+ self.in_len_o = Signal(8) # output
+ self.in_user_o = Signal(AXI_USER_WIDTH) # output
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# //`timescale 1ns / 1ps
+# module fsm
+# #(
+# parameter AXI_M_ADDR_WIDTH = 40,
+# parameter AXI_S_ADDR_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 8,
+# parameter AXI_USER_WIDTH = 6
+# )
+# (
+# input logic Clk_CI,
+# input logic Rst_RBI,
+# input logic port1_addr_valid_i,
+# input logic port2_addr_valid_i,
+# input logic port1_sent_i,
+# input logic port2_sent_i,
+# input logic select_i,
+# input logic no_hit_i,
+# input logic multi_hit_i,
+# input logic no_prot_i,
+# input logic prefetch_i,
+# input logic [AXI_M_ADDR_WIDTH-1:0] out_addr_i,
+# input logic cache_coherent_i,
+# output logic port1_accept_o,
+# output logic port1_drop_o,
+# output logic port1_miss_o,
+# output logic port2_accept_o,
+# output logic port2_drop_o,
+# output logic port2_miss_o,
+# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o,
+# output logic cache_coherent_o,
+# output logic miss_o,
+# output logic multi_o,
+# output logic prot_o,
+# output logic prefetch_o,
+# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i,
+# input logic [AXI_ID_WIDTH-1:0] in_id_i,
+# input logic [7:0] in_len_i,
+# input logic [AXI_USER_WIDTH-1:0] in_user_i,
+# output logic [AXI_S_ADDR_WIDTH-1:0] in_addr_o,
+# output logic [AXI_ID_WIDTH-1:0] in_id_o,
+# output logic [7:0] in_len_o,
+# output logic [AXI_USER_WIDTH-1:0] in_user_o
+# );
+""" #docstring_begin
+ //-------------Internal Signals----------------------
+ typedef enum logic {IDLE, WAIT} state_t;
+ logic state_SP; // Present state
+ logic state_SN; // Next State
+ logic port1_accept_SN;
+ logic port1_drop_SN;
+ logic port1_miss_SN;
+ logic port2_accept_SN;
+ logic port2_drop_SN;
+ logic port2_miss_SN;
+ logic miss_SN;
+ logic multi_SN;
+ logic prot_SN;
+ logic prefetch_SN;
+ logic cache_coherent_SN;
+ logic [AXI_M_ADDR_WIDTH-1:0] out_addr_DN;
+ logic out_reg_en_S;
+ //----------FSM comb------------------------------
+ always_comb begin: FSM_COMBO
+ state_SN = state_SP;
+ port1_accept_SN = 1'b0;
+ port1_drop_SN = 1'b0;
+ port1_miss_SN = 1'b0;
+ port2_accept_SN = 1'b0;
+ port2_drop_SN = 1'b0;
+ port2_miss_SN = 1'b0;
+ miss_SN = 1'b0;
+ multi_SN = 1'b0;
+ prot_SN = 1'b0;
+ prefetch_SN = 1'b0;
+ cache_coherent_SN = 1'b0;
+ out_addr_DN = '0;
+ out_reg_en_S = 1'b0; // by default hold register output
+ unique case(state_SP)
+ IDLE :
+ if ( (port1_addr_valid_i & select_i) | (port2_addr_valid_i & ~select_i) ) begin
+ out_reg_en_S = 1'b1;
+ state_SN = WAIT;
+ // Select inputs for output registers
+ if (port1_addr_valid_i & select_i) begin
+ port1_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
+ port1_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
+ port1_miss_SN = no_hit_i;
+ port2_accept_SN = 1'b0;
+ port2_drop_SN = 1'b0;
+ port2_miss_SN = 1'b0;
+ end else if (port2_addr_valid_i & ~select_i) begin
+ port1_accept_SN = 1'b0;
+ port1_drop_SN = 1'b0;
+ port1_miss_SN = 1'b0;
+ port2_accept_SN = ~(no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
+ port2_drop_SN = (no_hit_i | multi_hit_i | ~no_prot_i | prefetch_i);
+ port2_miss_SN = no_hit_i;
+ end
+ miss_SN = port1_miss_SN | port2_miss_SN;
+ multi_SN = multi_hit_i;
+ prot_SN = ~no_prot_i;
+ prefetch_SN = ~no_hit_i & prefetch_i;
+ cache_coherent_SN = cache_coherent_i;
+ out_addr_DN = out_addr_i;
+ end
+ WAIT :
+ if ( port1_sent_i | port2_sent_i ) begin
+ out_reg_en_S = 1'b1; // "clear" the register
+ state_SN = IDLE;
+ end
+ default : begin
+ state_SN = IDLE;
+ end
+ endcase
+ end
+ //----------FSM seq-------------------------------
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: FSM_SEQ
+ if (Rst_RBI == 1'b0)
+ state_SP <= IDLE;
+ else
+ state_SP <= state_SN;
+ end
+ //----------Output seq--------------------------
+ always_ff @(posedge Clk_CI, negedge Rst_RBI) begin: OUTPUT_SEQ
+ if (Rst_RBI == 1'b0) begin
+ port1_accept_o = 1'b0;
+ port1_drop_o = 1'b0;
+ port1_miss_o = 1'b0;
+ port2_accept_o = 1'b0;
+ port2_drop_o = 1'b0;
+ port2_miss_o = 1'b0;
+ miss_o = 1'b0;
+ multi_o = 1'b0;
+ prot_o = 1'b0;
+ prefetch_o = 1'b0;
+ cache_coherent_o = 1'b0;
+ out_addr_o = '0;
+ in_addr_o = '0;
+ in_id_o = '0;
+ in_len_o = '0;
+ in_user_o = '0;
+ end else if (out_reg_en_S == 1'b1) begin
+ port1_accept_o = port1_accept_SN;
+ port1_drop_o = port1_drop_SN;
+ port1_miss_o = port1_miss_SN;
+ port2_accept_o = port2_accept_SN;
+ port2_drop_o = port2_drop_SN;
+ port2_miss_o = port2_miss_SN;
+ miss_o = miss_SN;
+ multi_o = multi_SN;
+ prot_o = prot_SN;
+ prefetch_o = prefetch_SN;
+ cache_coherent_o = cache_coherent_SN;
+ out_addr_o = out_addr_DN;
+ in_addr_o = in_addr_i;
+ in_id_o = in_id_i;
+ in_len_o = in_len_i;
+ in_user_o = in_user_i;
+ end
+ end // block: OUTPUT_SEQ
+# endmodule
--- /dev/null
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+class l2_tlb(Elaboratable):
+ def __init__(self):
+ self.clk_i = Signal() # input
+ self.rst_ni = Signal() # input
+ self.we_i = Signal() # input
+ self.waddr_i = Signal(AXI_LITE_ADDR_WIDTH) # input
+ self.wdata_i = Signal(AXI_LITE_DATA_WIDTH) # input
+ self.start_i = Signal() # input
+ self.busy_o = Signal() # output
+ self.in_addr_i = Signal(AXI_S_ADDR_WIDTH) # input
+ self.rw_type_i = Signal() # input
+ self.out_ready_i = Signal() # input
+ self.out_valid_o = Signal() # output
+ self.hit_o = Signal() # output
+ self.miss_o = Signal() # output
+ self.prot_o = Signal() # output
+ self.multi_o = Signal() # output
+ self.cache_coherent_o = Signal() # output
+ self.out_addr_o = Signal(AXI_M_ADDR_WIDTH) # output
+ def elaborate(self, platform=None):
+ m = Module()
+ return m
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# //`include "pulp_soc_defines.sv"
+# ////import CfMath::log2;
+# //`define MULTI_HIT_FULL_SET // Enable full multi hit detection. Always the entire set is searched.
+# //`define MULTI_HIT_CUR_CYCLE // Enable partial multi hit detection. Only multi hits in the same search cycle are detected.
+# //`ifdef MULTI_HIT_FULL_SET
+# // `ifndef MULTI_HIT_CUR_CYCLE
+# // `define MULTI_HIT_CUR_CYCLE
+# // `endif
+# //`endif
+# module l2_tlb
+# //#(
+# // parameter AXI_S_ADDR_WIDTH = 32,
+# // parameter AXI_M_ADDR_WIDTH = 40,
+# // parameter AXI_LITE_DATA_WIDTH = 64,
+# // parameter AXI_LITE_ADDR_WIDTH = 32,
+# // parameter N_SETS = 32,
+# // parameter N_OFFSETS = 4, //per port. There are 2 ports.
+# // parameter PAGE_SIZE = 4096, // 4kB
+# // parameter N_PAR_VA_RAMS = 4,
+# // parameter HIT_OFFSET_STORE_WIDTH = 2 // Num of bits of VA RAM offset stored. This should not be greater than OFFSET_WIDTH
+# // )
+# (
+# input logic clk_i,
+# input logic rst_ni,
+# input logic we_i,
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] waddr_i,
+# input logic [AXI_LITE_DATA_WIDTH-1:0] wdata_i,
+# input logic start_i,
+# output logic busy_o,
+# input logic [AXI_S_ADDR_WIDTH-1:0] in_addr_i,
+# input logic rw_type_i, //1 => write, 0=> read
+# input logic out_ready_i,
+# output logic out_valid_o,
+# output logic hit_o,
+# output logic miss_o,
+# output logic prot_o,
+# output logic multi_o,
+# output logic cache_coherent_o,
+# output logic [AXI_M_ADDR_WIDTH-1:0] out_addr_o
+# );
+""" #docstring_begin
+ localparam VA_RAM_DEPTH = N_SETS * N_OFFSETS * 2;
+ localparam VA_RAM_ADDR_WIDTH = log2(VA_RAM_DEPTH);
+ localparam PA_RAM_ADDR_WIDTH = log2(PA_RAM_DEPTH);
+ localparam SET_WIDTH = log2(N_SETS);
+ localparam OFFSET_WIDTH = log2(N_OFFSETS);
+ localparam LL_WIDTH = log2(N_PAR_VA_RAMS);
+ localparam IGNORE_LSB = log2(PAGE_SIZE);
+ logic [N_PAR_VA_RAMS-1:0] hit, prot, multi_hit, cache_coherent;
+ logic [N_PAR_VA_RAMS-1:0] ram_we;
+ logic last_search, last_search_next;
+ logic first_search, first_search_next;
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] ram_waddr;
+ logic [N_PAR_VA_RAMS-1:0][SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr;
+ logic pa_ram_we;
+ logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr, pa_port0_waddr; // PA RAM read, Write addr;
+ logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_raddr_reg_SN, pa_port0_raddr_reg_SP; // registered addresses, needed for WAIT_ON_WRITE;
+ logic [PA_RAM_ADDR_WIDTH-1:0] pa_port0_addr; // PA RAM addr
+ logic [PA_RAM_DATA_WIDTH-1:0] pa_port0_data, pa_data, pa_port0_data_reg; // PA RAM data
+ logic pa_ram_store_data_SN, pa_ram_store_data_SP;
+ logic hit_top, prot_top, multi_hit_top, first_hit_top;
+ logic output_sent;
+ int hit_block_num;
+ logic searching, search_done;
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port0_addr, port0_raddr; // VA RAM port0 addr
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] port1_addr; // VA RAM port1 addr
+ logic [OFFSET_WIDTH-1:0] offset_addr, offset_addr_d;
+ logic [OFFSET_WIDTH-1:0] offset_start_addr, offset_end_addr;
+ logic [SET_WIDTH-1:0] set_num;
+ logic va_output_valid;
+ logic searching_q;
+ genvar z;
+ // Search FSM
+ typedef enum logic [1:0] {IDLE, SEARCH, DONE} search_state_t;
+ search_state_t search_SP; // Present state
+ search_state_t search_SN; // Next State
+ // Output FSM
+ typedef enum logic [1:0] {OUT_IDLE, SEND_OUTPUT, WAIT_ON_WRITE} out_state_t;
+ out_state_t out_SP; // Present state
+ out_state_t out_SN; // Next State
+ logic miss_next;
+ logic hit_next;
+ logic prot_next;
+ logic multi_next;
+ logic cache_coherent_next;
+ // Generate the VA Block rams and their surrounding logic
+ generate
+ for (z = 0; z < N_PAR_VA_RAMS; z++) begin : VA_RAMS
+ check_ram
+ #(
+ )
+ u_check_ram
+ (
+ .clk_i ( clk_i ),
+ .rst_ni ( rst_ni ),
+ .in_addr ( in_addr_i ),
+ .rw_type ( rw_type_i ),
+ .ram_we ( ram_we[z] ),
+ .port0_addr ( port0_addr ),
+ .port1_addr ( port1_addr ),
+ .ram_wdata ( wdata_i[VA_RAM_DATA_WIDTH-1:0] ),
+ .output_sent ( output_sent ),
+ .output_valid ( va_output_valid ),
+ .offset_addr_d ( offset_addr_d ),
+ .hit_addr ( hit_addr[z] ),
+ .master ( cache_coherent[z] ),
+ .hit ( hit[z] ),
+ .multi_hit ( multi_hit[z] ),
+ .prot ( prot[z] )
+ );
+ end // for (z = 0; z < N_PAR_VA_RAMS; z++)
+ endgenerate
+ ////////////////// ---------------- Control and Address --------------- ////////////////////////
+ // FSM
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ search_SP <= IDLE;
+ end else begin
+ search_SP <= search_SN;
+ end
+ end
+ always_comb begin : SEARCH_FSM
+ search_SN = search_SP;
+ busy_o = 1'b0;
+ searching = 1'b0;
+ search_done = 1'b0;
+ last_search_next = 1'b0;
+ first_search_next = first_search;
+ unique case (search_SP)
+ IDLE : begin
+ if (start_i) begin
+ search_SN = SEARCH;
+ first_search_next = 1'b1;
+ end
+ end
+ SEARCH : begin
+ busy_o = 1'b1;
+ // detect last search cycle
+ if ( (first_search == 1'b0) && (offset_addr == offset_end_addr) )
+ last_search_next = 1'b1;
+ // pause search during VA RAM reconfiguration
+ if (|ram_we) begin
+ searching = 1'b0;
+ end else begin
+ searching = 1'b1;
+ first_search_next = 1'b0;
+ end
+ if (va_output_valid) begin
+ // stop search
+ if (last_search | prot_top | multi_hit_top) begin
+ if (last_search | prot_top | multi_hit_top | hit_top ) begin
+ search_SN = DONE;
+ search_done = 1'b1;
+ end
+ end
+ end
+ DONE : begin
+ busy_o = 1'b1;
+ if (out_valid_o & out_ready_i)
+ search_SN = IDLE;
+ end
+ default : begin
+ search_SN = IDLE;
+ end
+ endcase // case (search_SP)
+ end // always_comb begin
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ last_search <= 1'b0;
+ first_search <= 1'b0;
+ end else begin
+ last_search <= last_search_next;
+ first_search <= first_search_next;
+ end
+ end
+ /*
+ * VA RAM address generation
+ *
+ * The input address and set number, and thus the offset start address, are available in the
+ * cycle after the start signal. The buffered offset_addr becomes available one cycle later.
+ * During the first search cycle, we therefore directly use offset_start_addr for the lookup.
+ */
+ assign set_num = in_addr_i[SET_WIDTH+IGNORE_LSB -1 : IGNORE_LSB];
+ assign port0_raddr[OFFSET_WIDTH] = 1'b0;
+ assign port1_addr [OFFSET_WIDTH] = 1'b1;
+ assign port0_raddr[OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr;
+ assign port1_addr [OFFSET_WIDTH-1:0] = first_search ? offset_start_addr : offset_addr;
+ assign port0_raddr[SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num;
+ assign port1_addr [SET_WIDTH+OFFSET_WIDTH : OFFSET_WIDTH+1] = set_num;
+ assign port0_addr = ram_we ? ram_waddr : port0_raddr;
+ // The outputs of the BRAMs are only valid if in the previous cycle:
+ // 1. the inputs were valid, and
+ // 2. the BRAMs were not written to.
+ // Otherwise, the outputs must be ignored.
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ searching_q <= 1'b0;
+ end else begin
+ searching_q <= searching;
+ end
+ end
+ assign va_output_valid = searching_q;
+ // Address offset for looking up the VA RAMs
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ offset_addr <= 0;
+ end else if (first_search) begin
+ offset_addr <= offset_start_addr + 1'b1;
+ end else if (searching) begin
+ offset_addr <= offset_addr + 1'b1;
+ end
+ end
+ // Delayed address offset for looking up the PA RAM upon a hit in the VA RAMs
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ offset_addr_d <= 0;
+ end else if (first_search) begin
+ offset_addr_d <= offset_start_addr;
+ end else if (searching) begin
+ offset_addr_d <= offset_addr_d + 1'b1;
+ end
+ end
+ // Store the offset addr for hit to reduce latency for next search.
+ generate
+ logic [N_SETS-1:0][HIT_OFFSET_STORE_WIDTH-1:0] hit_offset_addr; // Contains offset addr for previous hit for every SET.
+ logic [SET_WIDTH+OFFSET_WIDTH+1-1:0] hit_addr_reg;
+ assign offset_start_addr = { hit_offset_addr[set_num] , {{OFFSET_WIDTH-HIT_OFFSET_STORE_WIDTH}{1'b0}} };
+ assign offset_end_addr = hit_offset_addr[set_num]-1'b1;
+ // Register the hit addr
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ hit_addr_reg <= 0;
+ end else if (hit_top) begin
+ hit_addr_reg <= hit_addr[hit_block_num];
+ end
+ end
+ // Store hit addr for each set. The next search in the same set will start from the saved addr.
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ hit_offset_addr <= 0;
+ end else if (hit_o) begin
+ hit_offset_addr[set_num][HIT_OFFSET_STORE_WIDTH-1:0] <= hit_addr_reg[OFFSET_WIDTH-1 : (OFFSET_WIDTH - HIT_OFFSET_STORE_WIDTH)];
+ end
+ end
+`else // No need to store offset if full multi hit detection is enabled because the entire SET is searched.
+ assign offset_start_addr = 0;
+ assign offset_end_addr = {OFFSET_WIDTH{1'b1}};
+ end else begin // if (HIT_OFFSET_STORE_WIDTH > 0)
+ assign offset_start_addr = 0;
+ assign offset_end_addr = {OFFSET_WIDTH{1'b1}};
+ end
+ endgenerate
+ assign prot_top = |prot;
+ //////////////////////////////////////////////////////////////////////////////////////
+ // check for hit, multi hit
+ // In case of a multi hit, the hit_block_num indicates the lowest VA RAM with a hit.
+ // In case of a multi hit in the same VA RAM, Port 0 is given priority.
+ always_comb begin : HIT_CHECK
+ hit_top = |hit;
+ hit_block_num = 0;
+ first_hit_top = 1'b0;
+ multi_hit_top = 1'b0;
+ for (int i=N_PAR_VA_RAMS-1; i>=0; i--) begin
+ if (hit[i] == 1'b1) begin
+ if (multi_hit[i] | first_hit_top ) begin
+ multi_hit_top = 1'b1;
+ end
+ first_hit_top = 1'b1;
+ hit_block_num = i;
+ end
+ end // for (int i=N_PAR_VA_RAMS-1; i>=0; i--)
+ end // always_comb begin
+ ///////////////////// ------------- Outputs ------------ //////////////////////////////////
+ //// FSM
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ out_SP <= OUT_IDLE;
+ pa_ram_store_data_SP <= 1'b0;
+ pa_port0_raddr_reg_SP <= 'b0;
+ end else begin
+ out_SP <= out_SN;
+ pa_ram_store_data_SP <= pa_ram_store_data_SN;
+ pa_port0_raddr_reg_SP <= pa_port0_raddr_reg_SN;
+ end
+ end
+ always_comb begin : OUTPUT_FSM
+ out_SN = out_SP;
+ miss_next = miss_o;
+ prot_next = prot_o;
+ multi_next = multi_o;
+ hit_next = hit_o;
+ cache_coherent_next = cache_coherent_o;
+ pa_port0_raddr_reg_SN = pa_port0_raddr_reg_SP;
+ pa_port0_raddr = 'b0;
+ pa_ram_store_data_SN = 1'b0;
+ out_valid_o = 1'b0;
+ output_sent = 1'b0;
+ unique case (out_SP)
+ OUT_IDLE : begin
+ hit_next = 1'b0;
+ miss_next = 1'b0;
+ prot_next = 1'b0;
+ multi_next = 1'b0;
+ cache_coherent_next = 1'b0;
+ // abort transaction
+ if ((search_done & ~hit_top) | prot_top | multi_hit_top) begin
+ if (search_done & ~hit_top) begin
+ miss_next = 1'b1;
+ end
+ if (prot_top) begin
+ prot_next = 1'b1;
+ hit_next = 1'b1;
+ end
+ if (multi_hit_top) begin
+ multi_next = 1'b1;
+ hit_next = 1'b1;
+ end
+ // read PA RAM
+ end else if (search_done & hit_top) begin
+ hit_next = 1'b1;
+ cache_coherent_next = cache_coherent[hit_block_num];
+ pa_port0_raddr = (N_PAR_VA_RAMS * hit_addr[hit_block_num]) + hit_block_num;
+ pa_port0_raddr_reg_SN = pa_port0_raddr;
+ // read PA RAM now
+ if (~pa_ram_we) begin
+ pa_ram_store_data_SN = 1'b1;
+ // read PA RAM after PA RAM reconfiguration
+ end else begin // pa_ram_we
+ end
+ end
+ end
+ WAIT_ON_WRITE : begin
+ if ( ~pa_ram_we ) begin
+ pa_port0_raddr = pa_port0_raddr_reg_SP;
+ pa_ram_store_data_SN = 1'b1;
+ end
+ end
+ SEND_OUTPUT : begin
+ out_valid_o = 1'b1;
+ if (out_ready_i) begin
+ out_SN = OUT_IDLE;
+ output_sent = 1'b1;
+ end
+ end
+ default : begin
+ out_SN = OUT_IDLE;
+ end
+ endcase // case (out_SP)
+ end // always_comb begin
+ //// Output signals
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ miss_o <= 1'b0;
+ prot_o <= 1'b0;
+ multi_o <= 1'b0;
+ hit_o <= 1'b0;
+ cache_coherent_o <= 1'b0;
+ end else begin
+ miss_o <= miss_next;
+ prot_o <= prot_next;
+ multi_o <= multi_next;
+ hit_o <= hit_next;
+ cache_coherent_o <= cache_coherent_next;
+ end
+ end
+ ///////////////////////////////////////////////////////////////////////////////////////////////////
+ ///////////////////// --------------- Physical Address -------------- ////////////////////////////
+ /// PA Block RAM
+ ram_tp_no_change #(
+ )
+ pa_ram
+ (
+ .clk ( clk_i ),
+ .we ( pa_ram_we ),
+ .addr0 ( pa_port0_addr ),
+ .addr1 ( '0 ),
+ .d_i ( wdata_i[PA_RAM_DATA_WIDTH-1:0] ),
+ .d0_o ( pa_port0_data ),
+ .d1_o ( )
+ );
+ assign out_addr_o[IGNORE_LSB-1:0] = in_addr_i[IGNORE_LSB-1:0];
+ assign out_addr_o[AXI_M_ADDR_WIDTH-1:IGNORE_LSB] = pa_data;
+ always_ff @(posedge clk_i) begin
+ if (rst_ni == 0) begin
+ pa_port0_data_reg <= 0;
+ end else if (pa_ram_store_data_SP) begin
+ pa_port0_data_reg <= pa_port0_data;
+ end
+ end
+ assign pa_data = pa_ram_store_data_SP ? pa_port0_data : pa_port0_data_reg;
+///// Write enable for all block rams
+generate if (LL_WIDTH != 0) begin
+ always_comb begin
+ var reg[LL_WIDTH:0] para;
+ var int para_int;
+ for (para = 0; para < N_PAR_VA_RAMS; para=para+1'b1) begin
+ para_int = int'(para);
+ ram_we[para_int] = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b0) && (waddr_i[LL_WIDTH-1:0] == para);
+ end
+ end
+end else begin
+ assign ram_we[0] = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b0);
+// Addresses are word, not byte addresses
+assign pa_ram_we = we_i && (waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] == 1'b1); //waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH] will be 0 for all VA writes and 1 for all PA writes
+assign ram_waddr = waddr_i[LL_WIDTH+VA_RAM_ADDR_WIDTH-1:LL_WIDTH];
+assign pa_port0_waddr = waddr_i[PA_RAM_ADDR_WIDTH-1:0];
+assign pa_port0_addr = pa_ram_we ? pa_port0_waddr : pa_port0_raddr;
+# endmodule
+# // vim: ts=3 sw=3 sts=3 et nosmartindent autoindent foldmethod=marker tw=100
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# this file has been generated by sv2nmigen
+# //`include "pulp_soc_defines.sv"
+# ////import CfMath::log2;
+# //`define MY_ARRAY_SUM(MY_ARRAY,ARRAY_SIZE) ( (ARRAY_SIZE==1) ? MY_ARRAY[0] : (ARRAY_SIZE==2) ? MY_ARRAY[0] + MY_ARRAY[1] : (ARRAY_SIZE==3) ? MY_ARRAY[0] + MY_ARRAY[1] + MY_ARRAY[2] : (ARRAY_SIZE==4) ? MY_ARRAY[0] + MY_ARRAY[1] + MY_ARRAY[2] + MY_ARRAY[3] : 0 )
+# module rab_core
+# #(
+# parameter N_PORTS = 3,
+# parameter N_L2_SETS = 32,
+# parameter N_L2_SET_ENTRIES = 32,
+# parameter AXI_DATA_WIDTH = 64,
+# parameter AXI_S_ADDR_WIDTH = 32,
+# parameter AXI_M_ADDR_WIDTH = 40,
+# parameter AXI_LITE_DATA_WIDTH = 64,
+# parameter AXI_LITE_ADDR_WIDTH = 32,
+# parameter AXI_ID_WIDTH = 8,
+# parameter AXI_USER_WIDTH = 6,
+# parameter MH_FIFO_DEPTH = 16
+# )
+# (
+# input logic Clk_CI,
+# input logic Rst_RBI,
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi_awaddr,
+# input logic s_axi_awvalid,
+# output logic s_axi_awready,
+# input logic [AXI_LITE_DATA_WIDTH-1:0] s_axi_wdata,
+# input logic [AXI_LITE_DATA_WIDTH/8-1:0] s_axi_wstrb,
+# input logic s_axi_wvalid,
+# output logic s_axi_wready,
+# input logic [AXI_LITE_ADDR_WIDTH-1:0] s_axi_araddr,
+# input logic s_axi_arvalid,
+# output logic s_axi_arready,
+# input logic s_axi_rready,
+# output logic [AXI_LITE_DATA_WIDTH-1:0] s_axi_rdata,
+# output logic [1:0] s_axi_rresp,
+# output logic s_axi_rvalid,
+# output logic [1:0] s_axi_bresp,
+# output logic s_axi_bvalid,
+# input logic s_axi_bready,
+# output logic [N_PORTS-1:0] int_miss,
+# output logic [N_PORTS-1:0] int_prot,
+# output logic [N_PORTS-1:0] int_multi,
+# output logic [N_PORTS-1:0] int_prefetch,
+# output logic int_mhf_full,
+# output logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_axaddr_o,
+# output logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_axid_o,
+# output logic [N_PORTS-1:0] [7:0] int_axlen_o,
+# output logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_axuser_o,
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] port1_addr,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] port1_id,
+# input logic [N_PORTS-1:0] [7:0] port1_len,
+# input logic [N_PORTS-1:0] [2:0] port1_size,
+# input logic [N_PORTS-1:0] port1_addr_valid,
+# input logic [N_PORTS-1:0] port1_type,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] port1_user,
+# input logic [N_PORTS-1:0] port1_sent,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] port1_out_addr,
+# output logic [N_PORTS-1:0] port1_cache_coherent,
+# output logic [N_PORTS-1:0] port1_accept,
+# output logic [N_PORTS-1:0] port1_drop,
+# output logic [N_PORTS-1:0] port1_miss,
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] port2_addr,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] port2_id,
+# input logic [N_PORTS-1:0] [7:0] port2_len,
+# input logic [N_PORTS-1:0] [2:0] port2_size,
+# input logic [N_PORTS-1:0] port2_addr_valid,
+# input logic [N_PORTS-1:0] port2_type,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] port2_user,
+# input logic [N_PORTS-1:0] port2_sent,
+# output logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] port2_out_addr,
+# output logic [N_PORTS-1:0] port2_cache_coherent,
+# output logic [N_PORTS-1:0] port2_accept,
+# output logic [N_PORTS-1:0] port2_drop,
+# output logic [N_PORTS-1:0] port2_miss,
+# input logic [N_PORTS-1:0] miss_l2_i,
+# input logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] miss_l2_addr_i,
+# input logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] miss_l2_id_i,
+# input logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] miss_l2_user_i,
+# output logic [N_PORTS-1:0] [AXI_LITE_DATA_WIDTH-1:0] wdata_l2_o,
+# output logic [N_PORTS-1:0] [AXI_LITE_ADDR_WIDTH-1:0] waddr_l2_o,
+# output logic [N_PORTS-1:0] wren_l2_o
+# );
+from nmigen import Signal, Module, Const, Cat, Elaboratable
class rab_core(Elaboratable):
    """Port-level shell of the RAB core, translated from SystemVerilog.

    Only the port list has been translated so far: ``elaborate`` returns an
    empty module and the SystemVerilog body still has to be ported.

    NOTE(review): ``AXI_LITE_ADDR_WIDTH``, ``AXI_LITE_DATA_WIDTH`` and
    ``N_PORTS`` are not defined or imported in the visible part of this
    file; they must exist at module level before the class can be
    instantiated.

    NOTE(review): ports that the SystemVerilog declares as packed 2-D
    vectors (``[N_PORTS-1:0][W-1:0]``) are still 1-bit placeholders. The
    per-port element width taken from the SystemVerilog port list is given
    in the comment next to each of them.
    """

    def __init__(self):
        # AXI4-Lite configuration interface
        self.s_axi_awaddr = Signal(AXI_LITE_ADDR_WIDTH)  # input
        self.s_axi_awvalid = Signal()  # input
        self.s_axi_awready = Signal()  # output
        self.s_axi_wdata = Signal(AXI_LITE_DATA_WIDTH)  # input
        # one strobe bit per data byte: [AXI_LITE_DATA_WIDTH/8-1:0] in the SV
        self.s_axi_wstrb = Signal(AXI_LITE_DATA_WIDTH // 8)  # input
        self.s_axi_wvalid = Signal()  # input
        self.s_axi_wready = Signal()  # output
        self.s_axi_araddr = Signal(AXI_LITE_ADDR_WIDTH)  # input
        self.s_axi_arvalid = Signal()  # input
        self.s_axi_arready = Signal()  # output
        self.s_axi_rready = Signal()  # input
        self.s_axi_rdata = Signal(AXI_LITE_DATA_WIDTH)  # output
        self.s_axi_rresp = Signal(2)  # output
        self.s_axi_rvalid = Signal()  # output
        self.s_axi_bresp = Signal(2)  # output
        self.s_axi_bvalid = Signal()  # output
        self.s_axi_bready = Signal()  # input

        # per-port status flags
        self.int_miss = Signal(N_PORTS)  # output
        self.int_prot = Signal(N_PORTS)  # output
        self.int_multi = Signal(N_PORTS)  # output
        self.int_prefetch = Signal(N_PORTS)  # output
        self.int_mhf_full = Signal()  # output
        self.int_axaddr_o = Signal()  # output, TODO N_PORTS x AXI_S_ADDR_WIDTH
        self.int_axid_o = Signal()  # output, TODO N_PORTS x AXI_ID_WIDTH
        self.int_axlen_o = Signal()  # output, TODO N_PORTS x 8
        self.int_axuser_o = Signal()  # output, TODO N_PORTS x AXI_USER_WIDTH

        # port 1
        self.port1_addr = Signal()  # input, TODO N_PORTS x AXI_S_ADDR_WIDTH
        self.port1_id = Signal()  # input, TODO N_PORTS x AXI_ID_WIDTH
        self.port1_len = Signal()  # input, TODO N_PORTS x 8
        self.port1_size = Signal()  # input, TODO N_PORTS x 3
        self.port1_addr_valid = Signal(N_PORTS)  # input
        self.port1_type = Signal(N_PORTS)  # input
        self.port1_user = Signal()  # input, TODO N_PORTS x AXI_USER_WIDTH
        self.port1_sent = Signal(N_PORTS)  # input
        self.port1_out_addr = Signal()  # output, TODO N_PORTS x AXI_M_ADDR_WIDTH
        self.port1_cache_coherent = Signal(N_PORTS)  # output
        self.port1_accept = Signal(N_PORTS)  # output
        self.port1_drop = Signal(N_PORTS)  # output
        self.port1_miss = Signal(N_PORTS)  # output

        # port 2
        self.port2_addr = Signal()  # input, TODO N_PORTS x AXI_S_ADDR_WIDTH
        self.port2_id = Signal()  # input, TODO N_PORTS x AXI_ID_WIDTH
        self.port2_len = Signal()  # input, TODO N_PORTS x 8
        self.port2_size = Signal()  # input, TODO N_PORTS x 3
        self.port2_addr_valid = Signal(N_PORTS)  # input
        self.port2_type = Signal(N_PORTS)  # input
        self.port2_user = Signal()  # input, TODO N_PORTS x AXI_USER_WIDTH
        self.port2_sent = Signal(N_PORTS)  # input
        self.port2_out_addr = Signal()  # output, TODO N_PORTS x AXI_M_ADDR_WIDTH
        self.port2_cache_coherent = Signal(N_PORTS)  # output
        self.port2_accept = Signal(N_PORTS)  # output
        self.port2_drop = Signal(N_PORTS)  # output
        self.port2_miss = Signal(N_PORTS)  # output

        # L2 TLB miss / write interface
        self.miss_l2_i = Signal(N_PORTS)  # input
        self.miss_l2_addr_i = Signal()  # input, TODO N_PORTS x AXI_S_ADDR_WIDTH
        self.miss_l2_id_i = Signal()  # input, TODO N_PORTS x AXI_ID_WIDTH
        self.miss_l2_user_i = Signal()  # input, TODO N_PORTS x AXI_USER_WIDTH
        self.wdata_l2_o = Signal()  # output, TODO N_PORTS x AXI_LITE_DATA_WIDTH
        self.waddr_l2_o = Signal()  # output, TODO N_PORTS x AXI_LITE_ADDR_WIDTH
        self.wren_l2_o = Signal(N_PORTS)  # output

    def elaborate(self, platform=None):
        """Return an empty module; the logic has not been translated yet."""
        m = Module()
        return m
+ // ███████╗██╗ ██████╗ ███╗ ██╗ █████╗ ██╗ ███████╗
+ // ██╔════╝██║██╔════╝ ████╗ ██║██╔══██╗██║ ██╔════╝
+ // ███████╗██║██║ ███╗██╔██╗ ██║███████║██║ ███████╗
+ // ╚════██║██║██║ ██║██║╚██╗██║██╔══██║██║ ╚════██║
+ // ███████║██║╚██████╔╝██║ ╚████║██║ ██║███████╗███████║
+ // ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚══════╝
+ // signals
+ localparam integer ENABLE_L2TLB[N_PORTS-1:0] = `EN_L2TLB_ARRAY;
+ localparam integer N_SLICES[N_PORTS-1:0] = `N_SLICES_ARRAY;
+ localparam N_SLICES_MAX = `N_SLICES_MAX;
+ localparam N_REGS = 4*N_SLICES_TOT + 4;
+ localparam AXI_SIZE_WIDTH = log2(AXI_DATA_WIDTH/8);
+ localparam PORT_ID_WIDTH = (N_PORTS < 2) ? 1 : log2(N_PORTS);
+ logic [N_PORTS-1:0] [15:0] p1_burst_size;
+ logic [N_PORTS-1:0] [15:0] p2_burst_size;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p1_align_addr;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p2_align_addr;
+ logic [N_PORTS-1:0] [AXI_SIZE_WIDTH-1:0] p1_mask;
+ logic [N_PORTS-1:0] [AXI_SIZE_WIDTH-1:0] p2_mask;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p1_max_addr;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] p2_max_addr;
+ logic [N_PORTS-1:0] p1_prefetch;
+ logic [N_PORTS-1:0] p2_prefetch;
+ logic [N_PORTS-1:0] int_rw;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_addr_min;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] int_addr_max;
+ logic [N_PORTS-1:0] [AXI_ID_WIDTH-1:0] int_id;
+ logic [N_PORTS-1:0] [7:0] int_len;
+ logic [N_PORTS-1:0] [AXI_USER_WIDTH-1:0] int_user;
+ logic [N_PORTS-1:0] hit;
+ logic [N_PORTS-1:0] prot;
+ logic [N_PORTS-1:0] prefetch;
+ logic [N_PORTS-1:0] no_hit;
+ logic [N_PORTS-1:0] no_prot;
+ logic [N_PORTS-1:0] [N_SLICES_MAX-1:0] hit_slices;
+ logic [N_PORTS-1:0] [N_SLICES_MAX-1:0] prot_slices;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] out_addr;
+ logic [N_PORTS-1:0] [AXI_M_ADDR_WIDTH-1:0] out_addr_reg;
+ logic [N_PORTS-1:0] cache_coherent;
+ logic [N_PORTS-1:0] cache_coherent_reg;
+ logic [N_PORTS-1:0] select;
+ reg [N_PORTS-1:0] curr_priority;
+ reg [N_PORTS-1:0] multi_hit;
+ logic [N_PORTS-1:0] miss_valid_mhf;
+ logic [N_PORTS-1:0] [AXI_S_ADDR_WIDTH-1:0] miss_addr_mhf;
+ logic [N_PORTS-1:0] [MISS_META_WIDTH-1:0] miss_meta_mhf;
+ logic [N_REGS-1:0] [63:0] int_cfg_regs;
+ logic [N_PORTS-1:0] [4*N_SLICES_MAX-1:0] [63:0] int_cfg_regs_slices;
+ logic L1AllowMultiHit_S;
+ genvar z;
+ // █████╗ ███████╗███████╗██╗ ██████╗ ███╗ ██╗███╗ ███╗███████╗███╗ ██╗████████╗███████╗
+ // ██╔══██╗██╔════╝██╔════╝██║██╔════╝ ████╗ ██║████╗ ████║██╔════╝████╗ ██║╚══██╔══╝██╔════╝
+ // ███████║███████╗███████╗██║██║ ███╗██╔██╗ ██║██╔████╔██║█████╗ ██╔██╗ ██║ ██║ ███████╗
+ // ██╔══██║╚════██║╚════██║██║██║ ██║██║╚██╗██║██║╚██╔╝██║██╔══╝ ██║╚██╗██║ ██║ ╚════██║
+ // ██║ ██║███████║███████║██║╚██████╔╝██║ ╚████║██║ ╚═╝ ██║███████╗██║ ╚████║ ██║ ███████║
+ // ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═╝ ╚═══╝ ╚═╝ ╚══════╝
+ // assignments
+ always_comb
+ begin : PORT_SELECT
+ var integer idx;
+ for (idx=0; idx<N_PORTS; idx++) begin
+ // select = 1 -> port1 active
+ // select = 0 -> port2 active
+ select[idx] = (curr_priority[idx] & port1_addr_valid[idx]) | ~port2_addr_valid[idx];
+ p1_burst_size[idx] = (port1_len[idx] + 1) << port1_size[idx];
+ p2_burst_size[idx] = (port2_len[idx] + 1) << port2_size[idx];
+ // align min addr for max addr computation to allow for smart AXI bursts around the 4k boundary
+ if (port1_size[idx] == 3'b001)
+ p1_mask[idx] = 3'b110;
+ else if (port1_size[idx] == 3'b010)
+ p1_mask[idx] = 3'b100;
+ else if (port1_size[idx] == 3'b011)
+ p1_mask[idx] = 3'b000;
+ else
+ p1_mask[idx] = 3'b111;
+ p1_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port1_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH];
+ p1_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port1_addr[idx][AXI_SIZE_WIDTH-1:0] & p1_mask[idx];
+ if (port2_size[idx] == 3'b001)
+ p2_mask[idx] = 3'b110;
+ else if (port2_size[idx] == 3'b010)
+ p2_mask[idx] = 3'b100;
+ else if (port2_size[idx] == 3'b011)
+ p2_mask[idx] = 3'b000;
+ else
+ p2_mask[idx] = 3'b111;
+ if (port1_user[idx] == {AXI_USER_WIDTH{1'b1}})
+ p1_prefetch[idx] = 1'b1;
+ else
+ p1_prefetch[idx] = 1'b0;
+ if (port2_user[idx] == {AXI_USER_WIDTH{1'b1}})
+ p2_prefetch[idx] = 1'b1;
+ else
+ p2_prefetch[idx] = 1'b0;
+ p2_align_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH] = port2_addr[idx][AXI_S_ADDR_WIDTH-1:AXI_SIZE_WIDTH];
+ p2_align_addr[idx][AXI_SIZE_WIDTH-1:0] = port2_addr[idx][AXI_SIZE_WIDTH-1:0] & p2_mask[idx];
+ p1_max_addr[idx] = p1_align_addr[idx] + p1_burst_size[idx] - 1;
+ p2_max_addr[idx] = p2_align_addr[idx] + p2_burst_size[idx] - 1;
+ int_addr_min[idx] = select[idx] ? port1_addr[idx] : port2_addr[idx];
+ int_addr_max[idx] = select[idx] ? p1_max_addr[idx] : p2_max_addr[idx];
+ int_rw[idx] = select[idx] ? port1_type[idx] : port2_type[idx];
+ int_id[idx] = select[idx] ? port1_id[idx] : port2_id[idx];
+ int_len[idx] = select[idx] ? port1_len[idx] : port2_len[idx];
+ int_user[idx] = select[idx] ? port1_user[idx] : port2_user[idx];
+ prefetch[idx] = select[idx] ? p1_prefetch[idx] : p2_prefetch[idx];
+ hit [idx] = | hit_slices [idx];
+ prot[idx] = | prot_slices[idx];
+ no_hit [idx] = ~hit [idx];
+ no_prot[idx] = ~prot[idx];
+ port1_out_addr[idx] = out_addr_reg[idx];
+ port2_out_addr[idx] = out_addr_reg[idx];
+ port1_cache_coherent[idx] = cache_coherent_reg[idx];
+ port2_cache_coherent[idx] = cache_coherent_reg[idx];
+ end
+ end
+ always_comb
+ begin
+ var integer idx_port, idx_slice;
+ var integer reg_num;
+ reg_num=0;
+ for ( idx_port = 0; idx_port < N_PORTS; idx_port++ ) begin
+ for ( idx_slice = 0; idx_slice < 4*N_SLICES[idx_port]; idx_slice++ ) begin
+ int_cfg_regs_slices[idx_port][idx_slice] = int_cfg_regs[4+reg_num];
+ reg_num++;
+ end
+ // int_cfg_regs_slices[idx_port][N_SLICES_MAX:N_SLICES[idx_port]] will be dangling
+ // Fix to zero. Synthesis will remove these signals.
+ // int_cfg_regs_slices[idx_port][4*N_SLICES_MAX-1:4*N_SLICES[idx_port]] = 0;
+ end
+ end
+ always @(posedge Clk_CI or negedge Rst_RBI)
+ var integer idx;
+ if (Rst_RBI == 1'b0)
+ curr_priority = 'h0;
+ else begin
+ for (idx=0; idx<N_PORTS; idx++) begin
+ if (port1_accept[idx] || port1_drop[idx])
+ curr_priority[idx] = 1'b1;
+ else if (port2_accept[idx] || port2_drop[idx])
+ curr_priority[idx] = 1'b0;
+ end
+ end
+ end
+ // find port that misses
+ logic [PORT_ID_WIDTH-1:0] PortIdx_D; // index of the first missing port
+ var integer idx_miss;
+ always_comb begin : MHF_PORT_SELECT
+ PortIdx_D = 'b0;
+ for (idx_miss = 0; idx_miss < N_PORTS; idx_miss++) begin
+ if (miss_valid_mhf[idx_miss] == 1'b1) begin
+ PortIdx_D = idx_miss;
+ break;
+ end
+ end
+ end // always_comb begin
+ // █████╗ ██╗ ██╗██╗ ██████╗ █████╗ ██████╗ ██████╗███████╗ ██████╗
+ // ██╔══██╗╚██╗██╔╝██║ ██╔══██╗██╔══██╗██╔══██╗ ██╔════╝██╔════╝██╔════╝
+ // ███████║ ╚███╔╝ ██║ ██████╔╝███████║██████╔╝ ██║ █████╗ ██║ ███╗
+ // ██╔══██║ ██╔██╗ ██║ ██╔══██╗██╔══██║██╔══██╗ ██║ ██╔══╝ ██║ ██║
+ // ██║ ██║██╔╝ ██╗██║ ██║ ██║██║ ██║██████╔╝ ╚██████╗██║ ╚██████╔╝
+ // ╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═════╝╚═╝ ╚═════╝
+ axi_rab_cfg
+ #(
+ .N_REGS ( N_REGS ),
+ .N_L2_SETS ( N_L2_SETS ),
+ .N_FLAGS ( 4 ),
+ )
+ u_axi_rab_cfg
+ (
+ .Clk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .s_axi_awaddr ( s_axi_awaddr ),
+ .s_axi_awvalid ( s_axi_awvalid ),
+ .s_axi_wdata ( s_axi_wdata ),
+ .s_axi_wstrb ( s_axi_wstrb ),
+ .s_axi_wvalid ( s_axi_wvalid ),
+ .s_axi_bready ( s_axi_bready ),
+ .s_axi_araddr ( s_axi_araddr ),
+ .s_axi_arvalid ( s_axi_arvalid ),
+ .s_axi_rready ( s_axi_rready ),
+ .s_axi_arready ( s_axi_arready ),
+ .s_axi_rdata ( s_axi_rdata ),
+ .s_axi_rresp ( s_axi_rresp ),
+ .s_axi_rvalid ( s_axi_rvalid ),
+ .s_axi_wready ( s_axi_wready ),
+ .s_axi_bresp ( s_axi_bresp ),
+ .s_axi_bvalid ( s_axi_bvalid ),
+ .s_axi_awready ( s_axi_awready ),
+ .L1Cfg_DO ( int_cfg_regs ),
+ .L1AllowMultiHit_SO ( L1AllowMultiHit_S ),
+ .MissAddr_DI ( miss_addr_mhf[PortIdx_D] ),
+ .MissMeta_DI ( miss_meta_mhf[PortIdx_D] ),
+ .Miss_SI ( miss_valid_mhf[PortIdx_D] ),
+ .MhFifoFull_SO ( int_mhf_full ),
+ .wdata_l2 ( wdata_l2_o ),
+ .waddr_l2 ( waddr_l2_o ),
+ .wren_l2 ( wren_l2_o )
+ );
+ generate for (z = 0; z < N_PORTS; z++) begin : MHF_TLB_SELECT
+ if (ENABLE_L2TLB[z] == 1) begin // L2 TLB is enabled
+ assign miss_valid_mhf[z] = miss_l2_i[z];
+ assign miss_addr_mhf[z] = miss_l2_addr_i[z];
+ assign miss_meta_mhf[z] = {miss_l2_user_i[z], PortIdx_D, miss_l2_id_i[z]};
+ end else begin// L2 TLB is disabled
+ assign miss_valid_mhf[z] = int_miss[z];
+ assign miss_addr_mhf[z] = int_addr_min[z];
+ assign miss_meta_mhf[z] = {int_user[z], PortIdx_D, int_id[z]};
+ end
+ end
+ endgenerate
+ // ███████╗██╗ ██╗ ██████╗███████╗ ████████╗ ██████╗ ██████╗
+ // ██╔════╝██║ ██║██╔════╝██╔════╝ ╚══██╔══╝██╔═══██╗██╔══██╗
+ // ███████╗██║ ██║██║ █████╗ ██║ ██║ ██║██████╔╝
+ // ╚════██║██║ ██║██║ ██╔══╝ ██║ ██║ ██║██╔═══╝
+ // ███████║███████╗██║╚██████╗███████╗ ██║ ╚██████╔╝██║
+ // ╚══════╝╚══════╝╚═╝ ╚═════╝╚══════╝ ╚═╝ ╚═════╝ ╚═╝
+ generate for (z = 0; z < N_PORTS; z++) begin : SLICE_TOP_GEN
+ slice_top
+ #(
+ .N_SLICES ( N_SLICES[z] ),
+ .N_REGS ( 4*N_SLICES[z] ),
+ )
+ u_slice_top
+ (
+ .int_cfg_regs ( int_cfg_regs_slices[z][4*N_SLICES[z]-1:0] ),
+ .int_rw ( int_rw[z] ),
+ .int_addr_min ( int_addr_min[z] ),
+ .int_addr_max ( int_addr_max[z] ),
+ .multi_hit_allow ( L1AllowMultiHit_S ),
+ .multi_hit ( multi_hit[z] ),
+ .prot ( prot_slices[z][N_SLICES[z]-1:0] ),
+ .hit ( hit_slices [z][N_SLICES[z]-1:0] ),
+ .cache_coherent ( cache_coherent[z] ),
+ .out_addr ( out_addr[z] )
+ );
+ // hit_slices [N_SLICES_MAX-1:N_SLICES_MAX-N_SLICES[z]] will be dangling
+ // prot_slices[N_SLICES_MAX-1:N_SLICES_MAX-N_SLICES[z]] will be dangling
+ // Fix to zero. Synthesis will remove these signals.
+ if ( N_SLICES[z] < N_SLICES_MAX ) begin
+ assign hit_slices [z][N_SLICES_MAX-1:N_SLICES[z]] = 0;
+ assign prot_slices[z][N_SLICES_MAX-1:N_SLICES[z]] = 0;
+ end
+ end // for (z = 0; z < N_PORTS; z++)
+ endgenerate
+ // ███████╗███████╗███╗ ███╗
+ // ██╔════╝██╔════╝████╗ ████║
+ // █████╗ ███████╗██╔████╔██║
+ // ██╔══╝ ╚════██║██║╚██╔╝██║
+ // ██║ ███████║██║ ╚═╝ ██║
+ // ╚═╝ ╚══════╝╚═╝ ╚═╝
+ //
+ generate for (z = 0; z < N_PORTS; z++) begin : FSM_GEN
+ fsm
+ #(
+ )
+ u_fsm
+ (
+ .Clk_CI ( Clk_CI ),
+ .Rst_RBI ( Rst_RBI ),
+ .port1_addr_valid_i ( port1_addr_valid[z] ),
+ .port2_addr_valid_i ( port2_addr_valid[z] ),
+ .port1_sent_i ( port1_sent[z] ),
+ .port2_sent_i ( port2_sent[z] ),
+ .select_i ( select[z] ),
+ .no_hit_i ( no_hit[z] ),
+ .multi_hit_i ( multi_hit[z] ),
+ .no_prot_i ( no_prot[z] ),
+ .prefetch_i ( prefetch[z] ),
+ .out_addr_i ( out_addr[z] ),
+ .cache_coherent_i ( cache_coherent[z] ),
+ .port1_accept_o ( port1_accept[z] ),
+ .port1_drop_o ( port1_drop[z] ),
+ .port1_miss_o ( port1_miss[z] ),
+ .port2_accept_o ( port2_accept[z] ),
+ .port2_drop_o ( port2_drop[z] ),
+ .port2_miss_o ( port2_miss[z] ),
+ .out_addr_o ( out_addr_reg[z] ),
+ .cache_coherent_o ( cache_coherent_reg[z] ),
+ .miss_o ( int_miss[z] ),
+ .multi_o ( int_multi[z] ),
+ .prot_o ( int_prot[z] ),
+ .prefetch_o ( int_prefetch[z] ),
+ .in_addr_i ( int_addr_min[z] ),
+ .in_id_i ( int_id[z] ),
+ .in_len_i ( int_len[z] ),
+ .in_user_i ( int_user[z] ),
+ .in_addr_o ( int_axaddr_o[z] ),
+ .in_id_o ( int_axid_o[z] ),
+ .in_len_o ( int_axlen_o[z] ),
+ .in_user_o ( int_axuser_o[z] )
+ );
+ end
+ endgenerate
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# module rab_slice
+# #(
+# parameter ADDR_WIDTH_PHYS = 40,
+# parameter ADDR_WIDTH_VIRT = 32
+# )
+# (
+# input logic [ADDR_WIDTH_VIRT-1:0] cfg_min,
+# input logic [ADDR_WIDTH_VIRT-1:0] cfg_max,
+# input logic [ADDR_WIDTH_PHYS-1:0] cfg_offset,
+# input logic cfg_wen,
+# input logic cfg_ren,
+# input logic cfg_en,
+# input logic in_trans_type,
+# input logic [ADDR_WIDTH_VIRT-1:0] in_addr_min,
+# input logic [ADDR_WIDTH_VIRT-1:0] in_addr_max,
+# output logic out_hit,
+# output logic out_prot,
+# output logic [ADDR_WIDTH_PHYS-1:0] out_addr
+# );
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
class rab_slice(Elaboratable):
    """One RAB slice: range check and address offsetting for a request.

    A request spanning ``in_addr_min``..``in_addr_max`` hits when the slice
    is enabled and the whole span lies inside ``cfg_min``..``cfg_max``.
    A hit is flagged as a protection violation when the transaction type
    is not permitted by ``cfg_wen`` / ``cfg_ren``.

    Args:
        params: configuration object providing ``ADDR_WIDTH_VIRT`` and
            ``ADDR_WIDTH_PHYS``.
    """

    def __init__(self, params):  # pass config object
        # TODO parameters
        self.params = params

        # slice configuration
        self.cfg_min = Signal(params.ADDR_WIDTH_VIRT)  # input
        self.cfg_max = Signal(params.ADDR_WIDTH_VIRT)  # input
        self.cfg_offset = Signal(params.ADDR_WIDTH_PHYS)  # input
        self.cfg_wen = Signal()  # input
        self.cfg_ren = Signal()  # input
        self.cfg_en = Signal()  # input

        # incoming request
        self.in_trans_type = Signal()  # input
        self.in_addr_min = Signal(params.ADDR_WIDTH_VIRT)  # input
        self.in_addr_max = Signal(params.ADDR_WIDTH_VIRT)  # input

        # lookup result
        self.out_hit = Signal()  # output
        self.out_prot = Signal()  # output
        self.out_addr = Signal(params.ADDR_WIDTH_PHYS)  # output

    def elaborate(self, platform=None):
        """Build the purely combinational hit / protection / address logic."""
        m = Module()
        comb = m.d.comb

        min_above_min = Signal()
        min_below_max = Signal()
        max_below_max = Signal()

        # Range comparisons of the request against the configured window.
        comb += min_above_min.eq(self.in_addr_min >= self.cfg_min)
        comb += min_below_max.eq(self.in_addr_min <= self.cfg_max)
        comb += max_below_max.eq(self.in_addr_max <= self.cfg_max)

        # Hit: slice enabled and the whole request inside the window.
        comb += self.out_hit.eq(self.cfg_en & min_above_min &
                                min_below_max & max_below_max)

        # Protection: the matching permission bit for this access is clear.
        write_denied = self.in_trans_type & ~self.cfg_wen
        read_denied = ~self.in_trans_type & ~self.cfg_ren
        comb += self.out_prot.eq(self.out_hit & (write_denied | read_denied))

        # Output address: offset of the request inside the window plus the
        # configured offset.
        comb += self.out_addr.eq(
            self.in_addr_min - self.cfg_min + self.cfg_offset)

        return m
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# /*
+# * ram_tp_no_change
+# *
+# * This code implements a parameterizable two-port memory. Port 0 can read and
+# * write while Port 1 can read only. The Xilinx tools will infer a BRAM with
+# * Port 0 in "no change" mode, i.e., during a write, it retains the last read
+# * value on the output. Port 1 (read-only) is in "write first" mode. Still, it
+# * outputs the old data during the write cycle. Note: Port 1 outputs invalid
+# * data in the cycle after the write when reading the same address.
+# *
+# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
+# */
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import Memory
+import math
+# module ram_tp_no_change
+# #(
+# )
+# (
+# input clk,
+# input we,
+# input [ADDR_WIDTH-1:0] addr0,
+# input [ADDR_WIDTH-1:0] addr1,
+# input [DATA_WIDTH-1:0] d_i,
+# output [DATA_WIDTH-1:0] d0_o,
+# output [DATA_WIDTH-1:0] d1_o
+# );
class ram_tp_no_change(Elaboratable):
    """Two-port RAM: port 0 reads/writes in "no change" mode, port 1 reads.

    Follows the SystemVerilog reference kept in the comments below: both
    read outputs are registered once, ``d0_o`` keeps its value during a
    write, and ``d1_o`` returns the old word on a same-address write.

    Args:
        addr_width: number of address bits; the memory has
            ``2**addr_width`` words.
        data_width: width of one memory word in bits.

    NOTE(review): the parameter list of the SystemVerilog module is empty
    in this file, so its real defaults are unknown here; the defaults
    below are placeholders to be confirmed against the original source.
    """

    def __init__(self, addr_width=10, data_width=36):
        self.addr_width = addr_width
        self.data_width = data_width

        self.we = Signal()  # input
        self.addr0 = Signal(addr_width)  # input
        self.addr1 = Signal(addr_width)  # input
        self.d_i = Signal(data_width)  # input
        self.d0_o = Signal(data_width)  # output
        self.d1_o = Signal(data_width)  # output

        # Integer shift instead of float math.pow for the word count.
        self.ram = Memory(width=data_width, depth=1 << addr_width)

    #
    # localparam DEPTH = 2**ADDR_WIDTH;
    #
    # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
    # reg [DATA_WIDTH-1:0] d0;
    # reg [DATA_WIDTH-1:0] d1;
    #
    # always_ff @(posedge clk) begin
    #   if(we == 1'b1) begin
    #     ram[addr0] <= d_i;
    #   end else begin
    #     only change data if we==false
    #     d0 <= ram[addr0];
    #   end
    #   d1 <= ram[addr1];
    # end
    #
    # assign d0_o = d0;
    # assign d1_o = d1;
    #

    def elaborate(self, platform=None):
        """Wire the memory ports so read latency is one clock cycle."""
        m = Module()

        # Memory read ports are synchronous by default, so they already
        # provide the single output register of the reference design.
        # Non-transparent ports return the old word on a same-address
        # write and expose a read enable.
        m.submodules.read_ram0 = read_ram0 = self.ram.read_port(
            transparent=False)
        m.submodules.read_ram1 = read_ram1 = self.ram.read_port(
            transparent=False)
        m.submodules.write_ram = write_ram = self.ram.write_port()

        # write port
        m.d.comb += write_ram.en.eq(self.we)
        m.d.comb += write_ram.addr.eq(self.addr0)
        m.d.comb += write_ram.data.eq(self.d_i)

        # read ports
        m.d.comb += read_ram0.addr.eq(self.addr0)
        m.d.comb += read_ram1.addr.eq(self.addr1)
        # "no change": port 0 only captures a new word while not writing
        m.d.comb += read_ram0.en.eq(~self.we)

        # The port registers are the d0/d1 registers of the reference, so
        # the outputs are driven combinationally from them (registering
        # again would add a second cycle of latency).
        m.d.comb += self.d0_o.eq(read_ram0.data)
        m.d.comb += self.d1_o.eq(read_ram1.data)

        return m
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# /*
+# * ram_tp_write_first
+# *
+# * This code implements a parameterizable two-port memory. Port 0 can read and
+# * write while Port 1 can read only. Xilinx Vivado will infer a BRAM in
+# * "write first" mode, i.e., upon a read and write to the same address, the
+# * new value is read. Note: Port 1 outputs invalid data in the cycle after
+# * the write when reading the same address.
+# *
+# * For more information, see Xilinx PG058 Block Memory Generator Product Guide.
+# */
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+from nmigen import Memory
+import math
+# module ram_tp_write_first
+# #(
+# )
+# (
+# input clk,
+# input we,
+# input [ADDR_WIDTH-1:0] addr0,
+# input [ADDR_WIDTH-1:0] addr1,
+# input [DATA_WIDTH-1:0] d_i,
+# output [DATA_WIDTH-1:0] d0_o,
+# output [DATA_WIDTH-1:0] d1_o
+# );
class ram_tp_write_first(Elaboratable):
    """Two-port RAM in "write first" mode: port 0 reads/writes, port 1 reads.

    Follows the SystemVerilog reference kept in the comments below: the
    read addresses are registered and the outputs are read from the memory
    at the registered address, so a write is visible on a read of the same
    address.

    Args:
        addr_width: number of address bits; the memory has
            ``2**addr_width`` words.
        data_width: width of one memory word in bits.

    NOTE(review): the parameter list of the SystemVerilog module is empty
    in this file, so its real defaults are unknown here; the defaults
    below are placeholders to be confirmed against the original source.
    """

    def __init__(self, addr_width=10, data_width=36):
        self.addr_width = addr_width
        self.data_width = data_width

        self.we = Signal()  # input
        self.addr0 = Signal(addr_width)  # input
        self.addr1 = Signal(addr_width)  # input
        self.d_i = Signal(data_width)  # input
        self.d0_o = Signal(data_width)  # output
        self.d1_o = Signal(data_width)  # output

        # Integer shift instead of float math.pow for the word count.
        self.ram = Memory(width=data_width, depth=1 << addr_width)

    #
    # localparam DEPTH = 2**ADDR_WIDTH;
    #
    # (* ram_style = "block" *) reg [DATA_WIDTH-1:0] ram[DEPTH];
    # reg [ADDR_WIDTH-1:0] raddr0;
    # reg [ADDR_WIDTH-1:0] raddr1;
    #
    # always_ff @(posedge clk) begin
    #   if(we == 1'b1) begin
    #     ram[addr0] <= d_i;
    #   end
    #   raddr0 <= addr0;
    #   raddr1 <= addr1;
    # end
    #
    # assign d0_o = ram[raddr0];
    # assign d1_o = ram[raddr1];
    #

    def elaborate(self, platform=None):
        """Wire the memory ports so read latency is one clock cycle."""
        m = Module()

        # Default read ports are synchronous and transparent, which is the
        # registered-address / write-first behaviour of the reference.
        m.submodules.read_ram0 = read_ram0 = self.ram.read_port()
        m.submodules.read_ram1 = read_ram1 = self.ram.read_port()
        m.submodules.write_ram = write_ram = self.ram.write_port()

        # write port
        m.d.comb += write_ram.en.eq(self.we)
        m.d.comb += write_ram.addr.eq(self.addr0)
        m.d.comb += write_ram.data.eq(self.d_i)

        # read ports
        m.d.comb += read_ram0.addr.eq(self.addr0)
        m.d.comb += read_ram1.addr.eq(self.addr1)

        # The synchronous ports already delay the data by one cycle;
        # registering the outputs again would add a second cycle.
        m.d.comb += self.d0_o.eq(read_ram0.data)
        m.d.comb += self.d1_o.eq(read_ram1.data)

        return m
--- /dev/null
+# // Copyright 2018 ETH Zurich and University of Bologna.
+# // Copyright and related rights are licensed under the Solderpad Hardware
+# // License, Version 0.51 (the "License"); you may not use this file except in
+# // compliance with the License. You may obtain a copy of the License at
+# // http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+# // or agreed to in writing, software, hardware and materials distributed under
+# // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# // CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# // specific language governing permissions and limitations under the License.
+# this file has been generated by sv2nmigen
+from nmigen import Signal, Module, Const, Cat, Elaboratable
+import rab_slice
+import coreconfig
+# module slice_top
+# //#(
+# // parameter N_SLICES = 16,
+# // parameter N_REGS = 4*N_SLICES,
+# // parameter ADDR_WIDTH_PHYS = 40,
+# // parameter ADDR_WIDTH_VIRT = 32
+# // )
+# (
+# input logic [N_REGS-1:0] [63:0] int_cfg_regs,
+# input logic int_rw,
+# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_min,
+# input logic [ADDR_WIDTH_VIRT-1:0] int_addr_max,
+# input logic multi_hit_allow,
+# output logic multi_hit,
+# output logic [N_SLICES-1:0] prot,
+# output logic [N_SLICES-1:0] hit,
+# output logic cache_coherent,
+# output logic [ADDR_WIDTH_PHYS-1:0] out_addr
+# );
class slice_top(Elaboratable):
    """All RAB slices of one port plus the hit arbitration between them.

    Every slice gets four 64-bit configuration registers:
    ``4*i`` = minimum address, ``4*i+1`` = maximum address,
    ``4*i+2`` = offset, ``4*i+3`` = flags (bit 0 enable, bit 1 read
    enable, bit 2 write enable, bit 3 cache coherent).

    When several slices hit, the lowest-numbered one supplies
    ``out_addr`` / ``cache_coherent``; ``multi_hit`` is raised unless
    ``multi_hit_allow`` is set.
    """

    def __init__(self):
        self.params = coreconfig.CoreConfig()  # rename ?
        # One 64-bit signal per configuration register (4 per slice).
        self.int_cfg_regs = [Signal(64, name="int_cfg_regs%d" % i)
                             for i in range(4 * self.params.N_SLICES)]  # input
        self.int_rw = Signal()  # input
        self.int_addr_min = Signal(self.params.ADDR_WIDTH_VIRT)  # input
        self.int_addr_max = Signal(self.params.ADDR_WIDTH_VIRT)  # input
        self.multi_hit_allow = Signal()  # input
        self.multi_hit = Signal()  # output
        self.prot = Signal(self.params.N_SLICES)  # output
        self.hit = Signal(self.params.N_SLICES)  # output
        self.cache_coherent = Signal()  # output
        self.out_addr = Signal(self.params.ADDR_WIDTH_PHYS)  # output

    def elaborate(self, platform=None):
        """Instantiate and connect the slices, then arbitrate their hits."""
        m = Module()
        comb = m.d.comb
        n_slices = self.params.N_SLICES
        virt_bits = self.params.ADDR_WIDTH_VIRT
        phys_bits = self.params.ADDR_WIDTH_PHYS

        slices = []
        for i in range(n_slices):
            u_slice = rab_slice.rab_slice(self.params)
            setattr(m.submodules, "u_slice%d" % i, u_slice)
            slices.append(u_slice)

            flags = self.int_cfg_regs[4 * i + 3]
            comb += [
                u_slice.cfg_min.eq(self.int_cfg_regs[4 * i][:virt_bits]),
                u_slice.cfg_max.eq(self.int_cfg_regs[4 * i + 1][:virt_bits]),
                u_slice.cfg_offset.eq(
                    self.int_cfg_regs[4 * i + 2][:phys_bits]),
                u_slice.cfg_wen.eq(flags[2]),
                u_slice.cfg_ren.eq(flags[1]),
                u_slice.cfg_en.eq(flags[0]),
                u_slice.in_trans_type.eq(self.int_rw),
                u_slice.in_addr_min.eq(self.int_addr_min),
                u_slice.in_addr_max.eq(self.int_addr_max),
                self.prot[i].eq(u_slice.out_prot),
                self.hit[i].eq(u_slice.out_hit),
            ]

        # Multi hit: some slice hits while a lower-numbered slice also
        # hits. The running terms are plain expressions, not signals:
        # combinational assignments are not executed sequentially, so the
        # "first_hit" variable of the SystemVerilog loop cannot be a
        # signal that is read and written in the same domain.
        lower_hit = Const(0, 1)
        collision = Const(0, 1)
        for j in range(n_slices):
            collision = collision | (self.hit[j] & lower_hit)
            lower_hit = lower_hit | self.hit[j]
        comb += self.multi_hit.eq(collision & ~self.multi_hit_allow)

        # In case of a multi hit, the lowest slice with a hit is selected.
        # A later assignment overrides an earlier one, so walk the slices
        # from highest to lowest to give slice 0 the final say.
        comb += [self.out_addr.eq(0), self.cache_coherent.eq(0)]
        for j in reversed(range(n_slices)):
            with m.If(self.hit[j]):
                comb += [
                    self.out_addr.eq(slices[j].out_addr),
                    self.cache_coherent.eq(self.int_cfg_regs[4 * j + 3][3]),
                ]

        return m
+ # TODO translate generate statement
+ logic [ADDR_WIDTH_PHYS*N_SLICES-1:0] slice_out_addr;
+ generate
+ for ( i=0; i<N_SLICES; i++ )
+ begin
+ rab_slice
+ #(
+ )
+ u_slice
+ (
+ .cfg_min ( int_cfg_regs[4*i] [ADDR_WIDTH_VIRT-1:0] ),
+ .cfg_max ( int_cfg_regs[4*i+1][ADDR_WIDTH_VIRT-1:0] ),
+ .cfg_offset ( int_cfg_regs[4*i+2][ADDR_WIDTH_PHYS-1:0] ),
+ .cfg_wen ( int_cfg_regs[4*i+3][2] ),
+ .cfg_ren ( int_cfg_regs[4*i+3][1] ),
+ .cfg_en ( int_cfg_regs[4*i+3][0] ),
+ .in_trans_type ( int_rw ),
+ .in_addr_min ( int_addr_min ),
+ .in_addr_max ( int_addr_max ),
+ .out_addr ( slice_out_addr[ADDR_WIDTH_PHYS*i+ADDR_WIDTH_PHYS-1:ADDR_WIDTH_PHYS*i] ),
+ .out_prot ( prot[i] ),
+ .out_hit ( hit[i] )
+ );
+ end
+ endgenerate
+ // In case of a multi hit, the lowest slice with a hit is selected.
+ always_comb begin : HIT_CHECK
+ first_hit = 0;
+ multi_hit = 0;
+ out_addr = '0;
+ cache_coherent = 0;
+ for (j = 0; j < N_SLICES; j++) begin
+ if (hit[j] == 1'b1) begin
+ if (first_hit == 1'b1) begin
+ if (multi_hit_allow == 1'b0) begin
+ multi_hit = 1'b1;
+ end
+ end else begin
+ first_hit = 1'b1;
+ out_addr = slice_out_addr[ADDR_WIDTH_PHYS*j +: ADDR_WIDTH_PHYS];
+ cache_coherent = int_cfg_regs[4*j+3][3];
+ end
+ end
+ end
+ end
+# sv 2 migen: TODO add translate code for generate statements and for loops inside always_comb
--- /dev/null
+from ram_tp_write_first import ram_tp_write_first
+from nmigen.compat.sim import run_simulation
+import sys
def tbench(dut):
    """Stimulus generator: keep write-enable high and write i to address i.

    Covers the values 0..254, with one bare ``yield`` after each
    address/data pair.
    """
    yield dut.we.eq(1)
    for value in range(255):
        yield dut.addr0.eq(value)
        yield dut.d_i.eq(value)
        yield


if __name__ == "__main__":
    dut = ram_tp_write_first()
    stimulus = tbench(dut)
    run_simulation(dut, stimulus, vcd_name="ram_tp_write_first.vcd")
    print("ram_tp_write_first Unit Test Success")
--- /dev/null
+from nmigen.compat.sim import run_simulation
+import sys
+# sys.path.append("../../../TestUtil")
+from slice_top import slice_top
def tbench(dut):
    """Minimal stimulus generator: yields once without driving any input."""
    yield None


if __name__ == "__main__":
    dut = slice_top()
    stimulus = tbench(dut)
    run_simulation(dut, stimulus, vcd_name="test_slice_top.vcd")
    print("slice_top Unit Test Success")
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Cat, Const, Array, Signal, Elaboratable, Module
+from nmutil.iocontrol import RecordObject
+from math import log
+from functools import reduce
+import operator
class Register(Elaboratable):
    """A single register with any number of read and write ports.

    Args:
        width: register width in bits.
        writethru: when true, a read port whose ``ren`` is set returns the
            data of a write port whose ``wen`` is set in the same cycle
            instead of the stored value.

    Ports must be requested with ``read_port`` / ``write_port`` before the
    register is elaborated.
    """

    def __init__(self, width, writethru=True):
        self.width = width
        self.writethru = writethru
        self._rdports = []
        self._wrports = []

    def read_port(self, name=None):
        """Create, record and return a read port (``ren``, ``data_o``)."""
        port = RecordObject([("ren", 1),
                             ("data_o", self.width)],
                            name=name)
        self._rdports.append(port)
        return port

    def write_port(self, name=None):
        """Create, record and return a write port (``wen``, ``data_i``)."""
        port = RecordObject([("wen", 1),
                             ("data_i", self.width)],
                            name=name)
        self._wrports.append(port)
        return port

    def elaborate(self, platform):
        m = Module()
        self.reg = reg = Signal(self.width, name="reg")

        # read ports. has write-through detection (returns data written)
        for rp in self._rdports:
            with m.If(rp.ren):
                if self.writethru:
                    wr_detect = Signal(reset_less=False)
                    m.d.comb += wr_detect.eq(0)
                    for wp in self._wrports:
                        with m.If(wp.wen):
                            m.d.comb += rp.data_o.eq(wp.data_i)
                            m.d.comb += wr_detect.eq(1)
                    # no write in flight: fall back to the stored value
                    with m.If(~wr_detect):
                        m.d.comb += rp.data_o.eq(reg)
                else:
                    m.d.comb += rp.data_o.eq(reg)

        # write ports: every enabled write port updates the register
        for wp in self._wrports:
            with m.If(wp.wen):
                m.d.sync += reg.eq(wp.data_i)

        return m

    def __iter__(self):
        """Yield the contents of every read port, then every write port."""
        for p in self._rdports:
            yield from p
        for p in self._wrports:
            yield from p

    def ports(self):
        """Return everything yielded by iterating this register as a list."""
        # previously the list was built but never returned
        return list(self)
def treereduce(tree, attr="data_o"):
    """OR-reduce ``attr`` of every item in ``tree`` as a balanced tree.

    Args:
        tree: list of objects exposing ``attr``; anything that is not a
            list is returned unchanged.
        attr: name of the attribute combined with ``|``.

    Returns:
        The OR of all attributes, 0 for an empty list, or ``tree`` itself
        when it is not a list.
    """
    if not isinstance(tree, list):
        return tree
    if not tree:
        # An empty list used to recurse forever (split == 0); 0 is the
        # identity element of OR.
        return 0
    if len(tree) == 1:
        return getattr(tree[0], attr)
    # Split in halves so the expression depth grows logarithmically.
    split = len(tree) // 2
    return treereduce(tree[:split], attr) | treereduce(tree[split:], attr)
class RegFileArray(Elaboratable):
    """ an array-based register file (register having write-through capability)
        that has no "address" decoder, instead it has individual write-en
        and read-en signals (per port).

        Args:
            width: width of every register in bits.
            depth: number of registers.
    """

    def __init__(self, width, depth):
        self.width = width
        self.depth = depth
        self.regs = Array(Register(width) for _ in range(self.depth))
        self._rdports = []
        self._wrports = []

    def read_port(self, name=None):
        """Create a read port with one ``ren`` bit per register.

        A read port is also requested on every register; the returned
        record carries the combined ``ren`` vector and a single ``data_o``.
        """
        regs = Array(reg.read_port(name) for reg in self.regs)
        port = RecordObject([("ren", self.depth),
                             ("data_o", self.width)], name)
        self._rdports.append((regs, port))
        return port

    def write_port(self, name=None):
        """Create a write port with one ``wen`` bit per register.

        A write port is also requested on every register; the returned
        record carries the combined ``wen`` vector and a single ``data_i``.
        """
        regs = Array(reg.write_port(name) for reg in self.regs)
        # pass the name on, as read_port does (it used to be dropped here)
        port = RecordObject([("wen", self.depth),
                             ("data_i", self.width)], name)
        self._wrports.append((regs, port))
        return port

    def _get_en_sig(self, port, typ):
        """Concatenate field ``typ`` of every per-register port, index 0 first."""
        return Cat(*[p[typ] for p in port])

    def elaborate(self, platform):
        m = Module()
        for i, reg in enumerate(self.regs):
            setattr(m.submodules, "reg_%d" % i, reg)

        for (regs, p) in self._rdports:
            # fan the enable bits out to the individual registers
            m.d.comb += self._get_en_sig(regs, 'ren').eq(p.ren)
            # OR the per-register read data together
            ror = treereduce(list(regs))
            m.d.comb += p.data_o.eq(ror)

        for (regs, p) in self._wrports:
            m.d.comb += self._get_en_sig(regs, 'wen').eq(p.wen)
            # every register sees the same write data; wen selects targets
            for r in regs:
                m.d.comb += r.data_i.eq(p.data_i)

        return m

    def __iter__(self):
        """Yield whatever iterating each register yields, in index order."""
        for r in self.regs:
            yield from r

    def ports(self):
        """Return everything yielded by iterating this register file as a list."""
        return list(self)
class RegFile(Elaboratable):
    """ an address-decoded register file with write-through read ports.

        * width: number of data bits per register
        * depth: number of registers

        Port addresses are sized to index ``depth`` entries.  They were
        previously sized from ``width`` (the *data* width) using a
        floating-point log, which only gave a usable address when
        depth <= width and width was a power of two.
    """

    def __init__(self, width, depth):
        self.width = width
        self.depth = depth
        self._rdports = []
        self._wrports = []

    def _addr_width(self):
        """ Number of address bits needed to index ``depth`` registers
            (at least 1, so a single-entry file still has an address).
        """
        return max(1, (self.depth - 1).bit_length())

    def read_port(self):
        """ Create and return a read port record (raddr, ren, data_o).
        """
        bsz = self._addr_width()
        port = RecordObject([("raddr", bsz),
                             ("ren", 1),
                             ("data_o", self.width)])
        self._rdports.append(port)
        return port

    def write_port(self):
        """ Create and return a write port record (waddr, wen, data_i).
        """
        bsz = self._addr_width()
        port = RecordObject([("waddr", bsz),
                             ("wen", 1),
                             ("data_i", self.width)])
        self._wrports.append(port)
        return port

    def elaborate(self, platform):
        m = Module()
        bsz = self._addr_width()

        regs = Array(Signal(self.width, name="reg") for _ in range(self.depth))

        # read ports. has write-through detection (returns data written)
        for rp in self._rdports:
            wr_detect = Signal(reset_less=False)
            with m.If(rp.ren):
                m.d.comb += wr_detect.eq(0)
                for wp in self._wrports:
                    addrmatch = Signal(reset_less=False)
                    m.d.comb += addrmatch.eq(wp.waddr == rp.raddr)
                    # a write to the address being read is forwarded directly
                    with m.If(wp.wen & addrmatch):
                        m.d.comb += rp.data_o.eq(wp.data_i)
                        m.d.comb += wr_detect.eq(1)
                # no write-through this cycle: return the stored value
                with m.If(~wr_detect):
                    m.d.comb += rp.data_o.eq(regs[rp.raddr])

        # write ports, don't allow write to address 0 (ignore it)
        for wp in self._wrports:
            with m.If(wp.wen & (wp.waddr != Const(0, bsz))):
                m.d.sync += regs[wp.waddr].eq(wp.data_i)

        return m

    def __iter__(self):
        yield from self._rdports
        yield from self._wrports

    def ports(self):
        """ Yield every port signal, flattening RecordObject ports into
            their fields.
        """
        res = list(self)
        for r in res:
            if isinstance(r, RecordObject):
                yield from r
            else:
                yield r
def regfile_sim(dut, rp, wp):
    """Simulation process for RegFile: write, read back, then write-through.

    * dut: the register file under test (not referenced directly)
    * rp / wp: a read port and a write port of the DUT
    """
    # write 2 to register 1
    for stmt in (wp.waddr.eq(1), wp.data_i.eq(2), wp.wen.eq(1)):
        yield stmt
    yield

    # stop writing, read register 1 back
    for stmt in (wp.wen.eq(0), rp.ren.eq(1), rp.raddr.eq(1)):
        yield stmt
    yield
    readback = yield rp.data_o
    print (readback)
    assert readback == 2

    # read and write register 5 in the same cycle
    for stmt in (wp.waddr.eq(5), rp.raddr.eq(5), rp.ren.eq(1),
                 wp.wen.eq(1), wp.data_i.eq(6)):
        yield stmt
    before_clock = yield rp.data_o
    print (before_clock)
    yield

    # the value written must have been passed through to the read port
    for stmt in (wp.wen.eq(0), rp.ren.eq(0)):
        yield stmt
    written = yield rp.data_o
    print (written)
    assert written == 6
    yield

    final = yield rp.data_o
    print (final)
def regfile_array_sim(dut, rp1, rp2, wp):
    """Simulation process for RegFileArray, using unary enable vectors.

    * dut: the register file under test (not referenced directly)
    * rp1 / rp2: two read ports; wp: a write port
    """
    # write 2 to register 1 (enable bit 1)
    for stmt in (wp.data_i.eq(2), wp.wen.eq(1<<1)):
        yield stmt
    yield

    # stop writing, read register 1 on the first read port
    for stmt in (wp.wen.eq(0), rp1.ren.eq(1<<1)):
        yield stmt
    yield
    readback = yield rp1.data_o
    print (readback)
    assert readback == 2

    # port 1 reads register 5 while it is written; port 2 reads register 1
    for stmt in (rp1.ren.eq(1<<5), rp2.ren.eq(1<<1),
                 wp.wen.eq(1<<5), wp.data_i.eq(6)):
        yield stmt
    before_clock = yield rp1.data_o
    print (before_clock)
    yield

    for stmt in (wp.wen.eq(0), rp1.ren.eq(0), rp2.ren.eq(0)):
        yield stmt
    data1 = yield rp1.data_o
    print (data1)
    data2 = yield rp2.data_o
    print (data2)
    assert data1 == 6
    yield

    final = yield rp1.data_o
    print (final)
def test_regfile():
    """Dump RTLIL for, and simulate, both RegFile and RegFileArray."""
    # --- address-decoded register file ---
    regfile = RegFile(32, 8)
    rp = regfile.read_port()
    wp = regfile.write_port()
    il_text = rtlil.convert(regfile, ports=regfile.ports())
    with open("test_regfile.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(regfile, regfile_sim(regfile, rp, wp),
                   vcd_name='test_regfile.vcd')

    # --- enable-per-register array: two read ports, one write port ---
    regarray = RegFileArray(32, 8)
    rp1 = regarray.read_port("read1")
    rp2 = regarray.read_port("read2")
    wp = regarray.write_port("write")
    ports = regarray.ports()
    print ("ports", ports)
    il_text = rtlil.convert(regarray, ports=ports)
    with open("test_regfile_array.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(regarray, regfile_array_sim(regarray, rp1, rp2, wp),
                   vcd_name='test_regfile_array.vcd')


if __name__ == "__main__":
    test_regfile()
--- /dev/null
+""" Load / Store partial address matcher
+Loads and Stores do not need a full match (CAM), they need "good enough"
+avoidance. Around 11 bits on a 64-bit address is "good enough".
+The simplest way to use this module is to ignore not only the top bits
+but also the bottom bits: in this case (this RV64 processor), enough of
+them to cover a DWORD (64-bit). That means ignoring the bottom 4 bits,
+due to the possibility of a 64-bit LD/ST being misaligned.
+To reiterate: the use of this module is an *optimisation*. All it has
+to do is cover the cases that are *definitely* matches (by checking 11
+bits or so), and if a few opportunities for parallel LD/STs are missed
+because the top (or bottom) bits weren't checked, so what: all that
+happens is that the mis-matched addresses are LD/STd in separate single
+cycles. Big Deal.
+However, if we wanted to enhance this algorithm (without using a CAM and
+without using expensive comparators) probably the best way to do so would
+be to turn the last 16 bits into a byte-level bitmap. LD/ST on a byte
+would have 1 of the 16 bits set. LD/ST on a DWORD would have 8 of the 16
+bits set (offset if the LD/ST was misaligned). TODO.
+> I have used bits <11:6> as they are not translated (4KB pages)
+> and larger than a cache line (64 bytes).
+> I have used bits <11:4> when the L1 cache was QuadW sized and
+> the L2 cache was Line sized.
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Const, Array, Cat, Elaboratable
+from nmutil.latch import latchregister, SRLatch
class PartialAddrMatch(Elaboratable):
    """A partial address matcher

    Holds ``n_adr`` latched addresses of ``bitwid`` bits each and reports,
    per slot, whether the latched address differs from every other
    latched address.

    Inputs:
    * addrs_i   : Array of n_adr incoming addresses
    * addr_we_i : write-enable for incoming address (not used by the
                  logic in this class)
    * addr_en_i : per-slot latch set ("address latched in")
    * addr_rs_i : per-slot latch reset ("address deactivated")

    Outputs:
    * addr_nomatch_a_o : Array of per-slot "does not match slot j" vectors
    * addr_nomatch_o   : per-slot summary "matches no other slot"
    """

    def __init__(self, n_adr, bitwid):
        self.n_adr = n_adr
        self.bitwid = bitwid
        # inputs
        self.addrs_i = Array(Signal(bitwid, name="addr")
                             for i in range(n_adr))
        self.addr_we_i = Signal(n_adr)  # write-enable for incoming address
        self.addr_en_i = Signal(n_adr)  # address latched in
        self.addr_rs_i = Signal(n_adr)  # address deactivated

        # output
        self.addr_nomatch_o = Signal(n_adr, name="nomatch_o")
        self.addr_nomatch_a_o = Array(Signal(n_adr, name="nomatch_array_o")
                                      for i in range(n_adr))

    def elaborate(self, platform):
        m = Module()
        return self._elaborate(m, platform)

    def _elaborate(self, m, platform):
        comb = m.d.comb

        m.submodules.l = l = SRLatch(llen=self.n_adr, sync=False)
        addrs_r = Array(Signal(self.bitwid, name="a_r")
                        for i in range(self.n_adr))

        # latch set/reset
        comb += l.s.eq(self.addr_en_i)
        comb += l.r.eq(self.addr_rs_i)

        # copy in addresses (and "enable" signals)
        for i in range(self.n_adr):
            latchregister(m, self.addrs_i[i], addrs_r[i], l.q[i])

        # is there a clash, yes/no
        matchgrp = []
        for i in range(self.n_adr):
            match = []
            for j in range(self.n_adr):
                if i == j:
                    match.append(Const(0))  # don't match against self!
                else:
                    match.append(addrs_r[i] == addrs_r[j])
            # only slots whose latch is set (l.q) take part
            comb += self.addr_nomatch_a_o[i].eq(~Cat(*match) & l.q)
            matchgrp.append(self.addr_nomatch_a_o[i] == l.q)
        comb += self.addr_nomatch_o.eq(Cat(*matchgrp) & l.q)

        return m

    def __iter__(self):
        yield from self.addrs_i
        yield self.addr_we_i
        yield self.addr_en_i
        # addr_rs_i drives the latch reset, so it has to be a port as well
        # (it was previously missing from this list)
        yield self.addr_rs_i
        yield from self.addr_nomatch_a_o
        yield self.addr_nomatch_o

    def ports(self):
        return list(self)
def part_addr_sim(dut):
    """Smoke-test stimulus for PartialAddrMatch (no assertions).

    The previous stimulus drove dest_i / issue_i / src1_i / go_rd_i /
    go_wr_i, none of which exist on PartialAddrMatch, so it failed with
    AttributeError on the first step.  This version keeps the same
    four-phase shape (set, set, release, release) using the real ports.

    Assumes the DUT has at least two address slots (n_adr >= 2).
    """
    # latch an address into slot 0
    yield dut.addrs_i[0].eq(1)
    yield dut.addr_en_i.eq(0b01)
    yield
    yield dut.addr_en_i.eq(0)
    yield
    # latch the same address into slot 1, so the two slots clash
    yield dut.addrs_i[1].eq(1)
    yield dut.addr_en_i.eq(0b10)
    yield
    yield dut.addr_en_i.eq(0)
    yield
    # deactivate slot 0
    yield dut.addr_rs_i.eq(0b01)
    yield
    yield dut.addr_rs_i.eq(0)
    yield
    # deactivate slot 1
    yield dut.addr_rs_i.eq(0b10)
    yield
    yield dut.addr_rs_i.eq(0)
    yield
def test_part_addr():
    """Write RTLIL for a 3-slot, 10-bit PartialAddrMatch and simulate it."""
    matcher = PartialAddrMatch(3, 10)
    il_text = rtlil.convert(matcher, ports=matcher.ports())
    with open("test_part_addr.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(matcher, part_addr_sim(matcher),
                   vcd_name='test_part_addr.vcd')


if __name__ == "__main__":
    test_part_addr()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Repl
+from nmutil.latch import SRLatch
+from functools import reduce
+from operator import or_
class DependencyRow(Elaboratable):
    """ implements 11.4.7 mitch alsup dependence cell, p27
        adjusted to be clock-sync'd on rising edge only.
        mitch design (as does 6600) requires alternating rising/falling clock

        * SET mode: issue_i HI, go_i LO, reg_i HI - register is captured
                                                  - FWD is DISABLED (~issue_i)

        * QRY mode: issue_i LO, go_i LO, haz_i HI - FWD is ASSERTED
                                         reg_i HI - ignored

        * GO mode : issue_i LO, go_i HI           - RSEL is ASSERTED
                                         haz_i HI - FWD still can be ASSERTED

        FWD assertion (hazard protection) therefore still occurs in both
        Query and Go Modes, for this cycle, due to the cq register

        GO mode works for one cycle, again due to the cq register capturing
        the latch output.  Without the cq register, the SR Latch (which is
        asynchronous) would be reset at the exact moment that GO was
        requested, and the RSEL would be garbage.

        Arguments:
        * n_reg: width of every register vector (one bit per register)
        * n_src: number of source operands
        * cancel_mode: when True go_die_i is n_reg bits wide, otherwise
          it is a single bit replicated across all registers
    """

    def __init__(self, n_reg, n_src, cancel_mode=False):
        self.cancel_mode = cancel_mode
        self.n_reg = n_reg
        self.n_src = n_src

        # one (operand, reg-select, forward) triple per source operand;
        # numbering starts at 1 so that names match src1/src2
        triples = [
            (Signal(n_reg, name="src%d" % num, reset_less=True),
             Signal(n_reg, name="src%d_rsel_o" % num, reset_less=True),
             Signal(n_reg, name="src%d_fwd_o" % num, reset_less=True))
            for num in range(1, n_src + 1)
        ]
        src = [t[0] for t in triples]
        rsel = [t[1] for t in triples]
        fwd = [t[2] for t in triples]

        # inputs
        self.dest_i = Signal(n_reg, reset_less=True)      # Dest in (top)
        self.src_i = Array(src)                           # operands in (top)
        self.issue_i = Signal(reset_less=True)            # Issue in (top)

        self.rd_pend_i = Signal(n_reg, reset_less=True)   # Read pend in (top)
        self.wr_pend_i = Signal(n_reg, reset_less=True)   # Write pend in (top)
        self.v_rd_rsel_o = Signal(n_reg, reset_less=True) # Read pend out (bot)
        self.v_wr_rsel_o = Signal(n_reg, reset_less=True) # Write pend out (bot)

        self.go_wr_i = Signal(reset_less=True)            # Go Write in (left)
        self.go_rd_i = Signal(reset_less=True)            # Go Read in (left)
        # Go Die in (left): per-register in cancel mode, else one bit
        die_width = n_reg if self.cancel_mode else 1
        self.go_die_i = Signal(die_width, reset_less=True)

        # for Register File Select Lines (vertical)
        self.dest_rsel_o = Signal(n_reg, reset_less=True) # dest reg sel (bot)
        self.src_rsel_o = Array(rsel)                     # src reg sel (bot)
        self.src2_rsel_o = Signal(n_reg, reset_less=True) # src2 reg sel (bot)

        # for Function Unit "forward progress" (horizontal)
        self.dest_fwd_o = Signal(n_reg, reset_less=True)  # dest FU fw (right)
        self.src_fwd_o = Array(fwd)                       # src FU fw (right)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        n_reg = self.n_reg

        # one latch for the destination, one per source operand
        m.submodules.dest_c = dest_c = SRLatch(sync=False, llen=n_reg)
        src_c = [SRLatch(sync=False, llen=n_reg) for _ in range(self.n_src)]
        for num, latch in enumerate(src_c, start=1):
            setattr(m.submodules, "src%d_c" % num, latch)

        # connect go_rd / go_wr (dest->wr, src->rd)
        wr_die = Signal(n_reg, reset_less=True)
        rd_die = Signal(n_reg, reset_less=True)
        if self.cancel_mode:
            go_die = self.go_die_i
        else:
            go_die = Repl(self.go_die_i, n_reg)
        comb += wr_die.eq(Repl(self.go_wr_i, n_reg) | go_die)
        comb += rd_die.eq(Repl(self.go_rd_i, n_reg) | go_die)
        comb += dest_c.r.eq(wr_die)
        comb += [latch.r.eq(rd_die) for latch in src_c]

        # connect input reg bit (unary)
        i_ext = Repl(self.issue_i, n_reg)
        comb += dest_c.s.eq(i_ext & self.dest_i)
        comb += [latch.s.eq(i_ext & self.src_i[idx])
                 for idx, latch in enumerate(src_c)]

        # connect up hazard checks: read-after-write and write-after-read
        comb += self.dest_fwd_o.eq(dest_c.q & self.rd_pend_i)
        comb += [self.src_fwd_o[idx].eq(latch.q & self.wr_pend_i)
                 for idx, latch in enumerate(src_c)]

        # connect reg-sel outputs
        rd_ext = Repl(self.go_rd_i, n_reg)
        wr_ext = Repl(self.go_wr_i, n_reg)
        comb += self.dest_rsel_o.eq(dest_c.qlq & wr_ext)
        comb += [self.src_rsel_o[idx].eq(latch.qlq & rd_ext)
                 for idx, latch in enumerate(src_c)]

        # to be accumulated to indicate if register is in use (globally)
        # after ORing, is fed back in to rd_pend_i / wr_pend_i
        src_q = [latch.qlq for latch in src_c]
        comb += self.v_rd_rsel_o.eq(reduce(or_, src_q))
        comb += self.v_wr_rsel_o.eq(dest_c.qlq)

        return m

    def __iter__(self):
        # inputs first ...
        yield self.dest_i
        yield from self.src_i
        for sig in (self.rd_pend_i, self.wr_pend_i, self.issue_i,
                    self.go_wr_i, self.go_rd_i, self.go_die_i):
            yield sig
        # ... then outputs
        yield self.dest_rsel_o
        yield from self.src_rsel_o
        yield self.dest_fwd_o
        yield from self.src_fwd_o

    def ports(self):
        return list(self)
def dcell_sim(dut):
    """Smoke-test stimulus for DependencyRow (no assertions).

    Issues a destination register, then a first source operand, then
    pulses go_rd_i and go_wr_i.  The first source operand is ``src_i[0]``:
    DependencyRow has no ``src1_i`` attribute, so the previous
    ``dut.src1_i`` raised AttributeError.
    """
    # issue with destination register bit 0 set
    yield dut.dest_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # issue again (held for three cycles) with source operand 1 bit 0 set
    yield dut.src_i[0].eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield
    yield
    yield dut.issue_i.eq(0)
    yield
    # one-cycle go-read pulse
    yield dut.go_rd_i.eq(1)
    yield
    yield dut.go_rd_i.eq(0)
    yield
    # one-cycle go-write pulse
    yield dut.go_wr_i.eq(1)
    yield
    yield dut.go_wr_i.eq(0)
    yield
def test_dcell():
    """Write RTLIL for a 4-register, 2-source DependencyRow (cancel mode)
    and run the smoke-test simulation on it.
    """
    row = DependencyRow(4, 2, True)
    il_text = rtlil.convert(row, ports=row.ports())
    with open("test_drow.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(row, dcell_sim(row), vcd_name='test_dcell.vcd')


if __name__ == "__main__":
    test_dcell()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Array, Const, Elaboratable
+from nmigen.lib.coding import Decoder
+from nmutil.latch import SRLatch, latchregister
+from scoreboard.shadow import Shadow
class FnUnit(Elaboratable):
    """ implements 11.4.8 function unit, p31
        also implements optional shadowing 11.5.1, p55

        shadowing can be used for branches as well as exceptions (interrupts),
        load/store hold (exceptions again), and vector-element predication
        (once the predicate is known, which it may not be at instruction issue)

        Inputs

        * :wid:         register file width
        * :shadow_wid:  number of shadow/fail/good/go_die sets
        * :n_dests:     number of destination regfile(s) (index: rfile_sel_i)
        * :wr_pend:     if true, writable observes the g_wr_pend_i vector
                        otherwise observes g_rd_pend_i

        notes:

        * dest_i / src1_i / src2_i are in *binary*, whereas...
        * ...g_rd_pend_i / g_wr_pend_i and rd_pend_o / wr_pend_o are UNARY
        * req_rel_i (request release) is the direct equivalent of pipeline
                    "output valid" (valid_o)
        * recover is a local python variable (actually go_die_o)
        * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing)
        * wr_pend is set False for the majority of uses: however for
          use in a STORE Function Unit it is set to True
    """

    def __init__(self, wid, shadow_wid=0, n_dests=1, wr_pend=False):
        self.reg_width = wid
        self.n_dests = n_dests
        self.shadow_wid = shadow_wid
        self.wr_pend = wr_pend

        # --- inputs ---
        if n_dests > 1:
            self.rfile_sel_i = Signal(max=n_dests, reset_less=True)
        else:
            self.rfile_sel_i = Const(0)  # no selection.  gets Array[0]
        self.dest_i = Signal(max=wid, reset_less=True)  # Dest R# in (top)
        self.src1_i = Signal(max=wid, reset_less=True)  # oper1 R# in (top)
        self.src2_i = Signal(max=wid, reset_less=True)  # oper2 R# in (top)
        self.issue_i = Signal(reset_less=True)          # Issue in (top)

        self.go_wr_i = Signal(reset_less=True)          # Go Write in (left)
        self.go_rd_i = Signal(reset_less=True)          # Go Read in (left)
        self.req_rel_i = Signal(reset_less=True)        # request release (left)

        # global rd (right): one vector per destination regfile
        self.g_xx_pend_i = Array(
            Signal(wid, reset_less=True, name="g_pend_i")
            for i in range(n_dests))
        self.g_wr_pend_i = Signal(wid, reset_less=True) # global wr (right)

        # shadow ports only exist when shadowing is enabled
        if shadow_wid:
            self.shadow_i = Signal(shadow_wid, reset_less=True)
            self.s_fail_i = Signal(shadow_wid, reset_less=True)
            self.s_good_i = Signal(shadow_wid, reset_less=True)
            self.go_die_o = Signal(reset_less=True)

        # --- outputs ---
        self.readable_o = Signal(reset_less=True)       # Readable out (right)
        # writable out (right): one bit per destination regfile
        self.writable_o = Array(
            Signal(reset_less=True, name="writable_o")
            for i in range(n_dests))
        self.busy_o = Signal(reset_less=True)           # busy out (left)

        self.src1_pend_o = Signal(wid, reset_less=True) # src1 pending
        self.src2_pend_o = Signal(wid, reset_less=True) # src2 pending
        self.rd_pend_o = Signal(wid, reset_less=True)   # rd pending (right)
        # wr pending (right): one vector per destination regfile
        self.xx_pend_o = Array(
            Signal(wid, reset_less=True, name="pend_o")
            for i in range(n_dests))

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        m.submodules.rd_l = rd_l = SRLatch(sync=False)
        m.submodules.wr_l = wr_l = SRLatch(sync=False)
        m.submodules.dest_d = dest_d = Decoder(self.reg_width)
        m.submodules.src1_d = src1_d = Decoder(self.reg_width)
        m.submodules.src2_d = src2_d = Decoder(self.reg_width)

        # shadow / recover (optional: shadow_wid > 0)
        m.submodules.shadow = shadow = Shadow(self.shadow_wid)
        if self.shadow_wid:
            comb += [shadow.issue_i.eq(self.issue_i),
                     shadow.s_fail_i.eq(self.s_fail_i),
                     shadow.s_good_i.eq(self.s_good_i),
                     shadow.shadow_i.eq(self.shadow_i),
                     ]
        shadown = shadow.shadown_o
        recover = shadow.go_die_o

        # selector: pick the vectors of the regfile chosen by rfile_sel_i
        xx_pend_o = self.xx_pend_o[self.rfile_sel_i]
        writable_o = self.writable_o[self.rfile_sel_i]
        g_pend_i = self.g_xx_pend_i[self.rfile_sel_i]

        # default every per-regfile output to zero; the selected entry is
        # overridden further down
        for i in range(self.n_dests):
            comb += [self.xx_pend_o[i].eq(0),
                     self.writable_o[i].eq(0),
                     ]
        comb += self.readable_o.eq(0)

        # go_wr latch: reset on go_wr HI, set on issue
        comb += [wr_l.s.eq(self.issue_i),
                 wr_l.r.eq(self.go_wr_i | recover),
                 ]

        # src1 latch: reset on go_rd HI, set on issue
        comb += [rd_l.s.eq(self.issue_i),
                 rd_l.r.eq(self.go_rd_i | recover),
                 ]

        # latch/registers for dest / src1 / src2
        dest_r = Signal(max=self.reg_width, reset_less=True)
        src1_r = Signal(max=self.reg_width, reset_less=True)
        src2_r = Signal(max=self.reg_width, reset_less=True)
        # XXX latch based on *issue* rather than !latch (as in book)
        latchregister(m, self.dest_i, dest_r, self.issue_i)
        latchregister(m, self.src1_i, src1_r, self.issue_i)
        latchregister(m, self.src2_i, src2_r, self.issue_i)

        # dest decoder (use dest reg as input): write-pending out
        comb += [dest_d.i.eq(dest_r),
                 dest_d.n.eq(wr_l.qn),        # decode is inverted
                 self.busy_o.eq(wr_l.q),      # busy if set
                 xx_pend_o.eq(dest_d.o),
                 ]

        # src1/src2 decoder (use src1/2 regs as input): read-pending out
        comb += [src1_d.i.eq(src1_r),
                 src1_d.n.eq(rd_l.qn),        # decode is inverted
                 src2_d.i.eq(src2_r),
                 src2_d.n.eq(rd_l.qn),        # decode is inverted
                 self.src1_pend_o.eq(src1_d.o),
                 self.src2_pend_o.eq(src2_d.o),
                 self.rd_pend_o.eq(src1_d.o | src2_d.o),
                 ]

        # readable output signal
        g_rd = Signal(self.reg_width, reset_less=True)
        ro = Signal(reset_less=True)
        comb += [g_rd.eq(~self.g_wr_pend_i & self.rd_pend_o),
                 ro.eq(~g_rd.bool()),
                 self.readable_o.eq(ro),
                 ]

        # writable output signal
        g_wr_v = Signal(self.reg_width, reset_less=True)
        g_wr = Signal(reset_less=True)
        wo = Signal(reset_less=True)
        comb += [g_wr_v.eq(g_pend_i & xx_pend_o),
                 g_wr.eq(~g_wr_v.bool()),
                 wo.eq(g_wr & rd_l.qn & self.req_rel_i & shadown),
                 writable_o.eq(wo),
                 ]

        return m

    def __iter__(self):
        # scalar inputs
        for sig in (self.dest_i, self.src1_i, self.src2_i, self.issue_i,
                    self.go_wr_i, self.go_rd_i, self.req_rel_i):
            yield sig
        # global pending inputs
        yield from self.g_xx_pend_i
        yield self.g_wr_pend_i
        # outputs
        yield self.readable_o
        yield from self.writable_o
        yield self.rd_pend_o
        yield from self.xx_pend_o

    def ports(self):
        return list(self)
+############# ###############
+# --- --- #
+# --- renamed / redirected from base class --- #
+# --- --- #
+# --- below are convenience classes which match the names --- #
+# --- of the various mitch alsup book chapter gate diagrams --- #
+# --- --- #
+############# ###############
class IntFnUnit(FnUnit):
    """ FnUnit with a single destination, whose ports are additionally
        exposed (and renamed) under int_* names.
    """

    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid)
        aliases = (
            ("int_rd_pend_o", self.rd_pend_o),
            ("int_wr_pend_o", self.xx_pend_o[0]),
            ("g_int_wr_pend_i", self.g_wr_pend_i),
            ("g_int_rd_pend_i", self.g_xx_pend_i[0]),
            ("int_readable_o", self.readable_o),
            ("int_writable_o", self.writable_o[0]),
        )
        # each alias becomes an attribute, and the signal takes its name
        for alias, sig in aliases:
            setattr(self, alias, sig)
            sig.name = alias
class FPFnUnit(FnUnit):
    """ FnUnit with a single destination, whose ports are additionally
        exposed (and renamed) under fp_* names.
    """

    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid)
        aliases = (
            ("fp_rd_pend_o", self.rd_pend_o),
            ("fp_wr_pend_o", self.xx_pend_o[0]),
            ("g_fp_wr_pend_i", self.g_wr_pend_i),
            ("g_fp_rd_pend_i", self.g_xx_pend_i[0]),
            ("fp_writable_o", self.writable_o[0]),
            ("fp_readable_o", self.readable_o),
        )
        # each alias becomes an attribute, and the signal takes its name
        for alias, sig in aliases:
            setattr(self, alias, sig)
            sig.name = alias
class LDFnUnit(FnUnit):
    """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile)
        * when rfile_sel_i == 0, int_wr_pend_o is set
        * when rfile_sel_i == 1, fp_wr_pend_o is set
    """

    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid, n_dests=2)
        aliases = (
            ("int_rd_pend_o", self.rd_pend_o),
            ("int_wr_pend_o", self.xx_pend_o[0]),
            ("fp_wr_pend_o", self.xx_pend_o[1]),
            ("g_int_wr_pend_i", self.g_wr_pend_i),
            ("g_int_rd_pend_i", self.g_xx_pend_i[0]),
            ("g_fp_rd_pend_i", self.g_xx_pend_i[1]),
            ("int_readable_o", self.readable_o),
            ("int_writable_o", self.writable_o[0]),
            ("fp_writable_o", self.writable_o[1]),
        )
        # each alias becomes an attribute, and the signal takes its name
        for alias, sig in aliases:
            setattr(self, alias, sig)
            sig.name = alias
class STFnUnit(FnUnit):
    """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile)
        * the base class is constructed with wr_pend=True
        * when rfile_sel_i == 0, int_wr_pend_o is set
        * when rfile_sel_i == 1, fp_wr_pend_o is set
    """

    def __init__(self, wid, shadow_wid=0):
        FnUnit.__init__(self, wid, shadow_wid, n_dests=2, wr_pend=True)
        # yes overwrite FnUnit base class g_wr_pend_i vector
        self.g_wr_pend_i = self.g_xx_pend_i[0]
        aliases = (
            ("int_rd_pend_o", self.rd_pend_o),     # 1st int read-pending vector
            ("int2_rd_pend_o", self.xx_pend_o[0]), # 2nd int read-pending vector
            ("fp_rd_pend_o", self.xx_pend_o[1]),   # 1x FP read-pending vector
            ("g_int_wr_pend_i", self.g_wr_pend_i),
            ("g_fp_wr_pend_i", self.g_xx_pend_i[1]),
            ("int_readable_o", self.readable_o),
            ("int_writable_o", self.writable_o[0]),
            ("fp_writable_o", self.writable_o[1]),
        )
        # each alias becomes an attribute, and the signal takes its name
        for alias, sig in aliases:
            setattr(self, alias, sig)
            sig.name = alias
def int_fn_unit_sim(dut):
    """Smoke-test stimulus for a FnUnit (no assertions): issue with a
    destination, issue with a source, then pulse go_rd_i and go_wr_i.
    """
    # issue for one cycle with destination register 1
    for stmt in (dut.dest_i.eq(1), dut.issue_i.eq(1)):
        yield stmt
    yield
    yield dut.issue_i.eq(0)
    yield

    # issue for three cycles with source operand 1 = register 1
    for stmt in (dut.src1_i.eq(1), dut.issue_i.eq(1)):
        yield stmt
    for _ in range(3):
        yield
    yield dut.issue_i.eq(0)
    yield

    # one-cycle go-read pulse, then one-cycle go-write pulse
    for go in (dut.go_rd_i, dut.go_wr_i):
        yield go.eq(1)
        yield
        yield go.eq(0)
        yield
def test_int_fn_unit():
    """Write RTLIL for a FnUnit, an LDFnUnit and an STFnUnit, then
    simulate the STFnUnit (the last unit created).
    """
    def dump_il(unit, path):
        # convert one unit to RTLIL and save it
        il_text = rtlil.convert(unit, ports=unit.ports())
        with open(path, "w") as il_file:
            il_file.write(il_text)

    dut = FnUnit(32, 2, 2)
    dump_il(dut, "test_fn_unit.il")

    dut = LDFnUnit(32, 2)
    dump_il(dut, "test_ld_fn_unit.il")

    dut = STFnUnit(32, 0)
    dump_il(dut, "test_st_fn_unit.il")

    run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_fn_unit.vcd')


if __name__ == "__main__":
    test_int_fn_unit()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Const, Elaboratable
+from nmutil.latch import SRLatch
class FUDependenceCell(Elaboratable):
    """ implements 11.4.7 mitch alsup dependence cell, p27

        Arguments:
        * dummy: index of the one bit that is masked out of the latch
                 "set" inputs (the mask is ~(1<<dummy), n_fu bits wide)
        * n_fu:  width of every vector in the cell
    """

    def __init__(self, dummy, n_fu=1):
        self.n_fu = n_fu
        # all-ones mask except for bit <dummy>
        self.dummy = Const(~(1<<dummy), n_fu)
        # inputs
        self.rd_pend_i = Signal(n_fu, reset_less=True) # read pend in (left)
        self.wr_pend_i = Signal(n_fu, reset_less=True) # write pend in (left)
        self.issue_i = Signal(n_fu, reset_less=True)   # Issue in (top)

        self.go_wr_i = Signal(n_fu, reset_less=True)   # Go Write in (left)
        self.go_rd_i = Signal(n_fu, reset_less=True)   # Go Read in (left)
        self.go_die_i = Signal(n_fu, reset_less=True)  # Go Die in (left)

        # outputs (latched rd/wr wait)
        self.rd_wait_o = Signal(n_fu, reset_less=True) # read wait out (right)
        self.wr_wait_o = Signal(n_fu, reset_less=True) # write wait out (right)

    def elaborate(self, platform):
        m = Module()
        m.submodules.rd_c = rd_c = SRLatch(sync=False, llen=self.n_fu)
        m.submodules.wr_c = wr_c = SRLatch(sync=False, llen=self.n_fu)

        # connect go_rd / go_wr: reset on go HI (or go_die)
        m.d.comb += wr_c.r.eq(self.go_wr_i | self.go_die_i)
        m.d.comb += rd_c.r.eq(self.go_rd_i | self.go_die_i)

        # connect pend_i: set on issue and pending, except the masked bit.
        # (an earlier, unmasked pair of assignments to the same .s signals
        # was removed: in the comb domain the later assignment wins, so it
        # never had any effect)
        m.d.comb += rd_c.s.eq(self.issue_i & self.rd_pend_i & self.dummy)
        m.d.comb += wr_c.s.eq(self.issue_i & self.wr_pend_i & self.dummy)

        # connect output: no wait is reported on bits being issued
        m.d.comb += self.rd_wait_o.eq(rd_c.qlq & ~self.issue_i)
        m.d.comb += self.wr_wait_o.eq(wr_c.qlq & ~self.issue_i)

        return m

    def __iter__(self):
        yield self.rd_pend_i
        yield self.wr_pend_i
        yield self.issue_i
        yield self.go_wr_i
        yield self.go_rd_i
        yield self.go_die_i
        yield self.rd_wait_o
        yield self.wr_wait_o

    def ports(self):
        return list(self)
def dcell_sim(dut):
    """Smoke-test stimulus for FUDependenceCell (no assertions).

    The previous version drove ``dest_i`` and ``src1_i``, which do not
    exist on FUDependenceCell (AttributeError).  The cell's own pending
    inputs are driven instead: wr_pend_i in place of dest_i, rd_pend_i in
    place of src1_i.

    NOTE(review): the cell masks bit <dummy> out of its set inputs, so a
    cell built with dummy=0 ignores the bit-0 values used here - consider
    driving another bit if latch activity is wanted in the trace.
    """
    # issue with a write pending
    yield dut.wr_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # issue with a read pending
    yield dut.rd_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # one-cycle go-read pulse
    yield dut.go_rd_i.eq(1)
    yield
    yield dut.go_rd_i.eq(0)
    yield
    # one-cycle go-write pulse
    yield dut.go_wr_i.eq(1)
    yield
    yield dut.go_wr_i.eq(0)
    yield
def test_dcell():
    """Write RTLIL for a 4-wide FUDependenceCell (dummy bit 0) and run
    the smoke-test simulation on it.
    """
    cell = FUDependenceCell(dummy=0, n_fu=4)
    il_text = rtlil.convert(cell, ports=cell.ports())
    with open("test_fu_dcell.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(cell, dcell_sim(cell), vcd_name='test_fu_dcell.vcd')


if __name__ == "__main__":
    test_dcell()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
+from .fu_dep_cell import FUDependenceCell
+from .fu_picker_vec import FU_Pick_Vec
+# 6600 Function Unit Dependency Table Matrix inputs / outputs
+# -----------------------------------------------------------
class FUFUDepMatrix(Elaboratable):
    """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26

        Arguments:
        * n_fu_row: number of rows (one FUDependenceCell per row)
        * n_fu_col: number of columns (one FU_Pick_Vec per column)

        NOTE(review): several loops below index the per-column picker
        array by *row* number, and compare row-wide against column-wide
        vectors.  That only lines up when n_fu_row == n_fu_col - confirm
        whether non-square matrices are meant to be supported.
    """

    def __init__(self, n_fu_row, n_fu_col):
        self.n_fu_row = n_fu_row                  # Y (FU row#)   ^v
        self.n_fu_col = n_fu_col                  # X (FU col #)  <>
        self.rd_pend_i = Signal(n_fu_row, reset_less=True) # Rd pending (left)
        self.wr_pend_i = Signal(n_fu_row, reset_less=True) # Wr pending (left)
        self.issue_i = Signal(n_fu_col, reset_less=True)   # Issue in (top)

        self.go_wr_i = Signal(n_fu_row, reset_less=True)   # Go Write in (left)
        self.go_rd_i = Signal(n_fu_row, reset_less=True)   # Go Read in (left)
        self.go_die_i = Signal(n_fu_row, reset_less=True)  # Go Die in (left)

        # for Function Unit Readable/Writable (horizontal)
        self.readable_o = Signal(n_fu_col, reset_less=True) # readable (bot)
        self.writable_o = Signal(n_fu_col, reset_less=True) # writable (bot)

    def elaborate(self, platform):
        m = Module()

        # ---
        # matrix of dependency cells
        # ---
        dm = Array(FUDependenceCell(f, self.n_fu_col)
                   for f in range(self.n_fu_row))
        for y in range(self.n_fu_row):
            setattr(m.submodules, "dm%d" % y, dm[y])

        # ---
        # array of Function Unit Readable/Writable: row-length, horizontal
        # ---
        fur = Array(FU_Pick_Vec(self.n_fu_row) for r in range(self.n_fu_col))
        for x in range(self.n_fu_col):
            setattr(m.submodules, "fur_x%d" % (x), fur[x])

        # ---
        # connect FU Readable/Writable vector
        # ---
        readable = []
        writable = []
        for y in range(self.n_fu_row):
            fu = fur[y]   # NOTE(review): fur has n_fu_col entries
            # accumulate Readable/Writable Vector outputs
            readable.append(fu.readable_o)
            writable.append(fu.writable_o)

        # ... and output them from this module (horizontal, width=REGs)
        m.d.comb += self.readable_o.eq(Cat(*readable))
        m.d.comb += self.writable_o.eq(Cat(*writable))

        # ---
        # connect FU Pending
        # ---
        for y in range(self.n_fu_row):
            dc = dm[y]
            fu = fur[y]   # NOTE(review): fur has n_fu_col entries
            # connect cell reg-select outputs to Reg Vector In
            m.d.comb += [fu.rd_pend_i.eq(dc.rd_wait_o),
                         fu.wr_pend_i.eq(dc.wr_wait_o),
                         ]

        # ---
        # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i
        # ---
        for x in range(self.n_fu_col):
            issue_i = []
            for y in range(self.n_fu_row):
                dc = dm[y]
                # accumulate cell inputs issue
                issue_i.append(dc.issue_i[x])
            # wire up inputs from module to row cell inputs
            m.d.comb += Cat(*issue_i).eq(self.issue_i)

        # ---
        # connect Matrix go_rd_i/go_wr_i to module readable/writable
        # ---
        for y in range(self.n_fu_row):
            dc = dm[y]
            # wire up inputs from module to row cell inputs
            m.d.comb += [dc.go_rd_i.eq(self.go_rd_i),
                         dc.go_wr_i.eq(self.go_wr_i),
                         dc.go_die_i.eq(self.go_die_i),
                         ]

        # ---
        # connect Matrix pending
        # ---
        for y in range(self.n_fu_row):
            dc = dm[y]
            # wire up inputs from module to row cell inputs
            m.d.comb += [dc.rd_pend_i.eq(self.rd_pend_i),
                         dc.wr_pend_i.eq(self.wr_pend_i),
                         ]

        return m

    def __iter__(self):
        yield self.rd_pend_i
        yield self.wr_pend_i
        yield self.issue_i
        yield self.go_wr_i
        yield self.go_rd_i
        # go_die_i is wired to every cell in elaborate(), so it must be a
        # port too (it was previously missing from this list)
        yield self.go_die_i
        yield self.readable_o
        yield self.writable_o

    def ports(self):
        return list(self)
def d_matrix_sim(dut):
    """ XXX TODO: smoke-test stimulus only, nothing is checked.

        The previous version drove ``dest_i`` and ``src1_i``, which do not
        exist on FUFUDepMatrix (AttributeError).  The matrix's own pending
        inputs are driven instead: wr_pend_i in place of dest_i, rd_pend_i
        in place of src1_i.
    """
    # issue with a write pending
    yield dut.wr_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # issue with a read pending
    yield dut.rd_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # one-cycle go-read pulse
    yield dut.go_rd_i.eq(1)
    yield
    yield dut.go_rd_i.eq(0)
    yield
    # one-cycle go-write pulse
    yield dut.go_wr_i.eq(1)
    yield
    yield dut.go_wr_i.eq(0)
    yield
def test_fu_fu_matrix():
    """Write RTLIL for a 3-row, 4-column FUFUDepMatrix and run the
    smoke-test simulation on it.
    """
    matrix = FUFUDepMatrix(n_fu_row=3, n_fu_col=4)
    il_text = rtlil.convert(matrix, ports=matrix.ports())
    with open("test_fu_fu_matrix.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(matrix, d_matrix_sim(matrix),
                   vcd_name='test_fu_fu_matrix.vcd')


if __name__ == "__main__":
    test_fu_fu_matrix()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
+from scoreboard.fumem_dep_cell import FUMemDependenceCell
+from scoreboard.fu_mem_picker_vec import FUMem_Pick_Vec
+# 6600 Function Unit Dependency Table Matrix inputs / outputs
+# -----------------------------------------------------------
class FUMemDepMatrix(Elaboratable):
    """ implements FU-to-FU Memory Dependency Matrix

        Builds one FUMemDependenceCell per row and one FUMem_Pick_Vec per
        column, fans the module-level pending / issue / go inputs out to
        the cells, and reduces the cells' wait outputs into the
        storable_o / loadable_o vectors.

        NOTE(review): rows and columns are indexed interchangeably in
        several places below (e.g. fur[y] with y ranging over n_fu_row),
        so this presumably assumes n_fu_row == n_fu_col -- TODO confirm
        before instantiating a non-square matrix.
    """
    def __init__(self, n_fu_row, n_fu_col):
        self.n_fu_row = n_fu_row                  # Y (FU row#)   ^v
        self.n_fu_col = n_fu_col                  # X (FU col #)  <>
        self.st_pend_i = Signal(n_fu_row, reset_less=True) # St pending (left)
        self.ld_pend_i = Signal(n_fu_row, reset_less=True) # Ld pending (left)
        self.issue_i = Signal(n_fu_col, reset_less=True) # Issue in (top)

        self.go_ld_i = Signal(n_fu_row, reset_less=True) # Go Load in (left)
        self.go_st_i = Signal(n_fu_row, reset_less=True) # Go Store in (left)
        self.go_die_i = Signal(n_fu_row, reset_less=True) # Go Die in (left)

        # for Function Unit Storable/Loadable (horizontal)
        self.storable_o = Signal(n_fu_col, reset_less=True) # storable (bot)
        self.loadable_o = Signal(n_fu_col, reset_less=True) # loadable (bot)

    def elaborate(self, platform):
        m = Module()

        # ---
        # matrix of dependency cells (one cell per row, n_fu_col wide)
        # ---
        dm = Array(FUMemDependenceCell(f, self.n_fu_col)
                   for f in range(self.n_fu_row))
        for y in range(self.n_fu_row):
            setattr(m.submodules, "dm%d" % y, dm[y])

        # ---
        # array of Function Unit Storable/Loadable pickers
        # ---
        fur = Array(FUMem_Pick_Vec(self.n_fu_row)
                    for r in range(self.n_fu_col))
        for x in range(self.n_fu_col):
            setattr(m.submodules, "fur_x%d" % (x), fur[x])

        # ---
        # connect FU Storable/Loadable vector
        # ---
        storable = []
        loadable = []
        for y in range(self.n_fu_row):
            fu = fur[y]
            # accumulate Storable/Loadable Vector outputs
            storable.append(fu.storable_o)
            loadable.append(fu.loadable_o)

        # ... and output them from this module
        m.d.comb += self.storable_o.eq(Cat(*storable))
        m.d.comb += self.loadable_o.eq(Cat(*loadable))

        # ---
        # connect FU Pending
        # ---
        for y in range(self.n_fu_row):
            dc = dm[y]
            fu = fur[y]
            # connect cell wait outputs to the picker's pending inputs
            m.d.comb += [fu.st_pend_i.eq(dc.st_wait_o),
                         fu.ld_pend_i.eq(dc.ld_wait_o),
                        ]

        # ---
        # connect Dependency Matrix issue to module issue
        # ---
        for x in range(self.n_fu_col):
            issue_i = []
            for y in range(self.n_fu_row):
                dc = dm[y]
                # accumulate cell inputs issue
                issue_i.append(dc.issue_i[x])
            # wire up inputs from module to row cell inputs
            m.d.comb += Cat(*issue_i).eq(self.issue_i)

        # ---
        # connect Matrix go_st_i/go_ld_i/go_die_i to every row cell
        # ---
        for y in range(self.n_fu_row):
            dc = dm[y]
            # wire up inputs from module to row cell inputs
            m.d.comb += [dc.go_st_i.eq(self.go_st_i),
                         dc.go_ld_i.eq(self.go_ld_i),
                         dc.go_die_i.eq(self.go_die_i),
                        ]

        # ---
        # connect Matrix pending
        # ---
        for y in range(self.n_fu_row):
            dc = dm[y]
            # wire up inputs from module to row cell inputs
            m.d.comb += [dc.st_pend_i.eq(self.st_pend_i),
                         dc.ld_pend_i.eq(self.ld_pend_i),
                        ]

        return m

    def __iter__(self):
        yield self.st_pend_i
        yield self.ld_pend_i
        yield self.issue_i
        yield self.go_ld_i
        yield self.go_st_i
        # go_die_i is a module input too; without it ports() would leave
        # it unexposed in the converted design.
        yield self.go_die_i
        yield self.storable_o
        yield self.loadable_o

    def ports(self):
        return list(self)
def d_matrix_sim(dut):
    """ Simulation stimulus for the FU-Mem dependency matrix.

        Yields nMigen simulator commands: a load-pending issue, a
        store-pending issue, then a go_st pulse followed by a go_ld
        pulse.  A bare ``yield`` advances the simulation by one clock.

        FUMemDepMatrix exposes ld_pend_i / st_pend_i (there is no ld_i
        or st_i port), so those are the signals driven here.

        XXX TODO: no outputs are checked yet.
    """
    # issue with a load pending on bit 0
    yield dut.ld_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # issue with a store pending on bit 0
    yield dut.st_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # pulse go_st
    yield dut.go_st_i.eq(1)
    yield
    yield dut.go_st_i.eq(0)
    yield
    # pulse go_ld
    yield dut.go_ld_i.eq(1)
    yield
    yield dut.go_ld_i.eq(0)
    yield
def test_fu_fu_matrix():
    """Convert a 3x3 FUMemDepMatrix to RTLIL, save it, then simulate it."""
    dut = FUMemDepMatrix(n_fu_row=3, n_fu_col=3)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_fu_mem_matrix.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = d_matrix_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_fu_mem_matrix.vcd')


if __name__ == '__main__':
    test_fu_fu_matrix()
--- /dev/null
+from nmigen import Elaboratable, Module, Signal, Cat
class FUMem_Pick_Vec(Elaboratable):
    """ Reduces the store-pending and load-pending vectors (each
        fu_row_n bits wide) to single storable / loadable flags.
        These are allocated per-FU (horizontally).
    """
    def __init__(self, fu_row_n):
        self.fu_row_n = fu_row_n
        # pending vectors in
        self.st_pend_i = Signal(fu_row_n, reset_less=True)
        self.ld_pend_i = Signal(fu_row_n, reset_less=True)
        # single-bit flags out
        self.storable_o = Signal(reset_less=True)
        self.loadable_o = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        no_ld_pending = ~self.ld_pend_i.bool()
        no_st_pending = ~self.st_pend_i.bool()

        comb += [
            # Storable if there are no loads pending
            self.storable_o.eq(no_ld_pending),
            # Loadable if there are no stores pending
            self.loadable_o.eq(no_st_pending),
        ]

        return m
--- /dev/null
+from nmigen import Elaboratable, Module, Signal, Cat
class FU_Pick_Vec(Elaboratable):
    """ Reduces the read-pending and write-pending vectors (each
        fu_row_n bits wide) to single readable / writable flags.
        These are allocated per-FU (horizontally).
    """
    def __init__(self, fu_row_n):
        self.fu_row_n = fu_row_n
        # pending vectors in
        self.rd_pend_i = Signal(fu_row_n, reset_less=True)
        self.wr_pend_i = Signal(fu_row_n, reset_less=True)
        # single-bit flags out
        self.readable_o = Signal(reset_less=True)
        self.writable_o = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        no_wr_pending = ~self.wr_pend_i.bool()
        no_rd_pending = ~self.rd_pend_i.bool()

        comb += [
            # Readable if there are no writes pending
            self.readable_o.eq(no_wr_pending),
            # Writable if there are no reads pending
            self.writable_o.eq(no_rd_pending),
        ]

        return m
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Repl
+from scoreboard.dependence_cell import DependencyRow
+from scoreboard.fu_wr_pending import FU_RW_Pend
+from scoreboard.reg_select import Reg_Rsv
+from scoreboard.global_pending import GlobalPending
+ 6600 Dependency Table Matrix inputs / outputs
+ ---------------------------------------------
+ d s1 s2 i d s1 s2 i d s1 s2 i d s1 s2 i
+ | | | | | | | | | | | | | | | |
+ v v v v v v v v v v v v v v v v
+ go_rd/go_wr -> dm-r0-fu0 dm-r1-fu0 dm-r2-fu0 dm-r3-fu0 -> wr/rd-pend
+ go_rd/go_wr -> dm-r0-fu1 dm-r1-fu1 dm-r2-fu1 dm-r3-fu1 -> wr/rd-pend
+ go_rd/go_wr -> dm-r0-fu2 dm-r1-fu2 dm-r2-fu2 dm-r3-fu2 -> wr/rd-pend
+ | | | | | | | | | | | |
+ v v v v v v v v v v v v
+ d s1 s2 d s1 s2 d s1 s2 d s1 s2
+ reg sel reg sel reg sel reg sel
class FURegDepMatrix(Elaboratable):
    """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26

        Arguments:

        * :n_fu_row:  number of Function Units (matrix rows)
        * :n_reg_col: number of registers (matrix columns)
        * :n_src:     number of source operands
        * :cancel:    optional per-FU cancellation vectors (indexable by
                      FU number); when given, cancel[fu] is ORed with the
                      replicated go_die_i[fu] bit to form that row's
                      go_die_i
    """
    def __init__(self, n_fu_row, n_reg_col, n_src, cancel=None):
        self.n_src = n_src
        self.n_fu_row = nf = n_fu_row      # Y (FUs)   ^v
        self.n_reg_col = n_reg = n_reg_col   # X (Regs)  <>

        # arrays: exactly one entry per source operand.  rd_src_pend_o is
        # indexed and driven per-source in _elaborate, so it must have
        # n_src entries (each n_fu_row bits wide), not n_fu_row entries.
        src = []
        rsel = []
        pend = []
        for i in range(n_src):
            j = i + 1 # name numbering to match src1/src2
            src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j,
                               reset_less=True))
            pend.append(Signal(nf, name="rd_src%d_pend_o" % j,
                               reset_less=True))

        self.dest_i = Signal(n_reg_col, reset_less=True)     # Dest in (top)
        self.src_i = Array(src)                              # oper in (top)

        # cancellation array (from Address Matching), ties in with go_die_i
        self.cancel = cancel

        # Register "Global" vectors for determining RaW and WaR hazards
        self.wr_pend_i = Signal(n_reg_col, reset_less=True) # wr pending (top)
        self.rd_pend_i = Signal(n_reg_col, reset_less=True) # rd pending (top)
        self.v_wr_rsel_o = Signal(n_reg_col, reset_less=True) # wr pending (bot)
        self.v_rd_rsel_o = Signal(n_reg_col, reset_less=True) # rd pending (bot)

        self.issue_i = Signal(n_fu_row, reset_less=True)  # Issue in (top)
        self.go_wr_i = Signal(n_fu_row, reset_less=True)  # Go Write in (left)
        self.go_rd_i = Signal(n_fu_row, reset_less=True)  # Go Read in (left)
        self.go_die_i = Signal(n_fu_row, reset_less=True) # Go Die in (left)

        # for Register File Select Lines (horizontal), per-reg
        self.dest_rsel_o = Signal(n_reg_col, reset_less=True) # dest reg (bot)
        self.src_rsel_o = Array(rsel)                         # src reg (bot)

        # for Function Unit "forward progress" (vertical), per-FU
        self.wr_pend_o = Signal(n_fu_row, reset_less=True) # wr pending (right)
        self.rd_pend_o = Signal(n_fu_row, reset_less=True) # rd pending (right)
        self.rd_src_pend_o = Array(pend)                   # per-src pending

    def elaborate(self, platform):
        m = Module()
        return self._elaborate(m, platform)

    def _elaborate(self, m, platform):
        """Populate module m with the matrix and its wiring; returns m."""

        # ---
        # matrix of dependency cells
        # ---
        cancel_mode = self.cancel is not None
        dm = Array(DependencyRow(self.n_reg_col, self.n_src, cancel_mode)
                   for r in range(self.n_fu_row))
        for fu in range(self.n_fu_row):
            setattr(m.submodules, "dr_fu%d" % fu, dm[fu])

        # ---
        # array of Function Unit Pending vectors
        # ---
        fupend = Array(FU_RW_Pend(self.n_reg_col, self.n_src)
                       for f in range(self.n_fu_row))
        for fu in range(self.n_fu_row):
            setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu])

        # ---
        # array of Register Reservation vectors
        # ---
        regrsv = Array(Reg_Rsv(self.n_fu_row, self.n_src)
                       for r in range(self.n_reg_col))
        for rn in range(self.n_reg_col):
            setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn])

        # ---
        # connect Function Unit vector
        # ---
        wr_pend = []
        rd_pend = []
        for fu in range(self.n_fu_row):
            dc = dm[fu]
            fup = fupend[fu]
            # accumulate cell fwd outputs for dest
            dest_fwd_o = [dc.dest_fwd_o[rn] for rn in range(self.n_reg_col)]
            # connect cell fwd outputs to FU Vector in [Cat is gooood]
            m.d.comb += fup.dest_fwd_i.eq(Cat(*dest_fwd_o))
            # accumulate FU Vector outputs
            wr_pend.append(fup.reg_wr_pend_o)
            rd_pend.append(fup.reg_rd_pend_o)

        # ... and output them from this module (vertical, width=FUs)
        m.d.comb += self.wr_pend_o.eq(Cat(*wr_pend))
        m.d.comb += self.rd_pend_o.eq(Cat(*rd_pend))

        # same for src
        for i in range(self.n_src):
            rd_src_pend = []
            for fu in range(self.n_fu_row):
                dc = dm[fu]
                fup = fupend[fu]
                # accumulate cell fwd outputs for this src
                src_fwd_o = [dc.src_fwd_o[i][rn]
                             for rn in range(self.n_reg_col)]
                # connect cell fwd outputs to FU Vector in [Cat is gooood]
                m.d.comb += fup.src_fwd_i[i].eq(Cat(*src_fwd_o))
                # accumulate FU Vector outputs
                rd_src_pend.append(fup.reg_rd_src_pend_o[i])
            # ... and output them from this module (vertical, width=FUs)
            m.d.comb += self.rd_src_pend_o[i].eq(Cat(*rd_src_pend))

        # ---
        # connect Reg Selection vector
        # ---
        dest_rsel = []
        for rn in range(self.n_reg_col):
            rsv = regrsv[rn]
            # accumulate cell reg-select outputs for dest
            dest_rsel_o = [dm[fu].dest_rsel_o[rn]
                           for fu in range(self.n_fu_row)]
            # connect cell reg-select outputs to Reg Vector In
            m.d.comb += rsv.dest_rsel_i.eq(Cat(*dest_rsel_o))
            # accumulate Reg-Sel Vector outputs
            dest_rsel.append(rsv.dest_rsel_o)

        # ... and output them from this module (horizontal, width=REGs)
        m.d.comb += self.dest_rsel_o.eq(Cat(*dest_rsel))

        # same for src
        for i in range(self.n_src):
            src_rsel = []
            for rn in range(self.n_reg_col):
                rsv = regrsv[rn]
                # accumulate cell reg-select outputs for this src
                src_rsel_o = [dm[fu].src_rsel_o[i][rn]
                              for fu in range(self.n_fu_row)]
                # connect cell reg-select outputs to Reg Vector In
                m.d.comb += rsv.src_rsel_i[i].eq(Cat(*src_rsel_o))
                # accumulate Reg-Sel Vector outputs
                src_rsel.append(rsv.src_rsel_o[i])
            # ... and output them from this module (horizontal, width=REGs)
            m.d.comb += self.src_rsel_o[i].eq(Cat(*src_rsel))

        # ---
        # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i
        # ---
        for fu in range(self.n_fu_row):
            dc = dm[fu]
            # wire up inputs from module to row cell inputs
            m.d.comb += [dc.dest_i.eq(self.dest_i),
                         dc.rd_pend_i.eq(self.rd_pend_i),
                         dc.wr_pend_i.eq(self.wr_pend_i),
                        ]
        # same for src
        for i in range(self.n_src):
            for fu in range(self.n_fu_row):
                dc = dm[fu]
                # wire up inputs from module to row cell inputs
                m.d.comb += dc.src_i[i].eq(self.src_i[i])

        # accumulate rsel bits into read/write pending vectors.
        rd_pend_v = []
        wr_pend_v = []
        for fu in range(self.n_fu_row):
            dc = dm[fu]
            rd_pend_v.append(dc.v_rd_rsel_o)
            wr_pend_v.append(dc.v_wr_rsel_o)
        rd_v = GlobalPending(self.n_reg_col, rd_pend_v)
        wr_v = GlobalPending(self.n_reg_col, wr_pend_v)
        m.submodules.rd_v = rd_v
        m.submodules.wr_v = wr_v

        m.d.comb += self.v_rd_rsel_o.eq(rd_v.g_pend_o)
        m.d.comb += self.v_wr_rsel_o.eq(wr_v.g_pend_o)

        # ---
        # connect Dep issue_i/go_rd_i/go_wr_i to module issue_i/go_rd/go_wr
        # ---
        go_rd_i = []
        go_wr_i = []
        issue_i = []
        for fu in range(self.n_fu_row):
            dc = dm[fu]
            # accumulate the per-row control inputs
            go_rd_i.append(dc.go_rd_i)
            go_wr_i.append(dc.go_wr_i)
            issue_i.append(dc.issue_i)
        # wire up inputs from module to row cell inputs (Cat is gooood)
        m.d.comb += [Cat(*go_rd_i).eq(self.go_rd_i),
                     Cat(*go_wr_i).eq(self.go_wr_i),
                     Cat(*issue_i).eq(self.issue_i),
                    ]

        # ---
        # connect Dep go_die_i
        # ---
        if cancel_mode:
            for fu in range(self.n_fu_row):
                dc = dm[fu]
                # replicate this FU's die bit and OR in its cancel vector
                # NOTE(review): replication width is n_fu_row; the width
                # of dc.go_die_i in cancel mode is not visible here.
                go_die = Repl(self.go_die_i[fu], self.n_fu_row)
                go_die = go_die | self.cancel[fu]
                m.d.comb += dc.go_die_i.eq(go_die)
        else:
            go_die_i = []
            for fu in range(self.n_fu_row):
                dc = dm[fu]
                # accumulate the per-row go_die inputs
                go_die_i.append(dc.go_die_i)
            # wire up inputs from module to row cell inputs (Cat is gooood)
            m.d.comb += Cat(*go_die_i).eq(self.go_die_i)
        return m

    def __iter__(self):
        yield self.dest_i
        yield from self.src_i
        yield self.issue_i
        yield self.go_wr_i
        yield self.go_rd_i
        yield self.go_die_i

        yield self.dest_rsel_o
        yield from self.src_rsel_o
        yield self.wr_pend_o
        yield self.rd_pend_o
        yield self.wr_pend_i
        yield self.rd_pend_i
        yield self.v_wr_rsel_o
        yield self.v_rd_rsel_o
        yield from self.rd_src_pend_o

    def ports(self):
        return list(self)
def d_matrix_sim(dut):
    """ Simulation stimulus for the FU-Reg dependency matrix.

        Yields nMigen simulator commands: an issue with dest set, an
        issue with the first source operand set, then a go_rd pulse
        followed by a go_wr pulse.  A bare ``yield`` advances the
        simulation by one clock.

        FURegDepMatrix keeps its source operands in the src_i Array
        (there is no src1_i attribute), so src1 is dut.src_i[0].

        XXX TODO: no outputs are checked yet.
    """
    # issue with a destination register selected
    yield dut.dest_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # issue with the first source register selected
    yield dut.src_i[0].eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # pulse go_rd
    yield dut.go_rd_i.eq(1)
    yield
    yield dut.go_rd_i.eq(0)
    yield
    # pulse go_wr
    yield dut.go_wr_i.eq(1)
    yield
    yield dut.go_wr_i.eq(0)
    yield
def test_d_matrix():
    """Convert a 3-FU, 4-reg, 2-src FURegDepMatrix to RTLIL, save it,
    then simulate it.
    """
    dut = FURegDepMatrix(n_fu_row=3, n_reg_col=4, n_src=2)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_fu_reg_matrix.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = d_matrix_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_fu_reg_matrix.vcd')


if __name__ == '__main__':
    test_d_matrix()
--- /dev/null
+from nmigen import Elaboratable, Module, Signal, Array
class FU_RW_Pend(Elaboratable):
    """ Reduces the dest / per-source forwarding vectors (each reg_count
        bits wide) to write-pending and read-pending flags.
        These are allocated per-FU (horizontally).
    """
    def __init__(self, reg_count, n_src):
        self.n_src = n_src
        self.reg_count = reg_count
        self.dest_fwd_i = Signal(reg_count, reset_less=True)
        # one forwarding vector per source; names start at src1
        self.src_fwd_i = Array(
            Signal(reg_count, name="src%d" % (idx + 1), reset_less=True)
            for idx in range(n_src))

        self.reg_wr_pend_o = Signal(reset_less=True)
        self.reg_rd_pend_o = Signal(reset_less=True)
        self.reg_rd_src_pend_o = Signal(n_src, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # write pending if any dest forwarding bit is set
        comb += self.reg_wr_pend_o.eq(self.dest_fwd_i.bool())
        # one read-pending bit per source: set if any of its bits is set
        for idx in range(self.n_src):
            src_any = self.src_fwd_i[idx].bool()
            comb += self.reg_rd_src_pend_o[idx].eq(src_any)
        # overall read pending: any source pending
        comb += self.reg_rd_pend_o.eq(self.reg_rd_src_pend_o.bool())
        return m
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Const, Elaboratable
+from nmutil.latch import SRLatch
class FUMemDependenceCell(Elaboratable):
    """ implements 11.4.7 mitch alsup dependence cell, p27

        Holds one store latch and one load latch, each n_fu bits wide.
        A latch bit is set on issue while the matching pending bit is
        high, and cleared by the matching go signal or by go_die.

        Arguments:

        * :dummy: bit number that is masked out of the latch "set"
                  inputs (self.dummy has every bit set except this one)
        * :n_fu:  width of all the vectors
    """
    def __init__(self, dummy, n_fu=1):
        self.n_fu = n_fu
        # mask: all ones except bit number `dummy`
        self.dummy = Const(~(1<<dummy), n_fu)
        # inputs
        self.st_pend_i = Signal(n_fu, reset_less=True) # store pend in (left)
        self.ld_pend_i = Signal(n_fu, reset_less=True) # load pend in (left)
        self.issue_i = Signal(n_fu, reset_less=True)   # Issue in (top)

        self.go_ld_i = Signal(n_fu, reset_less=True)  # Go Load in (left)
        self.go_st_i = Signal(n_fu, reset_less=True)  # Go Store in (left)
        self.go_die_i = Signal(n_fu, reset_less=True) # Go Die in (left)

        # outputs (latched st/ld wait)
        self.st_wait_o = Signal(n_fu, reset_less=True) # store wait out (right)
        self.ld_wait_o = Signal(n_fu, reset_less=True) # load wait out (right)

    def elaborate(self, platform):
        m = Module()
        m.submodules.st_c = st_c = SRLatch(sync=False, llen=self.n_fu)
        m.submodules.ld_c = ld_c = SRLatch(sync=False, llen=self.n_fu)

        # set on issue and pending, masked by self.dummy.  (An earlier,
        # unmasked assignment to the same .s signals was overridden by
        # this one -- the last comb assignment wins -- so it is dropped.)
        m.d.comb += st_c.s.eq(self.issue_i & self.st_pend_i & self.dummy)
        m.d.comb += ld_c.s.eq(self.issue_i & self.ld_pend_i & self.dummy)

        # reset on go_ld / go_st HI, or on go_die
        m.d.comb += ld_c.r.eq(self.go_ld_i | self.go_die_i)
        m.d.comb += st_c.r.eq(self.go_st_i | self.go_die_i)

        # connect output: wait is held off while issue is HI
        m.d.comb += self.st_wait_o.eq(st_c.qlq & ~self.issue_i)
        m.d.comb += self.ld_wait_o.eq(ld_c.qlq & ~self.issue_i)

        return m

    def __iter__(self):
        yield self.st_pend_i
        yield self.ld_pend_i
        yield self.issue_i
        yield self.go_ld_i
        yield self.go_st_i
        yield self.go_die_i
        yield self.st_wait_o
        yield self.ld_wait_o

    def ports(self):
        return list(self)
def dcell_sim(dut):
    """ Simulation stimulus for FUMemDependenceCell.

        Yields nMigen simulator commands: a load-pending issue, a
        store-pending issue, then a go_st pulse followed by a go_ld
        pulse.  A bare ``yield`` advances the simulation by one clock.

        The cell exposes ld_pend_i / st_pend_i (there is no ld_i or
        st_i attribute), so those are the signals driven here.
    """
    # issue with a load pending on bit 0
    yield dut.ld_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # issue with a store pending on bit 0
    yield dut.st_pend_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # pulse go_st
    yield dut.go_st_i.eq(1)
    yield
    yield dut.go_st_i.eq(0)
    yield
    # pulse go_ld
    yield dut.go_ld_i.eq(1)
    yield
    yield dut.go_ld_i.eq(0)
    yield
def test_dcell():
    """Convert a 4-wide FUMemDependenceCell to RTLIL, save it, then
    simulate it.
    """
    dut = FUMemDependenceCell(dummy=0, n_fu=4)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_fumem_dcell.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = dcell_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_fumem_dcell.vcd')


if __name__ == '__main__':
    test_dcell()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
class GlobalPending(Elaboratable):
    """ implements Global Pending Vector: for each register, ORs together
        the matching bit of every incoming Function Unit vector.  Usable
        for Read or Write Global Pending, and for INT or FP.

        Inputs:

        * :dep:     register file depth
        * :fu_vecs: a python list of function unit "pending" vectors, each
                    vector being a Signal of width equal to the reg file.
        * :sync:    when True the output is assigned in the sync domain
                    rather than combinatorially

        Notes:

        * the regfile may be Int or FP, this code doesn't care which.
          obviously do not try to put in a mixture of regfiles into fu_vecs.
        * this code also doesn't care if it's used for Read Pending or Write
          pending, it can be used for both: again, obviously, do not try to
          put in a mixture of read *and* write pending vectors in.
        * if some Function Units happen not to be uniform (don't operate
          on a particular register (extremely unusual), they must set a Const
          zero bit in the vector.
    """
    def __init__(self, dep, fu_vecs, sync=False):
        self.reg_dep = dep
        # inputs
        self.fu_vecs = fu_vecs
        self.sync = sync
        widths_ok = all(len(vec) == dep for vec in fu_vecs)
        assert widths_ok, "FU Vector must be same width as regfile"

        self.g_pend_o = Signal(dep, reset_less=True)  # global pending vector

    def elaborate(self, platform):
        m = Module()

        # per-register: gather that register's bit from every FU vector
        # and OR them all together
        pend_l = [Cat(*[vec[reg] for vec in self.fu_vecs]).bool()
                  for reg in range(self.reg_dep)]

        # merge all OR'd bits, registered or combinatorial as requested
        domain = m.d.sync if self.sync else m.d.comb
        domain += self.g_pend_o.eq(Cat(*pend_l))

        return m

    def __iter__(self):
        for vec in self.fu_vecs:
            yield vec
        yield self.g_pend_o

    def ports(self):
        return [port for port in self]
def g_vec_sim(dut):
    """ Simulation stimulus for GlobalPending.

        GlobalPending's only inputs are the vectors in dut.fu_vecs (it
        has no dest_i / issue_i / src1_i / go_* attributes), so each
        input vector in turn has bit 0 raised for one cycle and then
        cleared.  A bare ``yield`` advances the simulation by one clock.

        NOTE(review): replaces a placeholder stimulus that addressed
        signals of a different module; no outputs are checked yet.
    """
    for vec in dut.fu_vecs:
        yield vec.eq(1)
        yield
        yield vec.eq(0)
        yield
def test_g_vec():
    """Build a GlobalPending over three 32-bit FU vectors, convert it to
    RTLIL, save it, then simulate it.
    """
    vecs = [Signal(32, name="fu%d" % i) for i in range(3)]
    dut = GlobalPending(32, vecs)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_global_pending.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = g_vec_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_global_pending.vcd')


if __name__ == '__main__':
    test_g_vec()
--- /dev/null
+""" Group Picker: to select an instruction that is permitted to read (or write)
+ based on the Function Unit expressing a *desire* to read (or write).
+ The job of the Group Picker is extremely simple yet extremely important.
+ It sits in front of a register file port (read or write) and stops it from
+ being corrupted. It's a "port contention selector", basically.
+ The way it works is:
+ * Function Units need to read from (or write to) the register file,
+ in order to get (or store) their operands, so they each have a signal,
+ readable (or writable), which "expresses" this need. This is a
+ *unary* encoding.
+ * The Function Units also have a signal which indicates that they
+ are requesting "release" of the register file port (this because
+ in the scoreboard, readable/writable can be permanently HI even
+ if the FU is idle, whereas the "release" signal is very specifically
+ only HI if the read (or write) latch is still active)
+ * The Group Picker takes this unary encoding of the desire to read
+ (or write) and, on a priority basis, activates one *and only* one
+ of those signals, again as a unary output.
+ * Due to the way that the Computation Unit works, that signal (Go_Read
+ or Go_Write) will fire for one (and only one) cycle, and can be used
+ to enable the register file port read (or write) lines. The Go_Read/Wr
+ signal basically loops back to the Computation Unit and resets the
+ "desire-to-read/write-expressing" latch.
+ In theory (and in practice!) the following is possible:
+ * Separate src1 and src2 Group Pickers. This would allow instructions
+ with only one operand to read to not block up other instructions,
+ and it would also allow 3-operand instructions to be interleaved
+ with 1 and 2 operand instructions.
+ * *Multiple* Group Pickers (multi-issue). This would require
+ a corresponding increase in the number of register file ports,
+ either 4R2W (or more) or by "striping" the register file into
+ split banks (a strategy best deployed on Vector Processors)
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable
+from nmutil.picker import PriorityPicker
class GroupPicker(Elaboratable):
    """ implements 10.5 mitch alsup group picker, p27

        Two independent PriorityPickers, each wid bits wide: one picks
        among (readable_i & rd_rel_i) to produce go_rd_o, the other
        among (writable_i & req_rel_i) to produce go_wr_o.
    """
    def __init__(self, wid):
        self.gp_wid = wid
        # inputs
        self.readable_i = Signal(wid, reset_less=True) # readable in (top)
        self.writable_i = Signal(wid, reset_less=True) # writable in (top)
        self.rd_rel_i = Signal(wid, reset_less=True)   # read release in (top)
        self.req_rel_i = Signal(wid, reset_less=True) # release request in (top)

        # outputs
        self.go_rd_o = Signal(wid, reset_less=True) # go read (bottom)
        self.go_wr_o = Signal(wid, reset_less=True) # go write (bottom)

    def elaborate(self, platform):
        m = Module()

        m.submodules.rpick = rpick = PriorityPicker(self.gp_wid)
        m.submodules.wpick = wpick = PriorityPicker(self.gp_wid)

        # combine release (output ready signal) with writeable
        m.d.comb += wpick.i.eq(self.writable_i & self.req_rel_i)
        m.d.comb += self.go_wr_o.eq(wpick.o)

        # likewise combine the read release with readable
        m.d.comb += rpick.i.eq(self.readable_i & self.rd_rel_i)
        m.d.comb += self.go_rd_o.eq(rpick.o)

        return m

    def __iter__(self):
        yield self.readable_i
        yield self.writable_i
        # rd_rel_i gates the read picker, so it is a module input and
        # must be exposed alongside req_rel_i.
        yield self.rd_rel_i
        yield self.req_rel_i
        yield self.go_rd_o
        yield self.go_wr_o

    def ports(self):
        return list(self)
def grp_pick_sim(dut):
    """ Simulation stimulus for GroupPicker.

        GroupPicker's inputs are readable_i, writable_i, rd_rel_i and
        req_rel_i (it has no dest_i / issue_i / src1_i / go_wr_i
        attributes).  Unit 0 is marked readable and its read release is
        pulsed; it is then marked writable and its release request is
        pulsed.  A bare ``yield`` advances the simulation by one clock.

        NOTE(review): replaces a placeholder stimulus that addressed
        signals of a different module; no outputs are checked yet.
    """
    # read side: readable, then pulse the read release
    yield dut.readable_i.eq(1)
    yield dut.rd_rel_i.eq(1)
    yield
    yield dut.rd_rel_i.eq(0)
    yield
    # write side: writable, then pulse the release request
    yield dut.writable_i.eq(1)
    yield dut.req_rel_i.eq(1)
    yield
    yield dut.req_rel_i.eq(0)
    yield
def test_grp_pick():
    """Convert a 4-wide GroupPicker to RTLIL, save it, then simulate it."""
    dut = GroupPicker(4)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_grp_pick.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = grp_pick_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_grp_pick.vcd')


if __name__ == '__main__':
    test_grp_pick()
--- /dev/null
+from math import log
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Array, Const, Repl, Elaboratable
+from nmutil.iocontrol import RecordObject
+from nmutil.nmoperator import eq, shape, cat
class Instruction(RecordObject):
    """ Record holding the fields of one (part-decoded) instruction:
        an opwid-bit operation, a 1-bit immediate flag, and wid-bit
        immediate, dest, src1 and src2 fields.
    """
    def __init__(self, name, wid, opwid):
        super().__init__(name=name)
        self.oper_i = Signal(opwid, reset_less=True)
        self.opim_i = Signal(1, reset_less=True) # src2 is an immediate
        self.imm_i = Signal(wid, reset_less=True)
        self.dest_i = Signal(wid, reset_less=True)
        self.src1_i = Signal(wid, reset_less=True)
        self.src2_i = Signal(wid, reset_less=True)

    @staticmethod
    def nq(n_insns, name, wid, opwid):
        """Return an Array of n_insns Instructions named <name>0, <name>1..."""
        return Array(Instruction("%s%d" % (name, idx), wid, opwid)
                     for idx in range(n_insns))
class InstructionQ(Elaboratable):
    """ contains a queue of (part-decoded) instructions.

        output is copied combinatorially from the front of the queue,
        for easy access on the clock cycle.  only "n_out" instructions
        are made available this way

        input and shifting occurs on sync.
    """
    def __init__(self, wid, opwid, iqlen, n_in, n_out):
        """ constructor

            Inputs

            * :wid:         register file width
            * :opwid:       operand width
            * :iqlen:       instruction queue length
            * :n_in:        max number of instructions allowed "in"
            * :n_out:       number of queue entries copied to data_o
        """
        self.iqlen = iqlen
        self.reg_width = wid
        self.opwid = opwid
        self.n_in = n_in
        self.n_out = n_out
        # (width, signed) shape shared by all the count signals
        mqbits = (int(log(iqlen) / log(2))+2, False)

        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", wid, opwid)

        self.data_o = Instruction.nq(n_out, "data_o", wid, opwid)
        self.n_sub_i = Signal(mqbits) # number of instructions to remove
        self.n_sub_o = Signal(mqbits) # number of instructions removed

        # each queue slot stores one flattened Instruction record
        self.qsz = shape(self.data_o[0])[0]
        q = []
        for i in range(iqlen):
            q.append(Signal(self.qsz, name="q%d" % i))
        self.q = Array(q)
        self.qlen_o = Signal(mqbits)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iqlen = self.iqlen
        mqbits = int(log(iqlen) / log(2))

        left = Signal((mqbits+2, False))
        spare = Signal((mqbits+2, False))
        qmaxed = Signal()

        start_q = Signal(mqbits)
        end_q = Signal(mqbits)
        mqlen = Const(iqlen, (len(left), False))

        # work out how many can be subtracted from the queue
        with m.If(self.n_sub_i):
            qinmax = Signal()
            comb += qinmax.eq(self.n_sub_i > self.qlen_o)
            with m.If(qinmax):
                # asked for more than is queued: remove everything
                comb += self.n_sub_o.eq(self.qlen_o)
            with m.Else():
                comb += self.n_sub_o.eq(self.n_sub_i)

        # work out how many new items are going to be in the queue
        comb += left.eq(self.qlen_o )#- self.n_sub_o)
        comb += spare.eq(mqlen - self.p_add_i)
        comb += qmaxed.eq(left <= spare)
        comb += self.p_ready_o.eq(qmaxed & (self.p_add_i != 0))

        # put q (flattened) into output
        for i in range(self.n_out):
            opos = Signal(mqbits)
            comb += opos.eq(end_q + i)
            comb += cat(self.data_o[i]).eq(self.q[opos])

        with m.If(self.n_sub_o):
            # ok now the end's moved
            sync += end_q.eq(end_q + self.n_sub_o)

        with m.If(self.p_ready_o):
            # copy in the input... insanely gate-costly... *sigh*...
            for i in range(self.n_in):
                with m.If(self.p_add_i > Const(i, len(self.p_add_i))):
                    ipos = Signal(mqbits)
                    comb += ipos.eq(start_q + i) # should roll round
                    sync += self.q[ipos].eq(cat(self.data_i[i]))
            sync += start_q.eq(start_q + self.p_add_i)

        with m.If(self.p_ready_o):
            # update the queue length
            add2 = Signal(mqbits+1)
            comb += add2.eq(self.qlen_o + self.p_add_i)
            sync += self.qlen_o.eq(add2 - self.n_sub_o)
        with m.Else():
            sync += self.qlen_o.eq(self.qlen_o - self.n_sub_o)

        return m

    def __iter__(self):
        yield from self.q

        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

        for o in self.data_o:
            yield from list(o)
        yield self.n_sub_i
        yield self.n_sub_o
        # queue length is an output of this module as well
        yield self.qlen_o

    def ports(self):
        return list(self)
def instruction_q_sim(dut):
    """ Simulation stimulus for InstructionQ.

        InstructionQ is driven through data_i / p_add_i and n_sub_i (it
        has no dest_i / issue_i / src1_i / go_* attributes of its own).
        One instruction is presented on data_i[0] and added, then one
        instruction is requested for removal.  A bare ``yield`` advances
        the simulation by one clock.

        NOTE(review): replaces a placeholder stimulus that addressed
        signals of a different module; no outputs are checked yet.
    """
    # present one instruction and request that it be added
    yield dut.data_i[0].dest_i.eq(1)
    yield dut.data_i[0].src1_i.eq(1)
    yield dut.p_add_i.eq(1)
    yield
    yield dut.p_add_i.eq(0)
    yield
    # request removal of one instruction
    yield dut.n_sub_i.eq(1)
    yield
    yield dut.n_sub_i.eq(0)
    yield
def test_instruction_q():
    """Convert a small InstructionQ to RTLIL, save it, then simulate it."""
    dut = InstructionQ(16, 4, 4, n_in=2, n_out=2)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_instruction_q.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = instruction_q_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_instruction_q.vcd')


if __name__ == '__main__':
    test_instruction_q()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Array, Const, Repl, Elaboratable
+from nmigen.lib.coding import Decoder
+from scoreboard.group_picker import PriorityPicker
class RegDecode(Elaboratable):
    """ decodes the dest, src1 and src2 register numbers into unary,
        using one Decoder each; all three are disabled together when
        enable_i is low.

        Inputs

        * :wid:         register file width
    """
    def __init__(self, wid):
        self.reg_width = wid

        # inputs
        self.enable_i = Signal(reset_less=True) # enable decoders
        self.dest_i = Signal(range(wid), reset_less=True) # Dest R# in
        self.src1_i = Signal(range(wid), reset_less=True) # oper1 R# in
        self.src2_i = Signal(range(wid), reset_less=True) # oper2 R# in

        # outputs
        self.dest_o = Signal(wid, reset_less=True) # Dest unary out
        self.src1_o = Signal(wid, reset_less=True) # oper1 unary out
        self.src2_o = Signal(wid, reset_less=True) # oper2 unary out

    def elaborate(self, platform):
        m = Module()
        m.submodules.dest_d = dest_d = Decoder(self.reg_width)
        m.submodules.src1_d = src1_d = Decoder(self.reg_width)
        m.submodules.src2_d = src2_d = Decoder(self.reg_width)

        # (decoder, register number in, unary out)
        wiring = (
            (dest_d, self.dest_i, self.dest_o),
            (src1_d, self.src1_i, self.src1_o),
            (src2_d, self.src2_i, self.src2_o),
        )
        for decoder, reg_num, unary in wiring:
            m.d.comb += [
                decoder.i.eq(reg_num),
                # the decoder's n input is driven with "not enabled"
                decoder.n.eq(~self.enable_i),
                unary.eq(decoder.o),
            ]

        return m

    def __iter__(self):
        yield from (self.enable_i, self.dest_i, self.src1_i, self.src2_i)
        yield from (self.dest_o, self.src1_o, self.src2_o)

    def ports(self):
        return [port for port in self]
class IssueUnitGroup(Elaboratable):
    """ Manages a batch of Computation Units all of which can do the same task

        A priority picker will allocate one instruction in this cycle based
        on whether the others are busy.

        insn_i indicates to this module that there is an instruction to be
        issued which this group can handle

        busy_i is a vector of signals that indicate, in this cycle, which
        of the units are currently busy.

        busy_o is HI when every unit in the group is busy, i.e. when
        there is *no* unit here that can be issued an instruction

        fn_issue_o indicates, out of the available (non-busy) units,
        which one may be selected
    """
    def __init__(self, n_insns):
        """ Set up inputs and outputs for the Group

            Input Parameters

            * :n_insns:     number of instructions in this issue unit.
        """
        self.n_insns = n_insns

        # inputs
        self.insn_i = Signal(reset_less=True, name="insn_i")
        self.busy_i = Signal(n_insns, reset_less=True, name="busy_i")

        # outputs
        self.fn_issue_o = Signal(n_insns, reset_less=True, name="fn_issue_o")
        self.busy_o = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        # nothing to pick from: leave the module empty
        if self.n_insns == 0:
            return m

        m.submodules.pick = pick = PriorityPicker(self.n_insns)

        # temporaries
        allissue = Signal(self.n_insns, reset_less=True)

        m.d.comb += allissue.eq(Repl(self.insn_i, self.n_insns))
        # Pick one (and only one) of the units to proceed in this cycle
        m.d.comb += pick.i.eq(~self.busy_i & allissue)

        # busy_o is raised only when no unit is free, i.e. when not a
        # single bit of ~busy_i is set
        m.d.comb += self.busy_o.eq(~((~self.busy_i).bool()))

        # Picker only raises one signal, therefore it's also the fn_issue
        m.d.comb += self.fn_issue_o.eq(pick.o & Repl(~self.busy_o, self.n_insns))

        return m

    def __iter__(self):
        yield self.insn_i
        yield self.busy_i
        yield self.fn_issue_o
        # this class defines busy_o; it has no g_issue_o attribute
        yield self.busy_o

    def ports(self):
        return list(self)
class IssueUnitArray(Elaboratable):
    """ Convenience module that amalgamates the issue and busy signals
        of a list of issue units.

        unit issue_i is to be set externally, at the same time as the
        ALU group oper_i
    """
    def __init__(self, units):
        """ * :units: sequence of issue units; each must provide busy_i,
              busy_o and fn_issue_o (these are what elaborate() reads).
        """
        self.units = units
        self.issue_o = Signal(reset_less=True)

        # total width is the sum of the widths of every unit's fn_issue_o
        n_insns = sum(len(unit.fn_issue_o) for unit in self.units)
        self.busy_i = Signal(n_insns, reset_less=True)
        self.fn_issue_o = Signal(n_insns, reset_less=True)
        self.n_insns = n_insns

    def elaborate(self, platform):
        m = Module()

        # register every unit as a submodule named issue0, issue1, ...
        for idx, unit in enumerate(self.units):
            setattr(m.submodules, "issue%d" % idx, unit)

        # gather the per-unit signals, in unit order
        unit_busy_in = [unit.busy_i for unit in self.units]
        unit_busy_out = [unit.busy_o for unit in self.units]
        unit_fn_issue = [unit.fn_issue_o for unit in self.units]

        m.d.comb += [
            # issue_o is high only when no unit raises its busy_o
            self.issue_o.eq(~(Cat(*unit_busy_out).bool())),
            # concatenated per-unit issue vectors become the array output
            self.fn_issue_o.eq(Cat(*unit_fn_issue)),
            # the array's busy_i is split back out across the units
            Cat(*unit_busy_in).eq(self.busy_i),
        ]
        return m

    def ports(self):
        yield self.busy_i
        yield self.issue_o
        yield self.fn_issue_o
        # NOTE(review): this yields the unit objects themselves, not the
        # signals inside them - confirm that is what callers expect
        for unit in self.units:
            yield unit
class IssueUnit(Elaboratable):
    """ implements 11.4.14 issue unit, p50

        Inputs

        * :n_insns: number of instructions in this issue unit.
    """
    def __init__(self, n_insns):
        self.n_insns = n_insns

        # inputs: one bit per instruction
        self.insn_i = Signal(n_insns, reset_less=True, name="insn_i")
        self.busy_i = Signal(n_insns, reset_less=True, name="busy_i")

        # outputs: per-instruction issue vector and a single global issue bit
        self.fn_issue_o = Signal(n_insns, reset_less=True, name="fn_issue_o")
        self.g_issue_o = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()

        # nothing to build for a zero-width unit
        if self.n_insns == 0:
            return m

        # stall whenever any requested instruction targets a busy slot
        fu_stall = Signal(reset_less=True)
        blocked = [self.insn_i[idx] & self.busy_i[idx]
                   for idx in range(self.n_insns)]
        m.d.comb += [
            fu_stall.eq(Cat(*blocked).bool()),
            self.g_issue_o.eq(~(fu_stall)),
        ]

        # each instruction issues only when requested and nothing stalls
        m.d.comb += [
            self.fn_issue_o[idx].eq(self.g_issue_o & self.insn_i[idx])
            for idx in range(self.n_insns)
        ]
        return m

    def __iter__(self):
        for attr in ("insn_i", "busy_i", "fn_issue_o", "g_issue_o"):
            yield getattr(self, attr)

    def ports(self):
        """ Return the signals produced by __iter__ as a list. """
        return [port for port in self]
class IntFPIssueUnit(Elaboratable):
    """ Pairs two IssueUnits (self.i and self.f) and ORs their global
        issue outputs together into issue_o.
    """
    def __init__(self, n_int_insns, n_fp_insns):
        """ * :n_int_insns: width handed to the IssueUnit stored as self.i
            * :n_fp_insns: width handed to the IssueUnit stored as self.f
        """
        self.i = IssueUnit(n_int_insns)
        self.f = IssueUnit(n_fp_insns)
        self.issue_o = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        m.submodules.intissue = self.i
        m.submodules.fpissue = self.f

        # issue_o is raised when either unit raises its g_issue_o
        either_issue = self.i.g_issue_o | self.f.g_issue_o
        m.d.comb += self.issue_o.eq(either_issue)
        return m

    def ports(self):
        yield self.issue_o
        # then every signal of the two issue units, self.i first
        for unit in (self.i, self.f):
            yield from unit
def issue_unit_sim(dut):
    """ Simulation stimulus generator: yields a fixed sequence of signal
        assignments on dut, with bare yields in between.

        NOTE(review): dest_i, src1_i, issue_i, go_rd_i and go_wr_i are not
        defined by the issue-unit classes visible alongside this function;
        confirm which dut this is meant to drive.
    """
    # (signal name, value) pairs are assignments; None is a bare yield
    steps = (
        ("dest_i", 1), ("issue_i", 1), None,
        ("issue_i", 0), None,
        ("src1_i", 1), ("issue_i", 1), None, None, None,
        ("issue_i", 0), None,
        ("go_rd_i", 1), None,
        ("go_rd_i", 0), None,
        ("go_wr_i", 1), None,
        ("go_wr_i", 0), None,
    )
    for step in steps:
        if step is None:
            yield
        else:
            signame, value = step
            # attribute lookup is deferred until this step is reached
            yield getattr(dut, signame).eq(value)
def test_issue_unit():
    """ Convert each issue-unit variant to RTLIL, write it to a .il file,
        then run the simulation on the last one built.
    """
    dut = IssueUnitGroup(3)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_issue_unit_group.il", "w") as f:
        f.write(vl)

    # IssueUnit takes only the instruction count
    dut = IssueUnit(3)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_issue_unit.il", "w") as f:
        f.write(vl)

    # IntFPIssueUnit takes only the int and fp instruction counts
    dut = IntFPIssueUnit(3, 3)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_intfp_issue_unit.il", "w") as f:
        f.write(vl)

    # NOTE(review): issue_unit_sim drives signals (dest_i, go_rd_i, ...)
    # that IntFPIssueUnit does not define - the stimulus needs updating
    run_simulation(dut, issue_unit_sim(dut), vcd_name='test_issue_unit.vcd')


if __name__ == '__main__':
    test_issue_unit()
--- /dev/null
+""" Mitch Alsup 6600-style LD/ST scoreboard Dependency Cell
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=81
+"""
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Repl, Elaboratable
+from nmutil.latch import SRLatch
class LDSTDepCell(Elaboratable):
    """ implements 11.4.12 mitch alsup load/store dependence cell, p45

        Holds two n_ls-wide SR latches (WaR and RaW).  Both are set at
        issue time from the horizontal (single-bit) and vertical (n_ls-bit)
        load/store inputs; the two hold outputs are the OR-reduction of
        each latch's qn output ANDed with the corresponding hit vector.
    """
    def __init__(self, n_ls=1):
        """ * :n_ls: width of the vertical inputs and of both latches
        """
        self.n_ls = n_ls

        # inputs
        self.load_h_i = Signal(reset_less=True)       # load in (left)
        self.stor_h_i = Signal(reset_less=True)       # store in (left)
        self.load_v_i = Signal(n_ls, reset_less=True)  # load in (top)
        self.stor_v_i = Signal(n_ls, reset_less=True)  # store in (top)
        self.issue_i = Signal(reset_less=True)        # Issue in (left)
        self.go_die_i = Signal(reset_less=True)       # Go Die in (left)

        # load / store hit - basically connect these to go_wr from LD/STCompUnit
        # LD.go_wr -> load_hit_i, ST.go_wr -> stwd_hit_i.
        self.load_hit_i = Signal(n_ls, reset_less=True)  # ld hit in (right)
        self.stwd_hit_i = Signal(n_ls, reset_less=True)  # st w/ hit in (right)

        # outputs (latched rd/wr pend)
        self.ld_hold_st_o = Signal(reset_less=True)  # ld holds st out (l)
        self.st_hold_ld_o = Signal(reset_less=True)  # st holds ld out (l)

    def elaborate(self, platform):
        m = Module()
        m.submodules.war_l = war_l = SRLatch(sync=False, llen=self.n_ls)  # WaR
        m.submodules.raw_l = raw_l = SRLatch(sync=False, llen=self.n_ls)  # RaW

        # temporaries (repeat-extend)
        die = Repl(self.go_die_i, self.n_ls)

        # issue & (horizontal) store & (vertical) loads - used for WAR Setting
        i_s = Signal(reset_less=True)
        i_s_l = Signal(self.n_ls, reset_less=True)
        # i_s is one bit wide, so only issue_i itself takes part here
        m.d.comb += i_s.eq(self.issue_i & self.stor_h_i)  # horizontal
        m.d.comb += i_s_l.eq(Repl(i_s, self.n_ls) & self.load_v_i)  # vertical

        # issue & (horizontal) load & (vertical) stores - used for RAW Setting
        i_l = Signal(reset_less=True)
        i_l_s = Signal(self.n_ls, reset_less=True)
        m.d.comb += i_l.eq(self.issue_i & self.load_h_i)  # horizontal
        m.d.comb += i_l_s.eq(Repl(i_l, self.n_ls) & self.stor_v_i)  # vertical

        # write after read latch: loads block stores
        m.d.comb += war_l.s.eq(i_s_l)
        # reset on go_die, or wherever the vertical load input is low
        m.d.comb += war_l.r.eq(die | ~self.load_v_i)

        # read after write latch: stores block loads.  Set from the
        # issue & load & store term (i_l_s), not the WaR term.
        m.d.comb += raw_l.s.eq(i_l_s)
        # reset on go_die, or wherever the vertical store input is low
        m.d.comb += raw_l.r.eq(die | ~self.stor_v_i)

        # Hold results (read out horizontally, accumulate in OR fashion)
        m.d.comb += self.ld_hold_st_o.eq((war_l.qn & self.load_hit_i).bool())
        m.d.comb += self.st_hold_ld_o.eq((raw_l.qn & self.stwd_hit_i).bool())

        return m

    def __iter__(self):
        yield self.load_h_i
        yield self.load_v_i
        yield self.stor_h_i
        yield self.stor_v_i
        yield self.issue_i
        yield self.go_die_i
        yield self.load_hit_i
        yield self.stwd_hit_i
        yield self.ld_hold_st_o
        yield self.st_hold_ld_o

    def ports(self):
        """ Return the cell's signals (see __iter__) as a list. """
        return list(self)
def dcell_sim(dut):
    """ Simulation stimulus generator: yields a fixed sequence of signal
        assignments on dut, with bare yields in between.

        NOTE(review): dest_i, src1_i, go_rd_i and go_wr_i are not defined
        by LDSTDepCell; confirm which dut this is meant to drive.
    """
    # (signal name, value) pairs are assignments; None is a bare yield
    steps = (
        ("dest_i", 1), ("issue_i", 1), None,
        ("issue_i", 0), None,
        ("src1_i", 1), ("issue_i", 1), None,
        ("issue_i", 0), None,
        ("go_rd_i", 1), None,
        ("go_rd_i", 0), None,
        ("go_wr_i", 1), None,
        ("go_wr_i", 0), None,
    )
    for step in steps:
        if step is None:
            yield
        else:
            signame, value = step
            # attribute lookup is deferred until this step is reached
            yield getattr(dut, signame).eq(value)
def test_dcell():
    """ Convert a default LDSTDepCell to RTLIL, write it to
        test_ldst_dcell.il, then run dcell_sim against it.
    """
    dut = LDSTDepCell()
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_ldst_dcell.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = dcell_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_ldst_dcell.vcd')


if __name__ == '__main__':
    test_dcell()
--- /dev/null
+""" Mitch Alsup 6600-style LD/ST Memory Scoreboard Matrix (sparse vector)
+6600 LD/ST Dependency Table Matrix inputs / outputs
+Relevant comments (p45-46):
+* If there are no WAR dependencies on a Load instruction with a computed
+ address it can assert Bank_Addressable and Translate_Addressable.
+* If there are no RAW dependencies on a Store instruction with both a
+ write permission and store data present it can assert Bank_Addressable
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=81
+* Load Hit (or Store Hit with Data) are asserted by the LD/ST Computation
+ Unit when it has data and address ready
+* Asserting the ld_hit_i (or stwd_hit_i) *requires* that the output be
+ captured or at least taken into consideration for the next LD/STs
+ *right then*. Failure to observe the xx_hold_xx_o *will* result in
+ data corruption, as they are *only* asserted if xx_hit_i is asserted
+* The hold signals still have to go through "maybe address clashes"
+ detection, they cannot just be used as-is to stop a LD/ST.
+"""
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
+from ldst_dep_cell import LDSTDepCell
class LDSTDepMatrix(Elaboratable):
    """ implements 11.4.12 mitch alsup LD/ST Dependency Matrix, p46
        actually a sparse matrix along the diagonal.

        load-hold-store and store-hold-load accumulate in a priority-picking
        fashion, ORing together.  the OR gate from the dependency cell is
        here.
    """
    def __init__(self, n_ldst):
        """ * :n_ldst: number of rows, and also the width of each row
        """
        self.n_ldst = n_ldst                  # X and Y (FUs)
        self.ld_pend_i = Signal(n_ldst, reset_less=True)  # load pending in
        self.st_pend_i = Signal(n_ldst, reset_less=True)  # store pending in
        self.issue_i = Signal(n_ldst, reset_less=True)    # Issue in
        self.go_die_i = Signal(n_ldst, reset_less=True)   # Die/Reset in

        self.load_hit_i = Signal(n_ldst, reset_less=True)  # load hit in
        self.stwd_hit_i = Signal(n_ldst, reset_less=True)  # store w/data hit in

        # outputs
        self.ld_hold_st_o = Signal(n_ldst, reset_less=True)  # load holds st out
        self.st_hold_ld_o = Signal(n_ldst, reset_less=True)  # st holds load out

    def elaborate(self, platform):
        m = Module()

        # ---
        # matrix of dependency cells.  actually, LDSTDepCell is a row, now
        # ---
        dm = Array(LDSTDepCell(self.n_ldst) for f in range(self.n_ldst))
        for fu in range(self.n_ldst):
            setattr(m.submodules, "dm_fu%d" % (fu), dm[fu])

        # ---
        # connect Function Unit vector, all horizontal
        # ---
        lhs_l = []
        shl_l = []
        issue_l = []
        go_die_l = []
        load_h_l = []
        stor_h_l = []

        for fu in range(self.n_ldst):
            dc = dm[fu]
            # accumulate load-hold-store / store-hold-load bits (horizontal)
            lhs_l.append(dc.ld_hold_st_o)
            shl_l.append(dc.st_hold_ld_o)
            # accumulate inputs (for Cat'ing later) - TODO: must be a better way
            issue_l.append(dc.issue_i)
            go_die_l.append(dc.go_die_i)
            # the single-bit horizontal load/store inputs, one per row
            load_h_l.append(dc.load_h_i)
            stor_h_l.append(dc.stor_h_i)

            # load-hit and store-with-data-hit go in vertically (top):
            # every row receives the full-width vectors
            m.d.comb += [dc.load_hit_i.eq(self.load_hit_i),
                         dc.stwd_hit_i.eq(self.stwd_hit_i),
                         dc.load_v_i.eq(self.ld_pend_i),
                         dc.stor_v_i.eq(self.st_pend_i),
                         ]

        # connect cell inputs using Cat(*list_of_stuff)
        m.d.comb += [Cat(*issue_l).eq(self.issue_i),
                     Cat(*go_die_l).eq(self.go_die_i),
                     ]

        # connect the load-hold-store / store-hold-load OR-accumulated outputs
        m.d.comb += self.ld_hold_st_o.eq(Cat(*lhs_l))
        m.d.comb += self.st_hold_ld_o.eq(Cat(*shl_l))

        # the pending vectors also feed the rows' horizontal inputs, one
        # bit per row.  This wiring is emitted exactly once.
        m.d.comb += [Cat(*load_h_l).eq(self.ld_pend_i),
                     Cat(*stor_h_l).eq(self.st_pend_i),
                     ]

        return m

    def __iter__(self):
        yield self.ld_pend_i
        yield self.st_pend_i
        yield self.issue_i
        yield self.go_die_i
        yield self.load_hit_i
        yield self.stwd_hit_i
        yield self.ld_hold_st_o
        yield self.st_hold_ld_o

    def ports(self):
        """ Return the matrix's signals (see __iter__) as a list. """
        return list(self)
def d_matrix_sim(dut):
    """ XXX TODO

        Simulation stimulus generator: yields a fixed sequence of signal
        assignments on dut, with bare yields in between.

        NOTE(review): dest_i, src1_i, go_rd_i and go_wr_i are not defined
        by LDSTDepMatrix; confirm which dut this is meant to drive.
    """
    # (signal name, value) pairs are assignments; None is a bare yield
    steps = (
        ("dest_i", 1), ("issue_i", 1), None,
        ("issue_i", 0), None,
        ("src1_i", 1), ("issue_i", 1), None,
        ("issue_i", 0), None,
        ("go_rd_i", 1), None,
        ("go_rd_i", 0), None,
        ("go_wr_i", 1), None,
        ("go_wr_i", 0), None,
    )
    for step in steps:
        if step is None:
            yield
        else:
            signame, value = step
            # attribute lookup is deferred until this step is reached
            yield getattr(dut, signame).eq(value)
def test_d_matrix():
    """ Convert a 4-wide LDSTDepMatrix to RTLIL, write it to
        test_ld_st_matrix.il, then run d_matrix_sim against it.
    """
    dut = LDSTDepMatrix(n_ldst=4)
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_ld_st_matrix.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = d_matrix_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_ld_st_matrix.vcd')


if __name__ == '__main__':
    test_d_matrix()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module
+from scoreboard.fu_reg_matrix import FURegDepMatrix
+from scoreboard.addr_match import PartialAddrMatch
class FUMemMatchMatrix(FURegDepMatrix, PartialAddrMatch):
    """ implement a FU-Regs overload with memory-address matching

        Combines both base classes by calling their __init__ and
        _elaborate methods explicitly, PartialAddrMatch first each time.
    """
    def __init__(self, n_fu, addrbitwid):
        # PartialAddrMatch goes first: the FURegDepMatrix constructor is
        # handed self.addr_nomatch_o (presumably created by
        # PartialAddrMatch.__init__ - verify against that class)
        PartialAddrMatch.__init__(self, n_fu, addrbitwid)
        FURegDepMatrix.__init__(self, n_fu, n_fu, 1, self.addr_nomatch_o)

    def elaborate(self, platform):
        module = Module()
        # both bases add their logic to the same Module, in this order
        PartialAddrMatch._elaborate(self, module, platform)
        FURegDepMatrix._elaborate(self, module, platform)
        return module
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Repl
+from nmutil.latch import SRLatch
class MemDepRow(Elaboratable):
    """ implements 1st phase Memory Dependency cell

        A row of n_reg bits built around two SR latches, one for loads
        (ld_c) and one for stores (st_c).
    """
    def __init__(self, n_reg):
        """ * :n_reg: width of the row (and of both latches)
        """
        self.n_reg = n_reg

        # inputs
        self.ld_i = Signal(n_reg, reset_less=True)       # load in (top)
        self.st_i = Signal(n_reg, reset_less=True)       # store in (top)
        self.issue_i = Signal(reset_less=True)           # Issue in (top)

        # pending vectors in, and latch-state vectors out
        self.st_pend_i = Signal(n_reg, reset_less=True)    # st pend in (top)
        self.ld_pend_i = Signal(n_reg, reset_less=True)    # ld pend in (top)
        self.v_st_rsel_o = Signal(n_reg, reset_less=True)  # st pend out (bot)
        self.v_ld_rsel_o = Signal(n_reg, reset_less=True)  # ld pend out (bot)

        self.go_ld_i = Signal(reset_less=True)   # Go Load in (left)
        self.go_st_i = Signal(reset_less=True)   # Go Store in (left)
        self.go_die_i = Signal(reset_less=True)  # Go Die in (left)

        # for Register File Select Lines (vertical)
        self.ld_rsel_o = Signal(n_reg, reset_less=True)  # ld reg sel (bot)
        self.st_rsel_o = Signal(n_reg, reset_less=True)  # st reg sel (bot)

        # for Function Unit "forward progress" (horizontal)
        self.ld_fwd_o = Signal(n_reg, reset_less=True)   # ld FU fw (right)
        self.st_fwd_o = Signal(n_reg, reset_less=True)   # st FU fw (right)

    def elaborate(self, platform):
        m = Module()
        width = self.n_reg
        m.submodules.ld_c = ld_latch = SRLatch(sync=False, llen=width)
        m.submodules.st_c = st_latch = SRLatch(sync=False, llen=width)

        # each latch resets on its own "go" signal or on go_die
        ld_die = Signal(reset_less=True)
        st_die = Signal(reset_less=True)
        m.d.comb += [
            ld_die.eq(self.go_ld_i | self.go_die_i),
            st_die.eq(self.go_st_i | self.go_die_i),
            ld_latch.r.eq(Repl(ld_die, width)),
            st_latch.r.eq(Repl(st_die, width)),
        ]

        # latches are set from the ld/st input bits, gated by issue
        issue_mask = Repl(self.issue_i, width)
        m.d.comb += [
            ld_latch.s.eq(issue_mask & self.ld_i),
            st_latch.s.eq(issue_mask & self.st_i),
        ]

        # hazard checks: latch output ANDed with the opposite pending vector
        m.d.comb += [
            self.ld_fwd_o.eq(ld_latch.q & self.st_pend_i),
            self.st_fwd_o.eq(st_latch.q & self.ld_pend_i),
        ]

        # reg-sel outputs: latch qlq output gated by the matching "go"
        go_st_mask = Repl(self.go_st_i, width)
        go_ld_mask = Repl(self.go_ld_i, width)
        m.d.comb += [
            self.ld_rsel_o.eq(ld_latch.qlq & go_ld_mask),
            self.st_rsel_o.eq(st_latch.qlq & go_st_mask),
        ]

        # to be accumulated to indicate if register is in use (globally)
        # after ORing, is fed back in to st_pend_i / ld_pend_i
        m.d.comb += [
            self.v_st_rsel_o.eq(st_latch.qlq),
            self.v_ld_rsel_o.eq(ld_latch.qlq),
        ]
        return m

    def __iter__(self):
        port_names = (
            "ld_i", "st_i", "st_pend_i", "ld_pend_i", "issue_i",
            "go_ld_i", "go_st_i", "go_die_i",
            "v_ld_rsel_o", "v_st_rsel_o",
            "ld_rsel_o", "st_rsel_o",
            "ld_fwd_o", "st_fwd_o",
        )
        for attr in port_names:
            yield getattr(self, attr)

    def ports(self):
        """ Return the row's signals (see __iter__) as a list. """
        return [port for port in self]
def dcell_sim(dut):
    """ Simulation stimulus generator: yields a fixed sequence of
        assignments to dut's ld_i, st_i, issue_i, go_st_i and go_ld_i
        signals, with bare yields in between.
    """
    # (signal name, value) pairs are assignments; None is a bare yield
    steps = (
        ("ld_i", 1), ("issue_i", 1), None,
        ("issue_i", 0), None,
        ("st_i", 1), ("issue_i", 1), None, None, None,
        ("issue_i", 0), None,
        ("go_st_i", 1), None,
        ("go_st_i", 0), None,
        ("go_ld_i", 1), None,
        ("go_ld_i", 0), None,
    )
    for step in steps:
        if step is None:
            yield
        else:
            signame, value = step
            # attribute lookup is deferred until this step is reached
            yield getattr(dut, signame).eq(value)
def test_dcell():
    """ Convert a 4-wide MemDepRow to RTLIL, write it to test_mem_drow.il,
        then run dcell_sim against it.
    """
    dut = MemDepRow(4)
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_mem_drow.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = dcell_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_mem_dcell.vcd')


if __name__ == '__main__':
    test_dcell()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat
+from scoreboard.mem_dependence_cell import MemDepRow
+from scoreboard.mem_fu_pending import MemFU_Pend
+from scoreboard.mem_select import Mem_Rsv
+from scoreboard.global_pending import GlobalPending
class MemFUDepMatrix(Elaboratable):
    """ implements 1st phase Memory-to-FU Dependency Matrix

        Built from n_fu_row MemDepRow rows (each n_reg_col wide), one
        MemFU_Pend per row, one Mem_Rsv per column, and two GlobalPending
        accumulators for the per-row v_*_rsel_o vectors.
    """
    def __init__(self, n_fu_row, n_reg_col):
        """ * :n_fu_row: number of rows (Function Units)
            * :n_reg_col: number of columns (width of each row)
        """
        self.n_fu_row = n_fu_row                  # Y (FUs)   ^v
        self.n_reg_col = n_reg_col                # X (Regs)  <>
        self.ld_i = Signal(n_reg_col, reset_less=True)  # LD in (top)
        self.st_i = Signal(n_reg_col, reset_less=True)  # ST in (top)

        # Register "Global" vectors for determining RaW and WaR hazards
        self.ld_pend_i = Signal(n_reg_col, reset_less=True)  # ld pending (top)
        self.st_pend_i = Signal(n_reg_col, reset_less=True)  # st pending (top)
        self.v_ld_rsel_o = Signal(n_reg_col, reset_less=True)  # ld pending (bot)
        self.v_st_rsel_o = Signal(n_reg_col, reset_less=True)  # st pending (bot)

        self.issue_i = Signal(n_fu_row, reset_less=True)   # Issue in (top)
        self.go_ld_i = Signal(n_fu_row, reset_less=True)   # Go LOAD in (left)
        self.go_st_i = Signal(n_fu_row, reset_less=True)   # Go STOR in (left)
        self.go_die_i = Signal(n_fu_row, reset_less=True)  # Go Die in (left)

        # for Register File Select Lines (horizontal), per-reg
        self.ld_rsel_o = Signal(n_reg_col, reset_less=True)  # ld reg (bot)
        self.st_rsel_o = Signal(n_reg_col, reset_less=True)  # st reg (bot)

        # for Function Unit "forward progress" (vertical), per-FU
        self.ld_pend_o = Signal(n_fu_row, reset_less=True)  # ld pending (right)
        self.st_pend_o = Signal(n_fu_row, reset_less=True)  # st pending (right)

    def elaborate(self, platform):
        m = Module()

        # ---
        # matrix of dependency cells
        # ---
        dm = Array(MemDepRow(self.n_reg_col) for r in range(self.n_fu_row))
        for fu in range(self.n_fu_row):
            setattr(m.submodules, "dr_fu%d" % fu, dm[fu])

        # ---
        # array of Function Unit Pending vectors
        # ---
        fupend = Array(MemFU_Pend(self.n_reg_col)
                       for f in range(self.n_fu_row))
        for fu in range(self.n_fu_row):
            setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu])

        # ---
        # array of Register Reservation vectors
        # ---
        regrsv = Array(Mem_Rsv(self.n_fu_row) for r in range(self.n_reg_col))
        for rn in range(self.n_reg_col):
            setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn])

        # ---
        # connect Function Unit vector
        # ---
        ld_pend = []
        st_pend = []
        for fu in range(self.n_fu_row):
            dc = dm[fu]
            fup = fupend[fu]
            # accumulate cell fwd outputs for ld/st, one bit per column
            ld_fwd_o = [dc.ld_fwd_o[rn] for rn in range(self.n_reg_col)]
            st_fwd_o = [dc.st_fwd_o[rn] for rn in range(self.n_reg_col)]
            # connect cell fwd outputs to FU Vector in [Cat is gooood]
            m.d.comb += [fup.ld_fwd_i.eq(Cat(*ld_fwd_o)),
                         fup.st_fwd_i.eq(Cat(*st_fwd_o)),
                         ]
            # accumulate FU Vector outputs
            ld_pend.append(fup.reg_ld_pend_o)
            st_pend.append(fup.reg_st_pend_o)

        # ... and output them from this module (vertical, width=FUs)
        m.d.comb += self.ld_pend_o.eq(Cat(*ld_pend))
        m.d.comb += self.st_pend_o.eq(Cat(*st_pend))

        # ---
        # connect Reg Selection vector
        # ---
        ld_rsel = []
        st_rsel = []
        for rn in range(self.n_reg_col):
            rsv = regrsv[rn]
            # accumulate cell reg-select outputs ld/st, one bit per row
            ld_rsel_o = [dm[fu].ld_rsel_o[rn] for fu in range(self.n_fu_row)]
            st_rsel_o = [dm[fu].st_rsel_o[rn] for fu in range(self.n_fu_row)]
            # connect cell reg-select outputs to Reg Vector In
            m.d.comb += [rsv.ld_rsel_i.eq(Cat(*ld_rsel_o)),
                         rsv.st_rsel_i.eq(Cat(*st_rsel_o)),
                         ]
            # accumulate Reg-Sel Vector outputs
            ld_rsel.append(rsv.ld_rsel_o)
            st_rsel.append(rsv.st_rsel_o)

        # ... and output them from this module (horizontal, width=REGs)
        m.d.comb += self.ld_rsel_o.eq(Cat(*ld_rsel))
        m.d.comb += self.st_rsel_o.eq(Cat(*st_rsel))

        # ---
        # connect Dependency Matrix ld/st/pending inputs to every row
        # ---
        for fu in range(self.n_fu_row):
            dc = dm[fu]
            # wire up inputs from module to row cell inputs
            m.d.comb += [dc.ld_i.eq(self.ld_i),
                         dc.st_i.eq(self.st_i),
                         dc.st_pend_i.eq(self.st_pend_i),
                         dc.ld_pend_i.eq(self.ld_pend_i),
                         ]

        # accumulate rsel bits into load/store pending vectors.
        st_pend_v = [dm[fu].v_st_rsel_o for fu in range(self.n_fu_row)]
        ld_pend_v = [dm[fu].v_ld_rsel_o for fu in range(self.n_fu_row)]
        st_v = GlobalPending(self.n_reg_col, st_pend_v)
        ld_v = GlobalPending(self.n_reg_col, ld_pend_v)
        m.submodules.st_v = st_v
        m.submodules.ld_v = ld_v

        m.d.comb += self.v_st_rsel_o.eq(st_v.g_pend_o)
        m.d.comb += self.v_ld_rsel_o.eq(ld_v.g_pend_o)

        # ---
        # connect Dep issue_i/go_st_i/go_ld_i/go_die_i to the module inputs
        # ---
        go_st_i = []
        go_ld_i = []
        go_die_i = []
        issue_i = []
        for fu in range(self.n_fu_row):
            dc = dm[fu]
            # accumulate the per-row single-bit control inputs
            go_st_i.append(dc.go_st_i)
            go_ld_i.append(dc.go_ld_i)
            go_die_i.append(dc.go_die_i)
            issue_i.append(dc.issue_i)
        # wire up inputs from module to row cell inputs (Cat is gooood)
        m.d.comb += [Cat(*go_st_i).eq(self.go_st_i),
                     Cat(*go_ld_i).eq(self.go_ld_i),
                     Cat(*go_die_i).eq(self.go_die_i),
                     Cat(*issue_i).eq(self.issue_i),
                     ]

        return m

    def __iter__(self):
        yield self.ld_i
        yield self.st_i
        yield self.issue_i
        yield self.go_ld_i
        yield self.go_st_i
        yield self.go_die_i
        yield self.ld_rsel_o
        yield self.st_rsel_o
        yield self.ld_pend_o
        yield self.st_pend_o
        yield self.ld_pend_i
        yield self.st_pend_i
        # the accumulated pending outputs: distinct from ld/st_rsel_o above
        yield self.v_ld_rsel_o
        yield self.v_st_rsel_o

    def ports(self):
        """ Return the matrix's signals (see __iter__) as a list. """
        return list(self)
def d_matrix_sim(dut):
    """ XXX TODO

        Simulation stimulus generator: yields a fixed sequence of
        assignments to dut's ld_i, st_i, issue_i, go_st_i and go_ld_i
        signals, with bare yields in between.
    """
    # (signal name, value) pairs are assignments; None is a bare yield
    steps = (
        ("ld_i", 1), ("issue_i", 1), None,
        ("issue_i", 0), None,
        ("st_i", 1), ("issue_i", 1), None,
        ("issue_i", 0), None,
        ("go_st_i", 1), None,
        ("go_st_i", 0), None,
        ("go_ld_i", 1), None,
        ("go_ld_i", 0), None,
    )
    for step in steps:
        if step is None:
            yield
        else:
            signame, value = step
            # attribute lookup is deferred until this step is reached
            yield getattr(dut, signame).eq(value)
def test_d_matrix():
    """ Convert a 3x3 MemFUDepMatrix to RTLIL, write it to
        test_fu_mem_matrix.il, then run d_matrix_sim against it.
    """
    dut = MemFUDepMatrix(n_fu_row=3, n_reg_col=3)
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_fu_mem_matrix.il", "w") as il_file:
        il_file.write(rtlil_text)

    stimulus = d_matrix_sim(dut)
    run_simulation(dut, stimulus, vcd_name='test_fu_mem_matrix.vcd')


if __name__ == '__main__':
    test_d_matrix()
--- /dev/null
+from nmigen import Elaboratable, Module, Signal, Cat
class MemFU_Pend(Elaboratable):
    """ these are allocated per-FU (horizontally),
        and are of length reg_count

        Each single-bit output is the OR-reduction of its input vector.
    """
    def __init__(self, reg_count):
        self.reg_count = reg_count
        # inputs: one bit per register
        self.ld_fwd_i = Signal(reg_count, reset_less=True)
        self.st_fwd_i = Signal(reg_count, reset_less=True)
        # outputs: single bit each
        self.reg_ld_pend_o = Signal(reset_less=True)
        self.reg_st_pend_o = Signal(reset_less=True)

    def elaborate(self, platform):
        module = Module()
        # an output is raised when any bit of the matching input is set
        module.d.comb += [
            self.reg_ld_pend_o.eq(self.ld_fwd_i.bool()),
            self.reg_st_pend_o.eq(self.st_fwd_i.bool()),
        ]
        return module
--- /dev/null
+from nmigen import Elaboratable, Module, Signal
class Mem_Rsv(Elaboratable):
    """ these are allocated per-Register (vertically),
        and are each of length fu_count

        Each single-bit output is the OR-reduction of its input vector.
    """
    def __init__(self, fu_count):
        self.fu_count = fu_count
        # inputs: one bit per Function Unit
        self.ld_rsel_i = Signal(fu_count, reset_less=True)
        self.st_rsel_i = Signal(fu_count, reset_less=True)
        # outputs: single bit each
        self.ld_rsel_o = Signal(reset_less=True)
        self.st_rsel_o = Signal(reset_less=True)

    def elaborate(self, platform):
        module = Module()
        # an output is raised when any bit of the matching input is set
        module.d.comb += [
            self.ld_rsel_o.eq(self.ld_rsel_i.bool()),
            self.st_rsel_o.eq(self.st_rsel_i.bool()),
        ]
        return module
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Array, Elaboratable
+from scoreboard.fu_fu_matrix import FUFUDepMatrix
+from scoreboard.mdm import FUMemMatchMatrix
class MemFunctionUnits(Elaboratable):
    """ Wires a FUFUDepMatrix and a FUMemMatchMatrix together to track
        dependencies between n_ldsts load/store units, with address
        matching on addresses of width addrbitwid.
    """
    def __init__(self, n_ldsts, addrbitwid):
        """ * :n_ldsts: number of load/store units (width of every vector)
            * :addrbitwid: bit-width of each entry in addrs_i
        """
        self.n_ldsts = n_ldsts
        self.bitwid = addrbitwid

        self.st_i = Signal(n_ldsts, reset_less=True)  # store in
        self.ld_i = Signal(n_ldsts, reset_less=True)  # load in

        self.g_int_ld_pend_o = Signal(n_ldsts, reset_less=True)
        self.g_int_st_pend_o = Signal(n_ldsts, reset_less=True)

        self.st_rsel_o = Signal(n_ldsts, reset_less=True)  # st sel (bot)
        self.ld_rsel_o = Signal(n_ldsts, reset_less=True)  # ld sel (bot)

        self.loadable_o = Signal(n_ldsts, reset_less=True)
        self.storable_o = Signal(n_ldsts, reset_less=True)
        self.addr_nomatch_o = Signal(n_ldsts, reset_less=True)

        self.go_ld_i = Signal(n_ldsts, reset_less=True)
        self.go_st_i = Signal(n_ldsts, reset_less=True)
        self.go_die_i = Signal(n_ldsts, reset_less=True)
        self.fn_issue_i = Signal(n_ldsts, reset_less=True)

        # address matching
        self.addrs_i = Array(Signal(self.bitwid, name="addrs_i%d" % i)
                             for i in range(n_ldsts))
        self.addr_we_i = Signal(n_ldsts)  # write-enable for incoming address
        self.addr_en_i = Signal(n_ldsts)  # address latched in
        self.addr_rs_i = Signal(n_ldsts)  # address deactivated

        # Note: FURegs st_pend_o is also outputted from here, for use in
        # WaWGrid.  It is only created when elaborate() runs.

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_ldsts

        # Integer FU-FU Dep Matrix
        intfudeps = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = intfudeps
        # Integer FU-Reg Dep Matrix
        intregdeps = FUMemMatchMatrix(n_fus, self.bitwid)
        m.submodules.intregdeps = intregdeps

        # ok, because we do not know in advance what the AGEN (address gen)
        # is, we have to make a transitive dependency set.  i.e. the LD
        # (or ST) being requested now must depend on ALL prior LDs *AND* STs.
        # these get dropped very rapidly once AGEN is carried out.

        # connect fureg matrix as a mem system
        comb += self.g_int_ld_pend_o.eq(intregdeps.v_rd_rsel_o)
        comb += self.g_int_st_pend_o.eq(intregdeps.v_wr_rsel_o)

        # the matrix's own accumulated pending vectors are fed back in
        comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
        comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)

        comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
        comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
        self.st_pend_o = intregdeps.wr_pend_o  # also output for use in WaWGrid

        comb += intfudeps.issue_i.eq(self.fn_issue_i)
        comb += intfudeps.go_rd_i.eq(self.go_ld_i)
        comb += intfudeps.go_wr_i.eq(self.go_st_i)
        comb += intfudeps.go_die_i.eq(self.go_die_i)
        comb += self.loadable_o.eq(intfudeps.readable_o)
        comb += self.storable_o.eq(intfudeps.writable_o)
        comb += self.addr_nomatch_o.eq(intregdeps.addr_nomatch_o)

        # Connect function issue / arrays: stores map onto the matrix's
        # dest/wr side, loads onto its src[0]/rd side
        comb += intregdeps.dest_i.eq(self.st_i)
        comb += intregdeps.src_i[0].eq(self.ld_i)

        comb += intregdeps.go_rd_i.eq(self.go_ld_i)
        comb += intregdeps.go_wr_i.eq(self.go_st_i)
        comb += intregdeps.go_die_i.eq(self.go_die_i)
        comb += intregdeps.issue_i.eq(self.fn_issue_i)

        comb += self.st_rsel_o.eq(intregdeps.dest_rsel_o)
        comb += self.ld_rsel_o.eq(intregdeps.src_rsel_o[0])

        # connect address matching: these get connected to the Addr CUs
        for i in range(self.n_ldsts):
            comb += intregdeps.addrs_i[i].eq(self.addrs_i[i])
        comb += intregdeps.addr_we_i.eq(self.addr_we_i)
        comb += intregdeps.addr_en_i.eq(self.addr_en_i)
        comb += intregdeps.addr_rs_i.eq(self.addr_rs_i)

        return m

    def __iter__(self):
        yield self.ld_i
        yield self.st_i
        yield self.g_int_st_pend_o
        yield self.g_int_ld_pend_o
        yield self.ld_rsel_o
        yield self.st_rsel_o
        yield self.loadable_o
        yield self.storable_o
        yield self.go_st_i
        yield self.go_ld_i
        yield self.go_die_i
        yield self.fn_issue_i
        yield from self.addrs_i
        yield self.addr_we_i
        yield self.addr_en_i
        # addr_rs_i and addr_nomatch_o are wired up in elaborate() and so
        # belong in the port list as well
        yield self.addr_rs_i
        yield self.addr_nomatch_o

    def ports(self):
        """ Return the module's signals (see __iter__) as a list. """
        return list(self)
--- /dev/null
+from nmigen import Elaboratable, Module, Signal, Array
class Reg_Rsv(Elaboratable):
    """ these are allocated per-Register (vertically),
        and are each of length fu_count

        The dest output, and each bit of the src output, is the
        OR-reduction of the matching fu_count-wide input vector.
    """
    def __init__(self, fu_count, n_src):
        """ * :fu_count: width of every *_rsel_i input vector
            * :n_src: number of src input vectors (and width of src_rsel_o)
        """
        self.n_src = n_src
        self.fu_count = fu_count

        # inputs: one dest vector and n_src src vectors
        self.dest_rsel_i = Signal(fu_count, reset_less=True)
        self.src_rsel_i = Array(
            Signal(fu_count, name="src_rsel_i", reset_less=True)
            for i in range(n_src))

        # outputs: a single dest bit, and one src bit per src vector
        self.dest_rsel_o = Signal(reset_less=True)
        self.src_rsel_o = Signal(n_src, reset_less=True)

    def elaborate(self, platform):
        module = Module()
        module.d.comb += self.dest_rsel_o.eq(self.dest_rsel_i.bool())
        # bit idx of src_rsel_o is raised when any bit of src vector idx is set
        module.d.comb += [
            self.src_rsel_o[idx].eq(self.src_rsel_i[idx].bool())
            for idx in range(self.n_src)
        ]
        return module
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Array, Const, Elaboratable, Repl
+from nmigen.lib.coding import Decoder
+from scoreboard.shadow_fn import ShadowFn
class ShadowMatrix(Elaboratable):
    """ Matrix of Shadow Functions.  One per FU.

        Inputs

        * :n_fus: number of Function Units (one ShadowFn is created for each)
        * :shadow_wid: number of shadow/fail/good/go_die sets
        * :syncreset: passed through unchanged to every ShadowFn

        Notes:

        * Each ShadowFn receives its own entry of the shadow/fail/good
          input arrays (entry i goes to ShadowFn i)
        * Output is an array of "shadow active" (schroedinger wires: neither
          alive nor dead) and an array of "go die" signals, one per FU.

        * the shadown must be connected to the Computation Unit's
          write release request, preventing it (ANDing) from firing
          (and thus preventing Writable.  this by the way being the
          whole point of having the Shadow Matrix...)

        * go_die_o must be connected to *both* the Computation Unit's
          src-operand and result-operand latch resets, causing both
          of them to reset.

        * go_die_o also needs to be wired into the Dependency and Function
          Unit Matrices by way of over-enabling (ORing) into Go_Read and
          Go_Write, resetting every cell that is required to "die"
    """
    def __init__(self, n_fus, shadow_wid=0, syncreset=False):
        self.syncreset = syncreset
        self.n_fus = n_fus
        self.shadow_wid = shadow_wid

        # inputs: one bit per FU
        self.issue_i = Signal(n_fus, reset_less=True)
        self.reset_i = Signal(n_fus, reset_less=True)
        # inputs: one shadow_wid-wide signal per FU
        self.shadow_i = Array(
            Signal(shadow_wid, name="sh_i", reset_less=True)
            for f in range(n_fus))
        self.s_fail_i = Array(
            Signal(shadow_wid, name="fl_i", reset_less=True)
            for f in range(n_fus))
        self.s_good_i = Array(
            Signal(shadow_wid, name="gd_i", reset_less=True)
            for f in range(n_fus))

        # outputs: one bit per FU
        self.go_die_o = Signal(n_fus, reset_less=True)
        self.shadown_o = Signal(n_fus, reset_less=True)

    def elaborate(self, platform):
        m = Module()

        shadows = []
        for idx in range(self.n_fus):
            shadow = ShadowFn(self.shadow_wid, self.syncreset)
            setattr(m.submodules, "sh%d" % idx, shadow)
            shadows.append(shadow)
            m.d.comb += [
                # fail/good inputs for this FU
                shadow.s_fail_i.eq(self.s_fail_i[idx]),
                shadow.s_good_i.eq(self.s_good_i[idx]),
                # this one is the matrix (shadow enables)
                shadow.shadow_i.eq(self.shadow_i[idx]),
            ]

        # per-FU single-bit signals, gathered in FU order
        issue_bits = [shadow.issue_i for shadow in shadows]
        reset_bits = [shadow.reset_i for shadow in shadows]
        shadown_bits = [shadow.shadown_o for shadow in shadows]
        go_die_bits = [shadow.go_die_o for shadow in shadows]

        m.d.comb += [
            # module inputs are split out across the shadow functions ...
            Cat(*issue_bits).eq(self.issue_i),
            Cat(*reset_bits).eq(self.reset_i),
            # ... and their outputs concatenated into the module outputs
            self.shadown_o.eq(Cat(*shadown_bits)),
            self.go_die_o.eq(Cat(*go_die_bits)),
        ]
        return m

    def __iter__(self):
        yield self.issue_i
        yield self.reset_i
        # every entry of each per-FU input array, array by array
        for bank in (self.shadow_i, self.s_fail_i, self.s_good_i):
            yield from bank
        yield self.go_die_o
        yield self.shadown_o

    def ports(self):
        """ Return the matrix's signals (see __iter__) as a list. """
        return [port for port in self]
class BranchSpeculationRecord(Elaboratable):
    """ A record of which function units will be cancelled and which
        allowed to proceed, on a branch.

        Whilst the input is a pair that says whether the instruction is
        under the "success" branch shadow (good_i) or the "fail" shadow
        (fail_i path), when the branch result is known, the "good" path
        must be cancelled if "fail" occurred, and the "fail" path cancelled
        if "good" occurred.

        therefore, use "good|~fail" and "fail|~good" respectively as
        output.
    """
    def __init__(self, n_fus):
        """ * :n_fus: width of the good/fail inputs and the match outputs
        """
        self.n_fus = n_fus

        # inputs: record *expected* status
        self.active_i = Signal(reset_less=True)
        self.good_i = Signal(n_fus, reset_less=True)
        self.fail_i = Signal(n_fus, reset_less=True)

        # inputs: status of branch (when result was known)
        self.br_i = Signal(reset_less=True)
        self.br_ok_i = Signal(reset_less=True)

        # outputs: true if the *expected* outcome matched the *actual* outcome
        self.match_f_o = Signal(n_fus, reset_less=True)
        self.match_g_o = Signal(n_fus, reset_less=True)

    def elaborate(self, platform):
        m = Module()

        # registers to record *expected* status
        good_r = Signal(self.n_fus)
        fail_r = Signal(self.n_fus)

        for i in range(self.n_fus):
            # while active, accumulate (OR in) the expected status bits
            with m.If(self.active_i):
                m.d.sync += good_r[i].eq(good_r[i] | self.good_i[i])
                m.d.sync += fail_r[i].eq(fail_r[i] | self.fail_i[i])
            with m.If(self.br_i):
                with m.If(good_r[i]):
                    # we expected good, return OK that good was EXPECTED
                    m.d.comb += self.match_g_o[i].eq(self.br_ok_i)
                    m.d.comb += self.match_f_o[i].eq(~self.br_ok_i)
                with m.If(fail_r[i]):
                    # we expected fail, return OK that fail was EXPECTED
                    m.d.comb += self.match_g_o[i].eq(~self.br_ok_i)
                    m.d.comb += self.match_f_o[i].eq(self.br_ok_i)
                # clear the record once the branch result is known; these
                # come last so they win over the accumulation above
                m.d.sync += good_r[i].eq(0)  # might be set if issue set as well
                m.d.sync += fail_r[i].eq(0)  # might be set if issue set as well

        return m

    def __iter__(self):
        yield self.active_i
        yield self.good_i
        yield self.fail_i
        yield self.br_i
        # only signals created in __init__ are yielded
        yield self.br_ok_i
        yield self.match_f_o
        yield self.match_g_o

    def ports(self):
        """ Return the record's signals (see __iter__) as a list. """
        return list(self)
class WaWGrid(Elaboratable):
    """ An NxM grid-selector which raises a 2D bit selected by N and M

        Row i of waw_o is shadow_i when bit i of fu_i is set, otherwise zero.
    """

    def __init__(self, n_fus, shadow_wid):
        self.n_fus = n_fus
        self.shadow_wid = shadow_wid

        # inputs: column select (shadow_i) and row select (fu_i)
        self.shadow_i = Signal(shadow_wid, reset_less=True)
        self.fu_i = Signal(n_fus, reset_less=True)

        # output: one shadow_wid-wide row per Function Unit
        self.waw_o = Array(Signal(shadow_wid, name="waw_o", reset_less=True)
                           for f in range(n_fus))

    def elaborate(self, platform):
        m = Module()
        m.d.comb += [
            # replicate the row-select bit across the row, then mask
            self.waw_o[row].eq(Repl(self.fu_i[row], self.shadow_wid)
                               & self.shadow_i)
            for row in range(self.n_fus)
        ]
        return m
def shadow_sim(dut):
    """Stimulus for a BranchSpeculationRecord (the DUT used by test_shadow).

    The previous stimulus drove dest_i / issue_i / src1_i / go_rd_i /
    go_wr_i, none of which exist on BranchSpeculationRecord, so the
    simulation stopped with AttributeError on the first statement.  This
    version only touches the ports the record actually declares.
    """
    # while recording is active, mark FU 0 as issued under "good" ...
    yield dut.active_i.eq(1)
    yield dut.good_i.eq(0b01)
    yield
    # ... and FU 1 as issued under "fail"
    yield dut.good_i.eq(0)
    yield dut.fail_i.eq(0b10)
    yield
    yield dut.fail_i.eq(0)
    yield dut.active_i.eq(0)
    yield
    yield
    # branch result becomes known: outcome is "ok"
    yield dut.br_ok_i.eq(1)
    yield dut.br_i.eq(1)
    yield
    yield dut.br_i.eq(0)
    yield dut.br_ok_i.eq(0)
    yield
    yield
def test_shadow():
    """Dump RTLIL for ShadowMatrix and BranchSpeculationRecord, then
    simulate the latter with shadow_sim.
    """
    matrix = ShadowMatrix(4, 2)
    matrix_il = rtlil.convert(matrix, ports=matrix.ports())
    with open("test_shadow.il", "w") as il_file:
        il_file.write(matrix_il)

    record = BranchSpeculationRecord(4)
    record_il = rtlil.convert(record, ports=record.ports())
    with open("test_branchspecrecord.il", "w") as il_file:
        il_file.write(record_il)

    run_simulation(record, shadow_sim(record), vcd_name='test_shadow.vcd')


if __name__ == '__main__':
    test_shadow()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Repl, Const, Elaboratable
+from nmutil.latch import SRLatch
class ShadowFn(Elaboratable):
    """ implements shadowing 11.5.1, p55, just the individual shadow function

        shadowing can be used for branches as well as exceptions (interrupts),
        load/store hold (exceptions again), and vector-element predication
        (once the predicate is known, which it may not be at instruction issue)

        Inputs

        * :slen:      number of shadow/fail/good/go_die sets
        * :syncreset: stored on the instance; both settings currently
                      produce the same logic (see NOTE in elaborate)

        notes:

        * when slen = 0, go_die_o and shadown_o are Consts (i.e. do nothing)
          and no input Signals are created
    """

    def __init__(self, slen, syncreset=False):
        self.slen = slen
        self.syncreset = syncreset

        if self.slen:
            # inputs
            self.issue_i = Signal(reset_less=True)
            self.shadow_i = Signal(slen, reset_less=True)
            self.reset_i = Signal(reset_less=True)
            self.s_fail_i = Signal(slen, reset_less=True)
            self.s_good_i = Signal(slen, reset_less=True)
            # outputs
            self.shadown_o = Signal(reset_less=True)
            self.go_die_o = Signal(reset_less=True)
        else:
            # outputs when no shadowing needed
            self.shadown_o = Const(1)
            self.go_die_o = Const(0)

    def elaborate(self, platform):
        m = Module()
        if self.slen == 0:
            # nothing to build, but elaborate() must still hand back the
            # (empty) Module: previously this returned None
            return m

        m.submodules.sl = sl = SRLatch(sync=False, llen=self.slen)

        r_ext = Repl(self.reset_i, self.slen)
        reset_r = Signal(self.slen)
        # NOTE(review): the syncreset and non-syncreset branches were
        # textually identical (both combinatorial), so they are merged
        # here.  If syncreset was meant to register reset_r, that still
        # needs implementing.
        m.d.comb += reset_r.eq(self.s_good_i | self.s_fail_i | r_ext)

        i_ext = Repl(self.issue_i, self.slen)
        # set the latch for each enabled shadow at issue time, unless that
        # shadow is being resolved/reset in the same cycle
        m.d.comb += sl.s.eq(self.shadow_i & i_ext &
                            ~self.s_good_i & ~reset_r)
        # clear on reset, on good/fail, or on issue without the shadow
        m.d.comb += sl.r.eq(r_ext | reset_r | self.s_good_i |
                            (i_ext & ~self.shadow_i))
        # die if any latched shadow reports failure
        m.d.comb += self.go_die_o.eq((sl.qlq & self.s_fail_i).bool())
        # "shadow not" is raised only when no shadow is latched
        m.d.comb += self.shadown_o.eq(~sl.qlq.bool())

        return m

    def __iter__(self):
        if self.slen:
            # the inputs only exist when shadowing is enabled
            yield self.issue_i
            yield self.reset_i
            yield self.shadow_i
            yield self.s_fail_i
            yield self.s_good_i
        yield self.shadown_o
        yield self.go_die_o

    def ports(self):
        return list(self)
def shadow_fn_unit_sim(dut):
    """Stimulus for a single ShadowFn.

    The previous stimulus drove dest_i / src1_i / go_rd_i / go_wr_i, which
    ShadowFn does not have, so the simulation stopped with AttributeError
    on the first statement.  This version drives only ShadowFn's own
    inputs: issue under shadow 0 then signal "good", issue under shadow 0
    again then signal "fail", and finally pulse reset.
    """
    # issue with shadow 0 enabled
    yield dut.shadow_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield dut.shadow_i.eq(0)
    yield
    # shadow 0 reports success
    yield dut.s_good_i.eq(1)
    yield
    yield dut.s_good_i.eq(0)
    yield
    # issue under shadow 0 again
    yield dut.shadow_i.eq(1)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield dut.shadow_i.eq(0)
    yield
    # this time shadow 0 reports failure
    yield dut.s_fail_i.eq(1)
    yield
    yield dut.s_fail_i.eq(0)
    yield
    # explicit reset pulse
    yield dut.reset_i.eq(1)
    yield
    yield dut.reset_i.eq(0)
    yield
def test_shadow_fn_unit():
    """Dump RTLIL for a 4-wide ShadowFn and run shadow_fn_unit_sim on it."""
    shadow_fn = ShadowFn(4)
    il_text = rtlil.convert(shadow_fn, ports=shadow_fn.ports())
    with open("test_shadow_fn_unit.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = shadow_fn_unit_sim(shadow_fn)
    run_simulation(shadow_fn, stimulus, vcd_name='test_shadow_fn_unit.vcd')


if __name__ == '__main__':
    test_shadow_fn_unit()
--- /dev/null
+ """ testing of InstructionQ
+ """
+from copy import deepcopy
+from random import randint
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from scoreboard.instruction_q import InstructionQ
+from nmutil.nmoperator import eq
class IQSim:
    """Simulation driver pair (send / rcv) for an InstructionQ.

    ``send`` pushes the instructions in ``iq`` into the DUT and records
    what was actually presented on ``data_i`` in ``oq``; ``rcv`` pulls
    entries out and asserts they match ``oq`` in order.
    """

    def __init__(self, dut, iq, n_in, n_out):
        self.dut, self.iq = dut, iq
        self.oq = []                      # values as seen on data_i
        self.n_in, self.n_out = n_in, n_out

    def send(self):
        """Generator process: queue every instruction of ``iq``."""
        dut = self.dut
        i = 0
        while i < len(self.iq):
            # a random batch size is still drawn (keeps the random
            # sequence unchanged) but the batch is then forced to one
            randint(1, self.n_in)
            remaining = len(self.iq) - i
            sendlen = min(remaining, 1)
            print ("sendlen", remaining, sendlen)

            for idx in range(sendlen):
                yield from eq(dut.data_i[idx], self.iq[i+idx])
                di = yield dut.data_i[idx]#.src1_i
                print ("senddata %d %x" % ((i+idx), di))
                self.oq.append(di)

            yield dut.p_add_i.eq(sendlen)
            # clock at least once, then keep clocking until ready
            while True:
                yield
                if (yield dut.p_ready_o):
                    break

            yield dut.p_add_i.eq(0)

            print ("send", len(self.iq), i, sendlen)

            # wait random period of time before queueing another value
            for _ in range(randint(0, 3)):
                yield

            i += sendlen

        yield dut.p_add_i.eq(0)
        yield

        print ("send ended")

    def rcv(self):
        """Generator process: drain the queue, checking against ``oq``."""
        dut = self.dut
        i = 0
        # give the sender a three-cycle head start
        for _ in range(3):
            yield
        while i < len(self.iq):
            rcvlen = randint(1, self.n_out)
            yield dut.n_sub_i.eq(rcvlen)
            n_sub_o = yield dut.n_sub_o
            print ("recv", n_sub_o)
            for j in range(n_sub_o):
                r = yield dut.data_o[j]#.src1_i
                print ("recvdata %x %s" % (r, repr(self.iq[i+j])))
                assert r == self.oq[i+j]
            yield
            if n_sub_o:
                # something was taken: drop the request and advance
                yield dut.n_sub_i.eq(0)
                i += n_sub_o

        print ("recv ended")
def mk_insns(n_insns, wid, opwid):
    """Return n_insns random instruction dicts.

    Register/immediate fields are drawn from 0..2**wid-1, the opcode from
    0..2**opwid-1 and the immediate-select flag from 0..1.  The draw order
    is src1, opim, src2, dest, oper, imm.
    """
    field_max = (1 << wid) - 1
    oper_max = (1 << opwid) - 1

    insns = []
    for _ in range(n_insns):
        src1 = randint(0, field_max)
        opim = randint(0, 1)
        src2 = randint(0, field_max)
        dest = randint(0, field_max)
        oper = randint(0, oper_max)
        imm = randint(0, field_max)
        insns.append(dict(oper_i=oper, opim_i=opim,
                          imm_i=imm, dest_i=dest,
                          src1_i=src1, src2_i=src2))
    return insns
def test_iq():
    """Build a small InstructionQ, dump its RTLIL and push 1000 random
    instructions through it with concurrent send / receive processes.
    """
    wid, opwid = 8, 4
    qlen = 2
    n_in = n_out = 1

    dut = InstructionQ(wid, opwid, qlen, n_in, n_out)
    insns = mk_insns(1000, wid, opwid)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_iq.il", "w") as il_file:
        il_file.write(il_text)

    test = IQSim(dut, insns, n_in, n_out)
    print (insns)
    processes = [test.rcv(), test.send()]
    run_simulation(dut, processes, vcd_name="test_iq.vcd")


if __name__ == '__main__':
    test_iq()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
+from regfile.regfile import RegFileArray, treereduce
+from scoreboard.global_pending import GlobalPending
+from scoreboard.group_picker import GroupPicker
+from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
+from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
+from scoreboard.memfu import MemFunctionUnits
+from nmutil.latch import SRLatch
+from nmutil.nmoperator import eq
+from random import randint, seed
+from copy import deepcopy
+from math import log
class Memory(Elaboratable):
    """Test memory: an nmigen Memory behind one read and one write port.

    * :regwid: data width in bits
    * :addrw:  width of the ``adr`` input in bits
    """

    def __init__(self, regwid, addrw):
        # this class shadows the name "Memory", so nmigen's primitive is
        # imported under an alias; calling Memory(...) here would
        # otherwise construct this class (with the wrong arguments)
        from nmigen import Memory as NMigenMemory

        # integer division: both values are used as sizes / slice indices
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        # each location is initialised to its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # NOTE(review): the read address drops the low ddepth bits while
        # the write address uses adr unmodified - confirm this asymmetry
        # is intended.
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
class MemSim:
    """Pure-python memory model: a list of words, each initialised to its
    own index.  Addresses are shifted right by ``ddepth`` to form the
    list index.
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = list(range(n_words))

    def ld(self, addr):
        """Return the word that addr maps onto."""
        index = addr >> self.ddepth
        return self.mem[index]

    def st(self, addr, data):
        """Store data, truncated to regwid bits, at the word for addr."""
        mask = (1 << self.regwid) - 1
        self.mem[addr >> self.ddepth] = data & mask
class Scoreboard(Elaboratable):
    """Top-level wiring of register files, issue units, computation units,
    function-unit dependency logic, the group picker and the shadow
    matrices (instruction-order plus branch speculation).
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports (the FP ports are requested but not yet wired up)
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrices: "shadows" (n_intfus wide) preserves instruction
        # order; "bshadow" (1 wide) is the branch-speculation shadow.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [
            regdecode.dest_i.eq(self.int_dest_i),
            regdecode.src1_i.eq(self.int_src1_i),
            regdecode.src2_i.eq(self.int_src2_i),
            regdecode.enable_i.eq(self.reg_enable_i),
            self.issue_o.eq(issueunit.issue_o),
        ]

        # take these to outside (issue needs them)
        comb += [
            cua.oper_i.eq(self.alu_oper_i),
            cua.imm_i.eq(self.alu_imm_i),
            cub.oper_i.eq(self.br_oper_i),
            cub.imm_i.eq(self.br_imm_i),
        ]

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += [
            intfus.dest_i.eq(regdecode.dest_o),
            intfus.src1_i.eq(regdecode.src1_o),
            intfus.src2_i.eq(regdecode.src2_o),
        ]

        fn_issue_o = issueunit.fn_issue_o

        comb += [
            intfus.fn_issue_i.eq(fn_issue_o),
            issueunit.busy_i.eq(cu.busy_o),
            self.busy_o.eq(cu.busy_o.bool()),
        ]

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += [
            allshadown.eq(shadows.shadown_o & bshadow.shadown_o),
            anydie.eq(shadows.go_die_o | bshadow.go_die_o),
            shreset.eq(bspec.match_g_o | bspec.match_f_o),
        ]

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += [
            go_rd_i[:n_intfus].eq(go_rd_o[:n_intfus]),    # rd
            go_wr_i[:n_intfus].eq(go_wr_o[:n_intfus]),    # wr
            go_die_i[:n_intfus].eq(anydie[:n_intfus]),    # die
        ]

        # Connect Picker
        #---------
        comb += [
            intpick1.rd_rel_i[:n_intfus].eq(cu.rd_rel_o[:n_intfus]),
            intpick1.req_rel_i[:n_intfus].eq(cu.req_rel_o[:n_intfus]),
        ]
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += [
            intpick1.readable_i[:n_intfus].eq(int_rd_o[:n_intfus]),
            intpick1.writable_i[:n_intfus].eq(int_wr_o[:n_intfus]),
        ]

        #---------
        # Shadow Matrix
        #---------

        comb += [
            shadows.issue_i.eq(fn_issue_o),
            shadows.reset_i[:n_intfus].eq(bshadow.go_die_o[:n_intfus]),
        ]
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += [
            cu.shadown_i[:n_intfus].eq(allshadown),
            cu.go_die_i[:n_intfus].eq(anydie),
        ]

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][:n_intfus].eq(go_wr_o[:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # 1-wide "bshadow" matrix: only shadow_i, s_fail_i and s_good_i
        # need setting.

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[:n_intfus].eq(shreset[:n_intfus])

        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += [
            int_dest.wen.eq(intfus.dest_rsel_o),
            int_src1.ren.eq(intfus.src1_rsel_o),
            int_src2.ren.eq(intfus.src2_rsel_o),
        ]

        # connect ALUs to regfile
        comb += [
            int_dest.data_i.eq(cu.data_o),
            cu.src1_i.eq(int_src1.data_o),
            cu.src2_i.eq(int_src2.data_o),
        ]

        # connect ALU Computation Units
        comb += [
            cu.go_rd_i[:n_intfus].eq(go_rd_o[:n_intfus]),
            cu.go_wr_i[:n_intfus].eq(go_wr_o[:n_intfus]),
            cu.issue_i[:n_intfus].eq(fn_issue_o[:n_intfus]),
        ]

        return m

    def __iter__(self):
        yield from self.intregs
        yield from self.fpregs
        yield from (self.int_dest_i, self.int_src1_i, self.int_src2_i)
        yield self.issue_o
        yield from (self.branch_succ_i, self.branch_fail_i)
        yield self.branch_direction_o

    def ports(self):
        return list(self)
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Present one instruction to the scoreboard and wait for it to issue.

    Opcodes with either of bits 2/3 set go to the branch issue group, all
    others to the ALU group; only the low two bits are passed as operation.
    """
    yield from disable_issue(dut)
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)

    is_branch = (op & 0b1100) != 0
    if is_branch:
        dut_issue, oper_sig, imm_sig = dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        dut_issue, oper_sig, imm_sig = dut.aluissue, dut.alu_oper_i, dut.alu_imm_i
    yield dut_issue.insn_i.eq(1)
    yield oper_sig.eq(Const(op & 0x3, 2))
    yield imm_sig.eq(imm)

    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    yield
    yield from wait_for_issue(dut, dut_issue)
def print_reg(dut, rnums):
    """Simulation helper: read the listed integer registers and print
    their numbers and hex values on one line.
    """
    values = []
    for rnum in rnums:
        value = yield dut.intregs.regs[rnum].reg
        values.append("%x" % value)
    labels = ','.join(map(str, rnums))
    print ("reg %s: %s" % (labels, ','.join(values)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return n_ops random instruction tuples.

    Each tuple is (src1, src2, dest, op, opi, imm), with a trailing (0, 0)
    branch-shadow pair appended when shadowing is set.  Registers are
    drawn from 1..n_regs-1, imm from 1..2**rwid-1, op from 0..max_opnums.
    opi is 1 when a 0..2 draw comes up zero, otherwise 0.
    """
    top_reg = dut.n_regs - 1
    top_imm = (1 << dut.rwid) - 1

    insts = []
    for _ in range(n_ops):
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, top_imm)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        opi = int(randint(0, 2) == 0)
        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry += ((0, 0),)
        insts.append(entry)
    return insts
def scoreboard_sim(dut, alusim):
    """Run 50 rounds of: randomise registers, queue instructions, wait
    for the queue and the units to drain, then compare against alusim.

    The ``if False:`` sections are disabled regression cases kept for
    reference; they are never executed.
    """
    seed(0)

    for _ in range(50):

        # set random values in the registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [(7, 3, 2, 4, (0, 0)),
                       (7, 6, 6, 2, (0, 0)),
                       (1, 7, 2, 2, (0, 0))]

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                            (i, src1, src2, dest, op, opi, imm))
            # update the reference model first, then queue on the DUT
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while (yield dut.qlen_o) != 0:
            yield
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
def test_scoreboard():
    """Build the issue-queue + scoreboard DUT, dump its RTLIL and run the
    random-instruction simulation against the register model.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not yet used by the sim

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(dut, scoreboard_sim(dut, alusim),
                   vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
def mem_sim(dut):
    """Stimulus for MemFunctionUnits: issue a load then a store, present
    addresses, then pulse go_ld and go_st.
    """
    def cycle(*assignments):
        # apply the given assignments in order, then advance one clock
        yield from assignments
        yield

    # issue a LD on FU 0, then a ST (mask 0x3) on FU 1
    yield from cycle(dut.ld_i.eq(0x1), dut.fn_issue_i.eq(0x1))
    yield from cycle(dut.ld_i.eq(0x0), dut.st_i.eq(0x3),
                     dut.fn_issue_i.eq(0x2))
    yield from cycle(dut.st_i.eq(0x0), dut.fn_issue_i.eq(0x0))

    # present addresses, enable them, then write-enable
    yield from cycle(dut.addrs_i[0].eq(0x012), dut.addrs_i[1].eq(0x012),
                     dut.addrs_i[2].eq(0x010), dut.addr_en_i.eq(0x3))
    yield from cycle(dut.addr_we_i.eq(0x3))

    # pulse go_ld, then go_st
    yield from cycle(dut.go_ld_i.eq(0x1))
    yield from cycle(dut.go_ld_i.eq(0x0))
    yield from cycle(dut.go_st_i.eq(0x2))
    yield from cycle(dut.go_st_i.eq(0x0))
def test_mem_fus():
    """Dump RTLIL for MemFunctionUnits(3, 11) and run mem_sim against it."""
    mem_fus = MemFunctionUnits(3, 11)
    il_text = rtlil.convert(mem_fus, ports=mem_fus.ports())
    with open("test_mem_fus.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(mem_fus, mem_sim(mem_fus),
                   vcd_name='test_mem_fus.vcd')


if __name__ == '__main__':
    test_mem_fus()
--- /dev/null
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
+from regfile.regfile import RegFileArray, treereduce
+from scoreboard.ldst_matrix import LDSTDepMatrix
+from scoreboard.fu_mem_matrix import FUMemDepMatrix
+from scoreboard.global_pending import GlobalPending
+from scoreboard.group_picker import GroupPicker
+from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
+from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
+from nmutil.latch import SRLatch
+from nmutil.nmoperator import eq
+from random import randint, seed
+from copy import deepcopy
+from math import log
class Memory(Elaboratable):
    """Test memory: an nmigen Memory behind one read and one write port.

    * :regwid: data width in bits
    * :addrw:  width of the ``adr`` input in bits
    """

    def __init__(self, regwid, addrw):
        # this class shadows the name "Memory", so nmigen's primitive is
        # imported under an alias; calling Memory(...) here would
        # otherwise construct this class (with the wrong arguments)
        from nmigen import Memory as NMigenMemory

        # integer division: both values are used as sizes / slice indices
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        # each location is initialised to its own index
        self.mem = NMigenMemory(width=regwid, depth=depth,
                                init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()
        # NOTE(review): the read address drops the low ddepth bits while
        # the write address uses adr unmodified - confirm this asymmetry
        # is intended.
        m.d.comb += [
            rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(self.adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
class MemSim:
    """Pure-python memory model: a list of words, each initialised to its
    own index.  Addresses are shifted right by ``ddepth`` to form the
    list index.
    """

    def __init__(self, regwid, addrw):
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_words = (1 << addrw) // self.ddepth
        self.mem = list(range(n_words))

    def ld(self, addr):
        """Return the word that addr maps onto."""
        index = addr >> self.ddepth
        return self.mem[index]

    def st(self, addr, data):
        """Store data, truncated to regwid bits, at the word for addr."""
        mask = (1 << self.regwid) - 1
        self.mem[addr >> self.ddepth] = data & mask
class MemFunctionUnits(Elaboratable):
    """Load/Store dependency tracking: an LD/ST dependency matrix feeding
    an FU-Memory dependency matrix, one row per integer ALU.

    NOTE(review): go_st_i, go_ld_i, req_rel_i and req_rel_o are declared
    and listed as ports but are not connected in elaborate() - the
    FU-Mem matrix "go" inputs are driven from the hit signals instead.
    """

    def __init__(self, n_int_alus):
        self.n_int_alus = n_int_alus

        self.ld_i = Signal(n_int_alus, reset_less=True) # Dest R# in
        self.st_i = Signal(n_int_alus, reset_less=True) # oper1 R# in

        self.load_hit_i = Signal(n_int_alus, reset_less=True) # Load Hit
        self.stwd_hit_i = Signal(n_int_alus, reset_less=True) # Store Hit

        # (global pending and register-select outputs are currently
        #  disabled: g_int_st_pend_o, g_int_ld_pend_o, ld_rsel_o, st_rsel_o)

        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.loadable_o = Signal(n_int_alus, reset_less=True)
        self.storable_o = Signal(n_int_alus, reset_less=True)

        self.go_st_i = Signal(n_int_alus, reset_less=True)
        self.go_ld_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs ld_pend_o is also outputted from here, for use in WaWGrid

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_intfus = self.n_int_alus

        # Integer LD/ST Dep Matrix
        m.submodules.ldstdeps = ldstdeps = LDSTDepMatrix(n_intfus)

        # Integer FU-Mem Dep Matrix
        m.submodules.fumemdeps = fumemdeps = FUMemDepMatrix(n_intfus,
                                                            n_intfus)

        # LD/ST matrix inputs
        comb += [
            ldstdeps.ld_pend_i.eq(self.ld_i),
            ldstdeps.st_pend_i.eq(self.st_i),
            ldstdeps.issue_i.eq(self.fn_issue_i),
            ldstdeps.load_hit_i.eq(self.load_hit_i),
            ldstdeps.stwd_hit_i.eq(self.stwd_hit_i),
            ldstdeps.go_die_i.eq(self.go_die_i),
        ]

        # results out, and LD/ST holds into the FU-Mem matrix
        comb += [
            self.storable_o.eq(fumemdeps.storable_o),
            self.loadable_o.eq(fumemdeps.loadable_o),
            fumemdeps.ld_pend_i.eq(ldstdeps.ld_hold_st_o),
            fumemdeps.st_pend_i.eq(ldstdeps.st_hold_ld_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [
            fumemdeps.go_st_i.eq(self.stwd_hit_i),
            fumemdeps.go_ld_i.eq(self.load_hit_i),
            fumemdeps.go_die_i.eq(self.go_die_i),
            fumemdeps.issue_i.eq(self.fn_issue_i),
        ]

        return m

    def __iter__(self):
        yield from (self.ld_i, self.st_i)
        yield self.req_rel_i
        yield from (self.loadable_o, self.storable_o)
        yield from (self.load_hit_i, self.stwd_hit_i)
        yield from (self.go_st_i, self.go_ld_i, self.go_die_i)
        yield self.req_rel_o
        yield self.fn_issue_i

    def ports(self):
        return list(self)
class Scoreboard(Elaboratable):
    """Scoreboard tying register files, computation units and hazard logic.

    ``elaborate`` instantiates and wires together: two RegFileArrays
    (integer and FP), a CompUnitALUs and a CompUnitBR grouped under a
    CompUnitsBase, a FunctionUnits dependency block, a GroupPicker, a
    RegDecode, an IssueUnitArray, two ShadowMatrix instances and a
    BranchSpeculationRecord.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        # NOTE(review): 4 + 1 presumably matches n_int_alus = 5 in elaborate()
        # - confirm against IssueUnitGroup.
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """Build and return the Module containing all submodules and wiring.

        ``platform`` is accepted for the Elaboratable interface and is not
        used in this method.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # NOTE(review): the FP ports are created here but never connected
        # anywhere below in this method.
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and Comp Units
        n_int_alus = 5
        # NOTE(review): meaning of the second argument (3) is defined by
        # CompUnitALUs / CompUnitBR, not visible here - confirm.
        cua = CompUnitALUs(self.rwid, 3)
        cub = CompUnitBR(self.rwid, 3)
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub])
        bgt = cub.bgt # get at the branch computation unit (unused below)
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(n_intfus) # picks between add, sub, mul and shf
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the code below passes n_intfus (not n_intfus+1) and
        # uses a separate 1-wide "bshadow" matrix for the branch; the
        # "+1" remark above may be stale - confirm.
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy_o is the OR-reduction of the per-unit busy vector
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        # the order-preservation shadows are reset by the branch shadow's
        # go_die output
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # bactive: a branch is recorded as active (or is being issued right
        # now) and the branch unit is not being written this cycle
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the unit(s) whose issue bit is set get the shadow
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not
        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): 0x1f is a hard-coded 5-bit mask; it equals
        # (1<<n_intfus)-1 only while n_int_alus stays at 5.
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # data_o + 1 maps the branch result onto branch_direction_o
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """Yield the register-file ports followed by a subset of the signals.

        NOTE(review): alu_oper_i, alu_imm_i, br_oper_i, br_imm_i,
        reg_enable_i and busy_o are created in __init__ but not yielded
        here - confirm whether that is intentional.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """Return everything produced by ``__iter__`` as a list."""
        return list(self)
def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
    """Simulation helper: present one instruction to *dut* and wait for issue.

    If either of bits 2-3 of ``op`` is set the instruction is routed to
    the branch issue unit (``dut.brissue`` / ``br_oper_i`` / ``br_imm_i``),
    otherwise to the ALU issue unit (``dut.aluissue`` / ``alu_oper_i`` /
    ``alu_imm_i``).  In both cases only the low two bits of ``op`` are
    driven onto the operation input.

    ``branch_success`` / ``branch_fail`` are written to
    ``dut.branch_succ_i`` / ``dut.branch_fail_i``.
    """
    yield from disable_issue(dut)

    # register numbers: destination first, then the two sources
    reg_fields = (
        (dut.int_dest_i, dest),
        (dut.int_src1_i, src1),
        (dut.int_src2_i, src2),
    )
    for field, regnum in reg_fields:
        yield field.eq(regnum)

    # choose which issue unit (and matching operand ports) gets this op
    if op & (0x3 << 2):  # branch
        issue_unit, oper_port, imm_port = \
            dut.brissue, dut.br_oper_i, dut.br_imm_i
    else:
        issue_unit, oper_port, imm_port = \
            dut.aluissue, dut.alu_oper_i, dut.alu_imm_i

    yield issue_unit.insn_i.eq(1)
    yield oper_port.eq(Const(op & 0x3, 2))
    yield imm_port.eq(imm)

    yield dut.reg_enable_i.eq(1)

    # these indicate that the instruction is to be made shadow-dependent on
    # (either) branch success or branch fail
    yield dut.branch_fail_i.eq(branch_fail)
    yield dut.branch_succ_i.eq(branch_success)

    # advance one step, then wait for the chosen unit to issue
    yield
    yield from wait_for_issue(dut, issue_unit)
def print_reg(dut, rnums):
    """Simulation helper: print the values of selected integer registers.

    This is a generator: for every register number in ``rnums`` it yields
    ``dut.intregs.regs[rnum].reg`` and expects the register's current
    value to be sent back.  Once all values are collected it prints one
    line of the form ``reg 1,3: 1f,a`` (register numbers in decimal,
    values in hex).

    ``rnums`` may be any iterable, including a one-shot iterator: it is
    materialised up front because it is needed twice (once to read the
    registers, once to print their numbers).
    """
    rnums = list(rnums)
    values = []
    for rnum in rnums:
        reg = yield dut.intregs.regs[rnum].reg
        values.append("%x" % reg)
    print("reg %s: %s" % (','.join(map(str, rnums)), ','.join(values)))
def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
    """Return a list of ``n_ops`` randomly generated instruction tuples.

    Each tuple is ``(src1, src2, dest, op, opi, imm)``; when ``shadowing``
    is true a trailing ``(0, 0)`` pair is appended as a seventh element.

    * src1, src2, dest: drawn from 1 .. dut.n_regs-1
    * imm: drawn from 1 .. (1<<dut.rwid)-1
    * op: drawn from 0 .. max_opnums (inclusive)
    * opi: 1 when a draw from 0..2 comes up zero, otherwise 0
    """
    ops = []
    for _ in range(n_ops):
        # the draw order (src1, src2, imm, dest, op, opi) is kept fixed so
        # that a seeded generator reproduces the same instruction stream
        top_reg = dut.n_regs - 1
        src1 = randint(1, top_reg)
        src2 = randint(1, top_reg)
        imm = randint(1, (1 << dut.rwid) - 1)
        dest = randint(1, top_reg)
        op = randint(0, max_opnums)
        opi = 1 if randint(0, 2) == 0 else 0
        entry = (src1, src2, dest, op, opi, imm)
        if shadowing:
            entry = entry + ((0, 0),)
        ops.append(entry)
    return ops
def scoreboard_sim(dut, alusim):
    """Simulation process: run random instruction batches and check results.

    For each of 50 rounds: load random values into registers 1..n_regs-1
    (mirroring them into ``alusim``), generate 15 random operations, apply
    each to ``alusim`` and queue it on ``dut`` via ``instr_q``, wait for
    the queue length (``dut.qlen_o``) to reach zero and for busy to clear,
    then have ``alusim`` check and dump against ``dut``.

    * dut: device under test; must expose ``n_regs``, ``intregs`` and
      ``qlen_o`` (test_scoreboard passes an IssueToScoreboard)
    * alusim: software model providing ``rwidth``, ``setval``, ``op``,
      ``check`` and ``dump``

    The ``if False:`` sections are disabled regression cases.
    NOTE(review): several of them append 4- or 5-element tuples, which
    would not unpack in the 7-field issue loop below if re-enabled.
    """
    # presumably random.seed: fixed seed so every run draws the same values
    seed(0)

    for i in range(50):

        # set random values in the registers
        # (this inner loop re-uses the name "i" from the outer loop)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if True:
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            # every entry must have 7 fields, the last being a 2-tuple
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the software model first, then queue on the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few extra steps after the queue drains, before polling busy
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
def test_scoreboard():
    """Build an IssueToScoreboard, write its RTLIL and simulate it.

    The RTLIL text goes to ``test_scoreboard6600.il``; the simulation runs
    ``scoreboard_sim`` and records ``test_scoreboard6600.vcd``.
    """
    sboard = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    reg_model = RegSim(16, 8)
    # constructed as in the original, but not passed to the simulation
    memsim = MemSim(16, 16)

    il_text = rtlil.convert(sboard, ports=sboard.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(sboard,
                   scoreboard_sim(sboard, reg_model),
                   vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled) simulation process:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
def mem_sim(dut):
    """Simulation process driving a fixed stimulus sequence into *dut*.

    Each entry of the schedule below is one step: the listed signals are
    assigned (in order) and then a bare ``yield`` advances the simulation.
    ``ld_i`` and ``st_i`` are never cleared again once set (the clearing
    assignments were disabled in the original sequence).
    """
    schedule = (
        (("ld_i", 0x1), ("fn_issue_i", 0x1)),
        (("st_i", 0x2), ("fn_issue_i", 0x2)),
        (("fn_issue_i", 0x0),),
        (("load_hit_i", 0x1),),
        (("load_hit_i", 0x0),),
        (("stwd_hit_i", 0x2),),
        (("stwd_hit_i", 0x0),),
    )
    for assignments in schedule:
        for signal_name, value in assignments:
            yield getattr(dut, signal_name).eq(value)
        yield
def test_mem_fus():
    """Build a MemFunctionUnits(3), write its RTLIL and run ``mem_sim``.

    The RTLIL text goes to ``test_mem_fus.il``; the simulation trace is
    recorded in ``test_mem_fus.vcd``.
    """
    mem_fus = MemFunctionUnits(3)

    il_text = rtlil.convert(mem_fus, ports=mem_fus.ports())
    with open("test_mem_fus.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(mem_fus, mem_sim(mem_fus), vcd_name='test_mem_fus.vcd')
# Script entry point: only the memory function-unit test is run here;
# test_scoreboard() is not invoked.
if __name__ == "__main__":
    test_mem_fus()