From: Luke Kenneth Casson Leighton Date: Tue, 7 May 2019 05:40:15 +0000 (+0100) Subject: move main python code to src directory X-Git-Tag: div_pipeline~2136 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=69a96bad42316114b5611ee65585e62ef4ca541a;p=soc.git move main python code to src directory --- diff --git a/TLB/.gitignore b/TLB/.gitignore deleted file mode 100644 index 3324664b..00000000 --- a/TLB/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.wpr -__pycache__ diff --git a/TLB/src/AddressEncoder.py b/TLB/src/AddressEncoder.py deleted file mode 100644 index 4c4b8d76..00000000 --- a/TLB/src/AddressEncoder.py +++ /dev/null @@ -1,75 +0,0 @@ -from nmigen import Module, Signal -from nmigen.lib.coding import Encoder, PriorityEncoder - -class AddressEncoder(): - """Address Encoder - - The purpose of this module is to take in a vector and - encode the bits that are one hot into an address. This module - combines both nmigen's Encoder and PriorityEncoder and will state - whether the input line has a single bit hot, multiple bits hot, - or no bits hot. The output line will always have the lowest value - address output. - - Usage: - The output is valid when either single or multiple match is high. - Otherwise output is 0. - """ - def __init__(self, width): - """ Arguments: - * width: The desired length of the input vector - """ - # Internal - self.encoder = Encoder(width) - self.p_encoder = PriorityEncoder(width) - - # Input - self.i = Signal(width) - - # Output - self.single_match = Signal(1) - self.multiple_match = Signal(1) - self.o = Signal(max=width) - - def elaborate(self, platform=None): - m = Module() - - # Add internal submodules - m.submodules.encoder = self.encoder - m.submodules.p_encoder = self.p_encoder - - m.d.comb += [ - self.encoder.i.eq(self.i), - self.p_encoder.i.eq(self.i) - ] - - # Steps: - # 1. check if the input vector is non-zero - # 2. if non-zero, check if single match or multiple match - # 3. set output line to be lowest value address output - - # If the priority encoder recieves an input of 0 - # If n is 1 then the output is not valid - with m.If(self.p_encoder.n): - m.d.comb += [ - self.single_match.eq(0), - self.multiple_match.eq(0), - self.o.eq(0) - ] - # If the priority encoder recieves an input > 0 - with m.Else(): - # Multiple Match if encoder n is invalid - with m.If(self.encoder.n): - m.d.comb += [ - self.single_match.eq(0), - self.multiple_match.eq(1) - ] - # Single Match if encoder n is valid - with m.Else(): - m.d.comb += [ - self.single_match.eq(1), - self.multiple_match.eq(0) - ] - # Always set output based on priority encoder output - m.d.comb += self.o.eq(self.p_encoder.o) - return m diff --git a/TLB/src/Cam.py b/TLB/src/Cam.py deleted file mode 100644 index 3c499211..00000000 --- a/TLB/src/Cam.py +++ /dev/null @@ -1,124 +0,0 @@ -from nmigen import Array, Cat, Module, Signal -from nmigen.lib.coding import Decoder -from nmigen.cli import main #, verilog - -from CamEntry import CamEntry -from AddressEncoder import AddressEncoder - -class Cam(): - """ Content Addressable Memory (CAM) - - The purpose of this module is to quickly look up whether an - entry exists given a data key. - This module will search for the given data in all internal entries - and output whether a single or multiple match was found. - If an single entry is found the address be returned and single_match - is set HIGH. If multiple entries are found the lowest address is - returned and multiple_match is set HIGH. 
If neither single_match or - multiple_match are HIGH this implies no match was found. To write - to the CAM set the address bus to the desired entry and set write_enable - HIGH. Entry managment should be performed one level above this block - as lookup is performed within. - - Notes: - The read and write operations take one clock cycle to complete. - Currently the read_warning line is present for interfacing but - is not necessary for this design. This module is capable of writing - in the first cycle, reading on the second, and output the correct - address on the third. - """ - - def __init__(self, data_size, cam_size): - """ Arguments: - * data_size: (bits) The bit size of the data - * cam_size: (number) The number of entries in the CAM - """ - - # Internal - self.cam_size = cam_size - self.encoder = AddressEncoder(cam_size) - self.decoder = Decoder(cam_size) - self.entry_array = Array(CamEntry(data_size) for x in range(cam_size)) - - # Input - self.enable = Signal(1) - self.write_enable = Signal(1) - self.data_in = Signal(data_size) # The data to be written - self.data_mask = Signal(data_size) # mask for ternary writes - self.address_in = Signal(max=cam_size) # address of CAM Entry to write - - # Output - self.read_warning = Signal(1) # High when a read interrupts a write - self.single_match = Signal(1) # High when there is only one match - self.multiple_match = Signal(1) # High when there at least two matches - self.match_address = Signal(max=cam_size) # The lowest address matched - - def elaborate(self, platform=None): - m = Module() - # AddressEncoder for match types and output address - m.submodules.AddressEncoder = self.encoder - # Decoder is used to select which entry will be written to - m.submodules.Decoder = self.decoder - # CamEntry Array Submodules - # Note these area added anonymously - entry_array = self.entry_array - m.submodules += entry_array - - # Decoder logic - m.d.comb += [ - self.decoder.i.eq(self.address_in), - self.decoder.n.eq(0) - ] - - encoder_vector = [] - with m.If(self.enable): - # Set the key value for every CamEntry - for index in range(self.cam_size): - - # Write Operation - with m.If(self.write_enable): - with m.If(self.decoder.o[index]): - m.d.comb += entry_array[index].command.eq(2) - with m.Else(): - m.d.comb += entry_array[index].command.eq(0) - - # Read Operation - with m.Else(): - m.d.comb += entry_array[index].command.eq(1) - - # Send data input to all entries - m.d.comb += entry_array[index].data_in.eq(self.data_in) - # Send all entry matches to encoder - ematch = entry_array[index].match - encoder_vector.append(ematch) - - # Give input to and accept output from encoder module - m.d.comb += [ - self.encoder.i.eq(Cat(*encoder_vector)), - self.single_match.eq(self.encoder.single_match), - self.multiple_match.eq(self.encoder.multiple_match), - self.match_address.eq(self.encoder.o) - ] - - # If the CAM is not enabled set all outputs to 0 - with m.Else(): - m.d.comb += [ - self.read_warning.eq(0), - self.single_match.eq(0), - self.multiple_match.eq(0), - self.match_address.eq(0) - ] - - return m - - def ports(self): - return [self.enable, self.write_enable, - self.data_in, self.data_mask, - self.read_warning, self.single_match, - self.multiple_match, self.match_address] - - -if __name__ == '__main__': - cam = Cam(4, 4) - main(cam, ports=cam.ports()) - diff --git a/TLB/src/CamEntry.py b/TLB/src/CamEntry.py deleted file mode 100644 index 73081ce5..00000000 --- a/TLB/src/CamEntry.py +++ /dev/null @@ -1,45 +0,0 @@ -from nmigen import Module, Signal - 
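# Editor's note (summary, derived from Cam.elaborate above): when the CAM is
# enabled, each entry's command is driven to 2 (Write) if it is the entry
# selected by the decoder during a write, 0 (NA) for unselected entries during
# a write, and 1 (Read/compare) otherwise.  The unused value 3 falls through
# to the default Case below, which clears both match and data.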
-class CamEntry: - """ Content Addressable Memory (CAM) Entry - - The purpose of this module is to represent an entry within a CAM. - This module when given a read command will compare the given data - and output whether a match was found or not. When given a write - command it will write the given data into internal registers. - """ - - def __init__(self, data_size): - """ Arguments: - * data_size: (bit count) The size of the data - """ - # Input - self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset - self.data_in = Signal(data_size) # Data input when writing - - # Output - self.match = Signal(1) # Result of the internal/input key comparison - self.data = Signal(data_size) - - def elaborate(self, platform=None): - m = Module() - with m.Switch(self.command): - with m.Case("00"): - m.d.sync += self.match.eq(0) - with m.Case("01"): - with m.If(self.data == self.data_in): - m.d.sync += self.match.eq(1) - with m.Else(): - m.d.sync += self.match.eq(0) - with m.Case("10"): - m.d.sync += [ - self.data.eq(self.data_in), - self.match.eq(0) - ] - with m.Case(): - m.d.sync += [ - self.match.eq(0), - self.data.eq(0) - ] - - return m diff --git a/TLB/src/LFSR.py b/TLB/src/LFSR.py deleted file mode 100644 index d8b606ec..00000000 --- a/TLB/src/LFSR.py +++ /dev/null @@ -1,109 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# See Notices.txt for copyright information -from nmigen import Signal, Module, Const, Cat, Elaboratable -from nmigen.cli import verilog, rtlil - - -class LFSRPolynomial(set): - """ implements a polynomial for use in LFSR - """ - def __init__(self, exponents=()): - for e in exponents: - assert isinstance(e, int), TypeError("%s must be an int" % repr(e)) - assert (e >= 0), ValueError("%d must not be negative" % e) - set.__init__(self, set(exponents).union({0})) # must contain zero - - @property - def max_exponent(self): - return max(self) # derived from set, so this returns the max exponent - - @property - def exponents(self): - exponents = list(self) # get elements of set as a list - exponents.sort(reverse=True) - return exponents - - def __str__(self): - expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2) - retval = map(lambda i: expd[min(i,2)].format(i), self.exponents) - return " + ".join(retval) - - def __repr__(self): - return "LFSRPolynomial(%s)" % self.exponents - - -# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa -LFSR_POLY_2 = LFSRPolynomial([2, 1, 0]) -LFSR_POLY_3 = LFSRPolynomial([3, 2, 0]) -LFSR_POLY_4 = LFSRPolynomial([4, 3, 0]) -LFSR_POLY_5 = LFSRPolynomial([5, 3, 0]) -LFSR_POLY_6 = LFSRPolynomial([6, 5, 0]) -LFSR_POLY_7 = LFSRPolynomial([7, 6, 0]) -LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0]) -LFSR_POLY_9 = LFSRPolynomial([9, 5, 0]) -LFSR_POLY_10 = LFSRPolynomial([10, 7, 0]) -LFSR_POLY_11 = LFSRPolynomial([11, 9, 0]) -LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0]) -LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0]) -LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0]) -LFSR_POLY_15 = LFSRPolynomial([15, 14, 0]) -LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0]) -LFSR_POLY_17 = LFSRPolynomial([17, 14, 0]) -LFSR_POLY_18 = LFSRPolynomial([18, 11, 0]) -LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0]) -LFSR_POLY_20 = LFSRPolynomial([20, 17, 0]) -LFSR_POLY_21 = LFSRPolynomial([21, 19, 0]) -LFSR_POLY_22 = LFSRPolynomial([22, 21, 0]) -LFSR_POLY_23 = LFSRPolynomial([23, 18, 0]) -LFSR_POLY_24 = 
LFSRPolynomial([24, 23, 22, 17, 0]) - - -class LFSR(LFSRPolynomial, Elaboratable): - """ implements a Linear Feedback Shift Register - """ - def __init__(self, polynomial): - """ Inputs: - ------ - :polynomial: the polynomial to feedback on. may be a LFSRPolynomial - instance or an iterable of ints (list/tuple/generator) - :enable: enable (set LO to disable. NOTE: defaults to HI) - - Outputs: - ------- - :state: the LFSR state. bitwidth is taken from the polynomial - maximum exponent. - - Note: if an LFSRPolynomial is passed in as the input, because - LFSRPolynomial is derived from set() it's ok: - LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p) - """ - LFSRPolynomial.__init__(self, polynomial) - self.state = Signal(self.max_exponent, reset=1) - self.enable = Signal(reset=1) - - def elaborate(self, platform): - m = Module() - # do absolutely nothing if the polynomial is empty (always has a zero) - if self.max_exponent <= 1: - return m - - # create XOR-bunch, select bits from state based on exponent - feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain) - for exponent in self: - if exponent > 0: # don't have to skip, saves CPU cycles though - feedback ^= self.state[exponent - 1] - - # if enabled, shift-and-feedback - with m.If(self.enable): - # shift up lower bits by Cat'ing in a new bit zero (feedback) - newstate = Cat(feedback, self.state[:-1]) - m.d.sync += self.state.eq(newstate) - - return m - - -# example: Poly24 -if __name__ == '__main__': - p24 = rtlil.convert(LFSR(LFSR_POLY_24)) - with open("lfsr2_p24.il", "w") as f: - f.write(p24) diff --git a/TLB/src/LFSR.pyi b/TLB/src/LFSR.pyi deleted file mode 100644 index 64eb9115..00000000 --- a/TLB/src/LFSR.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# See Notices.txt for copyright information -from nmigen import Module -from typing import Iterable, Optional, Iterator, Any, Union -from typing_extensions import final - - -@final -class LFSRPolynomial(set): - def __init__(self, exponents: Iterable[int] = ()): - def elements() -> Iterable[int]: ... - @property - def exponents(self) -> list[int]: ... - def __str__(self) -> str: ... - def __repr__(self) -> str: ... - - -@final -class LFSR: - def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ... - @property - def width(self) -> int: ... - def elaborate(self, platform: Any) -> Module: ... diff --git a/TLB/src/Makefile b/TLB/src/Makefile deleted file mode 100644 index 1eb67acc..00000000 --- a/TLB/src/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -verilog: - python3 Cam.py generate -t v > Cam.v diff --git a/TLB/src/MemorySet.py b/TLB/src/MemorySet.py deleted file mode 100644 index ea61bdf5..00000000 --- a/TLB/src/MemorySet.py +++ /dev/null @@ -1,66 +0,0 @@ -from nmigen import Cat, Memory, Module, Signal, Elaboratable -from nmigen.cli import main -from nmigen.cli import verilog, rtlil - - -class MemorySet(Elaboratable): - def __init__(self, data_size, tag_size, set_count, active): - self.active = active - input_size = tag_size + data_size # Size of the input data - memory_width = input_size + 1 # The width of the cache memory - self.active = active - self.data_size = data_size - self.tag_size = tag_size - - # XXX TODO, use rd-enable and wr-enable? 
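        # Editor's note: memory_width = 1 + data_size + tag_size.  With
        # active=0 (as passed in by SetAssociativeCache) the word layout,
        # LSB first, is
        #   [ active/valid (1) | data (data_size) | tag (tag_size) ]
        # matching the write in elaborate() below: Cat(1, self.data_i, self.tag).
        # e.g. with data_size=4, tag_size=2 an (illustrative) stored word
        # 0b10_0110_1 decodes as tag=0b10, data=0b0110, active=1.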
- self.mem = Memory(memory_width, set_count) - self.r = self.mem.read_port() - self.w = self.mem.write_port() - - # inputs (address) - self.cset = Signal(max=set_count) # The set to be checked - self.tag = Signal(tag_size) # The tag to find - self.data_i = Signal(data_size) # Incoming data - - # outputs - self.valid = Signal() - self.data_o = Signal(data_size) # Outgoing data (excludes tag) - - def elaborate(self, platform): - m = Module() - m.submodules.mem = self.mem - m.submodules.r = self.r - m.submodules.w = self.w - - # temporaries - active_bit = Signal() - tag_valid = Signal() - data_start = self.active + 1 - data_end = data_start + self.data_size - tag_start = data_end - tag_end = tag_start + self.tag_size - - # connect the read port address to the set/entry - read_port = self.r - m.d.comb += read_port.addr.eq(self.cset) - # Pull out active bit from data - data = read_port.data - m.d.comb += active_bit.eq(data[self.active]) - # Validate given tag vs stored tag - tag = data[tag_start:tag_end] - m.d.comb += tag_valid.eq(self.tag == tag) - # An entry is only valid if the tags match AND - # is marked as a valid entry - m.d.comb += self.valid.eq(tag_valid & active_bit) - - # output data: TODO, check rd-enable? - m.d.comb += self.data_o.eq(data[data_start:data_end]) - - # connect the write port addr to the set/entry (only if write enabled) - # (which is only done on a match, see SAC.write_entry below) - write_port = self.w - with m.If(write_port.en): - m.d.comb += write_port.addr.eq(self.cset) - m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag)) - - return m diff --git a/TLB/src/PermissionValidator.py b/TLB/src/PermissionValidator.py deleted file mode 100644 index 14f01e42..00000000 --- a/TLB/src/PermissionValidator.py +++ /dev/null @@ -1,67 +0,0 @@ -from nmigen import Module, Signal -from nmigen.cli import main - -from PteEntry import PteEntry - -class PermissionValidator(): - """ The purpose of this Module is to check the Permissions of a given PTE - against the requested access permissions. 
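    (Editor's summary of the checks in elaborate() below: a request is marked
    valid only when the PTE's V bit is set, its ASID matches the requested
    asid or its G bit is set, its XWR field equals the requested xwr exactly,
    and the U bit fits the requested mode -- user requests need U=1, while
    supervisor requests need U=0 or super_access/SUM set.)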
- - This module will either validate (by setting the valid bit HIGH) - the request or find a permission fault and invalidate (by setting - the valid bit LOW) the request - """ - - def __init__(self, asid_size, pte_size): - """ Arguments: - * asid_size: (bit count) The size of the asid to be processed - * pte_size: (bit count) The size of the pte to be processed - - Return: - * valid HIGH when permissions are correct - """ - # Internal - self.pte_entry = PteEntry(asid_size, pte_size) - - # Input - self.data = Signal(asid_size + pte_size); - self.xwr = Signal(3) # Execute, Write, Read - self.super_mode = Signal(1) # Supervisor Mode - self.super_access = Signal(1) # Supervisor Access - self.asid = Signal(15) # Address Space IDentifier (ASID) - - # Output - self.valid = Signal(1) # Denotes if the permissions are correct - - def elaborate(self, platform=None): - m = Module() - - m.submodules.pte_entry = self.pte_entry - - m.d.comb += self.pte_entry.i.eq(self.data) - - # Check if the entry is valid - with m.If(self.pte_entry.v): - # ASID match or Global Permission - # Note that the MSB bound is exclusive - with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g): - # Check Execute, Write, Read (XWR) Permissions - with m.If(self.pte_entry.xwr == self.xwr): - # Supervisor Logic - with m.If(self.super_mode): - # Valid if entry is not in user mode or supervisor - # has Supervisor User Memory (SUM) access via the - # SUM bit in the sstatus register - m.d.comb += self.valid.eq((~self.pte_entry.u) \ - | self.super_access) - # User logic - with m.Else(): - # Valid if the entry is in user mode only - m.d.comb += self.valid.eq(self.pte_entry.u) - with m.Else(): - m.d.comb += self.valid.eq(0) - with m.Else(): - m.d.comb += self.valid.eq(0) - with m.Else(): - m.d.comb += self.valid.eq(0) - return m \ No newline at end of file diff --git a/TLB/src/PteEntry.py b/TLB/src/PteEntry.py deleted file mode 100644 index c0705457..00000000 --- a/TLB/src/PteEntry.py +++ /dev/null @@ -1,66 +0,0 @@ -from nmigen import Module, Signal -from nmigen.cli import main - -class PteEntry(): - """ The purpose of this Module is to centralize the parsing of Page - Table Entries (PTE) into one module to prevent common mistakes - and duplication of code. The control bits are parsed out for - ease of use. - - This module parses according to the standard PTE given by the - Volume II: RISC-V Privileged Architectures V1.10 Pg 60. - The Address Space IDentifier (ASID) is appended to the MSB of the input - and is parsed out as such. - - An valid input Signal would be: - ASID PTE - Bits:[78-64][63-0] - - The output PTE value will include the control bits. 
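    For example, with asid_size=15 and pte_size=64 (the sizes instantiated
    by TLB.py), elaborate() below yields asid = i[64:79] and pte = i[0:64]
    (Python slice notation), with v = i[0], xwr = i[1:4], u = i[4],
    g = i[5], a = i[6] and d = i[7].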
- """ - def __init__(self, asid_size, pte_size): - """ Arguments: - * asid_size: (bit count) The size of the asid to be processed - * pte_size: (bit count) The size of the pte to be processed - - Return: - * d The Dirty bit from the PTE portion of i - * a The Accessed bit from the PTE portion of i - * g The Global bit from the PTE portion of i - * u The User Mode bit from the PTE portion of i - * xwr The Execute/Write/Read bit from the PTE portion of i - * v The Valid bit from the PTE portion of i - * asid The asid portion of i - * pte The pte portion of i - """ - # Internal - self.asid_start = pte_size - self.asid_end = pte_size + asid_size - - # Input - self.i = Signal(asid_size + pte_size) - - # Output - self.d = Signal(1) # Dirty bit (From pte) - self.a = Signal(1) # Accessed bit (From pte) - self.g = Signal(1) # Global Access (From pte) - self.u = Signal(1) # User Mode (From pte) - self.xwr = Signal(3) # Execute Read Write (From pte) - self.v = Signal(1) # Valid (From pte) - self.asid = Signal(asid_size) # Associated Address Space IDentifier - self.pte = Signal(pte_size) # Full Page Table Entry - - def elaborate(self, platform=None): - m = Module() - # Pull out all control bites from PTE - m.d.comb += [ - self.d.eq(self.i[7]), - self.a.eq(self.i[6]), - self.g.eq(self.i[5]), - self.u.eq(self.i[4]), - self.xwr.eq(self.i[1:4]), - self.v.eq(self.i[0]) - ] - m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end]) - m.d.comb += self.pte.eq(self.i[0:self.asid_start]) - return m \ No newline at end of file diff --git a/TLB/src/SetAssociativeCache.py b/TLB/src/SetAssociativeCache.py deleted file mode 100644 index 0acd3488..00000000 --- a/TLB/src/SetAssociativeCache.py +++ /dev/null @@ -1,274 +0,0 @@ -""" - -Online simulator of 4-way set-associative cache: -http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html - -Python simulator of a N-way set-associative cache: -https://github.com/vaskevich/CacheSim/blob/master/cachesim.py -""" -import sys -sys.path.append("ariane/src/") - -from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable -from nmigen.compat.genlib import fsm -from nmigen.cli import main -from nmigen.cli import verilog, rtlil - -from AddressEncoder import AddressEncoder -from MemorySet import MemorySet - -# TODO: use a LFSR that advances continuously and picking the bottom -# few bits from it to select which cache line to replace, instead of PLRU -# http://bugs.libre-riscv.org/show_bug.cgi?id=71 -from plru import PLRU -from LFSR import LFSR, LFSR_POLY_24 - -SA_NA = "00" # no action (none) -SA_RD = "01" # read -SA_WR = "10" # write - - -class SetAssociativeCache(Elaboratable): - """ Set Associative Cache Memory - - The purpose of this module is to generate a memory cache given the - constraints passed in. This will create a n-way set associative cache. - It is expected for the SV TLB that the VMA will provide the set number - while the ASID provides the tag (still to be decided). - - """ - def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False): - """ Arguments - * tag_size (bits): The bit count of the tag - * data_size (bits): The bit count of the data to be stored - * set_count (number): The number of sets/entries in the cache - * way_count (number): The number of slots a data can be stored - in one set - * lfsr: if set, use an LFSR for (pseudo-randomly) selecting - set/entry to write to. 
otherwise, use a PLRU - """ - # Internals - self.lfsr_mode = lfsr - self.way_count = way_count # The number of slots in one set - self.tag_size = tag_size # The bit count of the tag - self.data_size = data_size # The bit count of the data to be stored - - # set up Memory array - self.mem_array = Array() # memory array - for i in range(way_count): - ms = MemorySet(data_size, tag_size, set_count, active=0) - self.mem_array.append(ms) - - # Finds valid entries - self.encoder = AddressEncoder(way_count) - - # setup PLRU or LFSR - if lfsr: - # LFSR mode - self.lfsr = LFSR(LFSR_POLY_24) - else: - # PLRU mode - self.plru = PLRU(way_count) # One block to handle plru calculations - self.plru_array = Array() # PLRU data on each set - for i in range(set_count): - name="plru%d" % i - self.plru_array.append(Signal(self.plru.TLBSZ, name=name)) - - # Input - self.enable = Signal(1) # Whether the cache is enabled - self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX) - self.cset = Signal(max=set_count) # The set to be checked - self.tag = Signal(tag_size) # The tag to find - self.data_i = Signal(data_size) # The input data - - # Output - self.ready = Signal(1) # 0 => Processing 1 => Ready for commands - self.hit = Signal(1) # Tag matched one way in the given set - self.multiple_hit = Signal(1) # Tag matched many ways in the given set - self.data_o = Signal(data_size) # The data linked to the matched tag - - def check_tags(self, m): - """ Validate the tags in the selected set. If one and only one - tag matches set its state to zero and increment all others - by one. We only advance to next state if a single hit is found. - """ - # Vector to store way valid results - # A zero denotes a way is invalid - valid_vector = [] - # Loop through memory to prep read/write ports and set valid_vector - for i in range(self.way_count): - valid_vector.append(self.mem_array[i].valid) - - # Pass encoder the valid vector - m.d.comb += self.encoder.i.eq(Cat(*valid_vector)) - - # Only one entry should be marked - # This is due to already verifying the tags - # matched and the valid bit is high - with m.If(self.hit): - m.next = "FINISHED_READ" - # Pull out data from the read port - data = self.mem_array[self.encoder.o].data_o - m.d.comb += self.data_o.eq(data) - if not self.lfsr_mode: - self.access_plru(m) - - # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k - with m.Elif(self.multiple_hit): - # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck - m.d.comb += self.data_o.eq(0) - - # No tag matches means no data - with m.Else(): - # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck - m.d.comb += self.data_o.eq(0) - - def access_plru(self, m): - """ An entry was accessed and the plru tree must now be updated - """ - # Pull out the set's entry being edited - plru_entry = self.plru_array[self.cset] - m.d.comb += [ - # Set the plru data to the current state - self.plru.plru_tree.eq(plru_entry), - # Set that the cache was accessed - self.plru.lu_access_i.eq(1) - ] - - def read(self, m): - """ Go through the read process of the cache. - This takes two cycles to complete. First it checks for a valid tag - and secondly it updates the LRU values. 
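        (Note: in LFSR mode the second step is skipped, since there is no
        PLRU state to update.)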
- """ - with m.FSM() as fsm_read: - with m.State("READY"): - m.d.comb += self.ready.eq(0) - # check_tags will set the state if the conditions are met - self.check_tags(m) - with m.State("FINISHED_READ"): - m.next = "READY" - m.d.comb += self.ready.eq(1) - if not self.lfsr_mode: - plru_tree_o = self.plru.plru_tree_o - m.d.sync += self.plru_array[self.cset].eq(plru_tree_o) - - def write_entry(self, m): - if not self.lfsr_mode: - m.d.comb += [# set cset (mem address) into PLRU - self.plru.plru_tree.eq(self.plru_array[self.cset]), - # and connect plru to encoder for write - self.encoder.i.eq(self.plru.replace_en_o) - ] - write_port = self.mem_array[self.encoder.o].w - else: - # use the LFSR to generate a random(ish) one of the mem array - lfsr_output = Signal(max=self.way_count) - lfsr_random = Signal(max=self.way_count) - m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits - # address too big, limit to range of array - m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count, - lfsr_output - self.way_count, - lfsr_output)) - write_port = self.mem_array[lfsr_random].w - - # then if there is a match from the encoder, enable the selected write - with m.If(self.encoder.single_match): - m.d.comb += write_port.en.eq(1) - - def write(self, m): - """ Go through the write process of the cache. - This takes two cycles to complete. First it writes the entry, - and secondly it updates the PLRU (in plru mode) - """ - with m.FSM() as fsm_write: - with m.State("READY"): - m.d.comb += self.ready.eq(0) - self.write_entry(m) - m.next ="FINISHED_WRITE" - with m.State("FINISHED_WRITE"): - m.d.comb += self.ready.eq(1) - if not self.lfsr_mode: - plru_entry = self.plru_array[self.cset] - m.d.sync += plru_entry.eq(self.plru.plru_tree_o) - m.next = "READY" - - - def elaborate(self, platform=None): - m = Module() - - # ---- - # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array - # ---- - - m.submodules.AddressEncoder = self.encoder - if self.lfsr_mode: - m.submodules.LFSR = self.lfsr - else: - m.submodules.PLRU = self.plru - - for i, mem in enumerate(self.mem_array): - setattr(m.submodules, "mem%d" % i, mem) - - # ---- - # select mode: PLRU connect to encoder, LFSR do... something - # ---- - - if not self.lfsr_mode: - # Set what entry was hit - m.d.comb += self.plru.lu_hit.eq(self.encoder.o) - else: - # enable LFSR - m.d.comb += self.lfsr.enable.eq(self.enable) - - # ---- - # connect hit/multiple hit to encoder output - # ---- - - m.d.comb += [ - self.hit.eq(self.encoder.single_match), - self.multiple_hit.eq(self.encoder.multiple_match), - ] - - # ---- - # connect incoming data/tag/cset(addr) to mem_array - # ---- - - for mem in self.mem_array: - write_port = mem.w - m.d.comb += [mem.cset.eq(self.cset), - mem.tag.eq(self.tag), - mem.data_i.eq(self.data_i), - write_port.en.eq(0), # default: disable write - ] - # ---- - # Commands: READ/WRITE/TODO - # ---- - - with m.If(self.enable): - with m.Switch(self.command): - # Search all sets at a particular tag - with m.Case(SA_RD): - self.read(m) - with m.Case(SA_WR): - self.write(m) - # Maybe catch multiple tags write here? - # TODO - # TODO: invalidate/flush, flush-all? 
- - return m - - def ports(self): - return [self.enable, self.command, self.cset, self.tag, self.data_i, - self.ready, self.hit, self.multiple_hit, self.data_o] - - -if __name__ == '__main__': - sac = SetAssociativeCache(4, 8, 4, 6) - vl = rtlil.convert(sac, ports=sac.ports()) - with open("SetAssociativeCache.il", "w") as f: - f.write(vl) - - sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True) - vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports()) - with open("SetAssociativeCacheLFSR.il", "w") as f: - f.write(vl) diff --git a/TLB/src/TLB.py b/TLB/src/TLB.py deleted file mode 100644 index 3538bdc1..00000000 --- a/TLB/src/TLB.py +++ /dev/null @@ -1,173 +0,0 @@ -""" TLB Module - - The expected form of the data is: - * Item (Bits) - * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0) -""" - -from nmigen import Memory, Module, Signal, Cat -from nmigen.cli import main - -from PermissionValidator import PermissionValidator -from Cam import Cam - -class TLB(): - def __init__(self, asid_size, vma_size, pte_size, L1_size): - """ Arguments - * asid_size: Address Space IDentifier (ASID) typically 15 bits - * vma_size: Virtual Memory Address (VMA) typically 36 bits - * pte_size: Page Table Entry (PTE) typically 64 bits - - Notes: - These arguments should represent the largest possible size - defined by the MODE settings. See - Volume II: RISC-V Privileged Architectures V1.10 Page 57 - """ - - # Internal - self.state = 0 - # L1 Cache Modules - L1_size = 8 # XXX overridden incoming argument? - self.cam_L1 = Cam(vma_size, L1_size) - self.mem_L1 = Memory(asid_size + pte_size, L1_size) - - # Permission Validator - self.perm_validator = PermissionValidator(asid_size, pte_size) - - # Inputs - self.supermode = Signal(1) # Supervisor Mode - self.super_access = Signal(1) # Supervisor Access - self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2 - self.xwr = Signal(3) # Execute, Write, Read - self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64 - self.address_L1 = Signal(max=L1_size) - self.asid = Signal(asid_size) # Address Space IDentifier (ASID) - self.vma = Signal(vma_size) # Virtual Memory Address (VMA) - self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE) - - # Outputs - self.hit = Signal(1) # Denotes if the VMA had a mapped PTE - self.perm_valid = Signal(1) # Denotes if the permissions are correct - self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA - - def search(self, m, read_L1, write_L1): - """ searches the TLB - """ - m.d.comb += [ - write_L1.en.eq(0), - self.cam_L1.write_enable.eq(0), - self.cam_L1.data_in.eq(self.vma) - ] - # Match found in L1 CAM - match_found = Signal(reset_less=True) - m.d.comb += match_found.eq(self.cam_L1.single_match - | self.cam_L1.multiple_match) - with m.If(match_found): - # Memory shortcut variables - mem_address = self.cam_L1.match_address - # Memory Logic - m.d.comb += read_L1.addr.eq(mem_address) - # Permission Validator Logic - m.d.comb += [ - self.hit.eq(1), - # Set permission validator data to the correct - # register file data according to CAM match - # address - self.perm_validator.data.eq(read_L1.data), - # Execute, Read, Write - self.perm_validator.xwr.eq(self.xwr), - # Supervisor Mode - self.perm_validator.super_mode.eq(self.supermode), - # Supverisor Access - self.perm_validator.super_access.eq(self.super_access), - # Address Space IDentifier (ASID) - self.perm_validator.asid.eq(self.asid), - # Output result of permission validation - self.perm_valid.eq(self.perm_validator.valid) - ] - # Only output PTE if 
permissions are valid - with m.If(self.perm_validator.valid): - # XXX TODO - dummy for now - reg_data = Signal.like(self.pte_out) - m.d.comb += [ - self.pte_out.eq(reg_data) - ] - with m.Else(): - m.d.comb += [ - self.pte_out.eq(0) - ] - # Miss Logic - with m.Else(): - m.d.comb += [ - self.hit.eq(0), - self.perm_valid.eq(0), - self.pte_out.eq(0) - ] - - def write_l1(self, m, read_L1, write_L1): - """ writes to the L1 cache - """ - # Memory_L1 Logic - m.d.comb += [ - write_L1.en.eq(1), - write_L1.addr.eq(self.address_L1), - # The Cat places arguments from LSB -> MSB - write_L1.data.eq(Cat(self.pte_in, self.asid)) - ] - # CAM_L1 Logic - m.d.comb += [ - self.cam_L1.write_enable.eq(1), - self.cam_L1.data_in.eq(self.vma), - ] - - def elaborate(self, platform): - m = Module() - # Add submodules - # Submodules for L1 Cache - m.d.submodules.cam_L1 = self.cam_L1 - m.d.sumbmodules.read_L1 = read_L1 = self.mem_L1.read_port() - m.d.sumbmodules.read_L1 = write_L1 = self.mem_L1.write_port() - # Permission Validator Submodule - m.d.submodules.perm_valididator = self.perm_validator - - # When MODE specifies translation - # TODO add in different bit length handling ie prefix 0s - tlb_enable = Signal(reset_less=True) - m.d.comb += tlb_enable.eq(self.mode != 0) - - with m.If(tlb_enable): - m.d.comb += [ - self.cam_L1.enable.eq(1) - ] - with m.Switch(self.command): - # Search - with m.Case("01"): - self.search(m, read_L1, write_L1) - - # Write L1 - # Expected that the miss will be handled in software - with m.Case("10"): - self.write_l1(m, read_L1, write_L1) - - # TODO - #with m.Case("11"): - - # When disabled - with m.Else(): - m.d.comb += [ - self.cam_L1.enable.eq(0), - # XXX TODO - self.reg_file.enable.eq(0), - self.hit.eq(0), - self.perm_valid.eq(0), # XXX TODO, check this - self.pte_out.eq(0) - ] - return m - - -if __name__ == '__main__': - tlb = TLB(15, 36, 64, 4) - main(tlb, ports=[ tlb.supermode, tlb.super_access, tlb.command, - tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid, - tlb.vma, tlb.pte_in, - tlb.hit, tlb.perm_valid, tlb.pte_out, - ] + tlb.cam_L1.ports()) diff --git a/TLB/src/__init__.py b/TLB/src/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/TLB/src/ariane/TreePLRU.cpp b/TLB/src/ariane/TreePLRU.cpp deleted file mode 100644 index 2f6aeea5..00000000 --- a/TLB/src/ariane/TreePLRU.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#include -#include -#include - - -#define NWAY 4 -#define NLINE 256 -#define HIT 0 -#define MISS 1 -#define MS 1000 -/* -Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing -Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt -four-way set associative - three bits - each bit represents one branch point in a binary decision tree; let 1 - represent that the left side has been referenced more recently than the - right side, and 0 vice-versa - are all 4 lines valid? - / \ - yes no, use an invalid line - | - | - | - bit_0 == 0? state | replace ref to | next state - / \ ------+-------- -------+----------- - y n 00x | line_0 line_0 | 11_ - / \ 01x | line_1 line_1 | 10_ - bit_1 == 0? bit_2 == 0? 
1x0 | line_2 line_2 | 0_1 - / \ / \ 1x1 | line_3 line_3 | 0_0 - y n y n - / \ / \ ('x' means ('_' means unchanged) - line_0 line_1 line_2 line_3 don't care) - 8-way set associative - 7 = 1+2+4 bits -16-way set associative - 15 = 1+2+4+8 bits -32-way set associative - 31 = 1+2+4+8+16 bits -64-way set associative - 63 = 1+2+4+8+16+32 bits -*/ -using namespace std; -struct AddressField { - uint64_t wd_idx : 2;//Unused - uint64_t offset : 4;//Unused - uint64_t index : 8;//NLINE = 256 = 2^8 - uint64_t tag : 50; -}; - -union Address { - uint32_t* p; - AddressField fields; -}; - -struct Cell { - bool v; - uint64_t tag; - - Cell() : v(false), tag(0) {} - - bool isHit(uint64_t tag) { - return v && (tag == this->tag); - } - - void fetch(uint32_t* address) { - Address addr; - addr.p = address; - addr.fields.offset = 0; - addr.fields.wd_idx = 0; - tag = addr.fields.tag; - v = true; - } -}; - -ostream& operator<<(ostream & out, const Cell& cell) { - out << " v:" << cell.v << " tag:" << hex << cell.tag; - return out; -} - -struct Block { - Cell cell[NWAY]; - uint32_t state; - uint64_t *mask;//Mask the state to get accurate value for specified 1 bit. - uint64_t *value; - uint64_t *next_value; - - Block() : state(0) { - switch (NWAY) { - case 4: - mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101}; - value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101}; - next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000}; - break; - case 8: - mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001, - 0b1010001}; - value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000, - 0b1010001}; - next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000, - 0b0000001, 0b0000000}; - break; - //TODO - more NWAY goes here. - default: - std::cout << "Error definition NWAY = " << NWAY << std::endl; - } - } - - uint32_t *getByTag(uint64_t tag, uint32_t *pway) { - for (int i = 0; i < NWAY; ++i) { - if (cell[i].isHit(tag)) { - *pway = i; - return pway; - } - } - return NULL; - } - - void setLRU(uint32_t *address) { - int way = 0; - uint32_t st = state; - for (int i = 0; i < NWAY; ++i) { - if ((state & mask[i]) == value[i]) { - state ^= mask[i]; - way = i; - break; - } - } - cell[way].fetch(address); - cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl; - } - - uint32_t *get(uint32_t *address, uint32_t *pway) { - Address addr; - addr.p = address; - uint32_t *d = getByTag(addr.fields.tag, pway); - if (d != NULL) { - return &d[addr.fields.offset]; - } - return d; - } - - int set(uint32_t *address) { - uint32_t way = 0; - uint32_t *p = get(address, &way); - if (p != NULL) { - printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state); - state &= ~mask[way]; - printf("%X --> ", state); - state |= next_value[way]; - printf("%X\n", state); - // *p = *address; //skip since address is fake. - return HIT; - } else { - setLRU(address); - return MISS; - } - } -}; - -ostream& operator<<(ostream & out, const Block& block) { - out << "state:" << block.state << " "; - for (int i = 0; i signal with a page fault exception - # 2. 
We got an access error because of insufficient permissions -> - # throw an access exception - m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0) - # Check whether we are allowed to access this memory region - # from a fetch perspective - - # XXX TODO: use PermissionValidator instead [we like modules] - m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \ - (((self.priv_lvl_i == PRIV_LVL_U) & \ - ~itlb_content.u) | \ - ((self.priv_lvl_i == PRIV_LVL_S) & \ - itlb_content.u))) - - # MMU enabled: address from TLB, request delayed until hit. - # Error when TLB hit and no access right or TLB hit and - # translated address not valid (e.g. AXI decode error), - # or when PTW performs walk due to ITLB miss and raises - # an error. - with m.If (self.enable_translation_i): - # we work with SV39, so if VM is enabled, check that - # all bits [63:38] are equal - with m.If (self.icache_areq_i.fetch_req & \ - ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \ - (self.icache_areq_i.fetch_vaddr[38:64]) == 0)): - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), - fe.tval.eq(self.icache_areq_i.fetch_vaddr), - fe.valid.eq(1) - ] - - m.d.comb += self.icache_areq_o.fetch_valid.eq(0) - - # 4K page - paddr = Signal.like(self.icache_areq_o.fetch_paddr) - paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12], - itlb_content.ppn) - m.d.comb += paddr.eq(paddr4k) - # Mega page - with m.If(itlb_is_2M): - m.d.comb += paddr[12:21].eq( - self.icache_areq_i.fetch_vaddr[12:21]) - # Giga page - with m.If(itlb_is_1G): - m.d.comb += paddr[12:30].eq( - self.icache_areq_i.fetch_vaddr[12:30]) - m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr) - - # --------- - # ITLB Hit - # -------- - # if we hit the ITLB output the request signal immediately - with m.If(itlb_lu_hit): - m.d.comb += self.icache_areq_o.fetch_valid.eq( - self.icache_areq_i.fetch_req) - # we got an access error - with m.If (iaccess_err): - # throw a page fault - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), - fe.tval.eq(self.icache_areq_i.fetch_vaddr), - fe.valid.eq(1) - ] - # --------- - # ITLB Miss - # --------- - # watch out for exceptions happening during walking the page table - with m.Elif(ptw_active & walking_instr): - m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error) - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT), - fe.tval.eq(uaddr64), - fe.valid.eq(1) - ] - - #----------------------- - # Data Interface - #----------------------- - - lsu_vaddr = Signal(64) - dtlb_pte = PTE() - misaligned_ex = RVException() - lsu_req = Signal() - lsu_is_store = Signal() - dtlb_hit = Signal() - dtlb_is_2M = Signal() - dtlb_is_1G = Signal() - - # check if we need to do translation or if we are always - # ready (e.g.: we are not translating anything) - m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i, - dtlb_lu_hit, 1)) - - # The data interface is simpler and only consists of a - # request/response interface - m.d.comb += [ - # save request and DTLB response - lsu_vaddr.eq(self.lsu_vaddr_i), - lsu_req.eq(self.lsu_req_i), - misaligned_ex.eq(self.misaligned_ex_i), - dtlb_pte.eq(dtlb_content), - dtlb_hit.eq(dtlb_lu_hit), - lsu_is_store.eq(self.lsu_is_store_i), - dtlb_is_2M.eq(dtlb_is_2M), - dtlb_is_1G.eq(dtlb_is_1G), - ] - m.d.sync += [ - self.lsu_paddr_o.eq(lsu_vaddr), - self.lsu_valid_o.eq(lsu_req), - self.lsu_exception_o.eq(misaligned_ex), - ] - - sverr = Signal() - usrerr = Signal() - - m.d.comb += [ - # mute 
misaligned exceptions if there is no request - # otherwise they will throw accidental exceptions - misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i), - - # SUM is not set and we are trying to access a user - # page in supervisor mode - sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \ - dtlb_pte.u), - # this is not a user page but we are in user mode and - # trying to access it - usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u), - - # Check if the User flag is set, then we may only - # access it in supervisor mode if SUM is enabled - daccess_err.eq(sverr | usrerr), - ] - - # translation is enabled and no misaligned exception occurred - with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid): - m.d.comb += lsu_req.eq(0) - # 4K page - paddr = Signal.like(lsu_vaddr) - paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn) - m.d.comb += paddr.eq(paddr4k) - # Mega page - with m.If(dtlb_is_2M): - m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21]) - # Giga page - with m.If(dtlb_is_1G): - m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30]) - m.d.sync += self.lsu_paddr_o.eq(paddr) - - # --------- - # DTLB Hit - # -------- - with m.If(dtlb_hit & lsu_req): - m.d.comb += lsu_req.eq(1) - # this is a store - with m.If (lsu_is_store): - # check if the page is write-able and - # we are not violating privileges - # also check if the dirty flag is set - with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), - le.tval.eq(lsu_vaddr), - le.valid.eq(1) - ] - - # this is a load, check for sufficient access - # privileges - throw a page fault if necessary - with m.Elif(daccess_err): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), - le.tval.eq(lsu_vaddr), - le.valid.eq(1) - ] - # --------- - # DTLB Miss - # --------- - # watch out for exceptions - with m.Elif (ptw_active & ~walking_instr): - # page table walker threw an exception - with m.If (ptw_error): - # an error makes the translation valid - m.d.comb += lsu_req.eq(1) - # the page table walker can only throw page faults - with m.If (lsu_is_store): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), - le.tval.eq(uaddr64), - le.valid.eq(1) - ] - with m.Else(): - m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), - le.tval.eq(uaddr64), - le.valid.eq(1) - ] - - return m - - def ports(self): - return [self.flush_i, self.enable_translation_i, - self.en_ld_st_translation_i, - self.lsu_req_i, - self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o, - self.lsu_valid_o, self.lsu_paddr_o, - self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i, - self.satp_ppn_i, self.asid_i, self.flush_tlb_i, - self.itlb_miss_o, self.dtlb_miss_o] + \ - self.icache_areq_i.ports() + self.icache_areq_o.ports() + \ - self.req_port_i.ports() + self.req_port_o.ports() + \ - self.misaligned_ex_i.ports() + self.lsu_exception_o.ports() - -if __name__ == '__main__': - mmu = MMU() - vl = rtlil.convert(mmu, ports=mmu.ports()) - with open("test_mmu.il", "w") as f: - f.write(vl) - diff --git a/TLB/src/ariane/src/plru.py b/TLB/src/ariane/src/plru.py deleted file mode 100644 index 95d515c4..00000000 --- a/TLB/src/ariane/src/plru.py +++ /dev/null @@ -1,106 +0,0 @@ -from nmigen import Signal, Module, Cat, Const -from nmigen.hdl.ir import Elaboratable -from math import log2 - -from ptw import TLBUpdate, PTE, ASID_WIDTH - -class PLRU(Elaboratable): - """ PLRU - Pseudo Least Recently Used Replacement - - PLRU-tree indexing: - lvl0 0 - / \ - / \ - lvl1 1 
2 - / \ / \ - lvl2 3 4 5 6 - / \ /\/\ /\ - ... ... ... ... - """ - def __init__(self, entries): - self.entries = entries - self.lu_hit = Signal(entries) - self.replace_en_o = Signal(entries) - self.lu_access_i = Signal() - # Tree (bit per entry) - self.TLBSZ = 2*(self.entries-1) - self.plru_tree = Signal(self.TLBSZ) - self.plru_tree_o = Signal(self.TLBSZ) - - def elaborate(self, platform=None): - m = Module() - - # Just predefine which nodes will be set/cleared - # E.g. for a TLB with 8 entries, the for-loop is semantically - # equivalent to the following pseudo-code: - # unique case (1'b1) - # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1}; - # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0}; - # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1}; - # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0}; - # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1}; - # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0}; - # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1}; - # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0}; - # default: begin /* No hit */ end - # endcase - LOG_TLB = int(log2(self.entries)) - print(LOG_TLB) - for i in range(self.entries): - # we got a hit so update the pointer as it was least recently used - hit = Signal(reset_less=True) - m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i) - with m.If(hit): - # Set the nodes to the values we would expect - for lvl in range(LOG_TLB): - idx_base = (1< MSB, lvl1 <=> MSB-1, ... - shift = LOG_TLB - lvl; - new_idx = Const(~((i >> (shift-1)) & 1), (1, False)) - plru_idx = idx_base + (i >> shift) - print ("plru", i, lvl, hex(idx_base), - plru_idx, shift, new_idx) - m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx) - - # Decode tree to write enable signals - # Next for-loop basically creates the following logic for e.g. - # an 8 entry TLB (note: pseudo-code obviously): - # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1} - # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0} - # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1} - # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0} - # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1} - # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0} - # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1} - # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0} - # For each entry traverse the tree. If every tree-node matches - # the corresponding bit of the entry's index, this is - # the next entry to replace. - replace = [] - for i in range(self.entries): - en = [] - for lvl in range(LOG_TLB): - idx_base = (1< MSB, lvl1 <=> MSB-1, ... 
- shift = LOG_TLB - lvl; - new_idx = (i >> (shift-1)) & 1; - plru_idx = idx_base + (i>>shift) - plru = Signal(reset_less=True, - name="plru-%d-%d-%d" % (i, lvl, plru_idx)) - m.d.comb += plru.eq(self.plru_tree[plru_idx]) - # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx; - if new_idx: - en.append(~plru) # yes inverted (using bool()) - else: - en.append(plru) # yes inverted (using bool()) - print ("plru", i, en) - # boolean logic manipulation: - # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2) - replace.append(~Cat(*en).bool()) - m.d.comb += self.replace_en_o.eq(Cat(*replace)) - - return m - - def ports(self): - return [self.entries, self.lu_hit, self.replace_en_o, - self.lu_access_i, self.plru_tree, self.plru_tree_o] \ No newline at end of file diff --git a/TLB/src/ariane/src/ptw.py b/TLB/src/ariane/src/ptw.py deleted file mode 100644 index 05ec2d7d..00000000 --- a/TLB/src/ariane/src/ptw.py +++ /dev/null @@ -1,539 +0,0 @@ -""" -# Copyright 2018 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. -# -# Author: David Schaffenrath, TU Graz -# Author: Florian Zaruba, ETH Zurich -# Date: 24.4.2017 -# Description: Hardware-PTW - -/* verilator lint_off WIDTH */ -import ariane_pkg::*; - -see linux kernel source: - -* "arch/riscv/include/asm/page.h" -* "arch/riscv/include/asm/mmu_context.h" -* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET) - -""" - -from nmigen import Const, Signal, Cat, Module -from nmigen.hdl.ast import ArrayProxy -from nmigen.cli import verilog, rtlil -from math import log2 - - -DCACHE_SET_ASSOC = 8 -CONFIG_L1D_SIZE = 32*1024 -DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC)) -DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH - -ASID_WIDTH = 8 - - -class DCacheReqI: - def __init__(self): - self.address_index = Signal(DCACHE_INDEX_WIDTH) - self.address_tag = Signal(DCACHE_TAG_WIDTH) - self.data_wdata = Signal(64) - self.data_req = Signal() - self.data_we = Signal() - self.data_be = Signal(8) - self.data_size = Signal(2) - self.kill_req = Signal() - self.tag_valid = Signal() - - def eq(self, inp): - res = [] - for (o, i) in zip(self.ports(), inp.ports()): - res.append(o.eq(i)) - return res - - def ports(self): - return [self.address_index, self.address_tag, - self.data_wdata, self.data_req, - self.data_we, self.data_be, self.data_size, - self.kill_req, self.tag_valid, - ] - -class DCacheReqO: - def __init__(self): - self.data_gnt = Signal() - self.data_rvalid = Signal() - self.data_rdata = Signal(64) # actually in PTE object format - - def eq(self, inp): - res = [] - for (o, i) in zip(self.ports(), inp.ports()): - res.append(o.eq(i)) - return res - - def ports(self): - return [self.data_gnt, self.data_rvalid, self.data_rdata] - - -class PTE: #(RecordObject): - def __init__(self): - self.v = Signal() - self.r = Signal() - self.w = Signal() - self.x = Signal() - self.u = Signal() - self.g = Signal() - self.a = Signal() - self.d = Signal() - self.rsw = Signal(2) - self.ppn = 
Signal(44) - self.reserved = Signal(10) - - def flatten(self): - return Cat(*self.ports()) - - def eq(self, x): - if isinstance(x, ArrayProxy): - res = [] - for o in self.ports(): - i = getattr(x, o.name) - res.append(i) - x = Cat(*res) - else: - x = x.flatten() - return self.flatten().eq(x) - - def __iter__(self): - """ order is critical so that flatten creates LSB to MSB - """ - yield self.v - yield self.r - yield self.w - yield self.x - yield self.u - yield self.g - yield self.a - yield self.d - yield self.rsw - yield self.ppn - yield self.reserved - - def ports(self): - return list(self) - - -class TLBUpdate: - def __init__(self, asid_width): - self.valid = Signal() # valid flag - self.is_2M = Signal() - self.is_1G = Signal() - self.vpn = Signal(27) - self.asid = Signal(asid_width) - self.content = PTE() - - def flatten(self): - return Cat(*self.ports()) - - def eq(self, x): - return self.flatten().eq(x.flatten()) - - def ports(self): - return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \ - self.content.ports() - - -# SV39 defines three levels of page tables -LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1 -LVL2 = Const(1, 2) -LVL3 = Const(2, 2) - - -class PTW: - def __init__(self, asid_width=8): - self.asid_width = asid_width - - self.flush_i = Signal() # flush everything, we need to do this because - # actually everything we do is speculative at this stage - # e.g.: there could be a CSR instruction that changes everything - self.ptw_active_o = Signal(reset=1) # active if not IDLE - self.walking_instr_o = Signal() # set when walking for TLB - self.ptw_error_o = Signal() # set when an error occurred - self.enable_translation_i = Signal() # CSRs indicate to enable SV39 - self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st - - self.lsu_is_store_i = Signal() # translation triggered by store - # PTW memory interface - self.req_port_i = DCacheReqO() - self.req_port_o = DCacheReqI() - - # to TLBs, update logic - self.itlb_update_o = TLBUpdate(asid_width) - self.dtlb_update_o = TLBUpdate(asid_width) - - self.update_vaddr_o = Signal(39) - - self.asid_i = Signal(self.asid_width) - # from TLBs - # did we miss? - self.itlb_access_i = Signal() - self.itlb_hit_i = Signal() - self.itlb_vaddr_i = Signal(64) - - self.dtlb_access_i = Signal() - self.dtlb_hit_i = Signal() - self.dtlb_vaddr_i = Signal(64) - # from CSR file - self.satp_ppn_i = Signal(44) # ppn from satp - self.mxr_i = Signal() - # Performance counters - self.itlb_miss_o = Signal() - self.dtlb_miss_o = Signal() - - def ports(self): - return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o, - ] - return [ - self.enable_translation_i, self.en_ld_st_translation_i, - self.lsu_is_store_i, self.req_port_i, self.req_port_o, - self.update_vaddr_o, - self.asid_i, - self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i, - self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i, - self.satp_ppn_i, self.mxr_i, - self.itlb_miss_o, self.dtlb_miss_o - ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports() - - def elaborate(self, platform): - m = Module() - - # input registers - data_rvalid = Signal() - data_rdata = Signal(64) - - # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata - # is spec'd in 64-bit binary-format: better to spec as Record? 
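        # Editor's note: flattened LSB-to-MSB per the PTE class above,
        # data_rdata decodes as
        #   v[0] r[1] w[2] x[3] u[4] g[5] a[6] d[7]  rsw[9:8]
        #   ppn[53:10]  reserved[63:54]
        # i.e. the standard Sv39 PTE layout from the RISC-V privileged spec.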
- pte = PTE() - m.d.comb += pte.flatten().eq(data_rdata) - - # SV39 defines three levels of page tables - ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above) - ptw_lvl1 = Signal() - ptw_lvl2 = Signal() - ptw_lvl3 = Signal() - m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1), - ptw_lvl2.eq(ptw_lvl == LVL2), - ptw_lvl3.eq(ptw_lvl == LVL3)] - - # is this an instruction page table walk? - is_instr_ptw = Signal() - global_mapping = Signal() - # latched tag signal - tag_valid = Signal() - # register the ASID - tlb_update_asid = Signal(self.asid_width) - # register VPN we need to walk, SV39 defines a 39 bit virtual addr - vaddr = Signal(64) - # 4 byte aligned physical pointer - ptw_pptr = Signal(56) - - end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH - m.d.sync += [ - # Assignments - self.update_vaddr_o.eq(vaddr), - - self.walking_instr_o.eq(is_instr_ptw), - # directly output the correct physical address - self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]), - self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]), - # we are never going to kill this request - self.req_port_o.kill_req.eq(0), # XXX assign comb? - # we are never going to write with the HPTW - self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb? - # ----------- - # TLB Update - # ----------- - self.itlb_update_o.vpn.eq(vaddr[12:39]), - self.dtlb_update_o.vpn.eq(vaddr[12:39]), - # update the correct page table level - self.itlb_update_o.is_2M.eq(ptw_lvl2), - self.itlb_update_o.is_1G.eq(ptw_lvl1), - self.dtlb_update_o.is_2M.eq(ptw_lvl2), - self.dtlb_update_o.is_1G.eq(ptw_lvl1), - # output the correct ASID - self.itlb_update_o.asid.eq(tlb_update_asid), - self.dtlb_update_o.asid.eq(tlb_update_asid), - # set the global mapping bit - self.itlb_update_o.content.eq(pte), - self.itlb_update_o.content.g.eq(global_mapping), - self.dtlb_update_o.content.eq(pte), - self.dtlb_update_o.content.g.eq(global_mapping), - - self.req_port_o.tag_valid.eq(tag_valid), - ] - - #------------------- - # Page table walker - #------------------- - # A virtual address va is translated into a physical address pa as - # follows: - # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39, - # PAGESIZE=2^12 and LEVELS=3.) - # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. - # (For Sv32, PTESIZE=4.) - # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an - # access exception. - # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to - # step 5. Otherwise, this PTE is a pointer to the next level of - # the page table. - # Let i=i-1. If i < 0, stop and raise an access exception. - # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2. - # 5. A leaf PTE has been found. Determine if the requested memory - # access is allowed by the pte.r, pte.w, and pte.x bits. If not, - # stop and raise an access exception. Otherwise, the translation is - # successful. Set pte.a to 1, and, if the memory access is a - # store, set pte.d to 1. - # The translated physical address is given as follows: - # - pa.pgoff = va.pgoff. - # - If i > 0, then this is a superpage translation and - # pa.ppn[i-1:0] = va.vpn[i-1:0]. - # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. - # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned - # superpage stop and raise a page-fault exception. 
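        # Editor's worked example (values assumed for illustration only):
        # with satp.ppn = 0x80000 and a virtual address whose vpn[2]
        # ( = vaddr[30:39] ) is 5, steps 1-2 above give the level-1 PTE
        # address
        #     a + vpn[2]*PTESIZE = (0x80000 << 12) + 5*8 = 0x8000_0028
        # (PTESIZE=8 for Sv39), which is exactly how ptw_pptr is built in
        # idle() below: Cat(Const(0, 3), itlb_vaddr_i[30:39], satp_ppn_i).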
- - m.d.sync += tag_valid.eq(0) - - # default assignments - m.d.comb += [ - # PTW memory interface - self.req_port_o.data_req.eq(0), - self.req_port_o.data_be.eq(Const(0xFF, 8)), - self.req_port_o.data_size.eq(Const(0b11, 2)), - self.req_port_o.data_we.eq(0), - self.ptw_error_o.eq(0), - self.itlb_update_o.valid.eq(0), - self.dtlb_update_o.valid.eq(0), - - self.itlb_miss_o.eq(0), - self.dtlb_miss_o.eq(0), - ] - - # ------------ - # State Machine - # ------------ - - with m.FSM() as fsm: - - with m.State("IDLE"): - self.idle(m, is_instr_ptw, ptw_lvl, global_mapping, - ptw_pptr, vaddr, tlb_update_asid) - - with m.State("WAIT_GRANT"): - self.grant(m, tag_valid, data_rvalid) - - with m.State("PTE_LOOKUP"): - # we wait for the valid signal - with m.If(data_rvalid): - self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, - data_rvalid, global_mapping, - is_instr_ptw, ptw_pptr) - - # Propagate error to MMU/LSU - with m.State("PROPAGATE_ERROR"): - m.next = "IDLE" - m.d.comb += self.ptw_error_o.eq(1) - - # wait for the rvalid before going back to IDLE - with m.State("WAIT_RVALID"): - with m.If(data_rvalid): - m.next = "IDLE" - - m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata), - data_rvalid.eq(self.req_port_i.data_rvalid) - ] - - return m - - def set_grant_state(self, m): - # should we have flushed before we got an rvalid, - # wait for it until going back to IDLE - with m.If(self.flush_i): - with m.If (self.req_port_i.data_gnt): - m.next = "WAIT_RVALID" - with m.Else(): - m.next = "IDLE" - with m.Else(): - m.next = "WAIT_GRANT" - - def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping, - ptw_pptr, vaddr, tlb_update_asid): - # by default we start with the top-most page table - m.d.sync += [is_instr_ptw.eq(0), - ptw_lvl.eq(LVL1), - global_mapping.eq(0), - self.ptw_active_o.eq(0), # deactive (IDLE) - ] - # work out itlb/dtlb miss - m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \ - self.itlb_access_i & \ - ~self.itlb_hit_i & \ - ~self.dtlb_access_i) - m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \ - self.dtlb_access_i & \ - ~self.dtlb_hit_i) - # we got an ITLB miss? - with m.If(self.itlb_miss_o): - pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39], - self.satp_ppn_i) - m.d.sync += [ptw_pptr.eq(pptr), - is_instr_ptw.eq(1), - vaddr.eq(self.itlb_vaddr_i), - tlb_update_asid.eq(self.asid_i), - ] - self.set_grant_state(m) - - # we got a DTLB miss? 
- with m.Elif(self.dtlb_miss_o): - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39], - self.satp_ppn_i) - m.d.sync += [ptw_pptr.eq(pptr), - vaddr.eq(self.dtlb_vaddr_i), - tlb_update_asid.eq(self.asid_i), - ] - self.set_grant_state(m) - - def grant(self, m, tag_valid, data_rvalid): - # we've got a data WAIT_GRANT so tell the - # cache that the tag is valid - - # send a request out - m.d.comb += self.req_port_o.data_req.eq(1) - # wait for the WAIT_GRANT - with m.If(self.req_port_i.data_gnt): - # send the tag valid signal one cycle later - m.d.sync += tag_valid.eq(1) - # should we have flushed before we got an rvalid, - # wait for it until going back to IDLE - with m.If(self.flush_i): - with m.If (~data_rvalid): - m.next = "WAIT_RVALID" - with m.Else(): - m.next = "IDLE" - with m.Else(): - m.next = "PTE_LOOKUP" - - def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, - data_rvalid, global_mapping, - is_instr_ptw, ptw_pptr): - # temporaries - pte_rx = Signal(reset_less=True) - pte_exe = Signal(reset_less=True) - pte_inv = Signal(reset_less=True) - pte_a = Signal(reset_less=True) - st_wd = Signal(reset_less=True) - m.d.comb += [pte_rx.eq(pte.r | pte.x), - pte_exe.eq(~pte.x | ~pte.a), - pte_inv.eq(~pte.v | (~pte.r & pte.w)), - pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))), - st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))] - - l1err = Signal(reset_less=True) - l2err = Signal(reset_less=True) - m.d.comb += [l2err.eq((ptw_lvl2) & pte.ppn[0:9] != Const(0, 9)), - l1err.eq((ptw_lvl1) & pte.ppn[0:18] != Const(0, 18)) ] - - # check if the global mapping bit is set - with m.If (pte.g): - m.d.sync += global_mapping.eq(1) - - m.next = "IDLE" - - # ------------- - # Invalid PTE - # ------------- - # If pte.v = 0, or if pte.r = 0 and pte.w = 1, - # stop and raise a page-fault exception. - with m.If (pte_inv): - m.next = "PROPAGATE_ERROR" - - # ----------- - # Valid PTE - # ----------- - - # it is a valid PTE - # if pte.r = 1 or pte.x = 1 it is a valid PTE - with m.Elif (pte_rx): - # Valid translation found (either 1G, 2M or 4K) - with m.If(is_instr_ptw): - # ------------ - # Update ITLB - # ------------ - # If page not executable, we can directly raise error. - # This doesn't put a useless entry into the TLB. - # The same idea applies to the access flag since we let - # the access flag be managed by SW. - with m.If (pte_exe): - m.next = "IDLE" - with m.Else(): - m.d.comb += self.itlb_update_o.valid.eq(1) - - with m.Else(): - # ------------ - # Update DTLB - # ------------ - # Check if the access flag has been set, otherwise - # throw page-fault and let software handle those bits. - # If page not readable (there are no write-only pages) - # directly raise an error. This doesn't put a useless - # entry into the TLB. 
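# --- editorial sketch, not part of the original file -----------------------
# Plain-Python restatement of the leaf-PTE decision built from the
# temporaries defined at the top of lookup() above (pte_inv, pte_rx, pte_a,
# st_wd) and acted on in the code that follows.  The pte argument is assumed
# to be a dict such as the earlier decode_sv39_pte() sketch returns; the
# superpage-alignment check (l1err/l2err) is omitted for brevity.

def dtlb_leaf_decision(pte, is_store, mxr):
    """Return 'fault', 'walk' or 'update' for a PTE seen on the data side."""
    if not pte["v"] or (not pte["r"] and pte["w"]):
        return "fault"                 # invalid encoding (spec step 3)
    if not (pte["r"] or pte["x"]):
        return "walk"                  # pointer PTE: descend one level
    readable = pte["a"] and (pte["r"] or (pte["x"] and mxr))
    if not readable:
        return "fault"                 # A clear or not readable: let SW handle it
    if is_store and (not pte["w"] or not pte["d"]):
        return "fault"                 # stores need both W and D set
    return "update"                    # install the translation in the DTLB
# ---------------------------------------------------------------------------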
- with m.If(pte_a): - m.d.comb += self.dtlb_update_o.valid.eq(1) - with m.Else(): - m.next = "PROPAGATE_ERROR" - # Request is a store: perform additional checks - # If the request was a store and the page not - # write-able, raise an error - # the same applies if the dirty flag is not set - with m.If (st_wd): - m.d.comb += self.dtlb_update_o.valid.eq(0) - m.next = "PROPAGATE_ERROR" - - # check if the ppn is correctly aligned: Case (6) - with m.If(l1err | l2err): - m.next = "PROPAGATE_ERROR" - m.d.comb += [self.dtlb_update_o.valid.eq(0), - self.itlb_update_o.valid.eq(0)] - - # this is a pointer to the next TLB level - with m.Else(): - # pointer to next level of page table - with m.If (ptw_lvl1): - # we are in the second level now - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn) - m.d.sync += [ptw_pptr.eq(pptr), - ptw_lvl.eq(LVL2) - ] - with m.If(ptw_lvl2): - # here we received a pointer to the third level - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn) - m.d.sync += [ptw_pptr.eq(pptr), - ptw_lvl.eq(LVL3) - ] - self.set_grant_state(m) - - with m.If (ptw_lvl3): - # Should already be the last level - # page table => Error - m.d.sync += ptw_lvl.eq(LVL3) - m.next = "PROPAGATE_ERROR" - - -if __name__ == '__main__': - ptw = PTW() - vl = rtlil.convert(ptw, ports=ptw.ports()) - with open("test_ptw.il", "w") as f: - f.write(vl) diff --git a/TLB/src/ariane/src/tlb.py b/TLB/src/ariane/src/tlb.py deleted file mode 100644 index f768571e..00000000 --- a/TLB/src/ariane/src/tlb.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -# Copyright 2018 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
-# -# Author: David Schaffenrath, TU Graz -# Author: Florian Zaruba, ETH Zurich -# Date: 21.4.2017 -# Description: Translation Lookaside Buffer, SV39 -# fully set-associative - -Implementation in c++: -https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp - -Text description: -https://people.cs.clemson.edu/~mark/464/p_lru.txt - -Online simulator: -http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html -""" -from math import log2 -from nmigen import Signal, Module, Cat, Const, Array -from nmigen.cli import verilog, rtlil -from nmigen.lib.coding import Encoder - -from ptw import TLBUpdate, PTE, ASID_WIDTH -from plru import PLRU -from tlb_content import TLBContent - -TLB_ENTRIES = 8 - -class TLB: - def __init__(self, tlb_entries=8, asid_width=8): - self.tlb_entries = tlb_entries - self.asid_width = asid_width - - self.flush_i = Signal() # Flush signal - # Lookup signals - self.lu_access_i = Signal() - self.lu_asid_i = Signal(self.asid_width) - self.lu_vaddr_i = Signal(64) - self.lu_content_o = PTE() - self.lu_is_2M_o = Signal() - self.lu_is_1G_o = Signal() - self.lu_hit_o = Signal() - # Update TLB - self.pte_width = len(self.lu_content_o.flatten()) - self.update_i = TLBUpdate(asid_width) - - def elaborate(self, platform): - m = Module() - - vpn2 = Signal(9) - vpn1 = Signal(9) - vpn0 = Signal(9) - - #------------- - # Translation - #------------- - - # SV39 defines three levels of page tables - m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]), - vpn1.eq(self.lu_vaddr_i[21:30]), - vpn2.eq(self.lu_vaddr_i[30:39]), - ] - - tc = [] - for i in range(self.tlb_entries): - tlc = TLBContent(self.pte_width, self.asid_width) - setattr(m.submodules, "tc%d" % i, tlc) - tc.append(tlc) - # connect inputs - tlc.update_i = self.update_i # saves a lot of graphviz links - m.d.comb += [tlc.vpn0.eq(vpn0), - tlc.vpn1.eq(vpn1), - tlc.vpn2.eq(vpn2), - tlc.flush_i.eq(self.flush_i), - #tlc.update_i.eq(self.update_i), - tlc.lu_asid_i.eq(self.lu_asid_i)] - tc = Array(tc) - - #-------------- - # Select hit - #-------------- - - # use Encoder to select hit index - # XXX TODO: assert that there's only one valid entry (one lu_hit) - hitsel = Encoder(self.tlb_entries) - m.submodules.hitsel = hitsel - - hits = [] - for i in range(self.tlb_entries): - hits.append(tc[i].lu_hit_o) - m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well) - idx = hitsel.o - - active = Signal(reset_less=True) - m.d.comb += active.eq(~hitsel.n) - with m.If(active): - # active hit, send selected as output - m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o), - self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o), - self.lu_hit_o.eq(1), - self.lu_content_o.flatten().eq(tc[idx].lu_content_o), - ] - - #-------------- - # PLRU. 
- #-------------- - - p = PLRU(self.tlb_entries) - plru_tree = Signal(p.TLBSZ) - m.submodules.plru = p - - # connect PLRU inputs/outputs - # XXX TODO: assert that there's only one valid entry (one replace_en) - en = [] - for i in range(self.tlb_entries): - en.append(tc[i].replace_en_i) - m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags - p.lu_hit.eq(hitsel.i), - p.lu_access_i.eq(self.lu_access_i), - p.plru_tree.eq(plru_tree)] - m.d.sync += plru_tree.eq(p.plru_tree_o) - - #-------------- - # Sanity checks - #-------------- - - assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \ - "TLB size must be a multiple of 2 and greater than 1" - assert (self.asid_width >= 1), \ - "ASID width must be at least 1" - - return m - - """ - # Just for checking - function int countSetBits(logic[self.tlb_entries-1:0] vector); - automatic int count = 0; - foreach (vector[idx]) begin - count += vector[idx]; - end - return count; - endfunction - - assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1)) - else $error("More then one hit in TLB!"); $stop(); end - assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1)) - else $error("More then one TLB entry selected for next replace!"); - """ - - def ports(self): - return [self.flush_i, self.lu_access_i, - self.lu_asid_i, self.lu_vaddr_i, - self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o, - ] + self.lu_content_o.ports() + self.update_i.ports() - -if __name__ == '__main__': - tlb = TLB() - vl = rtlil.convert(tlb, ports=tlb.ports()) - with open("test_tlb.il", "w") as f: - f.write(vl) - diff --git a/TLB/src/ariane/src/tlb_content.py b/TLB/src/ariane/src/tlb_content.py deleted file mode 100644 index 024c5697..00000000 --- a/TLB/src/ariane/src/tlb_content.py +++ /dev/null @@ -1,125 +0,0 @@ -from nmigen import Signal, Module, Cat, Const - -from ptw import TLBUpdate, PTE - -class TLBEntry: - def __init__(self, asid_width): - self.asid = Signal(asid_width) - # SV39 defines three levels of page tables - self.vpn0 = Signal(9) - self.vpn1 = Signal(9) - self.vpn2 = Signal(9) - self.is_2M = Signal() - self.is_1G = Signal() - self.valid = Signal() - - def flatten(self): - return Cat(*self.ports()) - - def eq(self, x): - return self.flatten().eq(x.flatten()) - - def ports(self): - return [self.asid, self.vpn0, self.vpn1, self.vpn2, - self.is_2M, self.is_1G, self.valid] - -class TLBContent: - def __init__(self, pte_width, asid_width): - self.asid_width = asid_width - self.pte_width = pte_width - self.flush_i = Signal() # Flush signal - # Update TLB - self.update_i = TLBUpdate(asid_width) - self.vpn2 = Signal(9) - self.vpn1 = Signal(9) - self.vpn0 = Signal(9) - self.replace_en_i = Signal() # replace the following entry, - # set by replacement strategy - # Lookup signals - self.lu_asid_i = Signal(asid_width) - self.lu_content_o = Signal(pte_width) - self.lu_is_2M_o = Signal() - self.lu_is_1G_o = Signal() - self.lu_hit_o = Signal() - - def elaborate(self, platform): - m = Module() - - tags = TLBEntry(self.asid_width) - content = Signal(self.pte_width) - - m.d.comb += [self.lu_hit_o.eq(0), - self.lu_is_2M_o.eq(0), - self.lu_is_1G_o.eq(0)] - - # temporaries for 1st level match - asid_ok = Signal(reset_less=True) - vpn2_ok = Signal(reset_less=True) - tags_ok = Signal(reset_less=True) - vpn2_hit = Signal(reset_less=True) - m.d.comb += [tags_ok.eq(tags.valid), - asid_ok.eq(tags.asid == self.lu_asid_i), - vpn2_ok.eq(tags.vpn2 == self.vpn2), - vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)] - # temporaries for 2nd level match - vpn1_ok = 
Signal(reset_less=True) - tags_2M = Signal(reset_less=True) - vpn0_ok = Signal(reset_less=True) - vpn0_or_2M = Signal(reset_less=True) - m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1), - tags_2M.eq(tags.is_2M), - vpn0_ok.eq(self.vpn0 == tags.vpn0), - vpn0_or_2M.eq(tags_2M | vpn0_ok)] - # first level match, this may be a giga page, - # check the ASID flags as well - with m.If(vpn2_hit): - # second level - with m.If (tags.is_1G): - m.d.comb += [ self.lu_content_o.eq(content), - self.lu_is_1G_o.eq(1), - self.lu_hit_o.eq(1), - ] - # not a giga page hit so check further - with m.Elif(vpn1_ok): - # this could be a 2 mega page hit or a 4 kB hit - # output accordingly - with m.If(vpn0_or_2M): - m.d.comb += [ self.lu_content_o.eq(content), - self.lu_is_2M_o.eq(tags.is_2M), - self.lu_hit_o.eq(1), - ] - # ------------------ - # Update or Flush - # ------------------ - - # temporaries - replace_valid = Signal(reset_less=True) - m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i) - - # flush - with m.If (self.flush_i): - # invalidate (flush) conditions: all if zero or just this ASID - with m.If (self.lu_asid_i == Const(0, self.asid_width) | - (self.lu_asid_i == tags.asid)): - m.d.sync += tags.valid.eq(0) - - # normal replacement - with m.Elif(replace_valid): - m.d.sync += [ # update tag array - tags.asid.eq(self.update_i.asid), - tags.vpn2.eq(self.update_i.vpn[18:27]), - tags.vpn1.eq(self.update_i.vpn[9:18]), - tags.vpn0.eq(self.update_i.vpn[0:9]), - tags.is_1G.eq(self.update_i.is_1G), - tags.is_2M.eq(self.update_i.is_2M), - tags.valid.eq(1), - # and content as well - content.eq(self.update_i.content.flatten()) - ] - return m - - def ports(self): - return [self.flush_i, - self.lu_asid_i, - self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o, - ] + self.update_i.content.ports() + self.update_i.ports() diff --git a/TLB/src/ariane/test/test_plru.py b/TLB/src/ariane/test/test_plru.py deleted file mode 100644 index 9b040e1d..00000000 --- a/TLB/src/ariane/test/test_plru.py +++ /dev/null @@ -1,15 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../../TestUtil") - -from plru import PLRU - -from nmigen.compat.sim import run_simulation - -def testbench(dut): - yield - -if __name__ == "__main__": - dut = PLRU(4) - run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd") - print("PLRU Unit Test Success") \ No newline at end of file diff --git a/TLB/src/ariane/test/test_ptw.py b/TLB/src/ariane/test/test_ptw.py deleted file mode 100644 index e9c5324c..00000000 --- a/TLB/src/ariane/test/test_ptw.py +++ /dev/null @@ -1,127 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from ptw import PTW, PTE - - -def testbench(dut): - - addr = 0x8000000 - - #pte = PTE() - #yield pte.v.eq(1) - #yield pte.r.eq(1) - - yield dut.req_port_i.data_gnt.eq(1) - yield dut.req_port_i.data_rvalid.eq(1) - yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten()) - - # data lookup - yield dut.en_ld_st_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000000) - - yield - yield - yield - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x200000) - - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - - # data lookup, PTW 
levels 1-2-3 - addr = 0x4000000 - yield dut.dtlb_vaddr_i.eq(addr) - yield dut.mxr_i.eq(0x1) - yield dut.req_port_i.data_gnt.eq(1) - yield dut.req_port_i.data_rvalid.eq(1) - yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten()) - - yield dut.en_ld_st_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(addr) - - yield - yield - yield - yield - yield - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - yield - yield - - - # instruction lookup - yield dut.en_ld_st_translation_i.eq(0) - yield dut.enable_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x800000) - - yield - yield - yield - - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x200000) - - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x800011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - - yield - - - -if __name__ == "__main__": - dut = PTW() - run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd") - print("PTW Unit Test Success") diff --git a/TLB/src/ariane/test/test_tlb.py b/TLB/src/ariane/test/test_tlb.py deleted file mode 100644 index aab1d43c..00000000 --- a/TLB/src/ariane/test/test_tlb.py +++ /dev/null @@ -1,69 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from tlb import TLB - -def set_vaddr(addr): - yield dut.lu_vaddr_i.eq(addr) - yield dut.update_i.vpn.eq(addr>>12) - - -def testbench(dut): - yield dut.lu_access_i.eq(1) - yield dut.lu_asid_i.eq(1) - yield dut.update_i.valid.eq(1) - yield dut.update_i.is_1G.eq(0) - yield dut.update_i.is_2M.eq(0) - yield dut.update_i.asid.eq(1) - yield dut.update_i.content.ppn.eq(0) - yield dut.update_i.content.rsw.eq(0) - yield dut.update_i.content.r.eq(1) - - yield - - addr = 0x80000 - yield from set_vaddr(addr) - yield - - addr = 0x90001 - yield from set_vaddr(addr) - yield - - addr = 0x28000000 - yield from set_vaddr(addr) - yield - - addr = 0x28000001 - yield from set_vaddr(addr) - - addr = 0x28000001 - yield from set_vaddr(addr) - yield - - addr = 0x1000040000 - yield from set_vaddr(addr) - yield - - addr = 0x1000040001 - yield from set_vaddr(addr) - yield - - yield dut.update_i.is_1G.eq(1) - addr = 0x2040000 - yield from set_vaddr(addr) - yield - - yield dut.update_i.is_1G.eq(1) - addr = 0x2040001 - yield from set_vaddr(addr) - yield - - yield - - -if __name__ == "__main__": - dut = TLB() - run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd") diff --git a/TLB/test/__init__.py b/TLB/test/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/TLB/test/test_LFSR2.py b/TLB/test/test_LFSR2.py deleted file mode 100644 index 889a042f..00000000 --- a/TLB/test/test_LFSR2.py +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# See Notices.txt for copyright information -import sys -sys.path.append("../src") -sys.path.append("../../TestUtil") -from LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3 - -from nmigen.back.pysim import Simulator, Delay, Tick -import unittest - - -class TestLFSR(unittest.TestCase): - def 
test_poly(self): - v = LFSRPolynomial() - self.assertEqual(repr(v), "LFSRPolynomial([0])") - self.assertEqual(str(v), "1") - v = LFSRPolynomial([1]) - self.assertEqual(repr(v), "LFSRPolynomial([1, 0])") - self.assertEqual(str(v), "x + 1") - v = LFSRPolynomial([0, 1]) - self.assertEqual(repr(v), "LFSRPolynomial([1, 0])") - self.assertEqual(str(v), "x + 1") - v = LFSRPolynomial([1, 2]) - self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])") - self.assertEqual(str(v), "x^2 + x + 1") - v = LFSRPolynomial([2]) - self.assertEqual(repr(v), "LFSRPolynomial([2, 0])") - self.assertEqual(str(v), "x^2 + 1") - self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1") - - def test_lfsr_3(self): - module = LFSR(LFSR_POLY_3) - traces = [module.state, module.enable] - with Simulator(module, - vcd_file=open("Waveforms/test_LFSR2.vcd", "w"), - gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"), - traces=traces) as sim: - sim.add_clock(1e-6, 0.25e-6) - delay = Delay(1e-7) - - def async_process(): - yield module.enable.eq(0) - yield Tick() - self.assertEqual((yield module.state), 0x1) - yield Tick() - self.assertEqual((yield module.state), 0x1) - yield module.enable.eq(1) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x2) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x5) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x3) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x7) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x6) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x4) - yield Tick() - yield delay - self.assertEqual((yield module.state), 0x1) - yield Tick() - - sim.add_process(async_process) - sim.run() diff --git a/TLB/test/test_address_encoder.py b/TLB/test/test_address_encoder.py deleted file mode 100644 index 29537136..00000000 --- a/TLB/test/test_address_encoder.py +++ /dev/null @@ -1,107 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from AddressEncoder import AddressEncoder - -from test_helper import assert_eq, assert_ne, assert_op - -# This function allows for the easy setting of values to the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# i (Input): The array of single bits to be written -def set_encoder(dut, i): - yield dut.i.eq(i) - yield - -# Checks the single match of the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# sm (Single Match): The expected match result -# op (Operation): (0 => ==), (1 => !=) -def check_single_match(dut, sm, op): - out_sm = yield dut.single_match - assert_op("Single Match", out_sm, sm, op) - -# Checks the multiple match of the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# mm (Multiple Match): The expected match result -# op (Operation): (0 => ==), (1 => !=) -def check_multiple_match(dut, mm, op): - out_mm = yield dut.multiple_match - assert_op("Multiple Match", out_mm, mm, op) - -# Checks the output of the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# o (Output): The expected output -# op (Operation): (0 => ==), (1 => !=) -def check_output(dut, o, op): - out_o = yield dut.o - assert_op("Output", out_o, o, op) - -# Checks the state of the AddressEncoder -# Arguments: -# dut: The AddressEncoder being tested -# sm (Single Match): The expected match result -# mm (Multiple Match): The expected match result -# o (Output): The expected output -# ss_op 
(Operation): Operation for the match assertion (0 => ==), (1 => !=) -# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -# o_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -def check_all(dut, sm, mm, o, sm_op, mm_op, o_op): - yield from check_single_match(dut, sm, sm_op) - yield from check_multiple_match(dut, mm, mm_op) - yield from check_output(dut, o, o_op) - -def testbench(dut): - # Check invalid input - in_val = 0b000 - single_match = 0 - multiple_match = 0 - output = 0 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - # Check single bit - in_val = 0b001 - single_match = 1 - multiple_match = 0 - output = 0 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - # Check another single bit - in_val = 0b100 - single_match = 1 - multiple_match = 0 - output = 2 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - # Check multiple match - # We expected the lowest bit to be returned which is address 0 - in_val = 0b101 - single_match = 0 - multiple_match = 1 - output = 0 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - - # Check another multiple match - # We expected the lowest bit to be returned which is address 1 - in_val = 0b110 - single_match = 0 - multiple_match = 1 - output = 1 - yield from set_encoder(dut, in_val) - yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) - -if __name__ == "__main__": - dut = AddressEncoder(4) - run_simulation(dut, testbench(dut), - vcd_name="Waveforms/test_address_encoder.vcd") - print("AddressEncoder Unit Test Success") diff --git a/TLB/test/test_cam.py b/TLB/test/test_cam.py deleted file mode 100644 index 0047f538..00000000 --- a/TLB/test/test_cam.py +++ /dev/null @@ -1,207 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from Cam import Cam - -from test_helper import assert_eq, assert_ne, assert_op - -# This function allows for the easy setting of values to the Cam -# Arguments: -# dut: The Cam being tested -# e (Enable): Whether the block is going to be enabled -# we (Write Enable): Whether the Cam will write on the next cycle -# a (Address): Where the data will be written if write enable is high -# d (Data): Either what we are looking for or will write to the address -def set_cam(dut, e, we, a, d): - yield dut.enable.eq(e) - yield dut.write_enable.eq(we) - yield dut.address_in.eq(a) - yield dut.data_in.eq(d) - yield - -# Checks the multiple match of the Cam -# Arguments: -# dut: The Cam being tested -# mm (Multiple Match): The expected match result -# op (Operation): (0 => ==), (1 => !=) -def check_multiple_match(dut, mm, op): - out_mm = yield dut.multiple_match - assert_op("Multiple Match", out_mm, mm, op) - -# Checks the single match of the Cam -# Arguments: -# dut: The Cam being tested -# sm (Single Match): The expected match result -# op (Operation): (0 => ==), (1 => !=) -def check_single_match(dut, sm, op): - out_sm = yield dut.single_match - assert_op("Single Match", out_sm, sm, op) - -# Checks the address output of the Cam -# Arguments: -# dut: The Cam being tested -# ma (Match Address): The expected match result -# op (Operation): (0 => ==), (1 => !=) -def check_match_address(dut, ma, op): - out_ma = yield dut.match_address - 
assert_op("Match Address", out_ma, ma, op) - -# Checks the state of the Cam -# Arguments: -# dut: The Cam being tested -# sm (Single Match): The expected match result -# mm (Multiple Match): The expected match result -# ma: (Match Address): The expected address output -# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -# ma_op (Operation): Operation for the address assertion (0 => ==), (1 => !=) -def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op): - yield from check_multiple_match(dut, mm, mm_op) - yield from check_single_match(dut, sm, sm_op) - yield from check_match_address(dut, ma, ma_op) - -def testbench(dut): - # NA - enable = 0 - write_enable = 0 - address = 0 - data = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Read Miss Multiple - # Note that the default starting entry data bits are all 0 - enable = 1 - write_enable = 0 - address = 0 - data = 0 - multiple_match = 1 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_multiple_match(dut, multiple_match, 0) - - # Read Miss - # Note that the default starting entry data bits are all 0 - enable = 1 - write_enable = 0 - address = 0 - data = 1 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Write Entry 0 - enable = 1 - write_enable = 1 - address = 0 - data = 4 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Read Hit Entry 0 - enable = 1 - write_enable = 0 - address = 0 - data = 4 - multiple_match = 0 - single_match = 1 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) - - # Search Hit - enable = 1 - write_enable = 0 - address = 0 - data = 4 - multiple_match = 0 - single_match = 1 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) - - # Search Miss - enable = 1 - write_enable = 0 - address = 0 - data = 5 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Multiple Match test - # Write Entry 1 - enable = 1 - write_enable = 1 - address = 1 - data = 5 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Write Entry 2 - # Same data as Entry 1 - enable = 1 - write_enable = 1 - address = 2 - data = 5 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - # Read Hit Data 5 - enable = 1 - write_enable = 0 - address = 1 - data = 5 - multiple_match = 1 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_all(dut, multiple_match, single_match, address,0,0,0) - - # Verify read_warning is not caused - # Write Entry 0 - enable = 1 - write_enable = 1 - address = 0 - data = 7 - multiple_match = 0 - single_match = 0 - yield from set_cam(dut, enable, write_enable, address, data) - # 
Note there is no yield we immediately attempt to read in the next cycle - - # Read Hit Data 7 - enable = 1 - write_enable = 0 - address = 0 - data = 7 - multiple_match = 0 - single_match = 1 - yield from set_cam(dut, enable, write_enable, address, data) - yield - yield from check_single_match(dut, single_match, 0) - - yield - - -if __name__ == "__main__": - dut = Cam(4, 4) - run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_cam.vcd") - print("Cam Unit Test Success") diff --git a/TLB/test/test_cam_entry.py b/TLB/test/test_cam_entry.py deleted file mode 100644 index 7fcd7942..00000000 --- a/TLB/test/test_cam_entry.py +++ /dev/null @@ -1,108 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from test_helper import assert_eq, assert_ne, assert_op -from CamEntry import CamEntry - -# This function allows for the easy setting of values to the Cam Entry -# Arguments: -# dut: The CamEntry being tested -# c (command): NA (0), Read (1), Write (2), Reserve (3) -# d (data): The data to be set -def set_cam_entry(dut, c, d): - # Write desired values - yield dut.command.eq(c) - yield dut.data_in.eq(d) - yield - # Reset all lines - yield dut.command.eq(0) - yield dut.data_in.eq(0) - yield - -# Checks the data state of the CAM entry -# Arguments: -# dut: The CamEntry being tested -# d (Data): The expected data -# op (Operation): (0 => ==), (1 => !=) -def check_data(dut, d, op): - out_d = yield dut.data - assert_op("Data", out_d, d, op) - -# Checks the match state of the CAM entry -# Arguments: -# dut: The CamEntry being tested -# m (Match): The expected match -# op (Operation): (0 => ==), (1 => !=) -def check_match(dut, m, op): - out_m = yield dut.match - assert_op("Match", out_m, m, op) - -# Checks the state of the CAM entry -# Arguments: -# dut: The CamEntry being tested -# d (data): The expected data -# m (match): The expected match -# d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=) -# m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) -def check_all(dut, d, m, d_op, m_op): - yield from check_data(dut, d, d_op) - yield from check_match(dut, m, m_op) - -# This testbench goes through the paces of testing the CamEntry module -# It is done by writing and then reading various combinations of key/data pairs -# and reading the results with varying keys to verify the resulting stored -# data is correct. 
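# --- editorial sketch, not part of the original file -----------------------
# A minimal software reference model of the command behaviour the testbench
# below exercises (NA=0, Read=1, Write=2, Reserve=3), inferred from the
# assertions that follow.  It is a reading aid only and is not used by the
# test itself.

class CamEntryModel:
    def __init__(self):
        self.data = 0
        self.match = 0

    def step(self, command, data_in):
        if command == 1:               # Read: compare stored data against input
            self.match = int(self.data == data_in)
        elif command == 2:             # Write: store the input, clear match
            self.data = data_in
            self.match = 0
        elif command == 3:             # Reserve: reset the entry
            self.data = 0
            self.match = 0
        return self.data, self.match
# ---------------------------------------------------------------------------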
-def testbench(dut): - # Check write - command = 2 - data = 1 - match = 0 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 0, 0) - - # Check read miss - command = 1 - data = 2 - match = 0 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 1, 0) - - # Check read hit - command = 1 - data = 1 - match = 1 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 0, 0) - - # Check overwrite - command = 2 - data = 5 - match = 0 - yield from set_cam_entry(dut, command, data) - yield - yield from check_all(dut, data, match, 0, 0) - - # Check read hit - command = 1 - data = 5 - match = 1 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 0, 0) - - # Check reset - command = 3 - data = 0 - match = 0 - yield from set_cam_entry(dut, command, data) - yield from check_all(dut, data, match, 0, 0) - - # Extra clock cycle for waveform - yield - -if __name__ == "__main__": - dut = CamEntry(4) - run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_cam_entry.vcd") - print("CamEntry Unit Test Success") diff --git a/TLB/test/test_lfsr.py b/TLB/test/test_lfsr.py deleted file mode 100644 index 0b476adc..00000000 --- a/TLB/test/test_lfsr.py +++ /dev/null @@ -1,30 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from LFSR import LFSR - -from test_helper import assert_eq, assert_ne, assert_op - -def testbench(dut): - yield dut.enable.eq(1) - yield dut.o.eq(9) - yield - yield - yield - yield - yield - yield - yield - yield - yield - yield - yield - yield - -if __name__ == "__main__": - dut = LFSR() - run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_lfsr.vcd") - print("LFSR Unit Test Success") \ No newline at end of file diff --git a/TLB/test/test_permission_validator.py b/TLB/test/test_permission_validator.py deleted file mode 100644 index 59750c51..00000000 --- a/TLB/test/test_permission_validator.py +++ /dev/null @@ -1,145 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from PermissionValidator import PermissionValidator - -from test_helper import assert_op - -def set_validator(dut, d, xwr, sm, sa, asid): - yield dut.data.eq(d) - yield dut.xwr.eq(xwr) - yield dut.super_mode.eq(sm) - yield dut.super_access.eq(sa) - yield dut.asid.eq(asid) - yield - -def check_valid(dut, v, op): - out_v = yield dut.valid - assert_op("Valid", out_v, v, op) - -def testbench(dut): - # 80 bits represented. Ignore the MSB as it will be truncated - # ASID is bits first 4 hex values (bits 64 - 78) - - # Test user mode entry valid - # Global Bit matching ASID - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000031 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test user mode entry valid - # Global Bit nonmatching ASID - # Ensure that user mode and valid is enabled! 
- data = 0x7FFF0000000000000031 - # Ignore MSB it will be truncated - asid = 0x7FF6 - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test user mode entry invalid - # Global Bit nonmatching ASID - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000021 - # Ignore MSB it will be truncated - asid = 0x7FF6 - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 0 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test user mode entry valid - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000011 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test user mode entry invalid - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000011 - # Ignore MSB it will be truncated - asid = 0x7FF6 - super_mode = 0 - super_access = 0 - xwr = 0 - valid = 0 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test supervisor mode entry valid - # The entry is NOT in user mode - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000001 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 1 - super_access = 0 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test supervisor mode entry invalid - # The entry is in user mode - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000011 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 1 - super_access = 0 - xwr = 0 - valid = 0 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test supervisor mode entry valid - # The entry is NOT in user mode with access - # Ensure that user mode and valid is enabled! - data = 0x7FFF0000000000000001 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 1 - super_access = 1 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - - # Test supervisor mode entry valid - # The entry is in user mode with access - # Ensure that user mode and valid is enabled! 
- data = 0x7FFF0000000000000011 - # Ignore MSB it will be truncated - asid = 0x7FFF - super_mode = 1 - super_access = 1 - xwr = 0 - valid = 1 - yield from set_validator(dut, data, xwr, super_mode, super_access, asid) - yield from check_valid(dut, valid, 0) - -if __name__ == "__main__": - dut = PermissionValidator(15, 64); - run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_permission_validator.vcd") - print("PermissionValidator Unit Test Success") diff --git a/TLB/test/test_pte_entry.py b/TLB/test/test_pte_entry.py deleted file mode 100644 index 5faa0bf1..00000000 --- a/TLB/test/test_pte_entry.py +++ /dev/null @@ -1,103 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from PteEntry import PteEntry - -from test_helper import assert_op - -def set_entry(dut, i): - yield dut.i.eq(i) - yield - -def check_dirty(dut, d, op): - out_d = yield dut.d - assert_op("Dirty", out_d, d, op) - -def check_accessed(dut, a, op): - out_a = yield dut.a - assert_op("Accessed", out_a, a, op) - -def check_global(dut, o, op): - out = yield dut.g - assert_op("Global", out, o, op) - -def check_user(dut, o, op): - out = yield dut.u - assert_op("User Mode", out, o, op) - -def check_xwr(dut, o, op): - out = yield dut.xwr - assert_op("XWR", out, o, op) - -def check_asid(dut, o, op): - out = yield dut.asid - assert_op("ASID", out, o, op) - -def check_pte(dut, o, op): - out = yield dut.pte - assert_op("ASID", out, o, op) - -def check_valid(dut, v, op): - out_v = yield dut.v - assert_op("Valid", out_v, v, op) - -def check_all(dut, d, a, g, u, xwr, v, asid, pte): - yield from check_dirty(dut, d, 0) - yield from check_accessed(dut, a, 0) - yield from check_global(dut, g, 0) - yield from check_user(dut, u, 0) - yield from check_xwr(dut, xwr, 0) - yield from check_asid(dut, asid, 0) - yield from check_pte(dut, pte, 0) - yield from check_valid(dut, v, 0) - -def testbench(dut): - # 80 bits represented. 
Ignore the MSB as it will be truncated - # ASID is bits first 4 hex values (bits 64 - 78) - - i = 0x7FFF0000000000000031 - dirty = 0 - access = 0 - glob = 1 - user = 1 - xwr = 0 - valid = 1 - asid = 0x7FFF - pte = 0x0000000000000031 - yield from set_entry(dut, i) - yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) - - i = 0x0FFF00000000000000FF - dirty = 1 - access = 1 - glob = 1 - user = 1 - xwr = 7 - valid = 1 - asid = 0x0FFF - pte = 0x00000000000000FF - yield from set_entry(dut, i) - yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) - - i = 0x0721000000001100001F - dirty = 0 - access = 0 - glob = 0 - user = 1 - xwr = 7 - valid = 1 - asid = 0x0721 - pte = 0x000000001100001F - yield from set_entry(dut, i) - yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) - - yield - - -if __name__ == "__main__": - dut = PteEntry(15, 64); - run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_pte_entry.vcd") - print("PteEntry Unit Test Success") \ No newline at end of file diff --git a/TLB/test/test_set_associative_cache.py b/TLB/test/test_set_associative_cache.py deleted file mode 100644 index d681425f..00000000 --- a/TLB/test/test_set_associative_cache.py +++ /dev/null @@ -1,39 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from SetAssociativeCache import SetAssociativeCache - -from test_helper import assert_eq, assert_ne, assert_op - -def set_sac(dut, e, c, s, t, d): - yield dut.enable.eq(e) - yield dut.command.eq(c) - yield dut.cset.eq(s) - yield dut.tag.eq(t) - yield dut.data_i.eq(d) - yield - -def testbench(dut): - enable = 1 - command = 2 - cset = 1 - tag = 2 - data = 3 - yield from set_sac(dut, enable, command, cset, tag, data) - yield - - enable = 1 - command = 2 - cset = 1 - tag = 5 - data = 8 - yield from set_sac(dut, enable, command, cset, tag, data) - yield - -if __name__ == "__main__": - dut = SetAssociativeCache(4, 4, 4, 4) - run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_set_associative_cache.vcd") - print("Set Associative Cache Unit Test Success") diff --git a/TestUtil/test_helper.py b/TestUtil/test_helper.py deleted file mode 100644 index d22124b8..00000000 --- a/TestUtil/test_helper.py +++ /dev/null @@ -1,27 +0,0 @@ -# Verifies the given values given the particular operand -# Arguments: -# p (Prefix): Appended to the front of the assert statement -# e (Expected): The expected value -# o (Output): The output result -# op (Operation): (0 => ==), (1 => !=) -def assert_op(pre, o, e, op): - if op == 0: - assert_eq(pre, o, e) - else: - assert_ne(pre, o, e) - -# Verifies the given values are equal -# Arguments: -# p (Prefix): Appended to the front of the assert statement -# e (Expected): The expected value -# o (Output): The output result -def assert_eq(p, o, e): - assert o == e, p + " Output " + str(o) + " Expected " + str(e) - -# Verifies the given values are not equal -# Arguments: -# p (Prefix): Appended to the front of the assert statement -# e (Expected): The expected value -# o (Output): The output result -def assert_ne(p, o, e): - assert o != e, p + " Output " + str(o) + " Not Expecting " + str(e) \ No newline at end of file diff --git a/scoreboard/dependence_cell.py b/scoreboard/dependence_cell.py deleted file mode 100644 index 18e8d755..00000000 --- a/scoreboard/dependence_cell.py +++ /dev/null @@ -1,109 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil 
-from nmigen import Module, Signal, Elaboratable -from nmutil.latch import SRLatch - - -class DependenceCell(Elaboratable): - """ implements 11.4.7 mitch alsup dependence cell, p27 - """ - def __init__(self): - # inputs - self.dest_i = Signal(reset_less=True) # Dest in (top) - self.src1_i = Signal(reset_less=True) # oper1 in (top) - self.src2_i = Signal(reset_less=True) # oper2 in (top) - self.issue_i = Signal(reset_less=True) # Issue in (top) - - self.go_write_i = Signal(reset_less=True) # Go Write in (left) - self.go_read_i = Signal(reset_less=True) # Go Read in (left) - - # for Register File Select Lines (vertical) - self.dest_rsel_o = Signal(reset_less=True) # dest reg sel (bottom) - self.src1_rsel_o = Signal(reset_less=True) # src1 reg sel (bottom) - self.src2_rsel_o = Signal(reset_less=True) # src2 reg sel (bottom) - - # for Function Unit "forward progress" (horizontal) - self.dest_fwd_o = Signal(reset_less=True) # dest FU fw (right) - self.src1_fwd_o = Signal(reset_less=True) # src1 FU fw (right) - self.src2_fwd_o = Signal(reset_less=True) # src2 FU fw (right) - - def elaborate(self, platform): - m = Module() - m.submodules.dest_l = dest_l = SRLatch() - m.submodules.src1_l = src1_l = SRLatch() - m.submodules.src2_l = src2_l = SRLatch() - - # destination latch: reset on go_write HI, set on dest and issue - m.d.comb += dest_l.s.eq(self.issue_i & self.dest_i) - m.d.comb += dest_l.r.eq(self.go_write_i) - - # src1 latch: reset on go_read HI, set on src1_i and issue - m.d.comb += src1_l.s.eq(self.issue_i & self.src1_i) - m.d.comb += src1_l.r.eq(self.go_read_i) - - # src2 latch: reset on go_read HI, set on op2_i and issue - m.d.comb += src2_l.s.eq(self.issue_i & self.src2_i) - m.d.comb += src2_l.r.eq(self.go_read_i) - - # FU "Forward Progress" (read out horizontally) - m.d.comb += self.dest_fwd_o.eq(dest_l.qn & self.dest_i) - m.d.comb += self.src1_fwd_o.eq(src1_l.qn & self.src1_i) - m.d.comb += self.src2_fwd_o.eq(src2_l.qn & self.src2_i) - - # Register File Select (read out vertically) - m.d.comb += self.dest_rsel_o.eq(dest_l.qn & self.go_write_i) - m.d.comb += self.src1_rsel_o.eq(src1_l.qn & self.go_read_i) - m.d.comb += self.src2_rsel_o.eq(src2_l.qn & self.go_read_i) - - return m - - def __iter__(self): - yield self.dest_i - yield self.src1_i - yield self.src2_i - yield self.issue_i - yield self.go_write_i - yield self.go_read_i - yield self.dest_rsel_o - yield self.src1_rsel_o - yield self.src2_rsel_o - yield self.dest_fwd_o - yield self.src1_fwd_o - yield self.src2_fwd_o - - def ports(self): - return list(self) - - -def dcell_sim(dut): - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_dcell(): - dut = DependenceCell() - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_dcell.il", "w") as f: - f.write(vl) - - run_simulation(dut, dcell_sim(dut), vcd_name='test_dcell.vcd') - -if __name__ == '__main__': - test_dcell() diff --git a/scoreboard/fn_unit.py b/scoreboard/fn_unit.py deleted file mode 100644 index b2ef9468..00000000 --- a/scoreboard/fn_unit.py +++ /dev/null @@ -1,327 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Cat, Array, Const, Elaboratable -from nmutil.latch import SRLatch -from 
nmigen.lib.coding import Decoder - -from shadow_fn import ShadowFn - - -class FnUnit(Elaboratable): - """ implements 11.4.8 function unit, p31 - also implements optional shadowing 11.5.1, p55 - - shadowing can be used for branches as well as exceptions (interrupts), - load/store hold (exceptions again), and vector-element predication - (once the predicate is known, which it may not be at instruction issue) - - Inputs - - * :wid: register file width - * :shadow_wid: number of shadow/fail/good/go_die sets - * :n_dests: number of destination regfile(s) (index: rfile_sel_i) - * :wr_pend: if true, writable observes the g_wr_pend_i vector - otherwise observes g_rd_pend_i - - notes: - - * dest_i / src1_i / src2_i are in *binary*, whereas... - * ...g_rd_pend_i / g_wr_pend_i and rd_pend_o / wr_pend_o are UNARY - * req_rel_i (request release) is the direct equivalent of pipeline - "output valid" (valid_o) - * recover is a local python variable (actually go_die_o) - * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing) - * wr_pend is set False for the majority of uses: however for - use in a STORE Function Unit it is set to True - """ - def __init__(self, wid, shadow_wid=0, n_dests=1, wr_pend=False): - self.reg_width = wid - self.n_dests = n_dests - self.shadow_wid = shadow_wid - self.wr_pend = wr_pend - - # inputs - if n_dests > 1: - self.rfile_sel_i = Signal(max=n_dests, reset_less=True) - else: - self.rfile_sel_i = Const(0) # no selection. gets Array[0] - self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in (top) - self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in (top) - self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in (top) - self.issue_i = Signal(reset_less=True) # Issue in (top) - - self.go_write_i = Signal(reset_less=True) # Go Write in (left) - self.go_read_i = Signal(reset_less=True) # Go Read in (left) - self.req_rel_i = Signal(reset_less=True) # request release (left) - - self.g_xx_pend_i = Array(Signal(wid, reset_less=True, name="g_pend_i") \ - for i in range(n_dests)) # global rd (right) - self.g_wr_pend_i = Signal(wid, reset_less=True) # global wr (right) - - if shadow_wid: - self.shadow_i = Signal(shadow_wid, reset_less=True) - self.s_fail_i = Signal(shadow_wid, reset_less=True) - self.s_good_i = Signal(shadow_wid, reset_less=True) - self.go_die_o = Signal(reset_less=True) - - # outputs - self.readable_o = Signal(reset_less=True) # Readable out (right) - self.writable_o = Array(Signal(reset_less=True, name="writable_o") \ - for i in range(n_dests)) # writable out (right) - self.busy_o = Signal(reset_less=True) # busy out (left) - - self.rd_pend_o = Signal(wid, reset_less=True) # rd pending (right) - self.xx_pend_o = Array(Signal(wid, reset_less=True, name="pend_o") \ - for i in range(n_dests))# wr pending (right) - - def elaborate(self, platform): - m = Module() - m.submodules.rd_l = rd_l = SRLatch(sync=False) - m.submodules.wr_l = wr_l = SRLatch(sync=False) - m.submodules.dest_d = dest_d = Decoder(self.reg_width) - m.submodules.src1_d = src1_d = Decoder(self.reg_width) - m.submodules.src2_d = src2_d = Decoder(self.reg_width) - s_latches = [] - for i in range(self.shadow_wid): - sh = ShadowFn() - setattr(m.submodules, "shadow%d" % i, sh) - s_latches.append(sh) - - # shadow / recover (optional: shadow_wid > 0) - if self.shadow_wid: - recover = self.go_die_o - shadown = Signal(reset_less=True) - i_l = [] - fail_l = [] - good_l = [] - shi_l = [] - sho_l = [] - rec_l = [] - # get list of latch signals. 
really must be a better way to do this - for l in s_latches: - i_l.append(l.issue_i) - shi_l.append(l.shadow_i) - fail_l.append(l.s_fail_i) - good_l.append(l.s_good_i) - sho_l.append(l.shadow_o) - rec_l.append(l.recover_o) - m.d.comb += Cat(*i_l).eq(self.issue_i) - m.d.comb += Cat(*fail_l).eq(self.s_fail_i) - m.d.comb += Cat(*good_l).eq(self.s_good_i) - m.d.comb += Cat(*shi_l).eq(self.shadow_i) - m.d.comb += shadown.eq(~(Cat(*sho_l).bool())) - m.d.comb += recover.eq(Cat(*rec_l).bool()) - else: - shadown = Const(1) - recover = Const(0) - - # selector - xx_pend_o = self.xx_pend_o[self.rfile_sel_i] - writable_o = self.writable_o[self.rfile_sel_i] - g_pend_i = self.g_xx_pend_i[self.rfile_sel_i] - - for i in range(self.n_dests): - m.d.comb += self.xx_pend_o[i].eq(0) # initialise all array - m.d.comb += self.writable_o[i].eq(0) # to zero - - # go_write latch: reset on go_write HI, set on issue - m.d.comb += wr_l.s.eq(self.issue_i) - m.d.comb += wr_l.r.eq(self.go_write_i | recover) - - # src1 latch: reset on go_read HI, set on issue - m.d.comb += rd_l.s.eq(self.issue_i) - m.d.comb += rd_l.r.eq(self.go_read_i | recover) - - # dest decoder: write-pending out - m.d.comb += dest_d.i.eq(self.dest_i) - m.d.comb += dest_d.n.eq(wr_l.qn) # decode is inverted - m.d.comb += self.busy_o.eq(wr_l.q) # busy if set - m.d.comb += xx_pend_o.eq(dest_d.o) - - # src1/src2 decoder: read-pending out - m.d.comb += src1_d.i.eq(self.src1_i) - m.d.comb += src1_d.n.eq(rd_l.qn) # decode is inverted - m.d.comb += src2_d.i.eq(self.src2_i) - m.d.comb += src2_d.n.eq(rd_l.qn) # decode is inverted - m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o) - - # readable output signal - g_rd = Signal(self.reg_width, reset_less=True) - m.d.comb += g_rd.eq(self.g_wr_pend_i & self.rd_pend_o) - m.d.comb += self.readable_o.eq(g_rd.bool()) - - # writable output signal - g_wr_v = Signal(self.reg_width, reset_less=True) - g_wr = Signal(reset_less=True) - wo = Signal(reset_less=True) - m.d.comb += g_wr_v.eq(g_pend_i & xx_pend_o) - m.d.comb += g_wr.eq(~g_wr_v.bool()) - m.d.comb += wo.eq(g_wr & rd_l.q & self.req_rel_i & shadown) - m.d.comb += writable_o.eq(wo) - - return m - - def __iter__(self): - yield self.dest_i - yield self.src1_i - yield self.src2_i - yield self.issue_i - yield self.go_write_i - yield self.go_read_i - yield self.req_rel_i - yield from self.g_xx_pend_i - yield self.g_wr_pend_i - yield self.readable_o - yield from self.writable_o - yield self.rd_pend_o - yield from self.xx_pend_o - - def ports(self): - return list(self) - -############# ############### -# --- --- # -# --- renamed / redirected from base class --- # -# --- --- # -# --- below are convenience classes which match the names --- # -# --- of the various mitch alsup book chapter gate diagrams --- # -# --- --- # -############# ############### - - -class IntFnUnit(FnUnit): - def __init__(self, wid, shadow_wid=0): - FnUnit.__init__(self, wid, shadow_wid) - self.int_rd_pend_o = self.rd_pend_o - self.int_wr_pend_o = self.xx_pend_o[0] - self.g_int_wr_pend_i = self.g_wr_pend_i - self.g_int_rd_pend_i = self.g_xx_pend_i[0] - self.int_readable_o = self.readable_o - self.int_writable_o = self.writable_o[0] - - self.int_rd_pend_o.name = "int_rd_pend_o" - self.int_wr_pend_o.name = "int_wr_pend_o" - self.g_int_rd_pend_i.name = "g_int_rd_pend_i" - self.g_int_wr_pend_i.name = "g_int_wr_pend_i" - self.int_readable_o.name = "int_readable_o" - self.int_writable_o.name = "int_writable_o" - - -class FPFnUnit(FnUnit): - def __init__(self, wid, shadow_wid=0): - FnUnit.__init__(self, wid, 
shadow_wid) - self.fp_rd_pend_o = self.rd_pend_o - self.fp_wr_pend_o = self.xx_pend_o[0] - self.g_fp_wr_pend_i = self.g_wr_pend_i - self.g_fp_rd_pend_i = self.g_xx_pend_i[0] - self.fp_writable_o = self.writable_o[0] - self.fp_readable_o = self.readable_o - - self.fp_rd_pend_o.name = "fp_rd_pend_o" - self.fp_wr_pend_o.name = "fp_wr_pend_o" - self.g_fp_rd_pend_i.name = "g_fp_rd_pend_i" - self.g_fp_wr_pend_i.name = "g_fp_wr_pend_i" - self.fp_writable_o.name = "fp_writable_o" - self.fp_readable_o.name = "fp_readable_o" - - -class LDFnUnit(FnUnit): - """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile) - * when rfile_sel_i == 0, int_wr_pend_o is set - * when rfile_sel_i == 1, fp_wr_pend_o is set - """ - def __init__(self, wid, shadow_wid=0): - FnUnit.__init__(self, wid, shadow_wid, n_dests=2) - self.int_rd_pend_o = self.rd_pend_o - self.int_wr_pend_o = self.xx_pend_o[0] - self.fp_wr_pend_o = self.xx_pend_o[1] - self.g_int_wr_pend_i = self.g_wr_pend_i - self.g_int_rd_pend_i = self.g_xx_pend_i[0] - self.g_fp_rd_pend_i = self.g_xx_pend_i[1] - self.int_readable_o = self.readable_o - self.int_writable_o = self.writable_o[0] - self.fp_writable_o = self.writable_o[1] - - self.int_rd_pend_o.name = "int_rd_pend_o" - self.int_wr_pend_o.name = "int_wr_pend_o" - self.fp_wr_pend_o.name = "fp_wr_pend_o" - self.g_int_wr_pend_i.name = "g_int_wr_pend_i" - self.g_int_rd_pend_i.name = "g_int_rd_pend_i" - self.g_fp_rd_pend_i.name = "g_fp_rd_pend_i" - self.int_readable_o.name = "int_readable_o" - self.int_writable_o.name = "int_writable_o" - self.fp_writable_o.name = "fp_writable_o" - - -class STFnUnit(FnUnit): - """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile) - * wr_pend=False indicates to observe global fp write pending - * when rfile_sel_i == 0, int_wr_pend_o is set - * when rfile_sel_i == 1, fp_wr_pend_o is set - * - """ - def __init__(self, wid, shadow_wid=0): - FnUnit.__init__(self, wid, shadow_wid, n_dests=2, wr_pend=True) - self.int_rd_pend_o = self.rd_pend_o # 1st int read-pending vector - self.int2_rd_pend_o = self.xx_pend_o[0] # 2nd int read-pending vector - self.fp_rd_pend_o = self.xx_pend_o[1] # 1x FP read-pending vector - # yes overwrite FnUnit base class g_wr_pend_i vector - self.g_int_wr_pend_i = self.g_wr_pend_i = self.g_xx_pend_i[0] - self.g_fp_wr_pend_i = self.g_xx_pend_i[1] - self.int_readable_o = self.readable_o - self.int_writable_o = self.writable_o[0] - self.fp_writable_o = self.writable_o[1] - - self.int_rd_pend_o.name = "int_rd_pend_o" - self.int2_rd_pend_o.name = "int2_rd_pend_o" - self.fp_rd_pend_o.name = "fp_rd_pend_o" - self.g_int_wr_pend_i.name = "g_int_wr_pend_i" - self.g_fp_wr_pend_i.name = "g_fp_wr_pend_i" - self.int_readable_o.name = "int_readable_o" - self.int_writable_o.name = "int_writable_o" - self.fp_writable_o.name = "fp_writable_o" - - - -def int_fn_unit_sim(dut): - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_int_fn_unit(): - dut = FnUnit(32, 2, 2) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_fn_unit.il", "w") as f: - f.write(vl) - - dut = LDFnUnit(32, 2) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_ld_fn_unit.il", "w") as f: - f.write(vl) - - dut = STFnUnit(32, 0) - vl = 
rtlil.convert(dut, ports=dut.ports()) - with open("test_st_fn_unit.il", "w") as f: - f.write(vl) - - run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_fn_unit.vcd') - -if __name__ == '__main__': - test_int_fn_unit() diff --git a/scoreboard/fu_dep_cell.py b/scoreboard/fu_dep_cell.py deleted file mode 100644 index 93ef28d3..00000000 --- a/scoreboard/fu_dep_cell.py +++ /dev/null @@ -1,84 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Elaboratable -from nmutil.latch import SRLatch - - -class FUDependenceCell(Elaboratable): - """ implements 11.4.7 mitch alsup dependence cell, p27 - """ - def __init__(self): - # inputs - self.rd_pend_i = Signal(reset_less=True) # read pending in (left) - self.wr_pend_i = Signal(reset_less=True) # write pending in (left) - self.issue_i = Signal(reset_less=True) # Issue in (top) - - self.go_write_i = Signal(reset_less=True) # Go Write in (left) - self.go_read_i = Signal(reset_less=True) # Go Read in (left) - - # outputs (latched rd/wr pend) - self.rd_pend_o = Signal(reset_less=True) # read pending out (right) - self.wr_pend_o = Signal(reset_less=True) # write pending out (right) - - def elaborate(self, platform): - m = Module() - m.submodules.rd_l = rd_l = SRLatch() - m.submodules.wr_l = wr_l = SRLatch() - - # write latch: reset on go_write HI, set on write pending and issue - m.d.comb += wr_l.s.eq(self.issue_i & self.wr_pend_i) - m.d.comb += wr_l.r.eq(self.go_write_i) - - # read latch: reset on go_read HI, set on read pending and issue - m.d.comb += rd_l.s.eq(self.issue_i & self.rd_pend_i) - m.d.comb += rd_l.r.eq(self.go_read_i) - - # Read/Write Pending Latches (read out horizontally) - m.d.comb += self.wr_pend_o.eq(wr_l.qn) - m.d.comb += self.rd_pend_o.eq(rd_l.qn) - - return m - - def __iter__(self): - yield self.rd_pend_i - yield self.wr_pend_i - yield self.issue_i - yield self.go_write_i - yield self.go_read_i - yield self.rd_pend_o - yield self.wr_pend_o - - def ports(self): - return list(self) - - -def dcell_sim(dut): - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_dcell(): - dut = FUDependenceCell() - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_fu_dcell.il", "w") as f: - f.write(vl) - - run_simulation(dut, dcell_sim(dut), vcd_name='test_fu_dcell.vcd') - -if __name__ == '__main__': - test_dcell() diff --git a/scoreboard/fu_fu_matrix.py b/scoreboard/fu_fu_matrix.py deleted file mode 100644 index 6ffd4442..00000000 --- a/scoreboard/fu_fu_matrix.py +++ /dev/null @@ -1,157 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Elaboratable, Array, Cat - -#from nmutil.latch import SRLatch -from fu_dep_cell import FUDependenceCell -from fu_picker_vec import FU_Pick_Vec - -""" - - 6600 Function Unit Dependency Table Matrix inputs / outputs - ----------------------------------------------------------- - -""" - -class FUFUDepMatrix(Elaboratable): - """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26 - """ - def __init__(self, n_fu_row, n_fu_col): - self.n_fu_row = n_fu_row # Y (FU row#) ^v - self.n_fu_col = n_fu_col # X (FU col #) <> - self.rd_pend_i = Signal(n_fu_row, reset_less=True) 
# Rd pending (left) - self.wr_pend_i = Signal(n_fu_row, reset_less=True) # Wr pending (left) - self.issue_i = Signal(n_fu_col, reset_less=True) # Issue in (top) - - self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left) - self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left) - - # for Function Unit Readable/Writable (horizontal) - self.readable_o = Signal(n_fu_col, reset_less=True) # readable (bot) - self.writable_o = Signal(n_fu_col, reset_less=True) # writable (bot) - - def elaborate(self, platform): - m = Module() - - # --- - # matrix of dependency cells - # --- - dm = Array(Array(FUDependenceCell() for r in range(self.n_fu_row)) \ - for f in range(self.n_fu_col)) - for x in range(self.n_fu_col): - for y in range(self.n_fu_row): - setattr(m.submodules, "dm_fx%d_fy%d" % (x, y), dm[x][y]) - - # --- - # array of Function Unit Readable/Writable: row-length, horizontal - # --- - fur = Array(FU_Pick_Vec(self.n_fu_row) for r in range(self.n_fu_col)) - for x in range(self.n_fu_col): - setattr(m.submodules, "fur_x%d" % (x), fur[x]) - - # --- - # connect FU Readable/Writable vector - # --- - readable = [] - writable = [] - for x in range(self.n_fu_col): - fu = fur[x] - rd_pend_o = [] - wr_pend_o = [] - for y in range(self.n_fu_row): - dc = dm[x][y] - # accumulate cell outputs rd/wr-pending - rd_pend_o.append(dc.rd_pend_o) - wr_pend_o.append(dc.wr_pend_o) - # connect cell reg-select outputs to Reg Vector In - m.d.comb += [fu.rd_pend_i.eq(Cat(*rd_pend_o)), - fu.wr_pend_i.eq(Cat(*wr_pend_o)), - ] - # accumulate Readable/Writable Vector outputs - readable.append(fu.readable_o) - writable.append(fu.writable_o) - - # ... and output them from this module (horizontal, width=REGs) - m.d.comb += self.readable_o.eq(Cat(*readable)) - m.d.comb += self.writable_o.eq(Cat(*writable)) - - # --- - # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i - # --- - for y in range(self.n_fu_row): - issue_i = [] - for x in range(self.n_fu_col): - dc = dm[x][y] - # accumulate cell inputs issue - issue_i.append(dc.issue_i) - # wire up inputs from module to row cell inputs (Cat is gooood) - m.d.comb += Cat(*issue_i).eq(self.issue_i) - - # --- - # connect Matrix go_read_i/go_write_i to module readable/writable - # --- - for x in range(self.n_fu_col): - go_read_i = [] - go_write_i = [] - rd_pend_i = [] - wr_pend_i = [] - for y in range(self.n_fu_row): - dc = dm[x][y] - # accumulate cell rd_pend/wr_pend/go_read/go_write - rd_pend_i.append(dc.rd_pend_i) - wr_pend_i.append(dc.wr_pend_i) - go_read_i.append(dc.go_read_i) - go_write_i.append(dc.go_write_i) - # wire up inputs from module to row cell inputs (Cat is gooood) - m.d.comb += [Cat(*go_read_i).eq(self.go_read_i), - Cat(*go_write_i).eq(self.go_write_i), - Cat(*rd_pend_i).eq(self.rd_pend_i), - Cat(*wr_pend_i).eq(self.wr_pend_i), - ] - - return m - - def __iter__(self): - yield self.rd_pend_i - yield self.wr_pend_i - yield self.issue_i - yield self.go_write_i - yield self.go_read_i - yield self.readable_o - yield self.writable_o - - def ports(self): - return list(self) - -def d_matrix_sim(dut): - """ XXX TODO - """ - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_fu_fu_matrix(): - dut = FUFUDepMatrix(n_fu_row=3, n_fu_col=4) - vl = 
rtlil.convert(dut, ports=dut.ports()) - with open("test_fu_fu_matrix.il", "w") as f: - f.write(vl) - - run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_fu_matrix.vcd') - -if __name__ == '__main__': - test_fu_fu_matrix() diff --git a/scoreboard/fu_picker_vec.py b/scoreboard/fu_picker_vec.py deleted file mode 100644 index fd44c45f..00000000 --- a/scoreboard/fu_picker_vec.py +++ /dev/null @@ -1,21 +0,0 @@ -from nmigen import Elaboratable, Module, Signal, Cat - - -class FU_Pick_Vec(Elaboratable): - """ these are allocated per-FU (horizontally), - and are of length fu_row_n - """ - def __init__(self, fu_row_n): - self.fu_row_n = fu_row_n - self.rd_pend_i = Signal(fu_row_n, reset_less=True) - self.wr_pend_i = Signal(fu_row_n, reset_less=True) - - self.readable_o = Signal(reset_less=True) - self.writable_o = Signal(reset_less=True) - - def elaborate(self, platform): - m = Module() - m.d.comb += self.readable_o.eq(self.rd_pend_i.bool()) - m.d.comb += self.writable_o.eq(self.wr_pend_i.bool()) - return m - diff --git a/scoreboard/fu_reg_matrix.py b/scoreboard/fu_reg_matrix.py deleted file mode 100644 index 0826ea56..00000000 --- a/scoreboard/fu_reg_matrix.py +++ /dev/null @@ -1,225 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Elaboratable, Array, Cat - -#from nmutil.latch import SRLatch -from dependence_cell import DependenceCell -from fu_wr_pending import FU_RW_Pend -from reg_select import Reg_Rsv - -""" - - 6600 Dependency Table Matrix inputs / outputs - --------------------------------------------- - - d s1 s2 i d s1 s2 i d s1 s2 i d s1 s2 i - | | | | | | | | | | | | | | | | - v v v v v v v v v v v v v v v v - go_rd/go_wr -> dm-r0-fu0 dm-r1-fu0 dm-r2-fu0 dm-r3-fu0 -> wr/rd-pend - go_rd/go_wr -> dm-r0-fu1 dm-r1-fu1 dm-r2-fu1 dm-r3-fu1 -> wr/rd-pend - go_rd/go_wr -> dm-r0-fu2 dm-r1-fu2 dm-r2-fu2 dm-r3-fu2 -> wr/rd-pend - | | | | | | | | | | | | - v v v v v v v v v v v v - d s1 s2 d s1 s2 d s1 s2 d s1 s2 - reg sel reg sel reg sel reg sel - -""" - -class FURegDepMatrix(Elaboratable): - """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26 - """ - def __init__(self, n_fu_row, n_reg_col): - self.n_fu_row = n_fu_row # Y (FUs) ^v - self.n_reg_col = n_reg_col # X (Regs) <> - self.dest_i = Signal(n_reg_col, reset_less=True) # Dest in (top) - self.src1_i = Signal(n_reg_col, reset_less=True) # oper1 in (top) - self.src2_i = Signal(n_reg_col, reset_less=True) # oper2 in (top) - self.issue_i = Signal(n_reg_col, reset_less=True) # Issue in (top) - - self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left) - self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left) - - # for Register File Select Lines (horizontal), per-reg - self.dest_rsel_o = Signal(n_reg_col, reset_less=True) # dest reg (bot) - self.src1_rsel_o = Signal(n_reg_col, reset_less=True) # src1 reg (bot) - self.src2_rsel_o = Signal(n_reg_col, reset_less=True) # src2 reg (bot) - - # for Function Unit "forward progress" (vertical), per-FU - self.wr_pend_o = Signal(n_fu_row, reset_less=True) # wr pending (right) - self.rd_pend_o = Signal(n_fu_row, reset_less=True) # rd pending (right) - - def elaborate(self, platform): - m = Module() - - # --- - # matrix of dependency cells - # --- - dm = Array(Array(DependenceCell() for r in range(self.n_fu_row)) \ - for f in range(self.n_reg_col)) - for rn in range(self.n_reg_col): - for fu in range(self.n_fu_row): - setattr(m.submodules, "dm_r%d_fu%d" % (rn, fu), dm[rn][fu]) - - # --- 
- # array of Function Unit Pending vectors - # --- - fupend = Array(FU_RW_Pend(self.n_reg_col) for f in range(self.n_fu_row)) - for fu in range(self.n_fu_row): - setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu]) - - # --- - # array of Register Reservation vectors - # --- - regrsv = Array(Reg_Rsv(self.n_fu_row) for r in range(self.n_reg_col)) - for rn in range(self.n_reg_col): - setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn]) - - # --- - # connect Function Unit vector - # --- - wr_pend = [] - rd_pend = [] - for fu in range(self.n_fu_row): - fup = fupend[fu] - dest_fwd_o = [] - src1_fwd_o = [] - src2_fwd_o = [] - for rn in range(self.n_reg_col): - dc = dm[rn][fu] - # accumulate cell fwd outputs for dest/src1/src2 - dest_fwd_o.append(dc.dest_fwd_o) - src1_fwd_o.append(dc.src1_fwd_o) - src2_fwd_o.append(dc.src2_fwd_o) - # connect cell fwd outputs to FU Vector in [Cat is gooood] - m.d.comb += [fup.dest_fwd_i.eq(Cat(*dest_fwd_o)), - fup.src1_fwd_i.eq(Cat(*src1_fwd_o)), - fup.src2_fwd_i.eq(Cat(*src2_fwd_o)) - ] - # accumulate FU Vector outputs - wr_pend.append(fup.reg_wr_pend_o) - rd_pend.append(fup.reg_rd_pend_o) - - # ... and output them from this module (vertical, width=FUs) - m.d.comb += self.wr_pend_o.eq(Cat(*wr_pend)) - m.d.comb += self.rd_pend_o.eq(Cat(*rd_pend)) - - # --- - # connect Reg Selection vector - # --- - dest_rsel = [] - src1_rsel = [] - src2_rsel = [] - for rn in range(self.n_reg_col): - rsv = regrsv[rn] - dest_rsel_o = [] - src1_rsel_o = [] - src2_rsel_o = [] - for fu in range(self.n_fu_row): - dc = dm[rn][fu] - # accumulate cell reg-select outputs dest/src1/src2 - dest_rsel_o.append(dc.dest_rsel_o) - src1_rsel_o.append(dc.src1_rsel_o) - src2_rsel_o.append(dc.src2_rsel_o) - # connect cell reg-select outputs to Reg Vector In - m.d.comb += [rsv.dest_rsel_i.eq(Cat(*dest_rsel_o)), - rsv.src1_rsel_i.eq(Cat(*src1_rsel_o)), - rsv.src2_rsel_i.eq(Cat(*src2_rsel_o)), - ] - # accumulate Reg-Sel Vector outputs - dest_rsel.append(rsv.dest_rsel_o) - src1_rsel.append(rsv.src1_rsel_o) - src2_rsel.append(rsv.src2_rsel_o) - - # ... 
and output them from this module (horizontal, width=REGs) - m.d.comb += self.dest_rsel_o.eq(Cat(*dest_rsel)) - m.d.comb += self.src1_rsel_o.eq(Cat(*src1_rsel)) - m.d.comb += self.src2_rsel_o.eq(Cat(*src2_rsel)) - - # --- - # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i - # --- - for rn in range(self.n_reg_col): - dest_i = [] - src1_i = [] - src2_i = [] - issue_i = [] - for fu in range(self.n_fu_row): - dc = dm[rn][fu] - # accumulate cell inputs dest/src1/src2 - dest_i.append(dc.dest_i) - src1_i.append(dc.src1_i) - src2_i.append(dc.src2_i) - issue_i.append(dc.issue_i) - # wire up inputs from module to row cell inputs (Cat is gooood) - m.d.comb += [Cat(*dest_i).eq(self.dest_i), - Cat(*src1_i).eq(self.src1_i), - Cat(*src2_i).eq(self.src2_i), - Cat(*issue_i).eq(self.issue_i), - ] - - # --- - # connect Dependency Matrix go_read_i/go_write_i to module go_rd/go_wr - # --- - for fu in range(self.n_fu_row): - go_read_i = [] - go_write_i = [] - for rn in range(self.n_reg_col): - dc = dm[rn][fu] - # accumulate cell fwd outputs for dest/src1/src2 - go_read_i.append(dc.go_read_i) - go_write_i.append(dc.go_write_i) - # wire up inputs from module to row cell inputs (Cat is gooood) - m.d.comb += [Cat(*go_read_i).eq(self.go_read_i), - Cat(*go_write_i).eq(self.go_write_i), - ] - - return m - - def __iter__(self): - yield self.dest_i - yield self.src1_i - yield self.src2_i - yield self.issue_i - yield self.go_write_i - yield self.go_read_i - yield self.dest_rsel_o - yield self.src1_rsel_o - yield self.src2_rsel_o - yield self.wr_pend_o - yield self.rd_pend_o - - def ports(self): - return list(self) - -def d_matrix_sim(dut): - """ XXX TODO - """ - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_d_matrix(): - dut = FURegDepMatrix(n_fu_row=3, n_reg_col=4) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_fu_reg_matrix.il", "w") as f: - f.write(vl) - - run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_reg_matrix.vcd') - -if __name__ == '__main__': - test_d_matrix() diff --git a/scoreboard/fu_wr_pending.py b/scoreboard/fu_wr_pending.py deleted file mode 100644 index 9b177ff0..00000000 --- a/scoreboard/fu_wr_pending.py +++ /dev/null @@ -1,23 +0,0 @@ -from nmigen import Elaboratable, Module, Signal, Cat - - -class FU_RW_Pend(Elaboratable): - """ these are allocated per-FU (horizontally), - and are of length reg_count - """ - def __init__(self, reg_count): - self.reg_count = reg_count - self.dest_fwd_i = Signal(reg_count, reset_less=True) - self.src1_fwd_i = Signal(reg_count, reset_less=True) - self.src2_fwd_i = Signal(reg_count, reset_less=True) - - self.reg_wr_pend_o = Signal(reset_less=True) - self.reg_rd_pend_o = Signal(reset_less=True) - - def elaborate(self, platform): - m = Module() - srces = Cat(self.src1_fwd_i, self.src2_fwd_i) - m.d.comb += self.reg_wr_pend_o.eq(self.dest_fwd_i.bool()) - m.d.comb += self.reg_rd_pend_o.eq(srces.bool()) - return m - diff --git a/scoreboard/global_pending.py b/scoreboard/global_pending.py deleted file mode 100644 index 50e43378..00000000 --- a/scoreboard/global_pending.py +++ /dev/null @@ -1,93 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Cat, Elaboratable -from 
nmutil.latch import SRLatch -from nmigen.lib.coding import Decoder - - -class GlobalPending(Elaboratable): - """ implements Global Pending Vector, basically ORs all incoming Function - Unit vectors together. Can be used for creating Read or Write Global - Pending. Can be used for INT or FP Global Pending. - - Inputs: - * :wid: register file width - * :fu_vecs: a python list of function unit "pending" vectors, each - vector being a Signal of width equal to the reg file. - - Notes: - - * the regfile may be Int or FP, this code doesn't care which. - obviously do not try to put in a mixture of regfiles into fu_vecs. - * this code also doesn't care if it's used for Read Pending or Write - pending, it can be used for both: again, obviously, do not try to - put in a mixture of read *and* write pending vectors in. - * if some Function Units happen not to be uniform (don't operate - on a particular register (extremely unusual), they must set a Const - zero bit in the vector. - """ - def __init__(self, wid, fu_vecs): - self.reg_width = wid - # inputs - self.fu_vecs = fu_vecs - for v in fu_vecs: - assert len(v) == wid, "FU Vector must be same width as regfile" - - self.g_pend_o = Signal(wid, reset_less=True) # global pending vector - - def elaborate(self, platform): - m = Module() - - pend_l = [] - for i in range(self.reg_width): # per-register - vec_bit_l = [] - for v in self.fu_vecs: - vec_bit_l.append(v[i]) # fu bit for same register - pend_l.append(Cat(*vec_bit_l).bool()) # OR all bits for same reg - m.d.comb += self.g_pend_o.eq(Cat(*pend_l)) # merge all OR'd bits - - return m - - def __iter__(self): - yield from self.fu_vecs - yield self.g_pend_o - - def ports(self): - return list(self) - - -def g_vec_sim(dut): - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_g_vec(): - vecs = [] - for i in range(3): - vecs.append(Signal(32, name="fu%d" % i)) - dut = GlobalPending(32, vecs) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_global_pending.il", "w") as f: - f.write(vl) - - run_simulation(dut, g_vec_sim(dut), vcd_name='test_global_pending.vcd') - -if __name__ == '__main__': - test_g_vec() diff --git a/scoreboard/group_picker.py b/scoreboard/group_picker.py deleted file mode 100644 index 8f959a18..00000000 --- a/scoreboard/group_picker.py +++ /dev/null @@ -1,111 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Cat, Elaboratable - - -class PriorityPicker(Elaboratable): - """ implements a priority-picker. input: N bits, output: N bits - """ - def __init__(self, wid): - self.wid = wid - # inputs - self.i = Signal(wid, reset_less=True) - self.o = Signal(wid, reset_less=True) - - def elaborate(self, platform): - m = Module() - - res = [] - for i in range(0, self.wid): - tmp = Signal(reset_less = True) - if i == 0: - m.d.comb += tmp.eq(self.i[0]) - else: - m.d.comb += tmp.eq((~tmp) & self.i[i]) - res.append(tmp) - - # we like Cat(*xxx). 
turn lists into concatenated bits - m.d.comb += self.o.eq(Cat(*res)) - - return m - - def __iter__(self): - yield self.i - yield self.o - - def ports(self): - return list(self) - - -class GroupPicker(Elaboratable): - """ implements 10.5 mitch alsup group picker, p27 - """ - def __init__(self, wid): - self.gp_wid = wid - # inputs - self.readable_i = Signal(wid, reset_less=True) # readable in (top) - self.writable_i = Signal(wid, reset_less=True) # writable in (top) - self.rel_req_i = Signal(wid, reset_less=True) # release request in (top) - - # outputs - self.go_rd_o = Signal(wid, reset_less=True) # go read (bottom) - self.go_wr_o = Signal(wid, reset_less=True) # go write (bottom) - - def elaborate(self, platform): - m = Module() - - m.submodules.rpick = rpick = PriorityPicker(self.gp_wid) - m.submodules.wpick = wpick = PriorityPicker(self.gp_wid) - - # combine release (output ready signal) with writeable - m.d.comb += wpick.i.eq(self.writable_i & self.rel_req_i) - m.d.comb += self.go_wr_o.eq(wpick.o) - - m.d.comb += rpick.i.eq(self.readable_i) - m.d.comb += self.go_rd_o.eq(rpick.o) - - return m - - def __iter__(self): - yield self.readable_i - yield self.writable_i - yield self.rel_req_i - yield self.go_rd_o - yield self.go_wr_o - - def ports(self): - return list(self) - - -def grp_pick_sim(dut): - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_grp_pick(): - dut = GroupPicker(4) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_grp_pick.il", "w") as f: - f.write(vl) - - run_simulation(dut, grp_pick_sim(dut), vcd_name='test_grp_pick.vcd') - -if __name__ == '__main__': - test_grp_pick() diff --git a/scoreboard/issue_unit.py b/scoreboard/issue_unit.py deleted file mode 100644 index d1f58d11..00000000 --- a/scoreboard/issue_unit.py +++ /dev/null @@ -1,143 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Cat, Array, Const, Record, Elaboratable -from nmutil.latch import SRLatch -from nmigen.lib.coding import Decoder - -from shadow_fn import ShadowFn - - -class IssueUnit(Elaboratable): - """ implements 11.4.14 issue unit, p50 - - Inputs - - * :wid: register file width - * :n_insns: number of instructions in this issue unit. 
- """ - def __init__(self, wid, n_insns): - self.reg_width = wid - self.n_insns = n_insns - - # inputs - self.store_i = Signal(reset_less=True) # instruction is a store - self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in - self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in - self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in - - self.g_wr_pend_i = Signal(wid, reset_less=True) # write pending vector - - self.insn_i = Array(Signal(reset_less=True, name="insn_i") \ - for i in range(n_insns)) - self.busy_i = Array(Signal(reset_less=True, name="busy_i") \ - for i in range(n_insns)) - - # outputs - self.fn_issue_o = Array(Signal(reset_less=True, name="fn_issue_o") \ - for i in range(n_insns)) - self.g_issue_o = Signal(reset_less=True) - - def elaborate(self, platform): - m = Module() - m.submodules.dest_d = dest_d = Decoder(self.reg_width) - - # temporaries - waw_stall = Signal(reset_less=True) - fu_stall = Signal(reset_less=True) - pend = Signal(self.reg_width, reset_less=True) - - # dest decoder: write-pending - m.d.comb += dest_d.i.eq(self.dest_i) - m.d.comb += dest_d.n.eq(~self.store_i) # decode is inverted - m.d.comb += pend.eq(dest_d.o & self.g_wr_pend_i) - m.d.comb += waw_stall.eq(pend.bool()) - - ib_l = [] - for i in range(self.n_insns): - ib_l.append(self.insn_i[i] & self.busy_i[i]) - m.d.comb += fu_stall.eq(Cat(*ib_l).bool()) - m.d.comb += self.g_issue_o.eq(~(waw_stall | fu_stall)) - for i in range(self.n_insns): - m.d.comb += self.fn_issue_o[i].eq(self.g_issue_o & self.insn_i[i]) - - return m - - def __iter__(self): - yield self.store_i - yield self.dest_i - yield self.src1_i - yield self.src2_i - yield self.g_wr_pend_i - yield from self.insn_i - yield from self.busy_i - yield from self.fn_issue_o - yield self.g_issue_o - - def ports(self): - return list(self) - - -class IntFPIssueUnit(Elaboratable): - def __init__(self, wid, n_int_insns, n_fp_insns): - self.i = IssueUnit(wid, n_int_insns) - self.f = IssueUnit(wid, n_fp_insns) - self.issue_o = Signal(reset_less=True) - - # some renames - self.int_write_pending_i = self.i.g_wr_pend_i - self.fp_write_pending_i = self.f.g_wr_pend_i - self.int_write_pending_i.name = 'int_write_pending_i' - self.fp_write_pending_i.name = 'fp_write_pending_i' - - def elaborate(self, platform): - m = Module() - m.submodules.intissue = self.i - m.submodules.fpissue = self.f - - m.d.comb += self.issue_o.eq(self.i.g_issue_o | self.f.g_issue_o) - - return m - - def ports(self): - yield self.issue_o - yield from self.i - yield from self.f - - -def issue_unit_sim(dut): - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_issue_unit(): - dut = IssueUnit(32, 3) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_issue_unit.il", "w") as f: - f.write(vl) - - dut = IntFPIssueUnit(32, 3, 3) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_intfp_issue_unit.il", "w") as f: - f.write(vl) - - run_simulation(dut, issue_unit_sim(dut), vcd_name='test_issue_unit.vcd') - -if __name__ == '__main__': - test_issue_unit() diff --git a/scoreboard/ldst_dep_cell.py b/scoreboard/ldst_dep_cell.py deleted file mode 100644 index 40e1ffbc..00000000 --- a/scoreboard/ldst_dep_cell.py +++ /dev/null @@ -1,95 +0,0 @@ -""" Mitch Alsup 
6600-style LD/ST scoreboard Dependency Cell - -Relevant bugreports: -* http://bugs.libre-riscv.org/show_bug.cgi?id=81 - -""" - -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Elaboratable -from nmutil.latch import SRLatch - - -class LDSTDepCell(Elaboratable): - """ implements 11.4.12 mitch alsup load/store dependence cell, p45 - """ - def __init__(self): - # inputs - self.load_i = Signal(reset_less=True) # load pending in (top) - self.stor_i = Signal(reset_less=True) # store pending in (top) - self.issue_i = Signal(reset_less=True) # Issue in (top) - - self.load_hit_i = Signal(reset_less=True) # load hit in (right) - self.stwd_hit_i = Signal(reset_less=True) # store w/ data hit in (right) - - # outputs (latched rd/wr pend) - self.ld_hold_st_o = Signal(reset_less=True) # load holds st out (left) - self.st_hold_ld_o = Signal(reset_less=True) # st holds load out (left) - - def elaborate(self, platform): - m = Module() - m.submodules.war_l = war_l = SRLatch(sync=False) # WriteAfterRead Latch - m.submodules.raw_l = raw_l = SRLatch(sync=False) # ReadAfterWrite Latch - - # issue & store & load - used for both WAR and RAW Setting - i_s_l = Signal(reset_less=True) - m.d.comb += i_s_l.eq(self.issue_i & self.stor_i & self.load_i) - - # write after read latch: loads block stores - m.d.comb += war_l.s.eq(i_s_l) - m.d.comb += war_l.r.eq(self.load_i) # reset on LD - - # read after write latch: stores block loads - m.d.comb += raw_l.s.eq(i_s_l) - m.d.comb += raw_l.r.eq(self.stor_i) # reset on ST - - # Hold results (read out horizontally, accumulate in OR fashion) - m.d.comb += self.ld_hold_st_o.eq(war_l.qn & self.load_hit_i) - m.d.comb += self.st_hold_ld_o.eq(raw_l.qn & self.stwd_hit_i) - - return m - - def __iter__(self): - yield self.load_i - yield self.stor_i - yield self.issue_i - yield self.load_hit_i - yield self.stwd_hit_i - yield self.ld_hold_st_o - yield self.st_hold_ld_o - - def ports(self): - return list(self) - - -def dcell_sim(dut): - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_dcell(): - dut = LDSTDepCell() - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_ldst_dcell.il", "w") as f: - f.write(vl) - - run_simulation(dut, dcell_sim(dut), vcd_name='test_ldst_dcell.vcd') - -if __name__ == '__main__': - test_dcell() diff --git a/scoreboard/ldst_matrix.py b/scoreboard/ldst_matrix.py deleted file mode 100644 index b872155d..00000000 --- a/scoreboard/ldst_matrix.py +++ /dev/null @@ -1,135 +0,0 @@ -""" Mitch Alsup 6600-style LD/ST Memory Scoreboard Matrix (sparse vector) - -6600 LD/ST Dependency Table Matrix inputs / outputs ---------------------------------------------------- - -Relevant comments (p45-46): - -* If there are no WAR dependencies on a Load instruction with a computed - address it can assert Bank_Addressable and Translate_Addressable. 
- -* If there are no RAW dependencies on a Store instruction with both a - write permission and store data present it can assert Bank_Addressable - -Relevant bugreports: -* http://bugs.libre-riscv.org/show_bug.cgi?id=81 - -""" - -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Elaboratable, Array, Cat, Const - -from ldst_dep_cell import LDSTDepCell - - -class LDSTDepMatrix(Elaboratable): - """ implements 11.4.12 mitch alsup LD/ST Dependency Matrix, p46 - actually a sparse matrix along the diagonal. - - load-hold-store and store-hold-load accumulate in a priority-picking - fashion, ORing together. the OR gate from the dependency cell is - here. - """ - def __init__(self, n_ldst): - self.n_ldst = n_ldst # X and Y (FUs) - self.load_i = Signal(n_ldst, reset_less=True) # load pending in - self.stor_i = Signal(n_ldst, reset_less=True) # store pending in - self.issue_i = Signal(n_ldst, reset_less=True) # Issue in - - self.load_hit_i = Signal(n_ldst, reset_less=True) # load hit in - self.stwd_hit_i = Signal(n_ldst, reset_less=True) # store w/data hit in - - # outputs - self.ld_hold_st_o = Signal(reset_less=True) # load holds st out - self.st_hold_ld_o = Signal(reset_less=True) # st holds load out - - def elaborate(self, platform): - m = Module() - - # --- - # matrix of dependency cells - # --- - dm = Array(LDSTDepCell() for f in range(self.n_ldst)) - for fu in range(self.n_ldst): - setattr(m.submodules, "dm_fu%d" % (fu), dm[fu]) - - # --- - # connect Function Unit vector - # --- - lhs_l = [] - shl_l = [] - load_l = [] - stor_l = [] - issue_l = [] - lh_l = [] - sh_l = [] - for fu in range(self.n_ldst): - dc = dm[fu] - # accumulate load-hold-store / store-hold-load bits - lhs_l.append(dc.ld_hold_st_o) - shl_l.append(dc.st_hold_ld_o) - # accumulate inputs (for Cat'ing later) - TODO: must be a better way - load_l.append(dc.load_i) - stor_l.append(dc.stor_i) - issue_l.append(dc.issue_i) - lh_l.append(dc.load_hit_i) - sh_l.append(dc.stwd_hit_i) - - # connect cell inputs using Cat(*list_of_stuff) - m.d.comb += [Cat(*load_l).eq(self.load_i), - Cat(*stor_l).eq(self.stor_i), - Cat(*issue_l).eq(self.issue_i), - Cat(*lh_l).eq(self.load_hit_i), - Cat(*sh_l).eq(self.stwd_hit_i), - ] - # set the load-hold-store / store-hold-load OR-accumulated outputs - m.d.comb += self.ld_hold_st_o.eq(Cat(*lhs_l).bool()) - m.d.comb += self.st_hold_ld_o.eq(Cat(*shl_l).bool()) - - return m - - def __iter__(self): - yield self.load_i - yield self.stor_i - yield self.issue_i - yield self.load_hit_i - yield self.stwd_hit_i - yield self.ld_hold_st_o - yield self.st_hold_ld_o - - def ports(self): - return list(self) - -def d_matrix_sim(dut): - """ XXX TODO - """ - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - -def test_d_matrix(): - dut = LDSTDepMatrix(n_ldst=4) - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_ld_st_matrix.il", "w") as f: - f.write(vl) - - run_simulation(dut, d_matrix_sim(dut), vcd_name='test_ld_st_matrix.vcd') - -if __name__ == '__main__': - test_d_matrix() diff --git a/scoreboard/reg_select.py b/scoreboard/reg_select.py deleted file mode 100644 index eca3328e..00000000 --- a/scoreboard/reg_select.py +++ /dev/null @@ -1,23 +0,0 @@ -from nmigen import 
Elaboratable, Module, Signal - - -class Reg_Rsv(Elaboratable): - """ these are allocated per-Register (vertically), - and are each of length fu_count - """ - def __init__(self, fu_count): - self.fu_count = fu_count - self.dest_rsel_i = Signal(fu_count, reset_less=True) - self.src1_rsel_i = Signal(fu_count, reset_less=True) - self.src2_rsel_i = Signal(fu_count, reset_less=True) - self.dest_rsel_o = Signal(reset_less=True) - self.src1_rsel_o = Signal(reset_less=True) - self.src2_rsel_o = Signal(reset_less=True) - - def elaborate(self, platform): - m = Module() - m.d.comb += self.dest_rsel_o.eq(self.dest_rsel_i.bool()) - m.d.comb += self.src1_rsel_o.eq(self.src1_rsel_i.bool()) - m.d.comb += self.src2_rsel_o.eq(self.src2_rsel_i.bool()) - return m - diff --git a/scoreboard/shadow_fn.py b/scoreboard/shadow_fn.py deleted file mode 100644 index a60f9d95..00000000 --- a/scoreboard/shadow_fn.py +++ /dev/null @@ -1,79 +0,0 @@ -from nmigen.compat.sim import run_simulation -from nmigen.cli import verilog, rtlil -from nmigen import Module, Signal, Cat, Elaboratable -from nmutil.latch import SRLatch -from nmigen.lib.coding import Decoder - - -class ShadowFn(Elaboratable): - """ implements shadowing 11.5.1, p55, just the individual shadow function - """ - def __init__(self): - - # inputs - self.issue_i = Signal(reset_less=True) - self.shadow_i = Signal(reset_less=True) - self.s_fail_i = Signal(reset_less=True) - self.s_good_i = Signal(reset_less=True) - - # outputs - self.shadow_o = Signal(reset_less=True) - self.recover_o = Signal(reset_less=True) - - def elaborate(self, platform): - m = Module() - m.submodules.sl = sl = SRLatch(sync=False) - - m.d.comb += sl.s.eq(self.shadow_i & self.issue_i) - m.d.comb += sl.r.eq(self.s_good_i) - m.d.comb += self.recover_o.eq(sl.q & self.s_fail_i) - m.d.comb += self.shadow_o.eq(sl.q) - - return m - - def __iter__(self): - yield self.issue_i - yield self.shadow_i - yield self.s_fail_i - yield self.s_good_i - yield self.shadow_o - yield self.recover_o - - def ports(self): - return list(self) - - -def shadow_fn_unit_sim(dut): - yield dut.dest_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield dut.issue_i.eq(0) - yield - yield dut.src1_i.eq(1) - yield dut.issue_i.eq(1) - yield - yield - yield - yield dut.issue_i.eq(0) - yield - yield dut.go_read_i.eq(1) - yield - yield dut.go_read_i.eq(0) - yield - yield dut.go_write_i.eq(1) - yield - yield dut.go_write_i.eq(0) - yield - - -def test_shadow_fn_unit(): - dut = ShadowFn() - vl = rtlil.convert(dut, ports=dut.ports()) - with open("test_shadow_fn_unit.il", "w") as f: - f.write(vl) - - run_simulation(dut, shadow_fn_unit_sim(dut), - vcd_name='test_shadow_fn_unit.vcd') - -if __name__ == '__main__': - test_shadow_fn_unit() diff --git a/src/TLB/.gitignore b/src/TLB/.gitignore new file mode 100644 index 00000000..3324664b --- /dev/null +++ b/src/TLB/.gitignore @@ -0,0 +1,2 @@ +*.wpr +__pycache__ diff --git a/src/TLB/src/AddressEncoder.py b/src/TLB/src/AddressEncoder.py new file mode 100644 index 00000000..4c4b8d76 --- /dev/null +++ b/src/TLB/src/AddressEncoder.py @@ -0,0 +1,75 @@ +from nmigen import Module, Signal +from nmigen.lib.coding import Encoder, PriorityEncoder + +class AddressEncoder(): + """Address Encoder + + The purpose of this module is to take in a vector and + encode the bits that are one hot into an address. This module + combines both nmigen's Encoder and PriorityEncoder and will state + whether the input line has a single bit hot, multiple bits hot, + or no bits hot. 
The output line will always have the lowest value + address output. + + Usage: + The output is valid when either single or multiple match is high. + Otherwise output is 0. + """ + def __init__(self, width): + """ Arguments: + * width: The desired length of the input vector + """ + # Internal + self.encoder = Encoder(width) + self.p_encoder = PriorityEncoder(width) + + # Input + self.i = Signal(width) + + # Output + self.single_match = Signal(1) + self.multiple_match = Signal(1) + self.o = Signal(max=width) + + def elaborate(self, platform=None): + m = Module() + + # Add internal submodules + m.submodules.encoder = self.encoder + m.submodules.p_encoder = self.p_encoder + + m.d.comb += [ + self.encoder.i.eq(self.i), + self.p_encoder.i.eq(self.i) + ] + + # Steps: + # 1. check if the input vector is non-zero + # 2. if non-zero, check if single match or multiple match + # 3. set output line to be lowest value address output + + # If the priority encoder recieves an input of 0 + # If n is 1 then the output is not valid + with m.If(self.p_encoder.n): + m.d.comb += [ + self.single_match.eq(0), + self.multiple_match.eq(0), + self.o.eq(0) + ] + # If the priority encoder recieves an input > 0 + with m.Else(): + # Multiple Match if encoder n is invalid + with m.If(self.encoder.n): + m.d.comb += [ + self.single_match.eq(0), + self.multiple_match.eq(1) + ] + # Single Match if encoder n is valid + with m.Else(): + m.d.comb += [ + self.single_match.eq(1), + self.multiple_match.eq(0) + ] + # Always set output based on priority encoder output + m.d.comb += self.o.eq(self.p_encoder.o) + return m diff --git a/src/TLB/src/Cam.py b/src/TLB/src/Cam.py new file mode 100644 index 00000000..3c499211 --- /dev/null +++ b/src/TLB/src/Cam.py @@ -0,0 +1,124 @@ +from nmigen import Array, Cat, Module, Signal +from nmigen.lib.coding import Decoder +from nmigen.cli import main #, verilog + +from CamEntry import CamEntry +from AddressEncoder import AddressEncoder + +class Cam(): + """ Content Addressable Memory (CAM) + + The purpose of this module is to quickly look up whether an + entry exists given a data key. + This module will search for the given data in all internal entries + and output whether a single or multiple match was found. + If an single entry is found the address be returned and single_match + is set HIGH. If multiple entries are found the lowest address is + returned and multiple_match is set HIGH. If neither single_match or + multiple_match are HIGH this implies no match was found. To write + to the CAM set the address bus to the desired entry and set write_enable + HIGH. Entry managment should be performed one level above this block + as lookup is performed within. + + Notes: + The read and write operations take one clock cycle to complete. + Currently the read_warning line is present for interfacing but + is not necessary for this design. This module is capable of writing + in the first cycle, reading on the second, and output the correct + address on the third. 
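+
+        Illustrative sequence (a sketch only, not exercised by this patch),
+        assuming a Cam(data_size=4, cam_size=4) instance driven from a sync
+        testbench: cycle 1: enable=1, write_enable=1, address_in=2,
+        data_in=0xA (entry 2 now holds key 0xA); cycle 2: enable=1,
+        write_enable=0, data_in=0xA (lookup); cycle 3: single_match goes
+        HIGH and match_address reads back 2.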
+ """ + + def __init__(self, data_size, cam_size): + """ Arguments: + * data_size: (bits) The bit size of the data + * cam_size: (number) The number of entries in the CAM + """ + + # Internal + self.cam_size = cam_size + self.encoder = AddressEncoder(cam_size) + self.decoder = Decoder(cam_size) + self.entry_array = Array(CamEntry(data_size) for x in range(cam_size)) + + # Input + self.enable = Signal(1) + self.write_enable = Signal(1) + self.data_in = Signal(data_size) # The data to be written + self.data_mask = Signal(data_size) # mask for ternary writes + self.address_in = Signal(max=cam_size) # address of CAM Entry to write + + # Output + self.read_warning = Signal(1) # High when a read interrupts a write + self.single_match = Signal(1) # High when there is only one match + self.multiple_match = Signal(1) # High when there at least two matches + self.match_address = Signal(max=cam_size) # The lowest address matched + + def elaborate(self, platform=None): + m = Module() + # AddressEncoder for match types and output address + m.submodules.AddressEncoder = self.encoder + # Decoder is used to select which entry will be written to + m.submodules.Decoder = self.decoder + # CamEntry Array Submodules + # Note these area added anonymously + entry_array = self.entry_array + m.submodules += entry_array + + # Decoder logic + m.d.comb += [ + self.decoder.i.eq(self.address_in), + self.decoder.n.eq(0) + ] + + encoder_vector = [] + with m.If(self.enable): + # Set the key value for every CamEntry + for index in range(self.cam_size): + + # Write Operation + with m.If(self.write_enable): + with m.If(self.decoder.o[index]): + m.d.comb += entry_array[index].command.eq(2) + with m.Else(): + m.d.comb += entry_array[index].command.eq(0) + + # Read Operation + with m.Else(): + m.d.comb += entry_array[index].command.eq(1) + + # Send data input to all entries + m.d.comb += entry_array[index].data_in.eq(self.data_in) + # Send all entry matches to encoder + ematch = entry_array[index].match + encoder_vector.append(ematch) + + # Give input to and accept output from encoder module + m.d.comb += [ + self.encoder.i.eq(Cat(*encoder_vector)), + self.single_match.eq(self.encoder.single_match), + self.multiple_match.eq(self.encoder.multiple_match), + self.match_address.eq(self.encoder.o) + ] + + # If the CAM is not enabled set all outputs to 0 + with m.Else(): + m.d.comb += [ + self.read_warning.eq(0), + self.single_match.eq(0), + self.multiple_match.eq(0), + self.match_address.eq(0) + ] + + return m + + def ports(self): + return [self.enable, self.write_enable, + self.data_in, self.data_mask, + self.read_warning, self.single_match, + self.multiple_match, self.match_address] + + +if __name__ == '__main__': + cam = Cam(4, 4) + main(cam, ports=cam.ports()) + diff --git a/src/TLB/src/CamEntry.py b/src/TLB/src/CamEntry.py new file mode 100644 index 00000000..73081ce5 --- /dev/null +++ b/src/TLB/src/CamEntry.py @@ -0,0 +1,45 @@ +from nmigen import Module, Signal + +class CamEntry: + """ Content Addressable Memory (CAM) Entry + + The purpose of this module is to represent an entry within a CAM. + This module when given a read command will compare the given data + and output whether a match was found or not. When given a write + command it will write the given data into internal registers. 
+ """ + + def __init__(self, data_size): + """ Arguments: + * data_size: (bit count) The size of the data + """ + # Input + self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset + self.data_in = Signal(data_size) # Data input when writing + + # Output + self.match = Signal(1) # Result of the internal/input key comparison + self.data = Signal(data_size) + + def elaborate(self, platform=None): + m = Module() + with m.Switch(self.command): + with m.Case("00"): + m.d.sync += self.match.eq(0) + with m.Case("01"): + with m.If(self.data == self.data_in): + m.d.sync += self.match.eq(1) + with m.Else(): + m.d.sync += self.match.eq(0) + with m.Case("10"): + m.d.sync += [ + self.data.eq(self.data_in), + self.match.eq(0) + ] + with m.Case(): + m.d.sync += [ + self.match.eq(0), + self.data.eq(0) + ] + + return m diff --git a/src/TLB/src/LFSR.py b/src/TLB/src/LFSR.py new file mode 100644 index 00000000..d8b606ec --- /dev/null +++ b/src/TLB/src/LFSR.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information +from nmigen import Signal, Module, Const, Cat, Elaboratable +from nmigen.cli import verilog, rtlil + + +class LFSRPolynomial(set): + """ implements a polynomial for use in LFSR + """ + def __init__(self, exponents=()): + for e in exponents: + assert isinstance(e, int), TypeError("%s must be an int" % repr(e)) + assert (e >= 0), ValueError("%d must not be negative" % e) + set.__init__(self, set(exponents).union({0})) # must contain zero + + @property + def max_exponent(self): + return max(self) # derived from set, so this returns the max exponent + + @property + def exponents(self): + exponents = list(self) # get elements of set as a list + exponents.sort(reverse=True) + return exponents + + def __str__(self): + expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2) + retval = map(lambda i: expd[min(i,2)].format(i), self.exponents) + return " + ".join(retval) + + def __repr__(self): + return "LFSRPolynomial(%s)" % self.exponents + + +# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa +LFSR_POLY_2 = LFSRPolynomial([2, 1, 0]) +LFSR_POLY_3 = LFSRPolynomial([3, 2, 0]) +LFSR_POLY_4 = LFSRPolynomial([4, 3, 0]) +LFSR_POLY_5 = LFSRPolynomial([5, 3, 0]) +LFSR_POLY_6 = LFSRPolynomial([6, 5, 0]) +LFSR_POLY_7 = LFSRPolynomial([7, 6, 0]) +LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0]) +LFSR_POLY_9 = LFSRPolynomial([9, 5, 0]) +LFSR_POLY_10 = LFSRPolynomial([10, 7, 0]) +LFSR_POLY_11 = LFSRPolynomial([11, 9, 0]) +LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0]) +LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0]) +LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0]) +LFSR_POLY_15 = LFSRPolynomial([15, 14, 0]) +LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0]) +LFSR_POLY_17 = LFSRPolynomial([17, 14, 0]) +LFSR_POLY_18 = LFSRPolynomial([18, 11, 0]) +LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0]) +LFSR_POLY_20 = LFSRPolynomial([20, 17, 0]) +LFSR_POLY_21 = LFSRPolynomial([21, 19, 0]) +LFSR_POLY_22 = LFSRPolynomial([22, 21, 0]) +LFSR_POLY_23 = LFSRPolynomial([23, 18, 0]) +LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0]) + + +class LFSR(LFSRPolynomial, Elaboratable): + """ implements a Linear Feedback Shift Register + """ + def __init__(self, polynomial): + """ Inputs: + ------ + :polynomial: the polynomial to feedback on. 
may be a LFSRPolynomial + instance or an iterable of ints (list/tuple/generator) + :enable: enable (set LO to disable. NOTE: defaults to HI) + + Outputs: + ------- + :state: the LFSR state. bitwidth is taken from the polynomial + maximum exponent. + + Note: if an LFSRPolynomial is passed in as the input, because + LFSRPolynomial is derived from set() it's ok: + LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p) + """ + LFSRPolynomial.__init__(self, polynomial) + self.state = Signal(self.max_exponent, reset=1) + self.enable = Signal(reset=1) + + def elaborate(self, platform): + m = Module() + # do absolutely nothing if the polynomial is empty (always has a zero) + if self.max_exponent <= 1: + return m + + # create XOR-bunch, select bits from state based on exponent + feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain) + for exponent in self: + if exponent > 0: # don't have to skip, saves CPU cycles though + feedback ^= self.state[exponent - 1] + + # if enabled, shift-and-feedback + with m.If(self.enable): + # shift up lower bits by Cat'ing in a new bit zero (feedback) + newstate = Cat(feedback, self.state[:-1]) + m.d.sync += self.state.eq(newstate) + + return m + + +# example: Poly24 +if __name__ == '__main__': + p24 = rtlil.convert(LFSR(LFSR_POLY_24)) + with open("lfsr2_p24.il", "w") as f: + f.write(p24) diff --git a/src/TLB/src/LFSR.pyi b/src/TLB/src/LFSR.pyi new file mode 100644 index 00000000..64eb9115 --- /dev/null +++ b/src/TLB/src/LFSR.pyi @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information +from nmigen import Module +from typing import Iterable, Optional, Iterator, Any, Union +from typing_extensions import final + + +@final +class LFSRPolynomial(set): + def __init__(self, exponents: Iterable[int] = ()): + def elements() -> Iterable[int]: ... + @property + def exponents(self) -> list[int]: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + + +@final +class LFSR: + def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ... + @property + def width(self) -> int: ... + def elaborate(self, platform: Any) -> Module: ... diff --git a/src/TLB/src/Makefile b/src/TLB/src/Makefile new file mode 100644 index 00000000..1eb67acc --- /dev/null +++ b/src/TLB/src/Makefile @@ -0,0 +1,2 @@ +verilog: + python3 Cam.py generate -t v > Cam.v diff --git a/src/TLB/src/MemorySet.py b/src/TLB/src/MemorySet.py new file mode 100644 index 00000000..ea61bdf5 --- /dev/null +++ b/src/TLB/src/MemorySet.py @@ -0,0 +1,66 @@ +from nmigen import Cat, Memory, Module, Signal, Elaboratable +from nmigen.cli import main +from nmigen.cli import verilog, rtlil + + +class MemorySet(Elaboratable): + def __init__(self, data_size, tag_size, set_count, active): + self.active = active + input_size = tag_size + data_size # Size of the input data + memory_width = input_size + 1 # The width of the cache memory + self.active = active + self.data_size = data_size + self.tag_size = tag_size + + # XXX TODO, use rd-enable and wr-enable? 
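+        # row layout, LSB first (with active == 0 as used here): an
+        # "active" (valid) flag, then data_size bits of data, then the
+        # tag; this matches the data_start/tag_start slices in
+        # elaborate() and the Cat() on the write port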
+ self.mem = Memory(memory_width, set_count) + self.r = self.mem.read_port() + self.w = self.mem.write_port() + + # inputs (address) + self.cset = Signal(max=set_count) # The set to be checked + self.tag = Signal(tag_size) # The tag to find + self.data_i = Signal(data_size) # Incoming data + + # outputs + self.valid = Signal() + self.data_o = Signal(data_size) # Outgoing data (excludes tag) + + def elaborate(self, platform): + m = Module() + m.submodules.mem = self.mem + m.submodules.r = self.r + m.submodules.w = self.w + + # temporaries + active_bit = Signal() + tag_valid = Signal() + data_start = self.active + 1 + data_end = data_start + self.data_size + tag_start = data_end + tag_end = tag_start + self.tag_size + + # connect the read port address to the set/entry + read_port = self.r + m.d.comb += read_port.addr.eq(self.cset) + # Pull out active bit from data + data = read_port.data + m.d.comb += active_bit.eq(data[self.active]) + # Validate given tag vs stored tag + tag = data[tag_start:tag_end] + m.d.comb += tag_valid.eq(self.tag == tag) + # An entry is only valid if the tags match AND + # is marked as a valid entry + m.d.comb += self.valid.eq(tag_valid & active_bit) + + # output data: TODO, check rd-enable? + m.d.comb += self.data_o.eq(data[data_start:data_end]) + + # connect the write port addr to the set/entry (only if write enabled) + # (which is only done on a match, see SAC.write_entry below) + write_port = self.w + with m.If(write_port.en): + m.d.comb += write_port.addr.eq(self.cset) + m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag)) + + return m diff --git a/src/TLB/src/PermissionValidator.py b/src/TLB/src/PermissionValidator.py new file mode 100644 index 00000000..14f01e42 --- /dev/null +++ b/src/TLB/src/PermissionValidator.py @@ -0,0 +1,67 @@ +from nmigen import Module, Signal +from nmigen.cli import main + +from PteEntry import PteEntry + +class PermissionValidator(): + """ The purpose of this Module is to check the Permissions of a given PTE + against the requested access permissions. 
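+    In order, the checks are: the PTE valid bit, an ASID match (or the
+    Global bit), an exact XWR match against the requested access, and
+    finally the User/SUM rules depending on supervisor mode.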
+ + This module will either validate (by setting the valid bit HIGH) + the request or find a permission fault and invalidate (by setting + the valid bit LOW) the request + """ + + def __init__(self, asid_size, pte_size): + """ Arguments: + * asid_size: (bit count) The size of the asid to be processed + * pte_size: (bit count) The size of the pte to be processed + + Return: + * valid HIGH when permissions are correct + """ + # Internal + self.pte_entry = PteEntry(asid_size, pte_size) + + # Input + self.data = Signal(asid_size + pte_size); + self.xwr = Signal(3) # Execute, Write, Read + self.super_mode = Signal(1) # Supervisor Mode + self.super_access = Signal(1) # Supervisor Access + self.asid = Signal(15) # Address Space IDentifier (ASID) + + # Output + self.valid = Signal(1) # Denotes if the permissions are correct + + def elaborate(self, platform=None): + m = Module() + + m.submodules.pte_entry = self.pte_entry + + m.d.comb += self.pte_entry.i.eq(self.data) + + # Check if the entry is valid + with m.If(self.pte_entry.v): + # ASID match or Global Permission + # Note that the MSB bound is exclusive + with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g): + # Check Execute, Write, Read (XWR) Permissions + with m.If(self.pte_entry.xwr == self.xwr): + # Supervisor Logic + with m.If(self.super_mode): + # Valid if entry is not in user mode or supervisor + # has Supervisor User Memory (SUM) access via the + # SUM bit in the sstatus register + m.d.comb += self.valid.eq((~self.pte_entry.u) \ + | self.super_access) + # User logic + with m.Else(): + # Valid if the entry is in user mode only + m.d.comb += self.valid.eq(self.pte_entry.u) + with m.Else(): + m.d.comb += self.valid.eq(0) + with m.Else(): + m.d.comb += self.valid.eq(0) + with m.Else(): + m.d.comb += self.valid.eq(0) + return m \ No newline at end of file diff --git a/src/TLB/src/PteEntry.py b/src/TLB/src/PteEntry.py new file mode 100644 index 00000000..c0705457 --- /dev/null +++ b/src/TLB/src/PteEntry.py @@ -0,0 +1,66 @@ +from nmigen import Module, Signal +from nmigen.cli import main + +class PteEntry(): + """ The purpose of this Module is to centralize the parsing of Page + Table Entries (PTE) into one module to prevent common mistakes + and duplication of code. The control bits are parsed out for + ease of use. + + This module parses according to the standard PTE given by the + Volume II: RISC-V Privileged Architectures V1.10 Pg 60. + The Address Space IDentifier (ASID) is appended to the MSB of the input + and is parsed out as such. + + An valid input Signal would be: + ASID PTE + Bits:[78-64][63-0] + + The output PTE value will include the control bits. 
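+
+    The control-bit positions follow the standard RISC-V PTE layout
+    (V bit 0, XWR bits 1-3, U bit 4, G bit 5, A bit 6, D bit 7), which
+    is exactly how elaborate() slices the input below.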
+ """ + def __init__(self, asid_size, pte_size): + """ Arguments: + * asid_size: (bit count) The size of the asid to be processed + * pte_size: (bit count) The size of the pte to be processed + + Return: + * d The Dirty bit from the PTE portion of i + * a The Accessed bit from the PTE portion of i + * g The Global bit from the PTE portion of i + * u The User Mode bit from the PTE portion of i + * xwr The Execute/Write/Read bit from the PTE portion of i + * v The Valid bit from the PTE portion of i + * asid The asid portion of i + * pte The pte portion of i + """ + # Internal + self.asid_start = pte_size + self.asid_end = pte_size + asid_size + + # Input + self.i = Signal(asid_size + pte_size) + + # Output + self.d = Signal(1) # Dirty bit (From pte) + self.a = Signal(1) # Accessed bit (From pte) + self.g = Signal(1) # Global Access (From pte) + self.u = Signal(1) # User Mode (From pte) + self.xwr = Signal(3) # Execute Read Write (From pte) + self.v = Signal(1) # Valid (From pte) + self.asid = Signal(asid_size) # Associated Address Space IDentifier + self.pte = Signal(pte_size) # Full Page Table Entry + + def elaborate(self, platform=None): + m = Module() + # Pull out all control bites from PTE + m.d.comb += [ + self.d.eq(self.i[7]), + self.a.eq(self.i[6]), + self.g.eq(self.i[5]), + self.u.eq(self.i[4]), + self.xwr.eq(self.i[1:4]), + self.v.eq(self.i[0]) + ] + m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end]) + m.d.comb += self.pte.eq(self.i[0:self.asid_start]) + return m \ No newline at end of file diff --git a/src/TLB/src/SetAssociativeCache.py b/src/TLB/src/SetAssociativeCache.py new file mode 100644 index 00000000..0acd3488 --- /dev/null +++ b/src/TLB/src/SetAssociativeCache.py @@ -0,0 +1,274 @@ +""" + +Online simulator of 4-way set-associative cache: +http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html + +Python simulator of a N-way set-associative cache: +https://github.com/vaskevich/CacheSim/blob/master/cachesim.py +""" +import sys +sys.path.append("ariane/src/") + +from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable +from nmigen.compat.genlib import fsm +from nmigen.cli import main +from nmigen.cli import verilog, rtlil + +from AddressEncoder import AddressEncoder +from MemorySet import MemorySet + +# TODO: use a LFSR that advances continuously and picking the bottom +# few bits from it to select which cache line to replace, instead of PLRU +# http://bugs.libre-riscv.org/show_bug.cgi?id=71 +from plru import PLRU +from LFSR import LFSR, LFSR_POLY_24 + +SA_NA = "00" # no action (none) +SA_RD = "01" # read +SA_WR = "10" # write + + +class SetAssociativeCache(Elaboratable): + """ Set Associative Cache Memory + + The purpose of this module is to generate a memory cache given the + constraints passed in. This will create a n-way set associative cache. + It is expected for the SV TLB that the VMA will provide the set number + while the ASID provides the tag (still to be decided). + + """ + def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False): + """ Arguments + * tag_size (bits): The bit count of the tag + * data_size (bits): The bit count of the data to be stored + * set_count (number): The number of sets/entries in the cache + * way_count (number): The number of slots a data can be stored + in one set + * lfsr: if set, use an LFSR for (pseudo-randomly) selecting + set/entry to write to. 
otherwise, use a PLRU + """ + # Internals + self.lfsr_mode = lfsr + self.way_count = way_count # The number of slots in one set + self.tag_size = tag_size # The bit count of the tag + self.data_size = data_size # The bit count of the data to be stored + + # set up Memory array + self.mem_array = Array() # memory array + for i in range(way_count): + ms = MemorySet(data_size, tag_size, set_count, active=0) + self.mem_array.append(ms) + + # Finds valid entries + self.encoder = AddressEncoder(way_count) + + # setup PLRU or LFSR + if lfsr: + # LFSR mode + self.lfsr = LFSR(LFSR_POLY_24) + else: + # PLRU mode + self.plru = PLRU(way_count) # One block to handle plru calculations + self.plru_array = Array() # PLRU data on each set + for i in range(set_count): + name="plru%d" % i + self.plru_array.append(Signal(self.plru.TLBSZ, name=name)) + + # Input + self.enable = Signal(1) # Whether the cache is enabled + self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX) + self.cset = Signal(max=set_count) # The set to be checked + self.tag = Signal(tag_size) # The tag to find + self.data_i = Signal(data_size) # The input data + + # Output + self.ready = Signal(1) # 0 => Processing 1 => Ready for commands + self.hit = Signal(1) # Tag matched one way in the given set + self.multiple_hit = Signal(1) # Tag matched many ways in the given set + self.data_o = Signal(data_size) # The data linked to the matched tag + + def check_tags(self, m): + """ Validate the tags in the selected set. If one and only one + tag matches set its state to zero and increment all others + by one. We only advance to next state if a single hit is found. + """ + # Vector to store way valid results + # A zero denotes a way is invalid + valid_vector = [] + # Loop through memory to prep read/write ports and set valid_vector + for i in range(self.way_count): + valid_vector.append(self.mem_array[i].valid) + + # Pass encoder the valid vector + m.d.comb += self.encoder.i.eq(Cat(*valid_vector)) + + # Only one entry should be marked + # This is due to already verifying the tags + # matched and the valid bit is high + with m.If(self.hit): + m.next = "FINISHED_READ" + # Pull out data from the read port + data = self.mem_array[self.encoder.o].data_o + m.d.comb += self.data_o.eq(data) + if not self.lfsr_mode: + self.access_plru(m) + + # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k + with m.Elif(self.multiple_hit): + # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck + m.d.comb += self.data_o.eq(0) + + # No tag matches means no data + with m.Else(): + # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck + m.d.comb += self.data_o.eq(0) + + def access_plru(self, m): + """ An entry was accessed and the plru tree must now be updated + """ + # Pull out the set's entry being edited + plru_entry = self.plru_array[self.cset] + m.d.comb += [ + # Set the plru data to the current state + self.plru.plru_tree.eq(plru_entry), + # Set that the cache was accessed + self.plru.lu_access_i.eq(1) + ] + + def read(self, m): + """ Go through the read process of the cache. + This takes two cycles to complete. First it checks for a valid tag + and secondly it updates the LRU values. 
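+            In LFSR mode there is no PLRU state, so the second cycle
+            performs no update and only the tag check matters.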
+ """ + with m.FSM() as fsm_read: + with m.State("READY"): + m.d.comb += self.ready.eq(0) + # check_tags will set the state if the conditions are met + self.check_tags(m) + with m.State("FINISHED_READ"): + m.next = "READY" + m.d.comb += self.ready.eq(1) + if not self.lfsr_mode: + plru_tree_o = self.plru.plru_tree_o + m.d.sync += self.plru_array[self.cset].eq(plru_tree_o) + + def write_entry(self, m): + if not self.lfsr_mode: + m.d.comb += [# set cset (mem address) into PLRU + self.plru.plru_tree.eq(self.plru_array[self.cset]), + # and connect plru to encoder for write + self.encoder.i.eq(self.plru.replace_en_o) + ] + write_port = self.mem_array[self.encoder.o].w + else: + # use the LFSR to generate a random(ish) one of the mem array + lfsr_output = Signal(max=self.way_count) + lfsr_random = Signal(max=self.way_count) + m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits + # address too big, limit to range of array + m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count, + lfsr_output - self.way_count, + lfsr_output)) + write_port = self.mem_array[lfsr_random].w + + # then if there is a match from the encoder, enable the selected write + with m.If(self.encoder.single_match): + m.d.comb += write_port.en.eq(1) + + def write(self, m): + """ Go through the write process of the cache. + This takes two cycles to complete. First it writes the entry, + and secondly it updates the PLRU (in plru mode) + """ + with m.FSM() as fsm_write: + with m.State("READY"): + m.d.comb += self.ready.eq(0) + self.write_entry(m) + m.next ="FINISHED_WRITE" + with m.State("FINISHED_WRITE"): + m.d.comb += self.ready.eq(1) + if not self.lfsr_mode: + plru_entry = self.plru_array[self.cset] + m.d.sync += plru_entry.eq(self.plru.plru_tree_o) + m.next = "READY" + + + def elaborate(self, platform=None): + m = Module() + + # ---- + # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array + # ---- + + m.submodules.AddressEncoder = self.encoder + if self.lfsr_mode: + m.submodules.LFSR = self.lfsr + else: + m.submodules.PLRU = self.plru + + for i, mem in enumerate(self.mem_array): + setattr(m.submodules, "mem%d" % i, mem) + + # ---- + # select mode: PLRU connect to encoder, LFSR do... something + # ---- + + if not self.lfsr_mode: + # Set what entry was hit + m.d.comb += self.plru.lu_hit.eq(self.encoder.o) + else: + # enable LFSR + m.d.comb += self.lfsr.enable.eq(self.enable) + + # ---- + # connect hit/multiple hit to encoder output + # ---- + + m.d.comb += [ + self.hit.eq(self.encoder.single_match), + self.multiple_hit.eq(self.encoder.multiple_match), + ] + + # ---- + # connect incoming data/tag/cset(addr) to mem_array + # ---- + + for mem in self.mem_array: + write_port = mem.w + m.d.comb += [mem.cset.eq(self.cset), + mem.tag.eq(self.tag), + mem.data_i.eq(self.data_i), + write_port.en.eq(0), # default: disable write + ] + # ---- + # Commands: READ/WRITE/TODO + # ---- + + with m.If(self.enable): + with m.Switch(self.command): + # Search all sets at a particular tag + with m.Case(SA_RD): + self.read(m) + with m.Case(SA_WR): + self.write(m) + # Maybe catch multiple tags write here? + # TODO + # TODO: invalidate/flush, flush-all? 
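+
+        # Hedged sketch for the flush/invalidate TODO above (not implemented,
+        # nothing here is wired up): a third opcode, e.g. SA_FL = "11", could
+        # clear the valid bit of the addressed set in every way.  This assumes
+        # a hypothetical per-set "invalidate" input on MemorySet:
+        #
+        #     with m.Case(SA_FL):
+        #         for mem in self.mem_array:
+        #             m.d.comb += [mem.cset.eq(self.cset),
+        #                          mem.invalidate.eq(1)]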
+ + return m + + def ports(self): + return [self.enable, self.command, self.cset, self.tag, self.data_i, + self.ready, self.hit, self.multiple_hit, self.data_o] + + +if __name__ == '__main__': + sac = SetAssociativeCache(4, 8, 4, 6) + vl = rtlil.convert(sac, ports=sac.ports()) + with open("SetAssociativeCache.il", "w") as f: + f.write(vl) + + sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True) + vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports()) + with open("SetAssociativeCacheLFSR.il", "w") as f: + f.write(vl) diff --git a/src/TLB/src/TLB.py b/src/TLB/src/TLB.py new file mode 100644 index 00000000..3538bdc1 --- /dev/null +++ b/src/TLB/src/TLB.py @@ -0,0 +1,173 @@ +""" TLB Module + + The expected form of the data is: + * Item (Bits) + * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0) +""" + +from nmigen import Memory, Module, Signal, Cat +from nmigen.cli import main + +from PermissionValidator import PermissionValidator +from Cam import Cam + +class TLB(): + def __init__(self, asid_size, vma_size, pte_size, L1_size): + """ Arguments + * asid_size: Address Space IDentifier (ASID) typically 15 bits + * vma_size: Virtual Memory Address (VMA) typically 36 bits + * pte_size: Page Table Entry (PTE) typically 64 bits + + Notes: + These arguments should represent the largest possible size + defined by the MODE settings. See + Volume II: RISC-V Privileged Architectures V1.10 Page 57 + """ + + # Internal + self.state = 0 + # L1 Cache Modules + L1_size = 8 # XXX overridden incoming argument? + self.cam_L1 = Cam(vma_size, L1_size) + self.mem_L1 = Memory(asid_size + pte_size, L1_size) + + # Permission Validator + self.perm_validator = PermissionValidator(asid_size, pte_size) + + # Inputs + self.supermode = Signal(1) # Supervisor Mode + self.super_access = Signal(1) # Supervisor Access + self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2 + self.xwr = Signal(3) # Execute, Write, Read + self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64 + self.address_L1 = Signal(max=L1_size) + self.asid = Signal(asid_size) # Address Space IDentifier (ASID) + self.vma = Signal(vma_size) # Virtual Memory Address (VMA) + self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE) + + # Outputs + self.hit = Signal(1) # Denotes if the VMA had a mapped PTE + self.perm_valid = Signal(1) # Denotes if the permissions are correct + self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA + + def search(self, m, read_L1, write_L1): + """ searches the TLB + """ + m.d.comb += [ + write_L1.en.eq(0), + self.cam_L1.write_enable.eq(0), + self.cam_L1.data_in.eq(self.vma) + ] + # Match found in L1 CAM + match_found = Signal(reset_less=True) + m.d.comb += match_found.eq(self.cam_L1.single_match + | self.cam_L1.multiple_match) + with m.If(match_found): + # Memory shortcut variables + mem_address = self.cam_L1.match_address + # Memory Logic + m.d.comb += read_L1.addr.eq(mem_address) + # Permission Validator Logic + m.d.comb += [ + self.hit.eq(1), + # Set permission validator data to the correct + # register file data according to CAM match + # address + self.perm_validator.data.eq(read_L1.data), + # Execute, Read, Write + self.perm_validator.xwr.eq(self.xwr), + # Supervisor Mode + self.perm_validator.super_mode.eq(self.supermode), + # Supverisor Access + self.perm_validator.super_access.eq(self.super_access), + # Address Space IDentifier (ASID) + self.perm_validator.asid.eq(self.asid), + # Output result of permission validation + self.perm_valid.eq(self.perm_validator.valid) + ] + # Only output PTE 
if permissions are valid + with m.If(self.perm_validator.valid): + # XXX TODO - dummy for now + reg_data = Signal.like(self.pte_out) + m.d.comb += [ + self.pte_out.eq(reg_data) + ] + with m.Else(): + m.d.comb += [ + self.pte_out.eq(0) + ] + # Miss Logic + with m.Else(): + m.d.comb += [ + self.hit.eq(0), + self.perm_valid.eq(0), + self.pte_out.eq(0) + ] + + def write_l1(self, m, read_L1, write_L1): + """ writes to the L1 cache + """ + # Memory_L1 Logic + m.d.comb += [ + write_L1.en.eq(1), + write_L1.addr.eq(self.address_L1), + # The Cat places arguments from LSB -> MSB + write_L1.data.eq(Cat(self.pte_in, self.asid)) + ] + # CAM_L1 Logic + m.d.comb += [ + self.cam_L1.write_enable.eq(1), + self.cam_L1.data_in.eq(self.vma), + ] + + def elaborate(self, platform): + m = Module() + # Add submodules + # Submodules for L1 Cache + m.d.submodules.cam_L1 = self.cam_L1 + m.d.sumbmodules.read_L1 = read_L1 = self.mem_L1.read_port() + m.d.sumbmodules.read_L1 = write_L1 = self.mem_L1.write_port() + # Permission Validator Submodule + m.d.submodules.perm_valididator = self.perm_validator + + # When MODE specifies translation + # TODO add in different bit length handling ie prefix 0s + tlb_enable = Signal(reset_less=True) + m.d.comb += tlb_enable.eq(self.mode != 0) + + with m.If(tlb_enable): + m.d.comb += [ + self.cam_L1.enable.eq(1) + ] + with m.Switch(self.command): + # Search + with m.Case("01"): + self.search(m, read_L1, write_L1) + + # Write L1 + # Expected that the miss will be handled in software + with m.Case("10"): + self.write_l1(m, read_L1, write_L1) + + # TODO + #with m.Case("11"): + + # When disabled + with m.Else(): + m.d.comb += [ + self.cam_L1.enable.eq(0), + # XXX TODO - self.reg_file.enable.eq(0), + self.hit.eq(0), + self.perm_valid.eq(0), # XXX TODO, check this + self.pte_out.eq(0) + ] + return m + + +if __name__ == '__main__': + tlb = TLB(15, 36, 64, 4) + main(tlb, ports=[ tlb.supermode, tlb.super_access, tlb.command, + tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid, + tlb.vma, tlb.pte_in, + tlb.hit, tlb.perm_valid, tlb.pte_out, + ] + tlb.cam_L1.ports()) diff --git a/src/TLB/src/__init__.py b/src/TLB/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/TLB/src/ariane/TreePLRU.cpp b/src/TLB/src/ariane/TreePLRU.cpp new file mode 100644 index 00000000..2f6aeea5 --- /dev/null +++ b/src/TLB/src/ariane/TreePLRU.cpp @@ -0,0 +1,211 @@ +#include +#include +#include + + +#define NWAY 4 +#define NLINE 256 +#define HIT 0 +#define MISS 1 +#define MS 1000 +/* +Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing +Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt +four-way set associative - three bits + each bit represents one branch point in a binary decision tree; let 1 + represent that the left side has been referenced more recently than the + right side, and 0 vice-versa + are all 4 lines valid? + / \ + yes no, use an invalid line + | + | + | + bit_0 == 0? state | replace ref to | next state + / \ ------+-------- -------+----------- + y n 00x | line_0 line_0 | 11_ + / \ 01x | line_1 line_1 | 10_ + bit_1 == 0? bit_2 == 0? 
1x0 | line_2 line_2 | 0_1 + / \ / \ 1x1 | line_3 line_3 | 0_0 + y n y n + / \ / \ ('x' means ('_' means unchanged) + line_0 line_1 line_2 line_3 don't care) + 8-way set associative - 7 = 1+2+4 bits +16-way set associative - 15 = 1+2+4+8 bits +32-way set associative - 31 = 1+2+4+8+16 bits +64-way set associative - 63 = 1+2+4+8+16+32 bits +*/ +using namespace std; +struct AddressField { + uint64_t wd_idx : 2;//Unused + uint64_t offset : 4;//Unused + uint64_t index : 8;//NLINE = 256 = 2^8 + uint64_t tag : 50; +}; + +union Address { + uint32_t* p; + AddressField fields; +}; + +struct Cell { + bool v; + uint64_t tag; + + Cell() : v(false), tag(0) {} + + bool isHit(uint64_t tag) { + return v && (tag == this->tag); + } + + void fetch(uint32_t* address) { + Address addr; + addr.p = address; + addr.fields.offset = 0; + addr.fields.wd_idx = 0; + tag = addr.fields.tag; + v = true; + } +}; + +ostream& operator<<(ostream & out, const Cell& cell) { + out << " v:" << cell.v << " tag:" << hex << cell.tag; + return out; +} + +struct Block { + Cell cell[NWAY]; + uint32_t state; + uint64_t *mask;//Mask the state to get accurate value for specified 1 bit. + uint64_t *value; + uint64_t *next_value; + + Block() : state(0) { + switch (NWAY) { + case 4: + mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101}; + value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101}; + next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000}; + break; + case 8: + mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001, + 0b1010001}; + value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000, + 0b1010001}; + next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000, + 0b0000001, 0b0000000}; + break; + //TODO - more NWAY goes here. + default: + std::cout << "Error definition NWAY = " << NWAY << std::endl; + } + } + + uint32_t *getByTag(uint64_t tag, uint32_t *pway) { + for (int i = 0; i < NWAY; ++i) { + if (cell[i].isHit(tag)) { + *pway = i; + return pway; + } + } + return NULL; + } + + void setLRU(uint32_t *address) { + int way = 0; + uint32_t st = state; + for (int i = 0; i < NWAY; ++i) { + if ((state & mask[i]) == value[i]) { + state ^= mask[i]; + way = i; + break; + } + } + cell[way].fetch(address); + cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl; + } + + uint32_t *get(uint32_t *address, uint32_t *pway) { + Address addr; + addr.p = address; + uint32_t *d = getByTag(addr.fields.tag, pway); + if (d != NULL) { + return &d[addr.fields.offset]; + } + return d; + } + + int set(uint32_t *address) { + uint32_t way = 0; + uint32_t *p = get(address, &way); + if (p != NULL) { + printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state); + state &= ~mask[way]; + printf("%X --> ", state); + state |= next_value[way]; + printf("%X\n", state); + // *p = *address; //skip since address is fake. + return HIT; + } else { + setLRU(address); + return MISS; + } + } +}; + +ostream& operator<<(ostream & out, const Block& block) { + out << "state:" << block.state << " "; + for (int i = 0; i signal with a page fault exception + # 2. 
We got an access error because of insufficient permissions -> + # throw an access exception + m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0) + # Check whether we are allowed to access this memory region + # from a fetch perspective + + # XXX TODO: use PermissionValidator instead [we like modules] + m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \ + (((self.priv_lvl_i == PRIV_LVL_U) & \ + ~itlb_content.u) | \ + ((self.priv_lvl_i == PRIV_LVL_S) & \ + itlb_content.u))) + + # MMU enabled: address from TLB, request delayed until hit. + # Error when TLB hit and no access right or TLB hit and + # translated address not valid (e.g. AXI decode error), + # or when PTW performs walk due to ITLB miss and raises + # an error. + with m.If (self.enable_translation_i): + # we work with SV39, so if VM is enabled, check that + # all bits [63:38] are equal + with m.If (self.icache_areq_i.fetch_req & \ + ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \ + (self.icache_areq_i.fetch_vaddr[38:64]) == 0)): + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), + fe.tval.eq(self.icache_areq_i.fetch_vaddr), + fe.valid.eq(1) + ] + + m.d.comb += self.icache_areq_o.fetch_valid.eq(0) + + # 4K page + paddr = Signal.like(self.icache_areq_o.fetch_paddr) + paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12], + itlb_content.ppn) + m.d.comb += paddr.eq(paddr4k) + # Mega page + with m.If(itlb_is_2M): + m.d.comb += paddr[12:21].eq( + self.icache_areq_i.fetch_vaddr[12:21]) + # Giga page + with m.If(itlb_is_1G): + m.d.comb += paddr[12:30].eq( + self.icache_areq_i.fetch_vaddr[12:30]) + m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr) + + # --------- + # ITLB Hit + # -------- + # if we hit the ITLB output the request signal immediately + with m.If(itlb_lu_hit): + m.d.comb += self.icache_areq_o.fetch_valid.eq( + self.icache_areq_i.fetch_req) + # we got an access error + with m.If (iaccess_err): + # throw a page fault + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), + fe.tval.eq(self.icache_areq_i.fetch_vaddr), + fe.valid.eq(1) + ] + # --------- + # ITLB Miss + # --------- + # watch out for exceptions happening during walking the page table + with m.Elif(ptw_active & walking_instr): + m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error) + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT), + fe.tval.eq(uaddr64), + fe.valid.eq(1) + ] + + #----------------------- + # Data Interface + #----------------------- + + lsu_vaddr = Signal(64) + dtlb_pte = PTE() + misaligned_ex = RVException() + lsu_req = Signal() + lsu_is_store = Signal() + dtlb_hit = Signal() + dtlb_is_2M = Signal() + dtlb_is_1G = Signal() + + # check if we need to do translation or if we are always + # ready (e.g.: we are not translating anything) + m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i, + dtlb_lu_hit, 1)) + + # The data interface is simpler and only consists of a + # request/response interface + m.d.comb += [ + # save request and DTLB response + lsu_vaddr.eq(self.lsu_vaddr_i), + lsu_req.eq(self.lsu_req_i), + misaligned_ex.eq(self.misaligned_ex_i), + dtlb_pte.eq(dtlb_content), + dtlb_hit.eq(dtlb_lu_hit), + lsu_is_store.eq(self.lsu_is_store_i), + dtlb_is_2M.eq(dtlb_is_2M), + dtlb_is_1G.eq(dtlb_is_1G), + ] + m.d.sync += [ + self.lsu_paddr_o.eq(lsu_vaddr), + self.lsu_valid_o.eq(lsu_req), + self.lsu_exception_o.eq(misaligned_ex), + ] + + sverr = Signal() + usrerr = Signal() + + m.d.comb += [ + # mute 
misaligned exceptions if there is no request + # otherwise they will throw accidental exceptions + misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i), + + # SUM is not set and we are trying to access a user + # page in supervisor mode + sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \ + dtlb_pte.u), + # this is not a user page but we are in user mode and + # trying to access it + usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u), + + # Check if the User flag is set, then we may only + # access it in supervisor mode if SUM is enabled + daccess_err.eq(sverr | usrerr), + ] + + # translation is enabled and no misaligned exception occurred + with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid): + m.d.comb += lsu_req.eq(0) + # 4K page + paddr = Signal.like(lsu_vaddr) + paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn) + m.d.comb += paddr.eq(paddr4k) + # Mega page + with m.If(dtlb_is_2M): + m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21]) + # Giga page + with m.If(dtlb_is_1G): + m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30]) + m.d.sync += self.lsu_paddr_o.eq(paddr) + + # --------- + # DTLB Hit + # -------- + with m.If(dtlb_hit & lsu_req): + m.d.comb += lsu_req.eq(1) + # this is a store + with m.If (lsu_is_store): + # check if the page is write-able and + # we are not violating privileges + # also check if the dirty flag is set + with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), + le.tval.eq(lsu_vaddr), + le.valid.eq(1) + ] + + # this is a load, check for sufficient access + # privileges - throw a page fault if necessary + with m.Elif(daccess_err): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), + le.tval.eq(lsu_vaddr), + le.valid.eq(1) + ] + # --------- + # DTLB Miss + # --------- + # watch out for exceptions + with m.Elif (ptw_active & ~walking_instr): + # page table walker threw an exception + with m.If (ptw_error): + # an error makes the translation valid + m.d.comb += lsu_req.eq(1) + # the page table walker can only throw page faults + with m.If (lsu_is_store): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), + le.tval.eq(uaddr64), + le.valid.eq(1) + ] + with m.Else(): + m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), + le.tval.eq(uaddr64), + le.valid.eq(1) + ] + + return m + + def ports(self): + return [self.flush_i, self.enable_translation_i, + self.en_ld_st_translation_i, + self.lsu_req_i, + self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o, + self.lsu_valid_o, self.lsu_paddr_o, + self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i, + self.satp_ppn_i, self.asid_i, self.flush_tlb_i, + self.itlb_miss_o, self.dtlb_miss_o] + \ + self.icache_areq_i.ports() + self.icache_areq_o.ports() + \ + self.req_port_i.ports() + self.req_port_o.ports() + \ + self.misaligned_ex_i.ports() + self.lsu_exception_o.ports() + +if __name__ == '__main__': + mmu = MMU() + vl = rtlil.convert(mmu, ports=mmu.ports()) + with open("test_mmu.il", "w") as f: + f.write(vl) + diff --git a/src/TLB/src/ariane/src/plru.py b/src/TLB/src/ariane/src/plru.py new file mode 100644 index 00000000..95d515c4 --- /dev/null +++ b/src/TLB/src/ariane/src/plru.py @@ -0,0 +1,106 @@ +from nmigen import Signal, Module, Cat, Const +from nmigen.hdl.ir import Elaboratable +from math import log2 + +from ptw import TLBUpdate, PTE, ASID_WIDTH + +class PLRU(Elaboratable): + """ PLRU - Pseudo Least Recently Used Replacement + + PLRU-tree indexing: + lvl0 0 + / \ + / \ + 
lvl1 1 2 + / \ / \ + lvl2 3 4 5 6 + / \ /\/\ /\ + ... ... ... ... + """ + def __init__(self, entries): + self.entries = entries + self.lu_hit = Signal(entries) + self.replace_en_o = Signal(entries) + self.lu_access_i = Signal() + # Tree (bit per entry) + self.TLBSZ = 2*(self.entries-1) + self.plru_tree = Signal(self.TLBSZ) + self.plru_tree_o = Signal(self.TLBSZ) + + def elaborate(self, platform=None): + m = Module() + + # Just predefine which nodes will be set/cleared + # E.g. for a TLB with 8 entries, the for-loop is semantically + # equivalent to the following pseudo-code: + # unique case (1'b1) + # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1}; + # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0}; + # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1}; + # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0}; + # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1}; + # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0}; + # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1}; + # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0}; + # default: begin /* No hit */ end + # endcase + LOG_TLB = int(log2(self.entries)) + print(LOG_TLB) + for i in range(self.entries): + # we got a hit so update the pointer as it was least recently used + hit = Signal(reset_less=True) + m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i) + with m.If(hit): + # Set the nodes to the values we would expect + for lvl in range(LOG_TLB): + idx_base = (1< MSB, lvl1 <=> MSB-1, ... + shift = LOG_TLB - lvl; + new_idx = Const(~((i >> (shift-1)) & 1), (1, False)) + plru_idx = idx_base + (i >> shift) + print ("plru", i, lvl, hex(idx_base), + plru_idx, shift, new_idx) + m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx) + + # Decode tree to write enable signals + # Next for-loop basically creates the following logic for e.g. + # an 8 entry TLB (note: pseudo-code obviously): + # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1} + # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0} + # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1} + # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0} + # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1} + # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0} + # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1} + # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0} + # For each entry traverse the tree. If every tree-node matches + # the corresponding bit of the entry's index, this is + # the next entry to replace. + replace = [] + for i in range(self.entries): + en = [] + for lvl in range(LOG_TLB): + idx_base = (1< MSB, lvl1 <=> MSB-1, ... 
+ shift = LOG_TLB - lvl; + new_idx = (i >> (shift-1)) & 1; + plru_idx = idx_base + (i>>shift) + plru = Signal(reset_less=True, + name="plru-%d-%d-%d" % (i, lvl, plru_idx)) + m.d.comb += plru.eq(self.plru_tree[plru_idx]) + # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx; + if new_idx: + en.append(~plru) # yes inverted (using bool()) + else: + en.append(plru) # yes inverted (using bool()) + print ("plru", i, en) + # boolean logic manipulation: + # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2) + replace.append(~Cat(*en).bool()) + m.d.comb += self.replace_en_o.eq(Cat(*replace)) + + return m + + def ports(self): + return [self.entries, self.lu_hit, self.replace_en_o, + self.lu_access_i, self.plru_tree, self.plru_tree_o] \ No newline at end of file diff --git a/src/TLB/src/ariane/src/ptw.py b/src/TLB/src/ariane/src/ptw.py new file mode 100644 index 00000000..05ec2d7d --- /dev/null +++ b/src/TLB/src/ariane/src/ptw.py @@ -0,0 +1,539 @@ +""" +# Copyright 2018 ETH Zurich and University of Bologna. +# Copyright and related rights are licensed under the Solderpad Hardware +# License, Version 0.51 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# or agreed to in writing, software, hardware and materials distributed under +# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Author: David Schaffenrath, TU Graz +# Author: Florian Zaruba, ETH Zurich +# Date: 24.4.2017 +# Description: Hardware-PTW + +/* verilator lint_off WIDTH */ +import ariane_pkg::*; + +see linux kernel source: + +* "arch/riscv/include/asm/page.h" +* "arch/riscv/include/asm/mmu_context.h" +* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET) + +""" + +from nmigen import Const, Signal, Cat, Module +from nmigen.hdl.ast import ArrayProxy +from nmigen.cli import verilog, rtlil +from math import log2 + + +DCACHE_SET_ASSOC = 8 +CONFIG_L1D_SIZE = 32*1024 +DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC)) +DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH + +ASID_WIDTH = 8 + + +class DCacheReqI: + def __init__(self): + self.address_index = Signal(DCACHE_INDEX_WIDTH) + self.address_tag = Signal(DCACHE_TAG_WIDTH) + self.data_wdata = Signal(64) + self.data_req = Signal() + self.data_we = Signal() + self.data_be = Signal(8) + self.data_size = Signal(2) + self.kill_req = Signal() + self.tag_valid = Signal() + + def eq(self, inp): + res = [] + for (o, i) in zip(self.ports(), inp.ports()): + res.append(o.eq(i)) + return res + + def ports(self): + return [self.address_index, self.address_tag, + self.data_wdata, self.data_req, + self.data_we, self.data_be, self.data_size, + self.kill_req, self.tag_valid, + ] + +class DCacheReqO: + def __init__(self): + self.data_gnt = Signal() + self.data_rvalid = Signal() + self.data_rdata = Signal(64) # actually in PTE object format + + def eq(self, inp): + res = [] + for (o, i) in zip(self.ports(), inp.ports()): + res.append(o.eq(i)) + return res + + def ports(self): + return [self.data_gnt, self.data_rvalid, self.data_rdata] + + +class PTE: #(RecordObject): + def __init__(self): + self.v = Signal() + self.r = Signal() + self.w = Signal() + self.x = Signal() + self.u = Signal() + self.g = Signal() + self.a = Signal() + self.d = Signal() + self.rsw = Signal(2) + self.ppn = 
Signal(44) + self.reserved = Signal(10) + + def flatten(self): + return Cat(*self.ports()) + + def eq(self, x): + if isinstance(x, ArrayProxy): + res = [] + for o in self.ports(): + i = getattr(x, o.name) + res.append(i) + x = Cat(*res) + else: + x = x.flatten() + return self.flatten().eq(x) + + def __iter__(self): + """ order is critical so that flatten creates LSB to MSB + """ + yield self.v + yield self.r + yield self.w + yield self.x + yield self.u + yield self.g + yield self.a + yield self.d + yield self.rsw + yield self.ppn + yield self.reserved + + def ports(self): + return list(self) + + +class TLBUpdate: + def __init__(self, asid_width): + self.valid = Signal() # valid flag + self.is_2M = Signal() + self.is_1G = Signal() + self.vpn = Signal(27) + self.asid = Signal(asid_width) + self.content = PTE() + + def flatten(self): + return Cat(*self.ports()) + + def eq(self, x): + return self.flatten().eq(x.flatten()) + + def ports(self): + return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \ + self.content.ports() + + +# SV39 defines three levels of page tables +LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1 +LVL2 = Const(1, 2) +LVL3 = Const(2, 2) + + +class PTW: + def __init__(self, asid_width=8): + self.asid_width = asid_width + + self.flush_i = Signal() # flush everything, we need to do this because + # actually everything we do is speculative at this stage + # e.g.: there could be a CSR instruction that changes everything + self.ptw_active_o = Signal(reset=1) # active if not IDLE + self.walking_instr_o = Signal() # set when walking for TLB + self.ptw_error_o = Signal() # set when an error occurred + self.enable_translation_i = Signal() # CSRs indicate to enable SV39 + self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st + + self.lsu_is_store_i = Signal() # translation triggered by store + # PTW memory interface + self.req_port_i = DCacheReqO() + self.req_port_o = DCacheReqI() + + # to TLBs, update logic + self.itlb_update_o = TLBUpdate(asid_width) + self.dtlb_update_o = TLBUpdate(asid_width) + + self.update_vaddr_o = Signal(39) + + self.asid_i = Signal(self.asid_width) + # from TLBs + # did we miss? + self.itlb_access_i = Signal() + self.itlb_hit_i = Signal() + self.itlb_vaddr_i = Signal(64) + + self.dtlb_access_i = Signal() + self.dtlb_hit_i = Signal() + self.dtlb_vaddr_i = Signal(64) + # from CSR file + self.satp_ppn_i = Signal(44) # ppn from satp + self.mxr_i = Signal() + # Performance counters + self.itlb_miss_o = Signal() + self.dtlb_miss_o = Signal() + + def ports(self): + return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o, + ] + return [ + self.enable_translation_i, self.en_ld_st_translation_i, + self.lsu_is_store_i, self.req_port_i, self.req_port_o, + self.update_vaddr_o, + self.asid_i, + self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i, + self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i, + self.satp_ppn_i, self.mxr_i, + self.itlb_miss_o, self.dtlb_miss_o + ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports() + + def elaborate(self, platform): + m = Module() + + # input registers + data_rvalid = Signal() + data_rdata = Signal(64) + + # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata + # is spec'd in 64-bit binary-format: better to spec as Record? 
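+        # For reference, PTE.flatten() packs its fields LSB to MSB in the
+        # standard RISC-V PTE order: v, r, w, x, u, g, a, d, rsw[1:0],
+        # ppn[43:0], reserved.  Worked example (illustrative only):
+        # data_rdata = 0x43 = 0b100_0011 decodes to v=1, r=1, a=1 and
+        # ppn=0, i.e. a valid, readable, accessed leaf PTE.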
+ pte = PTE() + m.d.comb += pte.flatten().eq(data_rdata) + + # SV39 defines three levels of page tables + ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above) + ptw_lvl1 = Signal() + ptw_lvl2 = Signal() + ptw_lvl3 = Signal() + m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1), + ptw_lvl2.eq(ptw_lvl == LVL2), + ptw_lvl3.eq(ptw_lvl == LVL3)] + + # is this an instruction page table walk? + is_instr_ptw = Signal() + global_mapping = Signal() + # latched tag signal + tag_valid = Signal() + # register the ASID + tlb_update_asid = Signal(self.asid_width) + # register VPN we need to walk, SV39 defines a 39 bit virtual addr + vaddr = Signal(64) + # 4 byte aligned physical pointer + ptw_pptr = Signal(56) + + end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH + m.d.sync += [ + # Assignments + self.update_vaddr_o.eq(vaddr), + + self.walking_instr_o.eq(is_instr_ptw), + # directly output the correct physical address + self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]), + self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]), + # we are never going to kill this request + self.req_port_o.kill_req.eq(0), # XXX assign comb? + # we are never going to write with the HPTW + self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb? + # ----------- + # TLB Update + # ----------- + self.itlb_update_o.vpn.eq(vaddr[12:39]), + self.dtlb_update_o.vpn.eq(vaddr[12:39]), + # update the correct page table level + self.itlb_update_o.is_2M.eq(ptw_lvl2), + self.itlb_update_o.is_1G.eq(ptw_lvl1), + self.dtlb_update_o.is_2M.eq(ptw_lvl2), + self.dtlb_update_o.is_1G.eq(ptw_lvl1), + # output the correct ASID + self.itlb_update_o.asid.eq(tlb_update_asid), + self.dtlb_update_o.asid.eq(tlb_update_asid), + # set the global mapping bit + self.itlb_update_o.content.eq(pte), + self.itlb_update_o.content.g.eq(global_mapping), + self.dtlb_update_o.content.eq(pte), + self.dtlb_update_o.content.g.eq(global_mapping), + + self.req_port_o.tag_valid.eq(tag_valid), + ] + + #------------------- + # Page table walker + #------------------- + # A virtual address va is translated into a physical address pa as + # follows: + # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39, + # PAGESIZE=2^12 and LEVELS=3.) + # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. + # (For Sv32, PTESIZE=4.) + # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an + # access exception. + # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to + # step 5. Otherwise, this PTE is a pointer to the next level of + # the page table. + # Let i=i-1. If i < 0, stop and raise an access exception. + # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2. + # 5. A leaf PTE has been found. Determine if the requested memory + # access is allowed by the pte.r, pte.w, and pte.x bits. If not, + # stop and raise an access exception. Otherwise, the translation is + # successful. Set pte.a to 1, and, if the memory access is a + # store, set pte.d to 1. + # The translated physical address is given as follows: + # - pa.pgoff = va.pgoff. + # - If i > 0, then this is a superpage translation and + # pa.ppn[i-1:0] = va.vpn[i-1:0]. + # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. + # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned + # superpage stop and raise a page-fault exception. 
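+        # Worked example of the address split used below (comment only):
+        # for Sv39, va[11:0] is the page offset, va[20:12] = vpn[0],
+        # va[29:21] = vpn[1], va[38:30] = vpn[2].  E.g. va = 0x4000_0000
+        # gives vpn[2] = 1, vpn[1] = vpn[0] = 0, so the first PTE address
+        # is (satp.ppn << 12) + vpn[2]*8, which is exactly what the
+        # Cat(Const(0, 3), vaddr[30:39], satp_ppn) expressions in idle()
+        # construct.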
+ + m.d.sync += tag_valid.eq(0) + + # default assignments + m.d.comb += [ + # PTW memory interface + self.req_port_o.data_req.eq(0), + self.req_port_o.data_be.eq(Const(0xFF, 8)), + self.req_port_o.data_size.eq(Const(0b11, 2)), + self.req_port_o.data_we.eq(0), + self.ptw_error_o.eq(0), + self.itlb_update_o.valid.eq(0), + self.dtlb_update_o.valid.eq(0), + + self.itlb_miss_o.eq(0), + self.dtlb_miss_o.eq(0), + ] + + # ------------ + # State Machine + # ------------ + + with m.FSM() as fsm: + + with m.State("IDLE"): + self.idle(m, is_instr_ptw, ptw_lvl, global_mapping, + ptw_pptr, vaddr, tlb_update_asid) + + with m.State("WAIT_GRANT"): + self.grant(m, tag_valid, data_rvalid) + + with m.State("PTE_LOOKUP"): + # we wait for the valid signal + with m.If(data_rvalid): + self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, + data_rvalid, global_mapping, + is_instr_ptw, ptw_pptr) + + # Propagate error to MMU/LSU + with m.State("PROPAGATE_ERROR"): + m.next = "IDLE" + m.d.comb += self.ptw_error_o.eq(1) + + # wait for the rvalid before going back to IDLE + with m.State("WAIT_RVALID"): + with m.If(data_rvalid): + m.next = "IDLE" + + m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata), + data_rvalid.eq(self.req_port_i.data_rvalid) + ] + + return m + + def set_grant_state(self, m): + # should we have flushed before we got an rvalid, + # wait for it until going back to IDLE + with m.If(self.flush_i): + with m.If (self.req_port_i.data_gnt): + m.next = "WAIT_RVALID" + with m.Else(): + m.next = "IDLE" + with m.Else(): + m.next = "WAIT_GRANT" + + def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping, + ptw_pptr, vaddr, tlb_update_asid): + # by default we start with the top-most page table + m.d.sync += [is_instr_ptw.eq(0), + ptw_lvl.eq(LVL1), + global_mapping.eq(0), + self.ptw_active_o.eq(0), # deactive (IDLE) + ] + # work out itlb/dtlb miss + m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \ + self.itlb_access_i & \ + ~self.itlb_hit_i & \ + ~self.dtlb_access_i) + m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \ + self.dtlb_access_i & \ + ~self.dtlb_hit_i) + # we got an ITLB miss? + with m.If(self.itlb_miss_o): + pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39], + self.satp_ppn_i) + m.d.sync += [ptw_pptr.eq(pptr), + is_instr_ptw.eq(1), + vaddr.eq(self.itlb_vaddr_i), + tlb_update_asid.eq(self.asid_i), + ] + self.set_grant_state(m) + + # we got a DTLB miss? 
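+        # (Same root-pointer construction as the ITLB case above, but
+        # is_instr_ptw is left at 0 so the walk result updates the DTLB.)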
+ with m.Elif(self.dtlb_miss_o): + pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39], + self.satp_ppn_i) + m.d.sync += [ptw_pptr.eq(pptr), + vaddr.eq(self.dtlb_vaddr_i), + tlb_update_asid.eq(self.asid_i), + ] + self.set_grant_state(m) + + def grant(self, m, tag_valid, data_rvalid): + # we've got a data WAIT_GRANT so tell the + # cache that the tag is valid + + # send a request out + m.d.comb += self.req_port_o.data_req.eq(1) + # wait for the WAIT_GRANT + with m.If(self.req_port_i.data_gnt): + # send the tag valid signal one cycle later + m.d.sync += tag_valid.eq(1) + # should we have flushed before we got an rvalid, + # wait for it until going back to IDLE + with m.If(self.flush_i): + with m.If (~data_rvalid): + m.next = "WAIT_RVALID" + with m.Else(): + m.next = "IDLE" + with m.Else(): + m.next = "PTE_LOOKUP" + + def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, + data_rvalid, global_mapping, + is_instr_ptw, ptw_pptr): + # temporaries + pte_rx = Signal(reset_less=True) + pte_exe = Signal(reset_less=True) + pte_inv = Signal(reset_less=True) + pte_a = Signal(reset_less=True) + st_wd = Signal(reset_less=True) + m.d.comb += [pte_rx.eq(pte.r | pte.x), + pte_exe.eq(~pte.x | ~pte.a), + pte_inv.eq(~pte.v | (~pte.r & pte.w)), + pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))), + st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))] + + l1err = Signal(reset_less=True) + l2err = Signal(reset_less=True) + m.d.comb += [l2err.eq((ptw_lvl2) & pte.ppn[0:9] != Const(0, 9)), + l1err.eq((ptw_lvl1) & pte.ppn[0:18] != Const(0, 18)) ] + + # check if the global mapping bit is set + with m.If (pte.g): + m.d.sync += global_mapping.eq(1) + + m.next = "IDLE" + + # ------------- + # Invalid PTE + # ------------- + # If pte.v = 0, or if pte.r = 0 and pte.w = 1, + # stop and raise a page-fault exception. + with m.If (pte_inv): + m.next = "PROPAGATE_ERROR" + + # ----------- + # Valid PTE + # ----------- + + # it is a valid PTE + # if pte.r = 1 or pte.x = 1 it is a valid PTE + with m.Elif (pte_rx): + # Valid translation found (either 1G, 2M or 4K) + with m.If(is_instr_ptw): + # ------------ + # Update ITLB + # ------------ + # If page not executable, we can directly raise error. + # This doesn't put a useless entry into the TLB. + # The same idea applies to the access flag since we let + # the access flag be managed by SW. + with m.If (pte_exe): + m.next = "IDLE" + with m.Else(): + m.d.comb += self.itlb_update_o.valid.eq(1) + + with m.Else(): + # ------------ + # Update DTLB + # ------------ + # Check if the access flag has been set, otherwise + # throw page-fault and let software handle those bits. + # If page not readable (there are no write-only pages) + # directly raise an error. This doesn't put a useless + # entry into the TLB. 
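+                # (pte_a, defined at the top of lookup(), folds these checks
+                # together: the access flag must be set and the page must be
+                # readable, or executable with MXR enabled.)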
+ with m.If(pte_a): + m.d.comb += self.dtlb_update_o.valid.eq(1) + with m.Else(): + m.next = "PROPAGATE_ERROR" + # Request is a store: perform additional checks + # If the request was a store and the page not + # write-able, raise an error + # the same applies if the dirty flag is not set + with m.If (st_wd): + m.d.comb += self.dtlb_update_o.valid.eq(0) + m.next = "PROPAGATE_ERROR" + + # check if the ppn is correctly aligned: Case (6) + with m.If(l1err | l2err): + m.next = "PROPAGATE_ERROR" + m.d.comb += [self.dtlb_update_o.valid.eq(0), + self.itlb_update_o.valid.eq(0)] + + # this is a pointer to the next TLB level + with m.Else(): + # pointer to next level of page table + with m.If (ptw_lvl1): + # we are in the second level now + pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn) + m.d.sync += [ptw_pptr.eq(pptr), + ptw_lvl.eq(LVL2) + ] + with m.If(ptw_lvl2): + # here we received a pointer to the third level + pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn) + m.d.sync += [ptw_pptr.eq(pptr), + ptw_lvl.eq(LVL3) + ] + self.set_grant_state(m) + + with m.If (ptw_lvl3): + # Should already be the last level + # page table => Error + m.d.sync += ptw_lvl.eq(LVL3) + m.next = "PROPAGATE_ERROR" + + +if __name__ == '__main__': + ptw = PTW() + vl = rtlil.convert(ptw, ports=ptw.ports()) + with open("test_ptw.il", "w") as f: + f.write(vl) diff --git a/src/TLB/src/ariane/src/tlb.py b/src/TLB/src/ariane/src/tlb.py new file mode 100644 index 00000000..f768571e --- /dev/null +++ b/src/TLB/src/ariane/src/tlb.py @@ -0,0 +1,170 @@ +""" +# Copyright 2018 ETH Zurich and University of Bologna. +# Copyright and related rights are licensed under the Solderpad Hardware +# License, Version 0.51 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License at +# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law +# or agreed to in writing, software, hardware and materials distributed under +# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. 
+# +# Author: David Schaffenrath, TU Graz +# Author: Florian Zaruba, ETH Zurich +# Date: 21.4.2017 +# Description: Translation Lookaside Buffer, SV39 +# fully set-associative + +Implementation in c++: +https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp + +Text description: +https://people.cs.clemson.edu/~mark/464/p_lru.txt + +Online simulator: +http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html +""" +from math import log2 +from nmigen import Signal, Module, Cat, Const, Array +from nmigen.cli import verilog, rtlil +from nmigen.lib.coding import Encoder + +from ptw import TLBUpdate, PTE, ASID_WIDTH +from plru import PLRU +from tlb_content import TLBContent + +TLB_ENTRIES = 8 + +class TLB: + def __init__(self, tlb_entries=8, asid_width=8): + self.tlb_entries = tlb_entries + self.asid_width = asid_width + + self.flush_i = Signal() # Flush signal + # Lookup signals + self.lu_access_i = Signal() + self.lu_asid_i = Signal(self.asid_width) + self.lu_vaddr_i = Signal(64) + self.lu_content_o = PTE() + self.lu_is_2M_o = Signal() + self.lu_is_1G_o = Signal() + self.lu_hit_o = Signal() + # Update TLB + self.pte_width = len(self.lu_content_o.flatten()) + self.update_i = TLBUpdate(asid_width) + + def elaborate(self, platform): + m = Module() + + vpn2 = Signal(9) + vpn1 = Signal(9) + vpn0 = Signal(9) + + #------------- + # Translation + #------------- + + # SV39 defines three levels of page tables + m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]), + vpn1.eq(self.lu_vaddr_i[21:30]), + vpn2.eq(self.lu_vaddr_i[30:39]), + ] + + tc = [] + for i in range(self.tlb_entries): + tlc = TLBContent(self.pte_width, self.asid_width) + setattr(m.submodules, "tc%d" % i, tlc) + tc.append(tlc) + # connect inputs + tlc.update_i = self.update_i # saves a lot of graphviz links + m.d.comb += [tlc.vpn0.eq(vpn0), + tlc.vpn1.eq(vpn1), + tlc.vpn2.eq(vpn2), + tlc.flush_i.eq(self.flush_i), + #tlc.update_i.eq(self.update_i), + tlc.lu_asid_i.eq(self.lu_asid_i)] + tc = Array(tc) + + #-------------- + # Select hit + #-------------- + + # use Encoder to select hit index + # XXX TODO: assert that there's only one valid entry (one lu_hit) + hitsel = Encoder(self.tlb_entries) + m.submodules.hitsel = hitsel + + hits = [] + for i in range(self.tlb_entries): + hits.append(tc[i].lu_hit_o) + m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well) + idx = hitsel.o + + active = Signal(reset_less=True) + m.d.comb += active.eq(~hitsel.n) + with m.If(active): + # active hit, send selected as output + m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o), + self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o), + self.lu_hit_o.eq(1), + self.lu_content_o.flatten().eq(tc[idx].lu_content_o), + ] + + #-------------- + # PLRU. 
+ #-------------- + + p = PLRU(self.tlb_entries) + plru_tree = Signal(p.TLBSZ) + m.submodules.plru = p + + # connect PLRU inputs/outputs + # XXX TODO: assert that there's only one valid entry (one replace_en) + en = [] + for i in range(self.tlb_entries): + en.append(tc[i].replace_en_i) + m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags + p.lu_hit.eq(hitsel.i), + p.lu_access_i.eq(self.lu_access_i), + p.plru_tree.eq(plru_tree)] + m.d.sync += plru_tree.eq(p.plru_tree_o) + + #-------------- + # Sanity checks + #-------------- + + assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \ + "TLB size must be a multiple of 2 and greater than 1" + assert (self.asid_width >= 1), \ + "ASID width must be at least 1" + + return m + + """ + # Just for checking + function int countSetBits(logic[self.tlb_entries-1:0] vector); + automatic int count = 0; + foreach (vector[idx]) begin + count += vector[idx]; + end + return count; + endfunction + + assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1)) + else $error("More then one hit in TLB!"); $stop(); end + assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1)) + else $error("More then one TLB entry selected for next replace!"); + """ + + def ports(self): + return [self.flush_i, self.lu_access_i, + self.lu_asid_i, self.lu_vaddr_i, + self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o, + ] + self.lu_content_o.ports() + self.update_i.ports() + +if __name__ == '__main__': + tlb = TLB() + vl = rtlil.convert(tlb, ports=tlb.ports()) + with open("test_tlb.il", "w") as f: + f.write(vl) + diff --git a/src/TLB/src/ariane/src/tlb_content.py b/src/TLB/src/ariane/src/tlb_content.py new file mode 100644 index 00000000..024c5697 --- /dev/null +++ b/src/TLB/src/ariane/src/tlb_content.py @@ -0,0 +1,125 @@ +from nmigen import Signal, Module, Cat, Const + +from ptw import TLBUpdate, PTE + +class TLBEntry: + def __init__(self, asid_width): + self.asid = Signal(asid_width) + # SV39 defines three levels of page tables + self.vpn0 = Signal(9) + self.vpn1 = Signal(9) + self.vpn2 = Signal(9) + self.is_2M = Signal() + self.is_1G = Signal() + self.valid = Signal() + + def flatten(self): + return Cat(*self.ports()) + + def eq(self, x): + return self.flatten().eq(x.flatten()) + + def ports(self): + return [self.asid, self.vpn0, self.vpn1, self.vpn2, + self.is_2M, self.is_1G, self.valid] + +class TLBContent: + def __init__(self, pte_width, asid_width): + self.asid_width = asid_width + self.pte_width = pte_width + self.flush_i = Signal() # Flush signal + # Update TLB + self.update_i = TLBUpdate(asid_width) + self.vpn2 = Signal(9) + self.vpn1 = Signal(9) + self.vpn0 = Signal(9) + self.replace_en_i = Signal() # replace the following entry, + # set by replacement strategy + # Lookup signals + self.lu_asid_i = Signal(asid_width) + self.lu_content_o = Signal(pte_width) + self.lu_is_2M_o = Signal() + self.lu_is_1G_o = Signal() + self.lu_hit_o = Signal() + + def elaborate(self, platform): + m = Module() + + tags = TLBEntry(self.asid_width) + content = Signal(self.pte_width) + + m.d.comb += [self.lu_hit_o.eq(0), + self.lu_is_2M_o.eq(0), + self.lu_is_1G_o.eq(0)] + + # temporaries for 1st level match + asid_ok = Signal(reset_less=True) + vpn2_ok = Signal(reset_less=True) + tags_ok = Signal(reset_less=True) + vpn2_hit = Signal(reset_less=True) + m.d.comb += [tags_ok.eq(tags.valid), + asid_ok.eq(tags.asid == self.lu_asid_i), + vpn2_ok.eq(tags.vpn2 == self.vpn2), + vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)] + # temporaries for 2nd level match + vpn1_ok = 
Signal(reset_less=True) + tags_2M = Signal(reset_less=True) + vpn0_ok = Signal(reset_less=True) + vpn0_or_2M = Signal(reset_less=True) + m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1), + tags_2M.eq(tags.is_2M), + vpn0_ok.eq(self.vpn0 == tags.vpn0), + vpn0_or_2M.eq(tags_2M | vpn0_ok)] + # first level match, this may be a giga page, + # check the ASID flags as well + with m.If(vpn2_hit): + # second level + with m.If (tags.is_1G): + m.d.comb += [ self.lu_content_o.eq(content), + self.lu_is_1G_o.eq(1), + self.lu_hit_o.eq(1), + ] + # not a giga page hit so check further + with m.Elif(vpn1_ok): + # this could be a 2 mega page hit or a 4 kB hit + # output accordingly + with m.If(vpn0_or_2M): + m.d.comb += [ self.lu_content_o.eq(content), + self.lu_is_2M_o.eq(tags.is_2M), + self.lu_hit_o.eq(1), + ] + # ------------------ + # Update or Flush + # ------------------ + + # temporaries + replace_valid = Signal(reset_less=True) + m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i) + + # flush + with m.If (self.flush_i): + # invalidate (flush) conditions: all if zero or just this ASID + with m.If (self.lu_asid_i == Const(0, self.asid_width) | + (self.lu_asid_i == tags.asid)): + m.d.sync += tags.valid.eq(0) + + # normal replacement + with m.Elif(replace_valid): + m.d.sync += [ # update tag array + tags.asid.eq(self.update_i.asid), + tags.vpn2.eq(self.update_i.vpn[18:27]), + tags.vpn1.eq(self.update_i.vpn[9:18]), + tags.vpn0.eq(self.update_i.vpn[0:9]), + tags.is_1G.eq(self.update_i.is_1G), + tags.is_2M.eq(self.update_i.is_2M), + tags.valid.eq(1), + # and content as well + content.eq(self.update_i.content.flatten()) + ] + return m + + def ports(self): + return [self.flush_i, + self.lu_asid_i, + self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o, + ] + self.update_i.content.ports() + self.update_i.ports() diff --git a/src/TLB/src/ariane/test/test_plru.py b/src/TLB/src/ariane/test/test_plru.py new file mode 100644 index 00000000..9b040e1d --- /dev/null +++ b/src/TLB/src/ariane/test/test_plru.py @@ -0,0 +1,15 @@ +import sys +sys.path.append("../src") +sys.path.append("../../../TestUtil") + +from plru import PLRU + +from nmigen.compat.sim import run_simulation + +def testbench(dut): + yield + +if __name__ == "__main__": + dut = PLRU(4) + run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd") + print("PLRU Unit Test Success") \ No newline at end of file diff --git a/src/TLB/src/ariane/test/test_ptw.py b/src/TLB/src/ariane/test/test_ptw.py new file mode 100644 index 00000000..e9c5324c --- /dev/null +++ b/src/TLB/src/ariane/test/test_ptw.py @@ -0,0 +1,127 @@ +import sys +sys.path.append("../src") +sys.path.append("../../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from ptw import PTW, PTE + + +def testbench(dut): + + addr = 0x8000000 + + #pte = PTE() + #yield pte.v.eq(1) + #yield pte.r.eq(1) + + yield dut.req_port_i.data_gnt.eq(1) + yield dut.req_port_i.data_rvalid.eq(1) + yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten()) + + # data lookup + yield dut.en_ld_st_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000000) + + yield + yield + yield + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x200000) + + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + + # 
data lookup, PTW levels 1-2-3 + addr = 0x4000000 + yield dut.dtlb_vaddr_i.eq(addr) + yield dut.mxr_i.eq(0x1) + yield dut.req_port_i.data_gnt.eq(1) + yield dut.req_port_i.data_rvalid.eq(1) + yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten()) + + yield dut.en_ld_st_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(addr) + + yield + yield + yield + yield + yield + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + yield + yield + + + # instruction lookup + yield dut.en_ld_st_translation_i.eq(0) + yield dut.enable_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x800000) + + yield + yield + yield + + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x200000) + + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x800011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + + yield + + + +if __name__ == "__main__": + dut = PTW() + run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd") + print("PTW Unit Test Success") diff --git a/src/TLB/src/ariane/test/test_tlb.py b/src/TLB/src/ariane/test/test_tlb.py new file mode 100644 index 00000000..aab1d43c --- /dev/null +++ b/src/TLB/src/ariane/test/test_tlb.py @@ -0,0 +1,69 @@ +import sys +sys.path.append("../src") +sys.path.append("../../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from tlb import TLB + +def set_vaddr(addr): + yield dut.lu_vaddr_i.eq(addr) + yield dut.update_i.vpn.eq(addr>>12) + + +def testbench(dut): + yield dut.lu_access_i.eq(1) + yield dut.lu_asid_i.eq(1) + yield dut.update_i.valid.eq(1) + yield dut.update_i.is_1G.eq(0) + yield dut.update_i.is_2M.eq(0) + yield dut.update_i.asid.eq(1) + yield dut.update_i.content.ppn.eq(0) + yield dut.update_i.content.rsw.eq(0) + yield dut.update_i.content.r.eq(1) + + yield + + addr = 0x80000 + yield from set_vaddr(addr) + yield + + addr = 0x90001 + yield from set_vaddr(addr) + yield + + addr = 0x28000000 + yield from set_vaddr(addr) + yield + + addr = 0x28000001 + yield from set_vaddr(addr) + + addr = 0x28000001 + yield from set_vaddr(addr) + yield + + addr = 0x1000040000 + yield from set_vaddr(addr) + yield + + addr = 0x1000040001 + yield from set_vaddr(addr) + yield + + yield dut.update_i.is_1G.eq(1) + addr = 0x2040000 + yield from set_vaddr(addr) + yield + + yield dut.update_i.is_1G.eq(1) + addr = 0x2040001 + yield from set_vaddr(addr) + yield + + yield + + +if __name__ == "__main__": + dut = TLB() + run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd") diff --git a/src/TLB/test/__init__.py b/src/TLB/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/TLB/test/test_LFSR2.py b/src/TLB/test/test_LFSR2.py new file mode 100644 index 00000000..889a042f --- /dev/null +++ b/src/TLB/test/test_LFSR2.py @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information +import sys +sys.path.append("../src") +sys.path.append("../../TestUtil") +from LFSR import LFSR, LFSRPolynomial, LFSR_POLY_3 + +from nmigen.back.pysim import Simulator, Delay, Tick +import unittest + + +class 
TestLFSR(unittest.TestCase): + def test_poly(self): + v = LFSRPolynomial() + self.assertEqual(repr(v), "LFSRPolynomial([0])") + self.assertEqual(str(v), "1") + v = LFSRPolynomial([1]) + self.assertEqual(repr(v), "LFSRPolynomial([1, 0])") + self.assertEqual(str(v), "x + 1") + v = LFSRPolynomial([0, 1]) + self.assertEqual(repr(v), "LFSRPolynomial([1, 0])") + self.assertEqual(str(v), "x + 1") + v = LFSRPolynomial([1, 2]) + self.assertEqual(repr(v), "LFSRPolynomial([2, 1, 0])") + self.assertEqual(str(v), "x^2 + x + 1") + v = LFSRPolynomial([2]) + self.assertEqual(repr(v), "LFSRPolynomial([2, 0])") + self.assertEqual(str(v), "x^2 + 1") + self.assertEqual(str(LFSR_POLY_3), "x^3 + x^2 + 1") + + def test_lfsr_3(self): + module = LFSR(LFSR_POLY_3) + traces = [module.state, module.enable] + with Simulator(module, + vcd_file=open("Waveforms/test_LFSR2.vcd", "w"), + gtkw_file=open("Waveforms/test_LFSR2.gtkw", "w"), + traces=traces) as sim: + sim.add_clock(1e-6, 0.25e-6) + delay = Delay(1e-7) + + def async_process(): + yield module.enable.eq(0) + yield Tick() + self.assertEqual((yield module.state), 0x1) + yield Tick() + self.assertEqual((yield module.state), 0x1) + yield module.enable.eq(1) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x2) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x5) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x3) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x7) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x6) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x4) + yield Tick() + yield delay + self.assertEqual((yield module.state), 0x1) + yield Tick() + + sim.add_process(async_process) + sim.run() diff --git a/src/TLB/test/test_address_encoder.py b/src/TLB/test/test_address_encoder.py new file mode 100644 index 00000000..29537136 --- /dev/null +++ b/src/TLB/test/test_address_encoder.py @@ -0,0 +1,107 @@ +import sys +sys.path.append("../src") +sys.path.append("../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from AddressEncoder import AddressEncoder + +from test_helper import assert_eq, assert_ne, assert_op + +# This function allows for the easy setting of values to the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# i (Input): The array of single bits to be written +def set_encoder(dut, i): + yield dut.i.eq(i) + yield + +# Checks the single match of the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# sm (Single Match): The expected match result +# op (Operation): (0 => ==), (1 => !=) +def check_single_match(dut, sm, op): + out_sm = yield dut.single_match + assert_op("Single Match", out_sm, sm, op) + +# Checks the multiple match of the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# mm (Multiple Match): The expected match result +# op (Operation): (0 => ==), (1 => !=) +def check_multiple_match(dut, mm, op): + out_mm = yield dut.multiple_match + assert_op("Multiple Match", out_mm, mm, op) + +# Checks the output of the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# o (Output): The expected output +# op (Operation): (0 => ==), (1 => !=) +def check_output(dut, o, op): + out_o = yield dut.o + assert_op("Output", out_o, o, op) + +# Checks the state of the AddressEncoder +# Arguments: +# dut: The AddressEncoder being tested +# sm (Single Match): The expected match result +# mm (Multiple Match): The expected match result +# o 
(Output): The expected output +# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +# o_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +def check_all(dut, sm, mm, o, sm_op, mm_op, o_op): + yield from check_single_match(dut, sm, sm_op) + yield from check_multiple_match(dut, mm, mm_op) + yield from check_output(dut, o, o_op) + +def testbench(dut): + # Check invalid input + in_val = 0b000 + single_match = 0 + multiple_match = 0 + output = 0 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + # Check single bit + in_val = 0b001 + single_match = 1 + multiple_match = 0 + output = 0 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + # Check another single bit + in_val = 0b100 + single_match = 1 + multiple_match = 0 + output = 2 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + # Check multiple match + # We expected the lowest bit to be returned which is address 0 + in_val = 0b101 + single_match = 0 + multiple_match = 1 + output = 0 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + + # Check another multiple match + # We expected the lowest bit to be returned which is address 1 + in_val = 0b110 + single_match = 0 + multiple_match = 1 + output = 1 + yield from set_encoder(dut, in_val) + yield from check_all(dut, single_match, multiple_match, output, 0, 0, 0) + +if __name__ == "__main__": + dut = AddressEncoder(4) + run_simulation(dut, testbench(dut), + vcd_name="Waveforms/test_address_encoder.vcd") + print("AddressEncoder Unit Test Success") diff --git a/src/TLB/test/test_cam.py b/src/TLB/test/test_cam.py new file mode 100644 index 00000000..0047f538 --- /dev/null +++ b/src/TLB/test/test_cam.py @@ -0,0 +1,207 @@ +import sys +sys.path.append("../src") +sys.path.append("../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from Cam import Cam + +from test_helper import assert_eq, assert_ne, assert_op + +# This function allows for the easy setting of values to the Cam +# Arguments: +# dut: The Cam being tested +# e (Enable): Whether the block is going to be enabled +# we (Write Enable): Whether the Cam will write on the next cycle +# a (Address): Where the data will be written if write enable is high +# d (Data): Either what we are looking for or will write to the address +def set_cam(dut, e, we, a, d): + yield dut.enable.eq(e) + yield dut.write_enable.eq(we) + yield dut.address_in.eq(a) + yield dut.data_in.eq(d) + yield + +# Checks the multiple match of the Cam +# Arguments: +# dut: The Cam being tested +# mm (Multiple Match): The expected match result +# op (Operation): (0 => ==), (1 => !=) +def check_multiple_match(dut, mm, op): + out_mm = yield dut.multiple_match + assert_op("Multiple Match", out_mm, mm, op) + +# Checks the single match of the Cam +# Arguments: +# dut: The Cam being tested +# sm (Single Match): The expected match result +# op (Operation): (0 => ==), (1 => !=) +def check_single_match(dut, sm, op): + out_sm = yield dut.single_match + assert_op("Single Match", out_sm, sm, op) + +# Checks the address output of the Cam +# Arguments: +# dut: The Cam being tested +# ma (Match Address): The expected match result +# op (Operation): (0 => ==), (1 => !=) +def check_match_address(dut, ma, op): + 
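+    # sample the CAM's match_address output and compare it against ma
+    # using the requested comparison (op: 0 => ==, 1 => !=)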
out_ma = yield dut.match_address + assert_op("Match Address", out_ma, ma, op) + +# Checks the state of the Cam +# Arguments: +# dut: The Cam being tested +# sm (Single Match): The expected match result +# mm (Multiple Match): The expected match result +# ma: (Match Address): The expected address output +# ss_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +# mm_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +# ma_op (Operation): Operation for the address assertion (0 => ==), (1 => !=) +def check_all(dut, mm, sm, ma, mm_op, sm_op, ma_op): + yield from check_multiple_match(dut, mm, mm_op) + yield from check_single_match(dut, sm, sm_op) + yield from check_match_address(dut, ma, ma_op) + +def testbench(dut): + # NA + enable = 0 + write_enable = 0 + address = 0 + data = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Read Miss Multiple + # Note that the default starting entry data bits are all 0 + enable = 1 + write_enable = 0 + address = 0 + data = 0 + multiple_match = 1 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_multiple_match(dut, multiple_match, 0) + + # Read Miss + # Note that the default starting entry data bits are all 0 + enable = 1 + write_enable = 0 + address = 0 + data = 1 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Write Entry 0 + enable = 1 + write_enable = 1 + address = 0 + data = 4 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Read Hit Entry 0 + enable = 1 + write_enable = 0 + address = 0 + data = 4 + multiple_match = 0 + single_match = 1 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) + + # Search Hit + enable = 1 + write_enable = 0 + address = 0 + data = 4 + multiple_match = 0 + single_match = 1 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_all(dut, multiple_match, single_match, address, 0, 0, 0) + + # Search Miss + enable = 1 + write_enable = 0 + address = 0 + data = 5 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Multiple Match test + # Write Entry 1 + enable = 1 + write_enable = 1 + address = 1 + data = 5 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Write Entry 2 + # Same data as Entry 1 + enable = 1 + write_enable = 1 + address = 2 + data = 5 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + # Read Hit Data 5 + enable = 1 + write_enable = 0 + address = 1 + data = 5 + multiple_match = 1 + single_match = 0 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_all(dut, multiple_match, single_match, address,0,0,0) + + # Verify read_warning is not caused + # Write Entry 0 + enable = 1 + write_enable = 1 + address = 0 + data = 7 + multiple_match = 0 + single_match = 0 + yield from set_cam(dut, enable, 
write_enable, address, data) + # Note there is no yield we immediately attempt to read in the next cycle + + # Read Hit Data 7 + enable = 1 + write_enable = 0 + address = 0 + data = 7 + multiple_match = 0 + single_match = 1 + yield from set_cam(dut, enable, write_enable, address, data) + yield + yield from check_single_match(dut, single_match, 0) + + yield + + +if __name__ == "__main__": + dut = Cam(4, 4) + run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_cam.vcd") + print("Cam Unit Test Success") diff --git a/src/TLB/test/test_cam_entry.py b/src/TLB/test/test_cam_entry.py new file mode 100644 index 00000000..7fcd7942 --- /dev/null +++ b/src/TLB/test/test_cam_entry.py @@ -0,0 +1,108 @@ +import sys +sys.path.append("../src") +sys.path.append("../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from test_helper import assert_eq, assert_ne, assert_op +from CamEntry import CamEntry + +# This function allows for the easy setting of values to the Cam Entry +# Arguments: +# dut: The CamEntry being tested +# c (command): NA (0), Read (1), Write (2), Reserve (3) +# d (data): The data to be set +def set_cam_entry(dut, c, d): + # Write desired values + yield dut.command.eq(c) + yield dut.data_in.eq(d) + yield + # Reset all lines + yield dut.command.eq(0) + yield dut.data_in.eq(0) + yield + +# Checks the data state of the CAM entry +# Arguments: +# dut: The CamEntry being tested +# d (Data): The expected data +# op (Operation): (0 => ==), (1 => !=) +def check_data(dut, d, op): + out_d = yield dut.data + assert_op("Data", out_d, d, op) + +# Checks the match state of the CAM entry +# Arguments: +# dut: The CamEntry being tested +# m (Match): The expected match +# op (Operation): (0 => ==), (1 => !=) +def check_match(dut, m, op): + out_m = yield dut.match + assert_op("Match", out_m, m, op) + +# Checks the state of the CAM entry +# Arguments: +# dut: The CamEntry being tested +# d (data): The expected data +# m (match): The expected match +# d_op (Operation): Operation for the data assertion (0 => ==), (1 => !=) +# m_op (Operation): Operation for the match assertion (0 => ==), (1 => !=) +def check_all(dut, d, m, d_op, m_op): + yield from check_data(dut, d, d_op) + yield from check_match(dut, m, m_op) + +# This testbench goes through the paces of testing the CamEntry module +# It is done by writing and then reading various combinations of key/data pairs +# and reading the results with varying keys to verify the resulting stored +# data is correct. 
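+# The sequence below exercises: write, read miss, read hit, overwrite,
+# read hit on the new data, and finally reset.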
+def testbench(dut): + # Check write + command = 2 + data = 1 + match = 0 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 0, 0) + + # Check read miss + command = 1 + data = 2 + match = 0 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 1, 0) + + # Check read hit + command = 1 + data = 1 + match = 1 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 0, 0) + + # Check overwrite + command = 2 + data = 5 + match = 0 + yield from set_cam_entry(dut, command, data) + yield + yield from check_all(dut, data, match, 0, 0) + + # Check read hit + command = 1 + data = 5 + match = 1 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 0, 0) + + # Check reset + command = 3 + data = 0 + match = 0 + yield from set_cam_entry(dut, command, data) + yield from check_all(dut, data, match, 0, 0) + + # Extra clock cycle for waveform + yield + +if __name__ == "__main__": + dut = CamEntry(4) + run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_cam_entry.vcd") + print("CamEntry Unit Test Success") diff --git a/src/TLB/test/test_lfsr.py b/src/TLB/test/test_lfsr.py new file mode 100644 index 00000000..0b476adc --- /dev/null +++ b/src/TLB/test/test_lfsr.py @@ -0,0 +1,30 @@ +import sys +sys.path.append("../src") +sys.path.append("../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from LFSR import LFSR + +from test_helper import assert_eq, assert_ne, assert_op + +def testbench(dut): + yield dut.enable.eq(1) + yield dut.o.eq(9) + yield + yield + yield + yield + yield + yield + yield + yield + yield + yield + yield + yield + +if __name__ == "__main__": + dut = LFSR() + run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_lfsr.vcd") + print("LFSR Unit Test Success") \ No newline at end of file diff --git a/src/TLB/test/test_permission_validator.py b/src/TLB/test/test_permission_validator.py new file mode 100644 index 00000000..59750c51 --- /dev/null +++ b/src/TLB/test/test_permission_validator.py @@ -0,0 +1,145 @@ +import sys +sys.path.append("../src") +sys.path.append("../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from PermissionValidator import PermissionValidator + +from test_helper import assert_op + +def set_validator(dut, d, xwr, sm, sa, asid): + yield dut.data.eq(d) + yield dut.xwr.eq(xwr) + yield dut.super_mode.eq(sm) + yield dut.super_access.eq(sa) + yield dut.asid.eq(asid) + yield + +def check_valid(dut, v, op): + out_v = yield dut.valid + assert_op("Valid", out_v, v, op) + +def testbench(dut): + # 80 bits represented. Ignore the MSB as it will be truncated + # ASID is bits first 4 hex values (bits 64 - 78) + + # Test user mode entry valid + # Global Bit matching ASID + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000031 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test user mode entry valid + # Global Bit nonmatching ASID + # Ensure that user mode and valid is enabled! 
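+    # low byte 0x31 sets V (bit 0), U (bit 4) and G (bit 5) of the PTE;
+    # with G set, the lookup ASID below (0x7FF6) need not match the entry
+    # ASID (0x7FFF), so the entry is still expected to be valid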
+ data = 0x7FFF0000000000000031 + # Ignore MSB it will be truncated + asid = 0x7FF6 + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test user mode entry invalid + # Global Bit nonmatching ASID + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000021 + # Ignore MSB it will be truncated + asid = 0x7FF6 + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 0 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test user mode entry valid + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000011 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test user mode entry invalid + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000011 + # Ignore MSB it will be truncated + asid = 0x7FF6 + super_mode = 0 + super_access = 0 + xwr = 0 + valid = 0 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test supervisor mode entry valid + # The entry is NOT in user mode + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000001 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 1 + super_access = 0 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test supervisor mode entry invalid + # The entry is in user mode + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000011 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 1 + super_access = 0 + xwr = 0 + valid = 0 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test supervisor mode entry valid + # The entry is NOT in user mode with access + # Ensure that user mode and valid is enabled! + data = 0x7FFF0000000000000001 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 1 + super_access = 1 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + + # Test supervisor mode entry valid + # The entry is in user mode with access + # Ensure that user mode and valid is enabled! 
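+    # low byte 0x11 sets only V (bit 0) and U (bit 4): the entry is a
+    # user-mode page, but super_access is asserted below, so supervisor
+    # access is still expected to validate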
+ data = 0x7FFF0000000000000011 + # Ignore MSB it will be truncated + asid = 0x7FFF + super_mode = 1 + super_access = 1 + xwr = 0 + valid = 1 + yield from set_validator(dut, data, xwr, super_mode, super_access, asid) + yield from check_valid(dut, valid, 0) + +if __name__ == "__main__": + dut = PermissionValidator(15, 64); + run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_permission_validator.vcd") + print("PermissionValidator Unit Test Success") diff --git a/src/TLB/test/test_pte_entry.py b/src/TLB/test/test_pte_entry.py new file mode 100644 index 00000000..5faa0bf1 --- /dev/null +++ b/src/TLB/test/test_pte_entry.py @@ -0,0 +1,103 @@ +import sys +sys.path.append("../src") +sys.path.append("../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from PteEntry import PteEntry + +from test_helper import assert_op + +def set_entry(dut, i): + yield dut.i.eq(i) + yield + +def check_dirty(dut, d, op): + out_d = yield dut.d + assert_op("Dirty", out_d, d, op) + +def check_accessed(dut, a, op): + out_a = yield dut.a + assert_op("Accessed", out_a, a, op) + +def check_global(dut, o, op): + out = yield dut.g + assert_op("Global", out, o, op) + +def check_user(dut, o, op): + out = yield dut.u + assert_op("User Mode", out, o, op) + +def check_xwr(dut, o, op): + out = yield dut.xwr + assert_op("XWR", out, o, op) + +def check_asid(dut, o, op): + out = yield dut.asid + assert_op("ASID", out, o, op) + +def check_pte(dut, o, op): + out = yield dut.pte + assert_op("ASID", out, o, op) + +def check_valid(dut, v, op): + out_v = yield dut.v + assert_op("Valid", out_v, v, op) + +def check_all(dut, d, a, g, u, xwr, v, asid, pte): + yield from check_dirty(dut, d, 0) + yield from check_accessed(dut, a, 0) + yield from check_global(dut, g, 0) + yield from check_user(dut, u, 0) + yield from check_xwr(dut, xwr, 0) + yield from check_asid(dut, asid, 0) + yield from check_pte(dut, pte, 0) + yield from check_valid(dut, v, 0) + +def testbench(dut): + # 80 bits represented. 
Ignore the MSB as it will be truncated + # ASID is bits first 4 hex values (bits 64 - 78) + + i = 0x7FFF0000000000000031 + dirty = 0 + access = 0 + glob = 1 + user = 1 + xwr = 0 + valid = 1 + asid = 0x7FFF + pte = 0x0000000000000031 + yield from set_entry(dut, i) + yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) + + i = 0x0FFF00000000000000FF + dirty = 1 + access = 1 + glob = 1 + user = 1 + xwr = 7 + valid = 1 + asid = 0x0FFF + pte = 0x00000000000000FF + yield from set_entry(dut, i) + yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) + + i = 0x0721000000001100001F + dirty = 0 + access = 0 + glob = 0 + user = 1 + xwr = 7 + valid = 1 + asid = 0x0721 + pte = 0x000000001100001F + yield from set_entry(dut, i) + yield from check_all(dut, dirty, access, glob, user, xwr, valid, asid, pte) + + yield + + +if __name__ == "__main__": + dut = PteEntry(15, 64); + run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_pte_entry.vcd") + print("PteEntry Unit Test Success") \ No newline at end of file diff --git a/src/TLB/test/test_set_associative_cache.py b/src/TLB/test/test_set_associative_cache.py new file mode 100644 index 00000000..d681425f --- /dev/null +++ b/src/TLB/test/test_set_associative_cache.py @@ -0,0 +1,39 @@ +import sys +sys.path.append("../src") +sys.path.append("../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from SetAssociativeCache import SetAssociativeCache + +from test_helper import assert_eq, assert_ne, assert_op + +def set_sac(dut, e, c, s, t, d): + yield dut.enable.eq(e) + yield dut.command.eq(c) + yield dut.cset.eq(s) + yield dut.tag.eq(t) + yield dut.data_i.eq(d) + yield + +def testbench(dut): + enable = 1 + command = 2 + cset = 1 + tag = 2 + data = 3 + yield from set_sac(dut, enable, command, cset, tag, data) + yield + + enable = 1 + command = 2 + cset = 1 + tag = 5 + data = 8 + yield from set_sac(dut, enable, command, cset, tag, data) + yield + +if __name__ == "__main__": + dut = SetAssociativeCache(4, 4, 4, 4) + run_simulation(dut, testbench(dut), vcd_name="Waveforms/test_set_associative_cache.vcd") + print("Set Associative Cache Unit Test Success") diff --git a/src/TestUtil/test_helper.py b/src/TestUtil/test_helper.py new file mode 100644 index 00000000..d22124b8 --- /dev/null +++ b/src/TestUtil/test_helper.py @@ -0,0 +1,27 @@ +# Verifies the given values given the particular operand +# Arguments: +# p (Prefix): Appended to the front of the assert statement +# e (Expected): The expected value +# o (Output): The output result +# op (Operation): (0 => ==), (1 => !=) +def assert_op(pre, o, e, op): + if op == 0: + assert_eq(pre, o, e) + else: + assert_ne(pre, o, e) + +# Verifies the given values are equal +# Arguments: +# p (Prefix): Appended to the front of the assert statement +# e (Expected): The expected value +# o (Output): The output result +def assert_eq(p, o, e): + assert o == e, p + " Output " + str(o) + " Expected " + str(e) + +# Verifies the given values are not equal +# Arguments: +# p (Prefix): Appended to the front of the assert statement +# e (Expected): The expected value +# o (Output): The output result +def assert_ne(p, o, e): + assert o != e, p + " Output " + str(o) + " Not Expecting " + str(e) \ No newline at end of file diff --git a/src/scoreboard/dependence_cell.py b/src/scoreboard/dependence_cell.py new file mode 100644 index 00000000..18e8d755 --- /dev/null +++ b/src/scoreboard/dependence_cell.py @@ -0,0 +1,109 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli 
import verilog, rtlil +from nmigen import Module, Signal, Elaboratable +from nmutil.latch import SRLatch + + +class DependenceCell(Elaboratable): + """ implements 11.4.7 mitch alsup dependence cell, p27 + """ + def __init__(self): + # inputs + self.dest_i = Signal(reset_less=True) # Dest in (top) + self.src1_i = Signal(reset_less=True) # oper1 in (top) + self.src2_i = Signal(reset_less=True) # oper2 in (top) + self.issue_i = Signal(reset_less=True) # Issue in (top) + + self.go_write_i = Signal(reset_less=True) # Go Write in (left) + self.go_read_i = Signal(reset_less=True) # Go Read in (left) + + # for Register File Select Lines (vertical) + self.dest_rsel_o = Signal(reset_less=True) # dest reg sel (bottom) + self.src1_rsel_o = Signal(reset_less=True) # src1 reg sel (bottom) + self.src2_rsel_o = Signal(reset_less=True) # src2 reg sel (bottom) + + # for Function Unit "forward progress" (horizontal) + self.dest_fwd_o = Signal(reset_less=True) # dest FU fw (right) + self.src1_fwd_o = Signal(reset_less=True) # src1 FU fw (right) + self.src2_fwd_o = Signal(reset_less=True) # src2 FU fw (right) + + def elaborate(self, platform): + m = Module() + m.submodules.dest_l = dest_l = SRLatch() + m.submodules.src1_l = src1_l = SRLatch() + m.submodules.src2_l = src2_l = SRLatch() + + # destination latch: reset on go_write HI, set on dest and issue + m.d.comb += dest_l.s.eq(self.issue_i & self.dest_i) + m.d.comb += dest_l.r.eq(self.go_write_i) + + # src1 latch: reset on go_read HI, set on src1_i and issue + m.d.comb += src1_l.s.eq(self.issue_i & self.src1_i) + m.d.comb += src1_l.r.eq(self.go_read_i) + + # src2 latch: reset on go_read HI, set on op2_i and issue + m.d.comb += src2_l.s.eq(self.issue_i & self.src2_i) + m.d.comb += src2_l.r.eq(self.go_read_i) + + # FU "Forward Progress" (read out horizontally) + m.d.comb += self.dest_fwd_o.eq(dest_l.qn & self.dest_i) + m.d.comb += self.src1_fwd_o.eq(src1_l.qn & self.src1_i) + m.d.comb += self.src2_fwd_o.eq(src2_l.qn & self.src2_i) + + # Register File Select (read out vertically) + m.d.comb += self.dest_rsel_o.eq(dest_l.qn & self.go_write_i) + m.d.comb += self.src1_rsel_o.eq(src1_l.qn & self.go_read_i) + m.d.comb += self.src2_rsel_o.eq(src2_l.qn & self.go_read_i) + + return m + + def __iter__(self): + yield self.dest_i + yield self.src1_i + yield self.src2_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.dest_rsel_o + yield self.src1_rsel_o + yield self.src2_rsel_o + yield self.dest_fwd_o + yield self.src1_fwd_o + yield self.src2_fwd_o + + def ports(self): + return list(self) + + +def dcell_sim(dut): + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_dcell(): + dut = DependenceCell() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_dcell.il", "w") as f: + f.write(vl) + + run_simulation(dut, dcell_sim(dut), vcd_name='test_dcell.vcd') + +if __name__ == '__main__': + test_dcell() diff --git a/src/scoreboard/fn_unit.py b/src/scoreboard/fn_unit.py new file mode 100644 index 00000000..b2ef9468 --- /dev/null +++ b/src/scoreboard/fn_unit.py @@ -0,0 +1,327 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Cat, Array, Const, Elaboratable +from 
nmutil.latch import SRLatch +from nmigen.lib.coding import Decoder + +from shadow_fn import ShadowFn + + +class FnUnit(Elaboratable): + """ implements 11.4.8 function unit, p31 + also implements optional shadowing 11.5.1, p55 + + shadowing can be used for branches as well as exceptions (interrupts), + load/store hold (exceptions again), and vector-element predication + (once the predicate is known, which it may not be at instruction issue) + + Inputs + + * :wid: register file width + * :shadow_wid: number of shadow/fail/good/go_die sets + * :n_dests: number of destination regfile(s) (index: rfile_sel_i) + * :wr_pend: if true, writable observes the g_wr_pend_i vector + otherwise observes g_rd_pend_i + + notes: + + * dest_i / src1_i / src2_i are in *binary*, whereas... + * ...g_rd_pend_i / g_wr_pend_i and rd_pend_o / wr_pend_o are UNARY + * req_rel_i (request release) is the direct equivalent of pipeline + "output valid" (valid_o) + * recover is a local python variable (actually go_die_o) + * when shadow_wid = 0, recover and shadown are Consts (i.e. do nothing) + * wr_pend is set False for the majority of uses: however for + use in a STORE Function Unit it is set to True + """ + def __init__(self, wid, shadow_wid=0, n_dests=1, wr_pend=False): + self.reg_width = wid + self.n_dests = n_dests + self.shadow_wid = shadow_wid + self.wr_pend = wr_pend + + # inputs + if n_dests > 1: + self.rfile_sel_i = Signal(max=n_dests, reset_less=True) + else: + self.rfile_sel_i = Const(0) # no selection. gets Array[0] + self.dest_i = Signal(max=wid, reset_less=True) # Dest R# in (top) + self.src1_i = Signal(max=wid, reset_less=True) # oper1 R# in (top) + self.src2_i = Signal(max=wid, reset_less=True) # oper2 R# in (top) + self.issue_i = Signal(reset_less=True) # Issue in (top) + + self.go_write_i = Signal(reset_less=True) # Go Write in (left) + self.go_read_i = Signal(reset_less=True) # Go Read in (left) + self.req_rel_i = Signal(reset_less=True) # request release (left) + + self.g_xx_pend_i = Array(Signal(wid, reset_less=True, name="g_pend_i") \ + for i in range(n_dests)) # global rd (right) + self.g_wr_pend_i = Signal(wid, reset_less=True) # global wr (right) + + if shadow_wid: + self.shadow_i = Signal(shadow_wid, reset_less=True) + self.s_fail_i = Signal(shadow_wid, reset_less=True) + self.s_good_i = Signal(shadow_wid, reset_less=True) + self.go_die_o = Signal(reset_less=True) + + # outputs + self.readable_o = Signal(reset_less=True) # Readable out (right) + self.writable_o = Array(Signal(reset_less=True, name="writable_o") \ + for i in range(n_dests)) # writable out (right) + self.busy_o = Signal(reset_less=True) # busy out (left) + + self.rd_pend_o = Signal(wid, reset_less=True) # rd pending (right) + self.xx_pend_o = Array(Signal(wid, reset_less=True, name="pend_o") \ + for i in range(n_dests))# wr pending (right) + + def elaborate(self, platform): + m = Module() + m.submodules.rd_l = rd_l = SRLatch(sync=False) + m.submodules.wr_l = wr_l = SRLatch(sync=False) + m.submodules.dest_d = dest_d = Decoder(self.reg_width) + m.submodules.src1_d = src1_d = Decoder(self.reg_width) + m.submodules.src2_d = src2_d = Decoder(self.reg_width) + s_latches = [] + for i in range(self.shadow_wid): + sh = ShadowFn() + setattr(m.submodules, "shadow%d" % i, sh) + s_latches.append(sh) + + # shadow / recover (optional: shadow_wid > 0) + if self.shadow_wid: + recover = self.go_die_o + shadown = Signal(reset_less=True) + i_l = [] + fail_l = [] + good_l = [] + shi_l = [] + sho_l = [] + rec_l = [] + # get list of latch signals. 
really must be a better way to do this + for l in s_latches: + i_l.append(l.issue_i) + shi_l.append(l.shadow_i) + fail_l.append(l.s_fail_i) + good_l.append(l.s_good_i) + sho_l.append(l.shadow_o) + rec_l.append(l.recover_o) + m.d.comb += Cat(*i_l).eq(self.issue_i) + m.d.comb += Cat(*fail_l).eq(self.s_fail_i) + m.d.comb += Cat(*good_l).eq(self.s_good_i) + m.d.comb += Cat(*shi_l).eq(self.shadow_i) + m.d.comb += shadown.eq(~(Cat(*sho_l).bool())) + m.d.comb += recover.eq(Cat(*rec_l).bool()) + else: + shadown = Const(1) + recover = Const(0) + + # selector + xx_pend_o = self.xx_pend_o[self.rfile_sel_i] + writable_o = self.writable_o[self.rfile_sel_i] + g_pend_i = self.g_xx_pend_i[self.rfile_sel_i] + + for i in range(self.n_dests): + m.d.comb += self.xx_pend_o[i].eq(0) # initialise all array + m.d.comb += self.writable_o[i].eq(0) # to zero + + # go_write latch: reset on go_write HI, set on issue + m.d.comb += wr_l.s.eq(self.issue_i) + m.d.comb += wr_l.r.eq(self.go_write_i | recover) + + # src1 latch: reset on go_read HI, set on issue + m.d.comb += rd_l.s.eq(self.issue_i) + m.d.comb += rd_l.r.eq(self.go_read_i | recover) + + # dest decoder: write-pending out + m.d.comb += dest_d.i.eq(self.dest_i) + m.d.comb += dest_d.n.eq(wr_l.qn) # decode is inverted + m.d.comb += self.busy_o.eq(wr_l.q) # busy if set + m.d.comb += xx_pend_o.eq(dest_d.o) + + # src1/src2 decoder: read-pending out + m.d.comb += src1_d.i.eq(self.src1_i) + m.d.comb += src1_d.n.eq(rd_l.qn) # decode is inverted + m.d.comb += src2_d.i.eq(self.src2_i) + m.d.comb += src2_d.n.eq(rd_l.qn) # decode is inverted + m.d.comb += self.rd_pend_o.eq(src1_d.o | src2_d.o) + + # readable output signal + g_rd = Signal(self.reg_width, reset_less=True) + m.d.comb += g_rd.eq(self.g_wr_pend_i & self.rd_pend_o) + m.d.comb += self.readable_o.eq(g_rd.bool()) + + # writable output signal + g_wr_v = Signal(self.reg_width, reset_less=True) + g_wr = Signal(reset_less=True) + wo = Signal(reset_less=True) + m.d.comb += g_wr_v.eq(g_pend_i & xx_pend_o) + m.d.comb += g_wr.eq(~g_wr_v.bool()) + m.d.comb += wo.eq(g_wr & rd_l.q & self.req_rel_i & shadown) + m.d.comb += writable_o.eq(wo) + + return m + + def __iter__(self): + yield self.dest_i + yield self.src1_i + yield self.src2_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.req_rel_i + yield from self.g_xx_pend_i + yield self.g_wr_pend_i + yield self.readable_o + yield from self.writable_o + yield self.rd_pend_o + yield from self.xx_pend_o + + def ports(self): + return list(self) + +############# ############### +# --- --- # +# --- renamed / redirected from base class --- # +# --- --- # +# --- below are convenience classes which match the names --- # +# --- of the various mitch alsup book chapter gate diagrams --- # +# --- --- # +############# ############### + + +class IntFnUnit(FnUnit): + def __init__(self, wid, shadow_wid=0): + FnUnit.__init__(self, wid, shadow_wid) + self.int_rd_pend_o = self.rd_pend_o + self.int_wr_pend_o = self.xx_pend_o[0] + self.g_int_wr_pend_i = self.g_wr_pend_i + self.g_int_rd_pend_i = self.g_xx_pend_i[0] + self.int_readable_o = self.readable_o + self.int_writable_o = self.writable_o[0] + + self.int_rd_pend_o.name = "int_rd_pend_o" + self.int_wr_pend_o.name = "int_wr_pend_o" + self.g_int_rd_pend_i.name = "g_int_rd_pend_i" + self.g_int_wr_pend_i.name = "g_int_wr_pend_i" + self.int_readable_o.name = "int_readable_o" + self.int_writable_o.name = "int_writable_o" + + +class FPFnUnit(FnUnit): + def __init__(self, wid, shadow_wid=0): + FnUnit.__init__(self, wid, 
shadow_wid) + self.fp_rd_pend_o = self.rd_pend_o + self.fp_wr_pend_o = self.xx_pend_o[0] + self.g_fp_wr_pend_i = self.g_wr_pend_i + self.g_fp_rd_pend_i = self.g_xx_pend_i[0] + self.fp_writable_o = self.writable_o[0] + self.fp_readable_o = self.readable_o + + self.fp_rd_pend_o.name = "fp_rd_pend_o" + self.fp_wr_pend_o.name = "fp_wr_pend_o" + self.g_fp_rd_pend_i.name = "g_fp_rd_pend_i" + self.g_fp_wr_pend_i.name = "g_fp_wr_pend_i" + self.fp_writable_o.name = "fp_writable_o" + self.fp_readable_o.name = "fp_readable_o" + + +class LDFnUnit(FnUnit): + """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile) + * when rfile_sel_i == 0, int_wr_pend_o is set + * when rfile_sel_i == 1, fp_wr_pend_o is set + """ + def __init__(self, wid, shadow_wid=0): + FnUnit.__init__(self, wid, shadow_wid, n_dests=2) + self.int_rd_pend_o = self.rd_pend_o + self.int_wr_pend_o = self.xx_pend_o[0] + self.fp_wr_pend_o = self.xx_pend_o[1] + self.g_int_wr_pend_i = self.g_wr_pend_i + self.g_int_rd_pend_i = self.g_xx_pend_i[0] + self.g_fp_rd_pend_i = self.g_xx_pend_i[1] + self.int_readable_o = self.readable_o + self.int_writable_o = self.writable_o[0] + self.fp_writable_o = self.writable_o[1] + + self.int_rd_pend_o.name = "int_rd_pend_o" + self.int_wr_pend_o.name = "int_wr_pend_o" + self.fp_wr_pend_o.name = "fp_wr_pend_o" + self.g_int_wr_pend_i.name = "g_int_wr_pend_i" + self.g_int_rd_pend_i.name = "g_int_rd_pend_i" + self.g_fp_rd_pend_i.name = "g_fp_rd_pend_i" + self.int_readable_o.name = "int_readable_o" + self.int_writable_o.name = "int_writable_o" + self.fp_writable_o.name = "fp_writable_o" + + +class STFnUnit(FnUnit): + """ number of dest selectors: 2. assumes len(int_regfile) == len(fp_regfile) + * wr_pend=False indicates to observe global fp write pending + * when rfile_sel_i == 0, int_wr_pend_o is set + * when rfile_sel_i == 1, fp_wr_pend_o is set + * + """ + def __init__(self, wid, shadow_wid=0): + FnUnit.__init__(self, wid, shadow_wid, n_dests=2, wr_pend=True) + self.int_rd_pend_o = self.rd_pend_o # 1st int read-pending vector + self.int2_rd_pend_o = self.xx_pend_o[0] # 2nd int read-pending vector + self.fp_rd_pend_o = self.xx_pend_o[1] # 1x FP read-pending vector + # yes overwrite FnUnit base class g_wr_pend_i vector + self.g_int_wr_pend_i = self.g_wr_pend_i = self.g_xx_pend_i[0] + self.g_fp_wr_pend_i = self.g_xx_pend_i[1] + self.int_readable_o = self.readable_o + self.int_writable_o = self.writable_o[0] + self.fp_writable_o = self.writable_o[1] + + self.int_rd_pend_o.name = "int_rd_pend_o" + self.int2_rd_pend_o.name = "int2_rd_pend_o" + self.fp_rd_pend_o.name = "fp_rd_pend_o" + self.g_int_wr_pend_i.name = "g_int_wr_pend_i" + self.g_fp_wr_pend_i.name = "g_fp_wr_pend_i" + self.int_readable_o.name = "int_readable_o" + self.int_writable_o.name = "int_writable_o" + self.fp_writable_o.name = "fp_writable_o" + + + +def int_fn_unit_sim(dut): + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_int_fn_unit(): + dut = FnUnit(32, 2, 2) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_fn_unit.il", "w") as f: + f.write(vl) + + dut = LDFnUnit(32, 2) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_ld_fn_unit.il", "w") as f: + f.write(vl) + + dut = STFnUnit(32, 0) + vl = 
rtlil.convert(dut, ports=dut.ports()) + with open("test_st_fn_unit.il", "w") as f: + f.write(vl) + + run_simulation(dut, int_fn_unit_sim(dut), vcd_name='test_fn_unit.vcd') + +if __name__ == '__main__': + test_int_fn_unit() diff --git a/src/scoreboard/fu_dep_cell.py b/src/scoreboard/fu_dep_cell.py new file mode 100644 index 00000000..93ef28d3 --- /dev/null +++ b/src/scoreboard/fu_dep_cell.py @@ -0,0 +1,84 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Elaboratable +from nmutil.latch import SRLatch + + +class FUDependenceCell(Elaboratable): + """ implements 11.4.7 mitch alsup dependence cell, p27 + """ + def __init__(self): + # inputs + self.rd_pend_i = Signal(reset_less=True) # read pending in (left) + self.wr_pend_i = Signal(reset_less=True) # write pending in (left) + self.issue_i = Signal(reset_less=True) # Issue in (top) + + self.go_write_i = Signal(reset_less=True) # Go Write in (left) + self.go_read_i = Signal(reset_less=True) # Go Read in (left) + + # outputs (latched rd/wr pend) + self.rd_pend_o = Signal(reset_less=True) # read pending out (right) + self.wr_pend_o = Signal(reset_less=True) # write pending out (right) + + def elaborate(self, platform): + m = Module() + m.submodules.rd_l = rd_l = SRLatch() + m.submodules.wr_l = wr_l = SRLatch() + + # write latch: reset on go_write HI, set on write pending and issue + m.d.comb += wr_l.s.eq(self.issue_i & self.wr_pend_i) + m.d.comb += wr_l.r.eq(self.go_write_i) + + # read latch: reset on go_read HI, set on read pending and issue + m.d.comb += rd_l.s.eq(self.issue_i & self.rd_pend_i) + m.d.comb += rd_l.r.eq(self.go_read_i) + + # Read/Write Pending Latches (read out horizontally) + m.d.comb += self.wr_pend_o.eq(wr_l.qn) + m.d.comb += self.rd_pend_o.eq(rd_l.qn) + + return m + + def __iter__(self): + yield self.rd_pend_i + yield self.wr_pend_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.rd_pend_o + yield self.wr_pend_o + + def ports(self): + return list(self) + + +def dcell_sim(dut): + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_dcell(): + dut = FUDependenceCell() + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_fu_dcell.il", "w") as f: + f.write(vl) + + run_simulation(dut, dcell_sim(dut), vcd_name='test_fu_dcell.vcd') + +if __name__ == '__main__': + test_dcell() diff --git a/src/scoreboard/fu_fu_matrix.py b/src/scoreboard/fu_fu_matrix.py new file mode 100644 index 00000000..6ffd4442 --- /dev/null +++ b/src/scoreboard/fu_fu_matrix.py @@ -0,0 +1,157 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Elaboratable, Array, Cat + +#from nmutil.latch import SRLatch +from fu_dep_cell import FUDependenceCell +from fu_picker_vec import FU_Pick_Vec + +""" + + 6600 Function Unit Dependency Table Matrix inputs / outputs + ----------------------------------------------------------- + +""" + +class FUFUDepMatrix(Elaboratable): + """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26 + """ + def __init__(self, n_fu_row, n_fu_col): + self.n_fu_row = n_fu_row # Y (FU row#) ^v + self.n_fu_col = n_fu_col # X (FU col #) <> + self.rd_pend_i = Signal(n_fu_row, 
reset_less=True) # Rd pending (left) + self.wr_pend_i = Signal(n_fu_row, reset_less=True) # Wr pending (left) + self.issue_i = Signal(n_fu_col, reset_less=True) # Issue in (top) + + self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left) + self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left) + + # for Function Unit Readable/Writable (horizontal) + self.readable_o = Signal(n_fu_col, reset_less=True) # readable (bot) + self.writable_o = Signal(n_fu_col, reset_less=True) # writable (bot) + + def elaborate(self, platform): + m = Module() + + # --- + # matrix of dependency cells + # --- + dm = Array(Array(FUDependenceCell() for r in range(self.n_fu_row)) \ + for f in range(self.n_fu_col)) + for x in range(self.n_fu_col): + for y in range(self.n_fu_row): + setattr(m.submodules, "dm_fx%d_fy%d" % (x, y), dm[x][y]) + + # --- + # array of Function Unit Readable/Writable: row-length, horizontal + # --- + fur = Array(FU_Pick_Vec(self.n_fu_row) for r in range(self.n_fu_col)) + for x in range(self.n_fu_col): + setattr(m.submodules, "fur_x%d" % (x), fur[x]) + + # --- + # connect FU Readable/Writable vector + # --- + readable = [] + writable = [] + for x in range(self.n_fu_col): + fu = fur[x] + rd_pend_o = [] + wr_pend_o = [] + for y in range(self.n_fu_row): + dc = dm[x][y] + # accumulate cell outputs rd/wr-pending + rd_pend_o.append(dc.rd_pend_o) + wr_pend_o.append(dc.wr_pend_o) + # connect cell reg-select outputs to Reg Vector In + m.d.comb += [fu.rd_pend_i.eq(Cat(*rd_pend_o)), + fu.wr_pend_i.eq(Cat(*wr_pend_o)), + ] + # accumulate Readable/Writable Vector outputs + readable.append(fu.readable_o) + writable.append(fu.writable_o) + + # ... and output them from this module (horizontal, width=REGs) + m.d.comb += self.readable_o.eq(Cat(*readable)) + m.d.comb += self.writable_o.eq(Cat(*writable)) + + # --- + # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i + # --- + for y in range(self.n_fu_row): + issue_i = [] + for x in range(self.n_fu_col): + dc = dm[x][y] + # accumulate cell inputs issue + issue_i.append(dc.issue_i) + # wire up inputs from module to row cell inputs (Cat is gooood) + m.d.comb += Cat(*issue_i).eq(self.issue_i) + + # --- + # connect Matrix go_read_i/go_write_i to module readable/writable + # --- + for x in range(self.n_fu_col): + go_read_i = [] + go_write_i = [] + rd_pend_i = [] + wr_pend_i = [] + for y in range(self.n_fu_row): + dc = dm[x][y] + # accumulate cell rd_pend/wr_pend/go_read/go_write + rd_pend_i.append(dc.rd_pend_i) + wr_pend_i.append(dc.wr_pend_i) + go_read_i.append(dc.go_read_i) + go_write_i.append(dc.go_write_i) + # wire up inputs from module to row cell inputs (Cat is gooood) + m.d.comb += [Cat(*go_read_i).eq(self.go_read_i), + Cat(*go_write_i).eq(self.go_write_i), + Cat(*rd_pend_i).eq(self.rd_pend_i), + Cat(*wr_pend_i).eq(self.wr_pend_i), + ] + + return m + + def __iter__(self): + yield self.rd_pend_i + yield self.wr_pend_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.readable_o + yield self.writable_o + + def ports(self): + return list(self) + +def d_matrix_sim(dut): + """ XXX TODO + """ + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_fu_fu_matrix(): + dut = FUFUDepMatrix(n_fu_row=3, n_fu_col=4) 
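+    # emit RTLIL for the 3x4 FU-to-FU matrix for inspection, then run
+    # the simulation (d_matrix_sim is still a placeholder, see XXX TODO)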
+ vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_fu_fu_matrix.il", "w") as f: + f.write(vl) + + run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_fu_matrix.vcd') + +if __name__ == '__main__': + test_fu_fu_matrix() diff --git a/src/scoreboard/fu_picker_vec.py b/src/scoreboard/fu_picker_vec.py new file mode 100644 index 00000000..fd44c45f --- /dev/null +++ b/src/scoreboard/fu_picker_vec.py @@ -0,0 +1,21 @@ +from nmigen import Elaboratable, Module, Signal, Cat + + +class FU_Pick_Vec(Elaboratable): + """ these are allocated per-FU (horizontally), + and are of length fu_row_n + """ + def __init__(self, fu_row_n): + self.fu_row_n = fu_row_n + self.rd_pend_i = Signal(fu_row_n, reset_less=True) + self.wr_pend_i = Signal(fu_row_n, reset_less=True) + + self.readable_o = Signal(reset_less=True) + self.writable_o = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + m.d.comb += self.readable_o.eq(self.rd_pend_i.bool()) + m.d.comb += self.writable_o.eq(self.wr_pend_i.bool()) + return m + diff --git a/src/scoreboard/fu_reg_matrix.py b/src/scoreboard/fu_reg_matrix.py new file mode 100644 index 00000000..0826ea56 --- /dev/null +++ b/src/scoreboard/fu_reg_matrix.py @@ -0,0 +1,225 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Elaboratable, Array, Cat + +#from nmutil.latch import SRLatch +from dependence_cell import DependenceCell +from fu_wr_pending import FU_RW_Pend +from reg_select import Reg_Rsv + +""" + + 6600 Dependency Table Matrix inputs / outputs + --------------------------------------------- + + d s1 s2 i d s1 s2 i d s1 s2 i d s1 s2 i + | | | | | | | | | | | | | | | | + v v v v v v v v v v v v v v v v + go_rd/go_wr -> dm-r0-fu0 dm-r1-fu0 dm-r2-fu0 dm-r3-fu0 -> wr/rd-pend + go_rd/go_wr -> dm-r0-fu1 dm-r1-fu1 dm-r2-fu1 dm-r3-fu1 -> wr/rd-pend + go_rd/go_wr -> dm-r0-fu2 dm-r1-fu2 dm-r2-fu2 dm-r3-fu2 -> wr/rd-pend + | | | | | | | | | | | | + v v v v v v v v v v v v + d s1 s2 d s1 s2 d s1 s2 d s1 s2 + reg sel reg sel reg sel reg sel + +""" + +class FURegDepMatrix(Elaboratable): + """ implements 11.4.7 mitch alsup FU-to-Reg Dependency Matrix, p26 + """ + def __init__(self, n_fu_row, n_reg_col): + self.n_fu_row = n_fu_row # Y (FUs) ^v + self.n_reg_col = n_reg_col # X (Regs) <> + self.dest_i = Signal(n_reg_col, reset_less=True) # Dest in (top) + self.src1_i = Signal(n_reg_col, reset_less=True) # oper1 in (top) + self.src2_i = Signal(n_reg_col, reset_less=True) # oper2 in (top) + self.issue_i = Signal(n_reg_col, reset_less=True) # Issue in (top) + + self.go_write_i = Signal(n_fu_row, reset_less=True) # Go Write in (left) + self.go_read_i = Signal(n_fu_row, reset_less=True) # Go Read in (left) + + # for Register File Select Lines (horizontal), per-reg + self.dest_rsel_o = Signal(n_reg_col, reset_less=True) # dest reg (bot) + self.src1_rsel_o = Signal(n_reg_col, reset_less=True) # src1 reg (bot) + self.src2_rsel_o = Signal(n_reg_col, reset_less=True) # src2 reg (bot) + + # for Function Unit "forward progress" (vertical), per-FU + self.wr_pend_o = Signal(n_fu_row, reset_less=True) # wr pending (right) + self.rd_pend_o = Signal(n_fu_row, reset_less=True) # rd pending (right) + + def elaborate(self, platform): + m = Module() + + # --- + # matrix of dependency cells + # --- + dm = Array(Array(DependenceCell() for r in range(self.n_fu_row)) \ + for f in range(self.n_reg_col)) + for rn in range(self.n_reg_col): + for fu in range(self.n_fu_row): + setattr(m.submodules, "dm_r%d_fu%d" % (rn, fu), 
dm[rn][fu]) + + # --- + # array of Function Unit Pending vectors + # --- + fupend = Array(FU_RW_Pend(self.n_reg_col) for f in range(self.n_fu_row)) + for fu in range(self.n_fu_row): + setattr(m.submodules, "fu_fu%d" % (fu), fupend[fu]) + + # --- + # array of Register Reservation vectors + # --- + regrsv = Array(Reg_Rsv(self.n_fu_row) for r in range(self.n_reg_col)) + for rn in range(self.n_reg_col): + setattr(m.submodules, "rr_r%d" % (rn), regrsv[rn]) + + # --- + # connect Function Unit vector + # --- + wr_pend = [] + rd_pend = [] + for fu in range(self.n_fu_row): + fup = fupend[fu] + dest_fwd_o = [] + src1_fwd_o = [] + src2_fwd_o = [] + for rn in range(self.n_reg_col): + dc = dm[rn][fu] + # accumulate cell fwd outputs for dest/src1/src2 + dest_fwd_o.append(dc.dest_fwd_o) + src1_fwd_o.append(dc.src1_fwd_o) + src2_fwd_o.append(dc.src2_fwd_o) + # connect cell fwd outputs to FU Vector in [Cat is gooood] + m.d.comb += [fup.dest_fwd_i.eq(Cat(*dest_fwd_o)), + fup.src1_fwd_i.eq(Cat(*src1_fwd_o)), + fup.src2_fwd_i.eq(Cat(*src2_fwd_o)) + ] + # accumulate FU Vector outputs + wr_pend.append(fup.reg_wr_pend_o) + rd_pend.append(fup.reg_rd_pend_o) + + # ... and output them from this module (vertical, width=FUs) + m.d.comb += self.wr_pend_o.eq(Cat(*wr_pend)) + m.d.comb += self.rd_pend_o.eq(Cat(*rd_pend)) + + # --- + # connect Reg Selection vector + # --- + dest_rsel = [] + src1_rsel = [] + src2_rsel = [] + for rn in range(self.n_reg_col): + rsv = regrsv[rn] + dest_rsel_o = [] + src1_rsel_o = [] + src2_rsel_o = [] + for fu in range(self.n_fu_row): + dc = dm[rn][fu] + # accumulate cell reg-select outputs dest/src1/src2 + dest_rsel_o.append(dc.dest_rsel_o) + src1_rsel_o.append(dc.src1_rsel_o) + src2_rsel_o.append(dc.src2_rsel_o) + # connect cell reg-select outputs to Reg Vector In + m.d.comb += [rsv.dest_rsel_i.eq(Cat(*dest_rsel_o)), + rsv.src1_rsel_i.eq(Cat(*src1_rsel_o)), + rsv.src2_rsel_i.eq(Cat(*src2_rsel_o)), + ] + # accumulate Reg-Sel Vector outputs + dest_rsel.append(rsv.dest_rsel_o) + src1_rsel.append(rsv.src1_rsel_o) + src2_rsel.append(rsv.src2_rsel_o) + + # ... 
and output them from this module (horizontal, width=REGs) + m.d.comb += self.dest_rsel_o.eq(Cat(*dest_rsel)) + m.d.comb += self.src1_rsel_o.eq(Cat(*src1_rsel)) + m.d.comb += self.src2_rsel_o.eq(Cat(*src2_rsel)) + + # --- + # connect Dependency Matrix dest/src1/src2/issue to module d/s/s/i + # --- + for rn in range(self.n_reg_col): + dest_i = [] + src1_i = [] + src2_i = [] + issue_i = [] + for fu in range(self.n_fu_row): + dc = dm[rn][fu] + # accumulate cell inputs dest/src1/src2 + dest_i.append(dc.dest_i) + src1_i.append(dc.src1_i) + src2_i.append(dc.src2_i) + issue_i.append(dc.issue_i) + # wire up inputs from module to row cell inputs (Cat is gooood) + m.d.comb += [Cat(*dest_i).eq(self.dest_i), + Cat(*src1_i).eq(self.src1_i), + Cat(*src2_i).eq(self.src2_i), + Cat(*issue_i).eq(self.issue_i), + ] + + # --- + # connect Dependency Matrix go_read_i/go_write_i to module go_rd/go_wr + # --- + for fu in range(self.n_fu_row): + go_read_i = [] + go_write_i = [] + for rn in range(self.n_reg_col): + dc = dm[rn][fu] + # accumulate cell fwd outputs for dest/src1/src2 + go_read_i.append(dc.go_read_i) + go_write_i.append(dc.go_write_i) + # wire up inputs from module to row cell inputs (Cat is gooood) + m.d.comb += [Cat(*go_read_i).eq(self.go_read_i), + Cat(*go_write_i).eq(self.go_write_i), + ] + + return m + + def __iter__(self): + yield self.dest_i + yield self.src1_i + yield self.src2_i + yield self.issue_i + yield self.go_write_i + yield self.go_read_i + yield self.dest_rsel_o + yield self.src1_rsel_o + yield self.src2_rsel_o + yield self.wr_pend_o + yield self.rd_pend_o + + def ports(self): + return list(self) + +def d_matrix_sim(dut): + """ XXX TODO + """ + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_d_matrix(): + dut = FURegDepMatrix(n_fu_row=3, n_reg_col=4) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_fu_reg_matrix.il", "w") as f: + f.write(vl) + + run_simulation(dut, d_matrix_sim(dut), vcd_name='test_fu_reg_matrix.vcd') + +if __name__ == '__main__': + test_d_matrix() diff --git a/src/scoreboard/fu_wr_pending.py b/src/scoreboard/fu_wr_pending.py new file mode 100644 index 00000000..9b177ff0 --- /dev/null +++ b/src/scoreboard/fu_wr_pending.py @@ -0,0 +1,23 @@ +from nmigen import Elaboratable, Module, Signal, Cat + + +class FU_RW_Pend(Elaboratable): + """ these are allocated per-FU (horizontally), + and are of length reg_count + """ + def __init__(self, reg_count): + self.reg_count = reg_count + self.dest_fwd_i = Signal(reg_count, reset_less=True) + self.src1_fwd_i = Signal(reg_count, reset_less=True) + self.src2_fwd_i = Signal(reg_count, reset_less=True) + + self.reg_wr_pend_o = Signal(reset_less=True) + self.reg_rd_pend_o = Signal(reset_less=True) + + def elaborate(self, platform): + m = Module() + srces = Cat(self.src1_fwd_i, self.src2_fwd_i) + m.d.comb += self.reg_wr_pend_o.eq(self.dest_fwd_i.bool()) + m.d.comb += self.reg_rd_pend_o.eq(srces.bool()) + return m + diff --git a/src/scoreboard/global_pending.py b/src/scoreboard/global_pending.py new file mode 100644 index 00000000..50e43378 --- /dev/null +++ b/src/scoreboard/global_pending.py @@ -0,0 +1,93 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Cat, 
Elaboratable +from nmutil.latch import SRLatch +from nmigen.lib.coding import Decoder + + +class GlobalPending(Elaboratable): + """ implements Global Pending Vector, basically ORs all incoming Function + Unit vectors together. Can be used for creating Read or Write Global + Pending. Can be used for INT or FP Global Pending. + + Inputs: + * :wid: register file width + * :fu_vecs: a python list of function unit "pending" vectors, each + vector being a Signal of width equal to the reg file. + + Notes: + + * the regfile may be Int or FP, this code doesn't care which. + obviously do not try to put in a mixture of regfiles into fu_vecs. + * this code also doesn't care if it's used for Read Pending or Write + pending, it can be used for both: again, obviously, do not try to + put in a mixture of read *and* write pending vectors in. + * if some Function Units happen not to be uniform (don't operate + on a particular register (extremely unusual), they must set a Const + zero bit in the vector. + """ + def __init__(self, wid, fu_vecs): + self.reg_width = wid + # inputs + self.fu_vecs = fu_vecs + for v in fu_vecs: + assert len(v) == wid, "FU Vector must be same width as regfile" + + self.g_pend_o = Signal(wid, reset_less=True) # global pending vector + + def elaborate(self, platform): + m = Module() + + pend_l = [] + for i in range(self.reg_width): # per-register + vec_bit_l = [] + for v in self.fu_vecs: + vec_bit_l.append(v[i]) # fu bit for same register + pend_l.append(Cat(*vec_bit_l).bool()) # OR all bits for same reg + m.d.comb += self.g_pend_o.eq(Cat(*pend_l)) # merge all OR'd bits + + return m + + def __iter__(self): + yield from self.fu_vecs + yield self.g_pend_o + + def ports(self): + return list(self) + + +def g_vec_sim(dut): + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_g_vec(): + vecs = [] + for i in range(3): + vecs.append(Signal(32, name="fu%d" % i)) + dut = GlobalPending(32, vecs) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_global_pending.il", "w") as f: + f.write(vl) + + run_simulation(dut, g_vec_sim(dut), vcd_name='test_global_pending.vcd') + +if __name__ == '__main__': + test_g_vec() diff --git a/src/scoreboard/group_picker.py b/src/scoreboard/group_picker.py new file mode 100644 index 00000000..8f959a18 --- /dev/null +++ b/src/scoreboard/group_picker.py @@ -0,0 +1,111 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Cat, Elaboratable + + +class PriorityPicker(Elaboratable): + """ implements a priority-picker. input: N bits, output: N bits + """ + def __init__(self, wid): + self.wid = wid + # inputs + self.i = Signal(wid, reset_less=True) + self.o = Signal(wid, reset_less=True) + + def elaborate(self, platform): + m = Module() + + res = [] + for i in range(0, self.wid): + tmp = Signal(reset_less = True) + if i == 0: + m.d.comb += tmp.eq(self.i[0]) + else: + m.d.comb += tmp.eq((~tmp) & self.i[i]) + res.append(tmp) + + # we like Cat(*xxx). 
turn lists into concatenated bits + m.d.comb += self.o.eq(Cat(*res)) + + return m + + def __iter__(self): + yield self.i + yield self.o + + def ports(self): + return list(self) + + +class GroupPicker(Elaboratable): + """ implements 10.5 mitch alsup group picker, p27 + """ + def __init__(self, wid): + self.gp_wid = wid + # inputs + self.readable_i = Signal(wid, reset_less=True) # readable in (top) + self.writable_i = Signal(wid, reset_less=True) # writable in (top) + self.rel_req_i = Signal(wid, reset_less=True) # release request in (top) + + # outputs + self.go_rd_o = Signal(wid, reset_less=True) # go read (bottom) + self.go_wr_o = Signal(wid, reset_less=True) # go write (bottom) + + def elaborate(self, platform): + m = Module() + + m.submodules.rpick = rpick = PriorityPicker(self.gp_wid) + m.submodules.wpick = wpick = PriorityPicker(self.gp_wid) + + # combine release (output ready signal) with writeable + m.d.comb += wpick.i.eq(self.writable_i & self.rel_req_i) + m.d.comb += self.go_wr_o.eq(wpick.o) + + m.d.comb += rpick.i.eq(self.readable_i) + m.d.comb += self.go_rd_o.eq(rpick.o) + + return m + + def __iter__(self): + yield self.readable_i + yield self.writable_i + yield self.rel_req_i + yield self.go_rd_o + yield self.go_wr_o + + def ports(self): + return list(self) + + +def grp_pick_sim(dut): + yield dut.dest_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield dut.issue_i.eq(0) + yield + yield dut.src1_i.eq(1) + yield dut.issue_i.eq(1) + yield + yield + yield + yield dut.issue_i.eq(0) + yield + yield dut.go_read_i.eq(1) + yield + yield dut.go_read_i.eq(0) + yield + yield dut.go_write_i.eq(1) + yield + yield dut.go_write_i.eq(0) + yield + +def test_grp_pick(): + dut = GroupPicker(4) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_grp_pick.il", "w") as f: + f.write(vl) + + run_simulation(dut, grp_pick_sim(dut), vcd_name='test_grp_pick.vcd') + +if __name__ == '__main__': + test_grp_pick() diff --git a/src/scoreboard/issue_unit.py b/src/scoreboard/issue_unit.py new file mode 100644 index 00000000..d1f58d11 --- /dev/null +++ b/src/scoreboard/issue_unit.py @@ -0,0 +1,143 @@ +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil +from nmigen import Module, Signal, Cat, Array, Const, Record, Elaboratable +from nmutil.latch import SRLatch +from nmigen.lib.coding import Decoder + +from shadow_fn import ShadowFn + + +class IssueUnit(Elaboratable): + """ implements 11.4.14 issue unit, p50 + + Inputs + + * :wid: register file width + * :n_insns: number of instructions in this issue unit. 
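+
+    notes:
+
+    * issue is withheld (g_issue_o deasserted) when either the decoded
+      destination register already has a write pending in g_wr_pend_i
+      (a WAW hazard), or the selected Function Unit reports busy_i while
+      its insn_i is asserted.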
diff --git a/src/scoreboard/issue_unit.py b/src/scoreboard/issue_unit.py
new file mode 100644
index 00000000..d1f58d11
--- /dev/null
+++ b/src/scoreboard/issue_unit.py
@@ -0,0 +1,143 @@
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Array, Const, Record, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+from shadow_fn import ShadowFn
+
+
+class IssueUnit(Elaboratable):
+    """ implements 11.4.14 issue unit, p50
+
+        Inputs
+
+        * :wid: register file width
+        * :n_insns: number of instructions in this issue unit.
+    """
+    def __init__(self, wid, n_insns):
+        self.reg_width = wid
+        self.n_insns = n_insns
+
+        # inputs
+        self.store_i = Signal(reset_less=True)           # instruction is a store
+        self.dest_i = Signal(max=wid, reset_less=True)   # Dest R# in
+        self.src1_i = Signal(max=wid, reset_less=True)   # oper1 R# in
+        self.src2_i = Signal(max=wid, reset_less=True)   # oper2 R# in
+
+        self.g_wr_pend_i = Signal(wid, reset_less=True)  # write pending vector
+
+        self.insn_i = Array(Signal(reset_less=True, name="insn_i") \
+                            for i in range(n_insns))
+        self.busy_i = Array(Signal(reset_less=True, name="busy_i") \
+                            for i in range(n_insns))
+
+        # outputs
+        self.fn_issue_o = Array(Signal(reset_less=True, name="fn_issue_o") \
+                                for i in range(n_insns))
+        self.g_issue_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.dest_d = dest_d = Decoder(self.reg_width)
+
+        # temporaries
+        waw_stall = Signal(reset_less=True)
+        fu_stall = Signal(reset_less=True)
+        pend = Signal(self.reg_width, reset_less=True)
+
+        # dest decoder: write-pending
+        m.d.comb += dest_d.i.eq(self.dest_i)
+        m.d.comb += dest_d.n.eq(~self.store_i)  # decode is inverted
+        m.d.comb += pend.eq(dest_d.o & self.g_wr_pend_i)
+        m.d.comb += waw_stall.eq(pend.bool())
+
+        ib_l = []
+        for i in range(self.n_insns):
+            ib_l.append(self.insn_i[i] & self.busy_i[i])
+        m.d.comb += fu_stall.eq(Cat(*ib_l).bool())
+        m.d.comb += self.g_issue_o.eq(~(waw_stall | fu_stall))
+        for i in range(self.n_insns):
+            m.d.comb += self.fn_issue_o[i].eq(self.g_issue_o & self.insn_i[i])
+
+        return m
+
+    def __iter__(self):
+        yield self.store_i
+        yield self.dest_i
+        yield self.src1_i
+        yield self.src2_i
+        yield self.g_wr_pend_i
+        yield from self.insn_i
+        yield from self.busy_i
+        yield from self.fn_issue_o
+        yield self.g_issue_o
+
+    def ports(self):
+        return list(self)
+
+
+class IntFPIssueUnit(Elaboratable):
+    def __init__(self, wid, n_int_insns, n_fp_insns):
+        self.i = IssueUnit(wid, n_int_insns)
+        self.f = IssueUnit(wid, n_fp_insns)
+        self.issue_o = Signal(reset_less=True)
+
+        # some renames
+        self.int_write_pending_i = self.i.g_wr_pend_i
+        self.fp_write_pending_i = self.f.g_wr_pend_i
+        self.int_write_pending_i.name = 'int_write_pending_i'
+        self.fp_write_pending_i.name = 'fp_write_pending_i'
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.intissue = self.i
+        m.submodules.fpissue = self.f
+
+        m.d.comb += self.issue_o.eq(self.i.g_issue_o | self.f.g_issue_o)
+
+        return m
+
+    def ports(self):
+        yield self.issue_o
+        yield from self.i
+        yield from self.f
+
+
+def issue_unit_sim(dut):
+    """ XXX TODO: proper test.  for now, just drive the actual IssueUnit
+        inputs (dest / insn / busy / write-pending).
+    """
+    yield dut.dest_i.eq(1)
+    yield dut.insn_i[0].eq(1)
+    yield
+    yield dut.g_wr_pend_i.eq(0b10)
+    yield
+    yield dut.g_wr_pend_i.eq(0)
+    yield dut.busy_i[0].eq(1)
+    yield
+    yield dut.busy_i[0].eq(0)
+    yield dut.insn_i[0].eq(0)
+    yield
+
+def test_issue_unit():
+    dut = IssueUnit(32, 3)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_issue_unit.il", "w") as f:
+        f.write(vl)
+
+    dut2 = IntFPIssueUnit(32, 3, 3)
+    vl = rtlil.convert(dut2, ports=dut2.ports())
+    with open("test_intfp_issue_unit.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, issue_unit_sim(dut), vcd_name='test_issue_unit.vcd')
+
+if __name__ == '__main__':
+    test_issue_unit()
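
The issue decision above reduces to two stall terms: a WAW stall when the
decoded destination register is already marked write-pending, and an FU stall
when a selected Function Unit is still busy.  A rough pure-python model of
g_issue_o, taking dest_pend as the already-decoded-and-masked destination
(illustrative only):

    def g_issue(dest_pend, insn, busy):
        waw_stall = dest_pend != 0                          # dest reg write-pending
        fu_stall = any(i & b for i, b in zip(insn, busy))   # chosen FU still busy
        return not (waw_stall or fu_stall)

    assert g_issue(0b0000, [1, 0, 0], [0, 0, 0]) is True
    assert g_issue(0b0010, [1, 0, 0], [0, 0, 0]) is False   # WAW stall
    assert g_issue(0b0000, [1, 0, 0], [1, 0, 0]) is False   # FU stall
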
diff --git a/src/scoreboard/ldst_dep_cell.py b/src/scoreboard/ldst_dep_cell.py
new file mode 100644
index 00000000..40e1ffbc
--- /dev/null
+++ b/src/scoreboard/ldst_dep_cell.py
@@ -0,0 +1,95 @@
+""" Mitch Alsup 6600-style LD/ST scoreboard Dependency Cell
+
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=81
+
+"""
+
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable
+from nmutil.latch import SRLatch
+
+
+class LDSTDepCell(Elaboratable):
+    """ implements 11.4.12 mitch alsup load/store dependence cell, p45
+    """
+    def __init__(self):
+        # inputs
+        self.load_i = Signal(reset_less=True)      # load pending in (top)
+        self.stor_i = Signal(reset_less=True)      # store pending in (top)
+        self.issue_i = Signal(reset_less=True)     # Issue in (top)
+
+        self.load_hit_i = Signal(reset_less=True)  # load hit in (right)
+        self.stwd_hit_i = Signal(reset_less=True)  # store w/ data hit in (right)
+
+        # outputs (latched rd/wr pend)
+        self.ld_hold_st_o = Signal(reset_less=True)  # load holds st out (left)
+        self.st_hold_ld_o = Signal(reset_less=True)  # st holds load out (left)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.war_l = war_l = SRLatch(sync=False)  # WriteAfterRead Latch
+        m.submodules.raw_l = raw_l = SRLatch(sync=False)  # ReadAfterWrite Latch
+
+        # issue & store & load - used for both WAR and RAW Setting
+        i_s_l = Signal(reset_less=True)
+        m.d.comb += i_s_l.eq(self.issue_i & self.stor_i & self.load_i)
+
+        # write after read latch: loads block stores
+        m.d.comb += war_l.s.eq(i_s_l)
+        m.d.comb += war_l.r.eq(self.load_i)  # reset on LD
+
+        # read after write latch: stores block loads
+        m.d.comb += raw_l.s.eq(i_s_l)
+        m.d.comb += raw_l.r.eq(self.stor_i)  # reset on ST
+
+        # Hold results (read out horizontally, accumulate in OR fashion)
+        m.d.comb += self.ld_hold_st_o.eq(war_l.qn & self.load_hit_i)
+        m.d.comb += self.st_hold_ld_o.eq(raw_l.qn & self.stwd_hit_i)
+
+        return m
+
+    def __iter__(self):
+        yield self.load_i
+        yield self.stor_i
+        yield self.issue_i
+        yield self.load_hit_i
+        yield self.stwd_hit_i
+        yield self.ld_hold_st_o
+        yield self.st_hold_ld_o
+
+    def ports(self):
+        return list(self)
+
+
+def dcell_sim(dut):
+    """ XXX TODO: proper test.  for now, just drive the actual LDSTDepCell
+        inputs (load / store / issue and the two hit lines).
+    """
+    yield dut.load_i.eq(1)
+    yield dut.stor_i.eq(1)
+    yield dut.issue_i.eq(1)
+    yield
+    yield dut.issue_i.eq(0)
+    yield
+    yield dut.load_hit_i.eq(1)
+    yield
+    yield dut.load_hit_i.eq(0)
+    yield
+    yield dut.stwd_hit_i.eq(1)
+    yield
+    yield dut.stwd_hit_i.eq(0)
+    yield
+
+def test_dcell():
+    dut = LDSTDepCell()
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_ldst_dcell.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, dcell_sim(dut), vcd_name='test_ldst_dcell.vcd')
+
+if __name__ == '__main__':
+    test_dcell()
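
Both latches in the cell are set by the same issue & store & load term; each
hold output is then the (inverted) latch state gated by the matching hit
line.  A rough boolean restatement of the two outputs (illustrative only):

    def holds(war_q, raw_q, load_hit, stwd_hit):
        ld_hold_st = (not war_q) and load_hit    # war_l.qn & load_hit_i
        st_hold_ld = (not raw_q) and stwd_hit    # raw_l.qn & stwd_hit_i
        return ld_hold_st, st_hold_ld
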
diff --git a/src/scoreboard/ldst_matrix.py b/src/scoreboard/ldst_matrix.py
new file mode 100644
index 00000000..b872155d
--- /dev/null
+++ b/src/scoreboard/ldst_matrix.py
@@ -0,0 +1,135 @@
+""" Mitch Alsup 6600-style LD/ST Memory Scoreboard Matrix (sparse vector)
+
+6600 LD/ST Dependency Table Matrix inputs / outputs
+---------------------------------------------------
+
+Relevant comments (p45-46):
+
+* If there are no WAR dependencies on a Load instruction with a computed
+  address it can assert Bank_Addressable and Translate_Addressable.
+
+* If there are no RAW dependencies on a Store instruction with both a
+  write permission and store data present it can assert Bank_Addressable.
+
+Relevant bugreports:
+* http://bugs.libre-riscv.org/show_bug.cgi?id=81
+
+"""
+
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Elaboratable, Array, Cat, Const
+
+from ldst_dep_cell import LDSTDepCell
+
+
+class LDSTDepMatrix(Elaboratable):
+    """ implements 11.4.12 mitch alsup LD/ST Dependency Matrix, p46
+        actually a sparse matrix along the diagonal.
+
+        load-hold-store and store-hold-load accumulate in a priority-picking
+        fashion, ORing together.  the OR gate from the dependency cell is
+        here.
+    """
+    def __init__(self, n_ldst):
+        self.n_ldst = n_ldst                               # X and Y (FUs)
+        self.load_i = Signal(n_ldst, reset_less=True)      # load pending in
+        self.stor_i = Signal(n_ldst, reset_less=True)      # store pending in
+        self.issue_i = Signal(n_ldst, reset_less=True)     # Issue in
+
+        self.load_hit_i = Signal(n_ldst, reset_less=True)  # load hit in
+        self.stwd_hit_i = Signal(n_ldst, reset_less=True)  # store w/data hit in
+
+        # outputs
+        self.ld_hold_st_o = Signal(reset_less=True)  # load holds st out
+        self.st_hold_ld_o = Signal(reset_less=True)  # st holds load out
+
+    def elaborate(self, platform):
+        m = Module()
+
+        # ---
+        # matrix of dependency cells
+        # ---
+        dm = Array(LDSTDepCell() for f in range(self.n_ldst))
+        for fu in range(self.n_ldst):
+            setattr(m.submodules, "dm_fu%d" % (fu), dm[fu])
+
+        # ---
+        # connect Function Unit vector
+        # ---
+        lhs_l = []
+        shl_l = []
+        load_l = []
+        stor_l = []
+        issue_l = []
+        lh_l = []
+        sh_l = []
+        for fu in range(self.n_ldst):
+            dc = dm[fu]
+            # accumulate load-hold-store / store-hold-load bits
+            lhs_l.append(dc.ld_hold_st_o)
+            shl_l.append(dc.st_hold_ld_o)
+            # accumulate inputs (for Cat'ing later) - TODO: must be a better way
+            load_l.append(dc.load_i)
+            stor_l.append(dc.stor_i)
+            issue_l.append(dc.issue_i)
+            lh_l.append(dc.load_hit_i)
+            sh_l.append(dc.stwd_hit_i)
+
+        # connect cell inputs using Cat(*list_of_stuff)
+        m.d.comb += [Cat(*load_l).eq(self.load_i),
+                     Cat(*stor_l).eq(self.stor_i),
+                     Cat(*issue_l).eq(self.issue_i),
+                     Cat(*lh_l).eq(self.load_hit_i),
+                     Cat(*sh_l).eq(self.stwd_hit_i),
+                    ]
+        # set the load-hold-store / store-hold-load OR-accumulated outputs
+        m.d.comb += self.ld_hold_st_o.eq(Cat(*lhs_l).bool())
+        m.d.comb += self.st_hold_ld_o.eq(Cat(*shl_l).bool())
+
+        return m
+
+    def __iter__(self):
+        yield self.load_i
+        yield self.stor_i
+        yield self.issue_i
+        yield self.load_hit_i
+        yield self.stwd_hit_i
+        yield self.ld_hold_st_o
+        yield self.st_hold_ld_o
+
+    def ports(self):
+        return list(self)
+
+def d_matrix_sim(dut):
+    """ XXX TODO: proper test.  for now, just drive the actual LDSTDepMatrix
+        inputs (per-FU load / store / issue vectors and the hit vectors).
+    """
+    yield dut.load_i.eq(0b0001)
+    yield dut.stor_i.eq(0b0010)
+    yield dut.issue_i.eq(0b0011)
+    yield
+    yield dut.issue_i.eq(0)
+    yield
+    yield dut.load_hit_i.eq(0b0001)
+    yield
+    yield dut.load_hit_i.eq(0)
+    yield
+    yield dut.stwd_hit_i.eq(0b0010)
+    yield
+    yield dut.stwd_hit_i.eq(0)
+    yield
+
+def test_d_matrix():
+    dut = LDSTDepMatrix(n_ldst=4)
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_ld_st_matrix.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, d_matrix_sim(dut), vcd_name='test_ld_st_matrix.vcd')
+
+if __name__ == '__main__':
+    test_d_matrix()
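
The Cat(*list).eq(vector) statements above are just per-bit fan-out: cell N
receives bit N of each input vector, and the two hold outputs are the OR of
every cell's hold bit.  A rough pure-python equivalent of that wiring
(illustrative only):

    def fan_out(vec, n):
        return [(vec >> fu) & 1 for fu in range(n)]  # bit fu goes to cell fu

    def or_fan_in(bits):
        return int(any(bits))                        # Cat(*bits).bool()

    assert fan_out(0b0101, 4) == [1, 0, 1, 0]
    assert or_fan_in([0, 0, 1, 0]) == 1
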
diff --git a/src/scoreboard/reg_select.py b/src/scoreboard/reg_select.py
new file mode 100644
index 00000000..eca3328e
--- /dev/null
+++ b/src/scoreboard/reg_select.py
@@ -0,0 +1,23 @@
+from nmigen import Elaboratable, Module, Signal
+
+
+class Reg_Rsv(Elaboratable):
+    """ these are allocated per-Register (vertically),
+        and are each of length fu_count
+    """
+    def __init__(self, fu_count):
+        self.fu_count = fu_count
+        self.dest_rsel_i = Signal(fu_count, reset_less=True)
+        self.src1_rsel_i = Signal(fu_count, reset_less=True)
+        self.src2_rsel_i = Signal(fu_count, reset_less=True)
+        self.dest_rsel_o = Signal(reset_less=True)
+        self.src1_rsel_o = Signal(reset_less=True)
+        self.src2_rsel_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.d.comb += self.dest_rsel_o.eq(self.dest_rsel_i.bool())
+        m.d.comb += self.src1_rsel_o.eq(self.src1_rsel_i.bool())
+        m.d.comb += self.src2_rsel_o.eq(self.src2_rsel_i.bool())
+        return m
+
diff --git a/src/scoreboard/shadow_fn.py b/src/scoreboard/shadow_fn.py
new file mode 100644
index 00000000..a60f9d95
--- /dev/null
+++ b/src/scoreboard/shadow_fn.py
@@ -0,0 +1,79 @@
+from nmigen.compat.sim import run_simulation
+from nmigen.cli import verilog, rtlil
+from nmigen import Module, Signal, Cat, Elaboratable
+from nmutil.latch import SRLatch
+from nmigen.lib.coding import Decoder
+
+
+class ShadowFn(Elaboratable):
+    """ implements shadowing 11.5.1, p55, just the individual shadow function
+    """
+    def __init__(self):
+
+        # inputs
+        self.issue_i = Signal(reset_less=True)
+        self.shadow_i = Signal(reset_less=True)
+        self.s_fail_i = Signal(reset_less=True)
+        self.s_good_i = Signal(reset_less=True)
+
+        # outputs
+        self.shadow_o = Signal(reset_less=True)
+        self.recover_o = Signal(reset_less=True)
+
+    def elaborate(self, platform):
+        m = Module()
+        m.submodules.sl = sl = SRLatch(sync=False)
+
+        m.d.comb += sl.s.eq(self.shadow_i & self.issue_i)
+        m.d.comb += sl.r.eq(self.s_good_i)
+        m.d.comb += self.recover_o.eq(sl.q & self.s_fail_i)
+        m.d.comb += self.shadow_o.eq(sl.q)
+
+        return m
+
+    def __iter__(self):
+        yield self.issue_i
+        yield self.shadow_i
+        yield self.s_fail_i
+        yield self.s_good_i
+        yield self.shadow_o
+        yield self.recover_o
+
+    def ports(self):
+        return list(self)
+
+
+def shadow_fn_unit_sim(dut):
+    """ XXX TODO: proper test.  for now, just drive the actual ShadowFn
+        inputs (shadow / issue, then a fail or good report).
+    """
+    yield dut.shadow_i.eq(1)
+    yield dut.issue_i.eq(1)
+    yield
+    yield dut.issue_i.eq(0)
+    yield dut.shadow_i.eq(0)
+    yield
+    yield dut.s_fail_i.eq(1)
+    yield
+    yield dut.s_fail_i.eq(0)
+    yield
+    yield dut.s_good_i.eq(1)
+    yield
+    yield dut.s_good_i.eq(0)
+    yield
+
+
+def test_shadow_fn_unit():
+    dut = ShadowFn()
+    vl = rtlil.convert(dut, ports=dut.ports())
+    with open("test_shadow_fn_unit.il", "w") as f:
+        f.write(vl)
+
+    run_simulation(dut, shadow_fn_unit_sim(dut),
+                   vcd_name='test_shadow_fn_unit.vcd')
+
+if __name__ == '__main__':
+    test_shadow_fn_unit()
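
The shadow function is a single SR latch: it is set when a shadowed
instruction issues, cleared by a good report, and while set a fail report
raises recover_o.  A rough pure-python model (illustrative only; the real
SRLatch's set/reset tie-break may differ - reset is shown winning here):

    def shadow_step(q, issue, shadow, s_good, s_fail):
        if shadow and issue:            # sl.s: issue under a shadow
            q = True
        if s_good:                      # sl.r: good report clears the shadow
            q = False
        shadow_o = q
        recover_o = q and s_fail        # fail while still shadowed -> recover
        return q, shadow_o, recover_o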