From: Luke Kenneth Casson Leighton Date: Tue, 7 May 2019 05:42:28 +0000 (+0100) Subject: reorg TLB src X-Git-Tag: div_pipeline~2135 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5d56ccfe26b32a1e485f982129133c031a65e2ed;p=soc.git reorg TLB src --- diff --git a/src/TLB/AddressEncoder.py b/src/TLB/AddressEncoder.py new file mode 100644 index 00000000..4c4b8d76 --- /dev/null +++ b/src/TLB/AddressEncoder.py @@ -0,0 +1,75 @@ +from nmigen import Module, Signal +from nmigen.lib.coding import Encoder, PriorityEncoder + +class AddressEncoder(): + """Address Encoder + + The purpose of this module is to take in a vector and + encode the bits that are one hot into an address. This module + combines both nmigen's Encoder and PriorityEncoder and will state + whether the input line has a single bit hot, multiple bits hot, + or no bits hot. The output line will always have the lowest value + address output. + + Usage: + The output is valid when either single or multiple match is high. + Otherwise output is 0. + """ + def __init__(self, width): + """ Arguments: + * width: The desired length of the input vector + """ + # Internal + self.encoder = Encoder(width) + self.p_encoder = PriorityEncoder(width) + + # Input + self.i = Signal(width) + + # Output + self.single_match = Signal(1) + self.multiple_match = Signal(1) + self.o = Signal(max=width) + + def elaborate(self, platform=None): + m = Module() + + # Add internal submodules + m.submodules.encoder = self.encoder + m.submodules.p_encoder = self.p_encoder + + m.d.comb += [ + self.encoder.i.eq(self.i), + self.p_encoder.i.eq(self.i) + ] + + # Steps: + # 1. check if the input vector is non-zero + # 2. if non-zero, check if single match or multiple match + # 3. 
set output line to be lowest value address output + + # If the priority encoder receives an input of 0 + # If n is 1 then the output is not valid + with m.If(self.p_encoder.n): + m.d.comb += [ + self.single_match.eq(0), + self.multiple_match.eq(0), + self.o.eq(0) + ] + # If the priority encoder receives an input > 0 + with m.Else(): + # Multiple Match if encoder n is invalid + with m.If(self.encoder.n): + m.d.comb += [ + self.single_match.eq(0), + self.multiple_match.eq(1) + ] + # Single Match if encoder n is valid + with m.Else(): + m.d.comb += [ + self.single_match.eq(1), + self.multiple_match.eq(0) + ] + # Always set output based on priority encoder output + m.d.comb += self.o.eq(self.p_encoder.o) + return m diff --git a/src/TLB/Cam.py b/src/TLB/Cam.py new file mode 100644 index 00000000..3c499211 --- /dev/null +++ b/src/TLB/Cam.py @@ -0,0 +1,124 @@ +from nmigen import Array, Cat, Module, Signal +from nmigen.lib.coding import Decoder +from nmigen.cli import main #, verilog + +from CamEntry import CamEntry +from AddressEncoder import AddressEncoder + +class Cam(): + """ Content Addressable Memory (CAM) + + The purpose of this module is to quickly look up whether an + entry exists given a data key. + This module will search for the given data in all internal entries + and output whether a single or multiple match was found. + If a single entry is found the address will be returned and single_match + is set HIGH. If multiple entries are found the lowest address is + returned and multiple_match is set HIGH. If neither single_match nor + multiple_match are HIGH this implies no match was found. To write + to the CAM set the address bus to the desired entry and set write_enable + HIGH. Entry management should be performed one level above this block + as lookup is performed within. + + Notes: + The read and write operations take one clock cycle to complete. + Currently the read_warning line is present for interfacing but + is not necessary for this design. 
This module is capable of writing + in the first cycle, reading on the second, and output the correct + address on the third. + """ + + def __init__(self, data_size, cam_size): + """ Arguments: + * data_size: (bits) The bit size of the data + * cam_size: (number) The number of entries in the CAM + """ + + # Internal + self.cam_size = cam_size + self.encoder = AddressEncoder(cam_size) + self.decoder = Decoder(cam_size) + self.entry_array = Array(CamEntry(data_size) for x in range(cam_size)) + + # Input + self.enable = Signal(1) + self.write_enable = Signal(1) + self.data_in = Signal(data_size) # The data to be written + self.data_mask = Signal(data_size) # mask for ternary writes + self.address_in = Signal(max=cam_size) # address of CAM Entry to write + + # Output + self.read_warning = Signal(1) # High when a read interrupts a write + self.single_match = Signal(1) # High when there is only one match + self.multiple_match = Signal(1) # High when there at least two matches + self.match_address = Signal(max=cam_size) # The lowest address matched + + def elaborate(self, platform=None): + m = Module() + # AddressEncoder for match types and output address + m.submodules.AddressEncoder = self.encoder + # Decoder is used to select which entry will be written to + m.submodules.Decoder = self.decoder + # CamEntry Array Submodules + # Note these area added anonymously + entry_array = self.entry_array + m.submodules += entry_array + + # Decoder logic + m.d.comb += [ + self.decoder.i.eq(self.address_in), + self.decoder.n.eq(0) + ] + + encoder_vector = [] + with m.If(self.enable): + # Set the key value for every CamEntry + for index in range(self.cam_size): + + # Write Operation + with m.If(self.write_enable): + with m.If(self.decoder.o[index]): + m.d.comb += entry_array[index].command.eq(2) + with m.Else(): + m.d.comb += entry_array[index].command.eq(0) + + # Read Operation + with m.Else(): + m.d.comb += entry_array[index].command.eq(1) + + # Send data input to all entries + 
m.d.comb += entry_array[index].data_in.eq(self.data_in) + # Send all entry matches to encoder + ematch = entry_array[index].match + encoder_vector.append(ematch) + + # Give input to and accept output from encoder module + m.d.comb += [ + self.encoder.i.eq(Cat(*encoder_vector)), + self.single_match.eq(self.encoder.single_match), + self.multiple_match.eq(self.encoder.multiple_match), + self.match_address.eq(self.encoder.o) + ] + + # If the CAM is not enabled set all outputs to 0 + with m.Else(): + m.d.comb += [ + self.read_warning.eq(0), + self.single_match.eq(0), + self.multiple_match.eq(0), + self.match_address.eq(0) + ] + + return m + + def ports(self): + return [self.enable, self.write_enable, + self.data_in, self.data_mask, self.address_in, # all inputs + self.read_warning, self.single_match, + self.multiple_match, self.match_address] + + +if __name__ == '__main__': + cam = Cam(4, 4) + main(cam, ports=cam.ports()) + diff --git a/src/TLB/CamEntry.py b/src/TLB/CamEntry.py new file mode 100644 index 00000000..73081ce5 --- /dev/null +++ b/src/TLB/CamEntry.py @@ -0,0 +1,45 @@ +from nmigen import Module, Signal + +class CamEntry: + """ Content Addressable Memory (CAM) Entry + + The purpose of this module is to represent an entry within a CAM. + This module when given a read command will compare the given data + and output whether a match was found or not. When given a write + command it will write the given data into internal registers. 
+ """ + + def __init__(self, data_size): + """ Arguments: + * data_size: (bit count) The size of the data + """ + # Input + self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset + self.data_in = Signal(data_size) # Data input when writing + + # Output + self.match = Signal(1) # Result of the internal/input key comparison + self.data = Signal(data_size) + + def elaborate(self, platform=None): + m = Module() + with m.Switch(self.command): + with m.Case("00"): + m.d.sync += self.match.eq(0) + with m.Case("01"): + with m.If(self.data == self.data_in): + m.d.sync += self.match.eq(1) + with m.Else(): + m.d.sync += self.match.eq(0) + with m.Case("10"): + m.d.sync += [ + self.data.eq(self.data_in), + self.match.eq(0) + ] + with m.Case(): + m.d.sync += [ + self.match.eq(0), + self.data.eq(0) + ] + + return m diff --git a/src/TLB/LFSR.py b/src/TLB/LFSR.py new file mode 100644 index 00000000..d8b606ec --- /dev/null +++ b/src/TLB/LFSR.py @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information +from nmigen import Signal, Module, Const, Cat, Elaboratable +from nmigen.cli import verilog, rtlil + + +class LFSRPolynomial(set): + """ implements a polynomial for use in LFSR + """ + def __init__(self, exponents=()): + for e in exponents: + assert isinstance(e, int), TypeError("%s must be an int" % repr(e)) + assert (e >= 0), ValueError("%d must not be negative" % e) + set.__init__(self, set(exponents).union({0})) # must contain zero + + @property + def max_exponent(self): + return max(self) # derived from set, so this returns the max exponent + + @property + def exponents(self): + exponents = list(self) # get elements of set as a list + exponents.sort(reverse=True) + return exponents + + def __str__(self): + expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2) + retval = map(lambda i: expd[min(i,2)].format(i), self.exponents) + return " + ".join(retval) + + def __repr__(self): + return 
"LFSRPolynomial(%s)" % self.exponents + + +# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa +LFSR_POLY_2 = LFSRPolynomial([2, 1, 0]) +LFSR_POLY_3 = LFSRPolynomial([3, 2, 0]) +LFSR_POLY_4 = LFSRPolynomial([4, 3, 0]) +LFSR_POLY_5 = LFSRPolynomial([5, 3, 0]) +LFSR_POLY_6 = LFSRPolynomial([6, 5, 0]) +LFSR_POLY_7 = LFSRPolynomial([7, 6, 0]) +LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0]) +LFSR_POLY_9 = LFSRPolynomial([9, 5, 0]) +LFSR_POLY_10 = LFSRPolynomial([10, 7, 0]) +LFSR_POLY_11 = LFSRPolynomial([11, 9, 0]) +LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0]) +LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0]) +LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0]) +LFSR_POLY_15 = LFSRPolynomial([15, 14, 0]) +LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0]) +LFSR_POLY_17 = LFSRPolynomial([17, 14, 0]) +LFSR_POLY_18 = LFSRPolynomial([18, 11, 0]) +LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0]) +LFSR_POLY_20 = LFSRPolynomial([20, 17, 0]) +LFSR_POLY_21 = LFSRPolynomial([21, 19, 0]) +LFSR_POLY_22 = LFSRPolynomial([22, 21, 0]) +LFSR_POLY_23 = LFSRPolynomial([23, 18, 0]) +LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0]) + + +class LFSR(LFSRPolynomial, Elaboratable): + """ implements a Linear Feedback Shift Register + """ + def __init__(self, polynomial): + """ Inputs: + ------ + :polynomial: the polynomial to feedback on. may be a LFSRPolynomial + instance or an iterable of ints (list/tuple/generator) + :enable: enable (set LO to disable. NOTE: defaults to HI) + + Outputs: + ------- + :state: the LFSR state. bitwidth is taken from the polynomial + maximum exponent. 
+ + Note: if an LFSRPolynomial is passed in as the input, because + LFSRPolynomial is derived from set() it's ok: + LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p) + """ + LFSRPolynomial.__init__(self, polynomial) + self.state = Signal(self.max_exponent, reset=1) + self.enable = Signal(reset=1) + + def elaborate(self, platform): + m = Module() + # do absolutely nothing if the polynomial is empty (always has a zero) + if self.max_exponent <= 1: + return m + + # create XOR-bunch, select bits from state based on exponent + feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain) + for exponent in self: + if exponent > 0: # don't have to skip, saves CPU cycles though + feedback ^= self.state[exponent - 1] + + # if enabled, shift-and-feedback + with m.If(self.enable): + # shift up lower bits by Cat'ing in a new bit zero (feedback) + newstate = Cat(feedback, self.state[:-1]) + m.d.sync += self.state.eq(newstate) + + return m + + +# example: Poly24 +if __name__ == '__main__': + p24 = rtlil.convert(LFSR(LFSR_POLY_24)) + with open("lfsr2_p24.il", "w") as f: + f.write(p24) diff --git a/src/TLB/LFSR.pyi b/src/TLB/LFSR.pyi new file mode 100644 index 00000000..64eb9115 --- /dev/null +++ b/src/TLB/LFSR.pyi @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: LGPL-2.1-or-later +# See Notices.txt for copyright information +from nmigen import Module +from typing import Iterable, Optional, Iterator, Any, Union +from typing_extensions import final + + +@final +class LFSRPolynomial(set): + def __init__(self, exponents: Iterable[int] = ()): + def elements() -> Iterable[int]: ... + @property + def exponents(self) -> list[int]: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + + +@final +class LFSR: + def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ... + @property + def width(self) -> int: ... + def elaborate(self, platform: Any) -> Module: ... 
diff --git a/src/TLB/Makefile b/src/TLB/Makefile new file mode 100644 index 00000000..1eb67acc --- /dev/null +++ b/src/TLB/Makefile @@ -0,0 +1,2 @@ +verilog: + python3 Cam.py generate -t v > Cam.v diff --git a/src/TLB/MemorySet.py b/src/TLB/MemorySet.py new file mode 100644 index 00000000..ea61bdf5 --- /dev/null +++ b/src/TLB/MemorySet.py @@ -0,0 +1,66 @@ +from nmigen import Cat, Memory, Module, Signal, Elaboratable +from nmigen.cli import main +from nmigen.cli import verilog, rtlil + + +class MemorySet(Elaboratable): + def __init__(self, data_size, tag_size, set_count, active): + self.active = active + input_size = tag_size + data_size # Size of the input data + memory_width = input_size + 1 # The width of the cache memory + self.active = active + self.data_size = data_size + self.tag_size = tag_size + + # XXX TODO, use rd-enable and wr-enable? + self.mem = Memory(memory_width, set_count) + self.r = self.mem.read_port() + self.w = self.mem.write_port() + + # inputs (address) + self.cset = Signal(max=set_count) # The set to be checked + self.tag = Signal(tag_size) # The tag to find + self.data_i = Signal(data_size) # Incoming data + + # outputs + self.valid = Signal() + self.data_o = Signal(data_size) # Outgoing data (excludes tag) + + def elaborate(self, platform): + m = Module() + m.submodules.mem = self.mem + m.submodules.r = self.r + m.submodules.w = self.w + + # temporaries + active_bit = Signal() + tag_valid = Signal() + data_start = self.active + 1 + data_end = data_start + self.data_size + tag_start = data_end + tag_end = tag_start + self.tag_size + + # connect the read port address to the set/entry + read_port = self.r + m.d.comb += read_port.addr.eq(self.cset) + # Pull out active bit from data + data = read_port.data + m.d.comb += active_bit.eq(data[self.active]) + # Validate given tag vs stored tag + tag = data[tag_start:tag_end] + m.d.comb += tag_valid.eq(self.tag == tag) + # An entry is only valid if the tags match AND + # is marked as a valid entry + 
m.d.comb += self.valid.eq(tag_valid & active_bit) + + # output data: TODO, check rd-enable? + m.d.comb += self.data_o.eq(data[data_start:data_end]) + + # connect the write port addr to the set/entry (only if write enabled) + # (which is only done on a match, see SAC.write_entry below) + write_port = self.w + with m.If(write_port.en): + m.d.comb += write_port.addr.eq(self.cset) + m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag)) + + return m diff --git a/src/TLB/PermissionValidator.py b/src/TLB/PermissionValidator.py new file mode 100644 index 00000000..14f01e42 --- /dev/null +++ b/src/TLB/PermissionValidator.py @@ -0,0 +1,67 @@ +from nmigen import Module, Signal +from nmigen.cli import main + +from PteEntry import PteEntry + +class PermissionValidator(): + """ The purpose of this Module is to check the Permissions of a given PTE + against the requested access permissions. + + This module will either validate (by setting the valid bit HIGH) + the request or find a permission fault and invalidate (by setting + the valid bit LOW) the request + """ + + def __init__(self, asid_size, pte_size): + """ Arguments: + * asid_size: (bit count) The size of the asid to be processed + * pte_size: (bit count) The size of the pte to be processed + + Return: + * valid HIGH when permissions are correct + """ + # Internal + self.pte_entry = PteEntry(asid_size, pte_size) + + # Input + self.data = Signal(asid_size + pte_size) # ASID in the upper bits, PTE in the lower bits + self.xwr = Signal(3) # Execute, Write, Read + self.super_mode = Signal(1) # Supervisor Mode + self.super_access = Signal(1) # Supervisor Access + self.asid = Signal(asid_size) # Address Space IDentifier (ASID), same width as pte_entry.asid + + # Output + self.valid = Signal(1) # Denotes if the permissions are correct + + def elaborate(self, platform=None): + m = Module() + + m.submodules.pte_entry = self.pte_entry + + m.d.comb += self.pte_entry.i.eq(self.data) + + # Check if the entry is valid + with m.If(self.pte_entry.v): + # ASID match or Global Permission + # Note that the MSB bound 
is exclusive + with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g): + # Check Execute, Write, Read (XWR) Permissions + with m.If(self.pte_entry.xwr == self.xwr): + # Supervisor Logic + with m.If(self.super_mode): + # Valid if entry is not in user mode or supervisor + # has Supervisor User Memory (SUM) access via the + # SUM bit in the sstatus register + m.d.comb += self.valid.eq((~self.pte_entry.u) \ + | self.super_access) + # User logic + with m.Else(): + # Valid if the entry is in user mode only + m.d.comb += self.valid.eq(self.pte_entry.u) + with m.Else(): + m.d.comb += self.valid.eq(0) + with m.Else(): + m.d.comb += self.valid.eq(0) + with m.Else(): + m.d.comb += self.valid.eq(0) + return m \ No newline at end of file diff --git a/src/TLB/PteEntry.py b/src/TLB/PteEntry.py new file mode 100644 index 00000000..c0705457 --- /dev/null +++ b/src/TLB/PteEntry.py @@ -0,0 +1,66 @@ +from nmigen import Module, Signal +from nmigen.cli import main + +class PteEntry(): + """ The purpose of this Module is to centralize the parsing of Page + Table Entries (PTE) into one module to prevent common mistakes + and duplication of code. The control bits are parsed out for + ease of use. + + This module parses according to the standard PTE given by the + Volume II: RISC-V Privileged Architectures V1.10 Pg 60. + The Address Space IDentifier (ASID) is appended to the MSB of the input + and is parsed out as such. + + An valid input Signal would be: + ASID PTE + Bits:[78-64][63-0] + + The output PTE value will include the control bits. 
+ """ + def __init__(self, asid_size, pte_size): + """ Arguments: + * asid_size: (bit count) The size of the asid to be processed + * pte_size: (bit count) The size of the pte to be processed + + Return: + * d The Dirty bit from the PTE portion of i + * a The Accessed bit from the PTE portion of i + * g The Global bit from the PTE portion of i + * u The User Mode bit from the PTE portion of i + * xwr The Execute/Write/Read bit from the PTE portion of i + * v The Valid bit from the PTE portion of i + * asid The asid portion of i + * pte The pte portion of i + """ + # Internal + self.asid_start = pte_size + self.asid_end = pte_size + asid_size + + # Input + self.i = Signal(asid_size + pte_size) + + # Output + self.d = Signal(1) # Dirty bit (From pte) + self.a = Signal(1) # Accessed bit (From pte) + self.g = Signal(1) # Global Access (From pte) + self.u = Signal(1) # User Mode (From pte) + self.xwr = Signal(3) # Execute Read Write (From pte) + self.v = Signal(1) # Valid (From pte) + self.asid = Signal(asid_size) # Associated Address Space IDentifier + self.pte = Signal(pte_size) # Full Page Table Entry + + def elaborate(self, platform=None): + m = Module() + # Pull out all control bites from PTE + m.d.comb += [ + self.d.eq(self.i[7]), + self.a.eq(self.i[6]), + self.g.eq(self.i[5]), + self.u.eq(self.i[4]), + self.xwr.eq(self.i[1:4]), + self.v.eq(self.i[0]) + ] + m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end]) + m.d.comb += self.pte.eq(self.i[0:self.asid_start]) + return m \ No newline at end of file diff --git a/src/TLB/SetAssociativeCache.py b/src/TLB/SetAssociativeCache.py new file mode 100644 index 00000000..0acd3488 --- /dev/null +++ b/src/TLB/SetAssociativeCache.py @@ -0,0 +1,274 @@ +""" + +Online simulator of 4-way set-associative cache: +http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html + +Python simulator of a N-way set-associative cache: +https://github.com/vaskevich/CacheSim/blob/master/cachesim.py +""" +import sys 
+sys.path.append("ariane/src/") + +from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable +from nmigen.compat.genlib import fsm +from nmigen.cli import main +from nmigen.cli import verilog, rtlil + +from AddressEncoder import AddressEncoder +from MemorySet import MemorySet + +# TODO: use a LFSR that advances continuously and picking the bottom +# few bits from it to select which cache line to replace, instead of PLRU +# http://bugs.libre-riscv.org/show_bug.cgi?id=71 +from plru import PLRU +from LFSR import LFSR, LFSR_POLY_24 + +SA_NA = "00" # no action (none) +SA_RD = "01" # read +SA_WR = "10" # write + + +class SetAssociativeCache(Elaboratable): + """ Set Associative Cache Memory + + The purpose of this module is to generate a memory cache given the + constraints passed in. This will create a n-way set associative cache. + It is expected for the SV TLB that the VMA will provide the set number + while the ASID provides the tag (still to be decided). + + """ + def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False): + """ Arguments + * tag_size (bits): The bit count of the tag + * data_size (bits): The bit count of the data to be stored + * set_count (number): The number of sets/entries in the cache + * way_count (number): The number of slots a data can be stored + in one set + * lfsr: if set, use an LFSR for (pseudo-randomly) selecting + set/entry to write to. 
otherwise, use a PLRU + """ + # Internals + self.lfsr_mode = lfsr + self.way_count = way_count # The number of slots in one set + self.tag_size = tag_size # The bit count of the tag + self.data_size = data_size # The bit count of the data to be stored + + # set up Memory array + self.mem_array = Array() # memory array + for i in range(way_count): + ms = MemorySet(data_size, tag_size, set_count, active=0) + self.mem_array.append(ms) + + # Finds valid entries + self.encoder = AddressEncoder(way_count) + + # setup PLRU or LFSR + if lfsr: + # LFSR mode + self.lfsr = LFSR(LFSR_POLY_24) + else: + # PLRU mode + self.plru = PLRU(way_count) # One block to handle plru calculations + self.plru_array = Array() # PLRU data on each set + for i in range(set_count): + name="plru%d" % i + self.plru_array.append(Signal(self.plru.TLBSZ, name=name)) + + # Input + self.enable = Signal(1) # Whether the cache is enabled + self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX) + self.cset = Signal(max=set_count) # The set to be checked + self.tag = Signal(tag_size) # The tag to find + self.data_i = Signal(data_size) # The input data + + # Output + self.ready = Signal(1) # 0 => Processing 1 => Ready for commands + self.hit = Signal(1) # Tag matched one way in the given set + self.multiple_hit = Signal(1) # Tag matched many ways in the given set + self.data_o = Signal(data_size) # The data linked to the matched tag + + def check_tags(self, m): + """ Validate the tags in the selected set. If one and only one + tag matches set its state to zero and increment all others + by one. We only advance to next state if a single hit is found. 
+ """ + # Vector to store way valid results + # A zero denotes a way is invalid + valid_vector = [] + # Loop through memory to prep read/write ports and set valid_vector + for i in range(self.way_count): + valid_vector.append(self.mem_array[i].valid) + + # Pass encoder the valid vector + m.d.comb += self.encoder.i.eq(Cat(*valid_vector)) + + # Only one entry should be marked + # This is due to already verifying the tags + # matched and the valid bit is high + with m.If(self.hit): + m.next = "FINISHED_READ" + # Pull out data from the read port + data = self.mem_array[self.encoder.o].data_o + m.d.comb += self.data_o.eq(data) + if not self.lfsr_mode: + self.access_plru(m) + + # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k + with m.Elif(self.multiple_hit): + # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck + m.d.comb += self.data_o.eq(0) + + # No tag matches means no data + with m.Else(): + # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck + m.d.comb += self.data_o.eq(0) + + def access_plru(self, m): + """ An entry was accessed and the plru tree must now be updated + """ + # Pull out the set's entry being edited + plru_entry = self.plru_array[self.cset] + m.d.comb += [ + # Set the plru data to the current state + self.plru.plru_tree.eq(plru_entry), + # Set that the cache was accessed + self.plru.lu_access_i.eq(1) + ] + + def read(self, m): + """ Go through the read process of the cache. + This takes two cycles to complete. First it checks for a valid tag + and secondly it updates the LRU values. 
+ """ + with m.FSM() as fsm_read: + with m.State("READY"): + m.d.comb += self.ready.eq(0) + # check_tags will set the state if the conditions are met + self.check_tags(m) + with m.State("FINISHED_READ"): + m.next = "READY" + m.d.comb += self.ready.eq(1) + if not self.lfsr_mode: + plru_tree_o = self.plru.plru_tree_o + m.d.sync += self.plru_array[self.cset].eq(plru_tree_o) + + def write_entry(self, m): + if not self.lfsr_mode: + m.d.comb += [# set cset (mem address) into PLRU + self.plru.plru_tree.eq(self.plru_array[self.cset]), + # and connect plru to encoder for write + self.encoder.i.eq(self.plru.replace_en_o) + ] + write_port = self.mem_array[self.encoder.o].w + else: + # use the LFSR to generate a random(ish) one of the mem array + lfsr_output = Signal(max=self.way_count) + lfsr_random = Signal(max=self.way_count) + m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits + # valid ways are 0..way_count-1, so wrap any value >= way_count + m.d.comb += lfsr_random.eq(Mux(lfsr_output >= self.way_count, + lfsr_output - self.way_count, + lfsr_output)) + write_port = self.mem_array[lfsr_random].w + + # then if there is a match from the encoder, enable the selected write + with m.If(self.encoder.single_match): + m.d.comb += write_port.en.eq(1) + + def write(self, m): + """ Go through the write process of the cache. + This takes two cycles to complete. 
First it writes the entry, + and secondly it updates the PLRU (in plru mode) + """ + with m.FSM() as fsm_write: + with m.State("READY"): + m.d.comb += self.ready.eq(0) + self.write_entry(m) + m.next ="FINISHED_WRITE" + with m.State("FINISHED_WRITE"): + m.d.comb += self.ready.eq(1) + if not self.lfsr_mode: + plru_entry = self.plru_array[self.cset] + m.d.sync += plru_entry.eq(self.plru.plru_tree_o) + m.next = "READY" + + + def elaborate(self, platform=None): + m = Module() + + # ---- + # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array + # ---- + + m.submodules.AddressEncoder = self.encoder + if self.lfsr_mode: + m.submodules.LFSR = self.lfsr + else: + m.submodules.PLRU = self.plru + + for i, mem in enumerate(self.mem_array): + setattr(m.submodules, "mem%d" % i, mem) + + # ---- + # select mode: PLRU connect to encoder, LFSR do... something + # ---- + + if not self.lfsr_mode: + # Set what entry was hit + m.d.comb += self.plru.lu_hit.eq(self.encoder.o) + else: + # enable LFSR + m.d.comb += self.lfsr.enable.eq(self.enable) + + # ---- + # connect hit/multiple hit to encoder output + # ---- + + m.d.comb += [ + self.hit.eq(self.encoder.single_match), + self.multiple_hit.eq(self.encoder.multiple_match), + ] + + # ---- + # connect incoming data/tag/cset(addr) to mem_array + # ---- + + for mem in self.mem_array: + write_port = mem.w + m.d.comb += [mem.cset.eq(self.cset), + mem.tag.eq(self.tag), + mem.data_i.eq(self.data_i), + write_port.en.eq(0), # default: disable write + ] + # ---- + # Commands: READ/WRITE/TODO + # ---- + + with m.If(self.enable): + with m.Switch(self.command): + # Search all sets at a particular tag + with m.Case(SA_RD): + self.read(m) + with m.Case(SA_WR): + self.write(m) + # Maybe catch multiple tags write here? + # TODO + # TODO: invalidate/flush, flush-all? 
+ + return m + + def ports(self): + return [self.enable, self.command, self.cset, self.tag, self.data_i, + self.ready, self.hit, self.multiple_hit, self.data_o] + + +if __name__ == '__main__': + sac = SetAssociativeCache(4, 8, 4, 6) + vl = rtlil.convert(sac, ports=sac.ports()) + with open("SetAssociativeCache.il", "w") as f: + f.write(vl) + + sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True) + vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports()) + with open("SetAssociativeCacheLFSR.il", "w") as f: + f.write(vl) diff --git a/src/TLB/TLB.py b/src/TLB/TLB.py new file mode 100644 index 00000000..3538bdc1 --- /dev/null +++ b/src/TLB/TLB.py @@ -0,0 +1,173 @@ +""" TLB Module + + The expected form of the data is: + * Item (Bits) + * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0) +""" + +from nmigen import Memory, Module, Signal, Cat +from nmigen.cli import main + +from PermissionValidator import PermissionValidator +from Cam import Cam + +class TLB(): + def __init__(self, asid_size, vma_size, pte_size, L1_size): + """ Arguments + * asid_size: Address Space IDentifier (ASID) typically 15 bits + * vma_size: Virtual Memory Address (VMA) typically 36 bits + * pte_size: Page Table Entry (PTE) typically 64 bits + + Notes: + These arguments should represent the largest possible size + defined by the MODE settings. See + Volume II: RISC-V Privileged Architectures V1.10 Page 57 + """ + + # Internal + self.state = 0 + # L1 Cache Modules + L1_size = 8 # XXX overridden incoming argument? 
+ self.cam_L1 = Cam(vma_size, L1_size) + self.mem_L1 = Memory(asid_size + pte_size, L1_size) + + # Permission Validator + self.perm_validator = PermissionValidator(asid_size, pte_size) + + # Inputs + self.supermode = Signal(1) # Supervisor Mode + self.super_access = Signal(1) # Supervisor Access + self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2 + self.xwr = Signal(3) # Execute, Write, Read + self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64 + self.address_L1 = Signal(max=L1_size) + self.asid = Signal(asid_size) # Address Space IDentifier (ASID) + self.vma = Signal(vma_size) # Virtual Memory Address (VMA) + self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE) + + # Outputs + self.hit = Signal(1) # Denotes if the VMA had a mapped PTE + self.perm_valid = Signal(1) # Denotes if the permissions are correct + self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA + + def search(self, m, read_L1, write_L1): + """ searches the TLB + """ + m.d.comb += [ + write_L1.en.eq(0), + self.cam_L1.write_enable.eq(0), + self.cam_L1.data_in.eq(self.vma) + ] + # Match found in L1 CAM + match_found = Signal(reset_less=True) + m.d.comb += match_found.eq(self.cam_L1.single_match + | self.cam_L1.multiple_match) + with m.If(match_found): + # Memory shortcut variables + mem_address = self.cam_L1.match_address + # Memory Logic + m.d.comb += read_L1.addr.eq(mem_address) + # Permission Validator Logic + m.d.comb += [ + self.hit.eq(1), + # Set permission validator data to the correct + # register file data according to CAM match + # address + self.perm_validator.data.eq(read_L1.data), + # Execute, Read, Write + self.perm_validator.xwr.eq(self.xwr), + # Supervisor Mode + self.perm_validator.super_mode.eq(self.supermode), + # Supverisor Access + self.perm_validator.super_access.eq(self.super_access), + # Address Space IDentifier (ASID) + self.perm_validator.asid.eq(self.asid), + # Output result of permission validation + 
self.perm_valid.eq(self.perm_validator.valid) + ] + # Only output PTE if permissions are valid + with m.If(self.perm_validator.valid): + # XXX TODO - dummy for now + reg_data = Signal.like(self.pte_out) + m.d.comb += [ + self.pte_out.eq(reg_data) + ] + with m.Else(): + m.d.comb += [ + self.pte_out.eq(0) + ] + # Miss Logic + with m.Else(): + m.d.comb += [ + self.hit.eq(0), + self.perm_valid.eq(0), + self.pte_out.eq(0) + ] + + def write_l1(self, m, read_L1, write_L1): + """ writes to the L1 cache + """ + # Memory_L1 Logic + m.d.comb += [ + write_L1.en.eq(1), + write_L1.addr.eq(self.address_L1), + # The Cat places arguments from LSB -> MSB + write_L1.data.eq(Cat(self.pte_in, self.asid)) + ] + # CAM_L1 Logic + m.d.comb += [ + self.cam_L1.write_enable.eq(1), + self.cam_L1.data_in.eq(self.vma), + ] + + def elaborate(self, platform): + m = Module() + # Add submodules (on m.submodules; m.d only selects a clock domain) + # Submodules for L1 Cache: the CAM plus one read and one write port + m.submodules.cam_L1 = self.cam_L1 + m.submodules.read_L1 = read_L1 = self.mem_L1.read_port() + m.submodules.write_L1 = write_L1 = self.mem_L1.write_port() + # Permission Validator Submodule + m.submodules.perm_validator = self.perm_validator + + # When MODE specifies translation + # TODO add in different bit length handling ie prefix 0s + tlb_enable = Signal(reset_less=True) + m.d.comb += tlb_enable.eq(self.mode != 0) + + with m.If(tlb_enable): + m.d.comb += [ + self.cam_L1.enable.eq(1) + ] + with m.Switch(self.command): + # Search + with m.Case("01"): + self.search(m, read_L1, write_L1) + + # Write L1 + # Expected that the miss will be handled in software + with m.Case("10"): + self.write_l1(m, read_L1, write_L1) + + # TODO + #with m.Case("11"): + + # When disabled + with m.Else(): + m.d.comb += [ + self.cam_L1.enable.eq(0), + # XXX TODO - self.reg_file.enable.eq(0), + self.hit.eq(0), + self.perm_valid.eq(0), # XXX TODO, check this + self.pte_out.eq(0) + ] + return m + + +if __name__ == '__main__': + tlb = TLB(15, 36, 64, 4) + main(tlb, ports=[ tlb.supermode, 
tlb.super_access, tlb.command, + tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid, + tlb.vma, tlb.pte_in, + tlb.hit, tlb.perm_valid, tlb.pte_out, + ] + tlb.cam_L1.ports()) diff --git a/src/TLB/__init__.py b/src/TLB/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/TLB/ariane/TreePLRU.cpp b/src/TLB/ariane/TreePLRU.cpp new file mode 100644 index 00000000..2f6aeea5 --- /dev/null +++ b/src/TLB/ariane/TreePLRU.cpp @@ -0,0 +1,211 @@ +#include +#include +#include + + +#define NWAY 4 +#define NLINE 256 +#define HIT 0 +#define MISS 1 +#define MS 1000 +/* +Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing +Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt +four-way set associative - three bits + each bit represents one branch point in a binary decision tree; let 1 + represent that the left side has been referenced more recently than the + right side, and 0 vice-versa + are all 4 lines valid? + / \ + yes no, use an invalid line + | + | + | + bit_0 == 0? state | replace ref to | next state + / \ ------+-------- -------+----------- + y n 00x | line_0 line_0 | 11_ + / \ 01x | line_1 line_1 | 10_ + bit_1 == 0? bit_2 == 0? 
1x0 | line_2 line_2 | 0_1 + / \ / \ 1x1 | line_3 line_3 | 0_0 + y n y n + / \ / \ ('x' means ('_' means unchanged) + line_0 line_1 line_2 line_3 don't care) + 8-way set associative - 7 = 1+2+4 bits +16-way set associative - 15 = 1+2+4+8 bits +32-way set associative - 31 = 1+2+4+8+16 bits +64-way set associative - 63 = 1+2+4+8+16+32 bits +*/ +using namespace std; +struct AddressField { + uint64_t wd_idx : 2;//Unused + uint64_t offset : 4;//Unused + uint64_t index : 8;//NLINE = 256 = 2^8 + uint64_t tag : 50; +}; + +union Address { + uint32_t* p; + AddressField fields; +}; + +struct Cell { + bool v; + uint64_t tag; + + Cell() : v(false), tag(0) {} + + bool isHit(uint64_t tag) { + return v && (tag == this->tag); + } + + void fetch(uint32_t* address) { + Address addr; + addr.p = address; + addr.fields.offset = 0; + addr.fields.wd_idx = 0; + tag = addr.fields.tag; + v = true; + } +}; + +ostream& operator<<(ostream & out, const Cell& cell) { + out << " v:" << cell.v << " tag:" << hex << cell.tag; + return out; +} + +struct Block { + Cell cell[NWAY]; + uint32_t state; + uint64_t *mask;//Mask the state to get accurate value for specified 1 bit. + uint64_t *value; + uint64_t *next_value; + + Block() : state(0) { + switch (NWAY) { + case 4: + mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101}; + value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101}; + next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000}; + break; + case 8: + mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001, + 0b1010001}; + value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000, + 0b1010001}; + next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000, + 0b0000001, 0b0000000}; + break; + //TODO - more NWAY goes here. 
+ default: + std::cout << "Error definition NWAY = " << NWAY << std::endl; + } + } + + uint32_t *getByTag(uint64_t tag, uint32_t *pway) { + for (int i = 0; i < NWAY; ++i) { + if (cell[i].isHit(tag)) { + *pway = i; + return pway; + } + } + return NULL; + } + + void setLRU(uint32_t *address) { + int way = 0; + uint32_t st = state; + for (int i = 0; i < NWAY; ++i) { + if ((state & mask[i]) == value[i]) { + state ^= mask[i]; + way = i; + break; + } + } + cell[way].fetch(address); + cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl; + } + + uint32_t *get(uint32_t *address, uint32_t *pway) { + Address addr; + addr.p = address; + uint32_t *d = getByTag(addr.fields.tag, pway); + if (d != NULL) { + return &d[addr.fields.offset]; + } + return d; + } + + int set(uint32_t *address) { + uint32_t way = 0; + uint32_t *p = get(address, &way); + if (p != NULL) { + printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state); + state &= ~mask[way]; + printf("%X --> ", state); + state |= next_value[way]; + printf("%X\n", state); + // *p = *address; //skip since address is fake. + return HIT; + } else { + setLRU(address); + return MISS; + } + } +}; + +ostream& operator<<(ostream & out, const Block& block) { + out << "state:" << block.state << " "; + for (int i = 0; i signal with a page fault exception + # 2. We got an access error because of insufficient permissions -> + # throw an access exception + m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0) + # Check whether we are allowed to access this memory region + # from a fetch perspective + + # XXX TODO: use PermissionValidator instead [we like modules] + m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \ + (((self.priv_lvl_i == PRIV_LVL_U) & \ + ~itlb_content.u) | \ + ((self.priv_lvl_i == PRIV_LVL_S) & \ + itlb_content.u))) + + # MMU enabled: address from TLB, request delayed until hit. 
+ # Error when TLB hit and no access right or TLB hit and + # translated address not valid (e.g. AXI decode error), + # or when PTW performs walk due to ITLB miss and raises + # an error. + with m.If (self.enable_translation_i): + # we work with SV39, so if VM is enabled, check that + # all bits [63:38] are equal + with m.If (self.icache_areq_i.fetch_req & \ + ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \ + (self.icache_areq_i.fetch_vaddr[38:64]) == 0)): + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), + fe.tval.eq(self.icache_areq_i.fetch_vaddr), + fe.valid.eq(1) + ] + + m.d.comb += self.icache_areq_o.fetch_valid.eq(0) + + # 4K page + paddr = Signal.like(self.icache_areq_o.fetch_paddr) + paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12], + itlb_content.ppn) + m.d.comb += paddr.eq(paddr4k) + # Mega page + with m.If(itlb_is_2M): + m.d.comb += paddr[12:21].eq( + self.icache_areq_i.fetch_vaddr[12:21]) + # Giga page + with m.If(itlb_is_1G): + m.d.comb += paddr[12:30].eq( + self.icache_areq_i.fetch_vaddr[12:30]) + m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr) + + # --------- + # ITLB Hit + # -------- + # if we hit the ITLB output the request signal immediately + with m.If(itlb_lu_hit): + m.d.comb += self.icache_areq_o.fetch_valid.eq( + self.icache_areq_i.fetch_req) + # we got an access error + with m.If (iaccess_err): + # throw a page fault + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), + fe.tval.eq(self.icache_areq_i.fetch_vaddr), + fe.valid.eq(1) + ] + # --------- + # ITLB Miss + # --------- + # watch out for exceptions happening during walking the page table + with m.Elif(ptw_active & walking_instr): + m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error) + fe = self.icache_areq_o.fetch_exception + m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT), + fe.tval.eq(uaddr64), + fe.valid.eq(1) + ] + + #----------------------- + # Data Interface + #----------------------- + + 
lsu_vaddr = Signal(64) + dtlb_pte = PTE() + misaligned_ex = RVException() + lsu_req = Signal() + lsu_is_store = Signal() + dtlb_hit = Signal() + dtlb_is_2M = Signal() + dtlb_is_1G = Signal() + + # check if we need to do translation or if we are always + # ready (e.g.: we are not translating anything) + m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i, + dtlb_lu_hit, 1)) + + # The data interface is simpler and only consists of a + # request/response interface + m.d.comb += [ + # save request and DTLB response + lsu_vaddr.eq(self.lsu_vaddr_i), + lsu_req.eq(self.lsu_req_i), + misaligned_ex.eq(self.misaligned_ex_i), + dtlb_pte.eq(dtlb_content), + dtlb_hit.eq(dtlb_lu_hit), + lsu_is_store.eq(self.lsu_is_store_i), + dtlb_is_2M.eq(dtlb_is_2M), + dtlb_is_1G.eq(dtlb_is_1G), + ] + m.d.sync += [ + self.lsu_paddr_o.eq(lsu_vaddr), + self.lsu_valid_o.eq(lsu_req), + self.lsu_exception_o.eq(misaligned_ex), + ] + + sverr = Signal() + usrerr = Signal() + + m.d.comb += [ + # mute misaligned exceptions if there is no request + # otherwise they will throw accidental exceptions + misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i), + + # SUM is not set and we are trying to access a user + # page in supervisor mode + sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \ + dtlb_pte.u), + # this is not a user page but we are in user mode and + # trying to access it + usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u), + + # Check if the User flag is set, then we may only + # access it in supervisor mode if SUM is enabled + daccess_err.eq(sverr | usrerr), + ] + + # translation is enabled and no misaligned exception occurred + with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid): + m.d.comb += lsu_req.eq(0) + # 4K page + paddr = Signal.like(lsu_vaddr) + paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn) + m.d.comb += paddr.eq(paddr4k) + # Mega page + with m.If(dtlb_is_2M): + m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21]) + # Giga page 
+ with m.If(dtlb_is_1G): + m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30]) + m.d.sync += self.lsu_paddr_o.eq(paddr) + + # --------- + # DTLB Hit + # -------- + with m.If(dtlb_hit & lsu_req): + m.d.comb += lsu_req.eq(1) + # this is a store + with m.If (lsu_is_store): + # check if the page is write-able and + # we are not violating privileges + # also check if the dirty flag is set + with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), + le.tval.eq(lsu_vaddr), + le.valid.eq(1) + ] + + # this is a load, check for sufficient access + # privileges - throw a page fault if necessary + with m.Elif(daccess_err): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), + le.tval.eq(lsu_vaddr), + le.valid.eq(1) + ] + # --------- + # DTLB Miss + # --------- + # watch out for exceptions + with m.Elif (ptw_active & ~walking_instr): + # page table walker threw an exception + with m.If (ptw_error): + # an error makes the translation valid + m.d.comb += lsu_req.eq(1) + # the page table walker can only throw page faults + with m.If (lsu_is_store): + le = self.lsu_exception_o + m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), + le.tval.eq(uaddr64), + le.valid.eq(1) + ] + with m.Else(): + m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), + le.tval.eq(uaddr64), + le.valid.eq(1) + ] + + return m + + def ports(self): + return [self.flush_i, self.enable_translation_i, + self.en_ld_st_translation_i, + self.lsu_req_i, + self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o, + self.lsu_valid_o, self.lsu_paddr_o, + self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i, + self.satp_ppn_i, self.asid_i, self.flush_tlb_i, + self.itlb_miss_o, self.dtlb_miss_o] + \ + self.icache_areq_i.ports() + self.icache_areq_o.ports() + \ + self.req_port_i.ports() + self.req_port_o.ports() + \ + self.misaligned_ex_i.ports() + self.lsu_exception_o.ports() + +if __name__ == '__main__': + mmu = MMU() + vl = rtlil.convert(mmu, 
class PLRU(Elaboratable):
    r""" PLRU - Pseudo Least Recently Used Replacement

        PLRU-tree indexing:
        lvl0        0
                   / \
                  /   \
        lvl1     1     2
                / \   / \
        lvl2   3   4 5   6
              / \ /\/\  /\
             ... ... ... ...

        Inputs:
        * lu_hit: one bit per entry, set for the entry that was hit
        * lu_access_i: qualifies lu_hit (a lookup is taking place)
        * plru_tree: current state of the tree

        Outputs:
        * plru_tree_o: next state of the tree
        * replace_en_o: one bit per entry, set for the entry that the
          current tree state selects for replacement
    """
    def __init__(self, entries):
        """ Arguments:
            * entries: number of TLB entries tracked by the tree
        """
        self.entries = entries
        self.lu_hit = Signal(entries)
        self.replace_en_o = Signal(entries)
        self.lu_access_i = Signal()
        # Tree (bit per entry)
        self.TLBSZ = 2*(self.entries-1)
        self.plru_tree = Signal(self.TLBSZ)
        self.plru_tree_o = Signal(self.TLBSZ)

    def elaborate(self, platform=None):
        m = Module()

        # By default the tree keeps its current state.  Without this
        # every node not rewritten by the current hit would be driven
        # to zero, wiping the replacement history on each cycle.
        m.d.comb += self.plru_tree_o.eq(self.plru_tree)

        # Just predefine which nodes will be set/cleared
        # E.g. for a TLB with 8 entries, the for-loop is semantically
        # equivalent to the following pseudo-code:
        # unique case (1'b1)
        # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1};
        # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0};
        # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1};
        # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0};
        # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1};
        # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0};
        # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1};
        # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0};
        # default: begin /* No hit */ end
        # endcase
        LOG_TLB = int(log2(self.entries))
        for i in range(self.entries):
            # we got a hit so update the pointer as it was least recently used
            hit = Signal(reset_less=True)
            m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i)
            with m.If(hit):
                # Set the nodes to the values we would expect
                for lvl in range(LOG_TLB):
                    idx_base = (1 << lvl) - 1
                    # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
                    shift = LOG_TLB - lvl
                    # the node gets the inverse of the entry's index bit
                    new_idx = Const(1 - ((i >> (shift-1)) & 1), (1, False))
                    plru_idx = idx_base + (i >> shift)
                    m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)

        # Decode tree to write enable signals
        # Next for-loop basically creates the following logic for e.g.
        # an 8 entry TLB (note: pseudo-code obviously):
        # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1}
        # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0}
        # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1}
        # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0}
        # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1}
        # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0}
        # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1}
        # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0}
        # For each entry traverse the tree. If every tree-node matches
        # the corresponding bit of the entry's index, this is
        # the next entry to replace.
        replace = []
        for i in range(self.entries):
            en = []
            for lvl in range(LOG_TLB):
                idx_base = (1 << lvl) - 1
                # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
                shift = LOG_TLB - lvl
                new_idx = (i >> (shift-1)) & 1
                plru_idx = idx_base + (i >> shift)
                plru = Signal(reset_less=True,
                              name="plru-%d-%d-%d" % (i, lvl, plru_idx))
                m.d.comb += plru.eq(self.plru_tree[plru_idx])
                # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
                if new_idx:
                    en.append(~plru) # yes inverted (using bool())
                else:
                    en.append(plru)  # yes inverted (using bool())
            # boolean logic manipulation:
            # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
            replace.append(~Cat(*en).bool())
        m.d.comb += self.replace_en_o.eq(Cat(*replace))

        return m

    def ports(self):
        # only Signals belong here: self.entries is a plain Python int
        return [self.lu_hit, self.replace_en_o,
                self.lu_access_i, self.plru_tree, self.plru_tree_o]
# Data-cache geometry: the index width follows from the size of one way.
DCACHE_SET_ASSOC = 8
CONFIG_L1D_SIZE = 32*1024
DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC))
DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH

ASID_WIDTH = 8


class DCacheReqI:
    """ Request bundle: address index/tag, write data and control flags.
    """
    def __init__(self):
        self.address_index = Signal(DCACHE_INDEX_WIDTH)
        self.address_tag = Signal(DCACHE_TAG_WIDTH)
        self.data_wdata = Signal(64)
        self.data_req = Signal()
        self.data_we = Signal()
        self.data_be = Signal(8)
        self.data_size = Signal(2)
        self.kill_req = Signal()
        self.tag_valid = Signal()

    def eq(self, inp):
        """ Returns one assignment per field, pairing fields by position.
        """
        return [dst.eq(src) for dst, src in zip(self.ports(), inp.ports())]

    def ports(self):
        fields = [self.address_index, self.address_tag]
        fields += [self.data_wdata, self.data_req]
        fields += [self.data_we, self.data_be, self.data_size]
        fields += [self.kill_req, self.tag_valid]
        return fields


class DCacheReqO:
    """ Response bundle: grant, read-valid and 64-bit read data.
    """
    def __init__(self):
        self.data_gnt = Signal()
        self.data_rvalid = Signal()
        self.data_rdata = Signal(64) # actually in PTE object format

    def eq(self, inp):
        """ Returns one assignment per field, pairing fields by position.
        """
        return [dst.eq(src) for dst, src in zip(self.ports(), inp.ports())]

    def ports(self):
        return [self.data_gnt, self.data_rvalid, self.data_rdata]


class PTE: #(RecordObject):
    """ Page Table Entry, one Signal per field, 64 bits in total.
    """
    def __init__(self):
        self.v = Signal()
        self.r = Signal()
        self.w = Signal()
        self.x = Signal()
        self.u = Signal()
        self.g = Signal()
        self.a = Signal()
        self.d = Signal()
        self.rsw = Signal(2)
        self.ppn = Signal(44)
        self.reserved = Signal(10)

    def flatten(self):
        """ Concatenates all fields, first field in the lowest bits.
        """
        return Cat(*self.ports())

    def eq(self, x):
        """ Assigns from another PTE, or from an ArrayProxy of PTE-like
            objects, in which case fields are looked up by signal name.
        """
        if isinstance(x, ArrayProxy):
            source = Cat(*[getattr(x, field.name) for field in self.ports()])
        else:
            source = x.flatten()
        return self.flatten().eq(source)

    def __iter__(self):
        """ order is critical so that flatten creates LSB to MSB
        """
        yield from (self.v, self.r, self.w, self.x,
                    self.u, self.g, self.a, self.d,
                    self.rsw, self.ppn, self.reserved)

    def ports(self):
        return [field for field in self]


class TLBUpdate:
    """ TLB update bundle: flags, VPN, ASID and the PTE content.
    """
    def __init__(self, asid_width):
        self.valid = Signal()      # valid flag
        self.is_2M = Signal()
        self.is_1G = Signal()
        self.vpn = Signal(27)
        self.asid = Signal(asid_width)
        self.content = PTE()

    def flatten(self):
        return Cat(*self.ports())

    def eq(self, x):
        return self.flatten().eq(x.flatten())

    def ports(self):
        header = [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid]
        return header + self.content.ports()
def ports(self):
    """ Returns every externally visible Signal of the page table walker

        The memory request/response bundles and the two TLB update
        bundles are flattened into their individual Signals.
    """
    return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o,
            self.flush_i,
            self.enable_translation_i, self.en_ld_st_translation_i,
            self.lsu_is_store_i,
            self.update_vaddr_o,
            self.asid_i,
            self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i,
            self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i,
            self.satp_ppn_i, self.mxr_i,
            self.itlb_miss_o, self.dtlb_miss_o
           ] + self.req_port_i.ports() + self.req_port_o.ports() + \
           self.itlb_update_o.ports() + self.dtlb_update_o.ports()


def elaborate(self, platform):
    """ Builds the page table walker: registered outputs plus a five
        state FSM (IDLE, WAIT_GRANT, PTE_LOOKUP, PROPAGATE_ERROR,
        WAIT_RVALID).
    """
    m = Module()

    # input registers
    data_rvalid = Signal()
    data_rdata = Signal(64)

    # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata
    # is spec'd in 64-bit binary-format: better to spec as Record?
    pte = PTE()
    m.d.comb += pte.flatten().eq(data_rdata)

    # SV39 defines three levels of page tables
    ptw_lvl = Signal(2) # default=0=LVL1 on reset (LVL1 is defined as 0)
    ptw_lvl1 = Signal()
    ptw_lvl2 = Signal()
    ptw_lvl3 = Signal()
    m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1),
                 ptw_lvl2.eq(ptw_lvl == LVL2),
                 ptw_lvl3.eq(ptw_lvl == LVL3)]

    # is this an instruction page table walk?
    is_instr_ptw = Signal()
    global_mapping = Signal()
    # latched tag signal
    tag_valid = Signal()
    # register the ASID
    tlb_update_asid = Signal(self.asid_width)
    # register VPN we need to walk, SV39 defines a 39 bit virtual addr
    vaddr = Signal(64)
    # 4 byte aligned physical pointer
    ptw_pptr = Signal(56)

    end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH
    m.d.sync += [
        # Assignments
        self.update_vaddr_o.eq(vaddr),

        self.walking_instr_o.eq(is_instr_ptw),
        # directly output the correct physical address
        self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]),
        self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]),
        # we are never going to kill this request
        self.req_port_o.kill_req.eq(0),              # XXX assign comb?
        # we are never going to write with the HPTW
        self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb?
        # -----------
        # TLB Update
        # -----------
        self.itlb_update_o.vpn.eq(vaddr[12:39]),
        self.dtlb_update_o.vpn.eq(vaddr[12:39]),
        # update the correct page table level
        self.itlb_update_o.is_2M.eq(ptw_lvl2),
        self.itlb_update_o.is_1G.eq(ptw_lvl1),
        self.dtlb_update_o.is_2M.eq(ptw_lvl2),
        self.dtlb_update_o.is_1G.eq(ptw_lvl1),
        # output the correct ASID
        self.itlb_update_o.asid.eq(tlb_update_asid),
        self.dtlb_update_o.asid.eq(tlb_update_asid),
        # set the global mapping bit
        self.itlb_update_o.content.eq(pte),
        self.itlb_update_o.content.g.eq(global_mapping),
        self.dtlb_update_o.content.eq(pte),
        self.dtlb_update_o.content.g.eq(global_mapping),

        self.req_port_o.tag_valid.eq(tag_valid),
    ]

    #-------------------
    # Page table walker
    #-------------------
    # A virtual address va is translated into a physical address pa as
    # follows:
    # 1. Let a be sptbr.ppn * PAGESIZE, and let i = LEVELS-1. (For Sv39,
    #    PAGESIZE=2^12 and LEVELS=3.)
    # 2. Let pte be the value of the PTE at address a+va.vpn[i]*PTESIZE.
    #    (For Sv32, PTESIZE=4.)
    # 3. If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an
    #    access exception.
    # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to
    #    step 5.  Otherwise, this PTE is a pointer to the next level of
    #    the page table.
    #    Let i=i-1. If i < 0, stop and raise an access exception.
    #    Otherwise, let a = pte.ppn * PAGESIZE and go to step 2.
    # 5. A leaf PTE has been found. Determine if the requested memory
    #    access is allowed by the pte.r, pte.w, and pte.x bits. If not,
    #    stop and raise an access exception. Otherwise, the translation is
    #    successful.  Set pte.a to 1, and, if the memory access is a
    #    store, set pte.d to 1.
    #    The translated physical address is given as follows:
    #      - pa.pgoff = va.pgoff.
    #      - If i > 0, then this is a superpage translation and
    #        pa.ppn[i-1:0] = va.vpn[i-1:0].
    #      - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i].
    # 6. If i > 0 and pa.ppn[i-1:0] != 0, this is a misaligned
    #    superpage: stop and raise a page-fault exception.

    m.d.sync += tag_valid.eq(0)

    # default assignments
    m.d.comb += [
        # PTW memory interface
        self.req_port_o.data_req.eq(0),
        self.req_port_o.data_be.eq(Const(0xFF, 8)),
        self.req_port_o.data_size.eq(Const(0b11, 2)),
        self.req_port_o.data_we.eq(0),
        self.ptw_error_o.eq(0),
        self.itlb_update_o.valid.eq(0),
        self.dtlb_update_o.valid.eq(0),

        self.itlb_miss_o.eq(0),
        self.dtlb_miss_o.eq(0),
    ]

    # ------------
    # State Machine
    # ------------

    with m.FSM() as fsm:

        with m.State("IDLE"):
            self.idle(m, is_instr_ptw, ptw_lvl, global_mapping,
                      ptw_pptr, vaddr, tlb_update_asid)

        with m.State("WAIT_GRANT"):
            self.grant(m, tag_valid, data_rvalid)

        with m.State("PTE_LOOKUP"):
            # we wait for the valid signal
            with m.If(data_rvalid):
                self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
                            data_rvalid, global_mapping,
                            is_instr_ptw, ptw_pptr, vaddr)

        # Propagate error to MMU/LSU
        with m.State("PROPAGATE_ERROR"):
            m.next = "IDLE"
            m.d.comb += self.ptw_error_o.eq(1)

        # wait for the rvalid before going back to IDLE
        with m.State("WAIT_RVALID"):
            with m.If(data_rvalid):
                m.next = "IDLE"

    m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata),
                 data_rvalid.eq(self.req_port_i.data_rvalid)
                ]

    return m


def set_grant_state(self, m):
    """ Selects the next FSM state when a memory request is to be issued

        Without a flush the next state is WAIT_GRANT.  With a flush the
        next state is WAIT_RVALID if the request was already granted,
        otherwise IDLE.
    """
    # should we have flushed before we got an rvalid,
    # wait for it until going back to IDLE
    with m.If(self.flush_i):
        with m.If (self.req_port_i.data_gnt):
            m.next = "WAIT_RVALID"
        with m.Else():
            m.next = "IDLE"
    with m.Else():
        m.next = "WAIT_GRANT"


def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping,
         ptw_pptr, vaddr, tlb_update_asid):
    """ Logic for the IDLE state: detect an ITLB or DTLB miss, latch the
        address and ASID to walk, and point ptw_pptr at the top-level
        page table entry.  An ITLB miss takes priority over a DTLB miss.
    """
    # by default we start with the top-most page table
    # NOTE(review): ptw_active_o resets to 1 and is only ever cleared
    # here; nothing visible sets it again when a walk starts - confirm.
    m.d.sync += [is_instr_ptw.eq(0),
                 ptw_lvl.eq(LVL1),
                 global_mapping.eq(0),
                 self.ptw_active_o.eq(0), # deactive (IDLE)
                ]
    # work out itlb/dtlb miss
    m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \
                                    self.itlb_access_i & \
                                    ~self.itlb_hit_i & \
                                    ~self.dtlb_access_i)
    m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \
                                    self.dtlb_access_i & \
                                    ~self.dtlb_hit_i)
    # we got an ITLB miss?
    with m.If(self.itlb_miss_o):
        pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39],
                   self.satp_ppn_i)
        m.d.sync += [ptw_pptr.eq(pptr),
                     is_instr_ptw.eq(1),
                     vaddr.eq(self.itlb_vaddr_i),
                     tlb_update_asid.eq(self.asid_i),
                    ]
        self.set_grant_state(m)

    # we got a DTLB miss?
    with m.Elif(self.dtlb_miss_o):
        pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39],
                   self.satp_ppn_i)
        m.d.sync += [ptw_pptr.eq(pptr),
                     vaddr.eq(self.dtlb_vaddr_i),
                     tlb_update_asid.eq(self.asid_i),
                    ]
        self.set_grant_state(m)


def grant(self, m, tag_valid, data_rvalid):
    """ Logic for the WAIT_GRANT state: keep the request asserted and,
        once granted, raise tag_valid (registered, so one cycle later)
        and move on to PTE_LOOKUP unless a flush intervenes.
    """
    # we've got a data WAIT_GRANT so tell the
    # cache that the tag is valid

    # send a request out
    m.d.comb += self.req_port_o.data_req.eq(1)
    # wait for the WAIT_GRANT
    with m.If(self.req_port_i.data_gnt):
        # send the tag valid signal one cycle later
        m.d.sync += tag_valid.eq(1)
        # should we have flushed before we got an rvalid,
        # wait for it until going back to IDLE
        with m.If(self.flush_i):
            with m.If (~data_rvalid):
                m.next = "WAIT_RVALID"
            with m.Else():
                m.next = "IDLE"
        with m.Else():
            m.next = "PTE_LOOKUP"


def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3,
           data_rvalid, global_mapping,
           is_instr_ptw, ptw_pptr, vaddr=None):
    """ Logic for the PTE_LOOKUP state, once the PTE data is valid

        Classifies the PTE as invalid, leaf or pointer: an invalid PTE
        propagates an error, a leaf updates the ITLB or DTLB after the
        permission and alignment checks, and a pointer starts the walk
        of the next level.

        Arguments (beyond the signals created by elaborate):
        * vaddr: latched virtual address being walked.  When omitted,
          dtlb_vaddr_i is used for the next-level index, which is only
          correct for data walks.
    """
    # address whose VPN fields index the next page table level
    walk_vaddr = self.dtlb_vaddr_i if vaddr is None else vaddr

    # temporaries
    pte_rx = Signal(reset_less=True)
    pte_exe = Signal(reset_less=True)
    pte_inv = Signal(reset_less=True)
    pte_a = Signal(reset_less=True)
    st_wd = Signal(reset_less=True)
    m.d.comb += [pte_rx.eq(pte.r | pte.x),
                 pte_exe.eq(~pte.x | ~pte.a),
                 pte_inv.eq(~pte.v | (~pte.r & pte.w)),
                 pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))),
                 st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))]

    l1err = Signal(reset_less=True)
    l2err = Signal(reset_less=True)
    # Python's & binds tighter than !=, so the comparison needs its own
    # parentheses: superpage at this level AND low ppn bits non-zero.
    m.d.comb += [l2err.eq(ptw_lvl2 & (pte.ppn[0:9] != Const(0, 9))),
                 l1err.eq(ptw_lvl1 & (pte.ppn[0:18] != Const(0, 18)))]

    # check if the global mapping bit is set
    with m.If (pte.g):
        m.d.sync += global_mapping.eq(1)

    m.next = "IDLE"

    # -------------
    # Invalid PTE
    # -------------
    # If pte.v = 0, or if pte.r = 0 and pte.w = 1,
    # stop and raise a page-fault exception.
    with m.If (pte_inv):
        m.next = "PROPAGATE_ERROR"

    # -----------
    # Valid PTE
    # -----------

    # it is a valid PTE
    # if pte.r = 1 or pte.x = 1 it is a valid PTE
    with m.Elif (pte_rx):
        # Valid translation found (either 1G, 2M or 4K)
        with m.If(is_instr_ptw):
            # ------------
            # Update ITLB
            # ------------
            # If page not executable, we can directly raise error.
            # This doesn't put a useless entry into the TLB.
            # The same idea applies to the access flag since we let
            # the access flag be managed by SW.
            with m.If (pte_exe):
                m.next = "PROPAGATE_ERROR"
            with m.Else():
                m.d.comb += self.itlb_update_o.valid.eq(1)

        with m.Else():
            # ------------
            # Update DTLB
            # ------------
            # Check if the access flag has been set, otherwise
            # throw page-fault and let software handle those bits.
            # If page not readable (there are no write-only pages)
            # directly raise an error. This doesn't put a useless
            # entry into the TLB.
            with m.If(pte_a):
                m.d.comb += self.dtlb_update_o.valid.eq(1)
            with m.Else():
                m.next = "PROPAGATE_ERROR"
            # Request is a store: perform additional checks
            # If the request was a store and the page not
            # write-able, raise an error
            # the same applies if the dirty flag is not set
            with m.If (st_wd):
                m.d.comb += self.dtlb_update_o.valid.eq(0)
                m.next = "PROPAGATE_ERROR"

        # check if the ppn is correctly aligned: Case (6)
        with m.If(l1err | l2err):
            m.next = "PROPAGATE_ERROR"
            m.d.comb += [self.dtlb_update_o.valid.eq(0),
                         self.itlb_update_o.valid.eq(0)]

    # this is a pointer to the next TLB level
    with m.Else():
        # pointer to next level of page table
        with m.If (ptw_lvl1):
            # we are in the second level now
            pptr = Cat(Const(0, 3), walk_vaddr[21:30], pte.ppn)
            m.d.sync += [ptw_pptr.eq(pptr),
                         ptw_lvl.eq(LVL2)
                        ]
        with m.If(ptw_lvl2):
            # here we received a pointer to the third level
            pptr = Cat(Const(0, 3), walk_vaddr[12:21], pte.ppn)
            m.d.sync += [ptw_pptr.eq(pptr),
                         ptw_lvl.eq(LVL3)
                        ]
        self.set_grant_state(m)

        with m.If (ptw_lvl3):
            # Should already be the last level
            # page table => Error
            m.d.sync += ptw_lvl.eq(LVL3)
            m.next = "PROPAGATE_ERROR"
TLB_ENTRIES = 8

class TLB:
    """ Translation Lookaside Buffer, SV39, fully set-associative

        One TLBContent submodule per entry performs the tag compare; an
        Encoder selects the hitting entry and a PLRU tree selects which
        entry the next update replaces.
    """
    def __init__(self, tlb_entries=8, asid_width=8):
        """ Arguments:
            * tlb_entries: number of entries (even and greater than 1)
            * asid_width: width of the Address Space IDentifier (>= 1)
        """
        self.tlb_entries = tlb_entries
        self.asid_width = asid_width

        self.flush_i = Signal()  # Flush signal
        # Lookup signals
        self.lu_access_i = Signal()
        self.lu_asid_i = Signal(self.asid_width)
        self.lu_vaddr_i = Signal(64)
        self.lu_content_o = PTE()
        self.lu_is_2M_o = Signal()
        self.lu_is_1G_o = Signal()
        self.lu_hit_o = Signal()
        # Update TLB
        self.pte_width = len(self.lu_content_o.flatten())
        self.update_i = TLBUpdate(asid_width)

    def elaborate(self, platform):
        #--------------
        # Sanity checks
        #--------------
        # checked before any hardware is created, so that an invalid
        # size is reported by these messages and not by whichever
        # submodule happens to trip over it first

        assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \
            "TLB size must be a multiple of 2 and greater than 1"
        assert (self.asid_width >= 1), \
            "ASID width must be at least 1"

        m = Module()

        vpn2 = Signal(9)
        vpn1 = Signal(9)
        vpn0 = Signal(9)

        #-------------
        # Translation
        #-------------

        # SV39 defines three levels of page tables
        m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]),
                      vpn1.eq(self.lu_vaddr_i[21:30]),
                      vpn2.eq(self.lu_vaddr_i[30:39]),
                    ]

        tc = []
        for i in range(self.tlb_entries):
            tlc = TLBContent(self.pte_width, self.asid_width)
            setattr(m.submodules, "tc%d" % i, tlc)
            tc.append(tlc)
            # connect inputs
            tlc.update_i = self.update_i     # saves a lot of graphviz links
            m.d.comb += [tlc.vpn0.eq(vpn0),
                         tlc.vpn1.eq(vpn1),
                         tlc.vpn2.eq(vpn2),
                         tlc.flush_i.eq(self.flush_i),
                         #tlc.update_i.eq(self.update_i),
                         tlc.lu_asid_i.eq(self.lu_asid_i)]
        tc = Array(tc)

        #--------------
        # Select hit
        #--------------

        # use Encoder to select hit index
        # XXX TODO: assert that there's only one valid entry (one lu_hit)
        hitsel = Encoder(self.tlb_entries)
        m.submodules.hitsel = hitsel

        hits = []
        for i in range(self.tlb_entries):
            hits.append(tc[i].lu_hit_o)
        m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well)
        idx = hitsel.o

        active = Signal(reset_less=True)
        m.d.comb += active.eq(~hitsel.n)
        with m.If(active):
            # active hit, send selected as output
            m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o),
                          self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o),
                          self.lu_hit_o.eq(1),
                          self.lu_content_o.flatten().eq(tc[idx].lu_content_o),
                        ]

        #--------------
        # PLRU.
        #--------------

        p = PLRU(self.tlb_entries)
        plru_tree = Signal(p.TLBSZ)
        m.submodules.plru = p

        # connect PLRU inputs/outputs
        # XXX TODO: assert that there's only one valid entry (one replace_en)
        en = []
        for i in range(self.tlb_entries):
            en.append(tc[i].replace_en_i)
        m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags
                     p.lu_hit.eq(hitsel.i),
                     p.lu_access_i.eq(self.lu_access_i),
                     p.plru_tree.eq(plru_tree)]
        m.d.sync += plru_tree.eq(p.plru_tree_o)

        return m

    # Checks carried by the SystemVerilog original, not yet ported:
    #
    # // Just for checking
    # function int countSetBits(logic[self.tlb_entries-1:0] vector);
    #   automatic int count = 0;
    #   foreach (vector[idx]) begin
    #     count += vector[idx];
    #   end
    #   return count;
    # endfunction
    #
    # assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1))
    #   else $error("More then one hit in TLB!"); $stop(); end
    # assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1))
    #   else $error("More then one TLB entry selected for next replace!");

    def ports(self):
        return [self.flush_i, self.lu_access_i,
                 self.lu_asid_i, self.lu_vaddr_i,
                 self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o,
                ] + self.lu_content_o.ports() + self.update_i.ports()

if __name__ == '__main__':
    tlb = TLB()
    vl = rtlil.convert(tlb, ports=tlb.ports())
    with open("test_tlb.il", "w") as f:
        f.write(vl)


# From src/TLB/ariane/src/tlb_content.py
class TLBEntry:
    """ Tag of one TLB entry: ASID, the three SV39 VPN fields, the page
        size flags and the valid bit.
    """
    def __init__(self, asid_width):
        self.asid = Signal(asid_width)
        # SV39 defines three levels of page tables
        self.vpn0 = Signal(9)
        self.vpn1 = Signal(9)
        self.vpn2 = Signal(9)
        self.is_2M = Signal()
        self.is_1G = Signal()
        self.valid = Signal()

    def flatten(self):
        return Cat(*self.ports())

    def eq(self, x):
        return self.flatten().eq(x.flatten())

    def ports(self):
        return [self.asid, self.vpn0, self.vpn1, self.vpn2,
                self.is_2M, self.is_1G, self.valid]
= Signal(reset_less=True) + vpn2_hit = Signal(reset_less=True) + m.d.comb += [tags_ok.eq(tags.valid), + asid_ok.eq(tags.asid == self.lu_asid_i), + vpn2_ok.eq(tags.vpn2 == self.vpn2), + vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)] + # temporaries for 2nd level match + vpn1_ok = Signal(reset_less=True) + tags_2M = Signal(reset_less=True) + vpn0_ok = Signal(reset_less=True) + vpn0_or_2M = Signal(reset_less=True) + m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1), + tags_2M.eq(tags.is_2M), + vpn0_ok.eq(self.vpn0 == tags.vpn0), + vpn0_or_2M.eq(tags_2M | vpn0_ok)] + # first level match, this may be a giga page, + # check the ASID flags as well + with m.If(vpn2_hit): + # second level + with m.If (tags.is_1G): + m.d.comb += [ self.lu_content_o.eq(content), + self.lu_is_1G_o.eq(1), + self.lu_hit_o.eq(1), + ] + # not a giga page hit so check further + with m.Elif(vpn1_ok): + # this could be a 2 mega page hit or a 4 kB hit + # output accordingly + with m.If(vpn0_or_2M): + m.d.comb += [ self.lu_content_o.eq(content), + self.lu_is_2M_o.eq(tags.is_2M), + self.lu_hit_o.eq(1), + ] + # ------------------ + # Update or Flush + # ------------------ + + # temporaries + replace_valid = Signal(reset_less=True) + m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i) + + # flush + with m.If (self.flush_i): + # invalidate (flush) conditions: all if zero or just this ASID + with m.If (self.lu_asid_i == Const(0, self.asid_width) | + (self.lu_asid_i == tags.asid)): + m.d.sync += tags.valid.eq(0) + + # normal replacement + with m.Elif(replace_valid): + m.d.sync += [ # update tag array + tags.asid.eq(self.update_i.asid), + tags.vpn2.eq(self.update_i.vpn[18:27]), + tags.vpn1.eq(self.update_i.vpn[9:18]), + tags.vpn0.eq(self.update_i.vpn[0:9]), + tags.is_1G.eq(self.update_i.is_1G), + tags.is_2M.eq(self.update_i.is_2M), + tags.valid.eq(1), + # and content as well + content.eq(self.update_i.content.flatten()) + ] + return m + + def ports(self): + return [self.flush_i, + self.lu_asid_i, + 
self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o, + ] + self.update_i.content.ports() + self.update_i.ports() diff --git a/src/TLB/ariane/test/test_plru.py b/src/TLB/ariane/test/test_plru.py new file mode 100644 index 00000000..9b040e1d --- /dev/null +++ b/src/TLB/ariane/test/test_plru.py @@ -0,0 +1,15 @@ +import sys +sys.path.append("../src") +sys.path.append("../../../TestUtil") + +from plru import PLRU + +from nmigen.compat.sim import run_simulation + +def testbench(dut): + yield + +if __name__ == "__main__": + dut = PLRU(4) + run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd") + print("PLRU Unit Test Success") \ No newline at end of file diff --git a/src/TLB/ariane/test/test_ptw.py b/src/TLB/ariane/test/test_ptw.py new file mode 100644 index 00000000..e9c5324c --- /dev/null +++ b/src/TLB/ariane/test/test_ptw.py @@ -0,0 +1,127 @@ +import sys +sys.path.append("../src") +sys.path.append("../../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from ptw import PTW, PTE + + +def testbench(dut): + + addr = 0x8000000 + + #pte = PTE() + #yield pte.v.eq(1) + #yield pte.r.eq(1) + + yield dut.req_port_i.data_gnt.eq(1) + yield dut.req_port_i.data_rvalid.eq(1) + yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten()) + + # data lookup + yield dut.en_ld_st_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000000) + + yield + yield + yield + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x200000) + + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + + # data lookup, PTW levels 1-2-3 + addr = 0x4000000 + yield dut.dtlb_vaddr_i.eq(addr) + yield dut.mxr_i.eq(0x1) + yield dut.req_port_i.data_gnt.eq(1) + yield dut.req_port_i.data_rvalid.eq(1) + yield 
dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten()) + + yield dut.en_ld_st_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(addr) + + yield + yield + yield + yield + yield + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.dtlb_access_i.eq(1) + yield dut.dtlb_hit_i.eq(0) + yield dut.dtlb_vaddr_i.eq(0x400000011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + yield + yield + + + # instruction lookup + yield dut.en_ld_st_translation_i.eq(0) + yield dut.enable_translation_i.eq(1) + yield dut.asid_i.eq(1) + + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x800000) + + yield + yield + yield + + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x200000) + + yield + yield + yield + + yield dut.req_port_i.data_gnt.eq(0) + yield dut.itlb_access_i.eq(1) + yield dut.itlb_hit_i.eq(0) + yield dut.itlb_vaddr_i.eq(0x800011) + + yield + yield dut.req_port_i.data_gnt.eq(1) + yield + yield + + yield + + + +if __name__ == "__main__": + dut = PTW() + run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd") + print("PTW Unit Test Success") diff --git a/src/TLB/ariane/test/test_tlb.py b/src/TLB/ariane/test/test_tlb.py new file mode 100644 index 00000000..aab1d43c --- /dev/null +++ b/src/TLB/ariane/test/test_tlb.py @@ -0,0 +1,69 @@ +import sys +sys.path.append("../src") +sys.path.append("../../../TestUtil") + +from nmigen.compat.sim import run_simulation + +from tlb import TLB + +def set_vaddr(addr): + yield dut.lu_vaddr_i.eq(addr) + yield dut.update_i.vpn.eq(addr>>12) + + +def testbench(dut): + yield dut.lu_access_i.eq(1) + yield dut.lu_asid_i.eq(1) + yield dut.update_i.valid.eq(1) + yield dut.update_i.is_1G.eq(0) + yield dut.update_i.is_2M.eq(0) + yield dut.update_i.asid.eq(1) + yield dut.update_i.content.ppn.eq(0) + yield dut.update_i.content.rsw.eq(0) + yield 
dut.update_i.content.r.eq(1) + + yield + + addr = 0x80000 + yield from set_vaddr(addr) + yield + + addr = 0x90001 + yield from set_vaddr(addr) + yield + + addr = 0x28000000 + yield from set_vaddr(addr) + yield + + addr = 0x28000001 + yield from set_vaddr(addr) + + addr = 0x28000001 + yield from set_vaddr(addr) + yield + + addr = 0x1000040000 + yield from set_vaddr(addr) + yield + + addr = 0x1000040001 + yield from set_vaddr(addr) + yield + + yield dut.update_i.is_1G.eq(1) + addr = 0x2040000 + yield from set_vaddr(addr) + yield + + yield dut.update_i.is_1G.eq(1) + addr = 0x2040001 + yield from set_vaddr(addr) + yield + + yield + + +if __name__ == "__main__": + dut = TLB() + run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd") diff --git a/src/TLB/src/AddressEncoder.py b/src/TLB/src/AddressEncoder.py deleted file mode 100644 index 4c4b8d76..00000000 --- a/src/TLB/src/AddressEncoder.py +++ /dev/null @@ -1,75 +0,0 @@ -from nmigen import Module, Signal -from nmigen.lib.coding import Encoder, PriorityEncoder - -class AddressEncoder(): - """Address Encoder - - The purpose of this module is to take in a vector and - encode the bits that are one hot into an address. This module - combines both nmigen's Encoder and PriorityEncoder and will state - whether the input line has a single bit hot, multiple bits hot, - or no bits hot. The output line will always have the lowest value - address output. - - Usage: - The output is valid when either single or multiple match is high. - Otherwise output is 0. 
- """ - def __init__(self, width): - """ Arguments: - * width: The desired length of the input vector - """ - # Internal - self.encoder = Encoder(width) - self.p_encoder = PriorityEncoder(width) - - # Input - self.i = Signal(width) - - # Output - self.single_match = Signal(1) - self.multiple_match = Signal(1) - self.o = Signal(max=width) - - def elaborate(self, platform=None): - m = Module() - - # Add internal submodules - m.submodules.encoder = self.encoder - m.submodules.p_encoder = self.p_encoder - - m.d.comb += [ - self.encoder.i.eq(self.i), - self.p_encoder.i.eq(self.i) - ] - - # Steps: - # 1. check if the input vector is non-zero - # 2. if non-zero, check if single match or multiple match - # 3. set output line to be lowest value address output - - # If the priority encoder recieves an input of 0 - # If n is 1 then the output is not valid - with m.If(self.p_encoder.n): - m.d.comb += [ - self.single_match.eq(0), - self.multiple_match.eq(0), - self.o.eq(0) - ] - # If the priority encoder recieves an input > 0 - with m.Else(): - # Multiple Match if encoder n is invalid - with m.If(self.encoder.n): - m.d.comb += [ - self.single_match.eq(0), - self.multiple_match.eq(1) - ] - # Single Match if encoder n is valid - with m.Else(): - m.d.comb += [ - self.single_match.eq(1), - self.multiple_match.eq(0) - ] - # Always set output based on priority encoder output - m.d.comb += self.o.eq(self.p_encoder.o) - return m diff --git a/src/TLB/src/Cam.py b/src/TLB/src/Cam.py deleted file mode 100644 index 3c499211..00000000 --- a/src/TLB/src/Cam.py +++ /dev/null @@ -1,124 +0,0 @@ -from nmigen import Array, Cat, Module, Signal -from nmigen.lib.coding import Decoder -from nmigen.cli import main #, verilog - -from CamEntry import CamEntry -from AddressEncoder import AddressEncoder - -class Cam(): - """ Content Addressable Memory (CAM) - - The purpose of this module is to quickly look up whether an - entry exists given a data key. 
- This module will search for the given data in all internal entries - and output whether a single or multiple match was found. - If an single entry is found the address be returned and single_match - is set HIGH. If multiple entries are found the lowest address is - returned and multiple_match is set HIGH. If neither single_match or - multiple_match are HIGH this implies no match was found. To write - to the CAM set the address bus to the desired entry and set write_enable - HIGH. Entry managment should be performed one level above this block - as lookup is performed within. - - Notes: - The read and write operations take one clock cycle to complete. - Currently the read_warning line is present for interfacing but - is not necessary for this design. This module is capable of writing - in the first cycle, reading on the second, and output the correct - address on the third. - """ - - def __init__(self, data_size, cam_size): - """ Arguments: - * data_size: (bits) The bit size of the data - * cam_size: (number) The number of entries in the CAM - """ - - # Internal - self.cam_size = cam_size - self.encoder = AddressEncoder(cam_size) - self.decoder = Decoder(cam_size) - self.entry_array = Array(CamEntry(data_size) for x in range(cam_size)) - - # Input - self.enable = Signal(1) - self.write_enable = Signal(1) - self.data_in = Signal(data_size) # The data to be written - self.data_mask = Signal(data_size) # mask for ternary writes - self.address_in = Signal(max=cam_size) # address of CAM Entry to write - - # Output - self.read_warning = Signal(1) # High when a read interrupts a write - self.single_match = Signal(1) # High when there is only one match - self.multiple_match = Signal(1) # High when there at least two matches - self.match_address = Signal(max=cam_size) # The lowest address matched - - def elaborate(self, platform=None): - m = Module() - # AddressEncoder for match types and output address - m.submodules.AddressEncoder = self.encoder - # Decoder is used to 
select which entry will be written to - m.submodules.Decoder = self.decoder - # CamEntry Array Submodules - # Note these area added anonymously - entry_array = self.entry_array - m.submodules += entry_array - - # Decoder logic - m.d.comb += [ - self.decoder.i.eq(self.address_in), - self.decoder.n.eq(0) - ] - - encoder_vector = [] - with m.If(self.enable): - # Set the key value for every CamEntry - for index in range(self.cam_size): - - # Write Operation - with m.If(self.write_enable): - with m.If(self.decoder.o[index]): - m.d.comb += entry_array[index].command.eq(2) - with m.Else(): - m.d.comb += entry_array[index].command.eq(0) - - # Read Operation - with m.Else(): - m.d.comb += entry_array[index].command.eq(1) - - # Send data input to all entries - m.d.comb += entry_array[index].data_in.eq(self.data_in) - # Send all entry matches to encoder - ematch = entry_array[index].match - encoder_vector.append(ematch) - - # Give input to and accept output from encoder module - m.d.comb += [ - self.encoder.i.eq(Cat(*encoder_vector)), - self.single_match.eq(self.encoder.single_match), - self.multiple_match.eq(self.encoder.multiple_match), - self.match_address.eq(self.encoder.o) - ] - - # If the CAM is not enabled set all outputs to 0 - with m.Else(): - m.d.comb += [ - self.read_warning.eq(0), - self.single_match.eq(0), - self.multiple_match.eq(0), - self.match_address.eq(0) - ] - - return m - - def ports(self): - return [self.enable, self.write_enable, - self.data_in, self.data_mask, - self.read_warning, self.single_match, - self.multiple_match, self.match_address] - - -if __name__ == '__main__': - cam = Cam(4, 4) - main(cam, ports=cam.ports()) - diff --git a/src/TLB/src/CamEntry.py b/src/TLB/src/CamEntry.py deleted file mode 100644 index 73081ce5..00000000 --- a/src/TLB/src/CamEntry.py +++ /dev/null @@ -1,45 +0,0 @@ -from nmigen import Module, Signal - -class CamEntry: - """ Content Addressable Memory (CAM) Entry - - The purpose of this module is to represent an entry within 
a CAM. - This module when given a read command will compare the given data - and output whether a match was found or not. When given a write - command it will write the given data into internal registers. - """ - - def __init__(self, data_size): - """ Arguments: - * data_size: (bit count) The size of the data - """ - # Input - self.command = Signal(2) # 00 => NA 01 => Read 10 => Write 11 => Reset - self.data_in = Signal(data_size) # Data input when writing - - # Output - self.match = Signal(1) # Result of the internal/input key comparison - self.data = Signal(data_size) - - def elaborate(self, platform=None): - m = Module() - with m.Switch(self.command): - with m.Case("00"): - m.d.sync += self.match.eq(0) - with m.Case("01"): - with m.If(self.data == self.data_in): - m.d.sync += self.match.eq(1) - with m.Else(): - m.d.sync += self.match.eq(0) - with m.Case("10"): - m.d.sync += [ - self.data.eq(self.data_in), - self.match.eq(0) - ] - with m.Case(): - m.d.sync += [ - self.match.eq(0), - self.data.eq(0) - ] - - return m diff --git a/src/TLB/src/LFSR.py b/src/TLB/src/LFSR.py deleted file mode 100644 index d8b606ec..00000000 --- a/src/TLB/src/LFSR.py +++ /dev/null @@ -1,109 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# See Notices.txt for copyright information -from nmigen import Signal, Module, Const, Cat, Elaboratable -from nmigen.cli import verilog, rtlil - - -class LFSRPolynomial(set): - """ implements a polynomial for use in LFSR - """ - def __init__(self, exponents=()): - for e in exponents: - assert isinstance(e, int), TypeError("%s must be an int" % repr(e)) - assert (e >= 0), ValueError("%d must not be negative" % e) - set.__init__(self, set(exponents).union({0})) # must contain zero - - @property - def max_exponent(self): - return max(self) # derived from set, so this returns the max exponent - - @property - def exponents(self): - exponents = list(self) # get elements of set as a list - exponents.sort(reverse=True) - return exponents - - def 
__str__(self): - expd = {0: "1", 1: 'x', 2: "x^{}"} # case 2 isn't 2, it's min(i,2) - retval = map(lambda i: expd[min(i,2)].format(i), self.exponents) - return " + ".join(retval) - - def __repr__(self): - return "LFSRPolynomial(%s)" % self.exponents - - -# list of selected polynomials from https://web.archive.org/web/20190418121923/https://en.wikipedia.org/wiki/Linear-feedback_shift_register#Some_polynomials_for_maximal_LFSRs # noqa -LFSR_POLY_2 = LFSRPolynomial([2, 1, 0]) -LFSR_POLY_3 = LFSRPolynomial([3, 2, 0]) -LFSR_POLY_4 = LFSRPolynomial([4, 3, 0]) -LFSR_POLY_5 = LFSRPolynomial([5, 3, 0]) -LFSR_POLY_6 = LFSRPolynomial([6, 5, 0]) -LFSR_POLY_7 = LFSRPolynomial([7, 6, 0]) -LFSR_POLY_8 = LFSRPolynomial([8, 6, 5, 4, 0]) -LFSR_POLY_9 = LFSRPolynomial([9, 5, 0]) -LFSR_POLY_10 = LFSRPolynomial([10, 7, 0]) -LFSR_POLY_11 = LFSRPolynomial([11, 9, 0]) -LFSR_POLY_12 = LFSRPolynomial([12, 11, 10, 4, 0]) -LFSR_POLY_13 = LFSRPolynomial([13, 12, 11, 8, 0]) -LFSR_POLY_14 = LFSRPolynomial([14, 13, 12, 2, 0]) -LFSR_POLY_15 = LFSRPolynomial([15, 14, 0]) -LFSR_POLY_16 = LFSRPolynomial([16, 15, 13, 4, 0]) -LFSR_POLY_17 = LFSRPolynomial([17, 14, 0]) -LFSR_POLY_18 = LFSRPolynomial([18, 11, 0]) -LFSR_POLY_19 = LFSRPolynomial([19, 18, 17, 14, 0]) -LFSR_POLY_20 = LFSRPolynomial([20, 17, 0]) -LFSR_POLY_21 = LFSRPolynomial([21, 19, 0]) -LFSR_POLY_22 = LFSRPolynomial([22, 21, 0]) -LFSR_POLY_23 = LFSRPolynomial([23, 18, 0]) -LFSR_POLY_24 = LFSRPolynomial([24, 23, 22, 17, 0]) - - -class LFSR(LFSRPolynomial, Elaboratable): - """ implements a Linear Feedback Shift Register - """ - def __init__(self, polynomial): - """ Inputs: - ------ - :polynomial: the polynomial to feedback on. may be a LFSRPolynomial - instance or an iterable of ints (list/tuple/generator) - :enable: enable (set LO to disable. NOTE: defaults to HI) - - Outputs: - ------- - :state: the LFSR state. bitwidth is taken from the polynomial - maximum exponent. 
- - Note: if an LFSRPolynomial is passed in as the input, because - LFSRPolynomial is derived from set() it's ok: - LFSRPolynomial(LFSRPolynomial(p)) == LFSRPolynomial(p) - """ - LFSRPolynomial.__init__(self, polynomial) - self.state = Signal(self.max_exponent, reset=1) - self.enable = Signal(reset=1) - - def elaborate(self, platform): - m = Module() - # do absolutely nothing if the polynomial is empty (always has a zero) - if self.max_exponent <= 1: - return m - - # create XOR-bunch, select bits from state based on exponent - feedback = Const(0) # doesn't do any harm starting from 0b0 (xor chain) - for exponent in self: - if exponent > 0: # don't have to skip, saves CPU cycles though - feedback ^= self.state[exponent - 1] - - # if enabled, shift-and-feedback - with m.If(self.enable): - # shift up lower bits by Cat'ing in a new bit zero (feedback) - newstate = Cat(feedback, self.state[:-1]) - m.d.sync += self.state.eq(newstate) - - return m - - -# example: Poly24 -if __name__ == '__main__': - p24 = rtlil.convert(LFSR(LFSR_POLY_24)) - with open("lfsr2_p24.il", "w") as f: - f.write(p24) diff --git a/src/TLB/src/LFSR.pyi b/src/TLB/src/LFSR.pyi deleted file mode 100644 index 64eb9115..00000000 --- a/src/TLB/src/LFSR.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1-or-later -# See Notices.txt for copyright information -from nmigen import Module -from typing import Iterable, Optional, Iterator, Any, Union -from typing_extensions import final - - -@final -class LFSRPolynomial(set): - def __init__(self, exponents: Iterable[int] = ()): - def elements() -> Iterable[int]: ... - @property - def exponents(self) -> list[int]: ... - def __str__(self) -> str: ... - def __repr__(self) -> str: ... - - -@final -class LFSR: - def __init__(self, polynomial: Union[Iterable[int], LFSRPolynomial]): ... - @property - def width(self) -> int: ... - def elaborate(self, platform: Any) -> Module: ... 
diff --git a/src/TLB/src/Makefile b/src/TLB/src/Makefile deleted file mode 100644 index 1eb67acc..00000000 --- a/src/TLB/src/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -verilog: - python3 Cam.py generate -t v > Cam.v diff --git a/src/TLB/src/MemorySet.py b/src/TLB/src/MemorySet.py deleted file mode 100644 index ea61bdf5..00000000 --- a/src/TLB/src/MemorySet.py +++ /dev/null @@ -1,66 +0,0 @@ -from nmigen import Cat, Memory, Module, Signal, Elaboratable -from nmigen.cli import main -from nmigen.cli import verilog, rtlil - - -class MemorySet(Elaboratable): - def __init__(self, data_size, tag_size, set_count, active): - self.active = active - input_size = tag_size + data_size # Size of the input data - memory_width = input_size + 1 # The width of the cache memory - self.active = active - self.data_size = data_size - self.tag_size = tag_size - - # XXX TODO, use rd-enable and wr-enable? - self.mem = Memory(memory_width, set_count) - self.r = self.mem.read_port() - self.w = self.mem.write_port() - - # inputs (address) - self.cset = Signal(max=set_count) # The set to be checked - self.tag = Signal(tag_size) # The tag to find - self.data_i = Signal(data_size) # Incoming data - - # outputs - self.valid = Signal() - self.data_o = Signal(data_size) # Outgoing data (excludes tag) - - def elaborate(self, platform): - m = Module() - m.submodules.mem = self.mem - m.submodules.r = self.r - m.submodules.w = self.w - - # temporaries - active_bit = Signal() - tag_valid = Signal() - data_start = self.active + 1 - data_end = data_start + self.data_size - tag_start = data_end - tag_end = tag_start + self.tag_size - - # connect the read port address to the set/entry - read_port = self.r - m.d.comb += read_port.addr.eq(self.cset) - # Pull out active bit from data - data = read_port.data - m.d.comb += active_bit.eq(data[self.active]) - # Validate given tag vs stored tag - tag = data[tag_start:tag_end] - m.d.comb += tag_valid.eq(self.tag == tag) - # An entry is only valid if the tags match AND - 
# is marked as a valid entry - m.d.comb += self.valid.eq(tag_valid & active_bit) - - # output data: TODO, check rd-enable? - m.d.comb += self.data_o.eq(data[data_start:data_end]) - - # connect the write port addr to the set/entry (only if write enabled) - # (which is only done on a match, see SAC.write_entry below) - write_port = self.w - with m.If(write_port.en): - m.d.comb += write_port.addr.eq(self.cset) - m.d.comb += write_port.data.eq(Cat(1, self.data_i, self.tag)) - - return m diff --git a/src/TLB/src/PermissionValidator.py b/src/TLB/src/PermissionValidator.py deleted file mode 100644 index 14f01e42..00000000 --- a/src/TLB/src/PermissionValidator.py +++ /dev/null @@ -1,67 +0,0 @@ -from nmigen import Module, Signal -from nmigen.cli import main - -from PteEntry import PteEntry - -class PermissionValidator(): - """ The purpose of this Module is to check the Permissions of a given PTE - against the requested access permissions. - - This module will either validate (by setting the valid bit HIGH) - the request or find a permission fault and invalidate (by setting - the valid bit LOW) the request - """ - - def __init__(self, asid_size, pte_size): - """ Arguments: - * asid_size: (bit count) The size of the asid to be processed - * pte_size: (bit count) The size of the pte to be processed - - Return: - * valid HIGH when permissions are correct - """ - # Internal - self.pte_entry = PteEntry(asid_size, pte_size) - - # Input - self.data = Signal(asid_size + pte_size); - self.xwr = Signal(3) # Execute, Write, Read - self.super_mode = Signal(1) # Supervisor Mode - self.super_access = Signal(1) # Supervisor Access - self.asid = Signal(15) # Address Space IDentifier (ASID) - - # Output - self.valid = Signal(1) # Denotes if the permissions are correct - - def elaborate(self, platform=None): - m = Module() - - m.submodules.pte_entry = self.pte_entry - - m.d.comb += self.pte_entry.i.eq(self.data) - - # Check if the entry is valid - with m.If(self.pte_entry.v): - # ASID match 
or Global Permission - # Note that the MSB bound is exclusive - with m.If((self.pte_entry.asid == self.asid) | self.pte_entry.g): - # Check Execute, Write, Read (XWR) Permissions - with m.If(self.pte_entry.xwr == self.xwr): - # Supervisor Logic - with m.If(self.super_mode): - # Valid if entry is not in user mode or supervisor - # has Supervisor User Memory (SUM) access via the - # SUM bit in the sstatus register - m.d.comb += self.valid.eq((~self.pte_entry.u) \ - | self.super_access) - # User logic - with m.Else(): - # Valid if the entry is in user mode only - m.d.comb += self.valid.eq(self.pte_entry.u) - with m.Else(): - m.d.comb += self.valid.eq(0) - with m.Else(): - m.d.comb += self.valid.eq(0) - with m.Else(): - m.d.comb += self.valid.eq(0) - return m \ No newline at end of file diff --git a/src/TLB/src/PteEntry.py b/src/TLB/src/PteEntry.py deleted file mode 100644 index c0705457..00000000 --- a/src/TLB/src/PteEntry.py +++ /dev/null @@ -1,66 +0,0 @@ -from nmigen import Module, Signal -from nmigen.cli import main - -class PteEntry(): - """ The purpose of this Module is to centralize the parsing of Page - Table Entries (PTE) into one module to prevent common mistakes - and duplication of code. The control bits are parsed out for - ease of use. - - This module parses according to the standard PTE given by the - Volume II: RISC-V Privileged Architectures V1.10 Pg 60. - The Address Space IDentifier (ASID) is appended to the MSB of the input - and is parsed out as such. - - An valid input Signal would be: - ASID PTE - Bits:[78-64][63-0] - - The output PTE value will include the control bits. 
- """ - def __init__(self, asid_size, pte_size): - """ Arguments: - * asid_size: (bit count) The size of the asid to be processed - * pte_size: (bit count) The size of the pte to be processed - - Return: - * d The Dirty bit from the PTE portion of i - * a The Accessed bit from the PTE portion of i - * g The Global bit from the PTE portion of i - * u The User Mode bit from the PTE portion of i - * xwr The Execute/Write/Read bit from the PTE portion of i - * v The Valid bit from the PTE portion of i - * asid The asid portion of i - * pte The pte portion of i - """ - # Internal - self.asid_start = pte_size - self.asid_end = pte_size + asid_size - - # Input - self.i = Signal(asid_size + pte_size) - - # Output - self.d = Signal(1) # Dirty bit (From pte) - self.a = Signal(1) # Accessed bit (From pte) - self.g = Signal(1) # Global Access (From pte) - self.u = Signal(1) # User Mode (From pte) - self.xwr = Signal(3) # Execute Read Write (From pte) - self.v = Signal(1) # Valid (From pte) - self.asid = Signal(asid_size) # Associated Address Space IDentifier - self.pte = Signal(pte_size) # Full Page Table Entry - - def elaborate(self, platform=None): - m = Module() - # Pull out all control bites from PTE - m.d.comb += [ - self.d.eq(self.i[7]), - self.a.eq(self.i[6]), - self.g.eq(self.i[5]), - self.u.eq(self.i[4]), - self.xwr.eq(self.i[1:4]), - self.v.eq(self.i[0]) - ] - m.d.comb += self.asid.eq(self.i[self.asid_start:self.asid_end]) - m.d.comb += self.pte.eq(self.i[0:self.asid_start]) - return m \ No newline at end of file diff --git a/src/TLB/src/SetAssociativeCache.py b/src/TLB/src/SetAssociativeCache.py deleted file mode 100644 index 0acd3488..00000000 --- a/src/TLB/src/SetAssociativeCache.py +++ /dev/null @@ -1,274 +0,0 @@ -""" - -Online simulator of 4-way set-associative cache: -http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/sa4.html - -Python simulator of a N-way set-associative cache: -https://github.com/vaskevich/CacheSim/blob/master/cachesim.py -""" -import sys 
-sys.path.append("ariane/src/") - -from nmigen import Array, Cat, Memory, Module, Signal, Mux, Elaboratable -from nmigen.compat.genlib import fsm -from nmigen.cli import main -from nmigen.cli import verilog, rtlil - -from AddressEncoder import AddressEncoder -from MemorySet import MemorySet - -# TODO: use a LFSR that advances continuously and picking the bottom -# few bits from it to select which cache line to replace, instead of PLRU -# http://bugs.libre-riscv.org/show_bug.cgi?id=71 -from plru import PLRU -from LFSR import LFSR, LFSR_POLY_24 - -SA_NA = "00" # no action (none) -SA_RD = "01" # read -SA_WR = "10" # write - - -class SetAssociativeCache(Elaboratable): - """ Set Associative Cache Memory - - The purpose of this module is to generate a memory cache given the - constraints passed in. This will create a n-way set associative cache. - It is expected for the SV TLB that the VMA will provide the set number - while the ASID provides the tag (still to be decided). - - """ - def __init__(self, tag_size, data_size, set_count, way_count, lfsr=False): - """ Arguments - * tag_size (bits): The bit count of the tag - * data_size (bits): The bit count of the data to be stored - * set_count (number): The number of sets/entries in the cache - * way_count (number): The number of slots a data can be stored - in one set - * lfsr: if set, use an LFSR for (pseudo-randomly) selecting - set/entry to write to. 
otherwise, use a PLRU - """ - # Internals - self.lfsr_mode = lfsr - self.way_count = way_count # The number of slots in one set - self.tag_size = tag_size # The bit count of the tag - self.data_size = data_size # The bit count of the data to be stored - - # set up Memory array - self.mem_array = Array() # memory array - for i in range(way_count): - ms = MemorySet(data_size, tag_size, set_count, active=0) - self.mem_array.append(ms) - - # Finds valid entries - self.encoder = AddressEncoder(way_count) - - # setup PLRU or LFSR - if lfsr: - # LFSR mode - self.lfsr = LFSR(LFSR_POLY_24) - else: - # PLRU mode - self.plru = PLRU(way_count) # One block to handle plru calculations - self.plru_array = Array() # PLRU data on each set - for i in range(set_count): - name="plru%d" % i - self.plru_array.append(Signal(self.plru.TLBSZ, name=name)) - - # Input - self.enable = Signal(1) # Whether the cache is enabled - self.command = Signal(2) # 00=None, 01=Read, 10=Write (see SA_XX) - self.cset = Signal(max=set_count) # The set to be checked - self.tag = Signal(tag_size) # The tag to find - self.data_i = Signal(data_size) # The input data - - # Output - self.ready = Signal(1) # 0 => Processing 1 => Ready for commands - self.hit = Signal(1) # Tag matched one way in the given set - self.multiple_hit = Signal(1) # Tag matched many ways in the given set - self.data_o = Signal(data_size) # The data linked to the matched tag - - def check_tags(self, m): - """ Validate the tags in the selected set. If one and only one - tag matches set its state to zero and increment all others - by one. We only advance to next state if a single hit is found. 
- """ - # Vector to store way valid results - # A zero denotes a way is invalid - valid_vector = [] - # Loop through memory to prep read/write ports and set valid_vector - for i in range(self.way_count): - valid_vector.append(self.mem_array[i].valid) - - # Pass encoder the valid vector - m.d.comb += self.encoder.i.eq(Cat(*valid_vector)) - - # Only one entry should be marked - # This is due to already verifying the tags - # matched and the valid bit is high - with m.If(self.hit): - m.next = "FINISHED_READ" - # Pull out data from the read port - data = self.mem_array[self.encoder.o].data_o - m.d.comb += self.data_o.eq(data) - if not self.lfsr_mode: - self.access_plru(m) - - # Oh no! Seal the gates! Multiple tags matched?!? kasd;ljkafdsj;k - with m.Elif(self.multiple_hit): - # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck - m.d.comb += self.data_o.eq(0) - - # No tag matches means no data - with m.Else(): - # XXX TODO, m.next = "FINISHED_READ" ? otherwise stuck - m.d.comb += self.data_o.eq(0) - - def access_plru(self, m): - """ An entry was accessed and the plru tree must now be updated - """ - # Pull out the set's entry being edited - plru_entry = self.plru_array[self.cset] - m.d.comb += [ - # Set the plru data to the current state - self.plru.plru_tree.eq(plru_entry), - # Set that the cache was accessed - self.plru.lu_access_i.eq(1) - ] - - def read(self, m): - """ Go through the read process of the cache. - This takes two cycles to complete. First it checks for a valid tag - and secondly it updates the LRU values. 
- """ - with m.FSM() as fsm_read: - with m.State("READY"): - m.d.comb += self.ready.eq(0) - # check_tags will set the state if the conditions are met - self.check_tags(m) - with m.State("FINISHED_READ"): - m.next = "READY" - m.d.comb += self.ready.eq(1) - if not self.lfsr_mode: - plru_tree_o = self.plru.plru_tree_o - m.d.sync += self.plru_array[self.cset].eq(plru_tree_o) - - def write_entry(self, m): - if not self.lfsr_mode: - m.d.comb += [# set cset (mem address) into PLRU - self.plru.plru_tree.eq(self.plru_array[self.cset]), - # and connect plru to encoder for write - self.encoder.i.eq(self.plru.replace_en_o) - ] - write_port = self.mem_array[self.encoder.o].w - else: - # use the LFSR to generate a random(ish) one of the mem array - lfsr_output = Signal(max=self.way_count) - lfsr_random = Signal(max=self.way_count) - m.d.comb += lfsr_output.eq(self.lfsr.state) # lose some bits - # address too big, limit to range of array - m.d.comb += lfsr_random.eq(Mux(lfsr_output > self.way_count, - lfsr_output - self.way_count, - lfsr_output)) - write_port = self.mem_array[lfsr_random].w - - # then if there is a match from the encoder, enable the selected write - with m.If(self.encoder.single_match): - m.d.comb += write_port.en.eq(1) - - def write(self, m): - """ Go through the write process of the cache. - This takes two cycles to complete. 
First it writes the entry, - and secondly it updates the PLRU (in plru mode) - """ - with m.FSM() as fsm_write: - with m.State("READY"): - m.d.comb += self.ready.eq(0) - self.write_entry(m) - m.next ="FINISHED_WRITE" - with m.State("FINISHED_WRITE"): - m.d.comb += self.ready.eq(1) - if not self.lfsr_mode: - plru_entry = self.plru_array[self.cset] - m.d.sync += plru_entry.eq(self.plru.plru_tree_o) - m.next = "READY" - - - def elaborate(self, platform=None): - m = Module() - - # ---- - # set up Modules: AddressEncoder, LFSR/PLRU, Mem Array - # ---- - - m.submodules.AddressEncoder = self.encoder - if self.lfsr_mode: - m.submodules.LFSR = self.lfsr - else: - m.submodules.PLRU = self.plru - - for i, mem in enumerate(self.mem_array): - setattr(m.submodules, "mem%d" % i, mem) - - # ---- - # select mode: PLRU connect to encoder, LFSR do... something - # ---- - - if not self.lfsr_mode: - # Set what entry was hit - m.d.comb += self.plru.lu_hit.eq(self.encoder.o) - else: - # enable LFSR - m.d.comb += self.lfsr.enable.eq(self.enable) - - # ---- - # connect hit/multiple hit to encoder output - # ---- - - m.d.comb += [ - self.hit.eq(self.encoder.single_match), - self.multiple_hit.eq(self.encoder.multiple_match), - ] - - # ---- - # connect incoming data/tag/cset(addr) to mem_array - # ---- - - for mem in self.mem_array: - write_port = mem.w - m.d.comb += [mem.cset.eq(self.cset), - mem.tag.eq(self.tag), - mem.data_i.eq(self.data_i), - write_port.en.eq(0), # default: disable write - ] - # ---- - # Commands: READ/WRITE/TODO - # ---- - - with m.If(self.enable): - with m.Switch(self.command): - # Search all sets at a particular tag - with m.Case(SA_RD): - self.read(m) - with m.Case(SA_WR): - self.write(m) - # Maybe catch multiple tags write here? - # TODO - # TODO: invalidate/flush, flush-all? 
- - return m - - def ports(self): - return [self.enable, self.command, self.cset, self.tag, self.data_i, - self.ready, self.hit, self.multiple_hit, self.data_o] - - -if __name__ == '__main__': - sac = SetAssociativeCache(4, 8, 4, 6) - vl = rtlil.convert(sac, ports=sac.ports()) - with open("SetAssociativeCache.il", "w") as f: - f.write(vl) - - sac_lfsr = SetAssociativeCache(4, 8, 4, 6, True) - vl = rtlil.convert(sac_lfsr, ports=sac_lfsr.ports()) - with open("SetAssociativeCacheLFSR.il", "w") as f: - f.write(vl) diff --git a/src/TLB/src/TLB.py b/src/TLB/src/TLB.py deleted file mode 100644 index 3538bdc1..00000000 --- a/src/TLB/src/TLB.py +++ /dev/null @@ -1,173 +0,0 @@ -""" TLB Module - - The expected form of the data is: - * Item (Bits) - * Tag (N - 79) / ASID (78 - 64) / PTE (63 - 0) -""" - -from nmigen import Memory, Module, Signal, Cat -from nmigen.cli import main - -from PermissionValidator import PermissionValidator -from Cam import Cam - -class TLB(): - def __init__(self, asid_size, vma_size, pte_size, L1_size): - """ Arguments - * asid_size: Address Space IDentifier (ASID) typically 15 bits - * vma_size: Virtual Memory Address (VMA) typically 36 bits - * pte_size: Page Table Entry (PTE) typically 64 bits - - Notes: - These arguments should represent the largest possible size - defined by the MODE settings. See - Volume II: RISC-V Privileged Architectures V1.10 Page 57 - """ - - # Internal - self.state = 0 - # L1 Cache Modules - L1_size = 8 # XXX overridden incoming argument? 
- self.cam_L1 = Cam(vma_size, L1_size) - self.mem_L1 = Memory(asid_size + pte_size, L1_size) - - # Permission Validator - self.perm_validator = PermissionValidator(asid_size, pte_size) - - # Inputs - self.supermode = Signal(1) # Supervisor Mode - self.super_access = Signal(1) # Supervisor Access - self.command = Signal(2) # 00=None, 01=Search, 10=Write L1, 11=Write L2 - self.xwr = Signal(3) # Execute, Write, Read - self.mode = Signal(4) # 4 bits for access to Sv48 on Rv64 - self.address_L1 = Signal(max=L1_size) - self.asid = Signal(asid_size) # Address Space IDentifier (ASID) - self.vma = Signal(vma_size) # Virtual Memory Address (VMA) - self.pte_in = Signal(pte_size) # To be saved Page Table Entry (PTE) - - # Outputs - self.hit = Signal(1) # Denotes if the VMA had a mapped PTE - self.perm_valid = Signal(1) # Denotes if the permissions are correct - self.pte_out = Signal(pte_size) # PTE that was mapped to by the VMA - - def search(self, m, read_L1, write_L1): - """ searches the TLB - """ - m.d.comb += [ - write_L1.en.eq(0), - self.cam_L1.write_enable.eq(0), - self.cam_L1.data_in.eq(self.vma) - ] - # Match found in L1 CAM - match_found = Signal(reset_less=True) - m.d.comb += match_found.eq(self.cam_L1.single_match - | self.cam_L1.multiple_match) - with m.If(match_found): - # Memory shortcut variables - mem_address = self.cam_L1.match_address - # Memory Logic - m.d.comb += read_L1.addr.eq(mem_address) - # Permission Validator Logic - m.d.comb += [ - self.hit.eq(1), - # Set permission validator data to the correct - # register file data according to CAM match - # address - self.perm_validator.data.eq(read_L1.data), - # Execute, Read, Write - self.perm_validator.xwr.eq(self.xwr), - # Supervisor Mode - self.perm_validator.super_mode.eq(self.supermode), - # Supverisor Access - self.perm_validator.super_access.eq(self.super_access), - # Address Space IDentifier (ASID) - self.perm_validator.asid.eq(self.asid), - # Output result of permission validation - 
self.perm_valid.eq(self.perm_validator.valid) - ] - # Only output PTE if permissions are valid - with m.If(self.perm_validator.valid): - # XXX TODO - dummy for now - reg_data = Signal.like(self.pte_out) - m.d.comb += [ - self.pte_out.eq(reg_data) - ] - with m.Else(): - m.d.comb += [ - self.pte_out.eq(0) - ] - # Miss Logic - with m.Else(): - m.d.comb += [ - self.hit.eq(0), - self.perm_valid.eq(0), - self.pte_out.eq(0) - ] - - def write_l1(self, m, read_L1, write_L1): - """ writes to the L1 cache - """ - # Memory_L1 Logic - m.d.comb += [ - write_L1.en.eq(1), - write_L1.addr.eq(self.address_L1), - # The Cat places arguments from LSB -> MSB - write_L1.data.eq(Cat(self.pte_in, self.asid)) - ] - # CAM_L1 Logic - m.d.comb += [ - self.cam_L1.write_enable.eq(1), - self.cam_L1.data_in.eq(self.vma), - ] - - def elaborate(self, platform): - m = Module() - # Add submodules - # Submodules for L1 Cache - m.d.submodules.cam_L1 = self.cam_L1 - m.d.sumbmodules.read_L1 = read_L1 = self.mem_L1.read_port() - m.d.sumbmodules.read_L1 = write_L1 = self.mem_L1.write_port() - # Permission Validator Submodule - m.d.submodules.perm_valididator = self.perm_validator - - # When MODE specifies translation - # TODO add in different bit length handling ie prefix 0s - tlb_enable = Signal(reset_less=True) - m.d.comb += tlb_enable.eq(self.mode != 0) - - with m.If(tlb_enable): - m.d.comb += [ - self.cam_L1.enable.eq(1) - ] - with m.Switch(self.command): - # Search - with m.Case("01"): - self.search(m, read_L1, write_L1) - - # Write L1 - # Expected that the miss will be handled in software - with m.Case("10"): - self.write_l1(m, read_L1, write_L1) - - # TODO - #with m.Case("11"): - - # When disabled - with m.Else(): - m.d.comb += [ - self.cam_L1.enable.eq(0), - # XXX TODO - self.reg_file.enable.eq(0), - self.hit.eq(0), - self.perm_valid.eq(0), # XXX TODO, check this - self.pte_out.eq(0) - ] - return m - - -if __name__ == '__main__': - tlb = TLB(15, 36, 64, 4) - main(tlb, ports=[ tlb.supermode, 
tlb.super_access, tlb.command, - tlb.xwr, tlb.mode, tlb.address_L1, tlb.asid, - tlb.vma, tlb.pte_in, - tlb.hit, tlb.perm_valid, tlb.pte_out, - ] + tlb.cam_L1.ports()) diff --git a/src/TLB/src/__init__.py b/src/TLB/src/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/TLB/src/ariane/TreePLRU.cpp b/src/TLB/src/ariane/TreePLRU.cpp deleted file mode 100644 index 2f6aeea5..00000000 --- a/src/TLB/src/ariane/TreePLRU.cpp +++ /dev/null @@ -1,211 +0,0 @@ -#include -#include -#include - - -#define NWAY 4 -#define NLINE 256 -#define HIT 0 -#define MISS 1 -#define MS 1000 -/* -Detailed TreePLRU inference see here: https://docs.google.com/spreadsheets/d/14zQpPYPwDAbCCjBT_a3KLaE5FEk-RNhI8Z7Qm_biW8g/edit?usp=sharing -Ref: https://people.cs.clemson.edu/~mark/464/p_lru.txt -four-way set associative - three bits - each bit represents one branch point in a binary decision tree; let 1 - represent that the left side has been referenced more recently than the - right side, and 0 vice-versa - are all 4 lines valid? - / \ - yes no, use an invalid line - | - | - | - bit_0 == 0? state | replace ref to | next state - / \ ------+-------- -------+----------- - y n 00x | line_0 line_0 | 11_ - / \ 01x | line_1 line_1 | 10_ - bit_1 == 0? bit_2 == 0? 
1x0 | line_2 line_2 | 0_1 - / \ / \ 1x1 | line_3 line_3 | 0_0 - y n y n - / \ / \ ('x' means ('_' means unchanged) - line_0 line_1 line_2 line_3 don't care) - 8-way set associative - 7 = 1+2+4 bits -16-way set associative - 15 = 1+2+4+8 bits -32-way set associative - 31 = 1+2+4+8+16 bits -64-way set associative - 63 = 1+2+4+8+16+32 bits -*/ -using namespace std; -struct AddressField { - uint64_t wd_idx : 2;//Unused - uint64_t offset : 4;//Unused - uint64_t index : 8;//NLINE = 256 = 2^8 - uint64_t tag : 50; -}; - -union Address { - uint32_t* p; - AddressField fields; -}; - -struct Cell { - bool v; - uint64_t tag; - - Cell() : v(false), tag(0) {} - - bool isHit(uint64_t tag) { - return v && (tag == this->tag); - } - - void fetch(uint32_t* address) { - Address addr; - addr.p = address; - addr.fields.offset = 0; - addr.fields.wd_idx = 0; - tag = addr.fields.tag; - v = true; - } -}; - -ostream& operator<<(ostream & out, const Cell& cell) { - out << " v:" << cell.v << " tag:" << hex << cell.tag; - return out; -} - -struct Block { - Cell cell[NWAY]; - uint32_t state; - uint64_t *mask;//Mask the state to get accurate value for specified 1 bit. - uint64_t *value; - uint64_t *next_value; - - Block() : state(0) { - switch (NWAY) { - case 4: - mask = new uint64_t[4]{0b110, 0b110, 0b101, 0b101}; - value = new uint64_t[4]{0b000, 0b010, 0b100, 0b101}; - next_value = new uint64_t[4]{0b110, 0b100, 0b001, 0b000}; - break; - case 8: - mask = new uint64_t[8]{0b1101000, 0b1101000, 0b1100100, 0b1100100, 0b1010010, 0b1010010, 0b1010001, - 0b1010001}; - value = new uint64_t[8]{0b0000000, 0b0001000, 0b0100000, 0b0100100, 0b1000000, 0b1000010, 0b1010000, - 0b1010001}; - next_value = new uint64_t[8]{0b1101000, 0b1100000, 0b1000100, 0b1000000, 0b0010010, 0b0010000, - 0b0000001, 0b0000000}; - break; - //TODO - more NWAY goes here. 
- default: - std::cout << "Error definition NWAY = " << NWAY << std::endl; - } - } - - uint32_t *getByTag(uint64_t tag, uint32_t *pway) { - for (int i = 0; i < NWAY; ++i) { - if (cell[i].isHit(tag)) { - *pway = i; - return pway; - } - } - return NULL; - } - - void setLRU(uint32_t *address) { - int way = 0; - uint32_t st = state; - for (int i = 0; i < NWAY; ++i) { - if ((state & mask[i]) == value[i]) { - state ^= mask[i]; - way = i; - break; - } - } - cell[way].fetch(address); - cout << "MISS: way:" << way << " address:" << address << " state:" << st << "->" << state << endl; - } - - uint32_t *get(uint32_t *address, uint32_t *pway) { - Address addr; - addr.p = address; - uint32_t *d = getByTag(addr.fields.tag, pway); - if (d != NULL) { - return &d[addr.fields.offset]; - } - return d; - } - - int set(uint32_t *address) { - uint32_t way = 0; - uint32_t *p = get(address, &way); - if (p != NULL) { - printf("HIT: address:%p ref_to way:%d state %X --> ", address, way, state); - state &= ~mask[way]; - printf("%X --> ", state); - state |= next_value[way]; - printf("%X\n", state); - // *p = *address; //skip since address is fake. - return HIT; - } else { - setLRU(address); - return MISS; - } - } -}; - -ostream& operator<<(ostream & out, const Block& block) { - out << "state:" << block.state << " "; - for (int i = 0; i signal with a page fault exception - # 2. We got an access error because of insufficient permissions -> - # throw an access exception - m.d.comb += self.icache_areq_o.fetch_exception.valid.eq(0) - # Check whether we are allowed to access this memory region - # from a fetch perspective - - # XXX TODO: use PermissionValidator instead [we like modules] - m.d.comb += iaccess_err.eq(self.icache_areq_i.fetch_req & \ - (((self.priv_lvl_i == PRIV_LVL_U) & \ - ~itlb_content.u) | \ - ((self.priv_lvl_i == PRIV_LVL_S) & \ - itlb_content.u))) - - # MMU enabled: address from TLB, request delayed until hit. 
- # Error when TLB hit and no access right or TLB hit and - # translated address not valid (e.g. AXI decode error), - # or when PTW performs walk due to ITLB miss and raises - # an error. - with m.If (self.enable_translation_i): - # we work with SV39, so if VM is enabled, check that - # all bits [63:38] are equal - with m.If (self.icache_areq_i.fetch_req & \ - ~(((~self.icache_areq_i.fetch_vaddr[38:64]) == 0) | \ - (self.icache_areq_i.fetch_vaddr[38:64]) == 0)): - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), - fe.tval.eq(self.icache_areq_i.fetch_vaddr), - fe.valid.eq(1) - ] - - m.d.comb += self.icache_areq_o.fetch_valid.eq(0) - - # 4K page - paddr = Signal.like(self.icache_areq_o.fetch_paddr) - paddr4k = Cat(self.icache_areq_i.fetch_vaddr[0:12], - itlb_content.ppn) - m.d.comb += paddr.eq(paddr4k) - # Mega page - with m.If(itlb_is_2M): - m.d.comb += paddr[12:21].eq( - self.icache_areq_i.fetch_vaddr[12:21]) - # Giga page - with m.If(itlb_is_1G): - m.d.comb += paddr[12:30].eq( - self.icache_areq_i.fetch_vaddr[12:30]) - m.d.comb += self.icache_areq_o.fetch_paddr.eq(paddr) - - # --------- - # ITLB Hit - # -------- - # if we hit the ITLB output the request signal immediately - with m.If(itlb_lu_hit): - m.d.comb += self.icache_areq_o.fetch_valid.eq( - self.icache_areq_i.fetch_req) - # we got an access error - with m.If (iaccess_err): - # throw a page fault - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_ACCESS_FAULT), - fe.tval.eq(self.icache_areq_i.fetch_vaddr), - fe.valid.eq(1) - ] - # --------- - # ITLB Miss - # --------- - # watch out for exceptions happening during walking the page table - with m.Elif(ptw_active & walking_instr): - m.d.comb += self.icache_areq_o.fetch_valid.eq(ptw_error) - fe = self.icache_areq_o.fetch_exception - m.d.comb += [fe.cause.eq(INSTR_PAGE_FAULT), - fe.tval.eq(uaddr64), - fe.valid.eq(1) - ] - - #----------------------- - # Data Interface - #----------------------- - - 
lsu_vaddr = Signal(64) - dtlb_pte = PTE() - misaligned_ex = RVException() - lsu_req = Signal() - lsu_is_store = Signal() - dtlb_hit = Signal() - dtlb_is_2M = Signal() - dtlb_is_1G = Signal() - - # check if we need to do translation or if we are always - # ready (e.g.: we are not translating anything) - m.d.comb += self.lsu_dtlb_hit_o.eq(Mux(self.en_ld_st_translation_i, - dtlb_lu_hit, 1)) - - # The data interface is simpler and only consists of a - # request/response interface - m.d.comb += [ - # save request and DTLB response - lsu_vaddr.eq(self.lsu_vaddr_i), - lsu_req.eq(self.lsu_req_i), - misaligned_ex.eq(self.misaligned_ex_i), - dtlb_pte.eq(dtlb_content), - dtlb_hit.eq(dtlb_lu_hit), - lsu_is_store.eq(self.lsu_is_store_i), - dtlb_is_2M.eq(dtlb_is_2M), - dtlb_is_1G.eq(dtlb_is_1G), - ] - m.d.sync += [ - self.lsu_paddr_o.eq(lsu_vaddr), - self.lsu_valid_o.eq(lsu_req), - self.lsu_exception_o.eq(misaligned_ex), - ] - - sverr = Signal() - usrerr = Signal() - - m.d.comb += [ - # mute misaligned exceptions if there is no request - # otherwise they will throw accidental exceptions - misaligned_ex.valid.eq(self.misaligned_ex_i.valid & self.lsu_req_i), - - # SUM is not set and we are trying to access a user - # page in supervisor mode - sverr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_S & ~self.sum_i & \ - dtlb_pte.u), - # this is not a user page but we are in user mode and - # trying to access it - usrerr.eq(self.ld_st_priv_lvl_i == PRIV_LVL_U & ~dtlb_pte.u), - - # Check if the User flag is set, then we may only - # access it in supervisor mode if SUM is enabled - daccess_err.eq(sverr | usrerr), - ] - - # translation is enabled and no misaligned exception occurred - with m.If(self.en_ld_st_translation_i & ~misaligned_ex.valid): - m.d.comb += lsu_req.eq(0) - # 4K page - paddr = Signal.like(lsu_vaddr) - paddr4k = Cat(lsu_vaddr[0:12], itlb_content.ppn) - m.d.comb += paddr.eq(paddr4k) - # Mega page - with m.If(dtlb_is_2M): - m.d.comb += paddr[12:21].eq(lsu_vaddr[12:21]) - # Giga page 
- with m.If(dtlb_is_1G): - m.d.comb += paddr[12:30].eq(lsu_vaddr[12:30]) - m.d.sync += self.lsu_paddr_o.eq(paddr) - - # --------- - # DTLB Hit - # -------- - with m.If(dtlb_hit & lsu_req): - m.d.comb += lsu_req.eq(1) - # this is a store - with m.If (lsu_is_store): - # check if the page is write-able and - # we are not violating privileges - # also check if the dirty flag is set - with m.If(~dtlb_pte.w | daccess_err | ~dtlb_pte.d): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), - le.tval.eq(lsu_vaddr), - le.valid.eq(1) - ] - - # this is a load, check for sufficient access - # privileges - throw a page fault if necessary - with m.Elif(daccess_err): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), - le.tval.eq(lsu_vaddr), - le.valid.eq(1) - ] - # --------- - # DTLB Miss - # --------- - # watch out for exceptions - with m.Elif (ptw_active & ~walking_instr): - # page table walker threw an exception - with m.If (ptw_error): - # an error makes the translation valid - m.d.comb += lsu_req.eq(1) - # the page table walker can only throw page faults - with m.If (lsu_is_store): - le = self.lsu_exception_o - m.d.sync += [le.cause.eq(STORE_PAGE_FAULT), - le.tval.eq(uaddr64), - le.valid.eq(1) - ] - with m.Else(): - m.d.sync += [le.cause.eq(LOAD_PAGE_FAULT), - le.tval.eq(uaddr64), - le.valid.eq(1) - ] - - return m - - def ports(self): - return [self.flush_i, self.enable_translation_i, - self.en_ld_st_translation_i, - self.lsu_req_i, - self.lsu_vaddr_i, self.lsu_is_store_i, self.lsu_dtlb_hit_o, - self.lsu_valid_o, self.lsu_paddr_o, - self.priv_lvl_i, self.ld_st_priv_lvl_i, self.sum_i, self.mxr_i, - self.satp_ppn_i, self.asid_i, self.flush_tlb_i, - self.itlb_miss_o, self.dtlb_miss_o] + \ - self.icache_areq_i.ports() + self.icache_areq_o.ports() + \ - self.req_port_i.ports() + self.req_port_o.ports() + \ - self.misaligned_ex_i.ports() + self.lsu_exception_o.ports() - -if __name__ == '__main__': - mmu = MMU() - vl = rtlil.convert(mmu, 
ports=mmu.ports()) - with open("test_mmu.il", "w") as f: - f.write(vl) - diff --git a/src/TLB/src/ariane/src/plru.py b/src/TLB/src/ariane/src/plru.py deleted file mode 100644 index 95d515c4..00000000 --- a/src/TLB/src/ariane/src/plru.py +++ /dev/null @@ -1,106 +0,0 @@ -from nmigen import Signal, Module, Cat, Const -from nmigen.hdl.ir import Elaboratable -from math import log2 - -from ptw import TLBUpdate, PTE, ASID_WIDTH - -class PLRU(Elaboratable): - """ PLRU - Pseudo Least Recently Used Replacement - - PLRU-tree indexing: - lvl0 0 - / \ - / \ - lvl1 1 2 - / \ / \ - lvl2 3 4 5 6 - / \ /\/\ /\ - ... ... ... ... - """ - def __init__(self, entries): - self.entries = entries - self.lu_hit = Signal(entries) - self.replace_en_o = Signal(entries) - self.lu_access_i = Signal() - # Tree (bit per entry) - self.TLBSZ = 2*(self.entries-1) - self.plru_tree = Signal(self.TLBSZ) - self.plru_tree_o = Signal(self.TLBSZ) - - def elaborate(self, platform=None): - m = Module() - - # Just predefine which nodes will be set/cleared - # E.g. for a TLB with 8 entries, the for-loop is semantically - # equivalent to the following pseudo-code: - # unique case (1'b1) - # lu_hit[7]: plru_tree[0, 2, 6] = {1, 1, 1}; - # lu_hit[6]: plru_tree[0, 2, 6] = {1, 1, 0}; - # lu_hit[5]: plru_tree[0, 2, 5] = {1, 0, 1}; - # lu_hit[4]: plru_tree[0, 2, 5] = {1, 0, 0}; - # lu_hit[3]: plru_tree[0, 1, 4] = {0, 1, 1}; - # lu_hit[2]: plru_tree[0, 1, 4] = {0, 1, 0}; - # lu_hit[1]: plru_tree[0, 1, 3] = {0, 0, 1}; - # lu_hit[0]: plru_tree[0, 1, 3] = {0, 0, 0}; - # default: begin /* No hit */ end - # endcase - LOG_TLB = int(log2(self.entries)) - print(LOG_TLB) - for i in range(self.entries): - # we got a hit so update the pointer as it was least recently used - hit = Signal(reset_less=True) - m.d.comb += hit.eq(self.lu_hit[i] & self.lu_access_i) - with m.If(hit): - # Set the nodes to the values we would expect - for lvl in range(LOG_TLB): - idx_base = (1< MSB, lvl1 <=> MSB-1, ... 
- shift = LOG_TLB - lvl; - new_idx = Const(~((i >> (shift-1)) & 1), (1, False)) - plru_idx = idx_base + (i >> shift) - print ("plru", i, lvl, hex(idx_base), - plru_idx, shift, new_idx) - m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx) - - # Decode tree to write enable signals - # Next for-loop basically creates the following logic for e.g. - # an 8 entry TLB (note: pseudo-code obviously): - # replace_en[7] = &plru_tree[ 6, 2, 0]; #plru_tree[0,2,6]=={1,1,1} - # replace_en[6] = &plru_tree[~6, 2, 0]; #plru_tree[0,2,6]=={1,1,0} - # replace_en[5] = &plru_tree[ 5,~2, 0]; #plru_tree[0,2,5]=={1,0,1} - # replace_en[4] = &plru_tree[~5,~2, 0]; #plru_tree[0,2,5]=={1,0,0} - # replace_en[3] = &plru_tree[ 4, 1,~0]; #plru_tree[0,1,4]=={0,1,1} - # replace_en[2] = &plru_tree[~4, 1,~0]; #plru_tree[0,1,4]=={0,1,0} - # replace_en[1] = &plru_tree[ 3,~1,~0]; #plru_tree[0,1,3]=={0,0,1} - # replace_en[0] = &plru_tree[~3,~1,~0]; #plru_tree[0,1,3]=={0,0,0} - # For each entry traverse the tree. If every tree-node matches - # the corresponding bit of the entry's index, this is - # the next entry to replace. - replace = [] - for i in range(self.entries): - en = [] - for lvl in range(LOG_TLB): - idx_base = (1< MSB, lvl1 <=> MSB-1, ... 
- shift = LOG_TLB - lvl; - new_idx = (i >> (shift-1)) & 1; - plru_idx = idx_base + (i>>shift) - plru = Signal(reset_less=True, - name="plru-%d-%d-%d" % (i, lvl, plru_idx)) - m.d.comb += plru.eq(self.plru_tree[plru_idx]) - # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx; - if new_idx: - en.append(~plru) # yes inverted (using bool()) - else: - en.append(plru) # yes inverted (using bool()) - print ("plru", i, en) - # boolean logic manipulation: - # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2) - replace.append(~Cat(*en).bool()) - m.d.comb += self.replace_en_o.eq(Cat(*replace)) - - return m - - def ports(self): - return [self.entries, self.lu_hit, self.replace_en_o, - self.lu_access_i, self.plru_tree, self.plru_tree_o] \ No newline at end of file diff --git a/src/TLB/src/ariane/src/ptw.py b/src/TLB/src/ariane/src/ptw.py deleted file mode 100644 index 05ec2d7d..00000000 --- a/src/TLB/src/ariane/src/ptw.py +++ /dev/null @@ -1,539 +0,0 @@ -""" -# Copyright 2018 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
-# -# Author: David Schaffenrath, TU Graz -# Author: Florian Zaruba, ETH Zurich -# Date: 24.4.2017 -# Description: Hardware-PTW - -/* verilator lint_off WIDTH */ -import ariane_pkg::*; - -see linux kernel source: - -* "arch/riscv/include/asm/page.h" -* "arch/riscv/include/asm/mmu_context.h" -* "arch/riscv/Kconfig" (CONFIG_PAGE_OFFSET) - -""" - -from nmigen import Const, Signal, Cat, Module -from nmigen.hdl.ast import ArrayProxy -from nmigen.cli import verilog, rtlil -from math import log2 - - -DCACHE_SET_ASSOC = 8 -CONFIG_L1D_SIZE = 32*1024 -DCACHE_INDEX_WIDTH = int(log2(CONFIG_L1D_SIZE / DCACHE_SET_ASSOC)) -DCACHE_TAG_WIDTH = 56 - DCACHE_INDEX_WIDTH - -ASID_WIDTH = 8 - - -class DCacheReqI: - def __init__(self): - self.address_index = Signal(DCACHE_INDEX_WIDTH) - self.address_tag = Signal(DCACHE_TAG_WIDTH) - self.data_wdata = Signal(64) - self.data_req = Signal() - self.data_we = Signal() - self.data_be = Signal(8) - self.data_size = Signal(2) - self.kill_req = Signal() - self.tag_valid = Signal() - - def eq(self, inp): - res = [] - for (o, i) in zip(self.ports(), inp.ports()): - res.append(o.eq(i)) - return res - - def ports(self): - return [self.address_index, self.address_tag, - self.data_wdata, self.data_req, - self.data_we, self.data_be, self.data_size, - self.kill_req, self.tag_valid, - ] - -class DCacheReqO: - def __init__(self): - self.data_gnt = Signal() - self.data_rvalid = Signal() - self.data_rdata = Signal(64) # actually in PTE object format - - def eq(self, inp): - res = [] - for (o, i) in zip(self.ports(), inp.ports()): - res.append(o.eq(i)) - return res - - def ports(self): - return [self.data_gnt, self.data_rvalid, self.data_rdata] - - -class PTE: #(RecordObject): - def __init__(self): - self.v = Signal() - self.r = Signal() - self.w = Signal() - self.x = Signal() - self.u = Signal() - self.g = Signal() - self.a = Signal() - self.d = Signal() - self.rsw = Signal(2) - self.ppn = Signal(44) - self.reserved = Signal(10) - - def flatten(self): - return 
Cat(*self.ports()) - - def eq(self, x): - if isinstance(x, ArrayProxy): - res = [] - for o in self.ports(): - i = getattr(x, o.name) - res.append(i) - x = Cat(*res) - else: - x = x.flatten() - return self.flatten().eq(x) - - def __iter__(self): - """ order is critical so that flatten creates LSB to MSB - """ - yield self.v - yield self.r - yield self.w - yield self.x - yield self.u - yield self.g - yield self.a - yield self.d - yield self.rsw - yield self.ppn - yield self.reserved - - def ports(self): - return list(self) - - -class TLBUpdate: - def __init__(self, asid_width): - self.valid = Signal() # valid flag - self.is_2M = Signal() - self.is_1G = Signal() - self.vpn = Signal(27) - self.asid = Signal(asid_width) - self.content = PTE() - - def flatten(self): - return Cat(*self.ports()) - - def eq(self, x): - return self.flatten().eq(x.flatten()) - - def ports(self): - return [self.valid, self.is_2M, self.is_1G, self.vpn, self.asid] + \ - self.content.ports() - - -# SV39 defines three levels of page tables -LVL1 = Const(0, 2) # defined to 0 so that ptw_lvl default-resets to LVL1 -LVL2 = Const(1, 2) -LVL3 = Const(2, 2) - - -class PTW: - def __init__(self, asid_width=8): - self.asid_width = asid_width - - self.flush_i = Signal() # flush everything, we need to do this because - # actually everything we do is speculative at this stage - # e.g.: there could be a CSR instruction that changes everything - self.ptw_active_o = Signal(reset=1) # active if not IDLE - self.walking_instr_o = Signal() # set when walking for TLB - self.ptw_error_o = Signal() # set when an error occurred - self.enable_translation_i = Signal() # CSRs indicate to enable SV39 - self.en_ld_st_translation_i = Signal() # enable VM translation for ld/st - - self.lsu_is_store_i = Signal() # translation triggered by store - # PTW memory interface - self.req_port_i = DCacheReqO() - self.req_port_o = DCacheReqI() - - # to TLBs, update logic - self.itlb_update_o = TLBUpdate(asid_width) - self.dtlb_update_o = 
TLBUpdate(asid_width) - - self.update_vaddr_o = Signal(39) - - self.asid_i = Signal(self.asid_width) - # from TLBs - # did we miss? - self.itlb_access_i = Signal() - self.itlb_hit_i = Signal() - self.itlb_vaddr_i = Signal(64) - - self.dtlb_access_i = Signal() - self.dtlb_hit_i = Signal() - self.dtlb_vaddr_i = Signal(64) - # from CSR file - self.satp_ppn_i = Signal(44) # ppn from satp - self.mxr_i = Signal() - # Performance counters - self.itlb_miss_o = Signal() - self.dtlb_miss_o = Signal() - - def ports(self): - return [self.ptw_active_o, self.walking_instr_o, self.ptw_error_o, - ] - return [ - self.enable_translation_i, self.en_ld_st_translation_i, - self.lsu_is_store_i, self.req_port_i, self.req_port_o, - self.update_vaddr_o, - self.asid_i, - self.itlb_access_i, self.itlb_hit_i, self.itlb_vaddr_i, - self.dtlb_access_i, self.dtlb_hit_i, self.dtlb_vaddr_i, - self.satp_ppn_i, self.mxr_i, - self.itlb_miss_o, self.dtlb_miss_o - ] + self.itlb_update_o.ports() + self.dtlb_update_o.ports() - - def elaborate(self, platform): - m = Module() - - # input registers - data_rvalid = Signal() - data_rdata = Signal(64) - - # NOTE: pte decodes the incoming bit-field (data_rdata). data_rdata - # is spec'd in 64-bit binary-format: better to spec as Record? - pte = PTE() - m.d.comb += pte.flatten().eq(data_rdata) - - # SV39 defines three levels of page tables - ptw_lvl = Signal(2) # default=0=LVL1 on reset (see above) - ptw_lvl1 = Signal() - ptw_lvl2 = Signal() - ptw_lvl3 = Signal() - m.d.comb += [ptw_lvl1.eq(ptw_lvl == LVL1), - ptw_lvl2.eq(ptw_lvl == LVL2), - ptw_lvl3.eq(ptw_lvl == LVL3)] - - # is this an instruction page table walk? 
- is_instr_ptw = Signal() - global_mapping = Signal() - # latched tag signal - tag_valid = Signal() - # register the ASID - tlb_update_asid = Signal(self.asid_width) - # register VPN we need to walk, SV39 defines a 39 bit virtual addr - vaddr = Signal(64) - # 4 byte aligned physical pointer - ptw_pptr = Signal(56) - - end = DCACHE_INDEX_WIDTH + DCACHE_TAG_WIDTH - m.d.sync += [ - # Assignments - self.update_vaddr_o.eq(vaddr), - - self.walking_instr_o.eq(is_instr_ptw), - # directly output the correct physical address - self.req_port_o.address_index.eq(ptw_pptr[0:DCACHE_INDEX_WIDTH]), - self.req_port_o.address_tag.eq(ptw_pptr[DCACHE_INDEX_WIDTH:end]), - # we are never going to kill this request - self.req_port_o.kill_req.eq(0), # XXX assign comb? - # we are never going to write with the HPTW - self.req_port_o.data_wdata.eq(Const(0, 64)), # XXX assign comb? - # ----------- - # TLB Update - # ----------- - self.itlb_update_o.vpn.eq(vaddr[12:39]), - self.dtlb_update_o.vpn.eq(vaddr[12:39]), - # update the correct page table level - self.itlb_update_o.is_2M.eq(ptw_lvl2), - self.itlb_update_o.is_1G.eq(ptw_lvl1), - self.dtlb_update_o.is_2M.eq(ptw_lvl2), - self.dtlb_update_o.is_1G.eq(ptw_lvl1), - # output the correct ASID - self.itlb_update_o.asid.eq(tlb_update_asid), - self.dtlb_update_o.asid.eq(tlb_update_asid), - # set the global mapping bit - self.itlb_update_o.content.eq(pte), - self.itlb_update_o.content.g.eq(global_mapping), - self.dtlb_update_o.content.eq(pte), - self.dtlb_update_o.content.g.eq(global_mapping), - - self.req_port_o.tag_valid.eq(tag_valid), - ] - - #------------------- - # Page table walker - #------------------- - # A virtual address va is translated into a physical address pa as - # follows: - # 1. Let a be sptbr.ppn × PAGESIZE, and let i = LEVELS-1. (For Sv39, - # PAGESIZE=2^12 and LEVELS=3.) - # 2. Let pte be the value of the PTE at address a+va.vpn[i]×PTESIZE. - # (For Sv32, PTESIZE=4.) - # 3. 
If pte.v = 0, or if pte.r = 0 and pte.w = 1, stop and raise an - # access exception. - # 4. Otherwise, the PTE is valid. If pte.r = 1 or pte.x = 1, go to - # step 5. Otherwise, this PTE is a pointer to the next level of - # the page table. - # Let i=i-1. If i < 0, stop and raise an access exception. - # Otherwise, let a = pte.ppn × PAGESIZE and go to step 2. - # 5. A leaf PTE has been found. Determine if the requested memory - # access is allowed by the pte.r, pte.w, and pte.x bits. If not, - # stop and raise an access exception. Otherwise, the translation is - # successful. Set pte.a to 1, and, if the memory access is a - # store, set pte.d to 1. - # The translated physical address is given as follows: - # - pa.pgoff = va.pgoff. - # - If i > 0, then this is a superpage translation and - # pa.ppn[i-1:0] = va.vpn[i-1:0]. - # - pa.ppn[LEVELS-1:i] = pte.ppn[LEVELS-1:i]. - # 6. If i > 0 and pa.ppn[i − 1 : 0] != 0, this is a misaligned - # superpage stop and raise a page-fault exception. - - m.d.sync += tag_valid.eq(0) - - # default assignments - m.d.comb += [ - # PTW memory interface - self.req_port_o.data_req.eq(0), - self.req_port_o.data_be.eq(Const(0xFF, 8)), - self.req_port_o.data_size.eq(Const(0b11, 2)), - self.req_port_o.data_we.eq(0), - self.ptw_error_o.eq(0), - self.itlb_update_o.valid.eq(0), - self.dtlb_update_o.valid.eq(0), - - self.itlb_miss_o.eq(0), - self.dtlb_miss_o.eq(0), - ] - - # ------------ - # State Machine - # ------------ - - with m.FSM() as fsm: - - with m.State("IDLE"): - self.idle(m, is_instr_ptw, ptw_lvl, global_mapping, - ptw_pptr, vaddr, tlb_update_asid) - - with m.State("WAIT_GRANT"): - self.grant(m, tag_valid, data_rvalid) - - with m.State("PTE_LOOKUP"): - # we wait for the valid signal - with m.If(data_rvalid): - self.lookup(m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, - data_rvalid, global_mapping, - is_instr_ptw, ptw_pptr) - - # Propagate error to MMU/LSU - with m.State("PROPAGATE_ERROR"): - m.next = "IDLE" - m.d.comb += 
self.ptw_error_o.eq(1) - - # wait for the rvalid before going back to IDLE - with m.State("WAIT_RVALID"): - with m.If(data_rvalid): - m.next = "IDLE" - - m.d.sync += [data_rdata.eq(self.req_port_i.data_rdata), - data_rvalid.eq(self.req_port_i.data_rvalid) - ] - - return m - - def set_grant_state(self, m): - # should we have flushed before we got an rvalid, - # wait for it until going back to IDLE - with m.If(self.flush_i): - with m.If (self.req_port_i.data_gnt): - m.next = "WAIT_RVALID" - with m.Else(): - m.next = "IDLE" - with m.Else(): - m.next = "WAIT_GRANT" - - def idle(self, m, is_instr_ptw, ptw_lvl, global_mapping, - ptw_pptr, vaddr, tlb_update_asid): - # by default we start with the top-most page table - m.d.sync += [is_instr_ptw.eq(0), - ptw_lvl.eq(LVL1), - global_mapping.eq(0), - self.ptw_active_o.eq(0), # deactive (IDLE) - ] - # work out itlb/dtlb miss - m.d.comb += self.itlb_miss_o.eq(self.enable_translation_i & \ - self.itlb_access_i & \ - ~self.itlb_hit_i & \ - ~self.dtlb_access_i) - m.d.comb += self.dtlb_miss_o.eq(self.en_ld_st_translation_i & \ - self.dtlb_access_i & \ - ~self.dtlb_hit_i) - # we got an ITLB miss? - with m.If(self.itlb_miss_o): - pptr = Cat(Const(0, 3), self.itlb_vaddr_i[30:39], - self.satp_ppn_i) - m.d.sync += [ptw_pptr.eq(pptr), - is_instr_ptw.eq(1), - vaddr.eq(self.itlb_vaddr_i), - tlb_update_asid.eq(self.asid_i), - ] - self.set_grant_state(m) - - # we got a DTLB miss? 
- with m.Elif(self.dtlb_miss_o): - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[30:39], - self.satp_ppn_i) - m.d.sync += [ptw_pptr.eq(pptr), - vaddr.eq(self.dtlb_vaddr_i), - tlb_update_asid.eq(self.asid_i), - ] - self.set_grant_state(m) - - def grant(self, m, tag_valid, data_rvalid): - # we've got a data WAIT_GRANT so tell the - # cache that the tag is valid - - # send a request out - m.d.comb += self.req_port_o.data_req.eq(1) - # wait for the WAIT_GRANT - with m.If(self.req_port_i.data_gnt): - # send the tag valid signal one cycle later - m.d.sync += tag_valid.eq(1) - # should we have flushed before we got an rvalid, - # wait for it until going back to IDLE - with m.If(self.flush_i): - with m.If (~data_rvalid): - m.next = "WAIT_RVALID" - with m.Else(): - m.next = "IDLE" - with m.Else(): - m.next = "PTE_LOOKUP" - - def lookup(self, m, pte, ptw_lvl, ptw_lvl1, ptw_lvl2, ptw_lvl3, - data_rvalid, global_mapping, - is_instr_ptw, ptw_pptr): - # temporaries - pte_rx = Signal(reset_less=True) - pte_exe = Signal(reset_less=True) - pte_inv = Signal(reset_less=True) - pte_a = Signal(reset_less=True) - st_wd = Signal(reset_less=True) - m.d.comb += [pte_rx.eq(pte.r | pte.x), - pte_exe.eq(~pte.x | ~pte.a), - pte_inv.eq(~pte.v | (~pte.r & pte.w)), - pte_a.eq(pte.a & (pte.r | (pte.x & self.mxr_i))), - st_wd.eq(self.lsu_is_store_i & (~pte.w | ~pte.d))] - - l1err = Signal(reset_less=True) - l2err = Signal(reset_less=True) - m.d.comb += [l2err.eq((ptw_lvl2) & pte.ppn[0:9] != Const(0, 9)), - l1err.eq((ptw_lvl1) & pte.ppn[0:18] != Const(0, 18)) ] - - # check if the global mapping bit is set - with m.If (pte.g): - m.d.sync += global_mapping.eq(1) - - m.next = "IDLE" - - # ------------- - # Invalid PTE - # ------------- - # If pte.v = 0, or if pte.r = 0 and pte.w = 1, - # stop and raise a page-fault exception. 
- with m.If (pte_inv): - m.next = "PROPAGATE_ERROR" - - # ----------- - # Valid PTE - # ----------- - - # it is a valid PTE - # if pte.r = 1 or pte.x = 1 it is a valid PTE - with m.Elif (pte_rx): - # Valid translation found (either 1G, 2M or 4K) - with m.If(is_instr_ptw): - # ------------ - # Update ITLB - # ------------ - # If page not executable, we can directly raise error. - # This doesn't put a useless entry into the TLB. - # The same idea applies to the access flag since we let - # the access flag be managed by SW. - with m.If (pte_exe): - m.next = "IDLE" - with m.Else(): - m.d.comb += self.itlb_update_o.valid.eq(1) - - with m.Else(): - # ------------ - # Update DTLB - # ------------ - # Check if the access flag has been set, otherwise - # throw page-fault and let software handle those bits. - # If page not readable (there are no write-only pages) - # directly raise an error. This doesn't put a useless - # entry into the TLB. - with m.If(pte_a): - m.d.comb += self.dtlb_update_o.valid.eq(1) - with m.Else(): - m.next = "PROPAGATE_ERROR" - # Request is a store: perform additional checks - # If the request was a store and the page not - # write-able, raise an error - # the same applies if the dirty flag is not set - with m.If (st_wd): - m.d.comb += self.dtlb_update_o.valid.eq(0) - m.next = "PROPAGATE_ERROR" - - # check if the ppn is correctly aligned: Case (6) - with m.If(l1err | l2err): - m.next = "PROPAGATE_ERROR" - m.d.comb += [self.dtlb_update_o.valid.eq(0), - self.itlb_update_o.valid.eq(0)] - - # this is a pointer to the next TLB level - with m.Else(): - # pointer to next level of page table - with m.If (ptw_lvl1): - # we are in the second level now - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[21:30], pte.ppn) - m.d.sync += [ptw_pptr.eq(pptr), - ptw_lvl.eq(LVL2) - ] - with m.If(ptw_lvl2): - # here we received a pointer to the third level - pptr = Cat(Const(0, 3), self.dtlb_vaddr_i[12:21], pte.ppn) - m.d.sync += [ptw_pptr.eq(pptr), - ptw_lvl.eq(LVL3) - ] - 
self.set_grant_state(m) - - with m.If (ptw_lvl3): - # Should already be the last level - # page table => Error - m.d.sync += ptw_lvl.eq(LVL3) - m.next = "PROPAGATE_ERROR" - - -if __name__ == '__main__': - ptw = PTW() - vl = rtlil.convert(ptw, ports=ptw.ports()) - with open("test_ptw.il", "w") as f: - f.write(vl) diff --git a/src/TLB/src/ariane/src/tlb.py b/src/TLB/src/ariane/src/tlb.py deleted file mode 100644 index f768571e..00000000 --- a/src/TLB/src/ariane/src/tlb.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -# Copyright 2018 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http:#solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
-# -# Author: David Schaffenrath, TU Graz -# Author: Florian Zaruba, ETH Zurich -# Date: 21.4.2017 -# Description: Translation Lookaside Buffer, SV39 -# fully set-associative - -Implementation in c++: -https://raw.githubusercontent.com/Tony-Hu/TreePLRU/master/TreePLRU.cpp - -Text description: -https://people.cs.clemson.edu/~mark/464/p_lru.txt - -Online simulator: -http://www.ntu.edu.sg/home/smitha/ParaCache/Paracache/vm.html -""" -from math import log2 -from nmigen import Signal, Module, Cat, Const, Array -from nmigen.cli import verilog, rtlil -from nmigen.lib.coding import Encoder - -from ptw import TLBUpdate, PTE, ASID_WIDTH -from plru import PLRU -from tlb_content import TLBContent - -TLB_ENTRIES = 8 - -class TLB: - def __init__(self, tlb_entries=8, asid_width=8): - self.tlb_entries = tlb_entries - self.asid_width = asid_width - - self.flush_i = Signal() # Flush signal - # Lookup signals - self.lu_access_i = Signal() - self.lu_asid_i = Signal(self.asid_width) - self.lu_vaddr_i = Signal(64) - self.lu_content_o = PTE() - self.lu_is_2M_o = Signal() - self.lu_is_1G_o = Signal() - self.lu_hit_o = Signal() - # Update TLB - self.pte_width = len(self.lu_content_o.flatten()) - self.update_i = TLBUpdate(asid_width) - - def elaborate(self, platform): - m = Module() - - vpn2 = Signal(9) - vpn1 = Signal(9) - vpn0 = Signal(9) - - #------------- - # Translation - #------------- - - # SV39 defines three levels of page tables - m.d.comb += [ vpn0.eq(self.lu_vaddr_i[12:21]), - vpn1.eq(self.lu_vaddr_i[21:30]), - vpn2.eq(self.lu_vaddr_i[30:39]), - ] - - tc = [] - for i in range(self.tlb_entries): - tlc = TLBContent(self.pte_width, self.asid_width) - setattr(m.submodules, "tc%d" % i, tlc) - tc.append(tlc) - # connect inputs - tlc.update_i = self.update_i # saves a lot of graphviz links - m.d.comb += [tlc.vpn0.eq(vpn0), - tlc.vpn1.eq(vpn1), - tlc.vpn2.eq(vpn2), - tlc.flush_i.eq(self.flush_i), - #tlc.update_i.eq(self.update_i), - tlc.lu_asid_i.eq(self.lu_asid_i)] - tc = Array(tc) - - 
#-------------- - # Select hit - #-------------- - - # use Encoder to select hit index - # XXX TODO: assert that there's only one valid entry (one lu_hit) - hitsel = Encoder(self.tlb_entries) - m.submodules.hitsel = hitsel - - hits = [] - for i in range(self.tlb_entries): - hits.append(tc[i].lu_hit_o) - m.d.comb += hitsel.i.eq(Cat(*hits)) # (goes into plru as well) - idx = hitsel.o - - active = Signal(reset_less=True) - m.d.comb += active.eq(~hitsel.n) - with m.If(active): - # active hit, send selected as output - m.d.comb += [ self.lu_is_1G_o.eq(tc[idx].lu_is_1G_o), - self.lu_is_2M_o.eq(tc[idx].lu_is_2M_o), - self.lu_hit_o.eq(1), - self.lu_content_o.flatten().eq(tc[idx].lu_content_o), - ] - - #-------------- - # PLRU. - #-------------- - - p = PLRU(self.tlb_entries) - plru_tree = Signal(p.TLBSZ) - m.submodules.plru = p - - # connect PLRU inputs/outputs - # XXX TODO: assert that there's only one valid entry (one replace_en) - en = [] - for i in range(self.tlb_entries): - en.append(tc[i].replace_en_i) - m.d.comb += [Cat(*en).eq(p.replace_en_o), # output from PLRU into tags - p.lu_hit.eq(hitsel.i), - p.lu_access_i.eq(self.lu_access_i), - p.plru_tree.eq(plru_tree)] - m.d.sync += plru_tree.eq(p.plru_tree_o) - - #-------------- - # Sanity checks - #-------------- - - assert (self.tlb_entries % 2 == 0) and (self.tlb_entries > 1), \ - "TLB size must be a multiple of 2 and greater than 1" - assert (self.asid_width >= 1), \ - "ASID width must be at least 1" - - return m - - """ - # Just for checking - function int countSetBits(logic[self.tlb_entries-1:0] vector); - automatic int count = 0; - foreach (vector[idx]) begin - count += vector[idx]; - end - return count; - endfunction - - assert property (@(posedge clk_i)(countSetBits(lu_hit) <= 1)) - else $error("More then one hit in TLB!"); $stop(); end - assert property (@(posedge clk_i)(countSetBits(replace_en) <= 1)) - else $error("More then one TLB entry selected for next replace!"); - """ - - def ports(self): - return 
[self.flush_i, self.lu_access_i, - self.lu_asid_i, self.lu_vaddr_i, - self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o, - ] + self.lu_content_o.ports() + self.update_i.ports() - -if __name__ == '__main__': - tlb = TLB() - vl = rtlil.convert(tlb, ports=tlb.ports()) - with open("test_tlb.il", "w") as f: - f.write(vl) - diff --git a/src/TLB/src/ariane/src/tlb_content.py b/src/TLB/src/ariane/src/tlb_content.py deleted file mode 100644 index 024c5697..00000000 --- a/src/TLB/src/ariane/src/tlb_content.py +++ /dev/null @@ -1,125 +0,0 @@ -from nmigen import Signal, Module, Cat, Const - -from ptw import TLBUpdate, PTE - -class TLBEntry: - def __init__(self, asid_width): - self.asid = Signal(asid_width) - # SV39 defines three levels of page tables - self.vpn0 = Signal(9) - self.vpn1 = Signal(9) - self.vpn2 = Signal(9) - self.is_2M = Signal() - self.is_1G = Signal() - self.valid = Signal() - - def flatten(self): - return Cat(*self.ports()) - - def eq(self, x): - return self.flatten().eq(x.flatten()) - - def ports(self): - return [self.asid, self.vpn0, self.vpn1, self.vpn2, - self.is_2M, self.is_1G, self.valid] - -class TLBContent: - def __init__(self, pte_width, asid_width): - self.asid_width = asid_width - self.pte_width = pte_width - self.flush_i = Signal() # Flush signal - # Update TLB - self.update_i = TLBUpdate(asid_width) - self.vpn2 = Signal(9) - self.vpn1 = Signal(9) - self.vpn0 = Signal(9) - self.replace_en_i = Signal() # replace the following entry, - # set by replacement strategy - # Lookup signals - self.lu_asid_i = Signal(asid_width) - self.lu_content_o = Signal(pte_width) - self.lu_is_2M_o = Signal() - self.lu_is_1G_o = Signal() - self.lu_hit_o = Signal() - - def elaborate(self, platform): - m = Module() - - tags = TLBEntry(self.asid_width) - content = Signal(self.pte_width) - - m.d.comb += [self.lu_hit_o.eq(0), - self.lu_is_2M_o.eq(0), - self.lu_is_1G_o.eq(0)] - - # temporaries for 1st level match - asid_ok = Signal(reset_less=True) - vpn2_ok = 
Signal(reset_less=True) - tags_ok = Signal(reset_less=True) - vpn2_hit = Signal(reset_less=True) - m.d.comb += [tags_ok.eq(tags.valid), - asid_ok.eq(tags.asid == self.lu_asid_i), - vpn2_ok.eq(tags.vpn2 == self.vpn2), - vpn2_hit.eq(tags_ok & asid_ok & vpn2_ok)] - # temporaries for 2nd level match - vpn1_ok = Signal(reset_less=True) - tags_2M = Signal(reset_less=True) - vpn0_ok = Signal(reset_less=True) - vpn0_or_2M = Signal(reset_less=True) - m.d.comb += [vpn1_ok.eq(self.vpn1 == tags.vpn1), - tags_2M.eq(tags.is_2M), - vpn0_ok.eq(self.vpn0 == tags.vpn0), - vpn0_or_2M.eq(tags_2M | vpn0_ok)] - # first level match, this may be a giga page, - # check the ASID flags as well - with m.If(vpn2_hit): - # second level - with m.If (tags.is_1G): - m.d.comb += [ self.lu_content_o.eq(content), - self.lu_is_1G_o.eq(1), - self.lu_hit_o.eq(1), - ] - # not a giga page hit so check further - with m.Elif(vpn1_ok): - # this could be a 2 mega page hit or a 4 kB hit - # output accordingly - with m.If(vpn0_or_2M): - m.d.comb += [ self.lu_content_o.eq(content), - self.lu_is_2M_o.eq(tags.is_2M), - self.lu_hit_o.eq(1), - ] - # ------------------ - # Update or Flush - # ------------------ - - # temporaries - replace_valid = Signal(reset_less=True) - m.d.comb += replace_valid.eq(self.update_i.valid & self.replace_en_i) - - # flush - with m.If (self.flush_i): - # invalidate (flush) conditions: all if zero or just this ASID - with m.If (self.lu_asid_i == Const(0, self.asid_width) | - (self.lu_asid_i == tags.asid)): - m.d.sync += tags.valid.eq(0) - - # normal replacement - with m.Elif(replace_valid): - m.d.sync += [ # update tag array - tags.asid.eq(self.update_i.asid), - tags.vpn2.eq(self.update_i.vpn[18:27]), - tags.vpn1.eq(self.update_i.vpn[9:18]), - tags.vpn0.eq(self.update_i.vpn[0:9]), - tags.is_1G.eq(self.update_i.is_1G), - tags.is_2M.eq(self.update_i.is_2M), - tags.valid.eq(1), - # and content as well - content.eq(self.update_i.content.flatten()) - ] - return m - - def ports(self): - return 
[self.flush_i, - self.lu_asid_i, - self.lu_is_2M_o, self.lu_is_1G_o, self.lu_hit_o, - ] + self.update_i.content.ports() + self.update_i.ports() diff --git a/src/TLB/src/ariane/test/test_plru.py b/src/TLB/src/ariane/test/test_plru.py deleted file mode 100644 index 9b040e1d..00000000 --- a/src/TLB/src/ariane/test/test_plru.py +++ /dev/null @@ -1,15 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../../TestUtil") - -from plru import PLRU - -from nmigen.compat.sim import run_simulation - -def testbench(dut): - yield - -if __name__ == "__main__": - dut = PLRU(4) - run_simulation(dut, testbench(dut), vcd_name="test_plru.vcd") - print("PLRU Unit Test Success") \ No newline at end of file diff --git a/src/TLB/src/ariane/test/test_ptw.py b/src/TLB/src/ariane/test/test_ptw.py deleted file mode 100644 index e9c5324c..00000000 --- a/src/TLB/src/ariane/test/test_ptw.py +++ /dev/null @@ -1,127 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from ptw import PTW, PTE - - -def testbench(dut): - - addr = 0x8000000 - - #pte = PTE() - #yield pte.v.eq(1) - #yield pte.r.eq(1) - - yield dut.req_port_i.data_gnt.eq(1) - yield dut.req_port_i.data_rvalid.eq(1) - yield dut.req_port_i.data_rdata.eq(0x43)#pte.flatten()) - - # data lookup - yield dut.en_ld_st_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000000) - - yield - yield - yield - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x200000) - - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - - # data lookup, PTW levels 1-2-3 - addr = 0x4000000 - yield dut.dtlb_vaddr_i.eq(addr) - yield dut.mxr_i.eq(0x1) - yield 
dut.req_port_i.data_gnt.eq(1) - yield dut.req_port_i.data_rvalid.eq(1) - yield dut.req_port_i.data_rdata.eq(0x41 | (addr>>12)<<10)#pte.flatten()) - - yield dut.en_ld_st_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(addr) - - yield - yield - yield - yield - yield - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.dtlb_access_i.eq(1) - yield dut.dtlb_hit_i.eq(0) - yield dut.dtlb_vaddr_i.eq(0x400000011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - yield - yield - - - # instruction lookup - yield dut.en_ld_st_translation_i.eq(0) - yield dut.enable_translation_i.eq(1) - yield dut.asid_i.eq(1) - - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x800000) - - yield - yield - yield - - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x200000) - - yield - yield - yield - - yield dut.req_port_i.data_gnt.eq(0) - yield dut.itlb_access_i.eq(1) - yield dut.itlb_hit_i.eq(0) - yield dut.itlb_vaddr_i.eq(0x800011) - - yield - yield dut.req_port_i.data_gnt.eq(1) - yield - yield - - yield - - - -if __name__ == "__main__": - dut = PTW() - run_simulation(dut, testbench(dut), vcd_name="test_ptw.vcd") - print("PTW Unit Test Success") diff --git a/src/TLB/src/ariane/test/test_tlb.py b/src/TLB/src/ariane/test/test_tlb.py deleted file mode 100644 index aab1d43c..00000000 --- a/src/TLB/src/ariane/test/test_tlb.py +++ /dev/null @@ -1,69 +0,0 @@ -import sys -sys.path.append("../src") -sys.path.append("../../../TestUtil") - -from nmigen.compat.sim import run_simulation - -from tlb import TLB - -def set_vaddr(addr): - yield dut.lu_vaddr_i.eq(addr) - yield dut.update_i.vpn.eq(addr>>12) - - -def testbench(dut): - yield dut.lu_access_i.eq(1) - yield dut.lu_asid_i.eq(1) - yield dut.update_i.valid.eq(1) - yield dut.update_i.is_1G.eq(0) - yield dut.update_i.is_2M.eq(0) - yield 
dut.update_i.asid.eq(1) - yield dut.update_i.content.ppn.eq(0) - yield dut.update_i.content.rsw.eq(0) - yield dut.update_i.content.r.eq(1) - - yield - - addr = 0x80000 - yield from set_vaddr(addr) - yield - - addr = 0x90001 - yield from set_vaddr(addr) - yield - - addr = 0x28000000 - yield from set_vaddr(addr) - yield - - addr = 0x28000001 - yield from set_vaddr(addr) - - addr = 0x28000001 - yield from set_vaddr(addr) - yield - - addr = 0x1000040000 - yield from set_vaddr(addr) - yield - - addr = 0x1000040001 - yield from set_vaddr(addr) - yield - - yield dut.update_i.is_1G.eq(1) - addr = 0x2040000 - yield from set_vaddr(addr) - yield - - yield dut.update_i.is_1G.eq(1) - addr = 0x2040001 - yield from set_vaddr(addr) - yield - - yield - - -if __name__ == "__main__": - dut = TLB() - run_simulation(dut, testbench(dut), vcd_name="test_tlb.vcd")