nmigen_soc/csr/bus.py

   1 from functools import reduce
   2 from nmigen import *
   3 from nmigen import tracer
   4
   5
# Public names of this module (what ``from <module> import *`` exports).
__all__ = ["CSRElement", "CSRMultiplexer"]
   7
   8
   9 class CSRElement(Record):
  10     """Peripheral-side CSR interface.
  11
  12     A low-level interface to a single atomically readable and writable register in a peripheral.
  13     This interface supports any register width and semantics, provided that both reads and writes
  14     always succeed and complete in one cycle.
  15
  16     Parameters
  17     ----------
  18     width : int
  19         Width of the register.
  20     name : str
  21         Name of the underlying record.
  22
  23     Attributes
  24     ----------
  25     r_data : Signal(width)
  26         Read data. Must be always valid, and is sampled when ``r_stb`` is asserted.
  27     r_stb : Signal()
  28         Read strobe. Registers with read side effects should perform the read side effect when this
  29         strobe is asserted.
  30     w_data : Signal(width)
  31         Write data. Valid only when ``w_stb`` is asserted.
  32     w_stb : Signal()
  33         Write strobe. Registers should update their value or perform the write side effect when
  34         this strobe is asserted.
  35     """
  36     def __init__(self, width, access, *, name=None, src_loc_at=0):
  37         if not isinstance(width, int) or width < 0:
  38             raise ValueError("Width must be a non-negative integer, not {!r}"
  39                              .format(width))
  40         if access not in ("r", "w", "rw"):
  41             raise ValueError("Access mode must be one of \"r\", \"w\", or \"rw\", not {!r}"
  42                              .format(access))
  43
  44         self.width  = int(width)
  45         self.access = access
  46
  47         layout = []
  48         if "r" in self.access:
  49             layout += [
  50                 ("r_data", width),
  51                 ("r_stb",  1),
  52             ]
  53         if "w" in self.access:
  54             layout += [
  55                 ("w_data", width),
  56                 ("w_stb",  1),
  57             ]
  58         super().__init__(layout, name=name, src_loc_at=1)
  59
  60
  61 class CSRMultiplexer(Elaboratable):
  62     """CPU-side CSR interface.
  63
  64     A low-level interface to a set of peripheral CSR registers that implements address-based
  65     multiplexing and atomic updates of wide registers.
  66
  67     Operation
  68     ---------
  69
  70     The CSR multiplexer splits each CSR register into chunks according to its data width. Each
  71     chunk is assigned an address, and the first chunk of each register always has the provided
  72     minimum alignment. This allows accessing CSRs of any size using any datapath width.
  73
  74     When the first chunk of a register is read, the value of a register is captured, and reads
  75     from subsequent chunks of the same register return the captured values. When any chunk except
  76     the last chunk of a register is written, the written value is captured; a write to the last
  77     chunk writes the captured value to the register. This allows atomically accessing CSRs larger
  78     than datapath width.
  79
  80     Reads to padding bytes return zeroes, and writes to padding bytes are ignored.
  81
  82     Writes are registered, and add 1 cycle of latency.
  83
  84     Wide registers
  85     --------------
  86
  87     Because the CSR bus conserves logic and routing resources, it is common to e.g. access
  88     a CSR bus with an *n*-bit data path from a CPU with a *k*-bit datapath (*k>n*) in cases
  89     where CSR access latency is less important than resource usage. In this case, two strategies
  90     are possible for connecting the CSR bus to the CPU:
  91         * The CPU could access the CSR bus directly (with no intervening logic other than simple
  92           translation of control signals). In this case, the register alignment should be set
  93           to 1, and each *w*-bit register would occupy *ceil(w/n)* addresses from the CPU
  94           perspective, requiring the same amount of memory instructions to access.
  95         * The CPU could also access the CSR bus through a width down-converter, which would issue
  96           *k/n* CSR accesses for each CPU access. In this case, the register alignment should be
  97           set to *k/n*, and each *w*-bit register would occupy *ceil(w/k)* addresses from the CPU
  98           perspective, requiring the same amount of memory instructions to access.
  99
 100     If alignment is greater than 1, it affects which CSR bus write is considered a write to
 101     the last register chunk. For example, if a 24-bit register is used with a 8-bit CSR bus and
 102     a CPU with a 32-bit datapath, a write to this register requires 4 CSR bus writes to complete
 103     and the 4th write is the one that actually writes the value to the register. This allows
 104     determining write latency solely from the amount of addresses the register occupies in
 105     the CPU address space, and the width of the CSR bus.
 106
 107     Parameters
 108     ----------
 109     addr_width : int
 110         Address width. At most ``(2 ** addr_width) * data_width`` register bits will be available.
 111     data_width : int
 112         Data width. Registers are accessed in ``data_width`` sized chunks.
 113     alignment : int
 114         Register alignment. The address assigned to each register will be a multiple of
 115         ``2 ** alignment``.
 116
 117     Attributes
 118     ----------
 119     addr : Signal(addr_width)
 120         Address for reads and writes.
 121     r_data : Signal(data_width)
 122         Read data. Valid on the next cycle after ``r_stb`` is asserted.
 123     r_stb : Signal()
 124         Read strobe. If ``addr`` points to the first chunk of a register, captures register value
 125         and causes read side effects to be performed (if any). If ``addr`` points to any chunk
 126         of a register, latches the captured value to ``r_data``. Otherwise, latches zero
 127         to ``r_data``.
 128     w_data : Signal(data_width)
 129         Write data. Must be valid when ``w_stb`` is asserted.
 130     w_stb : Signal()
 131         Write strobe. If ``addr`` points to the last chunk of a register, writes captured value
 132         to the register and causes write side effects to be performed (if any). If ``addr`` points
 133         to any chunk of a register, latches ``w_data`` to the captured value. Otherwise, does
 134         nothing.
 135     """
 136     def __init__(self, *, addr_width, data_width, alignment=0):
 137         if not isinstance(addr_width, int) or addr_width <= 0:
 138             raise ValueError("Address width must be a positive integer, not {!r}"
 139                              .format(addr_width))
 140         if not isinstance(data_width, int) or data_width <= 0:
 141             raise ValueError("Data width must be a positive integer, not {!r}"
 142                              .format(data_width))
 143         if not isinstance(alignment, int) or alignment < 0:
 144             raise ValueError("Alignment must be a non-negative integer, not {!r}"
 145                              .format(alignment))
 146
 147         self.addr_width = int(addr_width)
 148         self.data_width = int(data_width)
 149         self.alignment  = alignment
 150
 151         self._next_addr = 0
 152         self._elements  = dict()
 153
 154         self.addr   = Signal(addr_width)
 155         self.r_data = Signal(data_width)
 156         self.r_stb  = Signal()
 157         self.w_data = Signal(data_width)
 158         self.w_stb  = Signal()
 159
 160     def add(self, element):
 161         """Add a register.
 162
 163         Arguments
 164         ---------
 165         element : CSRElement
 166             Interface of the register.
 167
 168         Return value
 169         ------------
 170         An ``(addr, size)`` tuple, where ``addr`` is the address assigned to the first chunk of
 171         the register, and ``size`` is the amount of chunks it takes, which may be greater than
 172         ``element.size // self.data_width`` due to alignment.
 173         """
 174         if not isinstance(element, CSRElement):
 175             raise TypeError("Element must be an instance of CSRElement, not {!r}"
 176                             .format(element))
 177
 178         addr = self.align_to(self.alignment)
 179         self._next_addr += (element.width + self.data_width - 1) // self.data_width
 180         size = self.align_to(self.alignment) - addr
 181         self._elements[addr] = element, size
 182         return addr, size
 183
 184     def align_to(self, alignment):
 185         """Align the next register explicitly.
 186
 187         Arguments
 188         ---------
 189         alignment : int
 190             Register alignment. The address assigned to the next register will be a multiple of
 191             ``2 ** alignment`` or ``2 ** self.alignment``, whichever is greater.
 192
 193         Return value
 194         ------------
 195         Address of the next register.
 196         """
 197         if not isinstance(alignment, int) or alignment < 0:
 198             raise ValueError("Alignment must be a non-negative integer, not {!r}"
 199                              .format(alignment))
 200
 201         align_chunks = 1 << alignment
 202         if self._next_addr % align_chunks != 0:
 203             self._next_addr += align_chunks - (self._next_addr % align_chunks)
 204         return self._next_addr
 205
 206     def elaborate(self, platform):
 207         m = Module()
 208
 209         # Instead of a straightforward multiplexer for reads, use a per-element address comparator,
 210         # AND the shadow register chunk with the comparator output, and OR all of those together.
 211         # If the toolchain doesn't already synthesize multiplexer trees this way, this trick can
 212         # save a significant amount of logic, since e.g. one 4-LUT can pack one 2-MUX, but two
 213         # 2-AND or 2-OR gates.
 214         r_data_fanin = 0
 215
 216         for elem_addr, (elem, elem_size) in self._elements.items():
 217             shadow = Signal(elem.width, name="{}__shadow".format(elem.name))
 218             if "w" in elem.access:
 219                 m.d.comb += elem.w_data.eq(shadow)
 220
 221             # Enumerate every address used by the register explicitly, rather than using
 222             # arithmetic comparisons, since some toolchains (e.g. Yosys) are too eager to infer
 223             # carry chains for comparisons, even with a constant. (Register sizes don't have
 224             # to be powers of 2.)
 225             with m.Switch(self.addr):
 226                 for chunk_offset in range(elem_size):
 227                     chunk_slice = slice(chunk_offset * self.data_width,
 228                                         (chunk_offset + 1) * self.data_width)
 229                     with m.Case(elem_addr + chunk_offset):
 230                         if "r" in elem.access:
 231                             chunk_r_stb = Signal(self.data_width,
 232                                 name="{}__r_stb_{}".format(elem.name, chunk_offset))
 233                             r_data_fanin |= Mux(chunk_r_stb, shadow[chunk_slice], 0)
 234                             if chunk_offset == 0:
 235                                 m.d.comb += elem.r_stb.eq(self.r_stb)
 236                                 with m.If(self.r_stb):
 237                                     m.d.sync += shadow.eq(elem.r_data)
 238                             # Delay by 1 cycle, allowing reads to be pipelined.
 239                             m.d.sync += chunk_r_stb.eq(self.r_stb)
 240
 241                         if "w" in elem.access:
 242                             if chunk_offset == elem_size - 1:
 243                                 # Delay by 1 cycle, avoiding combinatorial paths through
 244                                 # the CSR bus and into CSR registers.
 245                                 m.d.sync += elem.w_stb.eq(self.w_stb)
 246                             with m.If(self.w_stb):
 247                                 m.d.sync += shadow[chunk_slice].eq(self.w_data)
 248
 249                 with m.Default():
 250                     m.d.sync += shadow.eq(0)
 251
 252         m.d.comb += self.r_data.eq(r_data_fanin)
 253
 254         return m