csr.bus: improve comments/docs. NFC.
[nmigen-soc.git] / nmigen_soc / csr / bus.py
1 from functools import reduce
2 from nmigen import *
3 from nmigen import tracer
4
5
6 __all__ = ["CSRElement", "CSRMultiplexer"]
7
8
class CSRElement(Record):
    """Peripheral-side CSR interface.

    A low-level interface to a single atomically readable and writable register in a peripheral.
    This interface supports any register width and semantics, provided that both reads and writes
    always succeed and complete in one cycle.

    Parameters
    ----------
    width : int
        Width of the register.
    access : str
        Access mode; one of ``"r"``, ``"w"``, or ``"rw"``. Determines which of the fields listed
        below are present in the record.
    name : str
        Name of the underlying record.
    src_loc_at : int
        Number of additional stack frames between the code that should be used for name
        inference and this constructor. Pass a non-zero value when wrapping this constructor
        in a helper function or a subclass.

    Attributes
    ----------
    r_data : Signal(width)
        Read data. Must be always valid, and is sampled when ``r_stb`` is asserted.
        Present only if ``access`` includes ``"r"``.
    r_stb : Signal()
        Read strobe. Registers with read side effects should perform the read side effect when this
        strobe is asserted. Present only if ``access`` includes ``"r"``.
    w_data : Signal(width)
        Write data. Valid only when ``w_stb`` is asserted.
        Present only if ``access`` includes ``"w"``.
    w_stb : Signal()
        Write strobe. Registers should update their value or perform the write side effect when
        this strobe is asserted. Present only if ``access`` includes ``"w"``.

    Raises
    ------
    ValueError
        If ``width`` is not a non-negative integer, or if ``access`` is not a valid access mode.
    """
    def __init__(self, width, access, *, name=None, src_loc_at=0):
        if not isinstance(width, int) or width < 0:
            raise ValueError("Width must be a non-negative integer, not {!r}"
                             .format(width))
        if access not in ("r", "w", "rw"):
            raise ValueError("Access mode must be one of \"r\", \"w\", or \"rw\", not {!r}"
                             .format(access))

        self.width = int(width)
        self.access = access

        # Only the fields that make sense for the requested access mode are created.
        layout = []
        if "r" in self.access:
            layout += [
                ("r_data", width),
                ("r_stb", 1),
            ]
        if "w" in self.access:
            layout += [
                ("w_data", width),
                ("w_stb", 1),
            ]
        # One frame is added for this constructor itself; the caller-provided offset is forwarded
        # so that name inference still works when the constructor is called through a wrapper.
        super().__init__(layout, name=name, src_loc_at=1 + src_loc_at)
59
60
class CSRMultiplexer(Elaboratable):
    """CPU-side CSR interface.

    A low-level interface to a set of peripheral CSR registers that implements address-based
    multiplexing and atomic updates of wide registers.

    Operation
    ---------

    The CSR multiplexer splits each CSR register into chunks according to its data width. Each
    chunk is assigned an address, and the first chunk of each register always has the provided
    minimum alignment. This allows accessing CSRs of any size using any datapath width.

    When the first chunk of a register is read, the value of a register is captured, and reads
    from subsequent chunks of the same register return the captured values. When any chunk except
    the last chunk of a register is written, the written value is captured; a write to the last
    chunk writes the captured value to the register. This allows atomically accessing CSRs larger
    than datapath width.

    The captured value of a register is cleared on every cycle in which ``addr`` does not point
    to any chunk of that register, so the chunks of a register must be accessed without
    addressing anything else in between.

    Reads to padding bytes return zeroes, and writes to padding bytes are ignored.

    Writes are registered, and add 1 cycle of latency.

    Wide registers
    --------------

    Because the CSR bus conserves logic and routing resources, it is common to e.g. access
    a CSR bus with an *n*-bit data path from a CPU with a *k*-bit datapath (*k>n*) in cases
    where CSR access latency is less important than resource usage. In this case, two strategies
    are possible for connecting the CSR bus to the CPU:

        * The CPU could access the CSR bus directly (with no intervening logic other than simple
          translation of control signals). In this case, the register alignment should be set
          to 1, and each *w*-bit register would occupy *ceil(w/n)* addresses from the CPU
          perspective, requiring the same amount of memory instructions to access.
        * The CPU could also access the CSR bus through a width down-converter, which would issue
          *k/n* CSR accesses for each CPU access. In this case, the register alignment should be
          set to *k/n*, and each *w*-bit register would occupy *ceil(w/k)* addresses from the CPU
          perspective, requiring the same amount of memory instructions to access.

    If alignment is greater than 1, it affects which CSR bus write is considered a write to
    the last register chunk. For example, if a 24-bit register is used with a 8-bit CSR bus and
    a CPU with a 32-bit datapath, a write to this register requires 4 CSR bus writes to complete
    and the 4th write is the one that actually writes the value to the register. This allows
    determining write latency solely from the amount of addresses the register occupies in
    the CPU address space, and the width of the CSR bus.

    Parameters
    ----------
    addr_width : int
        Address width. At most ``(2 ** addr_width) * data_width`` register bits will be available.
    data_width : int
        Data width. Registers are accessed in ``data_width`` sized chunks.
    alignment : int
        Register alignment. The address assigned to each register will be a multiple of
        ``2 ** alignment``.

    Attributes
    ----------
    addr : Signal(addr_width)
        Address for reads and writes.
    r_data : Signal(data_width)
        Read data. Valid on the next cycle after ``r_stb`` is asserted.
    r_stb : Signal()
        Read strobe. If ``addr`` points to the first chunk of a register, captures register value
        and causes read side effects to be performed (if any). If ``addr`` points to any chunk
        of a register, latches the captured value to ``r_data``. Otherwise, latches zero
        to ``r_data``.
    w_data : Signal(data_width)
        Write data. Must be valid when ``w_stb`` is asserted.
    w_stb : Signal()
        Write strobe. If ``addr`` points to the last chunk of a register, writes captured value
        to the register and causes write side effects to be performed (if any). If ``addr`` points
        to any chunk of a register, latches ``w_data`` to the captured value. Otherwise, does
        nothing.

    Raises
    ------
    ValueError
        If ``addr_width`` or ``data_width`` is not a positive integer, or if ``alignment`` is not
        a non-negative integer.
    """
    def __init__(self, *, addr_width, data_width, alignment=0):
        if not isinstance(addr_width, int) or addr_width <= 0:
            raise ValueError("Address width must be a positive integer, not {!r}"
                             .format(addr_width))
        if not isinstance(data_width, int) or data_width <= 0:
            raise ValueError("Data width must be a positive integer, not {!r}"
                             .format(data_width))
        if not isinstance(alignment, int) or alignment < 0:
            raise ValueError("Alignment must be a non-negative integer, not {!r}"
                             .format(alignment))

        self.addr_width = int(addr_width)
        self.data_width = int(data_width)
        self.alignment = alignment

        # Address (in chunks) at which the next register will be placed. It is a multiple of
        # ``2 ** self.alignment`` whenever ``add()`` or ``align_to()`` returns.
        self._next_addr = 0
        # Maps the address of the first chunk of each register to ``(element, size)``.
        self._elements = dict()

        self.addr = Signal(addr_width)
        self.r_data = Signal(data_width)
        self.r_stb = Signal()
        self.w_data = Signal(data_width)
        self.w_stb = Signal()

    def add(self, element):
        """Add a register.

        Arguments
        ---------
        element : CSRElement
            Interface of the register.

        Return value
        ------------
        An ``(addr, size)`` tuple, where ``addr`` is the address assigned to the first chunk of
        the register, and ``size`` is the amount of chunks it takes, which may be greater than
        ``ceil(element.width / self.data_width)`` due to alignment.

        Raises
        ------
        TypeError
            If ``element`` is not a ``CSRElement``.
        """
        if not isinstance(element, CSRElement):
            raise TypeError("Element must be an instance of CSRElement, not {!r}"
                            .format(element))

        addr = self.align_to(self.alignment)
        # Reserve as many chunks as are needed to hold the register (rounding up), ...
        self._next_addr += (element.width + self.data_width - 1) // self.data_width
        # ... then pad up to the alignment; the padding counts towards the size of the register.
        size = self.align_to(self.alignment) - addr
        self._elements[addr] = element, size
        return addr, size

    def align_to(self, alignment):
        """Align the next register explicitly.

        Arguments
        ---------
        alignment : int
            Register alignment. The address assigned to the next register will be a multiple of
            ``2 ** alignment`` or ``2 ** self.alignment``, whichever is greater.

        Return value
        ------------
        Address of the next register.

        Raises
        ------
        ValueError
            If ``alignment`` is not a non-negative integer.
        """
        if not isinstance(alignment, int) or alignment < 0:
            raise ValueError("Alignment must be a non-negative integer, not {!r}"
                             .format(alignment))

        # The next address is already a multiple of ``2 ** self.alignment`` at this point (see
        # ``__init__`` and ``add``), and rounding up to a power of 2 preserves alignment to any
        # smaller power of 2, so only the requested alignment has to be applied here.
        align_chunks = 1 << alignment
        if self._next_addr % align_chunks != 0:
            self._next_addr += align_chunks - (self._next_addr % align_chunks)
        return self._next_addr

    def elaborate(self, platform):
        m = Module()

        # Instead of a straightforward multiplexer for reads, use a per-element address comparator,
        # AND the shadow register chunk with the comparator output, and OR all of those together.
        # If the toolchain doesn't already synthesize multiplexer trees this way, this trick can
        # save a significant amount of logic, since e.g. one 4-LUT can pack one 2-MUX, but two
        # 2-AND or 2-OR gates.
        r_data_fanin = 0

        for elem_addr, (elem, elem_size) in self._elements.items():
            shadow = Signal(elem.width, name="{}__shadow".format(elem.name))

            if "w" in elem.access:
                m.d.comb += elem.w_data.eq(shadow)
                # Deassert the write strobe by default. It is overridden below while the last
                # chunk is addressed; without this default, the strobe would stay asserted
                # after the address moves away, and the register would be overwritten with
                # the (by then cleared) shadow value.
                m.d.sync += elem.w_stb.eq(0)

            # One single-bit registered read strobe per chunk, deasserted by default. Without
            # this default, the strobe of a chunk would stay asserted once the address moves to
            # another chunk of the same register, and both chunks would be ORed into ``r_data``.
            chunk_r_stbs = []
            if "r" in elem.access:
                for chunk_offset in range(elem_size):
                    chunk_r_stb = Signal(name="{}__r_stb_{}".format(elem.name, chunk_offset))
                    m.d.sync += chunk_r_stb.eq(0)
                    chunk_r_stbs.append(chunk_r_stb)

            # Enumerate every address used by the register explicitly, rather than using
            # arithmetic comparisons, since some toolchains (e.g. Yosys) are too eager to infer
            # carry chains for comparisons, even with a constant. (Register sizes don't have
            # to be powers of 2.)
            with m.Switch(self.addr):
                for chunk_offset in range(elem_size):
                    chunk_slice = slice(chunk_offset * self.data_width,
                                        (chunk_offset + 1) * self.data_width)
                    with m.Case(elem_addr + chunk_offset):
                        if "r" in elem.access:
                            chunk_r_stb = chunk_r_stbs[chunk_offset]
                            r_data_fanin |= Mux(chunk_r_stb, shadow[chunk_slice], 0)
                            if chunk_offset == 0:
                                # Reading the first chunk captures the whole register.
                                m.d.comb += elem.r_stb.eq(self.r_stb)
                                with m.If(self.r_stb):
                                    m.d.sync += shadow.eq(elem.r_data)
                            # Delay by 1 cycle, allowing reads to be pipelined.
                            m.d.sync += chunk_r_stb.eq(self.r_stb)

                        if "w" in elem.access:
                            if chunk_offset == elem_size - 1:
                                # Delay by 1 cycle, avoiding combinatorial paths through
                                # the CSR bus and into CSR registers.
                                m.d.sync += elem.w_stb.eq(self.w_stb)
                            with m.If(self.w_stb):
                                m.d.sync += shadow[chunk_slice].eq(self.w_data)

                with m.Default():
                    # Clear the shadow register whenever another address is selected, so that
                    # it does not contribute to the OR-based read multiplexer above.
                    m.d.sync += shadow.eq(0)

        m.d.comb += self.r_data.eq(r_data_fanin)

        return m