from functools import reduce

from nmigen import Elaboratable, Module, Mux, Record, Signal
from nmigen import tracer
# Public API of this module.
__all__ = ["CSRElement", "CSRMultiplexer"]
class CSRElement(Record):
    """Peripheral-side CSR interface.

    A low-level interface to a single atomically readable and writable register in a peripheral.
    This interface supports any register width and semantics, provided that both reads and writes
    always succeed and complete in one cycle.

    Parameters
    ----------
    width : int
        Width of the register.
    access : str
        Access mode; one of ``"r"``, ``"w"``, or ``"rw"``. Only the fields that belong to
        the selected direction(s) are added to the record.
    name : str
        Name of the underlying record.
    src_loc_at : int
        Extra call-stack depth forwarded to the underlying record, for wrappers that construct
        a ``CSRElement`` on behalf of their caller.

    Attributes
    ----------
    r_data : Signal(width)
        Read data. Must be always valid, and is sampled when ``r_stb`` is asserted.
    r_stb : Signal()
        Read strobe. Registers with read side effects should perform the read side effect when
        this strobe is asserted.
    w_data : Signal(width)
        Write data. Valid only when ``w_stb`` is asserted.
    w_stb : Signal()
        Write strobe. Registers should update their value or perform the write side effect when
        this strobe is asserted.

    Raises
    ------
    ValueError
        If ``width`` is not a non-negative integer, or ``access`` is not a valid access mode.
    """
    def __init__(self, width, access, *, name=None, src_loc_at=0):
        if not isinstance(width, int) or width < 0:
            raise ValueError("Width must be a non-negative integer, not {!r}"
                             .format(width))
        if access not in ("r", "w", "rw"):
            raise ValueError("Access mode must be one of \"r\", \"w\", or \"rw\", not {!r}"
                             .format(access))

        self.width  = int(width)
        self.access = access

        # Build the record layout from the directions this register supports.
        layout = []
        if "r" in self.access:
            layout += [
                ("r_data", width),
                ("r_stb",  1),
            ]
        if "w" in self.access:
            layout += [
                ("w_data", width),
                ("w_stb",  1),
            ]
        # One frame is added for this constructor; the caller's own offset is forwarded on top
        # of it instead of being silently dropped.
        super().__init__(layout, name=name, src_loc_at=1 + src_loc_at)
class CSRMultiplexer(Elaboratable):
    """CPU-side CSR interface.

    A low-level interface to a set of peripheral CSR registers that implements address-based
    multiplexing and atomic updates of wide registers.

    Operation
    ---------

    The CSR multiplexer splits each CSR register into chunks according to its data width. Each
    chunk is assigned an address, and the first chunk of each register always has the provided
    minimum alignment. This allows accessing CSRs of any size using any datapath width.

    When the first chunk of a register is read, the value of a register is captured, and reads
    from subsequent chunks of the same register return the captured values. When any chunk except
    the last chunk of a register is written, the written value is captured; a write to the last
    chunk writes the captured value to the register. This allows atomically accessing CSRs larger
    than the datapath width.

    Reads to padding bytes return zeroes, and writes to padding bytes are ignored.

    Writes are registered, and add 1 cycle of latency.

    Wide registers
    --------------

    Because the CSR bus conserves logic and routing resources, it is common to e.g. access
    a CSR bus with an *n*-bit data path from a CPU with a *k*-bit datapath (*k>n*) in cases
    where CSR access latency is less important than resource usage. In this case, two strategies
    are possible for connecting the CSR bus to the CPU:

        * The CPU could access the CSR bus directly (with no intervening logic other than simple
          translation of control signals). In this case, the register alignment should be set
          to 1, and each *w*-bit register would occupy *ceil(w/n)* addresses from the CPU
          perspective, requiring the same amount of memory instructions to access.
        * The CPU could also access the CSR bus through a width down-converter, which would issue
          *k/n* CSR accesses for each CPU access. In this case, the register alignment should be
          set to *k/n*, and each *w*-bit register would occupy *ceil(w/k)* addresses from the CPU
          perspective, requiring the same amount of memory instructions to access.

    If alignment is greater than 1, it affects which CSR bus write is considered a write to
    the last register chunk. For example, if a 24-bit register is used with a 8-bit CSR bus and
    a CPU with a 32-bit datapath, a write to this register requires 4 CSR bus writes to complete
    and the 4th write is the one that actually writes the value to the register. This allows
    determining write latency solely from the amount of addresses the register occupies in
    the CPU address space, and the width of the CSR bus.

    Parameters
    ----------
    addr_width : int
        Address width. At most ``(2 ** addr_width) * data_width`` register bits will be available.
    data_width : int
        Data width. Registers are accessed in ``data_width`` sized chunks.
    alignment : int
        Register alignment. The address assigned to each register will be a multiple of
        ``2 ** alignment``.

    Attributes
    ----------
    addr : Signal(addr_width)
        Address for reads and writes.
    r_data : Signal(data_width)
        Read data. Valid on the next cycle after ``r_stb`` is asserted.
    r_stb : Signal()
        Read strobe. If ``addr`` points to the first chunk of a register, captures register value
        and causes read side effects to be performed (if any). If ``addr`` points to any chunk
        of a register, latches the captured value to ``r_data``. Otherwise, latches zero
        to ``r_data``.
    w_data : Signal(data_width)
        Write data. Must be valid when ``w_stb`` is asserted.
    w_stb : Signal()
        Write strobe. If ``addr`` points to the last chunk of a register, writes captured value
        to the register and causes write side effects to be performed (if any). If ``addr`` points
        to any chunk of a register, latches ``w_data`` to the captured value. Otherwise, does
        nothing.

    Raises
    ------
    ValueError
        If ``addr_width`` or ``data_width`` is not a positive integer, or ``alignment`` is not
        a non-negative integer.
    """
    def __init__(self, *, addr_width, data_width, alignment=0):
        if not isinstance(addr_width, int) or addr_width <= 0:
            raise ValueError("Address width must be a positive integer, not {!r}"
                             .format(addr_width))
        if not isinstance(data_width, int) or data_width <= 0:
            raise ValueError("Data width must be a positive integer, not {!r}"
                             .format(data_width))
        if not isinstance(alignment, int) or alignment < 0:
            raise ValueError("Alignment must be a non-negative integer, not {!r}"
                             .format(alignment))

        self.addr_width = int(addr_width)
        self.data_width = int(data_width)
        self.alignment  = alignment

        # Allocation state: the next free chunk address, and a map from the first chunk
        # address of each register to an ``(element, size_in_chunks)`` pair.
        self._next_addr = 0
        self._elements  = dict()

        self.addr   = Signal(addr_width)
        self.r_data = Signal(data_width)
        self.r_stb  = Signal()
        self.w_data = Signal(data_width)
        self.w_stb  = Signal()

    def add(self, element):
        """Add a register.

        Arguments
        ---------
        element : CSRElement
            Interface of the register.

        Return value
        ------------
        An ``(addr, size)`` tuple, where ``addr`` is the address assigned to the first chunk of
        the register, and ``size`` is the amount of chunks it takes, which may be greater than
        the number of ``data_width`` sized chunks needed to hold ``element.width`` bits due to
        alignment.

        Raises
        ------
        TypeError
            If ``element`` is not a ``CSRElement``.
        """
        if not isinstance(element, CSRElement):
            raise TypeError("Element must be an instance of CSRElement, not {!r}"
                            .format(element))

        addr = self.align_to(self.alignment)
        # Round the register width up to a whole number of chunks.
        self._next_addr += (element.width + self.data_width - 1) // self.data_width
        # Re-align after the register so that trailing padding counts towards its size.
        size = self.align_to(self.alignment) - addr
        self._elements[addr] = element, size
        return addr, size

    def align_to(self, alignment):
        """Align the next register explicitly.

        Arguments
        ---------
        alignment : int
            Register alignment. The address assigned to the next register will be a multiple of
            ``2 ** alignment`` or ``2 ** self.alignment``, whichever is greater.

        Return value
        ------------
        Address of the next register.

        Raises
        ------
        ValueError
            If ``alignment`` is not a non-negative integer.
        """
        if not isinstance(alignment, int) or alignment < 0:
            raise ValueError("Alignment must be a non-negative integer, not {!r}"
                             .format(alignment))

        # ``add()`` always aligns to at least ``self.alignment``; honour that here as well so
        # the returned address is the one the next register will really be assigned.
        align_chunks = 1 << max(alignment, self.alignment)
        misalignment = self._next_addr % align_chunks
        if misalignment != 0:
            self._next_addr += align_chunks - misalignment
        return self._next_addr

    def elaborate(self, platform):
        """Build the multiplexer logic for every register added so far and return the module."""
        m = Module()

        # Instead of a straightforward multiplexer for reads, use a per-element address comparator,
        # AND the shadow register chunk with the comparator output, and OR all of those together.
        # If the toolchain doesn't already synthesize multiplexer trees this way, this trick can
        # save a significant amount of logic, since e.g. one 4-LUT can pack one 2-MUX, but two
        # 2-AND or 2-OR gates.
        r_data_fanin = 0

        for elem_addr, (elem, elem_size) in self._elements.items():
            # Holds the captured value for multi-chunk reads and the pending value for
            # multi-chunk writes of this register.
            shadow = Signal(elem.width, name="{}__shadow".format(elem.name))
            if "w" in elem.access:
                m.d.comb += elem.w_data.eq(shadow)

            # Enumerate every address used by the register explicitly, rather than using
            # arithmetic comparisons, since some toolchains (e.g. Yosys) are too eager to infer
            # carry chains for comparisons, even with a constant. (Register sizes don't have
            # to be powers of 2.)
            with m.Switch(self.addr):
                for chunk_offset in range(elem_size):
                    chunk_slice = slice(chunk_offset * self.data_width,
                                        (chunk_offset + 1) * self.data_width)
                    with m.Case(elem_addr + chunk_offset):
                        if "r" in elem.access:
                            # NOTE(review): this strobe is declared ``data_width`` bits wide
                            # although it is only ever loaded with the 1-bit ``r_stb``; a
                            # single-bit signal looks sufficient -- confirm before narrowing.
                            chunk_r_stb = Signal(self.data_width,
                                name="{}__r_stb_{}".format(elem.name, chunk_offset))
                            r_data_fanin |= Mux(chunk_r_stb, shadow[chunk_slice], 0)
                            if chunk_offset == 0:
                                # Reading the first chunk triggers the register's read side
                                # effect and captures the whole value.
                                m.d.comb += elem.r_stb.eq(self.r_stb)
                                with m.If(self.r_stb):
                                    m.d.sync += shadow.eq(elem.r_data)
                            # Delay by 1 cycle, allowing reads to be pipelined.
                            # NOTE(review): the strobe is only updated while ``addr`` selects
                            # this chunk -- confirm it cannot stay set when the next access
                            # targets a different chunk of the same register.
                            m.d.sync += chunk_r_stb.eq(self.r_stb)

                        if "w" in elem.access:
                            if chunk_offset == elem_size - 1:
                                # Delay by 1 cycle, avoiding combinatorial paths through
                                # the CSR bus and into CSR registers.
                                m.d.sync += elem.w_stb.eq(self.w_stb)
                            with m.If(self.w_stb):
                                m.d.sync += shadow[chunk_slice].eq(self.w_data)

                # Any address outside this register clears its shadow.
                with m.Default():
                    m.d.sync += shadow.eq(0)

        m.d.comb += self.r_data.eq(r_data_fanin)

        return m