speed up ==, hash, <, >, <=, and >= for plain_data
[nmutil.git] / src / nmutil / concurrentunit.py
1 # SPDX-License-Identifier: LGPL-3-or-later
2 """ concurrent unit from mitch alsup augmentations to 6600 scoreboard
3
4 This work is funded through NLnet under Grant 2019-02-012
5
6 License: LGPLv3+
7
8
9 * data fans in
10 * data goes through a pipeline
11 * results fan back out.
12
13 the output data format has to have a member "muxid", which is used
14 as the array index on fan-out
15
16 Associated bugreports:
17
18 * https://bugs.libre-soc.org/show_bug.cgi?id=538
19 """
20
21 from math import log
22 from nmigen import Module, Elaboratable, Signal, Cat
23 from nmigen.asserts import Assert
24 from nmigen.lib.coding import PriorityEncoder
25 from nmigen.cli import main, verilog
26
27 from nmutil.singlepipe import PassThroughStage
28 from nmutil.multipipe import CombMuxOutPipe
29 from nmutil.multipipe import PriorityCombMuxInPipe
30 from nmutil.iocontrol import NextControl, PrevControl
31 from nmutil import nmoperator
32
33
def num_bits(n):
    """ Return floor(log2(n)) for a positive number n.

    Integers are computed exactly with int.bit_length(): going through
    log() converts the value to a float first, which rounds large integers
    (for example 2**60 - 1 becomes 2**60) and can give an off-by-one result.
    Non-integer inputs keep using the logarithm ratio.

    Raises ValueError if n is an integer <= 0 (log() raises the same
    exception for non-positive non-integer inputs).
    """
    if isinstance(n, int):
        if n <= 0:
            # same exception (and message) that log() gives for this input
            raise ValueError("math domain error")
        return n.bit_length() - 1
    return int(log(n) / log(2))
36
37
class PipeContext:
    """ pipeline context: carries a "muxid" and an "operator" (op)
    """

    def __init__(self, pspec):
        """ builds the context from a pipeline specification (pspec).

            pspec.id_wid - width of the muxid Signal
            pspec.op_wid - width of the op Signal (used only when
                           pspec.opkls is None)
            pspec.opkls  - class instantiated (with pspec as its only
                           argument) to create the "operator".  the
                           instance must have an "eq" function.
        """
        self.id_wid = pspec.id_wid
        self.op_wid = pspec.op_wid
        self.muxid = Signal(self.id_wid, reset_less=True)  # RS multiplex ID
        op_class = pspec.opkls
        if op_class is not None:
            self.op = op_class(pspec)
        else:
            self.op = Signal(self.op_wid, reset_less=True)

    def eq(self, i):
        """ returns the list of assignments copying muxid and op from i
        """
        # keep "matches" in step with this list when new fields are added.
        return [self.muxid.eq(i.muxid), self.op.eq(i.op)]

    def matches(self, another):
        """ returns a list of Assert()s, one per field, each checking that
            this context's field equals the same field of "another"
        """
        # done as a method because == is not overloaded on this class.
        same_muxid = Assert(self.muxid == another.muxid)
        same_op = Assert(self.op == another.op)
        return [same_muxid, same_op]

    def __iter__(self):
        """ yields muxid, then op
        """
        for field in ("muxid", "op"):
            yield getattr(self, field)

    def ports(self):
        """ returns muxid followed by op.ports() when op provides "ports",
            otherwise the plain [muxid, op] list
        """
        if not hasattr(self.op, "ports"):
            return list(self)
        return [self.muxid] + self.op.ports()
82
83
class InMuxPipe(PriorityCombMuxInPipe):
    """ fan-in pipe: a PriorityCombMuxInPipe with num_rows inputs around
        a PassThroughStage built from iospecfn
    """

    def __init__(self, num_rows, iospecfn, maskwid=0):
        self.num_rows = num_rows
        passthru = PassThroughStage(iospecfn)
        PriorityCombMuxInPipe.__init__(
            self, passthru, p_len=self.num_rows, maskwid=maskwid)
90
91
class MuxOutPipe(CombMuxOutPipe):
    """ fan-out pipe: a CombMuxOutPipe with num_rows outputs around
        a PassThroughStage built from iospecfn
    """

    def __init__(self, num_rows, iospecfn, maskwid=0):
        self.num_rows = num_rows
        passthru = PassThroughStage(iospecfn)
        CombMuxOutPipe.__init__(
            self, passthru, n_len=self.num_rows, maskwid=maskwid)
98
99
class ALUProxy:
    """ALUProxy: stand-in for one of a series of ALUs that look like the
       ALU being sandwiched in between the fan-in and fan-out.  One ALU
       looks like it is multiple concurrent ALUs.

       Simply records the shared alu together with the p and n objects
       handed to it.
    """

    def __init__(self, alu, p, n):
        self.alu, self.p, self.n = alu, p, n
110
111
class ReservationStations(Elaboratable):
    """ Reservation-Station pipeline

        Input: num_rows       - number of input and output Reservation Stations
               maskwid        - passed through to the fan-in and fan-out pipes
               feedback_width - if not None, outputs numbered from
                                feedback_width up to num_rows-1 are connected
                                back to the inputs of the same number

        Requires: the addition of an "alu" object, from which ispec and ospec
        are taken, and inpipe and outpipe are connected to it.  this class
        never assigns self.alu itself: it has to be set (e.g. by a derived
        class) before the specs are requested or elaborate() is called.

        * fan-in on inputs (an array of BaseData: a,b,mid)
        * ALU pipeline
        * fan-out on outputs (an array of FPPackData: z,mid)

        Fan-in and Fan-out are combinatorial.
    """

    def __init__(self, num_rows, maskwid=0, feedback_width=None):
        self.num_rows = nr = num_rows
        self.feedback_width = feedback_width
        self.inpipe = InMuxPipe(nr, self.i_specfn, maskwid)    # fan-in
        self.outpipe = MuxOutPipe(nr, self.o_specfn, maskwid)  # fan-out

        self.p = self.inpipe.p  # kinda annoying,
        self.n = self.outpipe.n  # use pipe in/out as this class in/out
        self._ports = self.inpipe.ports() + self.outpipe.ports()

    def setup_pseudoalus(self):
        """setup_pseudoalus: establishes a suite of pseudo-alus
        that look to all pipeline-intents-and-purposes just like the original
        """
        self.pseudoalus = [ALUProxy(self.alu, self.p[i], self.n[i])
                           for i in range(self.num_rows)]

    def elaborate(self, platform):
        """ wires fan-in -> alu -> fan-out, plus the optional feedback path
        """
        m = Module()
        m.submodules.inpipe = self.inpipe
        m.submodules.alu = self.alu
        m.submodules.outpipe = self.outpipe

        m.d.comb += self.inpipe.n.connect_to_next(self.alu.p)
        m.d.comb += self.alu.connect_to_next(self.outpipe)

        if self.feedback_width is None:
            return m

        # connect all outputs above the feedback width back to their inputs
        # (hence, feedback).  pipeline stages are then expected to *modify*
        # the muxid (with care) in order to use the "upper numbered" RSes
        # for storing partially-completed results.  micro-coding, basically

        for i in range(self.feedback_width, self.num_rows):
            # connect_to_next only *returns* the assignments (see the two
            # connections above): they must be added to the comb domain,
            # otherwise the feedback is silently never wired up.
            m.d.comb += self.outpipe.n[i].connect_to_next(self.inpipe.p[i])

        return m

    def ports(self):
        """ returns the fan-in ports followed by the fan-out ports
        """
        return self._ports

    def i_specfn(self):
        """ input spec: taken from the alu
        """
        return self.alu.ispec()

    def o_specfn(self):
        """ output spec: taken from the alu
        """
        return self.alu.ospec()
175
176
class ReservationStations2(Elaboratable):
    """ Reservation-Station pipeline.  Manages an ALU and makes it look like
        there are multiple of them, presenting the same ready/valid API

        Input:

        :alu:      - an ALU to be "managed" by these ReservationStations
        :num_rows: - number of input and output Reservation Stations
        :alu_name: - name under which the ALU is added as a submodule
                     (defaults to "alu")

        Note that the ALU data (in and out specs) right the way down the
        entire chain *must* have a "muxid" data member.  this is picked
        up and used to route data correctly from input RS to output RS.

        It is the responsibility of the USER of the ReservationStations
        class to correctly set that muxid in each data packet to the
        correct constant.  this could change in future.

        FAILING TO SET THE MUXID IS GUARANTEED TO RESULT IN CORRUPTED DATA.
    """

    def __init__(self, alu, num_rows, alu_name=None):
        if alu_name is None:
            alu_name = "alu"
        # NOTE(review): "nr" and "id_wid" are not used anywhere below
        self.num_rows = nr = num_rows
        id_wid = num_rows.bit_length()
        self.p = []
        self.n = []
        self.alu = alu
        self.alu_name = alu_name
        # create prev and next ready/valid and add replica of ALU data specs
        for i in range(num_rows):
            suffix = "_%d" % i
            p = PrevControl(name=suffix)
            n = NextControl(name=suffix)
            p.i_data, n.o_data = self.alu.new_specs("rs_%d" % i)
            self.p.append(p)
            self.n.append(n)

        self.pipe = self  # for Arbiter to select the incoming prevcontrols

        # set up pseudo-alus that look like a standard pipeline
        self.pseudoalus = []
        for i in range(self.num_rows):
            self.pseudoalus.append(ALUProxy(self.alu, self.p[i], self.n[i]))

    def __iter__(self):
        # everything yielded by each PrevControl, then by each NextControl
        for p in self.p:
            yield from p
        for n in self.n:
            yield from n

    def ports(self):
        # the flattened list of everything __iter__ yields
        return list(self)

    def elaborate(self, platform):
        """ builds one FSM per Reservation Station, with four states:

            ACCEPTINGn - ready for input; accepts it when valid
            ACCEPTEDn  - input is held in a buffer, waiting to go to the ALU
            WAITOUTn   - waiting for the ALU output whose muxid is n
            SENDONn    - output is held in a buffer, waiting for the
                         receiver to be ready
        """
        m = Module()
        pe = PriorityEncoder(self.num_rows)  # input priority picker
        m.submodules[self.alu_name] = self.alu
        m.submodules.selector = pe
        # the per-RS prev/next controls are added as submodules as well
        for i, (p, n) in enumerate(zip(self.p, self.n)):
            m.submodules["rs_p_%d" % i] = p
            m.submodules["rs_n_%d" % i] = n

        # Priority picker for one RS
        self.active = Signal()
        self.m_id = Signal.like(pe.o)

        # ReservationStation status information, progressively updated in FSM
        rsvd = Signal(self.num_rows)  # indicates RS data in flight
        sent = Signal(self.num_rows)  # sent indicates data in pipeline
        wait = Signal(self.num_rows)  # the outputs are waiting for accept

        # pick first *reserved* ReservationStation whose data has not
        # already been sent into the ALU (rsvd set, sent clear)
        m.d.comb += pe.i.eq(rsvd & ~sent)
        m.d.comb += self.active.eq(~pe.n)  # encoder active (one input valid)
        m.d.comb += self.m_id.eq(pe.o)     # output one active input

        # mux in and mux out ids.  note that all data *must* have a muxid
        mid = self.m_id                    # input mux selector
        o_muxid = self.alu.n.o_data.muxid  # output mux selector

        # technically speaking this could be set permanently "HI".
        # when all the ReservationStations outputs are waiting,
        # the ALU cannot obviously accept any more data.  as the
        # ALU is effectively "decoupled" from (managed by) the RSes,
        # as long as there is sufficient RS allocation this should not
        # be necessary, i.e. at no time should the ALU be given more inputs
        # than there are outputs to accept (!) but just in case...
        m.d.comb += self.alu.n.i_ready.eq(~wait.all())

        #####
        # input side
        #####

        # first, establish input: select one input to pass data to (p_mux)
        for i in range(self.num_rows):
            # per-RS copies of the ALU input and output data, used to hold
            # data that could not be passed on in the cycle it arrived
            i_buf, o_buf = self.alu.new_specs("buf%d" % i)  # buffers
            with m.FSM():
                # indicate ready to accept data, and accept it if incoming
                # BUT, if there is an opportunity to send on immediately
                # to the ALU, take it early (combinatorial)
                with m.State("ACCEPTING%d" % i):
                    m.d.comb += self.p[i].o_ready.eq(1)  # ready indicator
                    with m.If(self.p[i].i_valid):  # valid data incoming
                        m.d.sync += rsvd[i].eq(1)  # now reserved
                        # a unique opportunity: the ALU happens to be free
                        # NOTE(review): if the picker selected us but the
                        # ALU is not ready, neither the transfer nor the
                        # Else branch runs: rsvd[i] is still set while the
                        # FSM stays in ACCEPTING - confirm this is intended
                        with m.If(mid == i):  # picker selected us
                            with m.If(self.alu.p.o_ready):  # ALU can accept
                                # transfer
                                m.d.comb += self.alu.p.i_valid.eq(1)
                                m.d.comb += nmoperator.eq(self.alu.p.i_data,
                                                          self.p[i].i_data)
                                m.d.sync += sent[i].eq(1)  # now sent to ALU
                                m.next = "WAITOUT%d" % i  # to "wait output"
                        with m.Else():
                            # nope.  picker did not select us: keep a copy
                            # of the input and try next cycle(s)
                            m.d.sync += nmoperator.eq(i_buf, self.p[i].i_data)
                            m.next = "ACCEPTED%d" % i  # move to "accepted"

                # now try to deliver to the ALU, but only if we are "picked"
                with m.State("ACCEPTED%d" % i):
                    with m.If(mid == i):  # picker selected us
                        with m.If(self.alu.p.o_ready):  # ALU can accept
                            m.d.comb += self.alu.p.i_valid.eq(1)  # transfer
                            m.d.comb += nmoperator.eq(self.alu.p.i_data, i_buf)
                            m.d.sync += sent[i].eq(1)  # now sent to ALU
                            m.next = "WAITOUT%d" % i  # move to "wait output"

                # waiting for output to appear on the ALU, take a copy
                # BUT, again, if there is an opportunity to send on
                # immediately, take it (combinatorial)
                with m.State("WAITOUT%d" % i):
                    with m.If(o_muxid == i):  # when ALU output matches our RS
                        with m.If(self.alu.n.o_valid):  # ALU output is valid
                            # second unique opportunity: the RS is ready
                            with m.If(self.n[i].i_ready):  # ready to receive
                                m.d.comb += self.n[i].o_valid.eq(1)  # valid
                                m.d.comb += nmoperator.eq(self.n[i].o_data,
                                                          self.alu.n.o_data)
                                m.d.sync += wait[i].eq(0)  # clear waiting
                                m.d.sync += sent[i].eq(0)  # and sending
                                m.d.sync += rsvd[i].eq(0)  # and reserved
                                m.next = "ACCEPTING%d" % i  # back to "accepting"
                            with m.Else():
                                # nope.  RS wasn't ready.  try next cycles
                                m.d.sync += wait[i].eq(1)  # now waiting
                                m.d.sync += nmoperator.eq(o_buf,
                                                          self.alu.n.o_data)
                                m.next = "SENDON%d" % i  # move to "send data on"

                # waiting for "ready" indicator on RS output: deliver the
                # buffered result
                with m.State("SENDON%d" % i):
                    with m.If(self.n[i].i_ready):  # user is ready to receive
                        m.d.comb += self.n[i].o_valid.eq(1)  # indicate valid
                        m.d.comb += nmoperator.eq(self.n[i].o_data, o_buf)
                        m.d.sync += wait[i].eq(0)  # clear waiting
                        m.d.sync += sent[i].eq(0)  # and sending
                        m.d.sync += rsvd[i].eq(0)  # and reserved
                        m.next = "ACCEPTING%d" % i  # and back to "accepting"

        return m