80148cb293b9d9a91a977423f06bc0a4d3da36c8
[soc.git] / src / soc / experiment / compalu_multi.py
1 from nmigen.compat.sim import run_simulation
2 from nmigen.cli import verilog, rtlil
3 from nmigen import Module, Signal, Mux, Elaboratable, Repl, Array, Record
4 from nmigen.hdl.rec import (DIR_FANIN, DIR_FANOUT)
5
6 from nmutil.latch import SRLatch, latchregister
7 from nmutil.iocontrol import RecordObject
8
9 from soc.decoder.power_decoder2 import Data
10 from soc.decoder.power_enums import InternalOp
11
12 from soc.fu.alu.alu_input_record import CompALUOpSubset
13
14 """ Computation Unit (aka "ALU Manager").
15
16 This module runs a "revolving door" set of three latches, based on
17 * Issue
18 * Go_Read
19 * Go_Write
20 where one of them cannot be set on any given cycle.
21 (Note however that opc_l has been inverted (and qn used), due to SRLatch
22 default reset state being "0" rather than "1")
23
24 * When issue is first raised, a busy signal is sent out.
25 The src1 and src2 registers and the operand can be latched in
26 at this point
27
28 * Read request is set, which is acknowledged through the Scoreboard
29 to the priority picker, which generates (one and only one) Go_Read
30 at a time. One of those will (eventually) be this Computation Unit.
31
32 * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
33 src1/src2/operand in place), and the ALU is told to proceed.
34
35 * As this is currently a "demo" unit, a countdown timer is activated
36 to simulate an ALU "pipeline", which activates "write request release",
37 and the ALU's output is captured into a temporary register.
38
39 * Write request release will go through a similar process as Read request,
40 resulting (eventually) in Go_Write being asserted.
41
42 * When Go_Write is asserted, two things happen: (1) the data in the temp
43 register is placed combinatorially onto the output, and (2) the
44 req_l latch is cleared, busy is dropped, and the Comp Unit is back
45 through its revolving door to do another task.
46 """
47
def go_record(n, name):
    """Build the go/release handshake Record used for read and write ports.

    The record has two fields, each *n* bits wide:

    * go  - fan-in  (DIR_FANIN)
    * rel - fan-out (DIR_FANOUT)

    Both fields are marked reset_less.  *name* is passed to the Record.
    """
    layout = [('go', n, DIR_FANIN),
              ('rel', n, DIR_FANOUT)]
    rec = Record(layout, name=name)
    for field in (rec.go, rec.rel):
        field.reset_less = True
    return rec
54
55
class CompUnitRecord(RecordObject):
    """CompUnitRecord

    base class for Computation Units, to provide a uniform API
    and allow "record.connect" etc. to be used, particularly when
    it comes to connecting multiple Computation Units up as a block
    (very laborious)

    LDSTCompUnitRecord should derive from this class and add the
    additional signals it requires

    Arguments:

    * subkls - called with no arguments to create the operation record
               (stored as oper_i)
    * rwid   - width in bits of every src / dest signal
    * n_src  - number of source signals, created as src1_i, src2_i, ...
    * n_dst  - number of destination signals, created as dest1_i, ...
    * name   - optional record name, handed to RecordObject
    """
    def __init__(self, subkls, rwid, n_src, n_dst, name=None):
        # name is passed by keyword so that it can never be bound to a
        # different leading positional parameter of the base class.
        # NOTE(review): RecordObject.__init__ is believed to be
        # (layout=None, name=None) - confirm against nmutil.iocontrol
        RecordObject.__init__(self, name=name)
        self._n_src, self._n_dst = n_src, n_dst
        self._rwid = rwid

        # source operands: also kept, in order, as a list in _src_i
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            sname = "src%d_i" % j
            sreg = Signal(rwid, name=sname, reset_less=True)
            setattr(self, sname, sreg)
            src.append(sreg)
        self._src_i = src

        # destinations: also kept, in order, as a list in _dest
        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            dname = "dest%d_i" % j
            dreg = Signal(rwid, name=dname, reset_less=True)
            setattr(self, dname, dreg)
            dst.append(dreg)
        self._dest = dst

        self.rd = go_record(n_src, name="rd")  # read in, req out
        self.wr = go_record(n_dst, name="wr")  # write in, req out
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        # operation / data input
        self.oper_i = subkls()  # operand

        # output (busy/done)
        self.busy_o = Signal(reset_less=True)  # fn busy out
        self.done_o = Signal(reset_less=True)
102
103
class MultiCompUnit(Elaboratable):
    """Computation Unit wrapping a single ALU with n_src operands and
    n_dst results.

    All externally-visible signals live in a CompUnitRecord (self.cu);
    the attributes set up in the constructor are convenience aliases
    onto that record.

    Arguments:

    * rwid  - width in bits of every src / dest signal
    * alu   - the ALU, added as a submodule.  elaborate() uses its
              op, i[], out[], p_valid_i, p_ready_o, n_valid_o and
              n_ready_i attributes
    * n_src - number of source operands.  elaborate() applies the
              immediate mux to operand index 1, so at least two are
              required
    * n_dst - number of results
    """
    def __init__(self, rwid, alu, n_src=2, n_dst=1):
        self.n_src, self.n_dst = n_src, n_dst
        self.rwid = rwid
        self.alu = alu  # actual ALU - set as a "submodule" of the CU
        self.cu = cu = CompUnitRecord(CompALUOpSubset, rwid, n_src, n_dst)

        # expose src1_i/src2_i/... and dest1_i/... directly on the unit
        for prefix, count in (("src", n_src), ("dest", n_dst)):
            for j in range(1, count + 1):  # numbering starts at 1
                name = "%s%d_i" % (prefix, j)
                setattr(self, name, getattr(cu, name))

        # convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.go_rd_i = self.rd.go  # temporary naming
        self.go_wr_i = self.wr.go  # temporary naming
        self.rd_rel_o = self.rd.rel  # temporary naming
        self.req_rel_o = self.wr.rel  # temporary naming
        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        # operation / data input
        self.oper_i = cu.oper_i
        self.src_i = cu._src_i

        self.busy_o = cu.busy_o
        self.dest = cu._dest
        self.data_o = self.dest[0]  # Dest out
        self.done_o = cu.done_o

    def elaborate(self, platform):
        """Build the latch-based issue / read / write sequencing logic.

        Returns the nmigen Module containing the ALU and the five
        SR latches (src, opc, req, rst, rdok) as submodules.
        """
        m = Module()
        m.submodules.alu = self.alu
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
        m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")

        # ALU only proceeds when all src are ready.  rd_rel_o is delayed
        # so combine it with go_rd_i.  if all bits are set we're good
        all_rd = Signal(reset_less=True)
        m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
                              (((~self.rd.rel) | self.rd.go).all()))

        # write_requests all done
        # req_done works because any one of the last of the writes
        # is enough, when combined with when read-phase is done (rst_l.q)
        wr_any = Signal(reset_less=True)
        req_done = Signal(reset_less=True)
        m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
        m.d.comb += wr_any.eq(self.wr.go.bool())
        m.d.comb += req_done.eq(rst_l.q & wr_any)

        # shadow/go_die
        reset = Signal(reset_less=True)
        rst_r = Signal(reset_less=True)  # reset latch off
        reset_w = Signal(self.n_dst, reset_less=True)
        reset_r = Signal(self.n_src, reset_less=True)
        m.d.comb += reset.eq(req_done | self.go_die_i)
        m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
        m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
        m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))

        # read-done,wr-proceed latch
        m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
        m.d.comb += rok_l.r.eq(self.alu.p_ready_o)  # off when ALU acknowledges

        # wr-done, back-to-start latch
        m.d.comb += rst_l.s.eq(all_rd)  # set when read-phase is fully done
        m.d.comb += rst_l.r.eq(rst_r)  # *off* on issue

        # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
        m.d.sync += opc_l.s.eq(self.issue_i)  # set on issue
        m.d.sync += opc_l.r.eq(self.alu.n_valid_o & req_done)  # reset on ALU

        # src operand latch (not using go_wr_i)
        m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
        m.d.sync += src_l.r.eq(reset_r)

        # dest operand latch (not using issue_i)
        m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
        m.d.sync += req_l.r.eq(reset_w)

        # create a latch/register for the operand
        oper_r = CompALUOpSubset()
        latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")

        # and for each output from the ALU
        drl = []
        for i in range(self.n_dst):
            name = "data_r%d" % i
            data_r = Signal(self.rwid, name=name, reset_less=True)
            latchregister(m, self.alu.out[i], data_r, req_l.q[i], name)
            drl.append(data_r)

        # pass the operation to the ALU
        m.d.comb += self.alu.op.eq(oper_r)

        # create list of src/alu-src/src-latch.  override 2nd one below
        sl = []
        for i in range(self.n_src):
            sl.append([self.src_i[i], self.alu.i[i], src_l.q[i]])

        # select immediate if opcode says so.  however also change the latch
        # to trigger *from* the opcode latch instead.
        op_is_imm = oper_r.imm_data.imm_ok
        src2_or_imm = Signal(self.rwid, reset_less=True)
        src_sel = Signal(reset_less=True)
        m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.q, src_l.q[1]))
        m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
                                       self.src2_i))
        # overwrite 2nd src-latch with immediate-muxed stuff
        sl[1][0] = src2_or_imm
        sl[1][2] = src_sel

        # create a latch/register for src1/src2
        for i in range(self.n_src):
            src, alusrc, latch = sl[i]
            latchregister(m, src, alusrc, latch, name="src_r%d" % i)

        # -----
        # outputs
        # -----

        # all request signals gated by busy_o.  prevents picker problems
        m.d.comb += self.busy_o.eq(opc_l.q)  # busy out
        bro = Repl(self.busy_o, self.n_src)
        m.d.comb += self.rd.rel.eq(src_l.q & bro)  # src1/src2 req rel

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(all_rd):  # src operands ready, GO!
            with m.If(~self.alu.p_ready_o):  # no ACK yet
                m.d.comb += self.alu.p_valid_i.eq(1)  # so indicate valid

        brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
        # only proceed if ALU says its output is valid
        with m.If(self.alu.n_valid_o):
            # when ALU ready, write req release out.  waits for shadow
            m.d.comb += self.wr.rel.eq(req_l.q & brd)
            # when output latch is ready, and ALU says ready, accept ALU output
            with m.If(reset):
                m.d.comb += self.alu.n_ready_i.eq(1)  # tells ALU "thanks got it"

        # output the data from the latch on go_write
        for i in range(self.n_dst):
            with m.If(self.wr.go[i]):
                m.d.comb += self.dest[i].eq(drl[i])

        return m

    def __iter__(self):
        """Yield the unit's port signals.

        Every source and destination signal is included, so units built
        with more than two sources or more than one destination expose
        all of them.  For the default n_src=2, n_dst=1 the sequence is
        src1_i, src2_i ... data_o exactly as before.
        """
        yield self.rd.go
        yield self.wr.go
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i  # src1_i, src2_i, ...
        yield self.busy_o
        yield self.rd.rel
        yield self.wr.rel
        yield from self.dest  # data_o (dest[0]) first, then any others

    def ports(self):
        """Return the signals yielded by __iter__ as a list."""
        return list(self)
280
281
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
    """Simulation process: run one operation through the Computation Unit.

    Drives src_i[0]/src_i[1] with *a*/*b*, fills in the operation record
    (insn_type, invert_a, imm_data), pulses issue_i for one clock, then
    performs the read handshake (rd.go) followed by the write handshake
    (wr.go[0]).

    Returns the value read from dut.data_o one clock after wr.go[0]
    was raised.
    """
    oper = dut.oper_i

    # one clock with issue held low before presenting anything
    yield dut.issue_i.eq(0)
    yield

    # present operands and operation, raising issue last
    stimulus = (
        (dut.src_i[0], a),
        (dut.src_i[1], b),
        (oper.insn_type, op),
        (oper.invert_a, inv_a),
        (oper.imm_data.imm, imm),
        (oper.imm_data.imm_ok, imm_ok),
        (dut.issue_i, 1),
    )
    for sig, value in stimulus:
        yield sig.eq(value)
    yield
    yield dut.issue_i.eq(0)
    yield

    # read phase: assert go for both sources, clock until rd.rel is seen
    yield dut.rd.go.eq(0b11)
    rd_rel = 0
    while not rd_rel:
        yield
        rd_rel = yield dut.rd.rel
        print ("rd_rel", rd_rel)
    yield
    yield dut.rd.go.eq(0)

    # write phase: sample once, then keep sampling (one clock between
    # samples) until the write-request release is seen
    wr_rel = yield dut.wr.rel
    result = yield dut.data_o
    print ("req_rel", wr_rel, result)
    while True:
        wr_rel = yield dut.wr.rel
        result = yield dut.data_o
        print ("req_rel", wr_rel, result)
        if wr_rel:
            break
        yield

    # pulse go_write for one clock and capture the output during it
    yield dut.wr.go[0].eq(1)
    yield
    result = yield dut.data_o
    print ("result", result)
    yield dut.wr.go[0].eq(0)
    yield
    return result
321
322
def scoreboard_sim(dut):
    """Simulation process: run three OP_ADD operations with operands 5
    and 2 through op_sim and check each returned result.
    """
    # (extra keyword arguments for op_sim, expected result)
    cases = (
        (dict(inv_a=0, imm=8, imm_ok=1), 13),
        (dict(), 7),
        (dict(inv_a=1), 65532),
    )
    for extra, expected in cases:
        result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, **extra)
        assert result == expected
333
334
def test_scoreboard():
    """Build a 16-bit MultiCompUnit around the test ALU, write its RTLIL
    to test_compalu.il, then simulate scoreboard_sim against it
    (waveform written to test_compalu.vcd).
    """
    from alu_hier import ALU

    m = Module()
    alu = ALU(16)
    dut = MultiCompUnit(16, alu)
    m.submodules.cu = dut

    il = rtlil.convert(dut, ports=dut.ports())
    with open("test_compalu.il", "w") as f:
        f.write(il)

    run_simulation(m, scoreboard_sim(dut), vcd_name='test_compalu.vcd')
349
# run the simulation test when executed as a script
if __name__ == "__main__":
    test_scoreboard()