fix imports in fu matrix tests
[soc.git] / src / soc / experiment / compldst.py
1 """ LOAD / STORE Computation Unit. Also capable of doing ADD and ADD immediate
2
3 This module runs a "revolving door" set of four latches, based on
4 * Issue
5 * Go_Read
6 * Go_Addr
7 * Go_Write *OR* Go_Store
8
9 (Note that opc_l has been inverted (and qn used), due to SRLatch
10 default reset state being "0" rather than "1")
11
12 Also note: the LD/ST Comp Unit can act as a *standard ALU* doing
13 add and subtract.
14
15 Stores are activated when Go_Store is enabled, and use the ALU
16 to add the immediate (imm_i) to the address (src1_i); then,
17 when ready (go_st_i and the ALU ready), the operand (src2_i) is stored
18 at the computed address.
19 """
20
21 from nmigen.compat.sim import run_simulation
22 from nmigen.cli import verilog, rtlil
23 from nmigen import Module, Signal, Mux, Cat, Elaboratable, Array
24 from nmigen.hdl.rec import Record, Layout
25
26 from nmutil.latch import SRLatch, latchregister
27
28 from soc.experiment.compalu_multi import go_record
29 from soc.experiment.testmem import TestMemory
30 from soc.decoder.power_enums import InternalOp
31
32 from soc.experiment.alu_hier import CompALUOpSubset
33
34 from soc.decoder.power_enums import InternalOp, Function, CryIn
35
36 import operator
37
38
class CompLDSTOpSubset(Record):
    """CompLDSTOpSubset

    Record carrying the subset of Decode2Execute1Type fields that LD/ST
    operations need.  Populate it from the full decode record with
    eq_from_execute1().
    """
    def __init__(self, name=None):
        imm_layout = Layout((("imm", 64), ("imm_ok", 1)))
        fields = (
            ('insn_type', InternalOp),
            ('imm_data', imm_layout),
            ('is_32bit', 1),
            ('is_signed', 1),
            ('data_len', 4),  # TODO: should be in separate CompLDSTSubset
            ('byte_reverse', 1),
            ('sign_extend', 1),
            ('update', 1),
        )
        Record.__init__(self, Layout(fields), name=name)

        # Record takes no kwargs for this, so mark each top-level signal
        # reset-less after construction (imm_data is left untouched).
        for port in self.ports():
            port.reset_less = True

    def eq_from_execute1(self, other):
        """Return the assignments copying each same-named field of `other`
        (a Decode2Execute1Type-style record) into this record.
        """
        return [sig.eq(other.fields[fname])
                for fname, sig in self.fields.items()]

    def ports(self):
        """Return the plain (non-nested) signals of the record as a list."""
        names = ('insn_type', 'is_32bit', 'is_signed', 'data_len',
                 'byte_reverse', 'sign_extend', 'update')
        return [getattr(self, field) for field in names]
85
86
class LDSTCompUnit(Elaboratable):
    """ LOAD / STORE / ADD / SUB Computation Unit

    Inputs
    ------

    * :rwid: register width
    * :alu: an ALU module
    * :mem: a Memory Module (read-write capable)

    Control Signals (In)
    --------------------

    * :oper_i: operation being carried out.  The latched insn_type is
               compared against InternalOp.OP_LOAD / OP_STORE; anything
               else is treated as a plain ALU operation.
    * :issue_i: LD/ST is being "issued".
    * :isalu_i: ADD/SUB is being "issued" (aka issue_alu_i)
    * :shadown_i: Inverted-shadow is being held (stops STORE *and* WRITE)
    * :go_rd_i: read is being actioned (latches in src regs)
    * :go_wr_i: write mode (exactly like ALU CompUnit)
    * :go_ad_i: address is being actioned (triggers actual mem LD)
    * :go_st_i: store is being actioned (triggers actual mem STORE)
    * :go_die_i: resets the unit back to "wait for issue"

    Control Signals (Out)
    ---------------------

    * :busy_o: function unit is busy
    * :rd_rel_o: request src1/src2
    * :adr_rel_o: request address (from mem)
    * :sto_rel_o: request store (to mem)
    * :req_rel_o: request write (result)
    * :done_o: final release (req_rel for non-LD/ST, adr_rel for LD/ST)
    * :load_mem_o: activate memory LOAD
    * :stwd_mem_o: activate memory STORE
    * :ld_o: latched operation is a LD
    * :st_o: latched operation is a ST

    Note: load_mem_o, stwd_mem_o and req_rel_o MUST all be acknowledged
    in a single cycle and the CompUnit set back to doing another op.
    This means deasserting go_st_i, go_ad_i or go_wr_i as appropriate
    depending on whether the operation is a STORE, LD, or a straight
    ALU operation respectively.

    Control Data (out)
    ------------------
    * :data_o: Dest out (LD or ALU)
    * :addr_o: Address out (LD or ST)
    """

    def __init__(self, rwid, alu, mem, n_src=2, n_dst=1, debugtest=False):
        """Create the unit's ports.

        * :rwid: width used for the source, destination and address signals
        * :alu: ALU module, added as a submodule in elaborate()
        * :mem: memory whose rdport/wrport are driven in elaborate()
        * :n_src: number of source operand signals (src1_i, src2_i, ...)
        * :n_dst: number of destination signals (dest1_i, ...)
        * :debugtest: when true, elaborate() returns before connecting
                      the memory ports
        """
        self.rwid = rwid
        self.alu = alu
        self.mem = mem
        self.debugtest = debugtest

        # NOTE(review): counter is not referenced anywhere else in this class
        self.counter = Signal(4)
        src = []
        for i in range(n_src):
            j = i + 1 # name numbering to match src1/src2
            src.append(Signal(rwid, name="src%d_i" % j, reset_less=True))

        dst = []
        for i in range(n_dst):
            j = i + 1 # name numbering to match dest1/2...
            dst.append(Signal(rwid, name="dest%d_i" % j, reset_less=True))

        self.rd = go_record(n_src, name="rd") # read in, req out
        self.wr = go_record(n_dst, name="wr") # write in, req out
        self.go_rd_i = self.rd.go # temporary naming
        self.go_wr_i = self.wr.go # temporary naming
        self.rd_rel_o = self.rd.rel # temporary naming
        self.req_rel_o = self.wr.rel # temporary naming

        self.go_ad_i = Signal(reset_less=True) # go address in
        self.go_st_i = Signal(reset_less=True) # go store in
        self.issue_i = Signal(reset_less=True) # fn issue in
        self.isalu_i = Signal(reset_less=True) # fn issue as ALU in
        self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
        self.go_die_i = Signal() # go die (reset)

        # operation / data input
        self.oper_i = CompALUOpSubset() # operand
        self.src_i = Array(src)
        self.src1_i = src[0] # oper1 in
        # indexing src[1] means n_src must be at least 2
        self.src2_i = src[1] # oper2 in

        self.busy_o = Signal(reset_less=True) # fn busy out
        self.dest = Array(dst)
        # NOTE(review): data_o is re-bound to a fresh Signal a few lines
        # below, so this alias to dst[0] does not survive __init__
        self.data_o = dst[0] # Dest out
        self.adr_rel_o = Signal(reset_less=True) # request address (from mem)
        self.sto_rel_o = Signal(reset_less=True) # request store (to mem)
        self.done_o = Signal(reset_less=True) # final release signal
        self.data_o = Signal(rwid, reset_less=True) # Dest out (LD or ALU)
        self.addr_o = Signal(rwid, reset_less=True) # Address out (LD or ST)

        # hmm... TODO... move these to outside of LDSTCompUnit?
        self.load_mem_o = Signal(reset_less=True) # activate memory LOAD
        self.stwd_mem_o = Signal(reset_less=True) # activate memory STORE
        self.ld_o = Signal(reset_less=True) # operation is a LD
        self.st_o = Signal(reset_less=True) # operation is a ST

    def elaborate(self, platform):
        """Build the unit: five SR latches (src, opc, adr, req, sto), the
        operand/ALU-output latch registers, the release/busy outputs and,
        unless debugtest is set, the connections to the memory ports.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.alu = self.alu
        # the memory is not added as a submodule here
        #m.submodules.mem = self.mem
        m.submodules.src_l = src_l = SRLatch(sync=False, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.adr_l = adr_l = SRLatch(sync=False, name="adr")
        m.submodules.req_l = req_l = SRLatch(sync=False, name="req")
        m.submodules.sto_l = sto_l = SRLatch(sync=False, name="sto")

        # shadow/go_die
        # one reset per latch; go_die_i is OR'd into every one of them
        reset_b = Signal(reset_less=True)
        reset_w = Signal(reset_less=True)
        reset_a = Signal(reset_less=True)
        reset_s = Signal(reset_less=True)
        reset_r = Signal(reset_less=True)
        comb += reset_b.eq(self.go_st_i | self.wr.go |
                           self.go_ad_i | self.go_die_i)
        comb += reset_w.eq(self.wr.go | self.go_die_i)
        comb += reset_s.eq(self.go_st_i | self.go_die_i)
        comb += reset_r.eq(self.rd.go | self.go_die_i)
        # this one is slightly different, issue_alu_i selects wr.go)
        a_sel = Mux(self.isalu_i, self.wr.go, self.go_ad_i)
        comb += reset_a.eq(a_sel | self.go_die_i)

        # opcode decode
        op_alu = Signal(reset_less=True)
        op_is_ld = Signal(reset_less=True)
        op_is_st = Signal(reset_less=True)
        op_ldst = Signal(reset_less=True)
        op_is_imm = Signal(reset_less=True)

        # ALU/LD data output control
        alulatch = Signal(reset_less=True)
        ldlatch = Signal(reset_less=True)

        # src2 register
        src2_r = Signal(self.rwid, reset_less=True)

        # select immediate or src2 reg to add
        src2_or_imm = Signal(self.rwid, reset_less=True)
        src_sel = Signal(reset_less=True)

        # issue can be either issue_i or issue_alu_i (isalu_i)
        issue_i = Signal(reset_less=True)
        comb += issue_i.eq(self.issue_i | self.isalu_i)

        # Ripple-down the latches, each one set cancels the previous.
        # NOTE: use sync to stop combinatorial loops.

        # opcode latch - inverted so that busy resets to 0
        sync += opc_l.s.eq(issue_i) # XXX NOTE: INVERTED FROM book!
        sync += opc_l.r.eq(reset_b) # XXX NOTE: INVERTED FROM book!

        # src operand latch
        sync += src_l.s.eq(issue_i)
        sync += src_l.r.eq(reset_r)

        # addr latch
        sync += adr_l.s.eq(self.rd.go)
        sync += adr_l.r.eq(reset_a)

        # dest operand latch
        sync += req_l.s.eq(self.go_ad_i | self.go_st_i | self.wr.go)
        sync += req_l.r.eq(reset_w)

        # store latch
        sync += sto_l.s.eq(self.rd.go) # XXX not sure which
        sync += sto_l.r.eq(reset_s)

        # create a latch/register for the operand
        # NOTE(review): the latch enable is self.issue_i only, not the
        # combined issue_i (issue_i | isalu_i) computed above - confirm
        oper_r = CompALUOpSubset() # Dest register
        latchregister(m, self.oper_i, oper_r, self.issue_i, name="oper_r")

        # and one for the output from the ALU
        data_r = Signal(self.rwid, reset_less=True) # Dest register
        latchregister(m, self.alu.o, data_r, alulatch, "aluo_r")

        # and pass the operation to the ALU
        comb += self.alu.op.eq(oper_r)
        comb += self.alu.op.insn_type.eq(InternalOp.OP_ADD) # override insn_type

        # outputs: busy and release signals
        busy_o = self.busy_o
        comb += self.busy_o.eq(opc_l.q) # busy out
        comb += self.rd.rel.eq(src_l.q & busy_o) # src1/src2 req rel
        comb += self.sto_rel_o.eq(sto_l.q & busy_o & self.shadown_i & op_is_st)

        # request release enabled based on if op is a LD/ST or a plain ALU
        # if op is an ADD/SUB or a LD, req_rel activates.
        wr_q = Signal(reset_less=True)
        comb += wr_q.eq(req_l.q & (~op_ldst | op_is_ld))

        # ALU output is captured on adr_rel_o for LD/ST, on wr.rel otherwise
        comb += alulatch.eq((op_ldst & self.adr_rel_o) |
                            (~op_ldst & self.wr.rel))

        # select immediate if opcode says so. however also change the latch
        # to trigger *from* the opcode latch instead.
        comb += src_sel.eq(Mux(op_is_imm, opc_l.qn, src_l.q))
        comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
                                   self.src2_i))

        # create a latch/register for src1/src2 (include immediate select)
        latchregister(m, self.src1_i, self.alu.a, src_l.q, name="src1_r")
        latchregister(m, self.src2_i, src2_r, src_l.q, name="src2_r")
        latchregister(m, src2_or_imm, self.alu.b, src_sel, name="imm_r")

        # decode bits of operand (latched)
        comb += op_is_imm.eq(oper_r.imm_data.imm_ok)
        comb += op_is_st.eq(oper_r.insn_type == InternalOp.OP_STORE) # ST
        comb += op_is_ld.eq(oper_r.insn_type == InternalOp.OP_LOAD) # LD
        comb += op_ldst.eq(op_is_ld | op_is_st)
        comb += self.load_mem_o.eq(op_is_ld & self.go_ad_i)
        comb += self.stwd_mem_o.eq(op_is_st & self.go_st_i)
        comb += self.ld_o.eq(op_is_ld)
        comb += self.st_o.eq(op_is_st)

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(self.rd.go): # src operands ready, GO!
            with m.If(~self.alu.p_ready_o): # no ACK yet
                m.d.comb += self.alu.p_valid_i.eq(1) # so indicate valid

        # only proceed if ALU says its output is valid
        with m.If(self.alu.n_valid_o):
            # write req release out. waits until shadow is dropped.
            comb += self.wr.rel.eq(wr_q & busy_o & self.shadown_i)
            # address release only happens on LD/ST, and is shadowed.
            comb += self.adr_rel_o.eq(adr_l.q & op_ldst & busy_o &
                                      self.shadown_i)
            # when output latch is ready, and ALU says ready, accept ALU output
            with m.If(self.wr.rel):
                # tells ALU "thanks got it"
                m.d.comb += self.alu.n_ready_i.eq(1)

        # provide "done" signal: select req_rel for non-LD/ST, adr_rel for LD/ST
        comb += self.done_o.eq((self.wr.rel & ~op_ldst) |
                               (self.adr_rel_o & op_ldst))

        # put the register directly onto the output bus on a go_write
        # this is "ALU mode". go_wr_i *must* be deasserted on next clock
        with m.If(self.wr.go):
            comb += self.data_o.eq(data_r)

        # "LD/ST" mode: put the register directly onto the *address* bus
        with m.If(self.go_ad_i | self.go_st_i):
            comb += self.addr_o.eq(data_r)

        # TODO: think about moving these to another module

        # debug builds stop here: no memory ports are touched
        if self.debugtest:
            return m

        # connect ST to memory. NOTE: unit *must* be set back
        # to start again by dropping go_st_i on next clock
        with m.If(self.stwd_mem_o):
            wrport = self.mem.wrport
            comb += wrport.addr.eq(self.addr_o)
            comb += wrport.data.eq(src2_r)
            comb += wrport.en.eq(1)

        # connect LD to memory. NOTE: unit *must* be set back
        # to start again by dropping go_ad_i on next clock
        rdport = self.mem.rdport
        ldd_r = Signal(self.rwid, reset_less=True) # Dest register
        # latch LD-out
        latchregister(m, rdport.data, ldd_r, ldlatch, "ldo_r")
        # ldlatch follows load_mem_o one clock later (sync domain)
        sync += ldlatch.eq(self.load_mem_o)
        with m.If(self.load_mem_o):
            comb += rdport.addr.eq(self.addr_o)
            # comb += rdport.en.eq(1) # only when transparent=False

        # if LD-latch, put ld-reg out onto output
        with m.If(ldlatch | self.load_mem_o):
            comb += self.data_o.eq(ldd_r)

        return m

    def __iter__(self):
        """Yield the unit's control, operand and output signals in a
        fixed order (used by ports()).
        """
        yield self.rd.go
        yield self.go_ad_i
        yield self.wr.go
        yield self.go_st_i
        yield self.issue_i
        yield self.isalu_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i
        yield self.busy_o
        yield self.rd.rel
        yield self.adr_rel_o
        yield self.sto_rel_o
        yield self.wr.rel
        yield self.data_o
        yield self.load_mem_o
        yield self.stwd_mem_o

    def ports(self):
        """Return the signals produced by __iter__ as a list."""
        return list(self)
389
390
def wait_for(sig):
    """Simulation helper: poll `sig` once per clock until it reads true.

    The signal is sampled (and printed) once up front; after that each
    iteration advances one clock, re-samples, prints the value and stops
    as soon as the sampled value is truthy.
    """
    initial = yield sig
    print("wait for", sig, initial)
    asserted = False
    while not asserted:
        yield                   # advance one clock
        sample = yield sig      # read the signal back
        print(sample)
        asserted = bool(sample)
400
401
def store(dut, src1, src2, imm, imm_ok=True):
    """Simulation process driving one STORE through the unit.

    * :dut: the LDSTCompUnit under test
    * :src1: value put on src1_i
    * :src2: value put on src2_i
    * :imm: immediate placed in oper_i.imm_data.imm
    * :imm_ok: value for oper_i.imm_data.imm_ok
    """
    # set up the operation and operands
    yield dut.oper_i.insn_type.eq(InternalOp.OP_STORE)
    yield dut.src1_i.eq(src1)
    yield dut.src2_i.eq(src2)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)

    # pulse issue for one clock
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # request both source operands, then drop go_rd once rel is seen
    yield dut.rd.go.eq(0b11)
    yield from wait_for(dut.rd.rel)
    yield dut.rd.go.eq(0)

    # wait for the address release, then action the store
    yield from wait_for(dut.adr_rel_o)
    yield dut.go_st_i.eq(1)
    yield from wait_for(dut.sto_rel_o)
    # NOTE: a bare "wait_for(dut.stwd_mem_o)" call used to sit here.  Without
    # "yield from" it only created a generator that was never run, so it
    # never waited on anything.  It is recorded as a comment rather than
    # turned into a real wait, which would hold go_st_i for extra clocks.
    # wait_for(dut.stwd_mem_o)
    yield dut.go_st_i.eq(0)
    yield
421
422
def load(dut, src1, src2, imm, imm_ok=True):
    """Simulation process driving one LOAD through the unit.

    Sets up the operation and operands, pulses issue_i, requests the
    source registers, waits for adr_rel_o, asserts go_ad_i and finally
    samples data_o.  Returns the sampled data_o value.
    """
    oper = dut.oper_i
    setup = (
        (oper.insn_type, InternalOp.OP_LOAD),
        (dut.src1_i, src1),
        (dut.src2_i, src2),
        (oper.imm_data.imm, imm),
        (oper.imm_data.imm_ok, imm_ok),
    )
    for signal, value in setup:
        yield signal.eq(value)

    # issue_i high for one clock, then low for one clock
    for level in (1, 0):
        yield dut.issue_i.eq(level)
        yield

    # request both source operands until rd.rel is observed
    read_go = dut.rd.go
    yield read_go.eq(0b11)
    yield from wait_for(dut.rd.rel)
    yield read_go.eq(0)

    # address phase: wait for release, then action the load
    yield from wait_for(dut.adr_rel_o)
    yield dut.go_ad_i.eq(1)
    yield from wait_for(dut.busy_o)
    yield
    loaded = yield dut.data_o
    yield dut.go_ad_i.eq(0)
    return loaded
444
445
def add(dut, src1, src2, imm, imm_ok=False):
    """Simulation process driving one ADD (optionally add-immediate)
    through the unit.

    Sets up the operation and operands, pulses issue_i, asserts rd.go,
    waits for wr.rel, asserts wr.go and samples data_o.  Returns the
    sampled data_o value.
    """
    oper = dut.oper_i
    setup = (
        (oper.insn_type, InternalOp.OP_ADD),
        (dut.src1_i, src1),
        (dut.src2_i, src2),
        (oper.imm_data.imm, imm),
        (oper.imm_data.imm_ok, imm_ok),
    )
    for signal, value in setup:
        yield signal.eq(value)

    # issue_i high for one clock, then low for one clock
    for level in (1, 0):
        yield dut.issue_i.eq(level)
        yield

    # read phase
    read_go = dut.rd.go
    yield read_go.eq(1)
    yield from wait_for(dut.rd.rel)
    yield read_go.eq(0)

    # write phase: wait for the write release, then action the write
    write_go = dut.wr.go
    yield from wait_for(dut.wr.rel)
    yield write_go.eq(1)
    yield from wait_for(dut.busy_o)
    yield
    result = yield dut.data_o
    yield write_go.eq(0)
    yield
    return result
468
469
def scoreboard_sim(dut):
    """Top-level simulation process: two stores, two loads reading the
    stored values back, then a register add and an add-immediate.
    """
    # two STs (different addresses): (src1, src2) with immediate 2
    for base, value in ((4, 3), (2, 9)):
        yield from store(dut, base, value, 2)
    yield

    # two LDs (deliberately LD from the 1st address then 2nd)
    for base, expected in ((4, 0x0003), (2, 0x0009)):
        data = yield from load(dut, base, 0, 2)
        assert data == expected
    yield

    # now do an add (the immediate is supplied but imm_ok stays False)
    total = yield from add(dut, 4, 3, 0xfeed)
    assert total == 0x7

    # and an add-immediate (src2 is supplied but the immediate is selected)
    total = yield from add(dut, 4, 0xdeef, 2, imm_ok=True)
    assert total == 0x6
489
490
class TestLDSTCompUnit(LDSTCompUnit):
    """LDSTCompUnit that creates its own ALU and TestMemory, for use in
    simulation.
    """

    def __init__(self, rwid):
        """Build an ALU and a TestMemory for width `rwid` and hand them to
        LDSTCompUnit.
        """
        # Import through the package path, as the module-level imports do.
        # A bare "from alu_hier import ALU" only resolves when the
        # interpreter is started from this file's own directory.
        from soc.experiment.alu_hier import ALU
        self.alu = alu = ALU(rwid)
        self.mem = mem = TestMemory(rwid, 8)
        LDSTCompUnit.__init__(self, rwid, alu, mem)

    def elaborate(self, platform):
        """Elaborate the base unit, then register the memory as a
        submodule (the base class does not add it itself).
        """
        m = LDSTCompUnit.elaborate(self, platform)
        m.submodules.mem = self.mem
        return m
503
504
def test_scoreboard():
    """Build a 16-bit test unit, dump its RTLIL to test_ldst_comp.il and
    run scoreboard_sim against it, writing test_ldst_comp.vcd.
    """
    unit = TestLDSTCompUnit(16)

    # write out the RTLIL for inspection
    il_text = rtlil.convert(unit, ports=unit.ports())
    with open("test_ldst_comp.il", "w") as il_file:
        il_file.write(il_text)

    # run the simulation process against the same unit
    process = scoreboard_sim(unit)
    run_simulation(unit, process, vcd_name='test_ldst_comp.vcd')
513
514
# run the simulation test when this file is executed directly as a script
if __name__ == '__main__':
    test_scoreboard()