e049e042c11045769e1363516c2e287ec10e1c95
[soc.git] / src / soc / experiment / compldst.py
1 """ LOAD / STORE Computation Unit. Also capable of doing ADD and ADD immediate
2
3 This module runs a "revolving door" set of four latches, based on
4 * Issue
5 * Go_Read
6 * Go_Addr
7 * Go_Write *OR* Go_Store
8
9 (Note that opc_l has been inverted (and qn used), due to SRLatch
10 default reset state being "0" rather than "1")
11
12 Also note: the LD/ST Comp Unit can act as a *standard ALU* doing
13 add and subtract.
14
15 Stores are activated when Go_Store is enabled, and use the ALU
16 to add the immediate (imm_i) to the address (src1_i); then, when
17 ready (go_st_i and the ALU ready), the operand (src2_i) is stored
18 at the computed address.
19 """
20
21 from nmigen.compat.sim import run_simulation
22 from nmigen.cli import verilog, rtlil
23 from nmigen import Module, Signal, Mux, Cat, Elaboratable, Array
24
25 from nmutil.latch import SRLatch, latchregister
26
27 from soc.experiment.testmem import TestMemory
28 from soc.decoder.power_enums import InternalOp
29
30 from .alu_hier import CompALUOpSubset
31
32
# internal opcodes.  hypothetically this could do more combinations.
# meanings (bit positions match the BITn_* constants below):
# * bit 0: 0 = ADD , 1 = SUB
# * bit 1: 0 = src1, 1 = IMM
# * bit 2: 1 = ST
# * bit 3: 1 = LD
BIT0_ADD = 0
BIT1_SRC = 1
BIT2_ST = 2
BIT3_LD = 3
# convenience thingies.
LDST_OP_ADD = 0b0000  # plain ADD (src1 + src2) - use this ALU as an ADD
LDST_OP_SUB = 0b0001  # plain SUB (src1 - src2) - use this ALU as a SUB
LDST_OP_ADDI = 0b0010  # immed ADD (imm + src1)
LDST_OP_SUBI = 0b0011  # immed SUB (imm - src1)
LDST_OP_ST = 0b0110  # immed ADD plus ST op (bit 2).  ADD result is address
LDST_OP_LD = 0b1010  # immed ADD plus LD op (bit 3).  ADD result is address
50
51
class LDSTCompUnit(Elaboratable):
    """ LOAD / STORE / ADD / SUB Computation Unit

    Inputs
    ------

    * :rwid:   register width
    * :alu:    an ALU module
    * :mem:    a Memory Module (read-write capable)
    * :n_src:  number of source operand signals to create (default 2)
    * :n_dst:  number of destination signals to create (default 1)

    Control Signals (In)
    --------------------

    * :oper_i:     operation being carried out (a CompALUOpSubset record).
                   insn_type OP_LOAD / OP_STORE select LD / ST, anything
                   else is treated as a plain ALU op.  imm_data.imm_ok
                   selects the immediate instead of src2.
    * :issue_i:    LD/ST is being "issued".
    * :isalu_i:    ADD/SUB is being "issued" (aka issue_alu_i)
    * :shadown_i:  Inverted-shadow is being held (stops STORE *and* WRITE)
    * :go_rd_i:    read is being actioned (latches in src regs)
    * :go_wr_i:    write mode (exactly like ALU CompUnit)
    * :go_ad_i:    address is being actioned (triggers actual mem LD)
    * :go_st_i:    store is being actioned (triggers actual mem STORE)
    * :go_die_i:   resets the unit back to "wait for issue"

    Control Signals (Out)
    ---------------------

    * :busy_o:      function unit is busy
    * :rd_rel_o:    request src1/src2
    * :adr_rel_o:   request address (from mem)
    * :sto_rel_o:   request store (to mem)
    * :req_rel_o:   request write (result)
    * :done_o:      final release: req_rel_o for ALU ops, adr_rel_o for LD/ST
    * :load_mem_o:  activate memory LOAD
    * :stwd_mem_o:  activate memory STORE
    * :ld_o:        latched operation is a LD
    * :st_o:        latched operation is a ST

    Note: load_mem_o, stwd_mem_o and req_rel_o MUST all be acknowledged
    in a single cycle and the CompUnit set back to doing another op.
    This means deasserting go_st_i, go_ad_i or go_wr_i as appropriate
    depending on whether the operation is a STORE, LD, or a straight
    ALU operation respectively.

    Control Data (out)
    ------------------
    * :data_o:     Dest out (LD or ALU)
    * :addr_o:     Address out (LD or ST)
    """

    def __init__(self, rwid, alu, mem, n_src=2, n_dst=1):
        self.rwid = rwid
        self.alu = alu
        self.mem = mem

        # NOTE(review): counter is not referenced anywhere else in this
        # class; kept because external code may rely on the attribute.
        self.counter = Signal(4)
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            src.append(Signal(rwid, name="src%d_i" % j, reset_less=True))

        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            dst.append(Signal(rwid, name="dest%d_i" % j, reset_less=True))

        self.go_rd_i = Signal(n_src, reset_less=True)  # go read in
        self.go_ad_i = Signal(n_dst, reset_less=True)  # go address in
        self.go_wr_i = Signal(reset_less=True)  # go write in
        self.go_st_i = Signal(reset_less=True)  # go store in
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.isalu_i = Signal(reset_less=True)  # fn issue as ALU in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        # operation / data input
        self.oper_i = CompALUOpSubset()  # operand
        self.src_i = Array(src)
        self.src1_i = src[0]  # oper1 in
        self.src2_i = src[1]  # oper2 in (requires n_src >= 2)

        self.busy_o = Signal(reset_less=True)  # fn busy out
        # the dest%d_i signals stay reachable through self.dest.  data_o is
        # a separate signal (created below): the former "data_o = dst[0]"
        # binding was always overwritten, so it has been dropped.
        self.dest = Array(dst)
        self.rd_rel_o = Signal(n_src, reset_less=True)  # request src1/src2
        self.adr_rel_o = Signal(reset_less=True)  # request address (from mem)
        self.sto_rel_o = Signal(reset_less=True)  # request store (to mem)
        self.req_rel_o = Signal(n_dst, reset_less=True)  # req write (result)
        self.done_o = Signal(reset_less=True)  # final release signal
        self.data_o = Signal(rwid, reset_less=True)  # Dest out (LD or ALU)
        self.addr_o = Signal(rwid, reset_less=True)  # Address out (LD or ST)

        # hmm... TODO... move these to outside of LDSTCompUnit?
        self.load_mem_o = Signal(reset_less=True)  # activate memory LOAD
        self.stwd_mem_o = Signal(reset_less=True)  # activate memory STORE
        self.ld_o = Signal(reset_less=True)  # operation is a LD
        self.st_o = Signal(reset_less=True)  # operation is a ST

    def elaborate(self, platform):
        """Build the latch chain, ALU hookup and memory port connections.

        Local variable names are deliberately left unchanged: nmigen
        infers signal names from them.  Statement order is also kept,
        because later comb assignments take priority over earlier ones
        (data_o is driven in two places below).
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.alu = self.alu
        # self.mem is deliberately NOT added as a submodule here: only its
        # rdport/wrport are driven below.  (TestLDSTCompUnit adds it.)
        m.submodules.src_l = src_l = SRLatch(sync=False, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.adr_l = adr_l = SRLatch(sync=False, name="adr")
        m.submodules.req_l = req_l = SRLatch(sync=False, name="req")
        m.submodules.sto_l = sto_l = SRLatch(sync=False, name="sto")

        # shadow/go_die
        reset_b = Signal(reset_less=True)
        reset_w = Signal(reset_less=True)
        reset_a = Signal(reset_less=True)
        reset_s = Signal(reset_less=True)
        reset_r = Signal(reset_less=True)
        comb += reset_b.eq(self.go_st_i | self.go_wr_i |
                           self.go_ad_i | self.go_die_i)
        comb += reset_w.eq(self.go_wr_i | self.go_die_i)
        comb += reset_s.eq(self.go_st_i | self.go_die_i)
        comb += reset_r.eq(self.go_rd_i | self.go_die_i)
        # this one is slightly different: issue_alu_i selects go_wr_i
        a_sel = Mux(self.isalu_i, self.go_wr_i, self.go_ad_i)
        comb += reset_a.eq(a_sel | self.go_die_i)

        # opcode decode
        op_is_ld = Signal(reset_less=True)
        op_is_st = Signal(reset_less=True)
        op_ldst = Signal(reset_less=True)
        op_is_imm = Signal(reset_less=True)
        alulatch = Signal(reset_less=True)

        # src2 register
        src2_r = Signal(self.rwid, reset_less=True)

        # select immediate or src2 reg to add
        src2_or_imm = Signal(self.rwid, reset_less=True)
        src_sel = Signal(reset_less=True)

        # issue can be either issue_i or issue_alu_i (isalu_i)
        issue_i = Signal(reset_less=True)
        comb += issue_i.eq(self.issue_i | self.isalu_i)

        # Ripple-down the latches, each one set cancels the previous.
        # NOTE: use sync to stop combinatorial loops.

        # opcode latch - inverted so that busy resets to 0
        sync += opc_l.s.eq(issue_i)  # XXX NOTE: INVERTED FROM book!
        sync += opc_l.r.eq(reset_b)  # XXX NOTE: INVERTED FROM book!

        # src operand latch
        sync += src_l.s.eq(issue_i)
        sync += src_l.r.eq(reset_r)

        # addr latch
        sync += adr_l.s.eq(self.go_rd_i)
        sync += adr_l.r.eq(reset_a)

        # dest operand latch
        sync += req_l.s.eq(self.go_ad_i | self.go_st_i | self.go_wr_i)
        sync += req_l.r.eq(reset_w)

        # store latch
        sync += sto_l.s.eq(self.go_rd_i)  # XXX not sure which
        sync += sto_l.r.eq(reset_s)

        # create a latch/register for the operand
        # NOTE(review): this is gated by self.issue_i only, not by the
        # combined local issue_i (issue_i | isalu_i) - confirm intended.
        oper_r = CompALUOpSubset()  # Dest register
        latchregister(m, self.oper_i, oper_r, self.issue_i, name="oper_r")

        # and one for the output from the ALU
        data_r = Signal(self.rwid, reset_less=True)  # Dest register
        latchregister(m, self.alu.o, data_r, alulatch, "aluo_r")

        # and pass the operation to the ALU
        comb += self.alu.op.eq(oper_r)
        # the later assignment wins: the ALU is always told to ADD
        comb += self.alu.op.insn_type.eq(InternalOp.OP_ADD)  # override insn_type

        # outputs: busy and release signals
        busy_o = self.busy_o
        comb += self.busy_o.eq(opc_l.q)  # busy out
        comb += self.rd_rel_o.eq(src_l.q & busy_o)  # src1/src2 req rel
        comb += self.sto_rel_o.eq(sto_l.q & busy_o & self.shadown_i & op_is_st)

        # request release enabled based on if op is a LD/ST or a plain ALU
        # if op is an ADD/SUB or a LD, req_rel activates.
        wr_q = Signal(reset_less=True)
        comb += wr_q.eq(req_l.q & (~op_ldst | op_is_ld))

        # ALU output is captured on adr_rel_o for LD/ST, req_rel_o otherwise
        comb += alulatch.eq((op_ldst & self.adr_rel_o) |
                            (~op_ldst & self.req_rel_o))

        # select immediate if opcode says so.  however also change the latch
        # to trigger *from* the opcode latch instead.
        comb += src_sel.eq(Mux(op_is_imm, opc_l.qn, src_l.q))
        comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
                                   self.src2_i))

        # create a latch/register for src1/src2 (include immediate select)
        latchregister(m, self.src1_i, self.alu.a, src_l.q, name="src1_r")
        latchregister(m, self.src2_i, src2_r, src_l.q, name="src2_r")
        latchregister(m, src2_or_imm, self.alu.b, src_sel, name="imm_r")

        # decode bits of operand (latched)
        comb += op_is_imm.eq(oper_r.imm_data.imm_ok)
        comb += op_is_st.eq(oper_r.insn_type == InternalOp.OP_STORE)  # ST
        comb += op_is_ld.eq(oper_r.insn_type == InternalOp.OP_LOAD)  # LD
        comb += op_ldst.eq(op_is_ld | op_is_st)
        comb += self.load_mem_o.eq(op_is_ld & self.go_ad_i)
        comb += self.stwd_mem_o.eq(op_is_st & self.go_st_i)
        comb += self.ld_o.eq(op_is_ld)
        comb += self.st_o.eq(op_is_st)

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        #       go_read is only valid for one clock!
        with m.If(self.go_rd_i):  # src operands ready, GO!
            with m.If(~self.alu.p_ready_o):  # no ACK yet
                m.d.comb += self.alu.p_valid_i.eq(1)  # so indicate valid

        # only proceed if ALU says its output is valid
        with m.If(self.alu.n_valid_o):
            # write req release out.  waits until shadow is dropped.
            comb += self.req_rel_o.eq(wr_q & busy_o & self.shadown_i)
            # address release only happens on LD/ST, and is shadowed.
            comb += self.adr_rel_o.eq(adr_l.q & op_ldst & busy_o &
                                      self.shadown_i)
            # when output latch is ready, and ALU says ready, accept ALU output
            with m.If(self.req_rel_o):
                # tells ALU "thanks got it"
                m.d.comb += self.alu.n_ready_i.eq(1)

        # provide "done" signal: select req_rel for non-LD/ST, adr_rel for LD/ST
        comb += self.done_o.eq((self.req_rel_o & ~op_ldst) |
                               (self.adr_rel_o & op_ldst))

        # put the register directly onto the output bus on a go_write
        # this is "ALU mode".  go_wr_i *must* be deasserted on next clock
        with m.If(self.go_wr_i):
            comb += self.data_o.eq(data_r)

        # "LD/ST" mode: put the register directly onto the *address* bus
        with m.If(self.go_ad_i | self.go_st_i):
            comb += self.addr_o.eq(data_r)

        # TODO: think about moving these to another module

        # connect ST to memory.  NOTE: unit *must* be set back
        # to start again by dropping go_st_i on next clock
        with m.If(self.stwd_mem_o):
            wrport = self.mem.wrport
            comb += wrport.addr.eq(self.addr_o)
            comb += wrport.data.eq(src2_r)
            comb += wrport.en.eq(1)

        # connect LD to memory.  NOTE: unit *must* be set back
        # to start again by dropping go_ad_i on next clock
        with m.If(self.load_mem_o):
            rdport = self.mem.rdport
            comb += rdport.addr.eq(self.addr_o)
            # placed after the go_wr_i block so that, should both be
            # active, the memory read data takes priority on data_o
            comb += self.data_o.eq(rdport.data)
            # comb += rdport.en.eq(1) # only when transparent=False

        return m

    def __iter__(self):
        """Yield the signals exposed as ports (see ports()).

        NOTE(review): addr_o, done_o, ld_o and st_o are not yielded -
        confirm whether that omission is intentional.
        """
        yield self.go_rd_i
        yield self.go_ad_i
        yield self.go_wr_i
        yield self.go_st_i
        yield self.issue_i
        yield self.isalu_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i
        yield self.busy_o
        yield self.rd_rel_o
        yield self.adr_rel_o
        yield self.sto_rel_o
        yield self.req_rel_o
        yield self.data_o
        yield self.load_mem_o
        yield self.stwd_mem_o

    def ports(self):
        """Return the port signals (everything __iter__ yields) as a list."""
        return list(self)
337
338
def wait_for(sig):
    """Simulation helper: advance clocks until *sig* reads as true.

    The value of *sig* is sampled (and printed) once up front, but that
    first sample is not used to decide anything: at least one clock is
    always consumed before the signal is checked.  Each subsequent
    sample is printed as well.
    """
    initial = (yield sig)
    print("wait for", sig, initial)
    seen = False
    while not seen:
        yield  # step one clock
        seen = (yield sig)
        print(seen)
348
349
def store(dut, src1, src2, imm, imm_ok=True):
    """Simulation process: drive *dut* through one STORE operation.

    * :dut:     the LDSTCompUnit under test
    * :src1:    value driven onto src1_i (address base)
    * :src2:    value driven onto src2_i (the data to be stored)
    * :imm:     value driven onto oper_i.imm_data.imm
    * :imm_ok:  value driven onto oper_i.imm_data.imm_ok
    """
    # present the operation and its operands
    yield dut.oper_i.insn_type.eq(InternalOp.OP_STORE)
    yield dut.src1_i.eq(src1)
    yield dut.src2_i.eq(src2)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)

    # pulse issue_i for one clock
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # go_rd_i is held until rd_rel_o reads as true
    yield dut.go_rd_i.eq(0b11)
    yield from wait_for(dut.rd_rel_o)
    yield dut.go_rd_i.eq(0)

    # once the address is released, action the store
    yield from wait_for(dut.adr_rel_o)
    yield dut.go_st_i.eq(1)
    yield from wait_for(dut.sto_rel_o)
    # A bare "wait_for(dut.stwd_mem_o)" call used to sit here.  Without
    # "yield from" it only created a generator object and never ran, so
    # it has been removed.  This matches load() and add(), where the same
    # line is commented out, and leaves the simulated timing unchanged.
    yield dut.go_st_i.eq(0)
    yield
369
370
def load(dut, src1, src2, imm, imm_ok=True):
    """Simulation process: drive *dut* through one LOAD operation.

    * :dut:     the LDSTCompUnit under test
    * :src1:    value driven onto src1_i
    * :src2:    value driven onto src2_i
    * :imm:     value driven onto oper_i.imm_data.imm
    * :imm_ok:  value driven onto oper_i.imm_data.imm_ok

    Returns the value sampled from data_o while go_ad_i is still high.
    """
    oper = dut.oper_i
    operand_setup = (
        oper.insn_type.eq(InternalOp.OP_LOAD),
        dut.src1_i.eq(src1),
        dut.src2_i.eq(src2),
        oper.imm_data.imm.eq(imm),
        oper.imm_data.imm_ok.eq(imm_ok),
    )
    for assignment in operand_setup:
        yield assignment

    # issue_i high for one clock, then low for one clock
    for level in (1, 0):
        yield dut.issue_i.eq(level)
        yield

    # go_rd_i is held until rd_rel_o reads as true
    yield dut.go_rd_i.eq(0b11)
    yield from wait_for(dut.rd_rel_o)
    yield dut.go_rd_i.eq(0)

    # once the address is released, action it and sample the result
    yield from wait_for(dut.adr_rel_o)
    yield dut.go_ad_i.eq(1)
    yield from wait_for(dut.busy_o)
    yield
    loaded = (yield dut.data_o)
    yield dut.go_ad_i.eq(0)
    return loaded
392
393
def add(dut, src1, src2, imm, imm_ok=False):
    """Simulation process: drive *dut* through one ADD (ALU-mode) operation.

    * :dut:     the LDSTCompUnit under test
    * :src1:    value driven onto src1_i
    * :src2:    value driven onto src2_i
    * :imm:     value driven onto oper_i.imm_data.imm
    * :imm_ok:  value driven onto oper_i.imm_data.imm_ok

    Returns the value sampled from data_o while go_wr_i is still high.
    """
    oper = dut.oper_i
    operand_setup = (
        oper.insn_type.eq(InternalOp.OP_ADD),
        dut.src1_i.eq(src1),
        dut.src2_i.eq(src2),
        oper.imm_data.imm.eq(imm),
        oper.imm_data.imm_ok.eq(imm_ok),
    )
    for assignment in operand_setup:
        yield assignment

    # issue_i high for one clock, then low for one clock
    for level in (1, 0):
        yield dut.issue_i.eq(level)
        yield

    # go_rd_i is held until rd_rel_o reads as true
    yield dut.go_rd_i.eq(1)
    yield from wait_for(dut.rd_rel_o)
    yield dut.go_rd_i.eq(0)

    # once the write request is released, action it and sample the result
    yield from wait_for(dut.req_rel_o)
    yield dut.go_wr_i.eq(1)
    yield from wait_for(dut.busy_o)
    yield
    result = (yield dut.data_o)
    yield dut.go_wr_i.eq(0)
    yield
    return result
416
417
def scoreboard_sim(dut):
    """Simulation process: two stores, two loads, an add and an add-immediate.

    Every load and add result is checked with an assert.
    """
    # two STs (different addresses): (src1, src2) with immediate 2
    for base, value in ((4, 3), (2, 9)):
        yield from store(dut, base, value, 2)
    yield

    # two LDs (deliberately LD from the 1st address then 2nd)
    for base, expected in ((4, 0x0003), (2, 0x0009)):
        data = yield from load(dut, base, 0, 2)
        assert data == expected
    yield

    # a plain add, then an add-immediate: (src1, src2, imm, imm_ok, expected)
    alu_cases = (
        (4, 3, 0xfeed, False, 0x7),
        (4, 0xdeef, 2, True, 0x6),
    )
    for src1, src2, imm, imm_ok, expected in alu_cases:
        data = yield from add(dut, src1, src2, imm, imm_ok=imm_ok)
        assert data == expected
437
438
class TestLDSTCompUnit(LDSTCompUnit):
    """LDSTCompUnit wired to its own ALU and TestMemory, for simulation.

    * :rwid: register width, passed to the ALU, the TestMemory and the
             LDSTCompUnit base class
    """

    def __init__(self, rwid):
        # package-relative import, consistent with the module-level
        # "from .alu_hier import CompALUOpSubset".  The previous bare
        # "from alu_hier import ALU" only resolved when the directory
        # containing alu_hier.py happened to be on sys.path.
        from .alu_hier import ALU
        self.alu = alu = ALU(rwid)
        self.mem = mem = TestMemory(rwid, 8)
        LDSTCompUnit.__init__(self, rwid, alu, mem)

    def elaborate(self, platform):
        """Elaborate the base unit, then add the memory as a submodule.

        LDSTCompUnit.elaborate only drives the memory ports; it does not
        register self.mem as a submodule, so that is done here.
        """
        m = LDSTCompUnit.elaborate(self, platform)
        m.submodules.mem = self.mem
        return m
451
452
def test_scoreboard():
    """Write the 16-bit test unit out as RTLIL, then run scoreboard_sim on it.

    Writes "test_ldst_comp.il" and "test_ldst_comp.vcd" to the current
    working directory.
    """
    unit = TestLDSTCompUnit(16)

    # convert first, so no output file is created if conversion fails
    il_text = rtlil.convert(unit, ports=unit.ports())
    with open("test_ldst_comp.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(unit, scoreboard_sim(unit), vcd_name='test_ldst_comp.vcd')
461
462
# run the LD/ST computation unit simulation when executed directly
if __name__ == "__main__":
    test_scoreboard()