code-morph LDSTCompUnit to use RecordObject structure, like CompUnitALU
[soc.git] / src / soc / experiment / compldst_multi.py
1 """ LOAD / STORE Computation Unit.
2
3 This module covers POWER9-compliant Load and Store operations,
4 with selection on each between immediate and indexed mode as
5 options for the calculation of the Effective Address (EA),
6 and also "update" mode which optionally stores that EA into
7 an additional register.
8
9 ----
10 Note: it took 15 attempts over several weeks to redraw the diagram
11 needed to capture this FSM properly. To understand it fully, please
12 take the time to review the links, video, and diagram.
13 ----
14
15 Stores are activated when Go_Store is enabled, and use a sync'd "ADD" to
16 compute the "Effective Address", and, when ready the operand (src3_i)
17 is stored in the computed address (passed through to the PortInterface)
18
19 Loads are activated when Go_Write[0] is enabled. The EA is computed,
20 and (as long as there was no exception) the data comes out (at any
21 time from the PortInterface), and is captured by the LDSTCompUnit.
22
23 Both LD and ST may request that the address be computed from summing
24 operand1 (src[0]) with operand2 (src[1]) *or* by summing operand1 with
25 the immediate (from the opcode).
26
27 Both LD and ST may also request "update" mode (op_is_update) which
28 activates the use of Go_Write[1] to control storage of the EA into
29 a *second* operand in the register file.
30
31 Thus this module has *TWO* write-requests to the register file and
32 *THREE* read-requests to the register file (not all at the same time!)
33 The regfile port usage is:
34
35 * LD-imm 1R1W
36 * LD-imm-update 1R2W
37 * LD-idx 2R1W
38 * LD-idx-update 2R2W
39
40 * ST-imm 2R
41 * ST-imm-update 2R1W
42 * ST-idx 3R
43 * ST-idx-update 3R1W
44
45 It's a multi-level Finite State Machine that (unfortunately) nmigen.FSM
46 is not suited to (nmigen.FSM is clock-driven, and some aspects of
47 the nested FSMs below are *combinatorial*).
48
49 * One FSM covers Operand collection and address-side communication
50 with the LD/ST PortInterface. Its role ends when "RD_DONE" is asserted
51
52 * A second FSM activates to cover LD. It activates if op_is_ld is true
53
54 * A third FSM activates to cover ST. It activates if op_is_st is true
55
56 * The "overall" (fourth) FSM coordinates the progression and completion
57 of the three other FSMs, firing "WR_RESET" which switches off "busy"
58
59 Full diagram:
60 https://libre-soc.org/3d_gpu/ld_st_comp_unit.jpg
61
62 Links including to walk-through videos:
63 * https://libre-soc.org/3d_gpu/architecture/6600scoreboard/
64
65 Related Bugreports:
66 * https://bugs.libre-soc.org/show_bug.cgi?id=302
67
68 Terminology:
69
70 * EA - Effective Address
71 * LD - Load
72 * ST - Store
73 """
74
75 from nmigen.compat.sim import run_simulation
76 from nmigen.cli import verilog, rtlil
77 from nmigen import Module, Signal, Mux, Cat, Elaboratable, Array, Repl
78 from nmigen.hdl.rec import Record, Layout
79
80 from nmutil.latch import SRLatch, latchregister
81
82 from soc.experiment.compalu_multi import go_record, CompUnitRecord
83 from soc.experiment.l0_cache import PortInterface
84 from soc.experiment.testmem import TestMemory
85 from soc.decoder.power_enums import InternalOp
86
87 from soc.decoder.power_enums import InternalOp, Function
88
89
class CompLDSTOpSubset(Record):
    """Record holding the operation fields used by the LD/ST unit.

    The field names are a subset of those in Decode2Execute1Type, so
    that eq_from_execute1 (below) can copy them across by name.
    """

    # the single-Signal (non-nested) fields: these are the ones that get
    # marked reset_less and that ports() returns, in this order.
    # NOTE(review): the nested imm_data record is in neither group -
    # confirm whether ports() should also expose imm / imm_ok.
    _flat_fields = ('insn_type',
                    'is_32bit',
                    'is_signed',
                    'data_len',
                    'byte_reverse',
                    'sign_extend',
                    'update')

    def __init__(self, name=None):
        immediate = Layout((("imm", 64), ("imm_ok", 1)))
        fields = (('insn_type', InternalOp),
                  ('imm_data', immediate),
                  ('is_32bit', 1),
                  ('is_signed', 1),
                  ('data_len', 4),  # TODO: should be in separate CompLDSTSubset
                  ('byte_reverse', 1),
                  ('sign_extend', 1),
                  ('update', 1))

        Record.__init__(self, Layout(fields), name=name)

        # Record takes no per-Signal kwargs, so reset_less has to be
        # patched onto each field after construction
        for fname in self._flat_fields:
            getattr(self, fname).reset_less = True

    def eq_from_execute1(self, other):
        """Return assignments copying every same-named field from other.

        other is expected to be a Decode2Execute1Type (or anything with
        a fields dict containing at least this record's field names).
        """
        return [sig.eq(other.fields[fname])
                for fname, sig in self.fields.items()]

    def ports(self):
        """Return the flat (non-nested) fields as a list of Signals."""
        return [getattr(self, fname) for fname in self._flat_fields]
136
137
class LDSTCompUnitRecord(CompUnitRecord):
    """CompUnitRecord specialised for LD/ST: 3 sources, 2 destinations.

    Adds the address and store go/rel pairs plus the LD/ST status outputs
    on top of what CompUnitRecord provides.
    """

    def __init__(self, rwid, name=None):
        super().__init__(CompLDSTOpSubset, rwid,
                         n_src=3, n_dst=2, name=name)

        # extra go-in / request-out pairs, one bit each
        self.ad = go_record(1, name="ad")  # address phase
        self.st = go_record(1, name="st")  # store phase

        # raised when the address phase hit an exception
        self.addr_exc_o = Signal(reset_less=True)

        # decoded operation type
        self.ld_o = Signal(reset_less=True)  # op is a load
        self.st_o = Signal(reset_less=True)  # op is a store

        # memory activation strobes (original author queried whether
        # these are actually necessary)
        self.load_mem_o = Signal(reset_less=True)  # memory LOAD active
        self.stwd_mem_o = Signal(reset_less=True)  # memory STORE active
154
155
class LDSTCompUnit(Elaboratable):
    """LOAD / STORE Computation Unit

    Inputs
    ------

    * :pi:     a PortInterface to the memory subsystem (read-write capable)
    * :rwid:   register width
    * :awid:   address width

    Data inputs
    -----------
    * :src_i:  Source Operands (RA/RB/RC) - managed by rd[0-2] go/req

    Data (outputs)
    --------------
    * :data_o:     Dest out (LD)          - managed by wr[0] go/req
    * :addr_o:     Address out (LD or ST) - managed by wr[1] go/req
    * :addr_exc_o: Address/Data Exception occurred.  LD/ST must terminate

    TODO: make addr_exc_o a data-type rather than a single-bit signal
          (see bug #302)

    Control Signals (In)
    --------------------

    * :oper_i:     operation being carried out (POWER9 decode LD/ST subset)
    * :issue_i:    LD/ST is being "issued".
    * :shadown_i:  Inverted-shadow is being held (stops STORE *and* WRITE)
    * :go_rd_i:    read is being actioned (latches in src regs)
    * :go_wr_i:    write mode (exactly like ALU CompUnit)
    * :go_ad_i:    address is being actioned (triggers actual mem LD)
    * :go_st_i:    store is being actioned (triggers actual mem STORE)
    * :go_die_i:   resets the unit back to "wait for issue"

    Control Signals (Out)
    ---------------------

    * :busy_o:      function unit is busy
    * :rd_rel_o:    request src1/src2
    * :adr_rel_o:   request address (from mem)
    * :sto_rel_o:   request store (to mem)
    * :req_rel_o:   request write (result)
    * :load_mem_o:  activate memory LOAD
    * :stwd_mem_o:  activate memory STORE

    Note: load_mem_o, stwd_mem_o and req_rel_o MUST all be acknowledged
    in a single cycle and the CompUnit set back to doing another op.
    This means deasserting go_st_i, go_ad_i or go_wr_i as appropriate
    depending on whether the operation is a ST or LD.
    """

    def __init__(self, pi, rwid=64, awid=48, debugtest=False):
        """Create the unit and alias the LDSTCompUnitRecord fields.

        All signals live in self.cu (an LDSTCompUnitRecord); the
        attributes set up here are convenience aliases onto that record.
        awid and debugtest are only stored by this constructor.
        """
        self.rwid = rwid
        self.awid = awid
        self.pi = pi
        self.cu = cu = LDSTCompUnitRecord(rwid)
        self.debugtest = debugtest

        # POWER-compliant LD/ST has index and update: *fixed* number of ports
        self.n_src = n_src = 3   # RA, RB, RT/RS
        self.n_dst = n_dst = 2   # RA, RT/RS

        # set up array of src and dest signals
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            name = "src%d_i" % j
            setattr(self, name, getattr(cu, name))

        # NOTE(review): dst is never appended to or read in this method
        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            # NOTE(review): destinations are looked up with an "_i" suffix;
            # this has to match whatever CompUnitRecord names them - confirm
            name = "dest%d_i" % j
            setattr(self, name, getattr(cu, name))

        # convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.ad = cu.ad
        self.st = cu.st

        self.go_rd_i = self.rd.go  # temporary naming
        self.go_wr_i = self.wr.go  # temporary naming
        self.go_ad_i = self.ad.go  # temp naming: go address in
        self.go_st_i = self.st.go  # temp naming: go store in

        self.rd_rel_o = self.rd.rel  # temporary naming
        self.req_rel_o = self.wr.rel  # temporary naming
        self.adr_rel_o = self.ad.rel  # request address (from mem)
        self.sto_rel_o = self.st.rel  # request store (to mem)

        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        self.oper_i = cu.oper_i
        self.src_i = cu._src_i
        self.dest = cu._dest

        self.data_o = self.dest[0]  # Dest1 out: RT
        self.addr_o = self.dest[1]  # Address out (LD or ST) - Update => RA
        self.addr_exc_o = cu.addr_exc_o
        self.done_o = cu.done_o
        self.busy_o = cu.busy_o

        self.ld_o = cu.ld_o
        self.st_o = cu.st_o

        self.load_mem_o = cu.load_mem_o
        self.stwd_mem_o = cu.stwd_mem_o

    def elaborate(self, platform):
        """Build the Module implementing the LD/ST unit.

        Instantiates the SR latches that together form the FSM, the
        latch-registers for the opcode, source operands, EA and LD data,
        the (sync) adder producing the EA, the control-signal equations,
        and the wiring to the PortInterface (self.pi).
        """
        m = Module()

        # temp/convenience
        comb = m.d.comb
        sync = m.d.sync
        issue_i = self.issue_i

        #####################
        # latches for the FSM.
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.alu_l = alu_l = SRLatch(sync=False, name="alu")
        m.submodules.adr_l = adr_l = SRLatch(sync=False, name="adr")
        m.submodules.lod_l = lod_l = SRLatch(sync=False, name="lod")
        m.submodules.sto_l = sto_l = SRLatch(sync=False, name="sto")
        m.submodules.wri_l = wri_l = SRLatch(sync=False, name="wri")
        m.submodules.upd_l = upd_l = SRLatch(sync=False, name="upd")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")

        ####################
        # signals

        # opcode decode
        op_is_ld = Signal(reset_less=True)
        op_is_st = Signal(reset_less=True)

        # ALU/LD data output control
        alu_valid = Signal(reset_less=True)  # ALU operands are valid
        alu_ok = Signal(reset_less=True)     # ALU out ok (1 clock delay valid)
        addr_ok = Signal(reset_less=True)    # addr ok (from PortInterface)
        ld_ok = Signal(reset_less=True)      # LD out ok from PortInterface
        wr_any = Signal(reset_less=True)     # any write (incl. store)
        rda_any = Signal(reset_less=True)    # any read for address ops
        rd_done = Signal(reset_less=True)    # all *necessary* operands read
        wr_reset = Signal(reset_less=True)   # final reset condition

        # LD and ALU out
        alu_o = Signal(self.rwid, reset_less=True)
        ldd_o = Signal(self.rwid, reset_less=True)

        # select immediate or src2 reg to add
        # NOTE(review): src2_or_imm is created a second time further down
        # (just before the Mux), so this first Signal is never used, and
        # src_sel is not referenced anywhere else in this method.
        src2_or_imm = Signal(self.rwid, reset_less=True)
        src_sel = Signal(reset_less=True)

        ##############################
        # reset conditions for latches

        # temporaries (also convenient when debugging)
        reset_o = Signal(reset_less=True)  # reset opcode
        reset_w = Signal(reset_less=True)  # reset write
        reset_u = Signal(reset_less=True)  # reset update
        reset_a = Signal(reset_less=True)  # reset adr latch
        reset_i = Signal(reset_less=True)  # issue|die (use a lot)
        reset_r = Signal(self.n_src, reset_less=True)  # reset src
        reset_s = Signal(reset_less=True)  # reset store

        # every reset term also fires on go_die_i
        comb += reset_i.eq(issue_i | self.go_die_i)        # various
        comb += reset_o.eq(wr_reset | self.go_die_i)       # opcode reset
        comb += reset_w.eq(self.wr.go[0] | self.go_die_i)  # write reg 1
        comb += reset_u.eq(self.wr.go[1] | self.go_die_i)  # update (reg 2)
        comb += reset_s.eq(self.go_st_i | self.go_die_i)   # store reset
        comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))
        comb += reset_a.eq(self.go_ad_i | self.go_die_i)

        ##########################
        # FSM implemented through sequence of latches.  approximately this:
        # - opc_l       : opcode
        #    - src_l[0] : operands
        #    - src_l[1]
        #       - alu_l : looks after add of src1/2/imm (EA)
        #       - adr_l : waits for add (EA)
        #       - upd_l : waits for adr and Regfile (port 2)
        #    - src_l[2] : ST
        # - lod_l       : waits for adr (EA) and for LD Data
        # - wri_l       : waits for LD Data and Regfile (port 1)
        # - sto_l       : waits for alu and operand2
        # - rst_l       : waits for all FSM paths to converge.
        # NOTE: use sync to stop combinatorial loops.

        # opcode latch - inverted so that busy resets to 0
        # note this MUST be sync so as to avoid a combinatorial loop
        # between busy_o and issue_i on the reset latch (rst_l)
        sync += opc_l.s.eq(issue_i)  # XXX NOTE: INVERTED FROM book!
        sync += opc_l.r.eq(reset_o)  # XXX NOTE: INVERTED FROM book!

        # src operand latch
        sync += src_l.s.eq(Repl(issue_i, self.n_src))
        sync += src_l.r.eq(reset_r)

        # alu latch.  use sync-delay between alu_ok and valid to generate pulse
        comb += alu_l.s.eq(reset_i)
        comb += alu_l.r.eq(alu_ok & ~alu_valid & ~rda_any)

        # addr latch
        comb += adr_l.s.eq(reset_i)
        sync += adr_l.r.eq(reset_a)

        # ld latch
        comb += lod_l.s.eq(reset_i)
        comb += lod_l.r.eq(ld_ok)

        # dest operand latch
        comb += wri_l.s.eq(issue_i)
        sync += wri_l.r.eq(reset_w)

        # update-mode operand latch (EA written to reg 2)
        sync += upd_l.s.eq(reset_i)
        sync += upd_l.r.eq(reset_u)

        # store latch
        comb += sto_l.s.eq(addr_ok & op_is_st)
        comb += sto_l.r.eq(reset_s)

        # reset latch
        comb += rst_l.s.eq(addr_ok)  # start when address is ready
        comb += rst_l.r.eq(issue_i)

        # create a latch/register for the operand
        oper_r = CompLDSTOpSubset()  # Dest register
        latchregister(m, self.oper_i, oper_r, self.issue_i, name="oper_r")

        # and for LD
        ldd_r = Signal(self.rwid, reset_less=True)  # Dest register
        latchregister(m, ldd_o, ldd_r, ld_ok, name="ldo_r")

        # and for each input from the incoming src operands
        srl = []
        for i in range(self.n_src):
            name = "src_r%d" % i
            src_r = Signal(self.rwid, name=name, reset_less=True)
            latchregister(m, self.src_i[i], src_r, src_l.q[i], name)
            srl.append(src_r)

        # and one for the output from the ADD (for the EA)
        addr_r = Signal(self.rwid, reset_less=True)  # Effective Address Latch
        latchregister(m, alu_o, addr_r, alu_l.q, "ea_r")

        # select either immediate or src2 if opcode says so
        op_is_imm = oper_r.imm_data.imm_ok
        src2_or_imm = Signal(self.rwid, reset_less=True)
        m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm, srl[1]))

        # now do the ALU addr add: one cycle, and say "ready" (next cycle, too)
        sync += alu_o.eq(srl[0] + src2_or_imm)  # actual EA
        sync += alu_ok.eq(alu_valid)            # keep ack in sync with EA

        # decode bits of operand (latched)
        comb += op_is_st.eq(oper_r.insn_type == InternalOp.OP_STORE)  # ST
        comb += op_is_ld.eq(oper_r.insn_type == InternalOp.OP_LOAD)   # LD
        op_is_update = oper_r.update                                  # UPDATE
        comb += self.load_mem_o.eq(op_is_ld & self.go_ad_i)
        comb += self.stwd_mem_o.eq(op_is_st & self.go_st_i)
        comb += self.ld_o.eq(op_is_ld)
        comb += self.st_o.eq(op_is_st)

        ############################
        # Control Signal calculation

        # busy signal
        busy_o = self.busy_o
        comb += self.busy_o.eq(opc_l.q)  # | self.pi.busy_o)  # busy out

        # 1st operand read-request is simple: always need it
        comb += self.rd.rel[0].eq(src_l.q[0] & busy_o)

        # 2nd operand only needed when immediate is not active
        comb += self.rd.rel[1].eq(src_l.q[1] & busy_o & ~op_is_imm)

        # note when the address-related read "go" signals are active
        comb += rda_any.eq(self.rd.go[0] | self.rd.go[1])

        # alu input valid when 1st and 2nd ops done (or imm not active)
        comb += alu_valid.eq(busy_o & ~(self.rd.rel[0] | self.rd.rel[1]))

        # 3rd operand only needed when operation is a store
        comb += self.rd.rel[2].eq(src_l.q[2] & busy_o & op_is_st)

        # all reads done when alu is valid and 3rd operand needed
        comb += rd_done.eq(alu_valid & ~self.rd.rel[2])

        # address release only if addr ready, but Port must be idle
        # NOTE(review): the expression below only uses adr_l.q and busy_o;
        # no PortInterface idle/busy signal is consulted here.
        comb += self.adr_rel_o.eq(adr_l.q & busy_o)

        # store release when st ready *and* all operands read (and no shadow)
        comb += self.st.rel.eq(sto_l.q & busy_o & rd_done & op_is_st &
                               self.shadown_i)

        # request write of LD result.  waits until shadow is dropped.
        comb += self.wr.rel[0].eq(wri_l.q & busy_o & lod_l.qn & op_is_ld &
                                  self.shadown_i)

        # request write of EA result only in update mode
        comb += self.wr.rel[1].eq(upd_l.q & busy_o & op_is_update &
                                  self.shadown_i)

        # provide "done" signal: select req_rel for non-LD/ST, adr_rel for LD/ST
        # NOTE(review): wr_any is assigned here but not read anywhere else
        # in this method.
        comb += wr_any.eq(self.st.go | self.wr.go[0] | self.wr.go[1])
        # final reset: address accepted (rst_l), unit busy, not shadowed,
        # no store/write request still outstanding, and either the LD data
        # has arrived (lod_l.qn) or the op is a store
        comb += wr_reset.eq(rst_l.q & busy_o & self.shadown_i &
                    ~(self.st.rel | self.wr.rel[0] | self.wr.rel[1]) &
                     (lod_l.qn | op_is_st))
        comb += self.done_o.eq(wr_reset)

        ######################
        # Data/Address outputs

        # put the LD-output register directly onto the output bus on a go_write
        with m.If(self.wr.go[0]):
            comb += self.data_o.eq(ldd_r)

        # "update" mode, put address out on 2nd go-write
        with m.If(op_is_update & self.wr.go[1]):
            comb += self.addr_o.eq(addr_r)

        ###########################
        # PortInterface connections
        pi = self.pi

        # connect to LD/ST PortInterface.
        comb += pi.is_ld_i.eq(op_is_ld & busy_o)  # decoded-LD
        comb += pi.is_st_i.eq(op_is_st & busy_o)  # decoded-ST
        # NOTE(review): this passes the un-latched oper_i, not oper_r
        comb += pi.op.eq(self.oper_i)    # op details (not all needed)
        # address
        comb += pi.addr.data.eq(addr_r)           # EA from adder
        comb += pi.addr.ok.eq(alu_ok & lod_l.q)   # "go do address stuff"
        comb += self.addr_exc_o.eq(pi.addr_exc_o)  # exception occurred
        comb += addr_ok.eq(self.pi.addr_ok_o)  # no exc, address fine
        # ld - ld gets latched in via lod_l
        comb += ldd_o.eq(pi.ld.data)  # ld data goes into ld reg (above)
        comb += ld_ok.eq(pi.ld.ok)  # ld.ok *closes* (freezes) ld data
        # store - data goes in based on go_st
        comb += pi.st.data.eq(srl[2])  # 3rd operand latch
        comb += pi.st.ok.eq(self.st.go)  # go store signals st data valid

        return m

    def __iter__(self):
        """Yield the signals that ports() reports for this unit.

        NOTE(review): addr_exc_o, done_o, ld_o and st_o are not yielded.
        """
        yield self.rd.go
        yield self.go_ad_i
        yield self.wr.go
        yield self.go_st_i
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i
        yield self.busy_o
        yield self.rd.rel
        yield self.adr_rel_o
        yield self.sto_rel_o
        yield self.wr.rel
        yield self.data_o
        yield self.addr_o
        yield self.load_mem_o
        yield self.stwd_mem_o

    def ports(self):
        """Return everything produced by __iter__ as a list."""
        return list(self)
525
526
def wait_for(sig, wait=True, test1st=False):
    """Simulation helper: step until bool(sig) equals wait.

    sig is sampled once up front (and reported on stdout).  With test1st
    set, a match on that first sample returns straight away; otherwise
    at least one bare yield is always issued before sampling again.
    """
    sample = (yield sig)
    print("wait for", sig, sample, wait, test1st)
    if test1st and bool(sample) == wait:
        return
    reached = False
    while not reached:
        yield  # bare yield between samples
        sample = (yield sig)
        reached = bool(sample) == wait
538
539
def store(dut, src1, src2, src3, imm, imm_ok=True, update=False):
    """Simulation process driving a single ST through dut.

    src1/src2 (or imm when imm_ok is set) are the address operands and
    src3 is the third source operand.  Returns the value read from
    dut.addr_o when update is set, otherwise None.
    """
    print ("ST", src1, src2, src3, imm, imm_ok, update)

    # present opcode + operands and raise issue, all before the first tick
    oper = dut.oper_i
    setup = (oper.insn_type.eq(InternalOp.OP_STORE),
             dut.src1_i.eq(src1),
             dut.src2_i.eq(src2),
             dut.src3_i.eq(src3),
             oper.imm_data.imm.eq(imm),
             oper.imm_data.imm_ok.eq(imm_ok),
             oper.update.eq(update),
             dut.issue_i.eq(1))
    for stmt in setup:
        yield stmt
    yield
    yield dut.issue_i.eq(0)
    yield

    # acknowledge the reads: src2 (bit 1) is skipped in immediate mode
    rd_go = 0b101 if imm_ok else 0b111
    yield dut.rd.go.eq(rd_go)
    yield from wait_for(dut.rd.rel)
    yield dut.rd.go.eq(0)

    # ad.go is not driven here (TestLDSTCompUnit ties it to ad.rel):
    # just wait for the address request to drop
    yield from wait_for(dut.adr_rel_o, False, test1st=True)

    addr = None
    if update:
        # update mode: acknowledge the write on port 1 and sample addr_o
        yield from wait_for(dut.wr.rel[1])
        yield dut.wr.go.eq(0b10)
        yield
        addr = yield dut.addr_o
        print ("addr", addr)
        yield dut.wr.go.eq(0)

    # pulse go_st once the store is requested, then wait for not-busy
    yield from wait_for(dut.sto_rel_o)
    yield dut.go_st_i.eq(1)
    yield
    yield dut.go_st_i.eq(0)
    yield from wait_for(dut.busy_o, False)
    yield
    return addr
584
585
def load(dut, src1, src2, imm, imm_ok=True, update=False):
    """Simulation process driving a single LD through dut.

    src1/src2 (or imm when imm_ok is set) are the address operands.
    Returns a tuple (data, addr): data is the value read from dut.data_o
    and addr is the value read from dut.addr_o when update is set,
    otherwise None.
    """
    print ("LD", src1, src2, imm, imm_ok, update)
    yield dut.oper_i.insn_type.eq(InternalOp.OP_LOAD)
    yield dut.src1_i.eq(src1)
    yield dut.src2_i.eq(src2)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
    # drive the update bit explicitly (as store() does): without this the
    # unit sees whatever value the previous operation left on oper_i.update
    yield dut.oper_i.update.eq(update)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # acknowledge the reads: src2 (bit 1) is skipped in immediate mode
    if imm_ok:
        yield dut.rd.go.eq(0b01)
    else:
        yield dut.rd.go.eq(0b11)
    yield from wait_for(dut.rd.rel)
    yield dut.rd.go.eq(0)

    # ad.go is not driven here (TestLDSTCompUnit ties it to ad.rel):
    # just wait for the address request to drop
    yield from wait_for(dut.adr_rel_o, False, test1st=True)

    if update:
        # update mode: acknowledge the write on port 1 and sample addr_o
        yield from wait_for(dut.wr.rel[1])
        yield dut.wr.go.eq(0b10)
        yield
        addr = yield dut.addr_o
        print ("addr", addr)
        yield dut.wr.go.eq(0)
    else:
        addr = None

    # acknowledge the write on port 0 and sample the loaded data
    yield from wait_for(dut.wr.rel[0], test1st=True)
    yield dut.wr.go.eq(1)
    yield
    data = yield dut.data_o
    print (data)
    yield dut.wr.go.eq(0)
    # NOTE(review): this waits for busy_o to read as *true*, whereas
    # store() waits for it to go false - left unchanged, confirm intent
    yield from wait_for(dut.busy_o)
    yield
    return data, addr
629
630
def scoreboard_sim(dut):
    """Testbench process: exercise ST/LD in imm, indexed and update modes.

    Each store() call is (src1, src2, src3, imm); each load() call is
    (src1, src2, imm).  In immediate mode the address operands are src1
    and imm, in indexed mode src1 and src2.
    """

    ###################
    # immediate version

    # two STs with different address operands:
    # src3=3 with src1=4, imm=2, then src3=9 with src1=2, imm=2
    for base, payload in ((4, 3), (2, 9)):
        yield from store(dut, base, 0, payload, 2)
    yield
    # two LDs with the same address operands, in the same order as the STs
    for base, expected in ((4, 0x0003), (2, 0x0009)):
        data, addr = yield from load(dut, base, 0, 2)
        assert data == expected, "returned %x" % data
    yield

    # indexed version: address operands are src1=4 and src2=5
    yield from store(dut, 4, 5, 3, 0, imm_ok=False)
    data, addr = yield from load(dut, 4, 5, 0, imm_ok=False)
    assert data == 0x0003, "returned %x" % data

    # update-immediate version: addr_o is expected to read 4+2
    addr = yield from store(dut, 4, 6, 3, 2, update=True)
    assert addr == 0x0006, "returned %x" % addr

    # update-indexed version: addr_o is expected to read 4+5
    data, addr = yield from load(dut, 4, 5, 0, imm_ok=False, update=True)
    assert addr == 0x0009, "returned %x" % addr
659
class TestLDSTCompUnit(LDSTCompUnit):
    """LDSTCompUnit bundled with a TstL0CacheBuffer for simulation.

    The unit is connected to the PortInterface of the buffer's first
    dport, and address-go is looped back from address-rel so that the
    testbench does not need to drive it.
    """

    def __init__(self, rwid):
        # imported here rather than at module level, as in the original
        from soc.experiment.l0_cache import TstL0CacheBuffer
        self.l0 = TstL0CacheBuffer()
        port = self.l0.l0.dports[0].pi
        super().__init__(port, rwid, awid=4)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        m.submodules.l0 = self.l0
        # link addr-go direct to rel
        m.d.comb += self.ad.go.eq(self.ad.rel)
        return m
673
674
def test_scoreboard():
    """Write the 16-bit test unit out as RTLIL, then simulate it.

    Produces test_ldst_comp.il and test_ldst_comp.vcd in the current
    working directory.
    """
    dut = TestLDSTCompUnit(16)

    # RTLIL dump of the unit, using its own port list
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_ldst_comp.il", "w") as il_file:
        il_file.write(il_text)

    # run the scoreboard testbench, recording a VCD trace
    run_simulation(dut, scoreboard_sim(dut), vcd_name='test_ldst_comp.vcd')
683
684
# script entry point: running this file directly executes the testbench
if __name__ == '__main__':
    test_scoreboard()