add extra bugreport link
[soc.git] / src / soc / experiment / compldst_multi.py
1 """LOAD / STORE Computation Unit.
2
3 This module covers POWER9-compliant Load and Store operations,
4 with selection on each between immediate and indexed mode as
5 options for the calculation of the Effective Address (EA),
6 and also "update" mode which optionally stores that EA into
7 an additional register.
8
9 ----
10 Note: it took 15 attempts over several weeks to redraw the diagram
11 needed to capture this FSM properly. To understand it fully, please
12 take the time to review the links, video, and diagram.
13 ----
14
Stores are activated when Go_Store is enabled, and use a sync'd "ADD" to
compute the "Effective Address". When ready, the operand (src3_i) is
stored at the computed address (passed through to the PortInterface).
18
19 Loads are activated when Go_Write[0] is enabled. The EA is computed,
20 and (as long as there was no exception) the data comes out (at any
21 time from the PortInterface), and is captured by the LDCompSTUnit.
22
23 Both LD and ST may request that the address be computed from summing
24 operand1 (src[0]) with operand2 (src[1]) *or* by summing operand1 with
25 the immediate (from the opcode).
26
27 Both LD and ST may also request "update" mode (op_is_update) which
28 activates the use of Go_Write[1] to control storage of the EA into
29 a *second* operand in the register file.
30
31 Thus this module has *TWO* write-requests to the register file and
32 *THREE* read-requests to the register file (not all at the same time!)
33 The regfile port usage is:
34
35 * LD-imm 1R1W
36 * LD-imm-update 1R2W
37 * LD-idx 2R1W
38 * LD-idx-update 2R2W
39
40 * ST-imm 2R
41 * ST-imm-update 2R1W
42 * ST-idx 3R
43 * ST-idx-update 3R1W
44
45 It's a multi-level Finite State Machine that (unfortunately) nmigen.FSM
46 is not suited to (nmigen.FSM is clock-driven, and some aspects of
47 the nested FSMs below are *combinatorial*).
48
* One FSM covers Operand collection and communication address-side
  with the LD/ST PortInterface. Its role ends when "RD_DONE" is asserted.

* A second FSM activates to cover LD. It activates if op_is_ld is true.

* A third FSM activates to cover ST. It activates if op_is_st is true.
55
56 * The "overall" (fourth) FSM coordinates the progression and completion
57 of the three other FSMs, firing "WR_RESET" which switches off "busy"
58
59 Full diagram:
60
61 https://libre-soc.org/3d_gpu/ld_st_comp_unit.jpg
62
63 Links including to walk-through videos:
64
65 * https://libre-soc.org/3d_gpu/architecture/6600scoreboard/
66 * http://libre-soc.org/openpower/isa/fixedload
67 * http://libre-soc.org/openpower/isa/fixedstore
68
69 Related Bugreports:
70
71 * https://bugs.libre-soc.org/show_bug.cgi?id=302
72 * https://bugs.libre-soc.org/show_bug.cgi?id=216
73
74 Terminology:
75
76 * EA - Effective Address
77 * LD - Load
78 * ST - Store
79 """
80
81 from nmigen.compat.sim import run_simulation
82 from nmigen.cli import verilog, rtlil
83 from nmigen import Module, Signal, Mux, Cat, Elaboratable, Array, Repl
84 from nmigen.hdl.rec import Record, Layout
85
86 from nmutil.latch import SRLatch, latchregister
87
88 from soc.experiment.compalu_multi import go_record, CompUnitRecord
89 from soc.experiment.l0_cache import PortInterface
90 from soc.experiment.testmem import TestMemory
91
92 from soc.decoder.power_enums import InternalOp, Function
93 from soc.fu.ldst.ldst_input_record import CompLDSTOpSubset
94
95
class LDSTCompUnitRecord(CompUnitRecord):
    """CompUnitRecord specialised for the LD/ST Computation Unit.

    The base record is created for the CompLDSTOpSubset operation with
    three source operands and two destinations; this class then adds the
    address and store go/rel pairs plus the LD/ST status outputs.

    * :rwid: register width, passed straight through to CompUnitRecord
    * :name: optional record name, passed straight through
    """

    def __init__(self, rwid, name=None):
        # fixed port counts for LD/ST: 3 sources, 2 destinations
        super().__init__(CompLDSTOpSubset, rwid,
                         n_src=3, n_dst=2, name=name)

        # extra handshake pairs (go in, req out): address and store
        self.ad = go_record(1, name="ad")
        self.st = go_record(1, name="st")

        # address exception flag
        self.addr_exc_o = Signal(reset_less=True)

        # decoded operation type: LD / ST
        self.ld_o = Signal(reset_less=True)
        self.st_o = Signal(reset_less=True)

        # memory LOAD / STORE activation (original author was unsure
        # whether these two are actually needed)
        self.load_mem_o = Signal(reset_less=True)
        self.stwd_mem_o = Signal(reset_less=True)
112
113
class LDSTCompUnit(Elaboratable):
    """LOAD / STORE Computation Unit

    Inputs
    ------

    * :pi: a PortInterface to the memory subsystem (read-write capable)
    * :rwid: register width
    * :awid: address width
    * :debugtest: stored on the instance; not read anywhere in this class

    Data inputs
    -----------
    * :src_i: Source Operands (RA/RB/RC) - managed by rd[0-2] go/req

    Data (outputs)
    --------------
    * :data_o: Dest out (LD) - managed by wr[0] go/req
    * :addr_o: Address out (LD or ST) - managed by wr[1] go/req
    * :addr_exc_o: Address/Data Exception occurred. LD/ST must terminate

    TODO: make addr_exc_o a data-type rather than a single-bit signal
    (see bug #302)

    Control Signals (In)
    --------------------

    * :oper_i: operation being carried out (POWER9 decode LD/ST subset)
    * :issue_i: LD/ST is being "issued".
    * :shadown_i: Inverted-shadow is being held (stops STORE *and* WRITE)
    * :go_rd_i: read is being actioned (latches in src regs)
    * :go_wr_i: write mode (exactly like ALU CompUnit)
    * :go_ad_i: address is being actioned (triggers actual mem LD)
    * :go_st_i: store is being actioned (triggers actual mem STORE)
    * :go_die_i: resets the unit back to "wait for issue"

    Control Signals (Out)
    ---------------------

    * :busy_o: function unit is busy
    * :rd_rel_o: request src1/src2
    * :adr_rel_o: request address (from mem)
    * :sto_rel_o: request store (to mem)
    * :req_rel_o: request write (result)
    * :load_mem_o: activate memory LOAD
    * :stwd_mem_o: activate memory STORE

    Note: load_mem_o, stwd_mem_o and req_rel_o MUST all be acknowledged
    in a single cycle and the CompUnit set back to doing another op.
    This means deasserting go_st_i, go_ad_i or go_wr_i as appropriate
    depending on whether the operation is a ST or LD.
    """

    def __init__(self, pi=None, rwid=64, awid=48, debugtest=False):
        """Create the unit's record and expose its fields as attributes.

        All signals live in an LDSTCompUnitRecord (self.cu); the rest of
        this constructor only creates convenience aliases onto it.
        """
        self.rwid = rwid
        self.awid = awid
        self.pi = pi
        self.cu = cu = LDSTCompUnitRecord(rwid)
        self.debugtest = debugtest

        # POWER-compliant LD/ST has index and update: *fixed* number of ports
        self.n_src = n_src = 3 # RA, RB, RT/RS
        self.n_dst = n_dst = 2 # RA, RT/RS

        # set up array of src and dest signals
        # (aliases self.src1_i..src3_i onto the record's attributes)
        for i in range(n_src):
            j = i + 1 # name numbering to match src1/src2
            name = "src%d_i" % j
            setattr(self, name, getattr(cu, name))

        # NOTE(review): dst is never appended to or read in this class
        dst = []
        # (aliases self.dest1_o..dest2_o onto the record's attributes)
        for i in range(n_dst):
            j = i + 1 # name numbering to match dest1/2...
            name = "dest%d_o" % j
            setattr(self, name, getattr(cu, name))

        # convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.ad = cu.ad
        self.st = cu.st

        self.go_rd_i = self.rd.go # temporary naming
        self.go_wr_i = self.wr.go # temporary naming
        self.go_ad_i = self.ad.go # temp naming: go address in
        self.go_st_i = self.st.go # temp naming: go store in

        self.rd_rel_o = self.rd.rel # temporary naming
        self.req_rel_o = self.wr.rel # temporary naming
        self.adr_rel_o = self.ad.rel # request address (from mem)
        self.sto_rel_o = self.st.rel # request store (to mem)

        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        self.oper_i = cu.oper_i
        self.src_i = cu._src_i
        self.dest = cu._dest

        self.data_o = self.dest[0] # Dest1 out: RT
        self.addr_o = self.dest[1] # Address out (LD or ST) - Update => RA
        self.addr_exc_o = cu.addr_exc_o
        self.done_o = cu.done_o
        self.busy_o = cu.busy_o

        self.ld_o = cu.ld_o
        self.st_o = cu.st_o

        self.load_mem_o = cu.load_mem_o
        self.stwd_mem_o = cu.stwd_mem_o

    def elaborate(self, platform):
        """Build the Module: latches, EA adder, control and PortInterface.

        Returns the nmigen Module containing the SR latches that form
        the FSM, the registered Effective Address add, the request/done
        control signals and the wiring to self.pi.
        """
        m = Module()

        # temp/convenience
        comb = m.d.comb
        sync = m.d.sync
        issue_i = self.issue_i

        #####################
        # latches for the FSM.
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.alu_l = alu_l = SRLatch(sync=False, name="alu")
        m.submodules.adr_l = adr_l = SRLatch(sync=False, name="adr")
        m.submodules.lod_l = lod_l = SRLatch(sync=False, name="lod")
        m.submodules.sto_l = sto_l = SRLatch(sync=False, name="sto")
        m.submodules.wri_l = wri_l = SRLatch(sync=False, name="wri")
        m.submodules.upd_l = upd_l = SRLatch(sync=False, name="upd")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")

        ####################
        # signals

        # opcode decode
        op_is_ld = Signal(reset_less=True)
        op_is_st = Signal(reset_less=True)

        # ALU/LD data output control
        alu_valid = Signal(reset_less=True) # ALU operands are valid
        alu_ok = Signal(reset_less=True) # ALU out ok (1 clock delay valid)
        addr_ok = Signal(reset_less=True) # addr ok (from PortInterface)
        ld_ok = Signal(reset_less=True) # LD out ok from PortInterface
        wr_any = Signal(reset_less=True) # any write (incl. store)
        rda_any = Signal(reset_less=True) # any read for address ops
        rd_done = Signal(reset_less=True) # all *necessary* operands read
        wr_reset = Signal(reset_less=True) # final reset condition

        # LD and ALU out
        alu_o = Signal(self.rwid, reset_less=True)
        ldd_o = Signal(self.rwid, reset_less=True)

        # XXX TODO ZEROing just like in CompUnit


        ##############################
        # reset conditions for latches

        # temporaries (also convenient when debugging)
        reset_o = Signal(reset_less=True) # reset opcode
        reset_w = Signal(reset_less=True) # reset write
        reset_u = Signal(reset_less=True) # reset update
        reset_a = Signal(reset_less=True) # reset adr latch
        reset_i = Signal(reset_less=True) # issue|die (use a lot)
        reset_r = Signal(self.n_src, reset_less=True) # reset src
        reset_s = Signal(reset_less=True) # reset store

        # every reset condition is its own "go" signal OR'd with go_die_i
        comb += reset_i.eq(issue_i | self.go_die_i) # various
        comb += reset_o.eq(wr_reset | self.go_die_i) # opcode reset
        comb += reset_w.eq(self.wr.go[0] | self.go_die_i) # write reg 1
        comb += reset_u.eq(self.wr.go[1] | self.go_die_i) # update (reg 2)
        comb += reset_s.eq(self.go_st_i | self.go_die_i) # store reset
        comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))
        comb += reset_a.eq(self.go_ad_i | self.go_die_i)

        ##########################
        # FSM implemented through sequence of latches. approximately this:
        # - opc_l : opcode
        # - src_l[0] : operands
        # - src_l[1]
        # - alu_l : looks after add of src1/2/imm (EA)
        # - adr_l : waits for add (EA)
        # - upd_l : waits for adr and Regfile (port 2)
        # - src_l[2] : ST
        # - lod_l : waits for adr (EA) and for LD Data
        # - wri_l : waits for LD Data and Regfile (port 1)
        # - sto_l : waits for alu and operand2
        # - rst_l : waits for all FSM paths to converge.
        # NOTE: use sync to stop combinatorial loops.

        # opcode latch - inverted so that busy resets to 0
        # note this MUST be sync so as to avoid a combinatorial loop
        # between busy_o and issue_i on the reset latch (rst_l)
        sync += opc_l.s.eq(issue_i) # XXX NOTE: INVERTED FROM book!
        sync += opc_l.r.eq(reset_o) # XXX NOTE: INVERTED FROM book!

        # src operand latch
        sync += src_l.s.eq(Repl(issue_i, self.n_src))
        sync += src_l.r.eq(reset_r)

        # alu latch. use sync-delay between alu_ok and valid to generate pulse
        comb += alu_l.s.eq(reset_i)
        comb += alu_l.r.eq(alu_ok & ~alu_valid & ~rda_any)

        # addr latch
        comb += adr_l.s.eq(reset_i)
        sync += adr_l.r.eq(reset_a)

        # ld latch
        comb += lod_l.s.eq(reset_i)
        comb += lod_l.r.eq(ld_ok)

        # dest operand latch
        comb += wri_l.s.eq(issue_i)
        sync += wri_l.r.eq(reset_w)

        # update-mode operand latch (EA written to reg 2)
        sync += upd_l.s.eq(reset_i)
        sync += upd_l.r.eq(reset_u)

        # store latch
        comb += sto_l.s.eq(addr_ok & op_is_st)
        comb += sto_l.r.eq(reset_s)

        # reset latch
        comb += rst_l.s.eq(addr_ok) # start when address is ready
        comb += rst_l.r.eq(issue_i)

        # create a latch/register for the operand
        oper_r = CompLDSTOpSubset(name="oper_r") # Dest register
        latchregister(m, self.oper_i, oper_r, self.issue_i, name="oper_l")

        # and for LD
        ldd_r = Signal(self.rwid, reset_less=True) # Dest register
        latchregister(m, ldd_o, ldd_r, ld_ok, name="ldo_r")

        # and for each input from the incoming src operands
        srl = []
        for i in range(self.n_src):
            name = "src_r%d" % i
            src_r = Signal(self.rwid, name=name, reset_less=True)
            latchregister(m, self.src_i[i], src_r, src_l.q[i], name + '_l')
            srl.append(src_r)

        # and one for the output from the ADD (for the EA)
        addr_r = Signal(self.rwid, reset_less=True) # Effective Address Latch
        latchregister(m, alu_o, addr_r, alu_l.q, "ea_r")

        # select either zero or src1 if opcode says so
        op_is_z = oper_r.zero_a
        src1_or_z = Signal(self.rwid, reset_less=True)
        m.d.comb += src1_or_z.eq(Mux(op_is_z, 0, srl[0]))

        # select either immediate or src2 if opcode says so
        op_is_imm = oper_r.imm_data.imm_ok
        src2_or_imm = Signal(self.rwid, reset_less=True)
        m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm, srl[1]))

        # now do the ALU addr add: one cycle, and say "ready" (next cycle, too)
        sync += alu_o.eq(src1_or_z + src2_or_imm) # actual EA
        sync += alu_ok.eq(alu_valid) # keep ack in sync with EA

        # decode bits of operand (latched)
        comb += op_is_st.eq(oper_r.insn_type == InternalOp.OP_STORE) # ST
        comb += op_is_ld.eq(oper_r.insn_type == InternalOp.OP_LOAD) # LD
        op_is_update = oper_r.update # UPDATE
        comb += self.load_mem_o.eq(op_is_ld & self.go_ad_i)
        comb += self.stwd_mem_o.eq(op_is_st & self.go_st_i)
        comb += self.ld_o.eq(op_is_ld)
        comb += self.st_o.eq(op_is_st)

        ############################
        # Control Signal calculation

        # busy signal
        busy_o = self.busy_o
        comb += self.busy_o.eq(opc_l.q) # | self.pi.busy_o) # busy out

        # 1st operand read-request only when zero not active
        comb += self.rd.rel[0].eq(src_l.q[0] & busy_o & ~op_is_z)

        # 2nd operand only needed when immediate is not active
        comb += self.rd.rel[1].eq(src_l.q[1] & busy_o & ~op_is_imm)

        # note when the address-related read "go" signals are active
        comb += rda_any.eq(self.rd.go[0] | self.rd.go[1])

        # alu input valid when 1st and 2nd ops done (or imm not active)
        comb += alu_valid.eq(busy_o & ~(self.rd.rel[0] | self.rd.rel[1]))

        # 3rd operand only needed when operation is a store
        comb += self.rd.rel[2].eq(src_l.q[2] & busy_o & op_is_st)

        # all reads done when alu is valid and the 3rd operand is no
        # longer being requested (rd.rel[2] is low)
        comb += rd_done.eq(alu_valid & ~self.rd.rel[2])

        # address release when the addr latch is set and the unit is busy
        # NOTE(review): the original comment said "Port must be idle", but
        # no PortInterface busy/idle signal is part of this expression
        comb += self.adr_rel_o.eq(adr_l.q & busy_o)

        # store release when st ready *and* all operands read (and no shadow)
        comb += self.st.rel.eq(sto_l.q & busy_o & rd_done & op_is_st &
                               self.shadown_i)

        # request write of LD result. waits until shadow is dropped.
        comb += self.wr.rel[0].eq(wri_l.q & busy_o & lod_l.qn & op_is_ld &
                                  self.shadown_i)

        # request write of EA result only in update mode
        comb += self.wr.rel[1].eq(upd_l.q & busy_o & op_is_update &
                                  self.shadown_i)

        # wr_any: any of store-go / write-go[0] / write-go[1] is active.
        # NOTE(review): wr_any is assigned here but not read in this class
        comb += wr_any.eq(self.st.go | self.wr.go[0] | self.wr.go[1])
        # final reset ("done"): rst_l set, unit busy, shadow dropped, no
        # store/write request outstanding, and either lod_l.qn is high
        # or the operation is a store
        comb += wr_reset.eq(rst_l.q & busy_o & self.shadown_i &
                            ~(self.st.rel | self.wr.rel[0] | self.wr.rel[1]) &
                            (lod_l.qn | op_is_st))
        comb += self.done_o.eq(wr_reset)

        ######################
        # Data/Address outputs

        # put the LD-output register directly onto the output bus on a go_write
        with m.If(self.wr.go[0]):
            comb += self.data_o.eq(ldd_r)

        # "update" mode, put address out on 2nd go-write
        with m.If(op_is_update & self.wr.go[1]):
            comb += self.addr_o.eq(addr_r)

        ###########################
        # PortInterface connections
        pi = self.pi

        # connect to LD/ST PortInterface.
        comb += pi.is_ld_i.eq(op_is_ld & busy_o) # decoded-LD
        comb += pi.is_st_i.eq(op_is_st & busy_o) # decoded-ST
        # NOTE(review): this passes the *unlatched* oper_i, not oper_r
        comb += pi.op.eq(self.oper_i) # op details (not all needed)
        # address
        comb += pi.addr.data.eq(addr_r) # EA from adder
        comb += pi.addr.ok.eq(alu_ok & lod_l.q) # "go do address stuff"
        comb += self.addr_exc_o.eq(pi.addr_exc_o) # exception occurred
        comb += addr_ok.eq(self.pi.addr_ok_o) # no exc, address fine
        # ld - ld gets latched in via lod_l
        comb += ldd_o.eq(pi.ld.data) # ld data goes into ld reg (above)
        comb += ld_ok.eq(pi.ld.ok) # ld.ok *closes* (freezes) ld data
        # store - data goes in based on go_st
        comb += pi.st.data.eq(srl[2]) # 3rd operand latch
        comb += pi.st.ok.eq(self.st.go) # go store signals st data valid

        return m

    def __iter__(self):
        """Yield the unit's port signals: control/data inputs, then outputs."""
        yield self.rd.go
        yield self.go_ad_i
        yield self.wr.go
        yield self.go_st_i
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i
        yield self.busy_o
        yield self.rd.rel
        yield self.adr_rel_o
        yield self.sto_rel_o
        yield self.wr.rel
        yield self.data_o
        yield self.addr_o
        yield self.load_mem_o
        yield self.stwd_mem_o

    def ports(self):
        """Return the signals produced by __iter__ as a list."""
        return list(self)
487
488
def wait_for(sig, wait=True, test1st=False):
    """Simulation helper: block until bool(sig) equals `wait`.

    * :sig: signal to sample (yielded to the simulator to read its value)
    * :wait: the boolean level being waited for
    * :test1st: when true, return immediately if the very first sample
      already matches; otherwise at least one bare yield always happens
      before the signal is sampled again
    """
    sample = (yield sig)
    print("wait for", sig, sample, wait, test1st)
    if test1st and bool(sample) == wait:
        return
    matched = False
    while not matched:
        # advance the simulation (bare yield), then re-sample the signal
        yield
        sample = (yield sig)
        matched = bool(sample) == wait
500
501
def store(dut, src1, src2, src3, imm, imm_ok=True, update=False):
    """Simulation helper: drive one ST operation through the DUT.

    * :src1: value placed on src1_i
    * :src2: value placed on src2_i
    * :src3: value placed on src3_i
    * :imm: value placed on oper_i.imm_data.imm
    * :imm_ok: value placed on oper_i.imm_data.imm_ok; also selects
      which read "go" bits are raised
    * :update: value placed on oper_i.update; when true the second
      write port is acknowledged and addr_o is sampled

    Returns the sampled addr_o when update is true, otherwise None.
    """
    print ("ST", src1, src2, src3, imm, imm_ok, update)

    # set up the operation and its operands, then pulse issue
    oper = dut.oper_i
    yield oper.insn_type.eq(InternalOp.OP_STORE)
    yield dut.src1_i.eq(src1)
    yield dut.src2_i.eq(src2)
    yield dut.src3_i.eq(src3)
    yield oper.imm_data.imm.eq(imm)
    yield oper.imm_data.imm_ok.eq(imm_ok)
    yield oper.update.eq(update)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # read "go": bit 1 (src2) is left out when the immediate is in use
    rd_mask = 0b101 if imm_ok else 0b111
    yield dut.rd.go.eq(rd_mask)
    yield from wait_for(dut.rd.rel)
    yield dut.rd.go.eq(0)

    yield from wait_for(dut.adr_rel_o, False, test1st=True)
    #yield from wait_for(dut.adr_rel_o)
    #yield dut.ad.go.eq(1)
    #yield
    #yield dut.ad.go.eq(0)

    # update mode: acknowledge write port 1 and capture the address
    addr = None
    if update:
        yield from wait_for(dut.wr.rel[1])
        yield dut.wr.go.eq(0b10)
        yield
        addr = yield dut.addr_o
        print ("addr", addr)
        yield dut.wr.go.eq(0)

    # acknowledge the store request, then wait for busy to drop
    yield from wait_for(dut.sto_rel_o)
    yield dut.go_st_i.eq(1)
    yield
    yield dut.go_st_i.eq(0)
    yield from wait_for(dut.busy_o, False)
    #wait_for(dut.stwd_mem_o)
    yield
    return addr
546
547
def load(dut, src1, src2, imm, imm_ok=True, update=False, zero_a=False):
    """Simulation helper: drive one LD operation through the DUT.

    * :src1: value placed on src1_i
    * :src2: value placed on src2_i
    * :imm: value placed on oper_i.imm_data.imm
    * :imm_ok: value placed on oper_i.imm_data.imm_ok; when false the
      read "go" for operand 2 is raised
    * :update: value placed on oper_i.update; when true the second
      write port is acknowledged and addr_o is sampled
    * :zero_a: value placed on oper_i.zero_a; when false the read "go"
      for operand 1 is raised

    Returns a tuple (data, addr): data is the sampled data_o, addr is
    the sampled addr_o in update mode and None otherwise.
    """
    print ("LD", src1, src2, imm, imm_ok, update)
    yield dut.oper_i.insn_type.eq(InternalOp.OP_LOAD)
    yield dut.src1_i.eq(src1)
    yield dut.src2_i.eq(src2)
    yield dut.oper_i.zero_a.eq(zero_a)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
    # drive the update flag explicitly, exactly as store() does: without
    # this the DUT keeps whatever value a previous operation left on
    # oper_i.update, regardless of the `update` argument given here
    yield dut.oper_i.update.eq(update)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # only raise read "go" for the operands this LD actually uses
    rd = 0b00
    if not imm_ok:
        rd |= 0b10
    if not zero_a:
        rd |= 0b01

    if rd:
        yield dut.rd.go.eq(rd)
        yield from wait_for(dut.rd.rel)
        yield dut.rd.go.eq(0)

    yield from wait_for(dut.adr_rel_o, False, test1st=True)
    #yield dut.ad.go.eq(1)
    #yield
    #yield dut.ad.go.eq(0)

    # update mode: acknowledge write port 1 and capture the address
    if update:
        yield from wait_for(dut.wr.rel[1])
        yield dut.wr.go.eq(0b10)
        yield
        addr = yield dut.addr_o
        print ("addr", addr)
        yield dut.wr.go.eq(0)
    else:
        addr = None

    # acknowledge write port 0 and capture the loaded data
    yield from wait_for(dut.wr.rel[0], test1st=True)
    yield dut.wr.go.eq(1)
    yield
    data = yield dut.data_o
    print (data)
    yield dut.wr.go.eq(0)
    # NOTE(review): this waits for busy_o to be *set*, whereas store()
    # waits for it to clear - left unchanged, confirm which is intended
    yield from wait_for(dut.busy_o)
    yield
    # wait_for(dut.stwd_mem_o)
    return data, addr
596
597
def scoreboard_sim(dut):
    """Simulation process: run a series of ST/LD round-trips on the DUT.

    Covers immediate, indexed, update and zero-operand addressing and
    asserts on the data (and, in update mode, the address) returned by
    the store() and load() helpers.
    """

    ###################
    # immediate version

    # two STs to different addresses: value 3 with src1=4, imm=2,
    # then value 9 with src1=2, imm=2
    yield from store(dut, 4, 0, 3, 2)
    yield from store(dut, 2, 0, 9, 2)
    yield
    # two LDs with the same src1/imm pairs, in the same order
    ld_data, ea = yield from load(dut, 4, 0, 2)
    assert ld_data == 0x0003, "returned %x" % ld_data
    ld_data, ea = yield from load(dut, 2, 0, 2)
    assert ld_data == 0x0009, "returned %x" % ld_data
    yield

    # indexed version: address operands are src1=4 and src2=5
    yield from store(dut, 4, 5, 3, 0, imm_ok=False)
    ld_data, ea = yield from load(dut, 4, 5, 0, imm_ok=False)
    assert ld_data == 0x0003, "returned %x" % ld_data

    # update-immediate version: the returned address is checked
    ea = yield from store(dut, 4, 6, 3, 2, update=True)
    assert ea == 0x0006, "returned %x" % ea

    # update-indexed version: both data and address are checked
    ld_data, ea = yield from load(dut, 4, 5, 0, imm_ok=False, update=True)
    assert ld_data == 0x0003, "returned %x" % ld_data
    assert ea == 0x0009, "returned %x" % ea

    # immediate *and* zero version: zero_a set, imm=9
    ld_data, ea = yield from load(dut, 4, 5, 9, imm_ok=True, zero_a=True)
    assert ld_data == 0x0003, "returned %x" % ld_data
631
632
class TestLDSTCompUnit(LDSTCompUnit):
    """LDSTCompUnit bundled with a TstL0CacheBuffer for simulation.

    The unit's PortInterface is taken from the buffer's first data port,
    and the address "go" is tied directly to the address "rel".
    """

    def __init__(self, rwid):
        # imported here rather than at module level, as in the original
        from soc.experiment.l0_cache import TstL0CacheBuffer
        l0buf = TstL0CacheBuffer()
        self.l0 = l0buf
        # third positional argument is awid
        super().__init__(l0buf.l0.dports[0].pi, rwid, 4)

    def elaborate(self, platform):
        mod = super().elaborate(platform)
        mod.submodules.l0 = self.l0
        # link addr-go direct to rel
        mod.d.comb += self.ad.go.eq(self.ad.rel)
        return mod
646
647
def test_scoreboard():
    """Write the DUT's RTLIL to test_ldst_comp.il, then run the simulation.

    The simulation runs scoreboard_sim and records a VCD trace in
    test_ldst_comp.vcd.
    """
    unit = TestLDSTCompUnit(16)

    # dump the RTLIL conversion of the design
    il_text = rtlil.convert(unit, ports=unit.ports())
    with open("test_ldst_comp.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(unit, scoreboard_sim(unit), vcd_name='test_ldst_comp.vcd')
656
657
# when executed as a script, run the RTLIL dump and simulation test
if __name__ == '__main__':
    test_scoreboard()