compldst_multi: add debug output for dcbz
[soc.git] / src / soc / experiment / compldst_multi.py
1 """LOAD / STORE Computation Unit.
2
3 This module covers POWER9-compliant Load and Store operations,
4 with selection on each between immediate and indexed mode as
5 options for the calculation of the Effective Address (EA),
6 and also "update" mode which optionally stores that EA into
7 an additional register.
8
9 ----
10 Note: it took 15 attempts over several weeks to redraw the diagram
11 needed to capture this FSM properly. To understand it fully, please
12 take the time to review the links, video, and diagram.
13 ----
14
15 Stores are activated when Go_Store is enabled, and use a sync'd "ADD" to
16 compute the "Effective Address", and, when ready the operand (src3_i)
17 is stored in the computed address (passed through to the PortInterface)
18
19 Loads are activated when Go_Write[0] is enabled. The EA is computed,
20 and (as long as there was no exception) the data comes out (at any
21 time from the PortInterface), and is captured by the LDCompSTUnit.
22
23 Both LD and ST may request that the address be computed from summing
24 operand1 (src[0]) with operand2 (src[1]) *or* by summing operand1 with
25 the immediate (from the opcode).
26
27 Both LD and ST may also request "update" mode (op_is_update) which
28 activates the use of Go_Write[1] to control storage of the EA into
29 a *second* operand in the register file.
30
31 Thus this module has *TWO* write-requests to the register file and
32 *THREE* read-requests to the register file (not all at the same time!)
33 The regfile port usage is:
34
35 * LD-imm 1R1W
36 * LD-imm-update 1R2W
37 * LD-idx 2R1W
38 * LD-idx-update 2R2W
39
40 * ST-imm 2R
41 * ST-imm-update 2R1W
42 * ST-idx 3R
43 * ST-idx-update 3R1W
44
45 It's a multi-level Finite State Machine that (unfortunately) nmigen.FSM
46 is not suited to (nmigen.FSM is clock-driven, and some aspects of
47 the nested FSMs below are *combinatorial*).
48
49 * One FSM covers Operand collection and communication address-side
50 with the LD/ST PortInterface. its role ends when "RD_DONE" is asserted
51
52 * A second FSM activates to cover LD. it activates if op_is_ld is true
53
54 * A third FSM activates to cover ST. it activates if op_is_st is true
55
56 * The "overall" (fourth) FSM coordinates the progression and completion
57 of the three other FSMs, firing "WR_RESET" which switches off "busy"
58
59 Full diagram:
60
61 https://libre-soc.org/3d_gpu/ld_st_comp_unit.jpg
62
63 Links including to walk-through videos:
64
65 * https://libre-soc.org/3d_gpu/architecture/6600scoreboard/
66 * http://libre-soc.org/openpower/isa/fixedload
67 * http://libre-soc.org/openpower/isa/fixedstore
68
69 Related Bugreports:
70
71 * https://bugs.libre-soc.org/show_bug.cgi?id=302
72 * https://bugs.libre-soc.org/show_bug.cgi?id=216
73
74 Terminology:
75
76 * EA - Effective Address
77 * LD - Load
78 * ST - Store
79 """
80
81 from nmigen.compat.sim import run_simulation
82 from nmigen.cli import verilog, rtlil
83 from nmigen import Module, Signal, Mux, Cat, Elaboratable, Array, Repl
84 from nmigen.hdl.rec import Record, Layout
85
86 from nmutil.latch import SRLatch, latchregister
87 from nmutil.byterev import byte_reverse
88 from nmutil.extend import exts
89
90 from soc.experiment.compalu_multi import go_record, CompUnitRecord
91 from soc.experiment.l0_cache import PortInterface
92 from soc.experiment.pimem import LDSTException
93 from soc.fu.regspec import RegSpecAPI
94
95 from openpower.decoder.power_enums import MicrOp, Function, LDSTMode
96 from soc.fu.ldst.ldst_input_record import CompLDSTOpSubset
97 from openpower.decoder.power_decoder2 import Data
98 from openpower.consts import MSR
99
100 # for debugging dcbz
101 from nmutil.util import Display
102
103
104 # TODO: LDSTInputData and LDSTOutputData really should be used
105 # here, to make things more like the other CompUnits. currently,
106 # also, RegSpecAPI is used explicitly here
107
108
class LDSTCompUnitRecord(CompUnitRecord):
    """CompUnitRecord extended with the LD/ST-specific control signals.

    Configured for three source operands and two destinations, and adds
    the address and store go/release records, the exception record, and
    the LD/ST indicator outputs.
    """

    def __init__(self, rwid, opsubset=CompLDSTOpSubset, name=None):
        super().__init__(opsubset, rwid, n_src=3, n_dst=2, name=name)

        # go/release handshake pairs.  NOTE: attributes are created in the
        # same order as before, in case the order determines field order.
        self.ad = go_record(1, name="cu_ad")  # address: go in, req out
        self.st = go_record(1, name="cu_st")  # store: go in, req out

        # exception information passed out of the unit
        self.exc_o = LDSTException("exc_o")

        # decoded operation type
        self.ld_o = Signal(reset_less=True)  # operation is a LD
        self.st_o = Signal(reset_less=True)  # operation is a ST

        # hmm... are these necessary?
        self.load_mem_o = Signal(reset_less=True)  # activate memory LOAD
        self.stwd_mem_o = Signal(reset_less=True)  # activate memory STORE
125
126
class LDSTCompUnit(RegSpecAPI, Elaboratable):
    """LOAD / STORE Computation Unit

    Inputs
    ------

    * :pi:      a PortInterface to the memory subsystem (read-write capable)
    * :rwid:    register width
    * :awid:    address width

    Data inputs
    -----------
    * :src_i:  Source Operands (RA/RB/RC) - managed by rd[0-2] go/req

    Data (outputs)
    --------------
    * :data_o:     Dest out (LD)          - managed by wr[0] go/req
    * :addr_o:     Address out (LD or ST) - managed by wr[1] go/req
    * :exc_o:      Address/Data Exception occurred.  LD/ST must terminate

    TODO: make exc_o a data-type rather than a single-bit signal
          (see bug #302)

    Control Signals (In)
    --------------------

    * :oper_i:     operation being carried out (POWER9 decode LD/ST subset)
    * :issue_i:    LD/ST is being "issued".
    * :shadown_i:  Inverted-shadow is being held (stops STORE *and* WRITE)
    * :go_rd_i:    read is being actioned (latches in src regs)
    * :go_wr_i:    write mode (exactly like ALU CompUnit)
    * :go_ad_i:    address is being actioned (triggers actual mem LD)
    * :go_st_i:    store is being actioned (triggers actual mem STORE)
    * :go_die_i:   resets the unit back to "wait for issue"

    Control Signals (Out)
    ---------------------

    * :busy_o:      function unit is busy
    * :rd_rel_o:    request src1/src2
    * :adr_rel_o:   request address (from mem)
    * :sto_rel_o:   request store (to mem)
    * :req_rel_o:   request write (result)
    * :load_mem_o:  activate memory LOAD
    * :stwd_mem_o:  activate memory STORE

    Note: load_mem_o, stwd_mem_o and req_rel_o MUST all be acknowledged
    in a single cycle and the CompUnit set back to doing another op.
    This means deasserting go_st_i, go_ad_i or go_wr_i as appropriate
    depending on whether the operation is a ST or LD.

    Note: LDSTCompUnit takes care of LE/BE normalisation:
    * LD data is normalised after receipt from the PortInterface
    * ST data is normalised *prior* to sending onto the PortInterface
    TODO: use one module for the byte-reverse as it's quite expensive in gates

    Note: when exc_o.happened is raised, the store/write release requests
    and the "done" condition are all suppressed, so the unit remains busy
    until go_die_i is asserted.
    """

    def __init__(self, pi=None, rwid=64, awid=48, opsubset=CompLDSTOpSubset,
                 debugtest=False, name=None):
        super().__init__(rwid)
        self.awid = awid
        self.pi = pi
        self.cu = cu = LDSTCompUnitRecord(rwid, opsubset, name=name)
        self.debugtest = debugtest

        # POWER-compliant LD/ST has index and update: *fixed* number of ports
        self.n_src = n_src = 3   # RA, RB, RT/RS
        self.n_dst = n_dst = 2  # RA, RT/RS

        # expose the record's src/dest signals directly on this object.
        # (a separate local is used so that the "name" argument is not
        # clobbered by the loop)
        for i in range(n_src):
            attr = "src%d_i" % (i + 1)  # name numbering to match src1/src2
            setattr(self, attr, getattr(cu, attr))

        for i in range(n_dst):
            attr = "dest%d_o" % (i + 1)  # name numbering to match dest1/2...
            setattr(self, attr, getattr(cu, attr))

        # convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.rdmaskn = cu.rdmaskn
        self.wrmask = cu.wrmask
        self.ad = cu.ad
        self.st = cu.st
        self.dest = cu._dest

        # HACK: get data width from dest[0].  this is used across the board
        # (it really shouldn't be)
        self.data_wid = self.dest[0].shape()

        self.go_rd_i = self.rd.go_i  # temporary naming
        self.go_wr_i = self.wr.go_i  # temporary naming
        self.go_ad_i = self.ad.go_i  # temp naming: go address in
        self.go_st_i = self.st.go_i  # temp naming: go store in

        self.rd_rel_o = self.rd.rel_o  # temporary naming
        self.req_rel_o = self.wr.rel_o  # temporary naming
        self.adr_rel_o = self.ad.rel_o  # request address (from mem)
        self.sto_rel_o = self.st.rel_o  # request store (to mem)

        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        self.oper_i = cu.oper_i
        self.src_i = cu._src_i

        self.data_o = Data(self.data_wid, name="o")   # Dest1 out: RT
        self.addr_o = Data(self.data_wid, name="ea")  # Addr out: Update => RA
        self.exc_o = cu.exc_o
        self.done_o = cu.done_o
        self.busy_o = cu.busy_o

        self.ld_o = cu.ld_o
        self.st_o = cu.st_o

        self.load_mem_o = cu.load_mem_o
        self.stwd_mem_o = cu.stwd_mem_o

    def elaborate(self, platform):
        """Build the latch-based FSMs and PortInterface connections."""
        m = Module()

        # temp/convenience
        comb = m.d.comb
        sync = m.d.sync
        issue_i = self.issue_i

        #####################
        # latches for the FSM.
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.alu_l = alu_l = SRLatch(sync=False, name="alu")
        m.submodules.adr_l = adr_l = SRLatch(sync=False, name="adr")
        m.submodules.lod_l = lod_l = SRLatch(sync=False, name="lod")
        m.submodules.sto_l = sto_l = SRLatch(sync=False, name="sto")
        m.submodules.wri_l = wri_l = SRLatch(sync=False, name="wri")
        m.submodules.upd_l = upd_l = SRLatch(sync=False, name="upd")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
        m.submodules.lsd_l = lsd_l = SRLatch(sync=False, name="lsd")  # done

        ####################
        # signals

        # opcode decode
        op_is_ld = Signal(reset_less=True)
        op_is_st = Signal(reset_less=True)

        # ALU/LD data output control
        alu_valid = Signal(reset_less=True)  # ALU operands are valid
        alu_ok = Signal(reset_less=True)     # ALU out ok (1 clock delay valid)
        addr_ok = Signal(reset_less=True)    # addr ok (from PortInterface)
        ld_ok = Signal(reset_less=True)      # LD out ok from PortInterface
        wr_any = Signal(reset_less=True)     # any write (incl. store)
        rda_any = Signal(reset_less=True)    # any read for address ops
        rd_done = Signal(reset_less=True)    # all *necessary* operands read
        wr_reset = Signal(reset_less=True)   # final reset condition

        # LD and ALU out
        alu_o = Signal(self.data_wid, reset_less=True)
        ldd_o = Signal(self.data_wid, reset_less=True)

        ##############################
        # reset conditions for latches

        # temporaries (also convenient when debugging)
        reset_o = Signal(reset_less=True)              # reset opcode
        reset_w = Signal(reset_less=True)              # reset write
        reset_u = Signal(reset_less=True)              # reset update
        reset_a = Signal(reset_less=True)              # reset adr latch
        reset_i = Signal(reset_less=True)              # issue|die (use a lot)
        reset_r = Signal(self.n_src, reset_less=True)  # reset src
        reset_s = Signal(reset_less=True)              # reset store

        comb += reset_i.eq(issue_i | self.go_die_i)            # various
        comb += reset_o.eq(self.done_o | self.go_die_i)        # opcode reset
        comb += reset_w.eq(self.wr.go_i[0] | self.go_die_i)    # write reg 1
        comb += reset_u.eq(self.wr.go_i[1] | self.go_die_i)    # update (reg 2)
        comb += reset_s.eq(self.go_st_i | self.go_die_i)       # store reset
        comb += reset_r.eq(self.rd.go_i | Repl(self.go_die_i, self.n_src))
        comb += reset_a.eq(self.go_ad_i | self.go_die_i)

        # store-go, delayed by one clock
        p_st_go = Signal(reset_less=True)
        sync += p_st_go.eq(self.st.go_i)

        # decode bits of operand (latched)
        oper_r = CompLDSTOpSubset(name="oper_r")  # Dest register
        comb += op_is_st.eq(oper_r.insn_type == MicrOp.OP_STORE)  # ST
        comb += op_is_ld.eq(oper_r.insn_type == MicrOp.OP_LOAD)   # LD
        # debug output only: dcbz is not otherwise decoded in this unit
        comb += Display("compldst_multi: op_is_dcbz = %i",
                        (oper_r.insn_type == MicrOp.OP_DCBZ))
        op_is_update = oper_r.ldst_mode == LDSTMode.update        # UPDATE
        op_is_cix = oper_r.ldst_mode == LDSTMode.cix              # cache-inhibit
        comb += self.load_mem_o.eq(op_is_ld & self.go_ad_i)
        comb += self.stwd_mem_o.eq(op_is_st & self.go_st_i)
        comb += self.ld_o.eq(op_is_ld)
        comb += self.st_o.eq(op_is_st)

        ##########################
        # FSM implemented through sequence of latches.  approximately this:
        # - opc_l       : opcode
        #    - src_l[0] : operands
        #    - src_l[1]
        #       - alu_l : looks after add of src1/2/imm (EA)
        #       - adr_l : waits for add (EA)
        #       - upd_l : waits for adr and Regfile (port 2)
        #    - src_l[2] : ST
        # - lod_l       : waits for adr (EA) and for LD Data
        # - wri_l       : waits for LD Data and Regfile (port 1)
        # - st_l        : waits for alu and operand2
        # - rst_l       : waits for all FSM paths to converge.
        # NOTE: use sync to stop combinatorial loops.

        # opcode latch - inverted so that busy resets to 0
        # note this MUST be sync so as to avoid a combinatorial loop
        # between busy_o and issue_i on the reset latch (rst_l)
        sync += opc_l.s.eq(issue_i)  # XXX NOTE: INVERTED FROM book!
        sync += opc_l.r.eq(reset_o)  # XXX NOTE: INVERTED FROM book!

        # src operand latch
        sync += src_l.s.eq(Repl(issue_i, self.n_src))
        sync += src_l.r.eq(reset_r)

        # alu latch.  use sync-delay between alu_ok and valid to generate pulse
        comb += alu_l.s.eq(reset_i)
        comb += alu_l.r.eq(alu_ok & ~alu_valid & ~rda_any)

        # addr latch
        comb += adr_l.s.eq(reset_i)
        sync += adr_l.r.eq(reset_a)

        # ld latch
        comb += lod_l.s.eq(reset_i)
        comb += lod_l.r.eq(ld_ok)

        # dest operand latch
        comb += wri_l.s.eq(issue_i)
        sync += wri_l.r.eq(reset_w | Repl(wr_reset |
                                          (~self.pi.busy_o & op_is_update),
                                          self.n_dst))

        # update-mode operand latch (EA written to reg 2)
        sync += upd_l.s.eq(reset_i)
        sync += upd_l.r.eq(reset_u)

        # store latch
        comb += sto_l.s.eq(addr_ok & op_is_st)
        sync += sto_l.r.eq(reset_s | p_st_go)

        # ld/st done.  needed to stop LD/ST from activating repeatedly
        comb += lsd_l.s.eq(issue_i)
        sync += lsd_l.r.eq(reset_s | p_st_go | ld_ok)

        # reset latch
        comb += rst_l.s.eq(addr_ok)  # start when address is ready
        comb += rst_l.r.eq(issue_i)

        # create a latch/register for the operand
        with m.If(self.issue_i):
            sync += oper_r.eq(self.oper_i)
        with m.If(self.done_o):
            sync += oper_r.eq(0)

        # and for LD
        ldd_r = Signal(self.data_wid, reset_less=True)  # Dest register
        latchregister(m, ldd_o, ldd_r, ld_ok, name="ldo_r")

        # and for each input from the incoming src operands
        srl = []
        for i in range(self.n_src):
            name = "src_r%d" % i
            src_r = Signal(self.data_wid, name=name, reset_less=True)
            with m.If(self.rd.go_i[i]):
                sync += src_r.eq(self.src_i[i])
            with m.If(self.issue_i):
                sync += src_r.eq(0)
            srl.append(src_r)

        # and one for the output from the ADD (for the EA)
        addr_r = Signal(self.data_wid, reset_less=True)  # Effective Address
        latchregister(m, alu_o, addr_r, alu_l.q, "ea_r")

        # select either zero or src1 if opcode says so
        op_is_z = oper_r.zero_a
        src1_or_z = Signal(self.data_wid, reset_less=True)
        m.d.comb += src1_or_z.eq(Mux(op_is_z, 0, srl[0]))

        # select either immediate or src2 if opcode says so
        op_is_imm = oper_r.imm_data.ok
        src2_or_imm = Signal(self.data_wid, reset_less=True)
        m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.data, srl[1]))

        # now do the ALU addr add: one cycle, and say "ready" (next cycle, too)
        comb += alu_o.eq(src1_or_z + src2_or_imm)  # actual EA
        m.d.sync += alu_ok.eq(alu_valid)           # keep ack in sync with EA

        ############################
        # Control Signal calculation

        # busy signal
        busy_o = self.busy_o
        comb += self.busy_o.eq(opc_l.q)  # | self.pi.busy_o)  # busy out

        # 1st operand read-request only when zero not active
        # 2nd operand only needed when immediate is not active
        slg = Cat(op_is_z, op_is_imm)
        bro = Repl(self.busy_o, self.n_src)
        comb += self.rd.rel_o.eq(src_l.q & bro & ~slg & ~self.rdmaskn)

        # note when the address-related read "go" signals are active
        comb += rda_any.eq(self.rd.go_i[0] | self.rd.go_i[1])

        # alu input valid when 1st and 2nd ops done (or imm not active)
        comb += alu_valid.eq(busy_o & ~(self.rd.rel_o[0] | self.rd.rel_o[1]))

        # 3rd operand only needed when operation is a store
        # (this overrides bit 2 of the rd.rel_o assignment above)
        comb += self.rd.rel_o[2].eq(src_l.q[2] & busy_o & op_is_st)

        # all reads done when alu is valid and 3rd operand needed
        comb += rd_done.eq(alu_valid & ~self.rd.rel_o[2])

        # address release only if addr ready, but Port must be idle
        comb += self.adr_rel_o.eq(alu_valid & adr_l.q & busy_o)

        # the write/store (etc) all must be cancelled if an exception occurs.
        # "cancel" is ANDed into every release below, so it is active-LOW,
        # exactly like shadown_i: it must be 1 only when there is neither
        # an exception (exc_o.happened is active-high) nor a shadow.
        # (previously "happened | shadown_i", which let an exception
        # *enable* the releases and override an active shadow.)
        cancel = Signal(reset_less=True)
        comb += cancel.eq(~self.exc_o.happened & self.shadown_i)

        # store release when st ready *and* all operands read (and no shadow)
        comb += self.st.rel_o.eq(sto_l.q & busy_o & rd_done & op_is_st &
                                 cancel)

        # request write of LD result.  waits until shadow is dropped.
        comb += self.wr.rel_o[0].eq(rd_done & wri_l.q & busy_o & lod_l.qn &
                                    op_is_ld & cancel)

        # request write of EA result only in update mode
        comb += self.wr.rel_o[1].eq(upd_l.q & busy_o & op_is_update &
                                    alu_valid & cancel)

        # provide "done" signal: select req_rel for non-LD/ST, adr_rel for LD/ST
        comb += wr_any.eq(self.st.go_i | p_st_go |
                          self.wr.go_i[0] | self.wr.go_i[1])
        comb += wr_reset.eq(rst_l.q & busy_o & cancel &
                            ~(self.st.rel_o | self.wr.rel_o[0] |
                              self.wr.rel_o[1]) &
                            (lod_l.qn | op_is_st)
                            )
        comb += self.done_o.eq(wr_reset & (~self.pi.busy_o | op_is_ld))

        ######################
        # Data/Address outputs

        # put the LD-output register directly onto the output bus on a go_write
        comb += self.data_o.data.eq(self.dest[0])
        with m.If(self.wr.go_i[0]):
            comb += self.dest[0].eq(ldd_r)

        # "update" mode, put address out on 2nd go-write
        comb += self.addr_o.data.eq(self.dest[1])
        with m.If(op_is_update & self.wr.go_i[1]):
            comb += self.dest[1].eq(addr_r)

        # need to look like MultiCompUnit: put wrmask out.
        # XXX may need to make this enable only when write active
        comb += self.wrmask.eq(bro & Cat(op_is_ld, op_is_update))

        ###########################
        # PortInterface connections
        pi = self.pi

        # connect to LD/ST PortInterface.
        comb += pi.is_ld_i.eq(op_is_ld & busy_o)  # decoded-LD
        comb += pi.is_st_i.eq(op_is_st & busy_o)  # decoded-ST
        comb += pi.data_len.eq(oper_r.data_len)   # data_len
        # address: use sync to avoid long latency
        sync += pi.addr.data.eq(addr_r)           # EA from adder
        sync += pi.addr.ok.eq(alu_ok & lsd_l.q)   # "do address stuff" (once)
        comb += self.exc_o.eq(pi.exc_o)           # exception occurred
        comb += addr_ok.eq(self.pi.addr_ok_o)     # no exc, address fine
        # connect MSR.PR for priv/virt operation
        comb += pi.msr_pr.eq(oper_r.msr[MSR.PR])

        # byte-reverse on LD
        revnorev = Signal(64, reset_less=True)
        with m.If(oper_r.byte_reverse):
            # byte-reverse the data based on ld/st width (turn it to LE)
            data_len = oper_r.data_len
            lddata_r = byte_reverse(m, 'lddata_r', pi.ld.data, data_len)
            comb += revnorev.eq(lddata_r)  # put reversed- data out
        with m.Else():
            comb += revnorev.eq(pi.ld.data)  # put data out, straight (as BE)

        # then check sign-extend
        with m.If(oper_r.sign_extend):
            # okok really should "if data_len == 4" and so on here
            with m.If(oper_r.data_len == 2):
                comb += ldd_o.eq(exts(revnorev, 16, 64))  # sign-extend hword
            with m.Else():
                comb += ldd_o.eq(exts(revnorev, 32, 64))  # sign-extend word
        with m.Else():
            comb += ldd_o.eq(revnorev)

        # ld - ld gets latched in via lod_l
        comb += ld_ok.eq(pi.ld.ok)  # ld.ok *closes* (freezes) ld data

        # byte-reverse on ST
        op3 = srl[2]  # 3rd operand latch
        with m.If(oper_r.byte_reverse):
            # byte-reverse the data based on width
            data_len = oper_r.data_len
            stdata_r = byte_reverse(m, 'stdata_r', op3, data_len)
            comb += pi.st.data.eq(stdata_r)
        with m.Else():
            comb += pi.st.data.eq(op3)
        # store - data goes in based on go_st
        comb += pi.st.ok.eq(self.st.go_i)  # go store signals st data valid

        return m

    def get_out(self, i):
        """make LDSTCompUnit look like RegSpecALUAPI.  these correspond
        to LDSTOutputData o and o1 respectively.

        Returns None for any index other than 0 or 1.
        """
        if i == 0:
            return self.data_o  # LDSTOutputData.regspec o
        if i == 1:
            return self.addr_o  # LDSTOutputData.regspec o1
        # return self.dest[i]

    def get_fu_out(self, i):
        """Alias for get_out."""
        return self.get_out(i)

    def __iter__(self):
        """Yield the unit's external signals (used by ports())."""
        yield self.rd.go_i
        yield self.go_ad_i
        yield self.wr.go_i
        yield self.go_st_i
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i
        yield self.busy_o
        yield self.rd.rel_o
        yield self.adr_rel_o
        yield self.sto_rel_o
        yield self.wr.rel_o
        yield from self.data_o.ports()
        yield from self.addr_o.ports()
        yield self.load_mem_o
        yield self.stwd_mem_o

    def ports(self):
        """Return the external signals as a list."""
        return list(self)
587
588
def wait_for(sig, wait=True, test1st=False):
    """Simulation helper: clock until the truthiness of sig equals wait.

    The signal is always sampled once up-front (and reported via print).
    With test1st set, a match on that first sample returns immediately;
    otherwise at least one clock tick is taken before re-sampling.
    """
    sample = yield sig
    print("wait for", sig, sample, wait, test1st)
    if test1st and bool(sample) == wait:
        return
    matched = False
    while not matched:
        yield  # advance one clock
        sample = yield sig
        matched = bool(sample) == wait
600
601
def store(dut, src1, src2, src3, imm, imm_ok=True, update=False,
          byterev=True):
    """Simulation process: issue a half-word ST and step it to completion.

    src1/src2 are the address operands (src2 is only requested by the unit
    when imm_ok is False), src3 is the value to store, imm the immediate.
    Returns the value read from dut.addr_o in update mode, otherwise None.
    """
    print("ST", src1, src2, src3, imm, imm_ok, update)
    yield dut.oper_i.insn_type.eq(MicrOp.OP_STORE)
    yield dut.oper_i.data_len.eq(2)  # half-word
    yield dut.oper_i.byte_reverse.eq(byterev)
    yield dut.src1_i.eq(src1)
    yield dut.src2_i.eq(src2)
    yield dut.src3_i.eq(src3)
    # the unit reads the immediate from imm_data.data
    yield dut.oper_i.imm_data.data.eq(imm)
    yield dut.oper_i.imm_data.ok.eq(imm_ok)
    # update mode is decoded by the unit from ldst_mode
    yield dut.oper_i.ldst_mode.eq(LDSTMode.update if update
                                  else LDSTMode.NONE)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)

    # with an immediate, src2 is not requested: only src1 and src3
    if imm_ok:
        active_rel = 0b101
    else:
        active_rel = 0b111
    # wait for all active rel signals to come up
    while True:
        rel = yield dut.rd.rel_o
        if rel == active_rel:
            break
        yield
    yield dut.rd.go_i.eq(active_rel)
    yield
    yield dut.rd.go_i.eq(0)

    yield from wait_for(dut.adr_rel_o, False, test1st=True)
    # yield from wait_for(dut.adr_rel_o)
    # yield dut.ad.go_i.eq(1)
    # yield
    # yield dut.ad.go_i.eq(0)

    if update:
        yield from wait_for(dut.wr.rel_o[1])
        yield dut.wr.go_i.eq(0b10)
        yield
        addr = yield dut.addr_o
        print("addr", addr)
        yield dut.wr.go_i.eq(0)
    else:
        addr = None

    yield from wait_for(dut.sto_rel_o)
    yield dut.go_st_i.eq(1)
    yield
    yield dut.go_st_i.eq(0)
    yield from wait_for(dut.busy_o, False)
    # wait_for(dut.stwd_mem_o)
    yield
    return addr
656
657
def load(dut, src1, src2, imm, imm_ok=True, update=False, zero_a=False,
         byterev=True):
    """Simulation process: issue a half-word LD and collect its result.

    src1/src2 are the address operands (src1 is skipped when zero_a is
    set, src2 when imm_ok is set), imm is the immediate.
    Returns (data, addr): data is read from dut.data_o; addr is read from
    dut.addr_o in update mode and is None otherwise.
    """
    print("LD", src1, src2, imm, imm_ok, update)
    yield dut.oper_i.insn_type.eq(MicrOp.OP_LOAD)
    yield dut.oper_i.data_len.eq(2)  # half-word
    yield dut.oper_i.byte_reverse.eq(byterev)
    yield dut.src1_i.eq(src1)
    yield dut.src2_i.eq(src2)
    yield dut.oper_i.zero_a.eq(zero_a)
    # the unit reads the immediate from imm_data.data
    yield dut.oper_i.imm_data.data.eq(imm)
    yield dut.oper_i.imm_data.ok.eq(imm_ok)
    # program update mode explicitly, rather than inheriting whatever
    # mode a previous operation left in oper_i
    yield dut.oper_i.ldst_mode.eq(LDSTMode.update if update
                                  else LDSTMode.NONE)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # set up read-operand flags
    rd = 0b00
    if not imm_ok:  # no immediate means RB register needs to be read
        rd |= 0b10
    if not zero_a:  # no zero-a means RA needs to be read
        rd |= 0b01

    # wait for the operands (RA, RB, or both)
    if rd:
        yield dut.rd.go_i.eq(rd)
        yield from wait_for(dut.rd.rel_o)
        yield dut.rd.go_i.eq(0)

    yield from wait_for(dut.adr_rel_o, False, test1st=True)
    # yield dut.ad.go_i.eq(1)
    # yield
    # yield dut.ad.go_i.eq(0)

    if update:
        yield from wait_for(dut.wr.rel_o[1])
        yield dut.wr.go_i.eq(0b10)
        yield
        addr = yield dut.addr_o
        print("addr", addr)
        yield dut.wr.go_i.eq(0)
    else:
        addr = None

    yield from wait_for(dut.wr.rel_o[0], test1st=True)
    yield dut.wr.go_i.eq(1)
    yield
    data = yield dut.data_o
    print(data)
    yield dut.wr.go_i.eq(0)
    # NOTE(review): this waits for busy_o to be *asserted*, whereas store()
    # waits for it to drop (wait=False).  Left unchanged - confirm intent.
    yield from wait_for(dut.busy_o)
    yield
    # wait_for(dut.stwd_mem_o)
    return data, addr
712
713
def ldst_sim(dut):
    """Simulation process: run a fixed ST/LD sequence and check results.

    Covers immediate, indexed, update and zero-RA addressing by storing
    values and loading them back from the same effective addresses.
    """

    ###################
    # immediate version

    # two STs (different addresses)
    yield from store(dut, 4, 0, 3, 2)  # ST value 3 at EA = 4 + imm 2
    yield from store(dut, 2, 0, 9, 2)  # ST value 9 at EA = 2 + imm 2
    yield
    # two LDs (deliberately LD from the 1st address then 2nd)
    data, addr = yield from load(dut, 4, 0, 2)
    assert data == 0x0003, "returned %x" % data
    data, addr = yield from load(dut, 2, 0, 2)
    assert data == 0x0009, "returned %x" % data
    yield

    # indexed version: EA = src1 + src2
    yield from store(dut, 9, 5, 3, 0, imm_ok=False)
    data, addr = yield from load(dut, 9, 5, 0, imm_ok=False)
    assert data == 0x0003, "returned %x" % data

    # update-immediate version: EA (9 + 2) comes back as the update value
    addr = yield from store(dut, 9, 6, 3, 2, update=True)
    assert addr == 0x000b, "returned %x" % addr

    # update-indexed version: EA is 9 + 5
    data, addr = yield from load(dut, 9, 5, 0, imm_ok=False, update=True)
    assert data == 0x0003, "returned %x" % data
    assert addr == 0x000e, "returned %x" % addr

    # immediate *and* zero version: RA is treated as zero
    data, addr = yield from load(dut, 1, 4, 8, imm_ok=True, zero_a=True)
    assert data == 0x0008, "returned %x" % data
747
748
class TestLDSTCompUnit(LDSTCompUnit):
    """LDSTCompUnit wired to a TstL0CacheBuffer, for simulation.

    The address "go" is tied directly to the address "release", so the
    testbench does not have to acknowledge address requests itself.
    """

    def __init__(self, rwid):
        from soc.experiment.l0_cache import TstL0CacheBuffer
        self.l0 = l0 = TstL0CacheBuffer()
        pi = l0.l0.dports[0].pi
        LDSTCompUnit.__init__(self, pi, rwid, 4)

    def elaborate(self, platform):
        m = LDSTCompUnit.elaborate(self, platform)
        m.submodules.l0 = self.l0
        # link addr-go direct to rel (the record fields are go_i / rel_o,
        # as used throughout LDSTCompUnit)
        m.d.comb += self.ad.go_i.eq(self.ad.rel_o)
        return m
762
763
def test_scoreboard():
    """Convert a 16-bit test unit to RTLIL, then run the LD/ST simulation."""
    unit = TestLDSTCompUnit(16)

    # dump the RTLIL first, then simulate the same instance
    il_text = rtlil.convert(unit, ports=unit.ports())
    with open("test_ldst_comp.il", "w") as il_file:
        il_file.write(il_text)

    run_simulation(unit, ldst_sim(unit), vcd_name='test_ldst_comp.vcd')
772
773
class TestLDSTCompUnitRegSpec(LDSTCompUnit):
    """LDSTCompUnit built from LDSTPipeSpec.regspec, for simulation.

    As with TestLDSTCompUnit, it is wired to a TstL0CacheBuffer and the
    address "go" is tied directly to the address "release".
    """

    def __init__(self):
        from soc.experiment.l0_cache import TstL0CacheBuffer
        from soc.fu.ldst.pipe_data import LDSTPipeSpec
        regspec = LDSTPipeSpec.regspec
        self.l0 = l0 = TstL0CacheBuffer()
        pi = l0.l0.dports[0].pi
        LDSTCompUnit.__init__(self, pi, regspec, 4)

    def elaborate(self, platform):
        m = LDSTCompUnit.elaborate(self, platform)
        m.submodules.l0 = self.l0
        # link addr-go direct to rel (the record fields are go_i / rel_o,
        # as used throughout LDSTCompUnit)
        m.d.comb += self.ad.go_i.eq(self.ad.rel_o)
        return m
789
790
def test_scoreboard_regspec():
    """Convert the regspec test unit to RTLIL, then run the simulation."""

    dut = TestLDSTCompUnitRegSpec()
    vl = rtlil.convert(dut, ports=dut.ports())
    # write to a file matching the VCD name: previously this used
    # "test_ldst_comp.il", which test_scoreboard() also writes
    with open("test_ldst_regspec.il", "w") as f:
        f.write(vl)

    run_simulation(dut, ldst_sim(dut), vcd_name='test_ldst_regspec.vcd')
799
800
# script entry point: run the regspec variant first, then the plain one
if __name__ == "__main__":
    test_scoreboard_regspec()
    test_scoreboard()