-""" LOAD / STORE Computation Unit.
+"""LOAD / STORE Computation Unit.
- This module covers POWER9-compliant Load and Store operations,
- with selection on each between immediate and indexed mode as
- options for the calculation of the Effective Address (EA),
- and also "update" mode which optionally stores that EA into
- an additional register.
+This module covers POWER9-compliant Load and Store operations,
+with selection on each between immediate and indexed mode as
+options for the calculation of the Effective Address (EA),
+and also "update" mode which optionally stores that EA into
+an additional register.
- ----
- Note: it took 15 attempts over several weeks to redraw the diagram
- needed to capture this FSM properly. To understand it fully, please
- take the time to review the links, video, and diagram.
- ----
+----
+Note: it took 15 attempts over several weeks to redraw the diagram
+needed to capture this FSM properly. To understand it fully, please
+take the time to review the links, video, and diagram.
+----
- Stores are activated when Go_Store is enabled, and use a sync'd "ADD" to
- compute the "Effective Address", and, when ready the operand (src3_i)
- is stored in the computed address (passed through to the PortInterface)
+Stores are activated when Go_Store is enabled, and use a sync'd "ADD" to
+compute the "Effective Address", and, when ready, the operand (src3_i)
+is stored in the computed address (passed through to the PortInterface).
- Loads are activated when Go_Write[0] is enabled. The EA is computed,
- and (as long as there was no exception) the data comes out (at any
- time from the PortInterface), and is captured by the LDCompSTUnit.
+Loads are activated when Go_Write[0] is enabled. The EA is computed,
+and (as long as there was no exception) the data comes out (at any
+time from the PortInterface), and is captured by the LDCompSTUnit.
- Both LD and ST may request that the address be computed from summing
- operand1 (src[0]) with operand2 (src[1]) *or* by summing operand1 with
- the immediate (from the opcode).
+Both LD and ST may request that the address be computed from summing
+operand1 (src[0]) with operand2 (src[1]) *or* by summing operand1 with
+the immediate (from the opcode).
- Both LD and ST may also request "update" mode (op_is_update) which
- activates the use of Go_Write[1] to control storage of the EA into
- a *second* operand in the register file.
+Both LD and ST may also request "update" mode (op_is_update) which
+activates the use of Go_Write[1] to control storage of the EA into
+a *second* operand in the register file.
- Thus this module has *TWO* write-requests to the register file and
- *THREE* read-requests to the register file (not all at the same time!)
- The regfile port usage is:
+Thus this module has *TWO* write-requests to the register file and
+*THREE* read-requests to the register file (not all at the same time!)
+The regfile port usage is:
* LD-imm 1R1W
* LD-imm-update 1R2W
* ST-idx 3R
* ST-idx-update 3R1W
- It's a multi-level Finite State Machine that (unfortunately) nmigen.FSM
- is not suited to (nmigen.FSM is clock-driven, and some aspects of
- the nested FSMs below are *combinatorial*).
+It's a multi-level Finite State Machine that (unfortunately) nmigen.FSM
+is not suited to (nmigen.FSM is clock-driven, and some aspects of
+the nested FSMs below are *combinatorial*).
* One FSM covers Operand collection and communication address-side
with the LD/ST PortInterface. Its role ends when "RD_DONE" is asserted
* The "overall" (fourth) FSM coordinates the progression and completion
of the three other FSMs, firing "WR_RESET" which switches off "busy"
- Full diagram:
+Full diagram:
+
https://libre-soc.org/3d_gpu/ld_st_comp_unit.jpg
- Links including to walk-through videos:
+Links including to walk-through videos:
+
* https://libre-soc.org/3d_gpu/architecture/6600scoreboard/
* http://libre-soc.org/openpower/isa/fixedload
* http://libre-soc.org/openpower/isa/fixedstore
- Related Bugreports:
+Related Bugreports:
+
* https://bugs.libre-soc.org/show_bug.cgi?id=302
+ * https://bugs.libre-soc.org/show_bug.cgi?id=216
- Terminology:
+Terminology:
* EA - Effective Address
* LD - Load
depending on whether the operation is a ST or LD.
"""
- def __init__(self, pi, rwid=64, awid=48, debugtest=False):
+ def __init__(self, pi=None, rwid=64, awid=48, debugtest=False):
self.rwid = rwid
self.awid = awid
self.pi = pi
dst = []
for i in range(n_dst):
j = i + 1 # name numbering to match dest1/2...
- name = "dest%d_i" % j
+ name = "dest%d_o" % j
setattr(self, name, getattr(cu, name))
# convenience names
alu_o = Signal(self.rwid, reset_less=True)
ldd_o = Signal(self.rwid, reset_less=True)
- # XXX TODO ZEROing just lije in ComUnit
+ # XXX TODO ZEROing just like in CompUnit
- # select immediate or src2 reg to add
- src2_or_imm = Signal(self.rwid, reset_less=True)
- src_sel = Signal(reset_less=True)
##############################
# reset conditions for latches
comb += rst_l.r.eq(issue_i)
# create a latch/register for the operand
- oper_r = CompLDSTOpSubset() # Dest register
+ oper_r = CompLDSTOpSubset(name="oper_r") # Dest register
latchregister(m, self.oper_i, oper_r, self.issue_i, name="oper_l")
# and for LD
addr_r = Signal(self.rwid, reset_less=True) # Effective Address Latch
latchregister(m, alu_o, addr_r, alu_l.q, "ea_r")
+ # select either zero or src1 if opcode says so
+ op_is_z = oper_r.zero_a
+ src1_or_z = Signal(self.rwid, reset_less=True)
+ m.d.comb += src1_or_z.eq(Mux(op_is_z, 0, srl[0]))
+
# select either immediate or src2 if opcode says so
op_is_imm = oper_r.imm_data.imm_ok
src2_or_imm = Signal(self.rwid, reset_less=True)
m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm, srl[1]))
# now do the ALU addr add: one cycle, and say "ready" (next cycle, too)
- sync += alu_o.eq(srl[0] + src2_or_imm) # actual EA
+ sync += alu_o.eq(src1_or_z + src2_or_imm) # actual EA
sync += alu_ok.eq(alu_valid) # keep ack in sync with EA
# decode bits of operand (latched)
busy_o = self.busy_o
comb += self.busy_o.eq(opc_l.q) # | self.pi.busy_o) # busy out
- # 1st operand read-request is simple: always need it
- comb += self.rd.rel[0].eq(src_l.q[0] & busy_o)
+ # 1st operand read-request only when zero not active
+ comb += self.rd.rel[0].eq(src_l.q[0] & busy_o & ~op_is_z)
# 2nd operand only needed when immediate is not active
comb += self.rd.rel[1].eq(src_l.q[1] & busy_o & ~op_is_imm)
return addr
-def load(dut, src1, src2, imm, imm_ok=True, update=False):
+def load(dut, src1, src2, imm, imm_ok=True, update=False, zero_a=False):
print ("LD", src1, src2, imm, imm_ok, update)
yield dut.oper_i.insn_type.eq(InternalOp.OP_LOAD)
yield dut.src1_i.eq(src1)
yield dut.src2_i.eq(src2)
+ yield dut.oper_i.zero_a.eq(zero_a)
yield dut.oper_i.imm_data.imm.eq(imm)
yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
yield dut.issue_i.eq(1)
yield
yield dut.issue_i.eq(0)
yield
- if imm_ok:
- yield dut.rd.go.eq(0b01)
- else:
- yield dut.rd.go.eq(0b11)
- yield from wait_for(dut.rd.rel)
- yield dut.rd.go.eq(0)
+ rd = 0b00
+ if not imm_ok:
+ rd |= 0b10
+ if not zero_a:
+ rd |= 0b01
+
+ if rd:
+ yield dut.rd.go.eq(rd)
+ yield from wait_for(dut.rd.rel)
+ yield dut.rd.go.eq(0)
yield from wait_for(dut.adr_rel_o, False, test1st=True)
#yield dut.ad.go.eq(1)
# update-indexed version
data, addr = yield from load(dut, 4, 5, 0, imm_ok=False, update=True)
+ assert data == 0x0003, "returned %x" % data
assert addr == 0x0009, "returned %x" % addr
+ # immediate *and* zero version
+ data, addr = yield from load(dut, 4, 5, 9, imm_ok=True, zero_a=True)
+ assert data == 0x0003, "returned %x" % data
+
+
class TestLDSTCompUnit(LDSTCompUnit):
def __init__(self, rwid):