1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Signal
, Mux
, Elaboratable
, Repl
, Array
, Record
4 from nmigen
.hdl
.rec
import (DIR_FANIN
, DIR_FANOUT
)
6 from nmutil
.latch
import SRLatch
, latchregister
7 from nmutil
.iocontrol
import RecordObject
9 from soc
.decoder
.power_decoder2
import Data
10 from soc
.decoder
.power_enums
import InternalOp
""" Computation Unit (aka "ALU Manager").

    This module runs a "revolving door" set of three latches, based on
    * Issue
    * Go_Read
    * Go_Write
    where one of them cannot be set on any given cycle.

    * When issue is first raised, a busy signal is sent out.
      The src1 and src2 registers and the operand can be latched in
      at this point.

    * Read request is set, which is acknowledged through the Scoreboard
      to the priority picker, which generates (one and only one) Go_Read
      at a time.  One of those will (eventually) be this Computation Unit.

    * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
      src1/src2/operand in place), and the ALU is told to proceed.

    * When the ALU pipeline is ready, this activates "write request release",
      and the ALU's output is captured into a temporary register.

    * Write request release is *HELD UP* (prevented from proceeding) if shadowN
      is asserted LOW.  This is how all speculation, precise exceptions,
      predication - everything - is achieved.

    * Write request release will go through a similar process as Read request,
      resulting (eventually) in Go_Write being asserted.

    * When Go_Write is asserted, two things happen: (1) the data in the temp
      register is placed combinatorially onto the output, and (2) the
      req_l latch is cleared, busy is dropped, and the Comp Unit is back
      through its revolving door to do another task.

    Note that the read and write latches are held synchronously for one cycle,
    i.e. that when Go_Read comes in, one cycle is given in which the incoming
    register (broadcast over a Regfile Read Port) may have time to be latched.

    It is REQUIRED that Go_Read be held valid only for one cycle, and it is
    REQUIRED that the corresponding Read_Req be dropped exactly one cycle after
    Go_Read is asserted HI.

    Likewise for Go_Write: this is asserted for one cycle, and Req_Writes must
    likewise be dropped exactly one cycle after assertion of Go_Write.

    When Go_Die is asserted then strictly speaking the entire FSM should be
    fully reset and that includes sending a cancellation request to the ALU.
    (XXX TODO: alu "go die" is not presently wired up)
"""
def go_record(n, name):
    """Build the go/release handshake Record used for read and write ports.

    :n:    width (in bits) of both the ``go`` and the ``rel`` field
    :name: name given to the Record

    Returns the Record.  ``go`` is declared DIR_FANIN and ``rel`` is
    declared DIR_FANOUT; both fields are flagged reset_less.
    """
    r = Record([('go', n, DIR_FANIN),
                ('rel', n, DIR_FANOUT)], name=name)
    r.go.reset_less = True
    r.rel.reset_less = True
    # callers store the result (self.rd / self.wr) and access .go / .rel,
    # so the Record has to be handed back
    return r
class CompUnitRecord(RecordObject):
    """CompUnitRecord

    base class for Computation Units, to provide a uniform API
    and allow "record.connect" etc. to be used, particularly when
    it comes to connecting multiple Computation Units up as a block

    LDSTCompUnitRecord should derive from this class and add the
    additional signals it requires

    :subkls: the class (not an instance) needed to construct the opcode
    :rwid:   width of every src / dest operand Signal
    :n_src:  number of source operands (creates src1_i, src2_i, ...)
    :n_dst:  number of destination operands (creates dest1_i, dest2_i, ...)
    :name:   passed through to RecordObject.__init__
    """
    def __init__(self, subkls, rwid, n_src, n_dst, name=None):
        RecordObject.__init__(self, name)
        self._n_src, self._n_dst = n_src, n_dst

        # source operands.  a separate local is used for the attribute
        # name so that the "name" constructor argument is not clobbered
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            regname = "src%d_i" % j
            sreg = Signal(rwid, name=regname, reset_less=True)
            setattr(self, regname, sreg)
            src.append(sreg)
        # indexable list of the src signals (read as cu._src_i by
        # MultiCompUnit)
        self._src_i = src

        # destination operands
        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            regname = "dest%d_i" % j
            dreg = Signal(rwid, name=regname, reset_less=True)
            setattr(self, regname, dreg)
            dst.append(dreg)
        # indexable list of the dest signals, mirroring _src_i
        self._dest = dst

        self.rd = go_record(n_src, name="rd")  # read in, req out
        self.wr = go_record(n_dst, name="wr")  # write in, req out
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        # operation / data input
        self.oper_i = subkls()  # operand

        # output (busy/done)
        self.busy_o = Signal(reset_less=True)  # fn busy out
        self.done_o = Signal(reset_less=True)
class MultiCompUnit(Elaboratable):
    """Computation Unit managing one ALU with n_src inputs and n_dst outputs.

    Wraps an ALU with the issue / go_read / go_write latch sequence and
    exposes the signals of a CompUnitRecord as attributes of this object.
    """
    def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
        """MultiCompUnit

        * :rwid:        width of register latches (TODO: allocate per regspec)
        * :alu:         the ALU (pipeline, FSM) - must conform to nmutil
                        Pipe API
        * :opsubsetkls: the subset of Decode2ExecuteType
        * :n_src:       number of src operands
        * :n_dst:       number of destination operands
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.rwid = rwid  # read back in elaborate() to size the latches
        self.opsubsetkls = opsubsetkls
        self.alu = alu  # actual ALU - set as a "submodule" of the CU
        self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)

        # re-export src1_i, src2_i... and keep them in an indexable list
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            name = "src%d_i" % j
            sreg = getattr(cu, name)
            setattr(self, name, sreg)
            src.append(sreg)

        # re-export dest1_i, dest2_i... and keep them in an indexable list
        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            name = "dest%d_i" % j
            dreg = getattr(cu, name)
            setattr(self, name, dreg)
            dst.append(dreg)

        # convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.go_rd_i = self.rd.go  # temporary naming
        self.go_wr_i = self.wr.go  # temporary naming
        self.rd_rel_o = self.rd.rel  # temporary naming
        self.req_rel_o = self.wr.rel  # temporary naming
        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        # operation / data input
        self.oper_i = cu.oper_i
        self.src_i = src

        self.busy_o = cu.busy_o
        self.dest = dst
        self.data_o = self.dest[0]  # Dest out
        self.done_o = cu.done_o

    def elaborate(self, platform):
        """Build the latch/handshake logic around the ALU; returns a Module."""
        m = Module()
        m.submodules.alu = self.alu
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
        m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")

        # ALU only proceeds when all src are ready.  rd_rel_o is delayed
        # so combine it with go_rd_i.  if all bits are set we're good
        all_rd = Signal(reset_less=True)
        m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
                              (((~self.rd.rel) | self.rd.go).all()))

        # write_requests all done
        # req_done works because any one of the last of the writes
        # is enough, when combined with when read-phase is done (rst_l.q)
        wr_any = Signal(reset_less=True)
        req_done = Signal(reset_less=True)
        m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
        m.d.comb += wr_any.eq(self.wr.go.bool())
        m.d.comb += req_done.eq(rst_l.q & wr_any)

        # shadow/go_die
        reset = Signal(reset_less=True)
        rst_r = Signal(reset_less=True)  # reset latch off
        reset_w = Signal(self.n_dst, reset_less=True)
        reset_r = Signal(self.n_src, reset_less=True)
        m.d.comb += reset.eq(req_done | self.go_die_i)
        m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
        m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
        m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))

        # read-done,wr-proceed latch
        m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
        m.d.comb += rok_l.r.eq(self.alu.p_ready_o)  # off when ALU acknowledges

        # wr-done, back-to-start latch
        m.d.comb += rst_l.s.eq(all_rd)  # set when read-phase is fully done
        m.d.comb += rst_l.r.eq(rst_r)  # *off* on issue

        # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
        m.d.sync += opc_l.s.eq(self.issue_i)  # set on issue
        m.d.sync += opc_l.r.eq(self.alu.n_valid_o & req_done)  # reset on ALU

        # src operand latch (not using go_wr_i)
        m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
        m.d.sync += src_l.r.eq(reset_r)

        # dest operand latch (not using issue_i)
        m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
        m.d.sync += req_l.r.eq(reset_w)

        # create a latch/register for the operand
        oper_r = self.opsubsetkls()
        latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")

        # and for each output from the ALU
        drl = []
        for i in range(self.n_dst):
            name = "data_r%d" % i
            data_r = Signal(self.rwid, name=name, reset_less=True)
            latchregister(m, self.alu.out[i], data_r, req_l.q[i], name)
            drl.append(data_r)

        # pass the operation to the ALU
        m.d.comb += self.alu.op.eq(oper_r)

        # create list of src/alu-src/src-latch.  override 2nd one below
        sl = []
        for i in range(self.n_src):
            sl.append([self.src_i[i], self.alu.i[i], src_l.q[i]])

        # select immediate if opcode says so.  however also change the latch
        # to trigger *from* the opcode latch instead.
        # the immediate path indexes operand 1, so n_src must be at least 2
        op_is_imm = oper_r.imm_data.imm_ok
        src2_or_imm = Signal(self.rwid, reset_less=True)
        src_sel = Signal(reset_less=True)
        m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.q, src_l.q[1]))
        # NOTE(review): the non-immediate Mux input was missing from the
        # statement; src2 is inferred from the name "src2_or_imm" - confirm
        m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
                                       self.src2_i))

        # overwrite 2nd src-latch with immediate-muxed stuff
        sl[1][0] = src2_or_imm
        # NOTE(review): src_sel was otherwise unused; wiring it in as the
        # latch trigger follows the comment above - confirm
        sl[1][2] = src_sel

        # create a latch/register for src1/src2
        for i in range(self.n_src):
            src, alusrc, latch = sl[i]
            latchregister(m, src, alusrc, latch, name="src_r%d" % i)

        # -----
        # outputs
        # -----

        # all request signals gated by busy_o.  prevents picker problems
        m.d.comb += self.busy_o.eq(opc_l.q)  # busy out
        bro = Repl(self.busy_o, self.n_src)
        m.d.comb += self.rd.rel.eq(src_l.q & bro)  # src1/src2 req rel

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(all_rd):  # src operands ready, GO!
            with m.If(~self.alu.p_ready_o):  # no ACK yet
                m.d.comb += self.alu.p_valid_i.eq(1)  # so indicate valid

        brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
        # only proceed if ALU says its output is valid
        with m.If(self.alu.n_valid_o):
            # when ALU ready, write req release out.  waits for shadow
            m.d.comb += self.wr.rel.eq(req_l.q & brd)
            # when output latch is ready, and ALU says ready, accept ALU output
            # NOTE(review): the guarding condition was missing; "reset" is
            # the only computed-but-unused signal that fits - confirm
            with m.If(reset):
                m.d.comb += self.alu.n_ready_i.eq(1)  # tells ALU "thanks got it"

        # output the data from the latch on go_write
        for i in range(self.n_dst):
            with m.If(self.wr.go[i]):
                m.d.comb += self.dest[i].eq(drl[i])

        return m

    def __iter__(self):
        """Yield the externally visible signals of the unit.

        NOTE(review): only the oper_i entry of the original port list was
        available; the remaining entries are the signals this class exposes.
        """
        yield self.rd.go
        yield self.wr.go
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i
        yield self.busy_o
        yield self.rd.rel
        yield self.wr.rel
        yield from self.dest

    def ports(self):
        """Return the signals from __iter__ as a list."""
        return list(self)
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
    """Simulation process: issue one operation to the unit and collect it.

    :dut:    unit under test (provides issue_i, src_i, oper_i, rd, wr, data_o)
    :a:      value driven onto src_i[0]
    :b:      value driven onto src_i[1]
    :op:     value driven onto oper_i.insn_type
    :inv_a:  value driven onto oper_i.invert_a
    :imm:    value driven onto oper_i.imm_data.imm
    :imm_ok: value driven onto oper_i.imm_data.imm_ok

    A bare ``yield`` advances the simulation by one clock.  Returns the
    value read from data_o while wr.go[0] is asserted.

    NOTE(review): the clock-advancing yields and the two polling loops were
    absent from this function and have been restored by inference - confirm
    cycle counts against the intended testbench.
    """
    yield dut.issue_i.eq(0)
    yield
    yield dut.src_i[0].eq(a)
    yield dut.src_i[1].eq(b)
    yield dut.oper_i.insn_type.eq(op)
    yield dut.oper_i.invert_a.eq(inv_a)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
    # pulse issue for one clock
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield
    # grant both reads, then wait for the read-release to show up
    yield dut.rd.go.eq(0b11)
    while True:
        yield
        rd_rel_o = yield dut.rd.rel
        print("rd_rel", rd_rel_o)
        if rd_rel_o:
            break
    yield
    yield dut.rd.go.eq(0)
    req_rel_o = yield dut.wr.rel
    result = yield dut.data_o
    print("req_rel", req_rel_o, result)
    # wait for the write-request release
    while True:
        req_rel_o = yield dut.wr.rel
        result = yield dut.data_o
        print("req_rel", req_rel_o, result)
        if req_rel_o:
            break
        yield
    # grant the write for one clock and sample the output
    yield dut.wr.go[0].eq(1)
    yield
    result = yield dut.data_o
    print("result", result)
    yield dut.wr.go[0].eq(0)
    yield
    return result
def scoreboard_sim(dut):
    """Simulation process: run three OP_ADD operations and check each result.

    :dut: the unit under test, passed straight through to op_sim
    """
    # NOTE(review): the continuation of this call and its check were
    # missing; the immediate path (imm replaces src2, so 5 + 8) is
    # exercised here as the reviewer's choice - confirm
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=0,
                               imm=8, imm_ok=1)
    assert result == 13

    # plain add of the two source operands
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD)
    assert result == 7

    # inverted src1: (~5 & 0xffff) + 2 on the 16-bit unit
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=1)
    assert result == 65532
def test_scoreboard():
    """Build a 16-bit MultiCompUnit around an ALU, dump RTLIL and simulate.

    Writes "test_compalu.il" (RTLIL of the unit) and "test_compalu.vcd"
    (simulation trace) to the current directory.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    m = Module()
    # NOTE(review): ALU construction was missing; width assumed to match
    # the 16-bit unit below - confirm
    alu = ALU(16)
    dut = MultiCompUnit(16, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compalu.il", "w") as f:
        f.write(vl)

    run_simulation(m, scoreboard_sim(dut), vcd_name='test_compalu.vcd')
377 if __name__
== '__main__':