1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Signal
, Mux
, Elaboratable
, Repl
, Array
, Record
4 from nmigen
.hdl
.rec
import (DIR_FANIN
, DIR_FANOUT
)
6 from nmutil
.latch
import SRLatch
, latchregister
7 from nmutil
.iocontrol
import RecordObject
9 from soc
.decoder
.power_decoder2
import Data
10 from soc
.decoder
.power_enums
import InternalOp
13 """ Computation Unit (aka "ALU Manager").
    This module runs a "revolving door" set of three latches, based on
    Issue, Go_Read and Go_Write (each described below),
    where one of them cannot be set on any given cycle.

    * When issue is first raised, a busy signal is sent out.
      The src1 and src2 registers and the operand can be latched in.

    * Read request is set, which is acknowledged through the Scoreboard
      to the priority picker, which generates (one and only one) Go_Read
      at a time.  One of those will (eventually) be this Computation Unit.

    * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
      src1/src2/operand in place), and the ALU is told to proceed.

    * When the ALU pipeline is ready, this activates "write request release",
      and the ALU's output is captured into a temporary register.

    * Write request release is *HELD UP* (prevented from proceeding) if shadowN
      is asserted LOW.  This is how all speculation, precise exceptions,
      predication - everything - is achieved.

    * Write request release will go through a similar process as Read request,
      resulting (eventually) in Go_Write being asserted.

    * When Go_Write is asserted, two things happen: (1) the data in the temp
      register is placed combinatorially onto the output, and (2) the
      req_l latch is cleared, busy is dropped, and the Comp Unit is back
      through its revolving door to do another task.

    Note that the read and write latches are held synchronously for one cycle,
    i.e. that when Go_Read comes in, one cycle is given in which the incoming
    register (broadcast over a Regfile Read Port) may have time to be latched.

    It is REQUIRED that Go_Read be held valid only for one cycle, and it is
    REQUIRED that the corresponding Read_Req be dropped exactly one cycle after
    Go_Read is asserted HI.

    Likewise for Go_Write: this is asserted for one cycle, and Req_Writes must
    likewise be dropped exactly one cycle after assertion of Go_Write.

    When Go_Die is asserted then strictly speaking the entire FSM should be
    fully reset, and that includes sending a cancellation request to the ALU.
    (XXX TODO: alu "go die" is not presently wired up)
"""
def go_record(n, name):
    """Build the go/release handshake Record used for read and write ports.

    :n:    bit-width of both fields (callers pass the number of operands)
    :name: name given to the Record

    The Record has two fields: ``go`` (direction DIR_FANIN) and ``rel``
    (direction DIR_FANOUT).  Both are marked reset-less.

    Returns the constructed Record.
    """
    r = Record([('go', n, DIR_FANIN),
                ('rel', n, DIR_FANOUT)], name=name)
    r.go.reset_less = True
    r.rel.reset_less = True
    # callers immediately use .go / .rel on the result, so it must be returned
    return r
def get_regspec_bitwidth(regspec, srcdest, idx):
    """Return the total bit-width described by one entry of a regspec.

    :regspec: indexable pair of operand lists; each operand is a sequence
              whose item [2] is a bit-range string such as ``"0:15"``
    :srcdest: which list to look in (callers in this file use 0 for
              sources and 1 for destinations)
    :idx:     index of the operand within that list

    The bit-range string is a comma-separated list.  Each item is either a
    single bit (``"5"``, counted as one bit) or ``"start:end"``.

    NOTE(review): ranges are treated as inclusive, so ``"0:15"`` is 16 bits
    wide - confirm against the regspec documentation.
    """
    bitspec = regspec[srcdest][idx]
    wid = 0
    for ranges in bitspec[2].split(","):
        ranges = ranges.split(":")
        if len(ranges) == 1:  # only one bit
            wid += 1
        else:
            start, end = map(int, ranges)
            wid += (end - start) + 1
    return wid
class CompUnitRecord(RecordObject):
    """CompUnitRecord

    base class for Computation Units, to provide a uniform API
    and allow "record.connect" etc. to be used, particularly when
    it comes to connecting multiple Computation Units up as a block

    LDSTCompUnitRecord should derive from this class and add the
    additional signals it requires

    :subkls: the class (not an instance) needed to construct the opcode
    :rwid:   either an integer (specifies width of all regs) or a "regspec"
    :n_src:  number of source operands (used when rwid is an integer)
    :n_dst:  number of destination operands (used when rwid is an integer)
    :name:   name passed to RecordObject
    """
    def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
        RecordObject.__init__(self, name)
        # kept so that _get_srcwid/_get_dstwid can work out per-operand widths
        self._rwid = rwid
        if isinstance(rwid, int):
            # rwid: integer (covers all registers)
            self._n_src, self._n_dst = n_src, n_dst
        else:
            # rwid: a regspec - operand counts come from the spec itself
            self._n_src, self._n_dst = len(rwid[0]), len(rwid[1])
        self._subkls = subkls

        # source operand inputs: src1_i, src2_i, ...
        src = []
        for i in range(self._n_src):
            j = i + 1  # name numbering to match src1/src2
            signame = "src%d_i" % j
            rw = self._get_srcwid(i)
            sreg = Signal(rw, name=signame, reset_less=True)
            setattr(self, signame, sreg)
            src.append(sreg)
        self._src_i = src  # indexable form, read by MultiCompUnit

        # destination operands: dest1_i, dest2_i, ...
        dst = []
        for i in range(self._n_dst):
            j = i + 1  # name numbering to match dest1/2...
            signame = "dest%d_i" % j
            rw = self._get_dstwid(i)
            dreg = Signal(rw, name=signame, reset_less=True)
            setattr(self, signame, dreg)
            dst.append(dreg)
        self._dest = dst

        self.rd = go_record(self._n_src, name="rd")  # read in, req out
        self.wr = go_record(self._n_dst, name="wr")  # write in, req out
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        # operation / data input
        self.oper_i = subkls()  # operand

        # status outputs
        self.busy_o = Signal(reset_less=True)  # fn busy out
        self.done_o = Signal(reset_less=True)

    def _get_dstwid(self, i):
        """Width of destination operand *i* (integer rwid or regspec)."""
        if isinstance(self._rwid, int):
            return self._rwid
        return get_regspec_bitwidth(self._rwid, 1, i)

    def _get_srcwid(self, i):
        """Width of source operand *i* (integer rwid or regspec)."""
        if isinstance(self._rwid, int):
            return self._rwid
        return get_regspec_bitwidth(self._rwid, 0, i)
class MultiCompUnit(Elaboratable):
    """Computation Unit wrapping an ALU with issue/read/write handshaking.

    The signals live in a CompUnitRecord (self.cu); this class re-exports
    them as attributes and builds the latch logic in elaborate().
    """
    def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
        """MultiCompUnit

        * :rwid:        width of register latches (TODO: allocate per regspec)
        * :alu:         the ALU (pipeline, FSM) - must conform to nmutil Pipe API
        * :opsubsetkls: the subset of Decode2ExecuteType
        * :n_src:       number of src operands
        * :n_dst:       number of destination operands
        """
        self.n_src, self.n_dst = n_src, n_dst
        self.opsubsetkls = opsubsetkls
        self.alu = alu  # actual ALU - set as a "submodule" of the CU
        self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)

        # re-export src1_i, src2_i, ... from the record
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            name = "src%d_i" % j
            setattr(self, name, getattr(cu, name))

        # re-export dest1_i, dest2_i, ... and keep an indexable list of them
        dest = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            name = "dest%d_i" % j
            sig = getattr(cu, name)
            setattr(self, name, sig)
            dest.append(sig)

        # convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.go_rd_i = self.rd.go  # temporary naming
        self.go_wr_i = self.wr.go  # temporary naming
        self.rd_rel_o = self.rd.rel  # temporary naming
        self.req_rel_o = self.wr.rel  # temporary naming
        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        # operation / data input
        self.oper_i = cu.oper_i
        self.src_i = cu._src_i

        self.busy_o = cu.busy_o
        self.dest = dest
        self.data_o = self.dest[0]  # Dest out
        self.done_o = cu.done_o

    def elaborate(self, platform):
        """Build and return the nMigen Module for this Computation Unit."""
        m = Module()
        m.submodules.alu = self.alu
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
        m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")

        # ALU only proceeds when all src are ready.  rd_rel_o is delayed
        # so combine it with go_rd_i.  if all bits are set we're good
        all_rd = Signal(reset_less=True)
        m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
                              (((~self.rd.rel) | self.rd.go).all()))

        # write_requests all done
        # req_done works because any one of the last of the writes
        # is enough, when combined with when read-phase is done (rst_l.q)
        wr_any = Signal(reset_less=True)
        req_done = Signal(reset_less=True)
        m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
        m.d.comb += wr_any.eq(self.wr.go.bool())
        m.d.comb += req_done.eq(rst_l.q & wr_any)

        # reset terms: each is the normal completion condition OR go_die
        reset = Signal(reset_less=True)
        rst_r = Signal(reset_less=True)  # reset latch off
        reset_w = Signal(self.n_dst, reset_less=True)
        reset_r = Signal(self.n_src, reset_less=True)
        m.d.comb += reset.eq(req_done | self.go_die_i)
        m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
        m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
        m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))

        # read-done,wr-proceed latch
        m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
        m.d.comb += rok_l.r.eq(self.alu.p_ready_o)  # off when ALU acknowledges

        # wr-done, back-to-start latch
        m.d.comb += rst_l.s.eq(all_rd)  # set when read-phase is fully done
        m.d.comb += rst_l.r.eq(rst_r)  # *off* on issue

        # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
        m.d.sync += opc_l.s.eq(self.issue_i)  # set on issue
        m.d.sync += opc_l.r.eq(self.alu.n_valid_o & req_done)  # reset on ALU

        # src operand latch (not using go_wr_i)
        m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
        m.d.sync += src_l.r.eq(reset_r)

        # dest operand latch (not using issue_i)
        m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
        m.d.sync += req_l.r.eq(reset_w)

        # create a latch/register for the operand
        oper_r = self.opsubsetkls()
        latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")

        # and for each output from the ALU
        drl = []
        for i in range(self.n_dst):
            name = "data_r%d" % i
            # these hold ALU *outputs*, so size them from the destination
            # spec (the source spec may differ in width or operand count)
            data_r = Signal(self.cu._get_dstwid(i), name=name, reset_less=True)
            latchregister(m, self.alu.out[i], data_r, req_l.q[i], name)
            drl.append(data_r)

        # pass the operation to the ALU
        m.d.comb += self.alu.op.eq(oper_r)

        # create list of src/alu-src/src-latch.  override 1st and 2nd one below.
        # in the case, for ALU and Logical pipelines, we assume RB is the 2nd
        # operand in the input "regspec".  see for example
        # soc.fu.alu.pipe_data.ALUInputData
        # TODO: assume RA is the 1st operand, zero_a detection is needed.
        sl = []
        for i in range(self.n_src):
            sl.append([self.src_i[i], self.alu.i[i], src_l.q[i]])

        # if the operand subset has "zero_a" we implicitly assume that means
        # src_i[0] is an INT register type where zero can be multiplexed in,
        # instead.  see https://bugs.libre-soc.org/show_bug.cgi?id=336
        # (zero_a handling is not implemented here)

        # if the operand subset has "imm_data" we implicitly assume that means
        # "this is an INT ALU/Logical FU jobbie, RB is multiplexed with the
        # immediate"
        if hasattr(oper_r, "imm_data"):
            # select immediate if opcode says so.  however also change the latch
            # to trigger *from* the opcode latch instead.
            op_is_imm = oper_r.imm_data.imm_ok
            src2_or_imm = Signal(self.cu._get_srcwid(1), reset_less=True)
            src_sel = Signal(reset_less=True)
            m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.q, src_l.q[1]))
            # NOTE(review): the non-immediate Mux input was lost in SOURCE;
            # the 2nd source operand is the only candidate - confirm.
            m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
                                           self.src_i[1]))
            # overwrite 2nd src-latch with immediate-muxed stuff
            sl[1][0] = src2_or_imm
            sl[1][2] = src_sel

        # create a latch/register for src1/src2 (even if it is a copy of an
        # immediate)
        for i in range(self.n_src):
            src, alusrc, latch = sl[i]
            latchregister(m, src, alusrc, latch, name="src_r%d" % i)

        # -----
        # outputs
        # -----

        # all request signals gated by busy_o.  prevents picker problems
        m.d.comb += self.busy_o.eq(opc_l.q)  # busy out
        bro = Repl(self.busy_o, self.n_src)
        m.d.comb += self.rd.rel.eq(src_l.q & bro)  # src1/src2 req rel

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(all_rd):  # src operands ready, GO!
            with m.If(~self.alu.p_ready_o):  # no ACK yet
                m.d.comb += self.alu.p_valid_i.eq(1)  # so indicate valid

        brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
        # only proceed if ALU says its output is valid
        with m.If(self.alu.n_valid_o):
            # when ALU ready, write req release out.  waits for shadow
            m.d.comb += self.wr.rel.eq(req_l.q & brd)
            # when output latch is ready, and ALU says ready, accept ALU output
            # NOTE(review): the guard line was lost in SOURCE; `reset` is the
            # only signal computed above and otherwise unused - confirm.
            with m.If(reset):
                m.d.comb += self.alu.n_ready_i.eq(1)  # tells ALU "thanks got it"

        # output the data from the latch on go_write
        for i in range(self.n_dst):
            with m.If(self.wr.go[i]):
                m.d.comb += self.dest[i].eq(drl[i])

        return m

    def __iter__(self):
        # NOTE(review): only the oper_i line of this generator survived in
        # SOURCE; the remaining entries are the unit's interface signals and
        # their order is reconstructed - confirm.
        yield self.rd.go
        yield self.wr.go
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i
        yield self.busy_o
        yield self.rd.rel
        yield self.wr.rel
        yield self.data_o

    def ports(self):
        """Return the interface signals as a list (used by rtlil.convert)."""
        return list(self)
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
    """Simulation process: issue one operation to *dut* and return its result.

    :dut:    the MultiCompUnit under test
    :a, b:   values driven onto src_i[0] and src_i[1]
    :op:     value for oper_i.insn_type
    :inv_a:  value for oper_i.invert_a
    :imm:    value for oper_i.imm_data.imm
    :imm_ok: value for oper_i.imm_data.imm_ok

    Returns the value read from dut.data_o while wr.go[0] is asserted.

    NOTE(review): the bare ``yield`` clock steps, the two polling loops and
    the final ``return`` were lost in SOURCE and are reconstructed here
    (callers assert on the return value) - confirm the exact cycle timing.
    """
    yield dut.issue_i.eq(0)
    yield
    yield dut.src_i[0].eq(a)
    yield dut.src_i[1].eq(b)
    yield dut.oper_i.insn_type.eq(op)
    yield dut.oper_i.invert_a.eq(inv_a)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
    # pulse issue for one cycle
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # grant both reads, then wait for the read request to show up
    yield dut.rd.go.eq(0b11)
    while True:
        yield
        rd_rel_o = yield dut.rd.rel
        print ("rd_rel", rd_rel_o)
        if rd_rel_o:
            break
    yield
    yield dut.rd.go.eq(0)

    req_rel_o = yield dut.wr.rel
    result = yield dut.data_o
    print ("req_rel", req_rel_o, result)
    # wait for the write request release
    while True:
        req_rel_o = yield dut.wr.rel
        result = yield dut.data_o
        print ("req_rel", req_rel_o, result)
        if req_rel_o:
            break
        yield

    # grant the write for one cycle and sample the output
    yield dut.wr.go[0].eq(1)
    yield
    result = yield dut.data_o
    print ("result", result)
    yield dut.wr.go[0].eq(0)
    yield
    return result
def scoreboard_sim(dut):
    """Simulation process: run three OP_ADD operations and check results.

    NOTE(review): the end of the first call and the first two assertions
    were lost in SOURCE.  The immediate values below are reconstructed to
    exercise the immediate path; the expected sums follow from the operands
    (5+8, 5+2).  Confirm against the original test.
    """
    # add with immediate: src2 is replaced by imm
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=0,
                               imm=8, imm_ok=1)
    assert result == 13

    # plain register-register add
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD)
    assert result == 7

    # add with operand A inverted (16-bit: ~5 + 2 == 65532)
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=1)
    assert result == 65532
def test_compunit1():
    """Build a 16-bit MultiCompUnit, dump RTLIL and run scoreboard_sim.

    NOTE(review): the ``def`` line, the Module/ALU construction and the file
    write were lost in SOURCE and are reconstructed - confirm the ALU
    constructor argument.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    m = Module()
    alu = ALU(16)
    dut = MultiCompUnit(16, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit1.il", "w") as f:
        f.write(vl)

    run_simulation(m, scoreboard_sim(dut), vcd_name='test_compunit1.vcd')
def test_compunit_regspec1():
    """Build a MultiCompUnit from a regspec, dump RTLIL and simulate it.

    Same flow as the integer-width test, but register widths come from
    (inspec, outspec) tuples of the form (type, name, bit-range).

    NOTE(review): the Module/ALU construction and the file write were lost
    in SOURCE and are reconstructed - confirm the ALU constructor argument.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    inspec = [('INT', 'a', '0:15'),
              ('INT', 'b', '0:15')]
    outspec = [('INT', 'o', '0:15'),
               ]

    regspec = (inspec, outspec)

    m = Module()
    alu = ALU(16)
    dut = MultiCompUnit(regspec, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit_regspec1.il", "w") as f:
        f.write(vl)

    # use this test's own VCD name so it does not overwrite the waveform
    # produced by the integer-width test
    run_simulation(m, scoreboard_sim(dut),
                   vcd_name='test_compunit_regspec1.vcd')
# script entry point: run the regspec-based Computation Unit test when this
# file is executed directly
if __name__ == '__main__':
    test_compunit_regspec1()