"""Computation Unit (aka "ALU Manager").

Manages a Pipeline or FSM, ensuring that the start and end time are 100%
monitored.  At no time may the ALU proceed without this module notifying
the Dependency Matrices.  At no time is a result production "abandoned".
This module blocks (indicates busy) until it receives notification that
its result(s) have been successfully stored in the regfile(s).

Documented at http://libre-soc.org/3d_gpu/architecture/compunit
"""
12 from nmigen
.compat
.sim
import run_simulation
13 from nmigen
.cli
import verilog
, rtlil
14 from nmigen
import Module
, Signal
, Mux
, Elaboratable
, Repl
, Array
, Record
, Const
15 from nmigen
.hdl
.rec
import (DIR_FANIN
, DIR_FANOUT
)
17 from nmutil
.latch
import SRLatch
, latchregister
18 from nmutil
.iocontrol
import RecordObject
20 from soc
.decoder
.power_decoder2
import Data
21 from soc
.decoder
.power_enums
import InternalOp
22 from soc
.fu
.regspec
import RegSpec
, RegSpecALUAPI
def go_record(n, name):
    """Build the go/release handshake Record used by a Computation Unit.

    :n: width (in bits) of both the "go" and the "rel" field
    :name: name given to the Record

    Returns the Record.  "go" is declared DIR_FANIN and "rel" DIR_FANOUT;
    both fields are marked reset_less.
    """
    r = Record([('go', n, DIR_FANIN),
                ('rel', n, DIR_FANOUT)], name=name)
    r.go.reset_less = True
    r.rel.reset_less = True
    # callers store the result (self.rd / self.wr), so it must be returned
    return r
32 # see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
class CompUnitRecord(RegSpec, RecordObject):
    """Base class for Computation Units, to provide a uniform API
    and allow "record.connect" etc. to be used, particularly when
    it comes to connecting multiple Computation Units up as a block.

    LDSTCompUnitRecord should derive from this class and add the
    additional signals it requires.

    :subkls: the class (not an instance) needed to construct the opcode
    :rwid:   either an integer (specifies width of all regs) or a "regspec"
    :n_src:  number of source operands
    :n_dst:  number of destination operands
    :name:   name passed to RecordObject

    see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
    """

    def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
        RegSpec.__init__(self, rwid, n_src, n_dst)
        RecordObject.__init__(self, name)
        # NOTE(review): n_src / n_dst default to None but are handed straight
        # to range() below; the caller visible in this file always supplies
        # both.  Confirm whether RegSpec is meant to provide the counts.

        # create source operands: each gets its own attribute (src1_i, ...)
        # and is also collected, in order, into self._src_i
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            sname = "src%d_i" % j
            rw = self._get_srcwid(i)
            sreg = Signal(rw, name=sname, reset_less=True)
            setattr(self, sname, sreg)
            src.append(sreg)
        self._src_i = src

        # create dest operands: same scheme (dest1_i, ...), collected into
        # self._dest
        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            dname = "dest%d_i" % j
            rw = self._get_dstwid(i)
            dreg = Signal(rw, name=dname, reset_less=True)
            setattr(self, dname, dreg)
            dst.append(dreg)
        self._dest = dst

        # operation / data input
        self.oper_i = subkls()  # operand

        # create read/write and other scoreboard signalling
        self.rd = go_record(n_src, name="rd")  # read in, req out
        self.wr = go_record(n_dst, name="wr")  # write in, req out
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        # outputs
        self.busy_o = Signal(reset_less=True)  # fn busy out
        self.done_o = Signal(reset_less=True)
92 class MultiCompUnit(RegSpecALUAPI
, Elaboratable
):
93 def __init__(self
, rwid
, alu
, opsubsetkls
, n_src
=2, n_dst
=1):
96 * :rwid: width of register latches (TODO: allocate per regspec)
97 * :alu: the ALU (pipeline, FSM) - must conform to nmutil Pipe API
98 * :opsubsetkls: the subset of Decode2ExecuteType
99 * :n_src: number of src operands
100 * :n_dst: number of destination operands
102 RegSpecALUAPI
.__init
__(self
, rwid
, alu
)
103 self
.n_src
, self
.n_dst
= n_src
, n_dst
104 self
.opsubsetkls
= opsubsetkls
105 self
.cu
= cu
= CompUnitRecord(opsubsetkls
, rwid
, n_src
, n_dst
)
107 # convenience names for src operands
108 for i
in range(n_src
):
109 j
= i
+ 1 # name numbering to match src1/src2
111 setattr(self
, name
, getattr(cu
, name
))
113 # convenience names for dest operands
114 for i
in range(n_dst
):
115 j
= i
+ 1 # name numbering to match dest1/2...
116 name
= "dest%d_i" % j
117 setattr(self
, name
, getattr(cu
, name
))
119 # more convenience names
122 self
.go_rd_i
= self
.rd
.go
# temporary naming
123 self
.go_wr_i
= self
.wr
.go
# temporary naming
124 self
.rd_rel_o
= self
.rd
.rel
# temporary naming
125 self
.req_rel_o
= self
.wr
.rel
# temporary naming
126 self
.issue_i
= cu
.issue_i
127 self
.shadown_i
= cu
.shadown_i
128 self
.go_die_i
= cu
.go_die_i
130 # operation / data input
131 self
.oper_i
= cu
.oper_i
132 self
.src_i
= cu
._src
_i
134 self
.busy_o
= cu
.busy_o
136 self
.data_o
= self
.dest
[0] # Dest out
137 self
.done_o
= cu
.done_o
140 def _mux_op(self
, m
, sl
, op_is_imm
, imm
, i
):
141 # select zero immediate if opcode says so. however also change the latch
142 # to trigger *from* the opcode latch instead.
143 src_or_imm
= Signal(self
.cu
._get
_srcwid
(i
), reset_less
=True)
144 src_sel
= Signal(reset_less
=True)
145 m
.d
.comb
+= src_sel
.eq(Mux(op_is_imm
, self
.opc_l
.q
, self
.src_l
.q
[i
]))
146 m
.d
.comb
+= src_or_imm
.eq(Mux(op_is_imm
, imm
, self
.src_i
[i
]))
147 # overwrite 1st src-latch with immediate-muxed stuff
148 sl
[i
][0] = src_or_imm
151 def elaborate(self
, platform
):
153 m
.submodules
.alu
= self
.alu
154 m
.submodules
.src_l
= src_l
= SRLatch(False, self
.n_src
, name
="src")
155 m
.submodules
.opc_l
= opc_l
= SRLatch(sync
=False, name
="opc")
156 m
.submodules
.req_l
= req_l
= SRLatch(False, self
.n_dst
, name
="req")
157 m
.submodules
.rst_l
= rst_l
= SRLatch(sync
=False, name
="rst")
158 m
.submodules
.rok_l
= rok_l
= SRLatch(sync
=False, name
="rdok")
159 self
.opc_l
, self
.src_l
= opc_l
, src_l
161 # ALU only proceeds when all src are ready. rd_rel_o is delayed
162 # so combine it with go_rd_i. if all bits are set we're good
163 all_rd
= Signal(reset_less
=True)
164 m
.d
.comb
+= all_rd
.eq(self
.busy_o
& rok_l
.q
&
165 (((~self
.rd
.rel
) | self
.rd
.go
).all()))
167 # write_requests all done
168 # req_done works because any one of the last of the writes
169 # is enough, when combined with when read-phase is done (rst_l.q)
170 wr_any
= Signal(reset_less
=True)
171 req_done
= Signal(reset_less
=True)
172 m
.d
.comb
+= self
.done_o
.eq(self
.busy_o
& ~
(self
.wr
.rel
.bool()))
173 m
.d
.comb
+= wr_any
.eq(self
.wr
.go
.bool())
174 m
.d
.comb
+= req_done
.eq(rst_l
.q
& wr_any
)
177 reset
= Signal(reset_less
=True)
178 rst_r
= Signal(reset_less
=True) # reset latch off
179 reset_w
= Signal(self
.n_dst
, reset_less
=True)
180 reset_r
= Signal(self
.n_src
, reset_less
=True)
181 m
.d
.comb
+= reset
.eq(req_done | self
.go_die_i
)
182 m
.d
.comb
+= rst_r
.eq(self
.issue_i | self
.go_die_i
)
183 m
.d
.comb
+= reset_w
.eq(self
.wr
.go |
Repl(self
.go_die_i
, self
.n_dst
))
184 m
.d
.comb
+= reset_r
.eq(self
.rd
.go |
Repl(self
.go_die_i
, self
.n_src
))
186 # read-done,wr-proceed latch
187 m
.d
.comb
+= rok_l
.s
.eq(self
.issue_i
) # set up when issue starts
188 m
.d
.comb
+= rok_l
.r
.eq(self
.alu
.p
.ready_o
) # off when ALU acknowledges
190 # wr-done, back-to-start latch
191 m
.d
.comb
+= rst_l
.s
.eq(all_rd
) # set when read-phase is fully done
192 m
.d
.comb
+= rst_l
.r
.eq(rst_r
) # *off* on issue
194 # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
195 m
.d
.sync
+= opc_l
.s
.eq(self
.issue_i
) # set on issue
196 m
.d
.sync
+= opc_l
.r
.eq(self
.alu
.n
.valid_o
& req_done
) # reset on ALU
198 # src operand latch (not using go_wr_i)
199 m
.d
.sync
+= src_l
.s
.eq(Repl(self
.issue_i
, self
.n_src
))
200 m
.d
.sync
+= src_l
.r
.eq(reset_r
)
202 # dest operand latch (not using issue_i)
203 m
.d
.sync
+= req_l
.s
.eq(Repl(all_rd
, self
.n_dst
))
204 m
.d
.sync
+= req_l
.r
.eq(reset_w
)
206 # create a latch/register for the operand
207 oper_r
= self
.opsubsetkls()
208 latchregister(m
, self
.oper_i
, oper_r
, self
.issue_i
, "oper_r")
210 # and for each output from the ALU
212 for i
in range(self
.n_dst
):
213 name
= "data_r%d" % i
214 data_r
= Signal(self
.cu
._get
_srcwid
(i
), name
=name
, reset_less
=True)
215 latchregister(m
, self
.get_out(i
), data_r
, req_l
.q
[i
], name
)
218 # pass the operation to the ALU
219 m
.d
.comb
+= self
.get_op().eq(oper_r
)
221 # create list of src/alu-src/src-latch. override 1st and 2nd one below.
222 # in the case, for ALU and Logical pipelines, we assume RB is the 2nd operand
223 # in the input "regspec". see for example soc.fu.alu.pipe_data.ALUInputData
225 for i
in range(self
.n_src
):
226 sl
.append([self
.src_i
[i
], self
.get_in(i
), src_l
.q
[i
]])
228 # if the operand subset has "zero_a" we implicitly assume that means
229 # src_i[0] is an INT register type where zero can be multiplexed in, instead.
230 # see https://bugs.libre-soc.org/show_bug.cgi?id=336
231 if hasattr(oper_r
, "zero_a"):
232 # select zero immediate if opcode says so. however also change the latch
233 # to trigger *from* the opcode latch instead.
234 self
._mux
_op
(m
, sl
, oper_r
.zero_a
, 0, 0)
236 # if the operand subset has "imm_data" we implicitly assume that means
237 # "this is an INT ALU/Logical FU jobbie, RB is multiplexed with the immediate"
238 if hasattr(oper_r
, "imm_data"):
239 # select immediate if opcode says so. however also change the latch
240 # to trigger *from* the opcode latch instead.
241 op_is_imm
= oper_r
.imm_data
.imm_ok
242 imm
= oper_r
.imm_data
.imm
243 self
._mux
_op
(m
, sl
, op_is_imm
, imm
, 1)
245 # create a latch/register for src1/src2 (even if it is a copy of an immediate)
246 for i
in range(self
.n_src
):
247 src
, alusrc
, latch
= sl
[i
]
248 latchregister(m
, src
, alusrc
, latch
, name
="src_r%d" % i
)
254 # all request signals gated by busy_o. prevents picker problems
255 m
.d
.comb
+= self
.busy_o
.eq(opc_l
.q
) # busy out
256 bro
= Repl(self
.busy_o
, self
.n_src
)
257 m
.d
.comb
+= self
.rd
.rel
.eq(src_l
.q
& bro
) # src1/src2 req rel
259 # on a go_read, tell the ALU we're accepting data.
260 # NOTE: this spells TROUBLE if the ALU isn't ready!
261 # go_read is only valid for one clock!
262 with m
.If(all_rd
): # src operands ready, GO!
263 with m
.If(~self
.alu
.p
.ready_o
): # no ACK yet
264 m
.d
.comb
+= self
.alu
.p
.valid_i
.eq(1) # so indicate valid
266 brd
= Repl(self
.busy_o
& self
.shadown_i
, self
.n_dst
)
267 # only proceed if ALU says its output is valid
268 with m
.If(self
.alu
.n
.valid_o
):
269 # when ALU ready, write req release out. waits for shadow
270 m
.d
.comb
+= self
.wr
.rel
.eq(req_l
.q
& brd
)
271 # when output latch is ready, and ALU says ready, accept ALU output
273 m
.d
.comb
+= self
.alu
.n
.ready_i
.eq(1) # tells ALU "thanks got it"
275 # output the data from the latch on go_write
276 for i
in range(self
.n_dst
):
277 with m
.If(self
.wr
.go
[i
]):
278 m
.d
.comb
+= self
.dest
[i
].eq(drl
[i
])
288 yield from self
.oper_i
.ports()
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0, zero_a=0):
    """Simulation process: issue one operation to *dut* and return its result.

    :dut:    the MultiCompUnit under test
    :a, b:   values driven onto src_i[0] and src_i[1]
    :op:     value driven onto oper_i.insn_type
    :inv_a:  value for oper_i.invert_a
    :imm:    value for oper_i.imm_data.imm
    :imm_ok: value for oper_i.imm_data.imm_ok
    :zero_a: value for oper_i.zero_a

    A bare ``yield`` advances the simulation by one clock.  Returns the
    value read from dut.data_o while wr.go[0] is asserted.
    """
    yield dut.issue_i.eq(0)
    yield
    yield dut.src_i[0].eq(a)
    yield dut.src_i[1].eq(b)
    yield dut.oper_i.insn_type.eq(op)
    yield dut.oper_i.invert_a.eq(inv_a)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
    yield dut.oper_i.zero_a.eq(zero_a)

    # pulse issue for one clock
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # grant both reads, then wait until the unit raises a read-release
    yield dut.rd.go.eq(0b11)
    while True:
        yield
        rd_rel_o = yield dut.rd.rel
        print("rd_rel", rd_rel_o)
        if rd_rel_o:
            break
    yield
    yield dut.rd.go.eq(0)

    req_rel_o = yield dut.wr.rel
    result = yield dut.data_o
    print("req_rel", req_rel_o, result)

    # wait for the write-request release before granting the write
    while True:
        req_rel_o = yield dut.wr.rel
        result = yield dut.data_o
        print("req_rel", req_rel_o, result)
        if req_rel_o:
            break
        yield

    # data_o is only driven while wr.go is asserted, so sample it here
    yield dut.wr.go[0].eq(1)
    yield
    result = yield dut.data_o
    print("result", result)
    yield dut.wr.go[0].eq(0)
    yield
    return result
def scoreboard_sim(dut):
    """Simulation process: run a series of OP_ADD operations through *dut*.

    Every case uses a=5, b=2 and checks the returned result.
    NOTE(review): the immediate arguments of the first and fourth case were
    cut off in the source; imm=8, imm_ok=1 is used here and the expected
    values assume a 16-bit add, as the 65532 check implies - confirm.
    """
    # immediate replaces operand b: 5 + 8
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=0,
                               imm=8, imm_ok=1)
    assert result == 13

    # plain add: 5 + 2
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD)
    assert result == 7

    # inverted a: (~5 & 0xffff) + 2
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=1)
    assert result == 65532

    # zeroed a with immediate b: 0 + 8
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, zero_a=1,
                               imm=8, imm_ok=1)
    assert result == 8

    # zeroed a: 0 + 2
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, zero_a=1)
    assert result == 2
def test_compunit():
    """Build a 16-bit MultiCompUnit, dump its RTLIL and simulate it.

    Writes test_compunit1.il and test_compunit1.vcd in the current directory.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    m = Module()
    # NOTE(review): ALU width assumed to match the 16-bit rwid used below
    alu = ALU(16)
    dut = MultiCompUnit(16, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit1.il", "w") as f:
        f.write(vl)

    run_simulation(m, scoreboard_sim(dut), vcd_name='test_compunit1.vcd')
def test_compunit_regspec1():
    """Build a MultiCompUnit from a regspec, dump its RTLIL and simulate it.

    Writes test_compunit_regspec1.il and test_compunit_regspec1.vcd in the
    current directory.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    inspec = [('INT', 'a', '0:15'),
              ('INT', 'b', '0:15')]
    outspec = [('INT', 'o', '0:15'),
               ]

    regspec = (inspec, outspec)

    m = Module()
    # NOTE(review): ALU width assumed to match the 0:15 ranges in the regspec
    alu = ALU(16)
    dut = MultiCompUnit(regspec, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit_regspec1.il", "w") as f:
        f.write(vl)

    # own trace file, so this run does not overwrite test_compunit1.vcd
    run_simulation(m, scoreboard_sim(dut),
                   vcd_name='test_compunit_regspec1.vcd')
# script entry point: when run directly, execute the regspec-based test
if __name__ == '__main__':
    test_compunit_regspec1()