1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Signal
, Mux
, Elaboratable
, Repl
, Array
, Record
4 from nmigen
.hdl
.rec
import (DIR_FANIN
, DIR_FANOUT
)
6 from nmutil
.latch
import SRLatch
, latchregister
7 from nmutil
.iocontrol
import RecordObject
9 from soc
.decoder
.power_decoder2
import Data
10 from soc
.decoder
.power_enums
import InternalOp
11 from soc
.fu
.regspec
import RegSpec
, RegSpecALUAPI
""" Computation Unit (aka "ALU Manager").

    This module runs a "revolving door" set of three latches, based on
    Issue, Go_Read and Go_Write, where one of them cannot be set on any
    given cycle.

    * When issue is first raised, a busy signal is sent out.
      The src1 and src2 registers and the operand can be latched in
      at this point.

    * Read request is set, which is acknowledged through the Scoreboard
      to the priority picker, which generates (one and only one) Go_Read
      at a time.  One of those will (eventually) be this Computation Unit.

    * Once Go_Read is set, the src1/src2/operand latch door shuts (locking
      src1/src2/operand in place), and the ALU is told to proceed.

    * When the ALU pipeline is ready, this activates "write request release",
      and the ALU's output is captured into a temporary register.

    * Write request release is *HELD UP* (prevented from proceeding) if shadowN
      is asserted LOW.  This is how all speculation, precise exceptions,
      predication - everything - is achieved.

    * Write request release will go through a similar process as Read request,
      resulting (eventually) in Go_Write being asserted.

    * When Go_Write is asserted, two things happen: (1) the data in the temp
      register is placed combinatorially onto the output, and (2) the
      req_l latch is cleared, busy is dropped, and the Comp Unit is back
      through its revolving door to do another task.

    Note that the read and write latches are held synchronously for one cycle,
    i.e. that when Go_Read comes in, one cycle is given in which the incoming
    register (broadcast over a Regfile Read Port) may have time to be latched.

    It is REQUIRED that Go_Read be held valid only for one cycle, and it is
    REQUIRED that the corresponding Read_Req be dropped exactly one cycle after
    Go_Read is asserted HI.

    Likewise for Go_Write: this is asserted for one cycle, and Req_Writes must
    likewise be dropped exactly one cycle after assertion of Go_Write.

    When Go_Die is asserted then strictly speaking the entire FSM should be
    fully reset and that includes sending a cancellation request to the ALU.
    (XXX TODO: alu "go die" is not presently wired up)
"""
def go_record(n, name):
    """Build the go/rel handshake Record used for scoreboard signalling.

    :n: width (in bits) of both the ``go`` and the ``rel`` field
    :name: name passed to the Record

    Returns the Record.  ``go`` is declared DIR_FANIN, ``rel`` is declared
    DIR_FANOUT, and both fields are marked reset_less.
    """
    layout = [('go', n, DIR_FANIN),
              ('rel', n, DIR_FANOUT)]
    r = Record(layout, name=name)
    r.go.reset_less = True
    r.rel.reset_less = True
    # callers store the result (self.rd / self.wr), so it must be returned
    return r
71 # see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
class CompUnitRecord(RegSpec, RecordObject):
    """
    base class for Computation Units, to provide a uniform API
    and allow "record.connect" etc. to be used, particularly when
    it comes to connecting multiple Computation Units up as a block

    LDSTCompUnitRecord should derive from this class and add the
    additional signals it requires

    :subkls: the class (not an instance) needed to construct the opcode
    :rwid: either an integer (specifies width of all regs) or a "regspec"

    see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
    """
    def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
        """Create the operand, opcode and scoreboard-signalling fields.

        :subkls: class instantiated (with no arguments) to create oper_i
        :rwid: passed through to RegSpec
        :n_src: number of source operands (src1_i, src2_i, ...)
        :n_dst: number of destination operands (dest1_i, dest2_i, ...)
        :name: passed through to RecordObject
        """
        RegSpec.__init__(self, rwid, n_src, n_dst)
        RecordObject.__init__(self, name)

        # create source operands.  each one gets its own "srcN_i" name:
        # re-using the constructor's "name" argument here would give every
        # source signal the same (possibly None) attribute name.
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            sname = "src%d_i" % j
            rw = self._get_srcwid(i)
            sreg = Signal(rw, name=sname, reset_less=True)
            setattr(self, sname, sreg)
            src.append(sreg)
        # list form of the source operands (read by MultiCompUnit)
        self._src_i = src

        # create dest operands
        dst = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            dname = "dest%d_i" % j
            rw = self._get_dstwid(i)
            dreg = Signal(rw, name=dname, reset_less=True)
            setattr(self, dname, dreg)
            dst.append(dreg)
        # list form of the dest operands, parallel to _src_i
        self._dest = dst

        # operation / data input
        self.oper_i = subkls()  # operand

        # create read/write and other scoreboard signalling
        self.rd = go_record(n_src, name="rd")  # read in, req out
        self.wr = go_record(n_dst, name="wr")  # write in, req out
        self.issue_i = Signal(reset_less=True)  # fn issue in
        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
        self.go_die_i = Signal()  # go die (reset)

        # output (busy/done)
        self.busy_o = Signal(reset_less=True)  # fn busy out
        self.done_o = Signal(reset_less=True)
class MultiCompUnit(RegSpecALUAPI, Elaboratable):
    """Computation Unit: wraps an ALU with issue / read / write latches.

    All externally visible signals live in a CompUnitRecord (self.cu);
    this class re-exports them under convenience names and builds the
    latch logic in elaborate().
    """

    def __init__(self, rwid, alu, opsubsetkls, n_src=2, n_dst=1):
        """MultiCompUnit

        * :rwid:   width of register latches (TODO: allocate per regspec)
        * :alu:    the ALU (pipeline, FSM) - must conform to nmutil Pipe API
        * :opsubsetkls:   the subset of Decode2ExecuteType
        * :n_src:  number of src operands
        * :n_dst:  number of destination operands
        """
        RegSpecALUAPI.__init__(self, rwid, alu)
        self.n_src, self.n_dst = n_src, n_dst
        self.opsubsetkls = opsubsetkls
        self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)

        # convenience names for src operands (also gathered into a list)
        src = []
        for i in range(n_src):
            j = i + 1  # name numbering to match src1/src2
            name = "src%d_i" % j
            sreg = getattr(cu, name)
            setattr(self, name, sreg)
            src.append(sreg)

        # convenience names for dest operands (also gathered into a list)
        dest = []
        for i in range(n_dst):
            j = i + 1  # name numbering to match dest1/2...
            name = "dest%d_i" % j
            dreg = getattr(cu, name)
            setattr(self, name, dreg)
            dest.append(dreg)

        # more convenience names
        self.rd = cu.rd
        self.wr = cu.wr
        self.go_rd_i = self.rd.go  # temporary naming
        self.go_wr_i = self.wr.go  # temporary naming
        self.rd_rel_o = self.rd.rel  # temporary naming
        self.req_rel_o = self.wr.rel  # temporary naming
        self.issue_i = cu.issue_i
        self.shadown_i = cu.shadown_i
        self.go_die_i = cu.go_die_i

        # operation / data input
        self.oper_i = cu.oper_i
        self.src_i = src

        self.busy_o = cu.busy_o
        self.dest = dest
        self.data_o = self.dest[0]  # Dest out
        self.done_o = cu.done_o

    def elaborate(self, platform):
        """Build and return the Module holding the ALU and its latch logic.

        Instantiates the ALU plus the src/opc/req/rst/rdok SR latches,
        latch-registers for the opcode, each source operand and each ALU
        output, and the combinatorial rd.rel / wr.rel / busy_o / done_o
        logic.
        """
        m = Module()
        m.submodules.alu = self.alu
        m.submodules.src_l = src_l = SRLatch(False, self.n_src, name="src")
        m.submodules.opc_l = opc_l = SRLatch(sync=False, name="opc")
        m.submodules.req_l = req_l = SRLatch(False, self.n_dst, name="req")
        m.submodules.rst_l = rst_l = SRLatch(sync=False, name="rst")
        m.submodules.rok_l = rok_l = SRLatch(sync=False, name="rdok")

        # ALU only proceeds when all src are ready.  rd_rel_o is delayed
        # so combine it with go_rd_i.  if all bits are set we're good
        all_rd = Signal(reset_less=True)
        m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
                              (((~self.rd.rel) | self.rd.go).all()))

        # write_requests all done
        # req_done works because any one of the last of the writes
        # is enough, when combined with when read-phase is done (rst_l.q)
        wr_any = Signal(reset_less=True)
        req_done = Signal(reset_less=True)
        m.d.comb += self.done_o.eq(self.busy_o & ~(self.wr.rel.bool()))
        m.d.comb += wr_any.eq(self.wr.go.bool())
        m.d.comb += req_done.eq(rst_l.q & wr_any)

        # reset signals: go_die_i is OR'd into every one of them
        reset = Signal(reset_less=True)
        rst_r = Signal(reset_less=True)  # reset latch off
        reset_w = Signal(self.n_dst, reset_less=True)
        reset_r = Signal(self.n_src, reset_less=True)
        m.d.comb += reset.eq(req_done | self.go_die_i)
        m.d.comb += rst_r.eq(self.issue_i | self.go_die_i)
        m.d.comb += reset_w.eq(self.wr.go | Repl(self.go_die_i, self.n_dst))
        m.d.comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))

        # read-done,wr-proceed latch
        m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
        m.d.comb += rok_l.r.eq(self.alu.p.ready_o)  # off when ALU acknowledges

        # wr-done, back-to-start latch
        m.d.comb += rst_l.s.eq(all_rd)  # set when read-phase is fully done
        m.d.comb += rst_l.r.eq(rst_r)  # *off* on issue

        # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
        m.d.sync += opc_l.s.eq(self.issue_i)  # set on issue
        m.d.sync += opc_l.r.eq(self.alu.n.valid_o & req_done)  # reset on ALU

        # src operand latch (not using go_wr_i)
        m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
        m.d.sync += src_l.r.eq(reset_r)

        # dest operand latch (not using issue_i)
        m.d.sync += req_l.s.eq(Repl(all_rd, self.n_dst))
        m.d.sync += req_l.r.eq(reset_w)

        # create a latch/register for the operand
        oper_r = self.opsubsetkls()
        latchregister(m, self.oper_i, oper_r, self.issue_i, "oper_r")

        # and for each output from the ALU.  these hold *destination* data,
        # so they are sized from the destination width, not the source width
        drl = []
        for i in range(self.n_dst):
            name = "data_r%d" % i
            data_r = Signal(self.cu._get_dstwid(i), name=name, reset_less=True)
            latchregister(m, self.get_out(i), data_r, req_l.q[i], name)
            drl.append(data_r)

        # pass the operation to the ALU
        m.d.comb += self.get_op().eq(oper_r)

        # create list of src/alu-src/src-latch.  override 1st and 2nd one below.
        # in the case, for ALU and Logical pipelines, we assume RB is the 2nd
        # operand in the input "regspec".  see for example
        # soc.fu.alu.pipe_data.ALUInputData
        # TODO: assume RA is the 1st operand, zero_a detection is needed.
        sl = []
        for i in range(self.n_src):
            sl.append([self.src_i[i], self.get_in(i), src_l.q[i]])

        # TODO: if the operand subset has "zero_a", src_i[0] could have zero
        # multiplexed in instead (latch triggered from the opcode latch).
        # see https://bugs.libre-soc.org/show_bug.cgi?id=336

        # if the operand subset has "imm_data" we implicitly assume that means
        # "this is an INT ALU/Logical FU jobbie, RB is multiplexed with the
        # immediate"
        if hasattr(oper_r, "imm_data"):
            # select immediate if opcode says so.  however also change the
            # latch to trigger *from* the opcode latch instead.
            op_is_imm = oper_r.imm_data.imm_ok
            src2_or_imm = Signal(self.cu._get_srcwid(1), reset_less=True)
            src_sel = Signal(reset_less=True)
            m.d.comb += src_sel.eq(Mux(op_is_imm, opc_l.q, src_l.q[1]))
            m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm,
                                           self.src_i[1]))
            # overwrite 2nd src-latch with immediate-muxed stuff
            sl[1][0] = src2_or_imm
            sl[1][2] = src_sel

        # create a latch/register for src1/src2 (even if it is a copy of an
        # immediate)
        for i in range(self.n_src):
            src, alusrc, latch = sl[i]
            latchregister(m, src, alusrc, latch, name="src_r%d" % i)

        # all request signals gated by busy_o.  prevents picker problems
        m.d.comb += self.busy_o.eq(opc_l.q)  # busy out
        bro = Repl(self.busy_o, self.n_src)
        m.d.comb += self.rd.rel.eq(src_l.q & bro)  # src1/src2 req rel

        # on a go_read, tell the ALU we're accepting data.
        # NOTE: this spells TROUBLE if the ALU isn't ready!
        # go_read is only valid for one clock!
        with m.If(all_rd):  # src operands ready, GO!
            with m.If(~self.alu.p.ready_o):  # no ACK yet
                m.d.comb += self.alu.p.valid_i.eq(1)  # so indicate valid

        brd = Repl(self.busy_o & self.shadown_i, self.n_dst)
        # only proceed if ALU says its output is valid
        with m.If(self.alu.n.valid_o):
            # when ALU ready, write req release out.  waits for shadow
            m.d.comb += self.wr.rel.eq(req_l.q & brd)
            # when output latch is ready, and ALU says ready, accept ALU output
            # NOTE(review): the gating condition was lost from the source;
            # "reset" is the only otherwise-unused signal here and fits the
            # comment above - confirm against the upstream file.
            with m.If(reset):
                m.d.comb += self.alu.n.ready_i.eq(1)  # tells ALU "thanks got it"

        # output the data from the latch on go_write
        for i in range(self.n_dst):
            with m.If(self.wr.go[i]):
                m.d.comb += self.dest[i].eq(drl[i])

        return m

    def __iter__(self):
        """Yield the unit's interface signals (used by ports()).

        NOTE(review): only the oper_i line survived in the source; the
        remaining entries are the signals bound in __init__ - confirm the
        exact set and order against the upstream file.
        """
        yield self.rd.go
        yield self.wr.go
        yield self.issue_i
        yield self.shadown_i
        yield self.go_die_i
        yield from self.oper_i.ports()
        yield from self.src_i
        yield self.busy_o
        yield self.rd.rel
        yield self.wr.rel
        yield self.data_o

    def ports(self):
        """Return the interface signals as a list."""
        return list(self)
def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0):
    """Simulation process: push one operation through the Comp Unit.

    :dut: the MultiCompUnit under test
    :a: value driven onto src_i[0]
    :b: value driven onto src_i[1]
    :op: value driven onto oper_i.insn_type
    :inv_a: value driven onto oper_i.invert_a
    :imm: value driven onto oper_i.imm_data.imm
    :imm_ok: value driven onto oper_i.imm_data.imm_ok

    Returns the value read from data_o while wr.go[0] is asserted.

    NOTE(review): the clock ticks (bare ``yield``) and the polling loops
    were missing from the source and have been reconstructed from the
    statement order - confirm the timing against the upstream file.
    """
    yield dut.issue_i.eq(0)
    yield

    # set up operands and opcode, then pulse issue for one cycle
    yield dut.src_i[0].eq(a)
    yield dut.src_i[1].eq(b)
    yield dut.oper_i.insn_type.eq(op)
    yield dut.oper_i.invert_a.eq(inv_a)
    yield dut.oper_i.imm_data.imm.eq(imm)
    yield dut.oper_i.imm_data.imm_ok.eq(imm_ok)
    yield dut.issue_i.eq(1)
    yield
    yield dut.issue_i.eq(0)
    yield

    # assert go-read on both sources and wait for read-request release
    yield dut.rd.go.eq(0b11)
    while True:
        yield
        rd_rel_o = yield dut.rd.rel
        print("rd_rel", rd_rel_o)
        if rd_rel_o:
            break
    yield
    yield dut.rd.go.eq(0)

    req_rel_o = yield dut.wr.rel
    result = yield dut.data_o
    print("req_rel", req_rel_o, result)

    # wait for write-request release
    while True:
        req_rel_o = yield dut.wr.rel
        result = yield dut.data_o
        print("req_rel", req_rel_o, result)
        if req_rel_o:
            break
        yield

    # go-write for one cycle: the latched result appears on data_o
    yield dut.wr.go[0].eq(1)
    yield
    result = yield dut.data_o
    print("result", result)
    yield dut.wr.go[0].eq(0)
    yield

    # scoreboard_sim asserts on this value
    return result
def scoreboard_sim(dut):
    """Simulation process: run three OP_ADD operations and check results.

    :dut: the MultiCompUnit under test (driven through op_sim)
    """
    # NOTE(review): the tail of this first call was lost from the source.
    # imm/imm_ok are the only op_sim parameters left after inv_a, so an
    # immediate add is exercised here; the value 8 is reconstructed.
    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=0,
                               imm=8, imm_ok=1)
    assert result == 13  # immediate replaces src2: 5 + 8

    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD)
    assert result == 7  # 5 + 2

    result = yield from op_sim(dut, 5, 2, InternalOp.OP_ADD, inv_a=1)
    assert result == 65532  # (~5 + 2) in 16 bits
def test_compunit():
    """Build a 16-bit MultiCompUnit, dump its RTLIL and simulate it.

    Writes test_compunit1.il and test_compunit1.vcd to the current
    directory.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    m = Module()
    # NOTE(review): ALU construction was lost from the source; width 16
    # matches the rwid passed to MultiCompUnit - confirm.
    alu = ALU(16)
    dut = MultiCompUnit(16, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit1.il", "w") as f:
        f.write(vl)

    run_simulation(m, scoreboard_sim(dut), vcd_name='test_compunit1.vcd')
def test_compunit_regspec1():
    """Build a MultiCompUnit from a regspec, dump its RTLIL and simulate it.

    Writes test_compunit_regspec1.il and test_compunit_regspec1.vcd to the
    current directory.
    """
    from alu_hier import ALU
    from soc.fu.alu.alu_input_record import CompALUOpSubset

    inspec = [('INT', 'a', '0:15'),
              ('INT', 'b', '0:15')]
    outspec = [('INT', 'o', '0:15'),
               ]

    regspec = (inspec, outspec)

    m = Module()
    # NOTE(review): ALU construction was lost from the source; width 16
    # matches the '0:15' ranges in the regspec - confirm.
    alu = ALU(16)
    dut = MultiCompUnit(regspec, alu, CompALUOpSubset)
    m.submodules.cu = dut

    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_compunit_regspec1.il", "w") as f:
        f.write(vl)

    # use this test's own VCD name, so that it does not overwrite the
    # trace written by the test_compunit1 simulation
    run_simulation(m, scoreboard_sim(dut),
                   vcd_name='test_compunit_regspec1.vcd')
# script entry point: run the regspec-based Computation Unit test
if __name__ == '__main__':
    test_compunit_regspec1()