66f1245e93db3591bbdfcb8a2ee7ba106f4fa847
"""
Not in any way intended for production use.  Connects up FunctionUnits to
Register Files in a brain-dead fashion that permits one and only one
Function Unit to be operational at a time.

The principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

Under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  As these are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

The brain-dead part of this module is that even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
"""
22 from nmigen
import Elaboratable
, Module
, Signal
23 from nmigen
.cli
import rtlil
25 from nmutil
.picker
import PriorityPicker
26 from nmutil
.util
import treereduce
28 from soc
.fu
.compunits
.compunits
import AllFunctionUnits
29 from soc
.regfile
.regfiles
import RegFiles
30 from soc
.decoder
.power_decoder
import create_pdecode
31 from soc
.decoder
.power_decoder2
import PowerDecode2
32 from soc
.decoder
.decode2execute1
import Data
33 from soc
.experiment
.l0_cache
import TstL0CacheBuffer
# test only
34 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
35 from soc
.regfile
.regfiles
import FastRegs
# helper function for reducing a list of signals down to a parallel
# ORed tree.
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of *tree*.

    The attribute named by *attr* is read from each item and the results
    are combined with ``operator.or_`` by ``treereduce``.
    """
    def _leaf(node):
        # pick out the member that takes part in the OR-reduction
        return getattr(node, attr)

    return treereduce(tree, operator.or_, _leaf)
# helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of *fuspecs* with "full*" register names first.

    fuspecs: dict mapping a regfile port name to its spec.

    Returns a list of ``(regname, fspec)`` tuples.  Entries whose name
    starts with "full" come first; within each of the two groups the
    dict's own iteration order is kept.
    """
    # sorted() is stable, and False sorts before True, so keying on
    # "is NOT a full reg" moves the full regs to the front in one pass
    # without disturbing the relative order inside either group.
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
class NonProductionCore(Elaboratable):
    """Minimal core: decoder, Function Units and Register Files.

    Each Function Unit is wired to the regfile read/write ports named by
    its regspecs.  Every port is shared by all the Function Units that
    ask for it, with a PriorityPicker per port deciding who gets it.
    """

    def __init__(self, addrwid=6, idepth=16):
        """Create the sub-blocks and expose the issue/valid/busy signals.

        addrwid: address width handed to the L0 cache buffer and the FUs.
        idepth:  accepted for interface compatibility; not used here.
        """
        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(n_units=1, regwid=64, addrwid=addrwid)
        pi = self.l0.l0.dports[0].pi

        # function units (only one each)
        self.fus = AllFunctionUnits(pilist=[pi], addrwid=addrwid)

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder
        pdecode = create_pdecode()
        self.pdecode2 = PowerDecode2(pdecode)

        # issue/valid/busy signalling
        self.ivalid_i = self.pdecode2.e.valid  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(reset_less=True)

        # instruction input (aliases of the decoder's own inputs)
        self.bigendian_i = self.pdecode2.dec.bigendian
        self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in

    def elaborate(self, platform):
        """Build the module: add submodules, then wire issue and ports."""
        m = Module()

        m.submodules.pdecode2 = self.pdecode2
        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # the per-FU enable bits are needed by both port-connection passes
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """Route the decoded instruction to the Function Unit it selects.

        Returns a dict mapping each FU name to its one-bit enable signal.
        """
        comb = m.d.comb
        fus = self.fus.fus
        dec2 = self.pdecode2

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # connect up instructions.  only one is enabled at any given time
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq(self.ivalid_i & (dec2.e.fn_unit & fnunit).bool())
            # only the selected FU receives the operands and the issue
            # strobe, and only it is allowed to drive the shared busy_o
            with m.If(enable):
                comb += fu.oper_i.eq_from_execute1(dec2.e)
                comb += fu.issue_i.eq(self.issue_i)
                comb += self.busy_o.eq(fu.busy_o)
                rdmask = dec2.rdflags(fu)
                comb += fu.rdmaskn.eq(~rdmask)
            comb += fu_bitdict[funame].eq(enable)

        return fu_bitdict

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        comb = m.d.comb
        regs = self.regs

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print ("connect rd", regname, fspec)
                rpidx = regname  # ports are looked up by name

                # get the regfile specs for this regfile port
                (rf, read, write, wid, fuspec) = fspec
                name = "rdflag_%s_%s" % (regfile, regname)
                rdflag = Signal(name=name, reset_less=True)
                comb += rdflag.eq(rf)

                # select the required read port.  these are pre-defined sizes
                print (rpidx, regfile, regs.rf.keys())
                rport = regs.rf[regfile.lower()].r_ports[rpidx]

                # create a priority picker to manage this port
                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(len(fuspec))
                setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

                # connect the regspec "reg select" number to this port
                with m.If(rdpick.en_o):
                    comb += rport.ren.eq(read)

                # connect up the FU req/go signals, and the reg-read to the FU
                # and create a Read Broadcast Bus
                for pi, (funame, fu, idx) in enumerate(fuspec):
                    src = fu.src_i[idx]

                    # connect request-read to picker input, and output to go-rd
                    fu_active = fu_bitdict[funame]
                    pick = fu.rd_rel_o[idx] & fu_active & rdflag
                    comb += rdpick.i[pi].eq(pick)
                    comb += fu.go_rd_i[idx].eq(rdpick.o[pi])

                    # connect regfile port to input, creating a Broadcast Bus
                    print ("reg connect widths",
                           regfile, regname, pi, funame,
                           src.shape(), rport.data_o.shape())
                    comb += src.eq(rport.data_o)  # all FUs connect to same port

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  as there is a combinatorial loop bug
        at the moment, these all use sync.
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print ("connect wr", regname, fspec)
                rpidx = regname  # ports are looked up by name

                # get the regfile specs for this regfile port
                (rf, read, write, wid, fuspec) = fspec

                # select the required write port.  these are pre-defined sizes
                print (regfile, regs.rf.keys())
                wport = regs.rf[regfile.lower()].w_ports[rpidx]

                # create a priority picker to manage this port
                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(len(fuspec))
                setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                with m.If(wrpick.en_o):
                    sync += wport.wen.eq(write)
                with m.Else():
                    sync += wport.wen.eq(0)

                # connect up the FU req/go signals and the reg-read to the FU
                # these are arbitrated by Data.ok signals
                wsigs = []
                for pi, (funame, fu, idx) in enumerate(fuspec):
                    # write-request comes from dest.ok
                    dest = fu.get_out(idx)
                    name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                    wrflag = Signal(name=name, reset_less=True)
                    comb += wrflag.eq(dest.ok)

                    # connect request-write to picker input, and output to
                    # go-wr.  NOTE: wrflag is deliberately not part of the
                    # pick condition at the moment.
                    fu_active = fu_bitdict[funame]
                    pick = fu.wr.rel[idx] & fu_active  #& wrflag
                    comb += wrpick.i[pi].eq(pick)
                    sync += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o)
                    # connect regfile port to input
                    print ("reg connect widths",
                           regfile, regname, pi, funame,
                           dest.shape(), wport.data_i.shape())
                    wsigs.append(dest)

                # here is where we create the Write Broadcast Bus. simple, eh?
                sync += wport.data_i.eq(ortreereduce(wsigs, "data"))

    def get_byregfiles(self, readmode):
        """Group the Function Units' regspecs by regfile and by port name.

        readmode: True to collect source (read) specs, False for
                  destination (write) specs.

        Returns ``(byregfiles, byregfiles_spec)``:

        * ``byregfiles[regfile][idx]`` is a list of ``(funame, fu, idx)``
          tuples, one per FU operand with that index.
        * ``byregfiles_spec[regfile][regname]`` is the list
          ``[rdflag, read, write, wid, fuspecs]`` where ``fuspecs`` holds
          the ``(funame, fu, idx)`` tuples that want that named port.
        """
        mode = "read" if readmode else "write"
        dec2 = self.pdecode2
        fus = self.fus.fus

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print ("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print (" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = dec2.regspecmap_read(regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _, write = dec2.regspecmap_write(regfile, regname)

                # both dicts always gain a regfile entry together
                lanes = byregfiles.setdefault(regfile, {})
                portspecs = byregfiles_spec.setdefault(regfile, {})
                # the first FU to name a port fixes its flag/select signals
                portspec = portspecs.setdefault(
                    regname, [rdflag, read, write, wid, []])

                # here we start to create "lanes"
                fuspec = (funame, fu, idx)
                lanes.setdefault(idx, []).append(fuspec)
                portspec[4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print ("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print (" rf %s port %s lane: %s" % (mode, regfile, regname))
                print (" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print (" ", funame, fu, idx, fusig)
                    print ()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the Function Units and of the decoder."""
        yield from self.fus.ports()
        yield from self.pdecode2.ports()
        # TODO: regs

    def ports(self):
        """Return every port yielded by ``__iter__`` as a list."""
        return list(self)
class TestIssuer(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.
    """

    def __init__(self, addrwid=6, idepth=16):
        """Create the core, the instruction memory and the control signals.

        addrwid: address width passed on to NonProductionCore.
        idepth:  depth passed to the TestMemory holding the instructions.
        """
        # main instruction core
        self.core = core = NonProductionCore(addrwid)

        # Test Instruction memory
        self.imem = TestMemory(32, idepth)
        self.i_rd = self.imem.rdport
        #self.i_wr = self.imem.write_port() errr...

        # instruction go/monitor
        self.go_insn_i = Signal(reset_less=True)
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc")  # set "ok" to indicate "please change me"
        self.busy_o = core.busy_o
        self.memerr_o = Signal(reset_less=True)

        # FAST regfile read /write ports
        self.fast_rd1 = self.core.regs.rf['fast'].r_ports['d_rd1']
        self.fast_wr1 = self.core.regs.rf['fast'].w_ports['d_wr1']

    def elaborate(self, platform):
        """Build the fetch/issue state machine around the core.

        States: IDLE (pick the PC and read the instruction), INSN_READ
        (present the instruction and raise issue), INSN_ACTIVE (wait for
        the core to go idle, then write PC+4 back and return to IDLE).
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = self.core
        m.submodules.imem = self.imem

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        comb += ldst.ad.go.eq(ldst.ad.rel)  # link addr-go direct to rel
        comb += ldst.st.go.eq(ldst.st.rel)  # link store-go direct to rel

        # PC and instruction from I-Memory
        current_insn = Signal(32)  # current fetched instruction (note sync)
        current_pc = Signal(64)    # current PC (note it is reset/sync)
        comb += self.pc_o.eq(current_pc)

        # next instruction address: +4 on the current *PC*.  this value is
        # written to the PC register below, so it must be derived from
        # current_pc and not from the fetched instruction word.
        nia = Signal(64, reset_less=True)
        comb += nia.eq(current_pc + 4)

        # temporaries
        core_busy_o = core.busy_o          # core is busy
        core_ivalid_i = core.ivalid_i      # instruction is valid
        core_issue_i = core.issue_i        # instruction is issued
        core_be_i = core.bigendian_i       # bigendian mode
        core_opcode_i = core.raw_opcode_i  # raw opcode

        # actually use a nmigen FSM for the first time (w00t)
        with m.FSM():

            # waiting (zzz)
            with m.State("IDLE"):
                with m.If(self.go_insn_i):
                    # instruction allowed to go: start by reading the PC
                    pc = Signal(64, reset_less=True)
                    with m.If(self.pc_i.ok):
                        # incoming override (start from pc_i)
                        comb += pc.eq(self.pc_i.data)
                    with m.Else():
                        # otherwise read FastRegs regfile for PC
                        comb += self.fast_rd1.ren.eq(1 << FastRegs.PC)
                        comb += pc.eq(self.fast_rd1.data_o)
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    sync += current_pc.eq(pc)
                    comb += self.i_rd.addr.eq(pc)
                    #comb += self.i_rd.en.eq(1) # comb-read (no need to set)
                    sync += current_insn.eq(self.i_rd.data)
                    m.next = "INSN_READ"  # move to "issue" phase

            # got the instruction: start issue
            with m.State("INSN_READ"):
                sync += core_ivalid_i.eq(1)  # say instruction is valid
                sync += core_issue_i.eq(1)   # and issued (ivalid_i redundant)
                sync += core_be_i.eq(0)      # little-endian mode
                sync += core_opcode_i.eq(current_insn)  # actual opcode
                m.next = "INSN_ACTIVE"  # move to "wait for completion" phase

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                sync += core_issue_i.eq(0)  # issue raises for only one cycle
                with m.If(~core_busy_o):  # instruction done!
                    sync += core_ivalid_i.eq(0)  # say instruction is invalid
                    sync += core_opcode_i.eq(0)  # clear out (no good reason)
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever the pipeline
                    # updated, recording the next sequential address as PC
                    comb += self.fast_wr1.wen.eq(1 << FastRegs.PC)
                    comb += self.fast_wr1.data_i.eq(nia)
                    m.next = "IDLE"  # back to idle

        return m

    def __iter__(self):
        """Yield the issuer's own ports, then the core's and the memory's."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.go_insn_i
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()

    def ports(self):
        """Return every port yielded by ``__iter__`` as a list."""
        return list(self)
if __name__ == '__main__':
    # convert the test issuer (core plus instruction fetch) to RTLIL
    dut = TestIssuer()
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_issuer.il", "w") as f:
        f.write(vl)

    # and the bare core on its own
    dut = NonProductionCore()
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("non_production_core.il", "w") as f:
        f.write(vl)