3b195d240c925ab9cdebc1278a0a6eb43a9fc071
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal
23 from nmigen.cli import rtlil
24
25 from nmutil.picker import PriorityPicker
26 from nmutil.util import treereduce
27
28 from soc.fu.compunits.compunits import AllFunctionUnits
29 from soc.regfile.regfiles import RegFiles
30 from soc.decoder.power_decoder import create_pdecode
31 from soc.decoder.power_decoder2 import PowerDecode2
32 from soc.decoder.decode2execute1 import Data
33 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
34 from soc.config.test.test_loadstore import TestMemPspec
35 from soc.decoder.power_enums import MicrOp
36 import operator
37
38
39 # helper function for reducing a list of signals down to a parallel
40 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of *tree*.

    tree: the collection of objects handed to treereduce.
    attr: name of the attribute read from each item (default "data_o").

    Returns whatever treereduce produces when combining the selected
    attributes with operator.or_.
    """
    def select(item):
        # plain getattr: attr is used as a single attribute name
        return getattr(item, attr)

    return treereduce(tree, operator.or_, select)
43
44
def ortreereduce_sig(tree):
    """OR together the items of *tree* themselves (no attribute lookup).

    Returns whatever treereduce produces when combining the items with
    operator.or_.
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
47
48
49 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of *fuspecs* with "full" register ports first.

    fuspecs: dict mapping a regfile port name to its spec.

    Returns a list of (regname, fspec) tuples.  Entries whose name starts
    with "full" come first; within each of the two groups the dict's own
    iteration order is kept.
    """
    # sorted() is stable and False sorts before True, so keying on
    # "is not a full port" moves the full ports to the front without
    # disturbing the relative order inside either group.
    return sorted(fuspecs.items(),
                  key=lambda item: not item[0].startswith("full"))
59
60
class NonProductionCore(Elaboratable):
    """Simple core connecting Function Units to Register Files.

    As the module description says, this is not intended for production
    use: only one Function Unit is expected to be operational at a time
    and regfile read/write hazards are not analysed.
    """

    def __init__(self, pspec):
        """Create the sub-components and the core's top-level signals.

        pspec is handed, unmodified, to TstL0CacheBuffer and
        AllFunctionUnits.
        """
        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder
        pdecode = create_pdecode()
        self.pdecode2 = PowerDecode2(pdecode)

        # issue/valid/busy signalling
        self.ivalid_i = self.pdecode2.valid  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # instruction input
        self.bigendian_i = self.pdecode2.dec.bigendian
        self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_reset_i = Signal()
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

    def elaborate(self, platform):
        """Build the Module: submodules, FU enables, regfile ports, reset."""
        m = Module()

        m.submodules.pdecode2 = self.pdecode2
        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        # connect up reset
        m.d.comb += ResetSignal().eq(self.core_reset_i)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns fu_bitdict: a dict mapping each Function Unit name to its
        one-bit slice of the fu_enable signal.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        dec2 = self.pdecode2

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {funame: fu_enable[i] for i, funame in enumerate(fus)}

        # only run when allowed and when instruction is valid
        can_run = Signal(reset_less=True)
        comb += can_run.eq(self.ivalid_i & ~self.core_stopped_i)

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((dec2.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: busy is held high while it counts down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(can_run):
            with m.Switch(dec2.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                # NOP: no FU involved, so raise busy and start the counter
                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        with m.If(enable):
                            # route op, issue, busy, read flags and mask to FU
                            comb += fu.oper_i.eq_from_execute1(dec2.e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            rdmask = dec2.rdflags(fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.

        fu_bitdict is the per-FU enable dict returned by
        connect_instruction.
        """
        comb = m.d.comb
        regs = self.regs

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                # get the regfile specs for this regfile port
                # (the write and width entries are not needed for reading)
                (rf, read, _write, _wid, fuspec) = fspec
                name = "rdflag_%s_%s" % (regfile, regname)
                rdflag = Signal(name=name, reset_less=True)
                comb += rdflag.eq(rf)

                # select the required read port.  these are pre-defined sizes
                print(regname, regfile, regs.rf.keys())
                rport = regs.rf[regfile.lower()].r_ports[regname]

                # create a priority picker to manage this port
                rdpick = PriorityPicker(len(fuspec))
                setattr(m.submodules, "rdpick_%s_%s" %
                        (regfile, regname), rdpick)

                # connect the regspec "reg select" number to this port
                with m.If(rdpick.en_o):
                    comb += rport.ren.eq(read)

                # connect up the FU req/go signals, and the reg-read to the FU
                # and create a Read Broadcast Bus
                for pi, (funame, fu, idx) in enumerate(fuspec):
                    src = fu.src_i[idx]

                    # connect request-read to picker input, and output to go-rd
                    fu_active = fu_bitdict[funame]
                    pick = fu.rd_rel_o[idx] & fu_active & rdflag
                    comb += rdpick.i[pi].eq(pick)
                    comb += fu.go_rd_i[idx].eq(rdpick.o[pi])

                    # connect regfile port to input, creating a Broadcast Bus
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  all three are driven
        combinatorially here.

        fu_bitdict is the per-FU enable dict returned by
        connect_instruction.
        """
        comb = m.d.comb
        regs = self.regs

        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same structure as the read ports, but for writing
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]

            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect wr", regname, fspec)
                # get the regfile specs for this regfile port
                # (the read-side entries are not needed for writing)
                (_rf, _read, write, _wid, fuspec) = fspec

                # select the required write port.  these are pre-defined sizes
                print(regfile, regs.rf.keys())
                wport = regs.rf[regfile.lower()].w_ports[regname]

                # create a priority picker to manage this port
                wrpick = PriorityPicker(len(fuspec))
                setattr(m.submodules, "wrpick_%s_%s" %
                        (regfile, regname), wrpick)

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                with m.If(wrpick.en_o):
                    comb += wport.wen.eq(write)
                with m.Else():
                    comb += wport.wen.eq(0)

                # connect up the FU req/go signals and the reg-write data
                wsigs = []
                for pi, (funame, fu, idx) in enumerate(fuspec):
                    dest = fu.get_out(idx)
                    fu_dest_latch = fu.get_fu_out(idx)  # latched output

                    # dest.ok qualified by busy.  NOTE: not currently part
                    # of the pick expression below
                    name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                    wrflag = Signal(name=name, reset_less=True)
                    comb += wrflag.eq(dest.ok & fu.busy_o)

                    # connect request-write to picker input, and output
                    # to go-wr
                    fu_active = fu_bitdict[funame]
                    pick = fu.wr.rel_o[idx] & fu_active
                    comb += wrpick.i[pi].eq(pick)
                    comb += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o)

                    # collect the FU output for the regfile port input
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          dest.shape(), wport.data_i.shape())
                    wsigs.append(fu_dest_latch)

                # here is where we create the Write Broadcast Bus. simple, eh?
                comb += wport.data_i.eq(ortreereduce_sig(wsigs))

    def get_byregfiles(self, readmode):
        """Group the Function Units' register ports by regfile.

        readmode: True to collect source (read) ports, False to collect
        destination (write) ports.

        Returns (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is the list
          [rdflag, read, write, wid, fuspecs], where fuspecs is the list
          of (funame, fu, idx) tuples using that named port.  write is
          None in read mode; rdflag and read are None in write mode.
        """
        mode = "read" if readmode else "write"
        dec2 = self.pdecode2
        fus = self.fus.fus

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = dec2.regspecmap_read(regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _wrport, write = dec2.regspecmap_write(regfile, regname)

                # both dicts gain a regfile entry at the same time
                lanes = byregfiles.setdefault(regfile, {})
                specs = byregfiles_spec.setdefault(regfile, {})

                # the first FU to mention a port name fixes its spec
                spec = specs.setdefault(regname,
                                        [rdflag, read, write, wid, []])

                # here we start to create "lanes"
                fuspec = (funame, fu, idx)
                lanes.setdefault(idx, []).append(fuspec)
                spec[4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the function units, decoder and L0 buffer."""
        yield from self.fus.ports()
        yield from self.pdecode2.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return the ports yielded by __iter__ as a list."""
        return list(self)
372
373
if __name__ == '__main__':
    # build a core from a test parameter spec and dump it as RTLIL
    pspec_args = {
        "ldst_ifacetype": 'testpi',
        "imem_ifacetype": '',
        "addr_wid": 48,
        "mask_wid": 8,
        "reg_wid": 64,
    }
    pspec = TestMemPspec(**pspec_args)
    dut = NonProductionCore(pspec)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)