1 from nmigen
.compat
.sim
import run_simulation
2 from nmigen
.cli
import verilog
, rtlil
3 from nmigen
import Module
, Const
, Signal
, Array
, Cat
, Elaboratable
5 from regfile
.regfile
import RegFileArray
, treereduce
6 from scoreboard
.fn_unit
import IntFnUnit
, FPFnUnit
, LDFnUnit
, STFnUnit
7 from scoreboard
.fu_fu_matrix
import FUFUDepMatrix
8 from scoreboard
.fu_reg_matrix
import FURegDepMatrix
9 from scoreboard
.global_pending
import GlobalPending
10 from scoreboard
.group_picker
import GroupPicker
11 from scoreboard
.issue_unit
import IntFPIssueUnit
13 from compalu
import ComputationUnitNoDelay
15 from alu_hier
import ALU
16 from nmutil
.latch
import SRLatch
class Scoreboard(Elaboratable):
    """Experimental integer scoreboard.

    Wires together an issue unit, one integer function unit per ALU
    (an adder and a subtractor), a group picker, the global read/write
    pending vectors and the integer register file.  An FP register file
    is instantiated but not yet connected to any function unit.
    """

    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        # elaborate() reads both of these, so they must be kept on self
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # inputs
        self.int_store_i = Signal(reset_less=True)  # instruction is a store
        self.int_dest_i = Signal(max=n_regs, reset_less=True)  # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True)  # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True)  # oper2 R# in

        # outputs
        self.issue_o = Signal(reset_less=True)  # instruction was accepted

    def elaborate(self, platform):
        """Build and return the scoreboard Module.

        Side effect: sets ``self.int_insn_i`` to the issue unit's
        instruction-select input, so that attribute only exists once
        elaboration has run.
        """
        m = Module()

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # FP ports are requested but not wired to anything yet
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs
        # NOTE(review): assumes ALU takes the operand width as its only
        # argument - confirm against alu_hier.
        add = ALU(self.rwid)
        sub = ALU(self.rwid)
        m.submodules.comp1 = comp1 = ComputationUnitNoDelay(self.rwid, 1, add)
        m.submodules.comp2 = comp2 = ComputationUnitNoDelay(self.rwid, 1, sub)
        int_alus = [comp1, comp2]

        m.d.comb += comp1.oper_i.eq(Const(0))  # temporary/experiment: op=add
        m.d.comb += comp2.oper_i.eq(Const(1))  # temporary/experiment: op=sub

        # Int FUs: one function unit per ALU, same index in both lists
        il = []
        int_rd_pend_v = []
        int_wr_pend_v = []
        for i, _alu in enumerate(int_alus):
            # set up Integer Function Unit, add to module (and python list)
            fu = IntFnUnit(self.n_regs, shadow_wid=0)
            setattr(m.submodules, "intfu%d" % i, fu)
            il.append(fu)
            # collate the read/write pending vectors (to go into global pending)
            int_rd_pend_v.append(fu.int_rd_pend_o)
            int_wr_pend_v.append(fu.int_wr_pend_o)

        # Count of number of FUs
        n_int_fus = len(il)
        n_fp_fus = 0  # for now

        n_fus = n_int_fus + n_fp_fus  # plus FP FUs

        # XXX replaced by array of FUs? *FnUnit
        # # Integer FU-FU Dep Matrix
        # m.submodules.intfudeps = FUFUDepMatrix(n_int_fus, n_int_fus)
        # Integer FU-Reg Dep Matrix
        # intregdeps = FURegDepMatrix(self.n_regs, n_int_fus)
        # m.submodules.intregdeps = intregdeps

        # Integer Priority Picker 1: Adder + Subtractor
        intpick1 = GroupPicker(2)  # picks between add and sub
        m.submodules.intpick1 = intpick1

        # Global Pending Vectors (INT and FP)
        # NOTE: number of vectors is NOT same as number of FUs.
        g_int_rd_pend_v = GlobalPending(self.n_regs, int_rd_pend_v)
        g_int_wr_pend_v = GlobalPending(self.n_regs, int_wr_pend_v)
        m.submodules.g_int_rd_pend_v = g_int_rd_pend_v
        m.submodules.g_int_wr_pend_v = g_int_wr_pend_v

        # Issue Unit
        issueunit = IntFPIssueUnit(self.n_regs, n_int_fus, n_fp_fus)
        m.submodules.issueunit = issueunit

        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA

        # Issue Unit is where it starts.  set up some in/outs for this module
        m.d.comb += [issueunit.i.store_i.eq(self.int_store_i),
                     issueunit.i.dest_i.eq(self.int_dest_i),
                     issueunit.i.src1_i.eq(self.int_src1_i),
                     issueunit.i.src2_i.eq(self.int_src2_i),
                     self.issue_o.eq(issueunit.issue_o),
                     ]
        self.int_insn_i = issueunit.i.insn_i  # enabled by instruction decode

        # connect global rd/wr pending vectors
        m.d.comb += issueunit.i.g_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        fn_issue_l = []
        fn_busy_l = []
        for i, fu in enumerate(il):
            fn_issue_l.append(fu.issue_i)
            fn_busy_l.append(fu.busy_o)
            # XXX sync, so as to stop a simulation infinite loop
            m.d.sync += fu.issue_i.eq(issueunit.i.fn_issue_o[i])
            m.d.sync += fu.dest_i.eq(issueunit.i.dest_i)
            m.d.sync += fu.src1_i.eq(issueunit.i.src1_i)
            m.d.sync += fu.src2_i.eq(issueunit.i.src2_i)
            m.d.sync += issueunit.i.busy_i[i].eq(fu.busy_o)

        # connect Function Units

        # Group Picker... done manually for now.  TODO: cat array of pick sigs
        m.d.comb += il[0].go_rd_i.eq(intpick1.go_rd_o[0])  # add rd
        m.d.comb += il[0].go_wr_i.eq(intpick1.go_wr_o[0])  # add wr

        m.d.comb += il[1].go_rd_i.eq(intpick1.go_rd_o[1])  # subtract rd
        m.d.comb += il[1].go_wr_i.eq(intpick1.go_wr_o[1])  # subtract wr

        # Connect INT Fn Unit global wr/rd pending.  Every FU gets the
        # vectors, not just whichever one a previous loop happened to
        # leave bound to "fu".
        for fu in il:
            m.d.comb += fu.g_int_wr_pend_i.eq(g_int_wr_pend_v.g_pend_o)
            m.d.comb += fu.g_int_rd_pend_i.eq(g_int_rd_pend_v.g_pend_o)

        # Connect Picker
        m.d.comb += intpick1.req_rel_i[0].eq(int_alus[0].req_rel_o)
        m.d.comb += intpick1.req_rel_i[1].eq(int_alus[1].req_rel_o)
        m.d.comb += intpick1.readable_i[0].eq(il[0].int_readable_o)  # add rdable
        m.d.comb += intpick1.writable_i[0].eq(il[0].int_writable_o)  # add wrable
        m.d.comb += intpick1.readable_i[1].eq(il[1].int_readable_o)  # sub rdable
        m.d.comb += intpick1.writable_i[1].eq(il[1].int_writable_o)  # sub wrable

        # Connect Register File(s)
        m.d.comb += int_dest.wen.eq(g_int_wr_pend_v.g_pend_o)
        m.d.comb += int_src1.ren.eq(g_int_rd_pend_v.g_pend_o)
        m.d.comb += int_src2.ren.eq(g_int_rd_pend_v.g_pend_o)

        # merge (OR) all integer FU / ALU outputs to a single value
        # bit of a hack: treereduce needs a list with an item named "dest_o"
        dest_o = treereduce(int_alus)
        m.d.comb += int_dest.data_i.eq(dest_o)

        # connect ALUs
        for i, alu in enumerate(int_alus):
            m.d.comb += alu.go_rd_i.eq(il[i].go_rd_i)  # chained from intpick
            m.d.comb += alu.go_wr_i.eq(il[i].go_wr_i)  # chained from intpick
            m.d.comb += alu.issue_i.eq(fn_issue_l[i])
            #m.d.comb += fn_busy_l[i].eq(alu.busy_o)  # XXX ignore, use fnissue
            m.d.comb += alu.src1_i.eq(int_src1.data_o)
            m.d.comb += alu.src2_i.eq(int_src2.data_o)
            m.d.comb += il[i].req_rel_i.eq(alu.req_rel_o)  # pipe out ready

        return m

    def __iter__(self):
        """Yield the signals that make up this module's external ports."""
        yield from self.intregs
        yield from self.fpregs
        yield self.int_store_i
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o  # declared output, so expose it as a port too
        #yield from self.int_src1
        #yield from self.int_dest
        #yield from self.int_src1
        #yield from self.int_src2
        #yield from self.fp_dest
        #yield from self.fp_src1
        #yield from self.fp_src2

    def ports(self):
        """Return the port signals as a list (for rtlil/verilog conversion)."""
        return list(self)
def int_instr(dut, op, src1, src2, dest):
    """Simulation helper: present one integer instruction to the scoreboard.

    Yields, in order, assignments of ``dest``, ``src1`` and ``src2`` to the
    DUT's register-number inputs, then sets bit ``op`` of ``dut.int_insn_i``
    to 1.  Intended to be driven with ``yield from`` inside a simulation
    generator.
    """
    yield dut.int_dest_i.eq(dest)
    yield dut.int_src1_i.eq(src1)
    yield dut.int_src2_i.eq(src2)
    # op indexes the per-function-unit instruction-select vector
    yield dut.int_insn_i[op].eq(1)
def print_reg(dut, rnum):
    """Simulation helper: read integer register ``rnum`` and print it in hex.

    Yields the register's signal so the simulator can send back its current
    value.  The register that is read is the one named by ``rnum``, so the
    printed label and the printed value always refer to the same register.
    """
    reg = yield dut.intregs.regs[rnum].reg
    print("reg %d: %x" % (rnum, reg))
# Instruction-select indices into dut.int_insn_i.  Function unit 0 is wired
# to the adder and function unit 1 to the subtractor in Scoreboard.elaborate.
IADD = 0
ISUB = 1


def scoreboard_sim(dut):
    """Simulation stimulus: preload the registers, issue one ADD, dump r5.

    Register i (for i >= 1) is initialised to the value i, then an IADD
    with src1=4, src2=1, dest=5 is presented, and register 5 is printed
    on five successive clock cycles.
    """
    for i in range(1, dut.n_regs):
        yield dut.intregs.regs[i].reg.eq(i)
    # NOTE(review): a bare yield is taken to advance the simulation by one
    # clock (Migen-style generator protocol) - confirm for the simulator
    # version in use.
    yield

    yield from int_instr(dut, IADD, 4, 1, 5)
    # tick between dumps so each print samples a different cycle
    for _ in range(5):
        yield from print_reg(dut, 5)
        yield
def test_scoreboard():
    """Convert the scoreboard to RTLIL, save it, then run the simulation.

    Writes ``test_scoreboard.il`` (RTLIL netlist) and ``test_scoreboard.vcd``
    (waveform trace) into the current working directory.
    """
    dut = Scoreboard(32, 8)
    vl = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard.il", "w") as f:
        f.write(vl)

    run_simulation(dut, scoreboard_sim(dut), vcd_name='test_scoreboard.vcd')
250 if __name__
== '__main__':