1 # Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
4 # For use for simulation and test purposes only
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
36 from m5
.objects
import *
37 from m5
.defines
import buildEnv
38 from m5
.util
import addToPath
39 from Ruby
import create_topology
40 from Ruby
import send_evicts
44 from topologies
.Cluster
import Cluster
45 from topologies
.Crossbar
import Crossbar
51 # Use CntrlBase (not cls) since we need a single global count
53 return CntrlBase
._seqs
- 1
58 # Use CntrlBase (not cls) since we need a single global count
59 CntrlBase
._cntrls
+= 1
60 return CntrlBase
._cntrls
- 1
def versionCount(cls):
    """Hand out the next version number for this particular type.

    Reads ``cls._version``, stores the incremented value back on ``cls``
    and returns the value seen before the increment.

    NOTE(review): takes ``cls``; presumably bound as a classmethod on an
    enclosing class that is not visible here -- confirm.
    """
    issued = cls._version  # count is kept per type, not globally
    cls._version = issued + 1
    return issued
class TccDirCache(RubyCache):
    """RubyCache that TCCDirCntrl.create() installs as its directory."""
    resourceStalls = False

    def create(self, options):
        """Size and configure the cache from the parsed options.

        options: parsed option object; tcc_size, tcp_size,
            num_compute_units, tcc_dir_factor, num_tccs and
            cacheline_size are read.
        """
        self.size = MemorySize(options.tcc_size)

        # Add factor * (all TCPs), divided across the TCCs, on top of the
        # TCC-sized base.
        tcp_bytes = MemorySize(options.tcp_size).value
        tcp_share = (options.num_compute_units * tcp_bytes *
                     options.tcc_dir_factor) / long(options.num_tccs)
        self.size.value += tcp_share

        # Index starts above the block-offset bits and the TCC-select bits.
        line_bits = math.log(options.cacheline_size, 2)
        tcc_bits = math.log(options.num_tccs, 2)
        self.start_index_bit = line_bits + tcc_bits

        self.replacement_policy = PseudoLRUReplacementPolicy()
class L1DCache(RubyCache):
    """RubyCache used for the per-core L1 data caches of CPCntrl."""
    resourceStalls = False

    def create(self, options):
        """Configure size, associativity and replacement policy.

        options: parsed option object; l1d_size and l1d_assoc are read.
        """
        self.size = MemorySize(options.l1d_size)
        self.assoc = options.l1d_assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()
class L1ICache(RubyCache):
    """RubyCache used for the shared L1 instruction cache of CPCntrl."""
    resourceStalls = False

    def create(self, options):
        """Configure size, associativity and replacement policy.

        options: parsed option object; l1i_size and l1i_assoc are read.
        """
        self.size = MemorySize(options.l1i_size)
        self.assoc = options.l1i_assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()
class L2Cache(RubyCache):
    """RubyCache used for the L2 cache of CPCntrl."""
    resourceStalls = False

    def create(self, options):
        """Configure size, associativity and replacement policy.

        options: parsed option object; l2_size and l2_assoc are read.
        """
        self.size = MemorySize(options.l2_size)
        self.assoc = options.l2_assoc
        self.replacement_policy = PseudoLRUReplacementPolicy()
class CPCntrl(CorePair_Controller, CntrlBase):
    """CorePair controller: two sequencers, two L1Ds, one L1I, one L2."""

    def create(self, options, ruby_system, system):
        """Instantiate the caches and both sequencers of this controller.

        options: parsed option object; passed to each cache's create(),
            and cpu_to_dir_latency / recycle_latency are read here.
        ruby_system: assigned to the controller and to both sequencers.
        system: not used by this method.
        """
        self.version = self.versionCount()

        # One instruction cache, one data cache per core, one L2.
        self.L1Icache = L1ICache()
        self.L1Icache.create(options)
        self.L1D0cache = L1DCache()
        self.L1D0cache.create(options)
        self.L1D1cache = L1DCache()
        self.L1D1cache.create(options)
        self.L2cache = L2Cache()
        self.L2cache.create(options)

        # Core 0: shared L1I plus L1D0.
        self.sequencer = RubySequencer()
        self.sequencer.icache_hit_latency = 2
        self.sequencer.dcache_hit_latency = 2
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1Icache
        self.sequencer.dcache = self.L1D0cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.coreid = 0
        self.sequencer.is_cpu_sequencer = True

        # Core 1: shared L1I plus L1D1.
        self.sequencer1 = RubySequencer()
        self.sequencer1.version = self.seqCount()
        self.sequencer1.icache = self.L1Icache
        self.sequencer1.dcache = self.L1D1cache
        self.sequencer1.icache_hit_latency = 2
        self.sequencer1.dcache_hit_latency = 2
        self.sequencer1.ruby_system = ruby_system
        self.sequencer1.coreid = 1
        self.sequencer1.is_cpu_sequencer = True

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
class TCPCache(RubyCache):
    """RubyCache that TCPCntrl installs as its L1cache."""
    dataAccessLatency = 4

    def create(self, options):
        """Set the size from options.tcp_size and pick the policy.

        options: parsed option object; only tcp_size is read.
        """
        self.size = MemorySize(options.tcp_size)
        self.replacement_policy = PseudoLRUReplacementPolicy()
# TCP controller.  Offers two set-up paths: create() selects the GPU
# coalescer (use_seq_not_coal = False) and createCP() selects the plain
# sequencer (use_seq_not_coal = True).  Both build the same L1 cache, a
# RubyGPUCoalescer and a RubySequencer.
155 class TCPCntrl(TCP_Controller
, CntrlBase
):
# create(): set-up path that uses the coalescer.  `system` is not read.
157 def create(self
, options
, ruby_system
, system
):
158 self
.version
= self
.versionCount()
# L1 cache; its tag latency comes from options.TCP_latency.
160 self
.L1cache
= TCPCache(tagAccessLatency
= options
.TCP_latency
)
161 self
.L1cache
.resourceStalls
= options
.no_resource_stalls
162 self
.L1cache
.create(options
)
# Coalescer: the L1 cache serves as both icache and dcache.
164 self
.coalescer
= RubyGPUCoalescer()
165 self
.coalescer
.version
= self
.seqCount()
166 self
.coalescer
.icache
= self
.L1cache
167 self
.coalescer
.dcache
= self
.L1cache
168 self
.coalescer
.ruby_system
= ruby_system
169 self
.coalescer
.support_inst_reqs
= False
170 self
.coalescer
.is_cpu_sequencer
= False
# NOTE(review): the product below ends in a line continuation whose final
# operand is not visible in this view -- confirm against the full file.
171 self
.coalescer
.max_outstanding_requests
= options
.simds_per_cu
* \
172 options
.wfs_per_simd
* \
175 self
.sequencer
= RubySequencer()
176 self
.sequencer
.version
= self
.seqCount()
177 self
.sequencer
.icache
= self
.L1cache
178 self
.sequencer
.dcache
= self
.L1cache
179 self
.sequencer
.ruby_system
= ruby_system
180 self
.sequencer
.is_cpu_sequencer
= True
# This path drives the controller through the coalescer.
182 self
.use_seq_not_coal
= False
184 self
.ruby_system
= ruby_system
# Only override the recycle latency when one was supplied.
186 if options
.recycle_latency
:
187 self
.recycle_latency
= options
.recycle_latency
# createCP(): same objects as create(), but selects the sequencer
# (use_seq_not_coal = True) and does not set max_outstanding_requests.
# `system` is not read.
189 def createCP(self
, options
, ruby_system
, system
):
190 self
.version
= self
.versionCount()
192 self
.L1cache
= TCPCache(tagAccessLatency
= options
.TCP_latency
)
193 self
.L1cache
.resourceStalls
= options
.no_resource_stalls
194 self
.L1cache
.create(options
)
196 self
.coalescer
= RubyGPUCoalescer()
197 self
.coalescer
.version
= self
.seqCount()
198 self
.coalescer
.icache
= self
.L1cache
199 self
.coalescer
.dcache
= self
.L1cache
200 self
.coalescer
.ruby_system
= ruby_system
201 self
.coalescer
.support_inst_reqs
= False
202 self
.coalescer
.is_cpu_sequencer
= False
204 self
.sequencer
= RubySequencer()
205 self
.sequencer
.version
= self
.seqCount()
206 self
.sequencer
.icache
= self
.L1cache
207 self
.sequencer
.dcache
= self
.L1cache
208 self
.sequencer
.ruby_system
= ruby_system
209 self
.sequencer
.is_cpu_sequencer
= True
# This path drives the controller through the sequencer.
211 self
.use_seq_not_coal
= True
213 self
.ruby_system
= ruby_system
215 if options
.recycle_latency
:
216 self
.recycle_latency
= options
.recycle_latency
class SQCCache(RubyCache):
    """RubyCache that SQCCntrl installs as its L1cache."""
    dataAccessLatency = 4

    def create(self, options):
        """Pick the replacement policy.

        options: accepted for a uniform create() signature; not read.
        """
        self.replacement_policy = PseudoLRUReplacementPolicy()
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller with one L1 cache and one sequencer."""

    def create(self, options, ruby_system, system):
        """Build the L1 cache and a sequencer with data requests disabled.

        options: parsed option object; no_resource_stalls and
            recycle_latency are read.
        ruby_system: assigned to the controller and to the sequencer.
        system: not used by this method.
        """
        self.version = self.versionCount()

        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = options.no_resource_stalls

        self.sequencer = RubySequencer()

        # The single L1 cache backs both cache ports of the sequencer.
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.support_data_reqs = False
        self.sequencer.is_cpu_sequencer = False

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def createCP(self, options, ruby_system, system):
        """Same set-up as create(), but is_cpu_sequencer is left untouched.

        options: parsed option object; no_resource_stalls and
            recycle_latency are read.
        ruby_system: assigned to the controller and to the sequencer.
        system: not used by this method.
        """
        self.version = self.versionCount()

        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = options.no_resource_stalls

        self.sequencer = RubySequencer()

        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.support_data_reqs = False

        self.ruby_system = ruby_system

        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
class TCC(RubyCache):
    """RubyCache for a TCC; sized as options.tcc_size / options.num_tccs."""
    dataAccessLatency = 8
    resourceStalls = True

    def create(self, options):
        """Derive size, bank counts and index bit from the options.

        options: parsed option object; tcc_size, num_tccs and
            cacheline_size are read.
        """
        self.size = MemorySize(options.tcc_size)
        self.size = self.size / options.num_tccs

        # 256 data banks and 256 tag banks, divided by the number of TCCs.
        banks_per_tcc = 256 / options.num_tccs
        self.dataArrayBanks = banks_per_tcc
        self.tagArrayBanks = banks_per_tcc

        # Keep size / assoc from dropping below 128.
        if self.size.value / long(self.assoc) < 128:
            self.size.value = long(128 * self.assoc)

        # Index starts above the block-offset bits and the TCC-select bits.
        line_bits = math.log(options.cacheline_size, 2)
        tcc_bits = math.log(options.num_tccs, 2)
        self.start_index_bit = line_bits + tcc_bits

        self.replacement_policy = PseudoLRUReplacementPolicy()
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller; shares six wire buffers with a TCCDirCntrl."""

    def create(self, options, ruby_system, system):
        """Configure the L2 cache, latency and TBE count.

        options: parsed option object; TCC_latency and recycle_latency
            are read, and options is forwarded to the L2 cache.
        ruby_system: assigned to the controller.
        system: not used by this method.
        """
        self.version = self.versionCount()

        # NOTE(review): self.L2cache is used here, but no assignment to it
        # is visible in this block -- confirm where it is set.
        self.L2cache.create(options)
        self.l2_response_latency = options.TCC_latency

        self.number_of_TBEs = 2048

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_tccdir, resp_to_tccdir,
                           tcc_unblock_to_tccdir, req_to_tcc,
                           probe_to_tcc, resp_to_tcc):
        """Store the six wire buffers on the matching w_* attributes."""
        # Towards the TCC directory.
        self.w_reqToTCCDir = req_to_tccdir
        self.w_respToTCCDir = resp_to_tccdir
        self.w_TCCUnblockToTCCDir = tcc_unblock_to_tccdir
        # Towards the TCC.
        self.w_reqToTCC = req_to_tcc
        self.w_probeToTCC = probe_to_tcc
        self.w_respToTCC = resp_to_tcc
class TCCDirCntrl(TCCdir_Controller, CntrlBase):
    """TCC directory controller; shares six wire buffers with a TCCCntrl."""

    def create(self, options, ruby_system, system):
        """Build the directory cache and set the TBE count.

        options: parsed option object; recycle_latency is read, and
            options is forwarded to the directory cache.
        ruby_system: assigned to the controller.
        system: not used by this method.
        """
        self.version = self.versionCount()

        self.directory = TccDirCache()
        self.directory.create(options)

        self.number_of_TBEs = 1024

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_tccdir, resp_to_tccdir,
                           tcc_unblock_to_tccdir, req_to_tcc,
                           probe_to_tcc, resp_to_tcc):
        """Store the six wire buffers on the matching w_* attributes."""
        # Towards the TCC directory.
        self.w_reqToTCCDir = req_to_tccdir
        self.w_respToTCCDir = resp_to_tccdir
        self.w_TCCUnblockToTCCDir = tcc_unblock_to_tccdir
        # Towards the TCC.
        self.w_reqToTCC = req_to_tcc
        self.w_probeToTCC = probe_to_tcc
        self.w_respToTCC = resp_to_tcc
class L3Cache(RubyCache):
    """RubyCache used as the L3 by L3Cntrl and DirCntrl."""

    def create(self, options, ruby_system, system):
        """Scale the cache by the directory count and apply latencies.

        options: parsed option object; l3_size, num_dirs,
            l3_data_latency, l3_tag_latency and no_resource_stalls
            are read.
        ruby_system, system: not used by this method.
        """
        self.size = MemorySize(options.l3_size)
        self.size.value /= options.num_dirs

        # NOTE(review): both bank counts are divided by num_dirs twice;
        # this looks like an accidental duplication -- confirm before
        # changing, since it alters the simulated configuration.
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs

        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = options.no_resource_stalls
        self.replacement_policy = PseudoLRUReplacementPolicy()
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 cache controller; shares six wire buffers with a directory."""

    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the response latency from it.

        options: parsed option object; recycle_latency is read, and
            options is forwarded to the L3 cache.
        ruby_system: assigned to the controller, forwarded to the cache.
        system: forwarded to the cache.
        """
        self.version = self.versionCount()
        self.L3cache = L3Cache()
        self.L3cache.create(options, ruby_system, system)

        # Respond after the slower of the data and tag accesses.
        self.l3_response_latency = max(self.L3cache.dataAccessLatency,
                                       self.L3cache.tagAccessLatency)
        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers on the matching attributes."""
        # Towards the directory.
        self.reqToDir = req_to_dir
        self.respToDir = resp_to_dir
        self.l3UnblockToDir = l3_unblock_to_dir
        # Towards the L3.
        self.reqToL3 = req_to_l3
        self.probeToL3 = probe_to_l3
        self.respToL3 = resp_to_l3
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller with its own directory memory and L3 cache."""

    def create(self, options, dir_ranges, ruby_system, system):
        """Build the directory memory and L3, then set latencies and TBEs.

        options: parsed option object; num_tbes and recycle_latency are
            read, and options is forwarded to the L3 cache.
        dir_ranges: stored as this controller's addr_ranges.
        ruby_system: assigned to the controller, forwarded to the cache.
        system: forwarded to the cache.
        """
        self.version = self.versionCount()

        self.response_latency = 30

        self.addr_ranges = dir_ranges
        self.directory = RubyDirectoryMemory()

        self.L3CacheMemory = L3Cache()
        self.L3CacheMemory.create(options, ruby_system, system)

        # An L3 hit costs the slower of the data and tag accesses.
        self.l3_hit_latency = max(self.L3CacheMemory.dataAccessLatency,
                                  self.L3CacheMemory.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers on the matching attributes."""
        # Towards the directory.
        self.reqToDir = req_to_dir
        self.respToDir = resp_to_dir
        self.l3UnblockToDir = l3_unblock_to_dir
        # Towards the L3.
        self.reqToL3 = req_to_l3
        self.probeToL3 = probe_to_l3
        self.respToL3 = resp_to_l3
# define_options(parser): register this configuration's command-line
# options on `parser` through parser.add_option().
# NOTE(review): several add_option calls below end in a trailing comma
# with no visible continuation (--no-resource-stalls, --TCP_latency,
# --TCC_latency, --tcp-size); their remaining arguments are not visible in
# this view -- confirm against the full file.
408 def define_options(parser
):
409 parser
.add_option("--num-subcaches", type="int", default
=4)
# Latency options (integer values).
410 parser
.add_option("--l3-data-latency", type="int", default
=20)
411 parser
.add_option("--l3-tag-latency", type="int", default
=15)
412 parser
.add_option("--cpu-to-dir-latency", type="int", default
=15)
413 parser
.add_option("--gpu-to-dir-latency", type="int", default
=160)
# store_false: passing this flag stores False in options.no_resource_stalls,
# which the cache classes assign to resourceStalls.
414 parser
.add_option("--no-resource-stalls", action
="store_false",
416 parser
.add_option("--num-tbes", type="int", default
=256)
417 parser
.add_option("--l2-latency", type="int", default
=50) # load to use
# GPU-side cache options (TCC / TCP).
418 parser
.add_option("--num-tccs", type="int", default
=1,
419 help="number of TCC directories and banks in the GPU")
420 parser
.add_option("--TCP_latency", type="int", default
=4,
422 parser
.add_option("--TCC_latency", type="int", default
=16,
424 parser
.add_option("--tcc-size", type='string', default
='256kB',
425 help="agregate tcc size")
426 parser
.add_option("--tcp-size", type='string', default
='16kB',
# Read by TccDirCache.create() when sizing the TCC directory.
428 parser
.add_option("--tcc-dir-factor", type='int', default
=4,
429 help="TCCdir size = factor *(TCPs + TCC)")
# create_system(): build the Ruby controllers for the GPU_RfO protocol --
# directory, CorePair (CPU), TCP, SQC, TCC and TCC-directory controllers --
# connect their message buffers to ruby_system.network, group them into
# clusters, and return (cpu_sequencers, dir_cntrl_nodes, mainCluster).
# NOTE(review): the end of the parameter list and several statements (for
# example the initial assignments of cpu_sequencers, dir_cntrl_nodes,
# dir_ranges and cp_cntrl) are not visible in this view -- confirm against
# the full file.
431 def create_system(options
, full_system
, system
, dma_devices
, bootmem
,
# Refuse to continue unless gem5 was built with the GPU_RfO protocol.
433 if buildEnv
['PROTOCOL'] != 'GPU_RfO':
434 panic("This script requires the GPU_RfO protocol to be built.")
439 # The ruby network creation expects the list of nodes in the system to be
440 # consistent with the NetDest list. Therefore the l1 controller nodes
441 # must be listed before the directory nodes and directory nodes before
448 tccdir_cntrl_nodes
= []
453 # Must create the individual controllers before the network to ensure the
454 # controller constructors are called before the network constructor
# log2(num_tccs); handed to every controller as TCC_select_num_bits.
457 TCC_bits
= int(math
.log(options
.num_tccs
, 2))
459 # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
461 mainCluster
= Cluster(extBW
= 512, intBW
= 512) # 1 TB/s
# NOTE(review): the fallback computation after this `if` presumably sits
# under an `else:` that is not visible in this view -- confirm.
463 if options
.numa_high_bit
:
464 numa_bit
= options
.numa_high_bit
466 # if the numa_bit is not specified, set the directory bits as the
467 # lowest bits above the block offset bits, and the numa_bit as the
468 # highest of those directory bits
469 dir_bits
= int(math
.log(options
.num_dirs
, 2))
470 block_size_bits
= int(math
.log(options
.cacheline_size
, 2))
471 numa_bit
= block_size_bits
+ dir_bits
- 1
# ---- Directory controllers: one per options.num_dirs ----
473 for i
in range(options
.num_dirs
):
# Build one interleaved address range per system memory range.
475 for r
in system
.mem_ranges
:
476 addr_range
= m5
.objects
.AddrRange(r
.start
, size
= r
.size(),
477 intlvHighBit
= numa_bit
,
478 intlvBits
= dir_bits
,
480 dir_ranges
.append(addr_range
)
482 dir_cntrl
= DirCntrl(TCC_select_num_bits
= TCC_bits
)
483 dir_cntrl
.create(options
, dir_ranges
, ruby_system
, system
)
# Overrides the number_of_TBEs value that DirCntrl.create() just set.
484 dir_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
485 #Enough TBEs for all TCP TBEs
487 # Connect the Directory controller to the ruby network
488 dir_cntrl
.requestFromCores
= MessageBuffer(ordered
= True)
489 dir_cntrl
.requestFromCores
.slave
= ruby_system
.network
.master
491 dir_cntrl
.responseFromCores
= MessageBuffer()
492 dir_cntrl
.responseFromCores
.slave
= ruby_system
.network
.master
494 dir_cntrl
.unblockFromCores
= MessageBuffer()
495 dir_cntrl
.unblockFromCores
.slave
= ruby_system
.network
.master
497 dir_cntrl
.probeToCore
= MessageBuffer()
498 dir_cntrl
.probeToCore
.master
= ruby_system
.network
.slave
500 dir_cntrl
.responseToCore
= MessageBuffer()
501 dir_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
# Buffers that are not connected to the network.
503 dir_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
504 dir_cntrl
.L3triggerQueue
= MessageBuffer(ordered
= True)
505 dir_cntrl
.responseFromMemory
= MessageBuffer()
# Attach the controller to `system` under the name dir_cntrl<i>.
507 exec("system.dir_cntrl%d = dir_cntrl" % i
)
508 dir_cntrl_nodes
.append(dir_cntrl
)
510 mainCluster
.add(dir_cntrl
)
# ---- CorePair (CPU) controllers: one per two CPUs, rounded up ----
512 # For an odd number of CPUs, still create the right number of controllers
513 cpuCluster
= Cluster(extBW
= 512, intBW
= 512) # 1 TB/s
514 for i
in range((options
.num_cpus
+ 1) // 2):
517 cp_cntrl
.create(options
, ruby_system
, system
)
# Attach the controller to `system` under the name cp_cntrl<i>.
519 exec("system.cp_cntrl%d = cp_cntrl" % i
)
521 # Add controllers and sequencers to the appropriate lists
523 cpu_sequencers
.extend([cp_cntrl
.sequencer
, cp_cntrl
.sequencer1
])
525 # Connect the CP controllers and the network
526 cp_cntrl
.requestFromCore
= MessageBuffer()
527 cp_cntrl
.requestFromCore
.master
= ruby_system
.network
.slave
529 cp_cntrl
.responseFromCore
= MessageBuffer()
530 cp_cntrl
.responseFromCore
.master
= ruby_system
.network
.slave
532 cp_cntrl
.unblockFromCore
= MessageBuffer()
533 cp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
535 cp_cntrl
.probeToCore
= MessageBuffer()
536 cp_cntrl
.probeToCore
.slave
= ruby_system
.network
.master
538 cp_cntrl
.responseToCore
= MessageBuffer()
539 cp_cntrl
.responseToCore
.slave
= ruby_system
.network
.master
541 cp_cntrl
.mandatoryQueue
= MessageBuffer()
542 cp_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
544 cpuCluster
.add(cp_cntrl
)
# ---- GPU cluster: TCP, SQC, TCC and TCC-directory controllers ----
546 gpuCluster
= Cluster(extBW
= 512, intBW
= 512) # 1 TB/s
# One TCP controller per compute unit, set up via create(); its coalescer
# is the object appended to cpu_sequencers.
548 for i
in range(options
.num_compute_units
):
550 tcp_cntrl
= TCPCntrl(TCC_select_num_bits
= TCC_bits
,
551 number_of_TBEs
= 2560) # max outstanding requests
552 tcp_cntrl
.create(options
, ruby_system
, system
)
554 exec("system.tcp_cntrl%d = tcp_cntrl" % i
)
556 # Add controllers and sequencers to the appropriate lists
558 cpu_sequencers
.append(tcp_cntrl
.coalescer
)
559 tcp_cntrl_nodes
.append(tcp_cntrl
)
561 # Connect the TCP controller to the ruby network
562 tcp_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
563 tcp_cntrl
.requestFromTCP
.master
= ruby_system
.network
.slave
565 tcp_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
566 tcp_cntrl
.responseFromTCP
.master
= ruby_system
.network
.slave
568 tcp_cntrl
.unblockFromCore
= MessageBuffer(ordered
= True)
569 tcp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
571 tcp_cntrl
.probeToTCP
= MessageBuffer(ordered
= True)
572 tcp_cntrl
.probeToTCP
.slave
= ruby_system
.network
.master
574 tcp_cntrl
.responseToTCP
= MessageBuffer(ordered
= True)
575 tcp_cntrl
.responseToTCP
.slave
= ruby_system
.network
.master
577 tcp_cntrl
.mandatoryQueue
= MessageBuffer()
579 gpuCluster
.add(tcp_cntrl
)
# One SQC controller per options.num_sqc, set up via create().
581 for i
in range(options
.num_sqc
):
583 sqc_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
584 sqc_cntrl
.create(options
, ruby_system
, system
)
586 exec("system.sqc_cntrl%d = sqc_cntrl" % i
)
588 # Add controllers and sequencers to the appropriate lists
590 cpu_sequencers
.append(sqc_cntrl
.sequencer
)
592 # Connect the SQC controller to the ruby network
593 sqc_cntrl
.requestFromSQC
= MessageBuffer(ordered
= True)
594 sqc_cntrl
.requestFromSQC
.master
= ruby_system
.network
.slave
596 sqc_cntrl
.responseFromSQC
= MessageBuffer(ordered
= True)
597 sqc_cntrl
.responseFromSQC
.master
= ruby_system
.network
.slave
599 sqc_cntrl
.unblockFromCore
= MessageBuffer(ordered
= True)
600 sqc_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
602 sqc_cntrl
.probeToSQC
= MessageBuffer(ordered
= True)
603 sqc_cntrl
.probeToSQC
.slave
= ruby_system
.network
.master
605 sqc_cntrl
.responseToSQC
= MessageBuffer(ordered
= True)
606 sqc_cntrl
.responseToSQC
.slave
= ruby_system
.network
.master
608 sqc_cntrl
.mandatoryQueue
= MessageBuffer()
610 # SQC also in GPU cluster
611 gpuCluster
.add(sqc_cntrl
)
# For each of options.num_cp: one TCP and one SQC controller set up via
# createCP(); they are named with index num_compute_units + i, and the TCP's
# sequencer (not its coalescer) is appended to cpu_sequencers.
613 for i
in range(options
.num_cp
):
615 tcp_cntrl
= TCPCntrl(TCC_select_num_bits
= TCC_bits
,
616 number_of_TBEs
= 2560) # max outstanding requests
617 tcp_cntrl
.createCP(options
, ruby_system
, system
)
619 exec("system.tcp_cntrl%d = tcp_cntrl" % (options
.num_compute_units
+ i
))
621 # Add controllers and sequencers to the appropriate lists
623 cpu_sequencers
.append(tcp_cntrl
.sequencer
)
624 tcp_cntrl_nodes
.append(tcp_cntrl
)
626 # Connect the TCP controller to the ruby network
627 tcp_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
628 tcp_cntrl
.requestFromTCP
.master
= ruby_system
.network
.slave
630 tcp_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
631 tcp_cntrl
.responseFromTCP
.master
= ruby_system
.network
.slave
633 tcp_cntrl
.unblockFromCore
= MessageBuffer(ordered
= True)
634 tcp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
636 tcp_cntrl
.probeToTCP
= MessageBuffer(ordered
= True)
637 tcp_cntrl
.probeToTCP
.slave
= ruby_system
.network
.master
639 tcp_cntrl
.responseToTCP
= MessageBuffer(ordered
= True)
640 tcp_cntrl
.responseToTCP
.slave
= ruby_system
.network
.master
642 tcp_cntrl
.mandatoryQueue
= MessageBuffer()
644 gpuCluster
.add(tcp_cntrl
)
646 sqc_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
647 sqc_cntrl
.createCP(options
, ruby_system
, system
)
649 exec("system.sqc_cntrl%d = sqc_cntrl" % (options
.num_compute_units
+ i
))
651 # Add controllers and sequencers to the appropriate lists
653 cpu_sequencers
.append(sqc_cntrl
.sequencer
)
655 # Connect the SQC controller to the ruby network
656 sqc_cntrl
.requestFromSQC
= MessageBuffer(ordered
= True)
657 sqc_cntrl
.requestFromSQC
.master
= ruby_system
.network
.slave
659 sqc_cntrl
.responseFromSQC
= MessageBuffer(ordered
= True)
660 sqc_cntrl
.responseFromSQC
.master
= ruby_system
.network
.slave
662 sqc_cntrl
.unblockFromCore
= MessageBuffer(ordered
= True)
663 sqc_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
665 sqc_cntrl
.probeToSQC
= MessageBuffer(ordered
= True)
666 sqc_cntrl
.probeToSQC
.slave
= ruby_system
.network
.master
668 sqc_cntrl
.responseToSQC
= MessageBuffer(ordered
= True)
669 sqc_cntrl
.responseToSQC
.slave
= ruby_system
.network
.master
671 sqc_cntrl
.mandatoryQueue
= MessageBuffer()
673 # SQC also in GPU cluster
674 gpuCluster
.add(sqc_cntrl
)
# One TCC controller and one TCC directory controller per options.num_tccs.
# NOTE(review): number_of_TBEs is passed to both constructors, and both
# create() methods then assign their own fixed value (2048 / 1024) --
# confirm which value is intended to win.
676 for i
in range(options
.num_tccs
):
678 tcc_cntrl
= TCCCntrl(TCC_select_num_bits
= TCC_bits
,
679 number_of_TBEs
= options
.num_compute_units
* 2560)
680 #Enough TBEs for all TCP TBEs
681 tcc_cntrl
.create(options
, ruby_system
, system
)
682 tcc_cntrl_nodes
.append(tcc_cntrl
)
684 tccdir_cntrl
= TCCDirCntrl(TCC_select_num_bits
= TCC_bits
,
685 number_of_TBEs
= options
.num_compute_units
* 2560)
686 #Enough TBEs for all TCP TBEs
687 tccdir_cntrl
.create(options
, ruby_system
, system
)
688 tccdir_cntrl_nodes
.append(tccdir_cntrl
)
690 exec("system.tcc_cntrl%d = tcc_cntrl" % i
)
691 exec("system.tccdir_cntrl%d = tccdir_cntrl" % i
)
693 # connect all of the wire buffers between the TCC and the TCC dir up
694 req_to_tccdir
= RubyWireBuffer()
695 resp_to_tccdir
= RubyWireBuffer()
696 tcc_unblock_to_tccdir
= RubyWireBuffer()
697 req_to_tcc
= RubyWireBuffer()
698 probe_to_tcc
= RubyWireBuffer()
699 resp_to_tcc
= RubyWireBuffer()
# Both controllers receive the same six buffer objects.
701 tcc_cntrl
.connectWireBuffers(req_to_tccdir
, resp_to_tccdir
,
702 tcc_unblock_to_tccdir
, req_to_tcc
,
703 probe_to_tcc
, resp_to_tcc
)
704 tccdir_cntrl
.connectWireBuffers(req_to_tccdir
, resp_to_tccdir
,
705 tcc_unblock_to_tccdir
, req_to_tcc
,
706 probe_to_tcc
, resp_to_tcc
)
708 # Connect the TCC controller to the ruby network
709 tcc_cntrl
.responseFromTCC
= MessageBuffer(ordered
= True)
710 tcc_cntrl
.responseFromTCC
.master
= ruby_system
.network
.slave
712 tcc_cntrl
.responseToTCC
= MessageBuffer(ordered
= True)
713 tcc_cntrl
.responseToTCC
.slave
= ruby_system
.network
.master
715 # Connect the TCC Dir controller to the ruby network
716 tccdir_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
717 tccdir_cntrl
.requestFromTCP
.slave
= ruby_system
.network
.master
719 tccdir_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
720 tccdir_cntrl
.responseFromTCP
.slave
= ruby_system
.network
.master
722 tccdir_cntrl
.unblockFromTCP
= MessageBuffer(ordered
= True)
723 tccdir_cntrl
.unblockFromTCP
.slave
= ruby_system
.network
.master
725 tccdir_cntrl
.probeToCore
= MessageBuffer(ordered
= True)
726 tccdir_cntrl
.probeToCore
.master
= ruby_system
.network
.slave
728 tccdir_cntrl
.responseToCore
= MessageBuffer(ordered
= True)
729 tccdir_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
731 tccdir_cntrl
.probeFromNB
= MessageBuffer()
732 tccdir_cntrl
.probeFromNB
.slave
= ruby_system
.network
.master
734 tccdir_cntrl
.responseFromNB
= MessageBuffer()
735 tccdir_cntrl
.responseFromNB
.slave
= ruby_system
.network
.master
737 tccdir_cntrl
.requestToNB
= MessageBuffer()
738 tccdir_cntrl
.requestToNB
.master
= ruby_system
.network
.slave
740 tccdir_cntrl
.responseToNB
= MessageBuffer()
741 tccdir_cntrl
.responseToNB
.master
= ruby_system
.network
.slave
743 tccdir_cntrl
.unblockToNB
= MessageBuffer()
744 tccdir_cntrl
.unblockToNB
.master
= ruby_system
.network
.slave
746 tccdir_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
748 # TCC cntrls added to the GPU cluster
749 gpuCluster
.add(tcc_cntrl
)
750 gpuCluster
.add(tccdir_cntrl
)
752 # Assuming no DMA devices
753 assert(len(dma_devices
) == 0)
755 # Add cpu/gpu clusters to main cluster
756 mainCluster
.add(cpuCluster
)
757 mainCluster
.add(gpuCluster
)
759 ruby_system
.network
.number_of_virtual_networks
= 10
# Callers receive the sequencer list, the directory controllers and the
# top-level cluster.
761 return (cpu_sequencers
, dir_cntrl_nodes
, mainCluster
)