1 # Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
4 # For use for simulation and test purposes only
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
34 from m5
.objects
import *
35 from m5
.defines
import buildEnv
36 from m5
.util
import addToPath
37 from Ruby
import create_topology
38 from Ruby
import send_evicts
42 from topologies
.Cluster
import Cluster
43 from topologies
.Crossbar
import Crossbar
49 # Use SeqCount not class since we need global count
51 return CntrlBase
._seqs
- 1
56 # Use CntlCount not class since we need global count
57 CntrlBase
._cntrls
+= 1
58 return CntrlBase
._cntrls
- 1
def versionCount(cls):
    """Return the next unused version number for this particular type.

    The counter is kept in ``cls._version``.  Each call advances it by one
    and returns the value it held before being advanced.
    """
    assigned = cls._version
    cls._version = assigned + 1
    return assigned
class L1Cache(RubyCache):
    """RubyCache subclass for the CPU-side L1 caches."""
    resourceStalls = False

    def create(self, size, assoc, options):
        """Configure capacity, associativity and replacement policy.

        size    -- cache capacity, in any form MemorySize accepts
        assoc   -- associativity to configure on the cache
        options -- parsed command-line options; not read here
        """
        self.size = MemorySize(size)
        # Store the requested associativity; without this the 'assoc'
        # argument is accepted but has no effect on the cache.
        self.assoc = assoc
        self.replacement_policy = TreePLRURP()
class L2Cache(RubyCache):
    """RubyCache subclass for the CPU-side L2 cache."""
    resourceStalls = False

    def create(self, size, assoc, options):
        """Configure capacity, associativity and replacement policy.

        size    -- cache capacity, in any form MemorySize accepts
        assoc   -- associativity to configure on the cache
        options -- parsed command-line options; not read here
        """
        self.size = MemorySize(size)
        # Store the requested associativity; without this the 'assoc'
        # argument is accepted but has no effect on the cache.
        self.assoc = assoc
        self.replacement_policy = TreePLRURP()
class CPCntrl(CorePair_Controller, CntrlBase):
    """Controller serving two CPU cores.

    Both cores share one L1 instruction cache and one L2 cache; each core
    gets its own L1 data cache and its own RubySequencer.
    """

    def create(self, options, ruby_system, system):
        """Build the caches and the two sequencers of this controller.

        options     -- parsed command-line options (cache sizes and
                       associativities, cpu_to_dir_latency, recycle_latency)
        ruby_system -- Ruby system assigned to the controller and to both
                       sequencers
        system      -- not read here
        """
        self.version = self.versionCount()

        # (attribute, cache class, size option, associativity option),
        # listed in creation order.
        cache_plan = (
            ("L1Icache", L1Cache, "l1i_size", "l1i_assoc"),
            ("L1D0cache", L1Cache, "l1d_size", "l1d_assoc"),
            ("L1D1cache", L1Cache, "l1d_size", "l1d_assoc"),
            ("L2cache", L2Cache, "l2_size", "l2_assoc"),
        )
        for attr, cache_cls, size_opt, assoc_opt in cache_plan:
            setattr(self, attr, cache_cls())
            getattr(self, attr).create(getattr(options, size_opt),
                                       getattr(options, assoc_opt),
                                       options)

        # (attribute, core id, private data cache) for each core.  The
        # instruction cache is the shared L1Icache in both cases.
        sequencer_plan = (
            ("sequencer", 0, "L1D0cache"),
            ("sequencer1", 1, "L1D1cache"),
        )
        for attr, core_id, dcache_attr in sequencer_plan:
            setattr(self, attr, RubySequencer())
            seq = getattr(self, attr)
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = getattr(self, dcache_attr)
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the recycle latency when the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
class TCPCache(RubyCache):
    """RubyCache subclass used as the L1 cache of a TCP controller."""
    dataArrayBanks = 16  # number of data banks
    tagArrayBanks = 16  # number of tag banks
    dataAccessLatency = 4

    def create(self, options):
        """Configure the cache from the tcp_* command-line options.

        options -- parsed command-line options; reads tcp_size, tcp_assoc
                   and no_tcc_resource_stalls
        """
        capacity = MemorySize(options.tcp_size)
        self.size = capacity
        self.assoc = options.tcp_assoc
        # NOTE(review): this reads the TCC stall option rather than
        # no_resource_stalls -- confirm that is intended.
        self.resourceStalls = options.no_tcc_resource_stalls
        self.replacement_policy = TreePLRURP()
class TCPCntrl(TCP_Controller, CntrlBase):
    """TCP controller with an L1 cache, a VIPERCoalescer and a sequencer.

    create() configures the controller to issue through the coalescer;
    createCP() configures it to issue through the sequencer instead.
    """

    def _createCommon(self, options, ruby_system, for_cp):
        """Shared construction used by create() and createCP().

        options     -- parsed command-line options
        ruby_system -- Ruby system assigned to the controller, the
                       coalescer and the sequencer
        for_cp      -- True when called from createCP(): the coalescer
                       tuning options are skipped and use_seq_not_coal
                       is set to True
        """
        self.version = self.versionCount()

        self.L1cache = TCPCache(tagAccessLatency=options.TCP_latency,
                                dataAccessLatency=options.TCP_latency)
        self.L1cache.create(options)
        # Must follow create(): TCPCache.create() assigns resourceStalls
        # itself, so setting it earlier would be silently overwritten.
        self.L1cache.resourceStalls = options.no_resource_stalls
        self.issue_latency = 1

        # The coalescer takes its version number before the sequencer.
        self.coalescer = VIPERCoalescer()
        self.coalescer.version = self.seqCount()
        self.coalescer.icache = self.L1cache
        self.coalescer.dcache = self.L1cache
        self.coalescer.ruby_system = ruby_system
        self.coalescer.support_inst_reqs = False
        self.coalescer.is_cpu_sequencer = False
        if not for_cp:
            if options.tcp_deadlock_threshold:
                self.coalescer.deadlock_threshold = \
                    options.tcp_deadlock_threshold
            self.coalescer.max_coalesces_per_cycle = \
                options.max_coalesces_per_cycle

        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = for_cp

        self.ruby_system = ruby_system

        # Only override the recycle latency when the option is set.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def create(self, options, ruby_system, system):
        """Build the controller with use_seq_not_coal set to False.

        Also applies tcp_deadlock_threshold (when set) and
        max_coalesces_per_cycle to the coalescer.  'system' is not read.
        """
        self._createCommon(options, ruby_system, False)

    def createCP(self, options, ruby_system, system):
        """Build the controller with use_seq_not_coal set to True.

        The coalescer is still created, but its deadlock threshold and
        max_coalesces_per_cycle are left untouched.  'system' is not read.
        """
        self._createCommon(options, ruby_system, True)
class SQCCache(RubyCache):
    """RubyCache subclass used as the L1 cache of an SQC controller."""
    dataAccessLatency = 1

    def create(self, options):
        """Configure the cache from the sqc_* command-line options.

        options -- parsed command-line options; reads sqc_size and
                   sqc_assoc
        """
        capacity = MemorySize(options.sqc_size)
        self.size = capacity
        self.assoc = options.sqc_assoc
        self.replacement_policy = TreePLRURP()
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller with one cache and one sequencer."""

    def create(self, options, ruby_system, system):
        """Build the cache and the sequencer of this controller.

        options     -- parsed command-line options (sqc_* settings,
                       no_resource_stalls, sqc_deadlock_threshold,
                       recycle_latency)
        ruby_system -- Ruby system assigned to the controller and to its
                       sequencer
        system      -- not read here
        """
        self.version = self.versionCount()

        self.L1cache = SQCCache()
        cache = self.L1cache
        cache.create(options)
        cache.resourceStalls = options.no_resource_stalls

        self.sequencer = RubySequencer()
        seq = self.sequencer

        seq.version = self.seqCount()
        # The one cache is registered as both icache and dcache, with
        # data requests marked unsupported.
        seq.icache = cache
        seq.dcache = cache
        seq.ruby_system = ruby_system
        seq.support_data_reqs = False
        seq.is_cpu_sequencer = False
        threshold = options.sqc_deadlock_threshold
        if threshold:
            seq.deadlock_threshold = threshold

        self.ruby_system = ruby_system

        # Only override the recycle latency when the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
244 class TCC(RubyCache
):
245 size
= MemorySize("256kB")
247 dataAccessLatency
= 8
249 resourceStalls
= True
250 def create(self
, options
):
251 self
.assoc
= options
.tcc_assoc
252 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
253 s
= options
.num_compute_units
255 tcc_size
= str(tcc_size
)+'kB'
256 self
.size
= MemorySize(tcc_size
)
257 self
.dataArrayBanks
= 64
258 self
.tagArrayBanks
= 64
260 self
.size
= MemorySize(options
.tcc_size
)
261 self
.dataArrayBanks
= 256 / options
.num_tccs
#number of data banks
262 self
.tagArrayBanks
= 256 / options
.num_tccs
#number of tag banks
263 self
.size
.value
= self
.size
.value
/ options
.num_tccs
264 if ((self
.size
.value
/ long(self
.assoc
)) < 128):
265 self
.size
.value
= long(128 * self
.assoc
)
266 self
.start_index_bit
= math
.log(options
.cacheline_size
, 2) + \
267 math
.log(options
.num_tccs
, 2)
268 self
.replacement_policy
= TreePLRURP()
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller owning one TCC cache as its L2cache."""

    def create(self, options, ruby_system, system):
        """Build and configure the cache of this controller.

        options     -- parsed command-line options (settings read by
                       TCC.create(), no_tcc_resource_stalls,
                       recycle_latency)
        ruby_system -- Ruby system assigned to the controller
        system      -- not read here
        """
        self.version = self.versionCount()

        # The cache has to exist before it can be configured.
        self.L2cache = TCC()
        self.L2cache.create(options)
        # Applied after create() so the command-line choice is the value
        # that ends up on the cache.
        self.L2cache.resourceStalls = options.no_tcc_resource_stalls

        self.ruby_system = ruby_system

        # Only override the recycle latency when the option is set.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
class L3Cache(RubyCache):
    """RubyCache subclass for an L3 cache divided among the directories."""

    def create(self, options, ruby_system, system):
        """Configure this cache as a 1/num_dirs share of the L3.

        options     -- parsed command-line options; reads l3_size,
                       l3_assoc, num_dirs, l3_data_latency and
                       l3_tag_latency
        ruby_system -- not read here
        system      -- not read here
        """
        num_dirs = options.num_dirs

        self.size = MemorySize(options.l3_size)
        self.size.value /= num_dirs
        self.assoc = options.l3_assoc
        # Scale the bank counts by num_dirs exactly once, matching how the
        # capacity is scaled.  Dividing twice would shrink the banks with
        # the square of the directory count.
        self.dataArrayBanks /= num_dirs
        self.tagArrayBanks /= num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = False
        self.replacement_policy = TreePLRURP()
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 cache controller owning one L3Cache."""

    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the response latency from it.

        options     -- parsed command-line options
        ruby_system -- Ruby system assigned to the controller; also passed
                       to L3Cache.create()
        system      -- passed through to L3Cache.create()
        """
        self.version = self.versionCount()
        self.L3cache = L3Cache()
        l3 = self.L3cache
        l3.create(options, ruby_system, system)

        # Response latency is the larger of the data and tag latencies.
        latencies = (l3.dataAccessLatency, l3.tagAccessLatency)
        self.l3_response_latency = max(latencies)
        self.ruby_system = ruby_system

        # Only override the recycle latency when the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Assign the six given buffers to this controller's attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr, buf in wiring:
            setattr(self, attr, buf)
class DirMem(RubyDirectoryMemory, CntrlBase):
    """Directory memory sized as an equal share of physical memory."""

    def create(self, options, ruby_system, system):
        """Size this directory memory as mem_size / num_dirs.

        options     -- parsed command-line options; reads mem_size and
                       num_dirs
        ruby_system -- not read here
        system      -- not read here
        """
        self.version = self.versionCount()

        phys_mem_size = AddrRange(options.mem_size).size()
        mem_module_size = phys_mem_size / options.num_dirs
        # Start from a zero-sized MemorySize and overwrite its raw value,
        # as mem_module_size is already a plain number.
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size
        # Apply the computed size; otherwise the calculation above has no
        # effect.  NOTE(review): assumes RubyDirectoryMemory exposes a
        # 'size' parameter -- confirm against the SimObject definition.
        self.size = dir_size
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller owning a DirMem and an L3Cache."""

    def create(self, options, ruby_system, system):
        """Build the directory memory and L3 cache of this controller.

        options     -- parsed command-line options (num_tbes,
                       recycle_latency, plus whatever DirMem.create() and
                       L3Cache.create() read)
        ruby_system -- Ruby system assigned to the controller; also passed
                       to both create() calls
        system      -- passed through to both create() calls
        """
        self.version = self.versionCount()

        self.response_latency = 30

        self.directory = DirMem()
        self.directory.create(options, ruby_system, system)

        self.L3CacheMemory = L3Cache()
        l3 = self.L3CacheMemory
        l3.create(options, ruby_system, system)

        # Hit latency is the larger of the data and tag latencies.
        latencies = (l3.dataAccessLatency, l3.tagAccessLatency)
        self.l3_hit_latency = max(latencies)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        # Only override the recycle latency when the option is set.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Assign the six given buffers to this controller's attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr, buf in wiring:
            setattr(self, attr, buf)
362 def define_options(parser
):
363 parser
.add_option("--num-subcaches", type = "int", default
= 4)
364 parser
.add_option("--l3-data-latency", type = "int", default
= 20)
365 parser
.add_option("--l3-tag-latency", type = "int", default
= 15)
366 parser
.add_option("--cpu-to-dir-latency", type = "int", default
= 120)
367 parser
.add_option("--gpu-to-dir-latency", type = "int", default
= 120)
368 parser
.add_option("--no-resource-stalls", action
= "store_false",
370 parser
.add_option("--no-tcc-resource-stalls", action
= "store_false",
372 parser
.add_option("--use-L3-on-WT", action
= "store_true", default
= False)
373 parser
.add_option("--num-tbes", type = "int", default
= 256)
374 parser
.add_option("--l2-latency", type = "int", default
= 50) # load to use
375 parser
.add_option("--num-tccs", type = "int", default
= 1,
376 help = "number of TCC banks in the GPU")
377 parser
.add_option("--sqc-size", type = 'string', default
= '32kB',
378 help = "SQC cache size")
379 parser
.add_option("--sqc-assoc", type = 'int', default
= 8,
380 help = "SQC cache assoc")
381 parser
.add_option("--sqc-deadlock-threshold", type='int',
382 help="Set the SQC deadlock threshold to some value")
384 parser
.add_option("--WB_L1", action
= "store_true", default
= False,
385 help = "writeback L1")
386 parser
.add_option("--WB_L2", action
= "store_true", default
= False,
387 help = "writeback L2")
388 parser
.add_option("--TCP_latency", type = "int", default
= 4,
389 help = "TCP latency")
390 parser
.add_option("--TCC_latency", type = "int", default
= 16,
391 help = "TCC latency")
392 parser
.add_option("--tcc-size", type = 'string', default
= '256kB',
393 help = "agregate tcc size")
394 parser
.add_option("--tcc-assoc", type = 'int', default
= 16,
396 parser
.add_option("--tcp-size", type = 'string', default
= '16kB',
398 parser
.add_option("--tcp-assoc", type = 'int', default
= 16,
400 parser
.add_option("--tcp-deadlock-threshold", type='int',
401 help="Set the TCP deadlock threshold to some value")
402 parser
.add_option("--max-coalesces-per-cycle", type="int", default
=1,
403 help="Maximum insts that may coalesce in a cycle");
405 parser
.add_option("--noL1", action
= "store_true", default
= False,
408 def create_system(options
, full_system
, system
, dma_devices
, bootmem
,
410 if buildEnv
['PROTOCOL'] != 'GPU_VIPER':
411 panic("This script requires the GPU_VIPER protocol to be built.")
416 # The ruby network creation expects the list of nodes in the system to be
417 # consistent with the NetDest list. Therefore the l1 controller nodes
418 # must be listed before the directory nodes and directory nodes before
429 # Must create the individual controllers before the network to ensure the
430 # controller constructors are called before the network constructor
433 # For an odd number of CPUs, still create the right number of controllers
434 TCC_bits
= int(math
.log(options
.num_tccs
, 2))
436 # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
440 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
441 #Assuming a 2GHz clock
442 crossbar_bw
= 16 * options
.num_compute_units
* options
.bw_scalor
443 mainCluster
= Cluster(intBW
=crossbar_bw
)
445 mainCluster
= Cluster(intBW
=8) # 16 GB/s
446 for i
in range(options
.num_dirs
):
448 dir_cntrl
= DirCntrl(noTCCdir
= True, TCC_select_num_bits
= TCC_bits
)
449 dir_cntrl
.create(options
, ruby_system
, system
)
450 dir_cntrl
.number_of_TBEs
= options
.num_tbes
451 dir_cntrl
.useL3OnWT
= options
.use_L3_on_WT
452 # the number_of_TBEs is inclusive of TBEs below
454 # Connect the Directory controller to the ruby network
455 dir_cntrl
.requestFromCores
= MessageBuffer(ordered
= True)
456 dir_cntrl
.requestFromCores
.slave
= ruby_system
.network
.master
458 dir_cntrl
.responseFromCores
= MessageBuffer()
459 dir_cntrl
.responseFromCores
.slave
= ruby_system
.network
.master
461 dir_cntrl
.unblockFromCores
= MessageBuffer()
462 dir_cntrl
.unblockFromCores
.slave
= ruby_system
.network
.master
464 dir_cntrl
.probeToCore
= MessageBuffer()
465 dir_cntrl
.probeToCore
.master
= ruby_system
.network
.slave
467 dir_cntrl
.responseToCore
= MessageBuffer()
468 dir_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
470 dir_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
471 dir_cntrl
.L3triggerQueue
= MessageBuffer(ordered
= True)
472 dir_cntrl
.requestToMemory
= MessageBuffer()
473 dir_cntrl
.responseFromMemory
= MessageBuffer()
475 exec("ruby_system.dir_cntrl%d = dir_cntrl" % i
)
476 dir_cntrl_nodes
.append(dir_cntrl
)
478 mainCluster
.add(dir_cntrl
)
481 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
482 cpuCluster
= Cluster(extBW
= crossbar_bw
, intBW
= crossbar_bw
)
484 cpuCluster
= Cluster(extBW
= 8, intBW
= 8) # 16 GB/s
485 for i
in range((options
.num_cpus
+ 1) // 2):
488 cp_cntrl
.create(options
, ruby_system
, system
)
490 exec("ruby_system.cp_cntrl%d = cp_cntrl" % i
)
492 # Add controllers and sequencers to the appropriate lists
494 cpu_sequencers
.extend([cp_cntrl
.sequencer
, cp_cntrl
.sequencer1
])
496 # Connect the CP controllers and the network
497 cp_cntrl
.requestFromCore
= MessageBuffer()
498 cp_cntrl
.requestFromCore
.master
= ruby_system
.network
.slave
500 cp_cntrl
.responseFromCore
= MessageBuffer()
501 cp_cntrl
.responseFromCore
.master
= ruby_system
.network
.slave
503 cp_cntrl
.unblockFromCore
= MessageBuffer()
504 cp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
506 cp_cntrl
.probeToCore
= MessageBuffer()
507 cp_cntrl
.probeToCore
.slave
= ruby_system
.network
.master
509 cp_cntrl
.responseToCore
= MessageBuffer()
510 cp_cntrl
.responseToCore
.slave
= ruby_system
.network
.master
512 cp_cntrl
.mandatoryQueue
= MessageBuffer()
513 cp_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
515 cpuCluster
.add(cp_cntrl
)
517 # Register CPUs and caches for each CorePair and directory (SE mode only)
519 for i
in xrange((options
.num_cpus
+ 1) // 2):
520 FileSystemConfig
.register_cpu(physical_package_id
= 0,
522 xrange(options
.num_cpus
),
524 thread_siblings
= [])
526 FileSystemConfig
.register_cpu(physical_package_id
= 0,
528 xrange(options
.num_cpus
),
530 thread_siblings
= [])
532 FileSystemConfig
.register_cache(level
= 0,
533 idu_type
= 'Instruction',
534 size
= options
.l1i_size
,
535 line_size
= options
.cacheline_size
,
536 assoc
= options
.l1i_assoc
,
539 FileSystemConfig
.register_cache(level
= 0,
541 size
= options
.l1d_size
,
542 line_size
= options
.cacheline_size
,
543 assoc
= options
.l1d_assoc
,
546 FileSystemConfig
.register_cache(level
= 0,
548 size
= options
.l1d_size
,
549 line_size
= options
.cacheline_size
,
550 assoc
= options
.l1d_assoc
,
553 FileSystemConfig
.register_cache(level
= 1,
554 idu_type
= 'Unified',
555 size
= options
.l2_size
,
556 line_size
= options
.cacheline_size
,
557 assoc
= options
.l2_assoc
,
560 for i
in range(options
.num_dirs
):
561 FileSystemConfig
.register_cache(level
= 2,
562 idu_type
= 'Unified',
563 size
= options
.l3_size
,
564 line_size
= options
.cacheline_size
,
565 assoc
= options
.l3_assoc
,
567 xrange(options
.num_cpus
)])
570 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
571 gpuCluster
= Cluster(extBW
= crossbar_bw
, intBW
= crossbar_bw
)
573 gpuCluster
= Cluster(extBW
= 8, intBW
= 8) # 16 GB/s
574 for i
in range(options
.num_compute_units
):
576 tcp_cntrl
= TCPCntrl(TCC_select_num_bits
= TCC_bits
,
578 number_of_TBEs
= 2560)
579 # TBEs set to max outstanding requests
580 tcp_cntrl
.create(options
, ruby_system
, system
)
581 tcp_cntrl
.WB
= options
.WB_L1
582 tcp_cntrl
.disableL1
= options
.noL1
583 tcp_cntrl
.L1cache
.tagAccessLatency
= options
.TCP_latency
584 tcp_cntrl
.L1cache
.dataAccessLatency
= options
.TCP_latency
586 exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % i
)
588 # Add controllers and sequencers to the appropriate lists
590 cpu_sequencers
.append(tcp_cntrl
.coalescer
)
591 tcp_cntrl_nodes
.append(tcp_cntrl
)
593 # Connect the TCP controller to the ruby network
594 tcp_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
595 tcp_cntrl
.requestFromTCP
.master
= ruby_system
.network
.slave
597 tcp_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
598 tcp_cntrl
.responseFromTCP
.master
= ruby_system
.network
.slave
600 tcp_cntrl
.unblockFromCore
= MessageBuffer()
601 tcp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
603 tcp_cntrl
.probeToTCP
= MessageBuffer(ordered
= True)
604 tcp_cntrl
.probeToTCP
.slave
= ruby_system
.network
.master
606 tcp_cntrl
.responseToTCP
= MessageBuffer(ordered
= True)
607 tcp_cntrl
.responseToTCP
.slave
= ruby_system
.network
.master
609 tcp_cntrl
.mandatoryQueue
= MessageBuffer()
611 gpuCluster
.add(tcp_cntrl
)
613 for i
in range(options
.num_sqc
):
615 sqc_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
616 sqc_cntrl
.create(options
, ruby_system
, system
)
618 exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i
)
620 # Add controllers and sequencers to the appropriate lists
622 cpu_sequencers
.append(sqc_cntrl
.sequencer
)
624 # Connect the SQC controller to the ruby network
625 sqc_cntrl
.requestFromSQC
= MessageBuffer(ordered
= True)
626 sqc_cntrl
.requestFromSQC
.master
= ruby_system
.network
.slave
628 sqc_cntrl
.probeToSQC
= MessageBuffer(ordered
= True)
629 sqc_cntrl
.probeToSQC
.slave
= ruby_system
.network
.master
631 sqc_cntrl
.responseToSQC
= MessageBuffer(ordered
= True)
632 sqc_cntrl
.responseToSQC
.slave
= ruby_system
.network
.master
634 sqc_cntrl
.mandatoryQueue
= MessageBuffer()
636 # SQC also in GPU cluster
637 gpuCluster
.add(sqc_cntrl
)
639 for i
in range(options
.num_cp
):
641 tcp_ID
= options
.num_compute_units
+ i
642 sqc_ID
= options
.num_sqc
+ i
644 tcp_cntrl
= TCPCntrl(TCC_select_num_bits
= TCC_bits
,
646 number_of_TBEs
= 2560)
647 # TBEs set to max outstanding requests
648 tcp_cntrl
.createCP(options
, ruby_system
, system
)
649 tcp_cntrl
.WB
= options
.WB_L1
650 tcp_cntrl
.disableL1
= options
.noL1
651 tcp_cntrl
.L1cache
.tagAccessLatency
= options
.TCP_latency
652 tcp_cntrl
.L1cache
.dataAccessLatency
= options
.TCP_latency
654 exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % tcp_ID
)
656 # Add controllers and sequencers to the appropriate lists
658 cpu_sequencers
.append(tcp_cntrl
.sequencer
)
659 tcp_cntrl_nodes
.append(tcp_cntrl
)
661 # Connect the CP (TCP) controllers to the ruby network
662 tcp_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
663 tcp_cntrl
.requestFromTCP
.master
= ruby_system
.network
.slave
665 tcp_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
666 tcp_cntrl
.responseFromTCP
.master
= ruby_system
.network
.slave
668 tcp_cntrl
.unblockFromCore
= MessageBuffer(ordered
= True)
669 tcp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
671 tcp_cntrl
.probeToTCP
= MessageBuffer(ordered
= True)
672 tcp_cntrl
.probeToTCP
.slave
= ruby_system
.network
.master
674 tcp_cntrl
.responseToTCP
= MessageBuffer(ordered
= True)
675 tcp_cntrl
.responseToTCP
.slave
= ruby_system
.network
.master
677 tcp_cntrl
.mandatoryQueue
= MessageBuffer()
679 gpuCluster
.add(tcp_cntrl
)
681 sqc_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
682 sqc_cntrl
.create(options
, ruby_system
, system
)
684 exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID
)
686 # Add controllers and sequencers to the appropriate lists
688 cpu_sequencers
.append(sqc_cntrl
.sequencer
)
690 # SQC also in GPU cluster
691 gpuCluster
.add(sqc_cntrl
)
693 for i
in range(options
.num_tccs
):
695 tcc_cntrl
= TCCCntrl(l2_response_latency
= options
.TCC_latency
)
696 tcc_cntrl
.create(options
, ruby_system
, system
)
697 tcc_cntrl
.l2_request_latency
= options
.gpu_to_dir_latency
698 tcc_cntrl
.l2_response_latency
= options
.TCC_latency
699 tcc_cntrl_nodes
.append(tcc_cntrl
)
700 tcc_cntrl
.WB
= options
.WB_L2
701 tcc_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
702 # the number_of_TBEs is inclusive of TBEs below
704 # Connect the TCC controllers to the ruby network
705 tcc_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
706 tcc_cntrl
.requestFromTCP
.slave
= ruby_system
.network
.master
708 tcc_cntrl
.responseToCore
= MessageBuffer(ordered
= True)
709 tcc_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
711 tcc_cntrl
.probeFromNB
= MessageBuffer()
712 tcc_cntrl
.probeFromNB
.slave
= ruby_system
.network
.master
714 tcc_cntrl
.responseFromNB
= MessageBuffer()
715 tcc_cntrl
.responseFromNB
.slave
= ruby_system
.network
.master
717 tcc_cntrl
.requestToNB
= MessageBuffer(ordered
= True)
718 tcc_cntrl
.requestToNB
.master
= ruby_system
.network
.slave
720 tcc_cntrl
.responseToNB
= MessageBuffer()
721 tcc_cntrl
.responseToNB
.master
= ruby_system
.network
.slave
723 tcc_cntrl
.unblockToNB
= MessageBuffer()
724 tcc_cntrl
.unblockToNB
.master
= ruby_system
.network
.slave
726 tcc_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
728 exec("ruby_system.tcc_cntrl%d = tcc_cntrl" % i
)
730 # connect all of the wire buffers between L3 and dirs up
731 # TCC cntrls added to the GPU cluster
732 gpuCluster
.add(tcc_cntrl
)
734 # Assuming no DMA devices
735 assert(len(dma_devices
) == 0)
737 # Add cpu/gpu clusters to main cluster
738 mainCluster
.add(cpuCluster
)
739 mainCluster
.add(gpuCluster
)
741 ruby_system
.network
.number_of_virtual_networks
= 10
743 return (cpu_sequencers
, dir_cntrl_nodes
, mainCluster
)