1 # Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
4 # For use for simulation and test purposes only
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
35 from m5
.objects
import *
36 from m5
.defines
import buildEnv
37 from m5
.util
import addToPath
38 from .Ruby
import create_topology
39 from .Ruby
import send_evicts
43 from topologies
.Cluster
import Cluster
44 from topologies
.Crossbar
import Crossbar
# Use the CntrlBase counter rather than cls, since we need a global count
55 return CntrlBase
._seqs
- 1
# Use the CntrlBase counter rather than cls, since we need a global count
61 CntrlBase
._cntrls
+= 1
62 return CntrlBase
._cntrls
- 1
def versionCount(cls):
    """Advance ``cls._version`` by one and return the value it held before.

    The incremented count is stored on ``cls`` itself, so the numbering is
    kept per type rather than in one shared counter.
    """
    issued = cls._version
    cls._version = issued + 1
    return issued
class L1Cache(RubyCache):
    """L1 cache instantiated by CPCntrl for its instruction and data caches."""

    resourceStalls = False

    def create(self, size, assoc, options):
        """Configure capacity, associativity and replacement policy.

        size    -- cache capacity, in any form MemorySize accepts
        assoc   -- associativity to apply to this cache
        options -- parsed command-line options (not consulted here)
        """
        self.size = MemorySize(size)
        # The caller-supplied associativity used to be accepted and then
        # dropped; apply it, as the other caches' create() methods do.
        self.assoc = assoc
        self.replacement_policy = TreePLRURP()
class L2Cache(RubyCache):
    """L2 cache instantiated by CPCntrl."""

    resourceStalls = False

    def create(self, size, assoc, options):
        """Configure capacity, associativity and replacement policy.

        size    -- cache capacity, in any form MemorySize accepts
        assoc   -- associativity to apply to this cache
        options -- parsed command-line options (not consulted here)
        """
        self.size = MemorySize(size)
        # The caller-supplied associativity used to be accepted and then
        # dropped; apply it, as the other caches' create() methods do.
        self.assoc = assoc
        self.replacement_policy = TreePLRURP()
class CPCntrl(CorePair_Controller, CntrlBase):
    """CorePair controller: one L1I, two L1D caches, one L2 and two CPU
    sequencers (core ids 0 and 1)."""

    def create(self, options, ruby_system, system):
        """Populate the controller's caches and sequencers from ``options``.

        ``system`` is accepted but not used by this method.
        """
        self.version = self.versionCount()

        # (attribute, cache class, option-name prefix).  Each cache is
        # attached to the controller first and sized afterwards, in order.
        cache_layout = (
            ("L1Icache", L1Cache, "l1i"),
            ("L1D0cache", L1Cache, "l1d"),
            ("L1D1cache", L1Cache, "l1d"),
            ("L2cache", L2Cache, "l2"),
        )
        for slot, cache_cls, prefix in cache_layout:
            setattr(self, slot, cache_cls())
            getattr(self, slot).create(
                getattr(options, prefix + "_size"),
                getattr(options, prefix + "_assoc"),
                options,
            )

        # Both sequencers share the instruction cache; each gets its own
        # data cache.  The position in this tuple is the core id.
        sequencer_layout = (
            ("sequencer", "L1D0cache"),
            ("sequencer1", "L1D1cache"),
        )
        for core_id, (seq_slot, dcache_slot) in enumerate(sequencer_layout):
            setattr(self, seq_slot, RubySequencer())
            seq = getattr(self, seq_slot)
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = getattr(self, dcache_slot)
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override recycle_latency when the option carries a truthy value.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
class TCPCache(RubyCache):
    """Cache used as the L1 of a TCPCntrl."""

    dataArrayBanks = 16  # number of data banks
    tagArrayBanks = 16  # number of tag banks
    dataAccessLatency = 4

    def create(self, options):
        """Apply the TCP size/associativity options and pick the replacement policy."""
        capacity = MemorySize(options.tcp_size)
        self.size = capacity
        self.assoc = options.tcp_assoc
        # NOTE(review): this reads the TCC stall option, not the general
        # no_resource_stalls one -- confirm that is intended for the TCP.
        stall_setting = options.no_tcc_resource_stalls
        self.resourceStalls = stall_setting
        self.replacement_policy = TreePLRURP()
class TCPCntrl(TCP_Controller, CntrlBase):
    """TCP controller: an L1 TCPCache, a VIPER coalescer and a Ruby sequencer.

    ``create`` and ``createCP`` build the same structure; they differ only in
    the ``use_seq_not_coal`` flag and in whether the coalescer tuning options
    are applied.
    """

    def create(self, options, ruby_system, system):
        """Build the controller with ``use_seq_not_coal`` set to False.

        Also applies the coalescer's deadlock threshold (when the option is
        set) and its per-cycle coalescing limit.  ``system`` is not used.
        """
        self._create_common(options, ruby_system, use_seq_not_coal=False)

        if options.tcp_deadlock_threshold:
            self.coalescer.deadlock_threshold = \
                options.tcp_deadlock_threshold
        self.coalescer.max_coalesces_per_cycle = \
            options.max_coalesces_per_cycle

    def createCP(self, options, ruby_system, system):
        """Build the controller with ``use_seq_not_coal`` set to True.

        No coalescer tuning options are applied here.  ``system`` is not used.
        """
        self._create_common(options, ruby_system, use_seq_not_coal=True)

    def _create_common(self, options, ruby_system, use_seq_not_coal):
        """Create the cache, coalescer and sequencer shared by both variants."""
        self.version = self.versionCount()

        self.L1cache = TCPCache(tagAccessLatency=options.TCP_latency,
                                dataAccessLatency=options.TCP_latency)
        # Size the cache first and override resourceStalls afterwards.
        # TCPCache.create() assigns resourceStalls itself, so doing the
        # override before create() (as was done previously) made it a dead
        # store.  This now matches the order SQCCntrl and TCCCntrl use.
        self.L1cache.create(options)
        self.L1cache.resourceStalls = options.no_resource_stalls
        self.issue_latency = 1

        # The coalescer takes its sequence number before the sequencer does.
        self.coalescer = VIPERCoalescer()
        self.coalescer.version = self.seqCount()
        self.coalescer.icache = self.L1cache
        self.coalescer.dcache = self.L1cache
        self.coalescer.ruby_system = ruby_system
        self.coalescer.support_inst_reqs = False
        self.coalescer.is_cpu_sequencer = False

        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = use_seq_not_coal

        self.ruby_system = ruby_system

        # Only override recycle_latency when the option carries a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
class SQCCache(RubyCache):
    """Cache used as the L1 of an SQCCntrl."""

    dataAccessLatency = 1

    def create(self, options):
        """Apply the SQC size/associativity options and pick the replacement policy."""
        capacity = MemorySize(options.sqc_size)
        self.size = capacity
        ways = options.sqc_assoc
        self.assoc = ways
        self.replacement_policy = TreePLRURP()
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller: an SQCCache plus one Ruby sequencer."""

    def create(self, options, ruby_system, system):
        """Build the cache and sequencer from ``options``.

        ``system`` is accepted but not used by this method.
        """
        self.version = self.versionCount()

        # Size the cache, then apply the stall setting on top.
        self.L1cache = SQCCache()
        cache = self.L1cache
        cache.create(options)
        cache.resourceStalls = options.no_resource_stalls

        # The one cache serves as both the sequencer's icache and dcache;
        # data requests are disabled on the sequencer.
        self.sequencer = RubySequencer()
        seq = self.sequencer
        seq.version = self.seqCount()
        seq.icache = self.L1cache
        seq.dcache = self.L1cache
        seq.ruby_system = ruby_system
        seq.support_data_reqs = False
        seq.is_cpu_sequencer = False
        if options.sqc_deadlock_threshold:
            seq.deadlock_threshold = options.sqc_deadlock_threshold

        self.ruby_system = ruby_system

        # Only override recycle_latency when the option carries a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
class TCC(RubyCache):
    """Cache used as the L2 of a TCCCntrl; one instance per TCC bank."""

    size = MemorySize("256kB")
    dataAccessLatency = 8
    resourceStalls = True

    def create(self, options):
        """Size and bank this TCC from ``options``.

        When ``options.bw_scalor`` exists and is positive, the aggregate size
        is derived from the compute-unit count and 64 banks are used.
        Otherwise ``options.tcc_size`` is used and 256 banks are split across
        the TCCs.  Either way the aggregate size is then divided by
        ``options.num_tccs`` and raised, if necessary, to ``128 * assoc``.

        Integer (floor) division is used throughout so the byte and bank
        counts stay integral on Python 3 as well as Python 2, and ``int``
        replaces the Python-2-only ``long``.
        """
        self.assoc = options.tcc_assoc
        if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
            # NOTE(review): the statement that derives tcc_size from the
            # compute-unit count is not visible in this view; 128 (kB per
            # compute unit) is an assumption -- confirm against the full file.
            tcc_size = options.num_compute_units * 128
            self.size = MemorySize(str(tcc_size) + 'kB')
            self.dataArrayBanks = 64
            self.tagArrayBanks = 64
        else:
            self.size = MemorySize(options.tcc_size)
            self.dataArrayBanks = 256 // options.num_tccs  # number of data banks
            self.tagArrayBanks = 256 // options.num_tccs  # number of tag banks

        # Per-bank share of the aggregate size.
        self.size.value = self.size.value // options.num_tccs
        # Raise the size to 128 * assoc when the per-way share is below 128.
        if (self.size.value // int(self.assoc)) < 128:
            self.size.value = int(128 * self.assoc)
        self.start_index_bit = math.log(options.cacheline_size, 2) + \
                               math.log(options.num_tccs, 2)
        self.replacement_policy = TreePLRURP()
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller wrapping a single TCC cache as its L2."""

    def create(self, options, ruby_system, system):
        """Instantiate and size the L2 cache, then attach the Ruby system.

        ``system`` is accepted but not used by this method.
        """
        self.version = self.versionCount()

        # Instantiate the cache before configuring it, as every other
        # controller in this file does; create() was otherwise being called
        # on an L2cache that nothing visible here had assigned.
        self.L2cache = TCC()
        self.L2cache.create(options)
        self.L2cache.resourceStalls = options.no_tcc_resource_stalls

        self.ruby_system = ruby_system

        # Only override recycle_latency when the option carries a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
class L3Cache(RubyCache):
    """L3 cache; one instance is created per directory."""

    def create(self, options, ruby_system, system):
        """Configure this directory's slice of the L3 from ``options``.

        The configured L3 size and the bank counts are each divided by
        ``options.num_dirs``.  ``ruby_system`` and ``system`` are accepted
        but not used by this method.
        """
        self.size = MemorySize(options.l3_size)
        # Floor division keeps the byte count integral on Python 3.
        self.size.value //= options.num_dirs
        self.assoc = options.l3_assoc
        # Split the banks across directories once.  The two divisions used
        # to be repeated, which divided the bank counts by num_dirs squared.
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = False
        self.replacement_policy = TreePLRURP()
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 cache controller owning a single L3Cache."""

    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the response latency from it."""
        self.version = self.versionCount()
        self.L3cache = L3Cache()
        self.L3cache.create(options, ruby_system, system)

        # The response latency is the slower of the data and tag accesses.
        l3 = self.L3cache
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)
        self.ruby_system = ruby_system

        # Only override recycle_latency when the option carries a truthy value.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers on the controller's matching attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
class DirMem(RubyDirectoryMemory, CntrlBase):
    """Directory memory object created by DirCntrl."""

    def create(self, options, ruby_system, system):
        """Assign this directory's version and compute its share of memory.

        ``ruby_system`` and ``system`` are accepted but not used here.
        """
        self.version = self.versionCount()

        # Divide the range described by options.mem_size across directories.
        total_bytes = AddrRange(options.mem_size).size()
        share = total_bytes / options.num_dirs
        per_dir = MemorySize('0B')
        per_dir.value = share
        # NOTE(review): no statement visible in this view consumes per_dir --
        # confirm against the full file where the result is applied.
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller owning a DirMem and an L3Cache."""

    def create(self, options, ruby_system, system):
        """Build the directory memory and L3 cache and set the latencies."""
        self.version = self.versionCount()

        self.response_latency = 30

        self.directory = DirMem()
        self.directory.create(options, ruby_system, system)

        self.L3CacheMemory = L3Cache()
        self.L3CacheMemory.create(options, ruby_system, system)

        # The L3 hit latency is the slower of the data and tag accesses.
        l3 = self.L3CacheMemory
        self.l3_hit_latency = max(l3.dataAccessLatency, l3.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        # Only override recycle_latency when the option carries a truthy value.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers on the controller's matching attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
def define_options(parser):
    """Register the GPU_VIPER-specific command-line options on ``parser``.

    ``parser`` must provide an optparse-style ``add_option`` method.

    NOTE(review): the trailing arguments of --no-resource-stalls,
    --no-tcc-resource-stalls, --tcc-assoc, --tcp-size, --tcp-assoc and --noL1
    were not visible when this was reviewed.  The ``default=True`` on the two
    store_false flags and the help text on the other four are supplied here
    and should be confirmed against the full file.
    """
    parser.add_option("--num-subcaches", type="int", default=4)
    parser.add_option("--l3-data-latency", type="int", default=20)
    parser.add_option("--l3-tag-latency", type="int", default=15)
    parser.add_option("--cpu-to-dir-latency", type="int", default=120)
    parser.add_option("--gpu-to-dir-latency", type="int", default=120)
    # store_false flags need an explicit True default; without one the
    # option value is None unless the flag is given.
    parser.add_option("--no-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--no-tcc-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--use-L3-on-WT", action="store_true", default=False)
    parser.add_option("--num-tbes", type="int", default=256)
    parser.add_option("--l2-latency", type="int", default=50)  # load to use
    parser.add_option("--num-tccs", type="int", default=1,
                      help="number of TCC banks in the GPU")
    parser.add_option("--sqc-size", type='string', default='32kB',
                      help="SQC cache size")
    parser.add_option("--sqc-assoc", type='int', default=8,
                      help="SQC cache assoc")
    parser.add_option("--sqc-deadlock-threshold", type='int',
                      help="Set the SQC deadlock threshold to some value")

    parser.add_option("--WB_L1", action="store_true", default=False,
                      help="writeback L1")
    parser.add_option("--WB_L2", action="store_true", default=False,
                      help="writeback L2")
    parser.add_option("--TCP_latency", type="int", default=4,
                      help="TCP latency")
    parser.add_option("--TCC_latency", type="int", default=16,
                      help="TCC latency")
    parser.add_option("--tcc-size", type='string', default='256kB',
                      help="agregate tcc size")
    parser.add_option("--tcc-assoc", type='int', default=16,
                      help="TCC cache assoc")
    parser.add_option("--tcp-size", type='string', default='16kB',
                      help="TCP cache size")
    parser.add_option("--tcp-assoc", type='int', default=16,
                      help="TCP cache assoc")
    parser.add_option("--tcp-deadlock-threshold", type='int',
                      help="Set the TCP deadlock threshold to some value")
    parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
                      help="Maximum insts that may coalesce in a cycle")

    parser.add_option("--noL1", action="store_true", default=False,
                      help="disable the TCP L1 cache")
412 def create_system(options
, full_system
, system
, dma_devices
, bootmem
,
414 if buildEnv
['PROTOCOL'] != 'GPU_VIPER':
415 panic("This script requires the GPU_VIPER protocol to be built.")
420 # The ruby network creation expects the list of nodes in the system to be
421 # consistent with the NetDest list. Therefore the l1 controller nodes
422 # must be listed before the directory nodes and directory nodes before
433 # Must create the individual controllers before the network to ensure the
434 # controller constructors are called before the network constructor
437 # For an odd number of CPUs, still create the right number of controllers
438 TCC_bits
= int(math
.log(options
.num_tccs
, 2))
440 # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
444 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
445 #Assuming a 2GHz clock
446 crossbar_bw
= 16 * options
.num_compute_units
* options
.bw_scalor
447 mainCluster
= Cluster(intBW
=crossbar_bw
)
449 mainCluster
= Cluster(intBW
=8) # 16 GB/s
450 for i
in range(options
.num_dirs
):
452 dir_cntrl
= DirCntrl(noTCCdir
= True, TCC_select_num_bits
= TCC_bits
)
453 dir_cntrl
.create(options
, ruby_system
, system
)
454 dir_cntrl
.number_of_TBEs
= options
.num_tbes
455 dir_cntrl
.useL3OnWT
= options
.use_L3_on_WT
456 # the number_of_TBEs is inclusive of TBEs below
458 # Connect the Directory controller to the ruby network
459 dir_cntrl
.requestFromCores
= MessageBuffer(ordered
= True)
460 dir_cntrl
.requestFromCores
.slave
= ruby_system
.network
.master
462 dir_cntrl
.responseFromCores
= MessageBuffer()
463 dir_cntrl
.responseFromCores
.slave
= ruby_system
.network
.master
465 dir_cntrl
.unblockFromCores
= MessageBuffer()
466 dir_cntrl
.unblockFromCores
.slave
= ruby_system
.network
.master
468 dir_cntrl
.probeToCore
= MessageBuffer()
469 dir_cntrl
.probeToCore
.master
= ruby_system
.network
.slave
471 dir_cntrl
.responseToCore
= MessageBuffer()
472 dir_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
474 dir_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
475 dir_cntrl
.L3triggerQueue
= MessageBuffer(ordered
= True)
476 dir_cntrl
.requestToMemory
= MessageBuffer()
477 dir_cntrl
.responseFromMemory
= MessageBuffer()
479 dir_cntrl
.requestFromDMA
= MessageBuffer(ordered
=True)
480 dir_cntrl
.requestFromDMA
.slave
= ruby_system
.network
.master
482 dir_cntrl
.responseToDMA
= MessageBuffer()
483 dir_cntrl
.responseToDMA
.master
= ruby_system
.network
.slave
485 dir_cntrl
.requestToMemory
= MessageBuffer()
486 dir_cntrl
.responseFromMemory
= MessageBuffer()
488 exec("ruby_system.dir_cntrl%d = dir_cntrl" % i
)
489 dir_cntrl_nodes
.append(dir_cntrl
)
491 mainCluster
.add(dir_cntrl
)
494 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
495 cpuCluster
= Cluster(extBW
= crossbar_bw
, intBW
= crossbar_bw
)
497 cpuCluster
= Cluster(extBW
= 8, intBW
= 8) # 16 GB/s
498 for i
in range((options
.num_cpus
+ 1) // 2):
501 cp_cntrl
.create(options
, ruby_system
, system
)
503 exec("ruby_system.cp_cntrl%d = cp_cntrl" % i
)
505 # Add controllers and sequencers to the appropriate lists
507 cpu_sequencers
.extend([cp_cntrl
.sequencer
, cp_cntrl
.sequencer1
])
509 # Connect the CP controllers and the network
510 cp_cntrl
.requestFromCore
= MessageBuffer()
511 cp_cntrl
.requestFromCore
.master
= ruby_system
.network
.slave
513 cp_cntrl
.responseFromCore
= MessageBuffer()
514 cp_cntrl
.responseFromCore
.master
= ruby_system
.network
.slave
516 cp_cntrl
.unblockFromCore
= MessageBuffer()
517 cp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
519 cp_cntrl
.probeToCore
= MessageBuffer()
520 cp_cntrl
.probeToCore
.slave
= ruby_system
.network
.master
522 cp_cntrl
.responseToCore
= MessageBuffer()
523 cp_cntrl
.responseToCore
.slave
= ruby_system
.network
.master
525 cp_cntrl
.mandatoryQueue
= MessageBuffer()
526 cp_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
528 cpuCluster
.add(cp_cntrl
)
530 # Register CPUs and caches for each CorePair and directory (SE mode only)
532 for i
in range((options
.num_cpus
+ 1) // 2):
533 FileSystemConfig
.register_cpu(physical_package_id
= 0,
535 range(options
.num_cpus
),
537 thread_siblings
= [])
539 FileSystemConfig
.register_cpu(physical_package_id
= 0,
541 range(options
.num_cpus
),
543 thread_siblings
= [])
545 FileSystemConfig
.register_cache(level
= 0,
546 idu_type
= 'Instruction',
547 size
= options
.l1i_size
,
548 line_size
= options
.cacheline_size
,
549 assoc
= options
.l1i_assoc
,
552 FileSystemConfig
.register_cache(level
= 0,
554 size
= options
.l1d_size
,
555 line_size
= options
.cacheline_size
,
556 assoc
= options
.l1d_assoc
,
559 FileSystemConfig
.register_cache(level
= 0,
561 size
= options
.l1d_size
,
562 line_size
= options
.cacheline_size
,
563 assoc
= options
.l1d_assoc
,
566 FileSystemConfig
.register_cache(level
= 1,
567 idu_type
= 'Unified',
568 size
= options
.l2_size
,
569 line_size
= options
.cacheline_size
,
570 assoc
= options
.l2_assoc
,
573 for i
in range(options
.num_dirs
):
574 FileSystemConfig
.register_cache(level
= 2,
575 idu_type
= 'Unified',
576 size
= options
.l3_size
,
577 line_size
= options
.cacheline_size
,
578 assoc
= options
.l3_assoc
,
580 range(options
.num_cpus
)])
583 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
584 gpuCluster
= Cluster(extBW
= crossbar_bw
, intBW
= crossbar_bw
)
586 gpuCluster
= Cluster(extBW
= 8, intBW
= 8) # 16 GB/s
587 for i
in range(options
.num_compute_units
):
589 tcp_cntrl
= TCPCntrl(TCC_select_num_bits
= TCC_bits
,
591 number_of_TBEs
= 2560)
592 # TBEs set to max outstanding requests
593 tcp_cntrl
.create(options
, ruby_system
, system
)
594 tcp_cntrl
.WB
= options
.WB_L1
595 tcp_cntrl
.disableL1
= options
.noL1
596 tcp_cntrl
.L1cache
.tagAccessLatency
= options
.TCP_latency
597 tcp_cntrl
.L1cache
.dataAccessLatency
= options
.TCP_latency
599 exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % i
)
601 # Add controllers and sequencers to the appropriate lists
603 cpu_sequencers
.append(tcp_cntrl
.coalescer
)
604 tcp_cntrl_nodes
.append(tcp_cntrl
)
606 # Connect the TCP controller to the ruby network
607 tcp_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
608 tcp_cntrl
.requestFromTCP
.master
= ruby_system
.network
.slave
610 tcp_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
611 tcp_cntrl
.responseFromTCP
.master
= ruby_system
.network
.slave
613 tcp_cntrl
.unblockFromCore
= MessageBuffer()
614 tcp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
616 tcp_cntrl
.probeToTCP
= MessageBuffer(ordered
= True)
617 tcp_cntrl
.probeToTCP
.slave
= ruby_system
.network
.master
619 tcp_cntrl
.responseToTCP
= MessageBuffer(ordered
= True)
620 tcp_cntrl
.responseToTCP
.slave
= ruby_system
.network
.master
622 tcp_cntrl
.mandatoryQueue
= MessageBuffer()
624 gpuCluster
.add(tcp_cntrl
)
626 for i
in range(options
.num_sqc
):
628 sqc_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
629 sqc_cntrl
.create(options
, ruby_system
, system
)
631 exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i
)
633 # Add controllers and sequencers to the appropriate lists
635 cpu_sequencers
.append(sqc_cntrl
.sequencer
)
637 # Connect the SQC controller to the ruby network
638 sqc_cntrl
.requestFromSQC
= MessageBuffer(ordered
= True)
639 sqc_cntrl
.requestFromSQC
.master
= ruby_system
.network
.slave
641 sqc_cntrl
.probeToSQC
= MessageBuffer(ordered
= True)
642 sqc_cntrl
.probeToSQC
.slave
= ruby_system
.network
.master
644 sqc_cntrl
.responseToSQC
= MessageBuffer(ordered
= True)
645 sqc_cntrl
.responseToSQC
.slave
= ruby_system
.network
.master
647 sqc_cntrl
.mandatoryQueue
= MessageBuffer()
649 # SQC also in GPU cluster
650 gpuCluster
.add(sqc_cntrl
)
652 for i
in xrange(options
.num_scalar_cache
):
653 scalar_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
654 scalar_cntrl
.create(options
, ruby_system
, system
)
656 exec('ruby_system.scalar_cntrl%d = scalar_cntrl' % i
)
658 cpu_sequencers
.append(scalar_cntrl
.sequencer
)
660 scalar_cntrl
.requestFromSQC
= MessageBuffer(ordered
= True)
661 scalar_cntrl
.requestFromSQC
.master
= ruby_system
.network
.slave
663 scalar_cntrl
.probeToSQC
= MessageBuffer(ordered
= True)
664 scalar_cntrl
.probeToSQC
.slave
= ruby_system
.network
.master
666 scalar_cntrl
.responseToSQC
= MessageBuffer(ordered
= True)
667 scalar_cntrl
.responseToSQC
.slave
= ruby_system
.network
.master
669 scalar_cntrl
.mandatoryQueue
= \
670 MessageBuffer(buffer_size
=options
.buffers_size
)
672 gpuCluster
.add(scalar_cntrl
)
674 for i
in xrange(options
.num_cp
):
676 tcp_ID
= options
.num_compute_units
+ i
677 sqc_ID
= options
.num_sqc
+ i
679 tcp_cntrl
= TCPCntrl(TCC_select_num_bits
= TCC_bits
,
681 number_of_TBEs
= 2560)
682 # TBEs set to max outstanding requests
683 tcp_cntrl
.createCP(options
, ruby_system
, system
)
684 tcp_cntrl
.WB
= options
.WB_L1
685 tcp_cntrl
.disableL1
= options
.noL1
686 tcp_cntrl
.L1cache
.tagAccessLatency
= options
.TCP_latency
687 tcp_cntrl
.L1cache
.dataAccessLatency
= options
.TCP_latency
689 exec("ruby_system.tcp_cntrl%d = tcp_cntrl" % tcp_ID
)
691 # Add controllers and sequencers to the appropriate lists
693 cpu_sequencers
.append(tcp_cntrl
.sequencer
)
694 tcp_cntrl_nodes
.append(tcp_cntrl
)
696 # Connect the CP (TCP) controllers to the ruby network
697 tcp_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
698 tcp_cntrl
.requestFromTCP
.master
= ruby_system
.network
.slave
700 tcp_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
701 tcp_cntrl
.responseFromTCP
.master
= ruby_system
.network
.slave
703 tcp_cntrl
.unblockFromCore
= MessageBuffer(ordered
= True)
704 tcp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
706 tcp_cntrl
.probeToTCP
= MessageBuffer(ordered
= True)
707 tcp_cntrl
.probeToTCP
.slave
= ruby_system
.network
.master
709 tcp_cntrl
.responseToTCP
= MessageBuffer(ordered
= True)
710 tcp_cntrl
.responseToTCP
.slave
= ruby_system
.network
.master
712 tcp_cntrl
.mandatoryQueue
= MessageBuffer()
714 gpuCluster
.add(tcp_cntrl
)
716 sqc_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
717 sqc_cntrl
.create(options
, ruby_system
, system
)
719 exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID
)
721 # Add controllers and sequencers to the appropriate lists
723 cpu_sequencers
.append(sqc_cntrl
.sequencer
)
725 # SQC also in GPU cluster
726 gpuCluster
.add(sqc_cntrl
)
728 for i
in range(options
.num_tccs
):
730 tcc_cntrl
= TCCCntrl(l2_response_latency
= options
.TCC_latency
)
731 tcc_cntrl
.create(options
, ruby_system
, system
)
732 tcc_cntrl
.l2_request_latency
= options
.gpu_to_dir_latency
733 tcc_cntrl
.l2_response_latency
= options
.TCC_latency
734 tcc_cntrl_nodes
.append(tcc_cntrl
)
735 tcc_cntrl
.WB
= options
.WB_L2
736 tcc_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
737 # the number_of_TBEs is inclusive of TBEs below
739 # Connect the TCC controllers to the ruby network
740 tcc_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
741 tcc_cntrl
.requestFromTCP
.slave
= ruby_system
.network
.master
743 tcc_cntrl
.responseToCore
= MessageBuffer(ordered
= True)
744 tcc_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
746 tcc_cntrl
.probeFromNB
= MessageBuffer()
747 tcc_cntrl
.probeFromNB
.slave
= ruby_system
.network
.master
749 tcc_cntrl
.responseFromNB
= MessageBuffer()
750 tcc_cntrl
.responseFromNB
.slave
= ruby_system
.network
.master
752 tcc_cntrl
.requestToNB
= MessageBuffer(ordered
= True)
753 tcc_cntrl
.requestToNB
.master
= ruby_system
.network
.slave
755 tcc_cntrl
.responseToNB
= MessageBuffer()
756 tcc_cntrl
.responseToNB
.master
= ruby_system
.network
.slave
758 tcc_cntrl
.unblockToNB
= MessageBuffer()
759 tcc_cntrl
.unblockToNB
.master
= ruby_system
.network
.slave
761 tcc_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
763 exec("ruby_system.tcc_cntrl%d = tcc_cntrl" % i
)
765 # connect all of the wire buffers between L3 and dirs up
766 # TCC cntrls added to the GPU cluster
767 gpuCluster
.add(tcc_cntrl
)
769 for i
, dma_device
in enumerate(dma_devices
):
770 dma_seq
= DMASequencer(version
=i
, ruby_system
=ruby_system
)
771 dma_cntrl
= DMA_Controller(version
=i
, dma_sequencer
=dma_seq
,
772 ruby_system
=ruby_system
)
773 exec('system.dma_cntrl%d = dma_cntrl' % i
)
774 if dma_device
.type == 'MemTest':
775 exec('system.dma_cntrl%d.dma_sequencer.slave = dma_devices.test'
778 exec('system.dma_cntrl%d.dma_sequencer.slave = dma_device.dma' % i
)
779 dma_cntrl
.requestToDir
= MessageBuffer(buffer_size
=0)
780 dma_cntrl
.requestToDir
.master
= ruby_system
.network
.slave
781 dma_cntrl
.responseFromDir
= MessageBuffer(buffer_size
=0)
782 dma_cntrl
.responseFromDir
.slave
= ruby_system
.network
.master
783 dma_cntrl
.mandatoryQueue
= MessageBuffer(buffer_size
= 0)
784 gpuCluster
.add(dma_cntrl
)
786 # Add cpu/gpu clusters to main cluster
787 mainCluster
.add(cpuCluster
)
788 mainCluster
.add(gpuCluster
)
790 ruby_system
.network
.number_of_virtual_networks
= 11
792 return (cpu_sequencers
, dir_cntrl_nodes
, mainCluster
)