configs, mem: Support running VIPER with GCN3
[gem5.git] / configs / ruby / GPU_VIPER.py
1 # Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
2 # All rights reserved.
3 #
4 # For use for simulation and test purposes only
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
8 #
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
11 #
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
15 #
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
31
32 import six
33 import math
34 import m5
35 from m5.objects import *
36 from m5.defines import buildEnv
37 from m5.util import addToPath
38 from .Ruby import create_topology
39 from .Ruby import send_evicts
40
41 addToPath('../')
42
43 from topologies.Cluster import Cluster
44 from topologies.Crossbar import Crossbar
45
# Python 3 has no separate ``long`` type. Alias it to ``int`` so the
# ``long(...)`` calls further down in this file work on both Python 2
# and Python 3.
if six.PY3:
    long = int
48
class CntrlBase:
    """Mixin that hands out running ID numbers to Ruby controllers.

    Sequencer and controller IDs are global: they are always stored on
    CntrlBase itself. Version numbers are tracked on whichever class
    versionCount() is called on, so each controller type gets its own
    sequence.
    """

    # Next sequencer ID (shared by every controller type).
    _seqs = 0
    # Next controller ID (shared by every controller type).
    _cntrls = 0
    # Next version number (stored per calling class once it is used).
    _version = 0

    @classmethod
    def seqCount(cls):
        """Return the next global sequencer ID, starting from 0."""
        # Deliberately update CntrlBase rather than cls: the count has to
        # be global across all controller types.
        allocated = CntrlBase._seqs
        CntrlBase._seqs = allocated + 1
        return allocated

    @classmethod
    def cntrlCount(cls):
        """Return the next global controller ID, starting from 0."""
        # Deliberately update CntrlBase rather than cls: the count has to
        # be global across all controller types.
        allocated = CntrlBase._cntrls
        CntrlBase._cntrls = allocated + 1
        return allocated

    @classmethod
    def versionCount(cls):
        """Return the next version number for this particular class."""
        # Writing through cls gives every subclass its own counter, seeded
        # from whatever value it currently inherits.
        allocated = cls._version
        cls._version = allocated + 1
        return allocated
69
class L1Cache(RubyCache):
    """RubyCache preset used for the L1 caches built by CPCntrl."""

    resourceStalls = False
    tagArrayBanks = 2
    dataArrayBanks = 2
    tagAccessLatency = 1
    dataAccessLatency = 1

    def create(self, size, assoc, options):
        """Set the cache geometry and replacement policy.

        size is converted with MemorySize(); assoc is stored as given.
        options is accepted but not read by this method.
        """
        self.replacement_policy = TreePLRURP()
        self.assoc = assoc
        self.size = MemorySize(size)
80
class L2Cache(RubyCache):
    """RubyCache preset used for the L2 cache built by CPCntrl."""

    resourceStalls = False
    # Class-level default; create() replaces it with the caller's value.
    assoc = 16
    tagArrayBanks = 16
    dataArrayBanks = 16

    def create(self, size, assoc, options):
        """Set the cache geometry and replacement policy.

        size is converted with MemorySize(); assoc is stored as given.
        options is accepted but not read by this method.
        """
        self.replacement_policy = TreePLRURP()
        self.assoc = assoc
        self.size = MemorySize(size)
90
class CPCntrl(CorePair_Controller, CntrlBase):
    """CorePair controller: two sequencers sharing an L1I and an L2."""

    def create(self, options, ruby_system, system):
        """Build the caches and the two sequencers of this core pair.

        Reads the l1i/l1d/l2 size and associativity, cpu_to_dir_latency
        and recycle_latency from options. system is accepted but not read
        by this method.
        """
        self.version = self.versionCount()

        # One shared instruction cache, one data cache per core, and a
        # shared L2.
        icache = self.L1Icache = L1Cache()
        icache.create(options.l1i_size, options.l1i_assoc, options)
        dcache0 = self.L1D0cache = L1Cache()
        dcache0.create(options.l1d_size, options.l1d_assoc, options)
        dcache1 = self.L1D1cache = L1Cache()
        dcache1.create(options.l1d_size, options.l1d_assoc, options)
        l2 = self.L2cache = L2Cache()
        l2.create(options.l2_size, options.l2_assoc, options)

        # Core 0 uses "sequencer" with L1D0; core 1 uses "sequencer1" with
        # L1D1. Both fetch instructions through the shared L1I.
        per_core = (("sequencer", self.L1D0cache),
                    ("sequencer1", self.L1D1cache))
        for core_id, (seq_attr, l1d) in enumerate(per_core):
            seq = RubySequencer()
            setattr(self, seq_attr, seq)
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = l1d
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the controller's recycle latency when one is given.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
128
class TCPCache(RubyCache):
    """RubyCache preset used as the L1 cache of a TCPCntrl."""

    size = "16kB"
    assoc = 16
    tagArrayBanks = 16   # number of tag banks
    dataArrayBanks = 16  # number of data banks
    tagAccessLatency = 1
    dataAccessLatency = 4

    def create(self, options):
        """Configure the cache from the tcp_* command-line options."""
        self.replacement_policy = TreePLRURP()
        # NOTE(review): this reads the TCC stall option, not
        # no_resource_stalls -- confirm that is intended for the TCP.
        self.resourceStalls = options.no_tcc_resource_stalls
        self.assoc = options.tcp_assoc
        self.size = MemorySize(options.tcp_size)
141
class TCPCntrl(TCP_Controller, CntrlBase):
    """TCP controller with an L1 cache, a coalescer and a sequencer.

    create() configures the controller to use the coalescer;
    createCP() configures it to use the plain sequencer instead
    (use_seq_not_coal = True).
    """

    def _createCommon(self, options, ruby_system, use_seq_not_coal):
        """Build everything that create() and createCP() have in common."""
        self.version = self.versionCount()

        self.L1cache = TCPCache(tagAccessLatency = options.TCP_latency,
                                dataAccessLatency = options.TCP_latency)
        self.L1cache.create(options)
        # Apply the stall setting *after* create(): TCPCache.create()
        # assigns resourceStalls itself, so setting it beforehand (as this
        # code used to) had no effect. SQCCntrl and TCCCntrl use the same
        # create-then-override order.
        self.L1cache.resourceStalls = options.no_resource_stalls
        self.issue_latency = 1

        # The coalescer takes its sequencer ID before the plain sequencer.
        self.coalescer = VIPERCoalescer()
        self.coalescer.version = self.seqCount()
        self.coalescer.icache = self.L1cache
        self.coalescer.dcache = self.L1cache
        self.coalescer.ruby_system = ruby_system
        self.coalescer.support_inst_reqs = False
        self.coalescer.is_cpu_sequencer = False

        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = use_seq_not_coal

        self.ruby_system = ruby_system

        # Only override the controller's recycle latency when one is given.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def create(self, options, ruby_system, system):
        """Configure this controller to issue through the coalescer.

        In addition to the common setup, applies tcp_deadlock_threshold
        (when set) and max_coalesces_per_cycle to the coalescer. system is
        accepted but not read.
        """
        self._createCommon(options, ruby_system, use_seq_not_coal=False)

        if options.tcp_deadlock_threshold:
            self.coalescer.deadlock_threshold = \
                options.tcp_deadlock_threshold
        self.coalescer.max_coalesces_per_cycle = \
            options.max_coalesces_per_cycle

    def createCP(self, options, ruby_system, system):
        """Configure this controller to issue through the plain sequencer.

        Performs the common setup only; the coalescer keeps its default
        deadlock threshold and coalescing limit. system is accepted but
        not read.
        """
        self._createCommon(options, ruby_system, use_seq_not_coal=True)
210
class SQCCache(RubyCache):
    """RubyCache preset used as the L1 cache of an SQCCntrl."""

    tagArrayBanks = 8
    dataArrayBanks = 8
    tagAccessLatency = 1
    dataAccessLatency = 1

    def create(self, options):
        """Configure the cache from the sqc_size / sqc_assoc options."""
        self.replacement_policy = TreePLRURP()
        self.assoc = options.sqc_assoc
        self.size = MemorySize(options.sqc_size)
221
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller: one SQCCache behind a sequencer with data requests
    disabled."""

    def create(self, options, ruby_system, system):
        """Build the cache and sequencer for this controller.

        Reads no_resource_stalls, sqc_deadlock_threshold and
        recycle_latency from options (plus whatever SQCCache.create()
        reads). system is accepted but not read by this method.
        """
        self.version = self.versionCount()

        cache = self.L1cache = SQCCache()
        cache.create(options)
        cache.resourceStalls = options.no_resource_stalls

        # The same cache object is wired in as both icache and dcache.
        seq = self.sequencer = RubySequencer()
        seq.version = self.seqCount()
        seq.icache = self.L1cache
        seq.dcache = self.L1cache
        seq.ruby_system = ruby_system
        seq.support_data_reqs = False
        seq.is_cpu_sequencer = False
        if options.sqc_deadlock_threshold:
            seq.deadlock_threshold = options.sqc_deadlock_threshold

        self.ruby_system = ruby_system

        # Only override the controller's recycle latency when one is given.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
247
class TCC(RubyCache):
    """RubyCache preset used as the L2 cache of a TCCCntrl."""

    size = MemorySize("256kB")
    assoc = 16
    dataAccessLatency = 8
    tagAccessLatency = 2
    resourceStalls = True

    def create(self, options):
        """Size one TCC bank from the command-line options.

        When options has a positive bw_scalor, the aggregate size is
        128kB per compute unit with 64 data/tag banks; otherwise the
        aggregate size is options.tcc_size and 256 banks are split over
        the TCCs. The aggregate size is then divided by options.num_tccs.

        All divisions are floor divisions so the values stay integers on
        Python 3 as well as Python 2.
        """
        self.assoc = options.tcc_assoc
        if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
            tcc_size = str(options.num_compute_units * 128) + 'kB'
            self.size = MemorySize(tcc_size)
            self.dataArrayBanks = 64
            self.tagArrayBanks = 64
        else:
            self.size = MemorySize(options.tcc_size)
            # number of data / tag banks per TCC
            self.dataArrayBanks = 256 // options.num_tccs
            self.tagArrayBanks = 256 // options.num_tccs
        # Each TCC holds an equal share of the aggregate size.
        self.size.value = self.size.value // options.num_tccs
        # Enforce a floor of 128 per way of associativity on the per-bank
        # size (size.value is presumably in bytes -- TODO confirm).
        if (self.size.value // long(self.assoc)) < 128:
            self.size.value = long(128 * self.assoc)
        # Index bits start above the cache-line offset bits and the bits
        # spanned by num_tccs.
        self.start_index_bit = math.log(options.cacheline_size, 2) + \
                               math.log(options.num_tccs, 2)
        self.replacement_policy = TreePLRURP()
273
274
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller owning a single TCC cache as its L2."""

    def create(self, options, ruby_system, system):
        """Build the L2 cache for this controller.

        Reads no_tcc_resource_stalls and recycle_latency from options
        (plus whatever TCC.create() reads). system is accepted but not
        read by this method.
        """
        self.version = self.versionCount()

        l2 = self.L2cache = TCC()
        l2.create(options)
        # Set after create() so this value is the one that sticks.
        l2.resourceStalls = options.no_tcc_resource_stalls

        self.ruby_system = ruby_system

        # Only override the controller's recycle latency when one is given.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
286
class L3Cache(RubyCache):
    """RubyCache preset for the L3, split evenly across the directories."""

    # Aggregate bank counts; create() divides them among the directories.
    dataArrayBanks = 16
    tagArrayBanks = 16

    def create(self, options, ruby_system, system):
        """Size one L3 slice from the command-line options.

        The size (options.l3_size) and the bank counts are each divided
        by options.num_dirs exactly once. Previously the bank counts were
        divided twice by duplicated statements, leaving
        banks / num_dirs**2. Floor division keeps the values integers on
        Python 3.

        ruby_system and system are accepted but not read.
        """
        self.size = MemorySize(options.l3_size)
        self.size.value //= options.num_dirs
        self.assoc = options.l3_assoc
        # long() unwraps the current parameter value to a plain integer
        # before dividing.
        self.dataArrayBanks = long(self.dataArrayBanks) // options.num_dirs
        self.tagArrayBanks = long(self.tagArrayBanks) // options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = False
        self.replacement_policy = TreePLRURP()
303
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 cache controller owning a single L3Cache."""

    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the response latency from it."""
        self.version = self.versionCount()

        l3 = self.L3cache = L3Cache()
        l3.create(options, ruby_system, system)

        # Respond no faster than the slower of the data and tag arrays.
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)
        self.ruby_system = ruby_system

        # Only override the controller's recycle latency when one is given.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Attach the six wire buffers, in argument order, to the
        identically-purposed attributes of this controller."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
324
class DirMem(RubyDirectoryMemory, CntrlBase):
    """Directory memory covering an equal share of physical memory."""

    def create(self, options, ruby_system, system):
        """Set the directory size to mem_size / num_dirs.

        Floor division is used so the size stays an integer on Python 3
        (true division would store a float). ruby_system and system are
        accepted but not read.
        """
        self.version = self.versionCount()

        phys_mem_size = AddrRange(options.mem_size).size()
        mem_module_size = phys_mem_size // options.num_dirs
        # Start from an empty MemorySize and overwrite its raw value.
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size
        self.size = dir_size
334
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller with its directory memory and an L3 slice."""

    def create(self, options, ruby_system, system):
        """Build the directory memory and L3 cache for this controller.

        Reads num_tbes and recycle_latency from options (plus whatever
        DirMem.create() and L3Cache.create() read).
        """
        self.version = self.versionCount()

        self.response_latency = 30

        directory = self.directory = DirMem()
        directory.create(options, ruby_system, system)

        l3 = self.L3CacheMemory = L3Cache()
        l3.create(options, ruby_system, system)

        # An L3 hit costs the slower of the data and tag array accesses.
        self.l3_hit_latency = max(l3.dataAccessLatency, l3.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        # Only override the controller's recycle latency when one is given.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Attach the six wire buffers, in argument order, to the
        identically-purposed attributes of this controller."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
365
def define_options(parser):
    """Register the GPU_VIPER-specific command-line options on parser.

    parser must provide an optparse-style add_option() method. The parsed
    values are read by the controller classes and create_system() in this
    file.
    """
    parser.add_option("--num-subcaches", type = "int", default = 4)
    parser.add_option("--l3-data-latency", type = "int", default = 20)
    parser.add_option("--l3-tag-latency", type = "int", default = 15)
    parser.add_option("--cpu-to-dir-latency", type = "int", default = 120)
    parser.add_option("--gpu-to-dir-latency", type = "int", default = 120)
    # Both stall flags use store_false with a True default: the stored
    # value is assigned to a cache's resourceStalls, so passing the flag
    # turns resource stalls off.
    parser.add_option("--no-resource-stalls", action = "store_false",
                      default = True)
    parser.add_option("--no-tcc-resource-stalls", action = "store_false",
                      default = True)
    parser.add_option("--use-L3-on-WT", action = "store_true", default = False)
    parser.add_option("--num-tbes", type = "int", default = 256)
    parser.add_option("--l2-latency", type = "int", default = 50) # load to use
    parser.add_option("--num-tccs", type = "int", default = 1,
                      help = "number of TCC banks in the GPU")
    parser.add_option("--sqc-size", type = 'string', default = '32kB',
                      help = "SQC cache size")
    parser.add_option("--sqc-assoc", type = 'int', default = 8,
                      help = "SQC cache assoc")
    parser.add_option("--sqc-deadlock-threshold", type='int',
                      help="Set the SQC deadlock threshold to some value")

    parser.add_option("--WB_L1", action = "store_true", default = False,
                      help = "writeback L1")
    parser.add_option("--WB_L2", action = "store_true", default = False,
                      help = "writeback L2")
    parser.add_option("--TCP_latency", type = "int", default = 4,
                      help = "TCP latency")
    parser.add_option("--TCC_latency", type = "int", default = 16,
                      help = "TCC latency")
    parser.add_option("--tcc-size", type = 'string', default = '256kB',
                      help = "aggregate tcc size")
    parser.add_option("--tcc-assoc", type = 'int', default = 16,
                      help = "tcc assoc")
    parser.add_option("--tcp-size", type = 'string', default = '16kB',
                      help = "tcp size")
    parser.add_option("--tcp-assoc", type = 'int', default = 16,
                      help = "tcp assoc")
    parser.add_option("--tcp-deadlock-threshold", type='int',
                      help="Set the TCP deadlock threshold to some value")
    parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
                      help="Maximum insts that may coalesce in a cycle")

    parser.add_option("--noL1", action = "store_true", default = False,
                      help = "bypassL1")
411
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    """Create the GPU_VIPER controllers and group them into clusters.

    Builds, in this order: the directory controllers, the CPU core-pair
    controllers, the TCP, SQC and scalar-cache controllers, the
    command-processor TCP/SQC pairs, the TCC controllers and one DMA
    controller per entry of dma_devices. Controllers are attached to
    ruby_system (DMA controllers to system) under numbered attribute names
    and their message buffers are connected to ruby_system.network.

    Args:
        options: parsed command-line options.
        full_system: when false, CPUs and caches are also registered with
            FileSystemConfig.
        system: the system object that receives the DMA controllers.
        dma_devices: iterable of DMA-capable devices; each needs a
            ``type`` attribute and a ``test`` (for 'MemTest') or ``dma``
            port.
        bootmem: accepted but not read by this function.
        ruby_system: the Ruby system that receives all other controllers.

    Returns:
        A (cpu_sequencers, dir_cntrl_nodes, mainCluster) tuple.
    """
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    tcp_cntrl_nodes = []
    tcc_cntrl_nodes = []
    dir_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    TCC_bits = int(math.log(options.num_tccs, 2))

    # When a positive bw_scalor option is present, every cluster uses a
    # bandwidth scaled by the number of compute units instead of the fixed
    # default.
    scale_bw = hasattr(options, 'bw_scalor') and options.bw_scalor > 0

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    if scale_bw:
        #Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW=crossbar_bw)
    else:
        mainCluster = Cluster(intBW=8) # 16 GB/s

    for i in range(options.num_dirs):

        dir_cntrl = DirCntrl(noTCCdir = True, TCC_select_num_bits = TCC_bits)
        dir_cntrl.create(options, ruby_system, system)
        dir_cntrl.number_of_TBEs = options.num_tbes
        dir_cntrl.useL3OnWT = options.use_L3_on_WT
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered = True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)

        dir_cntrl.requestFromDMA = MessageBuffer(ordered=True)
        dir_cntrl.requestFromDMA.slave = ruby_system.network.master

        dir_cntrl.responseToDMA = MessageBuffer()
        dir_cntrl.responseToDMA.master = ruby_system.network.slave

        # Memory-side buffers are not connected to the network here. (They
        # used to be created twice; only the final pair was ever kept.)
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    if scale_bw:
        cpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
    else:
        cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s

    # Each CorePair controller serves two CPUs. For an odd number of CPUs,
    # still create the right number of controllers by rounding up.
    num_core_pairs = (options.num_cpus + 1) // 2
    for i in range(num_core_pairs):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "cp_cntrl%d" % i, cp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    # NOTE(review): FileSystemConfig is not imported in the visible part of
    # this file -- confirm it is in scope before relying on this path.
    if not full_system:
        for i in range(num_core_pairs):
            even_cpu = i * 2
            odd_cpu = i * 2 + 1

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                              range(options.num_cpus),
                                          core_id = even_cpu,
                                          thread_siblings = [])

            FileSystemConfig.register_cpu(physical_package_id = 0,
                                          core_siblings = \
                                              range(options.num_cpus),
                                          core_id = odd_cpu,
                                          thread_siblings = [])

            # The L1I and the L2 are shared by the pair; each core has its
            # own L1D.
            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Instruction',
                                            size = options.l1i_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1i_assoc,
                                            cpus = [even_cpu, odd_cpu])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [even_cpu])

            FileSystemConfig.register_cache(level = 0,
                                            idu_type = 'Data',
                                            size = options.l1d_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l1d_assoc,
                                            cpus = [odd_cpu])

            FileSystemConfig.register_cache(level = 1,
                                            idu_type = 'Unified',
                                            size = options.l2_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l2_assoc,
                                            cpus = [even_cpu, odd_cpu])

        # One L3 entry per directory, each visible to every CPU.
        for i in range(options.num_dirs):
            FileSystemConfig.register_cache(level = 2,
                                            idu_type = 'Unified',
                                            size = options.l3_size,
                                            line_size = options.cacheline_size,
                                            assoc = options.l3_assoc,
                                            cpus = list(
                                                range(options.num_cpus)))

    if scale_bw:
        gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw)
    else:
        gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s

    for i in range(options.num_compute_units):

        tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
                             issue_latency = 1,
                             number_of_TBEs = 2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        setattr(ruby_system, "tcp_cntrl%d" % i, tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the TCP controller to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

    for i in range(options.num_sqc):

        sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "sqc_cntrl%d" % i, sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
        sqc_cntrl.requestFromSQC.master = ruby_system.network.slave

        sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
        sqc_cntrl.probeToSQC.slave = ruby_system.network.master

        sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
        sqc_cntrl.responseToSQC.slave = ruby_system.network.master

        sqc_cntrl.mandatoryQueue = MessageBuffer()

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    # Scalar caches reuse the SQC controller type. range() rather than
    # xrange() keeps this loop working on Python 3.
    for i in range(options.num_scalar_cache):
        scalar_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        scalar_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "scalar_cntrl%d" % i, scalar_cntrl)

        cpu_sequencers.append(scalar_cntrl.sequencer)

        scalar_cntrl.requestFromSQC = MessageBuffer(ordered = True)
        scalar_cntrl.requestFromSQC.master = ruby_system.network.slave

        scalar_cntrl.probeToSQC = MessageBuffer(ordered = True)
        scalar_cntrl.probeToSQC.slave = ruby_system.network.master

        scalar_cntrl.responseToSQC = MessageBuffer(ordered = True)
        scalar_cntrl.responseToSQC.slave = ruby_system.network.master

        scalar_cntrl.mandatoryQueue = \
            MessageBuffer(buffer_size=options.buffers_size)

        gpuCluster.add(scalar_cntrl)

    # Command processors get a TCP/SQC pair each, numbered after the
    # compute-unit TCPs and SQCs. range() rather than xrange() keeps this
    # loop working on Python 3.
    for i in range(options.num_cp):

        tcp_ID = options.num_compute_units + i
        sqc_ID = options.num_sqc + i

        tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
                             issue_latency = 1,
                             number_of_TBEs = 2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.createCP(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1
        tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
        tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency

        setattr(ruby_system, "tcp_cntrl%d" % tcp_ID, tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.sequencer)
        tcp_cntrl_nodes.append(tcp_cntrl)

        # Connect the CP (TCP) controllers to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer(ordered = True)
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

        sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "sqc_cntrl%d" % sqc_ID, sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_tccs):

        tcc_cntrl = TCCCntrl(l2_response_latency = options.TCC_latency)
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl_nodes.append(tcc_cntrl)
        tcc_cntrl.WB = options.WB_L2
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units
        # the number_of_TBEs is inclusive of TBEs below

        # Connect the TCC controllers to the ruby network
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcc_cntrl.requestFromTCP.slave = ruby_system.network.master

        tcc_cntrl.responseToCore = MessageBuffer(ordered = True)
        tcc_cntrl.responseToCore.master = ruby_system.network.slave

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.slave = ruby_system.network.master

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.slave = ruby_system.network.master

        tcc_cntrl.requestToNB = MessageBuffer(ordered = True)
        tcc_cntrl.requestToNB.master = ruby_system.network.slave

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.master = ruby_system.network.slave

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.master = ruby_system.network.slave

        tcc_cntrl.triggerQueue = MessageBuffer(ordered = True)

        setattr(ruby_system, "tcc_cntrl%d" % i, tcc_cntrl)

        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)

    for i, dma_device in enumerate(dma_devices):
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   ruby_system=ruby_system)
        setattr(system, 'dma_cntrl%d' % i, dma_cntrl)
        # Bind the sequencer to this device's port. The MemTest case used
        # to read ``.test`` off the dma_devices collection itself rather
        # than off the current device.
        if dma_device.type == 'MemTest':
            dma_cntrl.dma_sequencer.slave = dma_device.test
        else:
            dma_cntrl.dma_sequencer.slave = dma_device.dma
        dma_cntrl.requestToDir = MessageBuffer(buffer_size=0)
        dma_cntrl.requestToDir.master = ruby_system.network.slave
        dma_cntrl.responseFromDir = MessageBuffer(buffer_size=0)
        dma_cntrl.responseFromDir.slave = ruby_system.network.master
        dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size = 0)
        gpuCluster.add(dma_cntrl)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 11

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)
791
792 return (cpu_sequencers, dir_cntrl_nodes, mainCluster)