configs: Replace DirMem w/RubyDirectoryMemory, set addr_ranges
[gem5.git] / configs / ruby / GPU_VIPER_Baseline.py
1 # Copyright (c) 2015 Advanced Micro Devices, Inc.
2 # All rights reserved.
3 #
4 # For use for simulation and test purposes only
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
8 #
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
11 #
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
15 #
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
31
32 import six
33 import math
34 import m5
35 from m5.objects import *
36 from m5.defines import buildEnv
37 from m5.util import addToPath
38 from .Ruby import create_topology
39 from .Ruby import send_evicts
40
41 addToPath('../')
42
43 from topologies.Cluster import Cluster
44 from topologies.Crossbar import Crossbar
45
46 if six.PY3:
47 long = int
48
class CntrlBase:
    """Mixin that hands out running ID numbers to controllers.

    ``seqCount`` and ``cntrlCount`` advance counters stored on
    ``CntrlBase`` itself, so every subclass shares them.  ``versionCount``
    advances a counter stored on the class it is called on (seeded from
    the inherited value the first time), so each calling class keeps its
    own numbering.
    """

    _seqs = 0
    _cntrls = 0
    _version = 0

    @classmethod
    def seqCount(cls):
        """Return the next global sequencer number (first call gives 0)."""
        # Deliberately kept on CntrlBase, not cls: the count is global.
        assigned = CntrlBase._seqs
        CntrlBase._seqs = assigned + 1
        return assigned

    @classmethod
    def cntrlCount(cls):
        """Return the next global controller number (first call gives 0)."""
        # Deliberately kept on CntrlBase, not cls: the count is global.
        assigned = CntrlBase._cntrls
        CntrlBase._cntrls = assigned + 1
        return assigned

    @classmethod
    def versionCount(cls):
        """Return the next version number for the calling class."""
        # Written to cls, so the count is tracked per controller type.
        assigned = cls._version
        cls._version = assigned + 1
        return assigned
69
class L1Cache(RubyCache):
    """RubyCache preset with two data/tag banks and 1-cycle latencies."""

    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 2
    dataArrayBanks = 2
    resourceStalls = False

    def create(self, size, assoc, options):
        """Apply the requested size and associativity.

        ``size`` is wrapped in ``MemorySize``; a fresh ``TreePLRURP`` is
        installed as the replacement policy.  ``options`` is accepted but
        not read here.
        """
        capacity = MemorySize(size)
        self.size = capacity
        self.assoc = assoc
        self.replacement_policy = TreePLRURP()
80
class L2Cache(RubyCache):
    """RubyCache preset with sixteen data/tag banks and no resource stalls."""

    tagArrayBanks = 16
    dataArrayBanks = 16
    assoc = 16
    resourceStalls = False

    def create(self, size, assoc, options):
        """Apply the requested size and associativity.

        ``size`` is wrapped in ``MemorySize``; the class-level ``assoc``
        default is overridden by the ``assoc`` argument.  ``options`` is
        accepted but not read here.
        """
        capacity = MemorySize(size)
        self.size = capacity
        self.assoc = assoc
        self.replacement_policy = TreePLRURP()
90
class CPCntrl(CorePair_Controller, CntrlBase):
    """Core-pair controller: one L1I, two L1Ds, one L2, two sequencers."""

    def create(self, options, ruby_system, system):
        """Create the caches and both sequencers of this core pair.

        Cache sizes and associativities come from ``options``.  Both
        sequencers use the single L1 instruction cache; each one gets
        its own L1 data cache and a core id of 0 or 1.  ``system`` is
        accepted but not read here.
        """
        self.version = self.versionCount()

        # Attach each cache first, then configure it through the attribute.
        cache_specs = (
            ("L1Icache", L1Cache, options.l1i_size, options.l1i_assoc),
            ("L1D0cache", L1Cache, options.l1d_size, options.l1d_assoc),
            ("L1D1cache", L1Cache, options.l1d_size, options.l1d_assoc),
            ("L2cache", L2Cache, options.l2_size, options.l2_assoc),
        )
        for attr, cache_cls, size, assoc in cache_specs:
            setattr(self, attr, cache_cls())
            getattr(self, attr).create(size, assoc, options)

        # Core 0 uses "sequencer"/L1D0, core 1 uses "sequencer1"/L1D1.
        seq_specs = (("sequencer", self.L1D0cache),
                     ("sequencer1", self.L1D1cache))
        for core_id, (attr, dcache) in enumerate(seq_specs):
            setattr(self, attr, RubySequencer())
            seq = getattr(self, attr)
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = dcache
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
128
class TCPCache(RubyCache):
    """RubyCache preset instantiated by TCPCntrl as its L1 cache."""

    size = "16kB"
    assoc = 16
    tagArrayBanks = 16
    dataArrayBanks = 16
    tagAccessLatency = 1
    dataAccessLatency = 4

    def create(self, options):
        """Size the cache from ``options.tcp_size`` and set its parameters.

        The bank counts and latencies are re-applied with the same values
        as the class-level defaults.  ``resourceStalls`` is taken from
        ``options.no_tcc_resource_stalls``.
        """
        self.size = MemorySize(options.tcp_size)
        fixed_params = (
            ("dataArrayBanks", 16),
            ("tagArrayBanks", 16),
            ("dataAccessLatency", 4),
            ("tagAccessLatency", 1),
        )
        for param, value in fixed_params:
            setattr(self, param, value)
        self.resourceStalls = options.no_tcc_resource_stalls
        self.replacement_policy = TreePLRURP()
144
class TCPCntrl(TCP_Controller, CntrlBase):
    """TCP controller with one TCPCache, a VIPERCoalescer and a sequencer."""

    def create(self, options, ruby_system, system):
        """Create the L1 cache, the coalescer and the sequencer.

        The coalescer and the sequencer both use the single L1 cache as
        instruction and data cache.  ``use_seq_not_coal`` is set to
        False.  ``system`` is accepted but not read here.
        """
        self.version = self.versionCount()
        self.L1cache = TCPCache()
        self.L1cache.create(options)
        self.issue_latency = 1

        self.coalescer = VIPERCoalescer()
        coal = self.coalescer
        coal.version = self.seqCount()
        coal.icache = self.L1cache
        coal.dcache = self.L1cache
        coal.ruby_system = ruby_system
        coal.support_inst_reqs = False
        coal.is_cpu_sequencer = False
        # Leave the deadlock threshold alone unless one was supplied.
        if options.tcp_deadlock_threshold:
            coal.deadlock_threshold = options.tcp_deadlock_threshold
        coal.max_coalesces_per_cycle = options.max_coalesces_per_cycle

        self.sequencer = RubySequencer()
        seq = self.sequencer
        seq.version = self.seqCount()
        seq.icache = self.L1cache
        seq.dcache = self.L1cache
        seq.ruby_system = ruby_system
        seq.is_cpu_sequencer = True

        self.use_seq_not_coal = False

        self.ruby_system = ruby_system
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
178
class SQCCache(RubyCache):
    """RubyCache preset instantiated by SQCCntrl as its L1 cache."""

    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 8
    dataArrayBanks = 8

    def create(self, options):
        """Take size and associativity from ``options.sqc_size/sqc_assoc``."""
        capacity = MemorySize(options.sqc_size)
        self.size = capacity
        self.assoc = options.sqc_assoc
        self.replacement_policy = TreePLRURP()
189
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller with one SQCCache and one sequencer."""

    def create(self, options, ruby_system, system):
        """Create the L1 cache and the sequencer.

        The sequencer has ``support_data_reqs`` disabled and is not
        marked as a CPU sequencer.  ``system`` is accepted but not read
        here.
        """
        self.version = self.versionCount()
        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = False

        self.sequencer = RubySequencer()
        seq = self.sequencer
        seq.version = self.seqCount()
        seq.icache = self.L1cache
        seq.dcache = self.L1cache
        seq.ruby_system = ruby_system
        seq.support_data_reqs = False
        seq.is_cpu_sequencer = False
        # Leave the deadlock threshold alone unless one was supplied.
        if options.sqc_deadlock_threshold:
            seq.deadlock_threshold = options.sqc_deadlock_threshold

        self.ruby_system = ruby_system
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
211
class TCC(RubyCache):
    """RubyCache preset instantiated by TCCCntrl as its L2 cache."""

    size = MemorySize("256kB")
    assoc = 16
    tagAccessLatency = 2
    dataAccessLatency = 8
    resourceStalls = True

    def create(self, options):
        """Size one TCC bank from the command-line options.

        When ``options`` has a positive ``bw_scalor`` the size is 128kB
        per compute unit with 64 data/tag banks; otherwise the size is
        ``options.tcc_size`` and 256 banks are split across the TCCs.
        In both cases the size is then divided by ``options.num_tccs``
        and raised to ``128 * assoc`` if size / assoc is below 128.
        """
        self.assoc = options.tcc_assoc

        scaled = hasattr(options, 'bw_scalor') and options.bw_scalor > 0
        if scaled:
            scaled_kb = options.num_compute_units * 128
            self.size = MemorySize(str(scaled_kb) + 'kB')
            self.dataArrayBanks = 64
            self.tagArrayBanks = 64
        else:
            self.size = MemorySize(options.tcc_size)
            banks_per_tcc = 256 / options.num_tccs
            self.dataArrayBanks = banks_per_tcc  # number of data banks
            self.tagArrayBanks = banks_per_tcc  # number of tag banks

        # The size computed above is an aggregate; split it across the TCCs.
        self.size.value = self.size.value / options.num_tccs
        if (self.size.value / long(self.assoc)) < 128:
            self.size.value = long(128 * self.assoc)

        line_bits = math.log(options.cacheline_size, 2)
        tcc_select_bits = math.log(options.num_tccs, 2)
        self.start_index_bit = line_bits + tcc_select_bits
        self.replacement_policy = TreePLRURP()
237
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller owning a single TCC cache as ``L2cache``."""

    def create(self, options, ruby_system, system):
        """Create the TCC cache and bind this controller to ``ruby_system``.

        After the cache configures itself, its ``resourceStalls`` is
        overwritten with ``options.no_tcc_resource_stalls``.  ``system``
        is accepted but not read here.
        """
        self.version = self.versionCount()
        self.L2cache = TCC()
        l2 = self.L2cache
        l2.create(options)
        self.ruby_system = ruby_system
        l2.resourceStalls = options.no_tcc_resource_stalls

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
248
class L3Cache(RubyCache):
    """RubyCache preset for the L3, with 16 data/tag banks by default."""

    dataArrayBanks = 16
    tagArrayBanks = 16

    def create(self, options, ruby_system, system):
        """Size this L3 instance from the command-line options.

        ``options.l3_size`` and the class-level bank counts are treated
        as totals and are each divided by ``options.num_dirs`` exactly
        once.  Latencies come from ``options.l3_data_latency`` and
        ``options.l3_tag_latency``.  ``ruby_system`` and ``system`` are
        accepted but not read here.
        """
        self.size = MemorySize(options.l3_size)
        self.size.value /= options.num_dirs
        self.assoc = options.l3_assoc
        # Split the banks across the directories once.  The bank counts
        # used to be divided twice (i.e. by num_dirs ** 2), which did not
        # match the single division applied to the size above.
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = False
        self.replacement_policy = TreePLRURP()
265
class ProbeFilter(RubyCache):
    """RubyCache preset instantiated by DirCntrl as ``ProbeFilterMemory``."""

    size = "4MB"
    assoc = 16
    tagArrayBanks = 256
    dataArrayBanks = 256

    def create(self, options, ruby_system, system):
        """Configure the probe filter from the region options.

        The block size is 64 bytes times ``options.blocks_per_region``;
        the size is ``region_dir_entries * block_size *
        num_compute_units``.  ``ruby_system`` and ``system`` are accepted
        but not read here.
        """
        region_blocks = options.blocks_per_region
        self.block_size = "%dB" % (64 * region_blocks)
        # Read block_size back through the attribute, as the product
        # below is formed from the stored parameter value.
        entries = options.region_dir_entries
        self.size = entries * self.block_size * options.num_compute_units
        self.assoc = 8
        self.tagArrayBanks = 8
        self.tagAccessLatency = options.dir_tag_latency
        self.dataAccessLatency = 1
        self.resourceStalls = options.no_resource_stalls
        region_bits = int(math.log(region_blocks, 2))
        self.start_index_bit = 6 + region_bits
        self.replacement_policy = TreePLRURP()
283
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 controller owning a single L3Cache as ``L3cache``."""

    def create(self, options, ruby_system, system):
        """Create the L3 cache and derive the response latency from it.

        ``l3_response_latency`` is the larger of the cache's data and tag
        access latencies.
        """
        self.version = self.versionCount()
        self.L3cache = L3Cache()
        l3 = self.L3cache
        l3.create(options, ruby_system, system)
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)
        self.ruby_system = ruby_system
        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers on the matching controller attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr, buf in wiring:
            setattr(self, attr, buf)
303
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller with directory memory, L3 and probe filter."""

    def create(self, options, dir_ranges, ruby_system, system):
        """Create the directory memory, L3 cache and probe filter.

        ``dir_ranges`` becomes this controller's ``addr_ranges``.
        ``response_latency`` is fixed at 30 and ``l3_hit_latency`` is the
        larger of the L3's data and tag access latencies.
        """
        self.version = self.versionCount()
        self.response_latency = 30
        self.addr_ranges = dir_ranges
        self.directory = RubyDirectoryMemory()

        self.L3CacheMemory = L3Cache()
        l3 = self.L3CacheMemory
        l3.create(options, ruby_system, system)

        self.ProbeFilterMemory = ProbeFilter()
        self.ProbeFilterMemory.create(options, ruby_system, system)

        self.l3_hit_latency = max(l3.dataAccessLatency, l3.tagAccessLatency)

        self.ruby_system = ruby_system
        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers on the matching controller attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr, buf in wiring:
            setattr(self, attr, buf)
330
def define_options(parser):
    """Register the GPU_VIPER_Baseline command-line options on ``parser``.

    ``parser`` must provide an optparse-style ``add_option`` method.
    Nothing is returned; the parser is modified in place.
    """
    # Latencies, stalls and TBEs.
    parser.add_option("--num-subcaches", type="int", default=4)
    parser.add_option("--l3-data-latency", type="int", default=20)
    parser.add_option("--l3-tag-latency", type="int", default=15)
    parser.add_option("--cpu-to-dir-latency", type="int", default=120)
    parser.add_option("--gpu-to-dir-latency", type="int", default=120)
    # These two are store_false flags: the stored value defaults to True
    # and passing the flag turns it to False.
    parser.add_option("--no-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--no-tcc-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--num-tbes", type="int", default=2560)
    parser.add_option("--l2-latency", type="int", default=50)  # load to use
    parser.add_option("--num-tccs", type="int", default=1,
                      help="number of TCC banks in the GPU")

    # SQC.
    parser.add_option("--sqc-size", type='string', default='32kB',
                      help="SQC cache size")
    parser.add_option("--sqc-assoc", type='int', default=8,
                      help="SQC cache assoc")
    parser.add_option("--sqc-deadlock-threshold", type='int',
                      help="Set the SQC deadlock threshold to some value")

    # Directory / region options.
    parser.add_option("--region-dir-entries", type="int", default=8192)
    parser.add_option("--dir-tag-latency", type="int", default=8)
    parser.add_option("--dir-tag-banks", type="int", default=4)
    parser.add_option("--blocks-per-region", type="int", default=1)
    parser.add_option("--use-L3-on-WT", action="store_true", default=False)
    parser.add_option("--nonInclusiveDir", action="store_true",
                      default=False)

    # TCP / TCC.  The --WB_L1 help used to read "writeback L2", a
    # copy-paste of the --WB_L2 help text.
    parser.add_option("--WB_L1", action="store_true",
                      default=False, help="writeback L1")
    parser.add_option("--WB_L2", action="store_true",
                      default=False, help="writeback L2")
    parser.add_option("--TCP_latency", type="int",
                      default=4, help="TCP latency")
    parser.add_option("--TCC_latency", type="int",
                      default=16, help="TCC latency")
    parser.add_option("--tcc-size", type='string', default='2MB',
                      help="aggregate tcc size")
    parser.add_option("--tcc-assoc", type='int', default=16,
                      help="tcc assoc")
    parser.add_option("--tcp-size", type='string', default='16kB',
                      help="tcp size")
    parser.add_option("--tcp-deadlock-threshold", type='int',
                      help="Set the TCP deadlock threshold to some value")
    parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
                      help="Maximum insts that may coalesce in a cycle")

    # Sampler and cache-bypass options.
    parser.add_option("--sampler-sets", type="int", default=1024)
    parser.add_option("--sampler-assoc", type="int", default=16)
    parser.add_option("--sampler-counter", type="int", default=512)
    parser.add_option("--noL1", action="store_true", default=False,
                      help="bypassL1")
    parser.add_option("--noL2", action="store_true", default=False,
                      help="bypassL2")
385
def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    """Build the GPU_VIPER_Baseline controllers and their cluster topology.

    Creates, in this order, the directory controllers, the CPU core-pair
    controllers, the TCP controllers, the SQC controllers and the TCC
    controllers.  Each controller is attached to ``system`` as
    ``<kind>_cntrl<i>``, has its message buffers connected to
    ``ruby_system.network`` and is added to a ``Cluster``.  The number
    of virtual networks on ``ruby_system.network`` is set to 10.

    ``full_system`` and ``bootmem`` are accepted but not read here.
    ``dma_devices`` must be empty (checked with an assert).

    Returns the tuple ``(cpu_sequencers, dir_cntrl_nodes, mainCluster)``.
    """
    if buildEnv['PROTOCOL'] != 'GPU_VIPER_Baseline':
        # The two literals are concatenated, so the first one needs the
        # trailing space to keep the words apart.
        panic("This script requires the "
              "GPU_VIPER_Baseline protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    dir_cntrl_nodes = []

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    TCC_bits = int(math.log(options.num_tccs, 2))

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = 16 * options.num_compute_units  # Assuming a 2GHz clock
    mainCluster = Cluster(intBW=crossbar_bw)

    # dir_bits is needed for the interleaved address ranges below whether
    # or not numa_high_bit was given, so compute it unconditionally.
    dir_bits = int(math.log(options.num_dirs, 2))
    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    for i in range(options.num_dirs):
        # One interleaved slice of every memory range for directory i.
        dir_ranges = [
            m5.objects.AddrRange(r.start, size=r.size(),
                                 intlvHighBit=numa_bit,
                                 intlvBits=dir_bits,
                                 intlvMatch=i)
            for r in system.mem_ranges
        ]

        dir_cntrl = DirCntrl(noTCCdir=True, TCC_select_num_bits=TCC_bits)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)
        dir_cntrl.number_of_TBEs = options.num_tbes
        dir_cntrl.useL3OnWT = options.use_L3_on_WT
        dir_cntrl.inclusiveDir = not options.nonInclusiveDir

        # Connect the Directory controller to the ruby network
        dir_cntrl.requestFromCores = MessageBuffer(ordered=True)
        dir_cntrl.requestFromCores.slave = ruby_system.network.master

        dir_cntrl.responseFromCores = MessageBuffer()
        dir_cntrl.responseFromCores.slave = ruby_system.network.master

        dir_cntrl.unblockFromCores = MessageBuffer()
        dir_cntrl.unblockFromCores.slave = ruby_system.network.master

        dir_cntrl.probeToCore = MessageBuffer()
        dir_cntrl.probeToCore.master = ruby_system.network.slave

        dir_cntrl.responseToCore = MessageBuffer()
        dir_cntrl.responseToCore.master = ruby_system.network.slave

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        setattr(system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)
        mainCluster.add(dir_cntrl)

    cpuCluster = Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    # Each CPCntrl serves a pair of cores.  For an odd number of CPUs,
    # still create the right number of controllers by rounding up.
    for i in range((options.num_cpus + 1) // 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        setattr(system, "cp_cntrl%d" % i, cp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = ruby_system.network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = ruby_system.network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = ruby_system.network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = ruby_system.network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = ruby_system.network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered=True)

        cpuCluster.add(cp_cntrl)

    gpuCluster = Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    for i in range(options.num_compute_units):

        # TBEs set to max outstanding requests
        tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                             issue_latency=1,
                             number_of_TBEs=2560)
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = options.noL1

        setattr(system, "tcp_cntrl%d" % i, tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)

        # Connect the CP (TCP) controllers to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.requestFromTCP.master = ruby_system.network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseFromTCP.master = ruby_system.network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.master = ruby_system.network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.probeToTCP.slave = ruby_system.network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered=True)
        tcp_cntrl.responseToTCP.slave = ruby_system.network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

    for i in range(options.num_sqc):

        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(system, "sqc_cntrl%d" % i, sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered=True)
        sqc_cntrl.requestFromSQC.master = ruby_system.network.slave

        sqc_cntrl.probeToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.probeToSQC.slave = ruby_system.network.master

        sqc_cntrl.responseToSQC = MessageBuffer(ordered=True)
        sqc_cntrl.responseToSQC.slave = ruby_system.network.master

        sqc_cntrl.mandatoryQueue = MessageBuffer()

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    # NOTE(review): a comment here used to say "Because of wire buffers,
    # num_tccs must equal num_tccdirs"; no TCC directories are created in
    # this function - confirm whether that constraint still applies.
    for i in range(options.num_tccs):

        tcc_cntrl = TCCCntrl()
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl.WB = options.WB_L2
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units

        # Connect the TCC controllers to the ruby network
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered=True)
        tcc_cntrl.requestFromTCP.slave = ruby_system.network.master

        tcc_cntrl.responseToCore = MessageBuffer(ordered=True)
        tcc_cntrl.responseToCore.master = ruby_system.network.slave

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.slave = ruby_system.network.master

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.slave = ruby_system.network.master

        tcc_cntrl.requestToNB = MessageBuffer(ordered=True)
        tcc_cntrl.requestToNB.master = ruby_system.network.slave

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.master = ruby_system.network.slave

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.master = ruby_system.network.slave

        tcc_cntrl.triggerQueue = MessageBuffer(ordered=True)

        setattr(system, "tcc_cntrl%d" % i, tcc_cntrl)
        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)

    # Assuming no DMA devices
    assert(len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)