misc: Merge branch 'release-staging-v20.0.0.0' into develop
[gem5.git] / configs / ruby / GPU_VIPER_Region.py
1 # Copyright (c) 2015 Advanced Micro Devices, Inc.
2 # All rights reserved.
3 #
4 # For use for simulation and test purposes only
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
8 #
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
11 #
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
15 #
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
31
32 import six
33 import math
34 import m5
35 from m5.objects import *
36 from m5.defines import buildEnv
37 from m5.util import addToPath
38 from .Ruby import send_evicts
39
40 addToPath('../')
41
42 from topologies.Cluster import Cluster
43
# Python 3 has no separate ``long`` type.  Alias it to ``int`` so the
# ``long(...)`` conversions used further down in this file work on both
# Python 2 and Python 3.
if six.PY3:
    long = int
46
class CntrlBase:
    """Mixin handing out the running ids used by the controllers below.

    Sequencer and controller ids are drawn from counters stored on
    ``CntrlBase`` itself, so they are shared by every subclass.  Version
    ids are stored on the class the method is invoked on, so every
    controller type gets its own sequence starting at zero.
    """

    _seqs = 0
    _cntrls = 0
    _version = 0

    @classmethod
    def seqCount(cls):
        """Return the next global sequencer id (0, 1, 2, ...)."""
        # Deliberately goes through CntrlBase rather than cls: the count
        # must be global across all controller types.
        assigned = CntrlBase._seqs
        CntrlBase._seqs = assigned + 1
        return assigned

    @classmethod
    def cntrlCount(cls):
        """Return the next global controller id (0, 1, 2, ...)."""
        # Deliberately goes through CntrlBase rather than cls: the count
        # must be global across all controller types.
        assigned = CntrlBase._cntrls
        CntrlBase._cntrls = assigned + 1
        return assigned

    @classmethod
    def versionCount(cls):
        """Return the next version id for this particular controller type."""
        # Writing through cls gives each subclass its own counter.
        assigned = cls._version
        cls._version = assigned + 1
        return assigned
67
68 #
69 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
70 #
class L1Cache(RubyCache):
    """RubyCache used for the L1 instruction and data caches of CPCntrl."""

    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 2
    dataArrayBanks = 2
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set the capacity, associativity and replacement policy.

        ``options`` is accepted so all cache ``create`` calls look alike;
        this method does not read it.
        """
        self.replacement_policy = TreePLRURP()
        self.assoc = assoc
        self.size = MemorySize(size)
81
class L2Cache(RubyCache):
    """RubyCache used for the L2 cache of CPCntrl."""

    tagArrayBanks = 16
    dataArrayBanks = 16
    assoc = 16
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set the capacity, associativity and replacement policy.

        ``options`` is accepted so all cache ``create`` calls look alike;
        this method does not read it.
        """
        self.replacement_policy = TreePLRURP()
        self.assoc = assoc
        self.size = MemorySize(size)
91
class CPCntrl(CorePair_Controller, CntrlBase):
    """Core-pair controller: one L1I, two L1Ds, one L2 and two sequencers."""

    def create(self, options, ruby_system, system):
        """Build the caches and the two CPU sequencers of this core pair.

        Cache sizes/associativities come from ``options`` (l1i_*, l1d_*,
        l2_*).  ``system`` is not read here.
        """
        self.version = self.versionCount()

        # Each cache is attached to this controller before it is
        # configured or handed to a sequencer.
        icache = L1Cache()
        self.L1Icache = icache
        icache.create(options.l1i_size, options.l1i_assoc, options)

        dcache0 = L1Cache()
        self.L1D0cache = dcache0
        dcache0.create(options.l1d_size, options.l1d_assoc, options)

        dcache1 = L1Cache()
        self.L1D1cache = dcache1
        dcache1.create(options.l1d_size, options.l1d_assoc, options)

        l2 = L2Cache()
        self.L2cache = l2
        l2.create(options.l2_size, options.l2_assoc, options)

        # Sequencer for core 0: shared L1I, private L1D0.
        seq0 = RubySequencer()
        self.sequencer = seq0
        seq0.version = self.seqCount()
        seq0.icache = icache
        seq0.dcache = dcache0
        seq0.ruby_system = ruby_system
        seq0.coreid = 0
        seq0.is_cpu_sequencer = True

        # Sequencer for core 1: shared L1I, private L1D1.
        seq1 = RubySequencer()
        self.sequencer1 = seq1
        seq1.version = self.seqCount()
        seq1.icache = icache
        seq1.dcache = dcache1
        seq1.ruby_system = ruby_system
        seq1.coreid = 1
        seq1.is_cpu_sequencer = True

        self.issue_latency = 1
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
129
class TCPCache(RubyCache):
    """RubyCache used as the L1 cache of TCPCntrl.

    The class-level values below are the defaults.  A caller may override
    any of them through constructor keyword arguments, as TCPCntrl does
    with ``dataAccessLatency = options.TCP_latency``.
    """

    size = "16kB"
    assoc = 16
    dataArrayBanks = 16
    tagArrayBanks = 16
    dataAccessLatency = 4
    tagAccessLatency = 1

    def create(self, options):
        """Apply the option-driven settings (size, resource stalls, policy).

        Bank counts and access latencies are intentionally NOT re-assigned
        here.  Re-assigning them to the class defaults used to overwrite
        values passed to the constructor, which silently discarded the
        ``--TCP_latency`` option.
        """
        self.size = MemorySize(options.tcp_size)
        self.resourceStalls = options.no_tcc_resource_stalls
        self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
145
class TCPCntrl(TCP_Controller, CntrlBase):
    """TCP controller: one TCPCache shared by a coalescer and a sequencer."""

    def create(self, options, ruby_system, system):
        """Build the L1 cache, the VIPER coalescer and the sequencer.

        ``system`` is not read here.
        """
        self.version = self.versionCount()

        l1 = TCPCache(dataAccessLatency = options.TCP_latency)
        self.L1cache = l1
        l1.create(options)
        self.issue_latency = 1

        # The coalescer uses the single L1 as both icache and dcache and
        # does not accept instruction requests.
        coal = VIPERCoalescer()
        self.coalescer = coal
        coal.version = self.seqCount()
        coal.icache = l1
        coal.dcache = l1
        coal.ruby_system = ruby_system
        coal.support_inst_reqs = False
        coal.is_cpu_sequencer = False
        if options.tcp_deadlock_threshold:
            coal.deadlock_threshold = options.tcp_deadlock_threshold
        coal.max_coalesces_per_cycle = options.max_coalesces_per_cycle

        # A plain sequencer on the same cache; use_seq_not_coal (below)
        # is set to False.
        seq = RubySequencer()
        self.sequencer = seq
        seq.version = self.seqCount()
        seq.icache = l1
        seq.dcache = l1
        seq.ruby_system = ruby_system
        seq.is_cpu_sequencer = True

        self.use_seq_not_coal = False

        self.ruby_system = ruby_system
        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
179
class SQCCache(RubyCache):
    """RubyCache used as the L1 cache of SQCCntrl."""

    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 8
    dataArrayBanks = 8

    def create(self, options):
        """Take size and associativity from ``--sqc-size`` / ``--sqc-assoc``."""
        self.size = MemorySize(options.sqc_size)
        self.assoc = options.sqc_assoc
        # The policy is sized from the associativity assigned just above.
        plru = TreePLRURP(num_leaves = self.assoc)
        self.replacement_policy = plru
190
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller: one SQCCache behind a non-CPU sequencer."""

    def create(self, options, ruby_system, system):
        """Build the SQC cache and its sequencer.

        ``system`` is not read here.
        """
        self.version = self.versionCount()

        l1 = SQCCache()
        self.L1cache = l1
        l1.create(options)
        l1.resourceStalls = False

        # The sequencer is configured not to support data requests.
        seq = RubySequencer()
        self.sequencer = seq
        seq.version = self.seqCount()
        seq.icache = l1
        seq.dcache = l1
        seq.ruby_system = ruby_system
        seq.support_data_reqs = False
        seq.is_cpu_sequencer = False
        if options.sqc_deadlock_threshold:
            seq.deadlock_threshold = options.sqc_deadlock_threshold

        self.ruby_system = ruby_system
        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
212
class TCC(RubyCache):
    """RubyCache used as the L2 cache of one TCCCntrl bank."""

    size = MemorySize("256kB")
    assoc = 16
    dataAccessLatency = 8
    tagAccessLatency = 2
    resourceStalls = False

    def create(self, options):
        """Size one TCC bank from the aggregate TCC options.

        The aggregate size and bank counts are split evenly across
        ``options.num_tccs``.  All arithmetic uses integer division:
        with ``/`` Python 3 would store floats in the bank counts, in
        ``size.value`` and in ``start_index_bit``.
        """
        self.assoc = options.tcc_assoc
        if hasattr(options, 'bw_scalor') and options.bw_scalor > 0:
            # Bandwidth-scaled configuration: 128kB per compute unit.
            s = options.num_compute_units
            tcc_size = s * 128
            tcc_size = str(tcc_size)+'kB'
            self.size = MemorySize(tcc_size)
            self.dataArrayBanks = 64
            self.tagArrayBanks = 64
        else:
            self.size = MemorySize(options.tcc_size)
            # number of data / tag banks
            self.dataArrayBanks = 256 // options.num_tccs
            self.tagArrayBanks = 256 // options.num_tccs
        # Per-bank share of the aggregate capacity.
        self.size.value = self.size.value // options.num_tccs
        # Enforce a floor of 128 bytes per way.
        if ((self.size.value // long(self.assoc)) < 128):
            self.size.value = long(128 * self.assoc)
        # Index bits start above the cache-line offset and the TCC-select
        # bits.
        self.start_index_bit = int(math.log(options.cacheline_size, 2) +
                                   math.log(options.num_tccs, 2))
        self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
238
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller wrapping a single TCC cache bank."""

    def create(self, options, ruby_system, system):
        """Build the TCC bank; ``system`` is not read here."""
        self.version = self.versionCount()

        bank = TCC()
        self.L2cache = bank
        bank.create(options)

        self.ruby_system = ruby_system
        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
247
class L3Cache(RubyCache):
    """RubyCache used as the L3 cache by L3Cntrl and DirCntrl."""

    dataArrayBanks = 16
    tagArrayBanks = 16

    def create(self, options, ruby_system, system):
        """Size one L3 slice from the aggregate L3 options.

        Capacity and bank counts are split evenly across
        ``options.num_dirs``.  ``ruby_system`` and ``system`` are not read.

        The bank counts used to be divided by ``num_dirs`` twice (a
        duplicated pair of statements) and with ``/=``, which produces
        floats on Python 3.  They are now divided once, with integer
        division.  For ``num_dirs == 1`` the result is unchanged.
        """
        self.size = MemorySize(options.l3_size)
        self.size.value //= options.num_dirs
        self.assoc = options.l3_assoc
        self.dataArrayBanks = int(self.dataArrayBanks) // options.num_dirs
        self.tagArrayBanks = int(self.tagArrayBanks) // options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = False
        self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
264
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 controller wrapping one L3Cache slice."""

    def create(self, options, ruby_system, system):
        """Build the L3 slice and derive the response latency from it."""
        self.version = self.versionCount()

        l3 = L3Cache()
        self.L3cache = l3
        l3.create(options, ruby_system, system)

        # Respond after the slower of the data and tag array accesses.
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)
        self.ruby_system = ruby_system
        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers linking this L3 and the directory."""
        # L3 -> directory
        self.reqToDir = req_to_dir
        self.respToDir = resp_to_dir
        self.l3UnblockToDir = l3_unblock_to_dir
        # directory -> L3
        self.reqToL3 = req_to_l3
        self.probeToL3 = probe_to_l3
        self.respToL3 = resp_to_l3
284
285 # Directory memory: Directory memory of infinite size which is
286 # used by directory controller to store the "states" of the
287 # state machine. The state machine is implemented per cache block
class DirMem(RubyDirectoryMemory, CntrlBase):
    """Directory memory covering this directory's share of physical memory."""

    def create(self, options, ruby_system, system):
        """Size the directory as ``mem_size / num_dirs`` bytes.

        ``ruby_system`` and ``system`` are not read here.  Integer
        division keeps the stored byte count an int on Python 3.
        """
        self.version = self.versionCount()
        phys_mem_size = AddrRange(options.mem_size).size()
        mem_module_size = phys_mem_size // options.num_dirs
        # Start from an empty MemorySize and set the byte count directly.
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size
        self.size = dir_size
296
297 # Directory controller: Contains directory memory, L3 cache and associated state
298 # machine which is used to accurately redirect a data request to L3 cache or to
299 # memory. The permissions requests do not come to this directory for region
300 # based protocols as they are handled exclusively by the region directory.
301 # However, region directory controller uses this directory controller for
302 # sending probe requests and receiving probe responses.
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller owning the directory memory and an L3 cache."""

    def create(self, options, ruby_system, system):
        """Build the directory memory and L3, and set the fixed latencies."""
        self.version = self.versionCount()
        self.response_latency = 25
        self.response_latency_regionDir = 1

        dir_mem = DirMem()
        self.directory = dir_mem
        dir_mem.create(options, ruby_system, system)

        l3 = L3Cache()
        self.L3CacheMemory = l3
        l3.create(options, ruby_system, system)

        # An L3 hit costs the slower of the data and tag array accesses.
        self.l3_hit_latency = max(l3.dataAccessLatency, l3.tagAccessLatency)

        self.ruby_system = ruby_system
        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers linking the directory and the L3."""
        # L3 -> directory
        self.reqToDir = req_to_dir
        self.respToDir = resp_to_dir
        self.l3UnblockToDir = l3_unblock_to_dir
        # directory -> L3
        self.reqToL3 = req_to_l3
        self.probeToL3 = probe_to_l3
        self.respToL3 = resp_to_l3
328
329 # Region directory : Stores region permissions
class RegionDir(RubyCache):
    """RubyCache whose block size is one region; used by RegionCntrl."""

    def create(self, options, ruby_system, system):
        """Configure the region directory from ``options``.

        ``ruby_system`` and ``system`` are not read here.
        """
        # One entry covers a whole region of 64-byte blocks.
        region_bytes = 64 * options.blocks_per_region
        self.block_size = "%dB" % region_bytes
        self.size = options.region_dir_entries * \
            self.block_size * options.num_compute_units

        self.assoc = 8
        self.tagArrayBanks = 8
        self.tagAccessLatency = options.dir_tag_latency
        self.dataAccessLatency = 1
        self.resourceStalls = options.no_resource_stalls

        # Skip the 6 block-offset bits plus the block-in-region bits.
        region_bits = int(math.log(options.blocks_per_region, 2))
        self.start_index_bit = 6 + region_bits
        self.replacement_policy = TreePLRURP(num_leaves = self.assoc)
343 # Region directory controller : Contains region directory and associated state
344 # machine for dealing with region coherence requests.
class RegionCntrl(RegionDir_Controller, CntrlBase):
    """Region directory controller wrapping a RegionDir cache."""

    def create(self, options, ruby_system, system):
        """Build the region directory and copy the migration policy flags.

        At most one of ``--always-migrate``, ``--symmetric-migrate`` and
        ``--asymmetric-migrate`` may be set; otherwise an AssertionError
        is raised.
        """
        self.version = self.versionCount()

        region_dir = RegionDir()
        self.cacheMemory = region_dir
        region_dir.create(options, ruby_system, system)

        self.blocksPerRegion = options.blocks_per_region
        # Latency to the directory is the slower of the two array accesses.
        self.toDirLatency = max(region_dir.dataAccessLatency,
                                region_dir.tagAccessLatency)
        self.ruby_system = ruby_system

        self.always_migrate = options.always_migrate
        self.sym_migrate = options.symmetric_migrate
        self.asym_migrate = options.asymmetric_migrate

        # The three migration policies are mutually exclusive.
        policies = (self.always_migrate, self.sym_migrate, self.asym_migrate)
        enabled = 0
        for flag in policies:
            if flag:
                enabled += 1
        assert(enabled <= 1)

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
366
367 # Region Buffer: A region directory cache which avoids some potential
368 # long latency lookup of region directory for getting region permissions
class RegionBuffer(RubyCache):
    """RubyCache used as the region buffer by RBCntrl.

    These are defaults only; RBCntrl.create overrides the bank counts and
    resourceStalls and sets the size, block size and replacement policy.
    """

    assoc = 4
    tagArrayBanks = 256
    dataArrayBanks = 256
    tagAccessLatency = 1
    dataAccessLatency = 1
    resourceStalls = True
375 resourceStalls = True
376
class RBCntrl(RegionBuffer_Controller, CntrlBase):
    """Region buffer controller wrapping a RegionBuffer cache."""

    def create(self, options, ruby_system, system):
        """Build and size the region buffer from ``options``.

        ``system`` is not read here.
        """
        self.version = self.versionCount()

        rbuf = RegionBuffer()
        self.cacheMemory = rbuf
        rbuf.resourceStalls = options.no_tcc_resource_stalls
        rbuf.dataArrayBanks = 64
        rbuf.tagArrayBanks = 64

        self.blocksPerRegion = options.blocks_per_region
        # One entry covers a whole region of 64-byte blocks.
        rbuf.block_size = "%dB" % (64 * self.blocksPerRegion)
        # Skip the 6 block-offset bits plus the block-in-region bits.
        rbuf.start_index_bit = 6 + int(math.log(self.blocksPerRegion, 2))
        rbuf.size = options.region_buffer_entries * \
            rbuf.block_size * options.num_compute_units

        self.toDirLatency = options.gpu_to_dir_latency
        self.toRegionDirLatency = options.cpu_to_dir_latency
        self.noTCCdir = True
        self.TCC_select_num_bits = int(math.log(options.num_tccs, 2))
        self.ruby_system = ruby_system

        # Only override the recycle latency when one was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
        rbuf.replacement_policy = TreePLRURP(num_leaves = rbuf.assoc)
401
def define_options(parser):
    """Register the GPU_VIPER_Region command-line options on ``parser``.

    ``parser`` must provide an optparse-style ``add_option`` method.
    Nothing is returned; the parser is modified in place.
    """
    parser.add_option("--num-subcaches", type="int", default=4)
    parser.add_option("--l3-data-latency", type="int", default=20)
    parser.add_option("--l3-tag-latency", type="int", default=15)
    parser.add_option("--cpu-to-dir-latency", type="int", default=120)
    parser.add_option("--gpu-to-dir-latency", type="int", default=60)
    # Both "no-...-stalls" flags default to True and are cleared when
    # the flag is given on the command line.
    parser.add_option("--no-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--no-tcc-resource-stalls", action="store_false",
                      default=True)
    parser.add_option("--num-tbes", type="int", default=32)
    parser.add_option("--l2-latency", type="int", default=50)  # load to use
    parser.add_option("--num-tccs", type="int", default=1,
                      help="number of TCC banks in the GPU")

    parser.add_option("--sqc-size", type='string', default='32kB',
                      help="SQC cache size")
    parser.add_option("--sqc-assoc", type='int', default=8,
                      help="SQC cache assoc")
    parser.add_option("--sqc-deadlock-threshold", type='int',
                      help="Set the SQC deadlock threshold to some value")

    parser.add_option("--WB_L1", action="store_true",
                      default=False, help="L1 Writeback Cache")
    parser.add_option("--WB_L2", action="store_true",
                      default=False, help="L2 Writeback Cache")
    parser.add_option("--TCP_latency",
                      type="int", default=4, help="TCP latency")
    parser.add_option("--TCC_latency",
                      type="int", default=16, help="TCC latency")
    parser.add_option("--tcc-size", type='string', default='2MB',
                      help="aggregate tcc size")
    parser.add_option("--tcc-assoc", type='int', default=16,
                      help="tcc assoc")
    parser.add_option("--tcp-size", type='string', default='16kB',
                      help="tcp size")
    parser.add_option("--tcp-deadlock-threshold", type='int',
                      help="Set the TCP deadlock threshold to some value")
    parser.add_option("--max-coalesces-per-cycle", type="int", default=1,
                      help="Maximum insts that may coalesce in a cycle")

    parser.add_option("--dir-tag-latency", type="int", default=4)
    parser.add_option("--dir-tag-banks", type="int", default=4)
    parser.add_option("--blocks-per-region", type="int", default=16)
    parser.add_option("--dir-entries", type="int", default=8192)

    # Region buffer is a cache of region directory. Hence region
    # directory is inclusive with respect to region buffer.
    # However, region directory is non-inclusive with respect to
    # the caches in the system
    parser.add_option("--region-dir-entries", type="int", default=1024)
    parser.add_option("--region-buffer-entries", type="int", default=512)

    parser.add_option("--always-migrate",
                      action="store_true", default=False)
    parser.add_option("--symmetric-migrate",
                      action="store_true", default=False)
    parser.add_option("--asymmetric-migrate",
                      action="store_true", default=False)
    parser.add_option("--use-L3-on-WT", action="store_true", default=False)
461 parser.add_option("--use-L3-on-WT", action="store_true", default=False)
462
def _connect_region_buffer(rb_cntrl, network):
    """Create a region buffer controller's message buffers and wire them
    to ``network``."""
    rb_cntrl.requestFromCore = MessageBuffer(ordered = True)
    rb_cntrl.requestFromCore.slave = network.master

    rb_cntrl.responseFromCore = MessageBuffer()
    rb_cntrl.responseFromCore.slave = network.master

    rb_cntrl.requestToNetwork = MessageBuffer()
    rb_cntrl.requestToNetwork.master = network.slave

    rb_cntrl.notifyFromRegionDir = MessageBuffer()
    rb_cntrl.notifyFromRegionDir.slave = network.master

    rb_cntrl.probeFromRegionDir = MessageBuffer()
    rb_cntrl.probeFromRegionDir.slave = network.master

    rb_cntrl.unblockFromDir = MessageBuffer()
    rb_cntrl.unblockFromDir.slave = network.master

    rb_cntrl.responseToRegDir = MessageBuffer()
    rb_cntrl.responseToRegDir.master = network.slave

    rb_cntrl.triggerQueue = MessageBuffer(ordered = True)


def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    """Instantiate and wire every controller of the GPU_VIPER_Region protocol.

    Controllers are attached to ``system`` as ``cp_cntrl<N>``,
    ``rb_cntrl<N>``, ``tcp_cntrl<N>``, ``sqc_cntrl<N>``, ``tcc_cntrl<N>``,
    ``tcc_rb_cntrl<N>``, ``dir_cntrl0`` and ``reg_cntrl0``, and their
    message buffers are connected to ``ruby_system.network``.

    ``full_system`` and ``bootmem`` are not read.  ``dma_devices`` must be
    empty, and ``options.num_dirs`` and ``options.num_l3caches`` must both
    be 1; otherwise an AssertionError is raised.

    Returns a tuple ``(cpu_sequencers, dir_cntrl_nodes, mainCluster)``.
    """
    # panic lives in m5.util; import it explicitly instead of relying on
    # it leaking in through ``from m5.objects import *``.
    from m5.util import panic

    if buildEnv['PROTOCOL'] != 'GPU_VIPER_Region':
        panic("This script requires the GPU_VIPER_Region protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    dir_cntrl_nodes = []

    network = ruby_system.network

    TCC_bits = int(math.log(options.num_tccs, 2))

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    crossbar_bw = 16 * options.num_compute_units  # Assuming a 2GHz clock
    cpuCluster = Cluster(extBW = (crossbar_bw), intBW=crossbar_bw)

    # For an odd number of CPUs, still create the right number of controllers
    for i in range((options.num_cpus + 1) // 2):

        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        rb_cntrl = RBCntrl()
        rb_cntrl.create(options, ruby_system, system)
        rb_cntrl.number_of_TBEs = 256
        rb_cntrl.isOnCPU = True

        cp_cntrl.regionBufferNum = rb_cntrl.version

        setattr(system, "cp_cntrl%d" % i, cp_cntrl)
        setattr(system, "rb_cntrl%d" % i, rb_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        cp_cntrl.requestFromCore = MessageBuffer()
        cp_cntrl.requestFromCore.master = network.slave

        cp_cntrl.responseFromCore = MessageBuffer()
        cp_cntrl.responseFromCore.master = network.slave

        cp_cntrl.unblockFromCore = MessageBuffer()
        cp_cntrl.unblockFromCore.master = network.slave

        cp_cntrl.probeToCore = MessageBuffer()
        cp_cntrl.probeToCore.slave = network.master

        cp_cntrl.responseToCore = MessageBuffer()
        cp_cntrl.responseToCore.slave = network.master

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered = True)

        # Connect the RB controllers to the ruby network
        _connect_region_buffer(rb_cntrl, network)

        cpuCluster.add(cp_cntrl)
        cpuCluster.add(rb_cntrl)

    gpuCluster = Cluster(extBW = (crossbar_bw), intBW = crossbar_bw)
    for i in range(options.num_compute_units):

        tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits,
                             issue_latency = 1,
                             number_of_TBEs = 2560)
        # TBEs set to max outstanding requests
        tcp_cntrl.create(options, ruby_system, system)
        tcp_cntrl.WB = options.WB_L1
        tcp_cntrl.disableL1 = False

        setattr(system, "tcp_cntrl%d" % i, tcp_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(tcp_cntrl.coalescer)

        # Connect the CP (TCP) controllers to the ruby network
        tcp_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.requestFromTCP.master = network.slave

        tcp_cntrl.responseFromTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseFromTCP.master = network.slave

        tcp_cntrl.unblockFromCore = MessageBuffer()
        tcp_cntrl.unblockFromCore.master = network.slave

        tcp_cntrl.probeToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.probeToTCP.slave = network.master

        tcp_cntrl.responseToTCP = MessageBuffer(ordered = True)
        tcp_cntrl.responseToTCP.slave = network.master

        tcp_cntrl.mandatoryQueue = MessageBuffer()

        gpuCluster.add(tcp_cntrl)

    for i in range(options.num_sqc):

        sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(system, "sqc_cntrl%d" % i, sqc_cntrl)
        #
        # Add controllers and sequencers to the appropriate lists
        #
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        sqc_cntrl.requestFromSQC = MessageBuffer(ordered = True)
        sqc_cntrl.requestFromSQC.master = network.slave

        sqc_cntrl.probeToSQC = MessageBuffer(ordered = True)
        sqc_cntrl.probeToSQC.slave = network.master

        sqc_cntrl.responseToSQC = MessageBuffer(ordered = True)
        sqc_cntrl.responseToSQC.slave = network.master

        sqc_cntrl.mandatoryQueue = MessageBuffer()

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    for i in range(options.num_tccs):

        tcc_cntrl = TCCCntrl()
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = 1
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl.WB = options.WB_L2
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units

        # Connect the TCC controllers to the ruby network
        tcc_cntrl.requestFromTCP = MessageBuffer(ordered = True)
        tcc_cntrl.requestFromTCP.slave = network.master

        tcc_cntrl.responseToCore = MessageBuffer(ordered = True)
        tcc_cntrl.responseToCore.master = network.slave

        tcc_cntrl.probeFromNB = MessageBuffer()
        tcc_cntrl.probeFromNB.slave = network.master

        tcc_cntrl.responseFromNB = MessageBuffer()
        tcc_cntrl.responseFromNB.slave = network.master

        tcc_cntrl.requestToNB = MessageBuffer(ordered = True)
        tcc_cntrl.requestToNB.master = network.slave

        tcc_cntrl.responseToNB = MessageBuffer()
        tcc_cntrl.responseToNB.master = network.slave

        tcc_cntrl.unblockToNB = MessageBuffer()
        tcc_cntrl.unblockToNB.master = network.slave

        tcc_cntrl.triggerQueue = MessageBuffer(ordered = True)

        rb_cntrl = RBCntrl()
        rb_cntrl.create(options, ruby_system, system)
        rb_cntrl.number_of_TBEs = 2560 * options.num_compute_units
        rb_cntrl.isOnCPU = False

        # Connect the RB controllers to the ruby network
        _connect_region_buffer(rb_cntrl, network)

        tcc_cntrl.regionBufferNum = rb_cntrl.version

        setattr(system, "tcc_cntrl%d" % i, tcc_cntrl)
        setattr(system, "tcc_rb_cntrl%d" % i, rb_cntrl)

        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)
        gpuCluster.add(rb_cntrl)

    # Because of wire buffers, num_l3caches must equal num_dirs
    # Region coherence only works with 1 dir
    assert(options.num_l3caches == options.num_dirs == 1)

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    mainCluster = Cluster(intBW = crossbar_bw)

    dir_cntrl = DirCntrl()
    dir_cntrl.create(options, ruby_system, system)
    dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units
    dir_cntrl.useL3OnWT = options.use_L3_on_WT

    # Connect the Directory controller to the ruby network
    dir_cntrl.requestFromCores = MessageBuffer()
    dir_cntrl.requestFromCores.slave = network.master

    dir_cntrl.responseFromCores = MessageBuffer()
    dir_cntrl.responseFromCores.slave = network.master

    dir_cntrl.unblockFromCores = MessageBuffer()
    dir_cntrl.unblockFromCores.slave = network.master

    dir_cntrl.probeToCore = MessageBuffer()
    dir_cntrl.probeToCore.master = network.slave

    dir_cntrl.responseToCore = MessageBuffer()
    dir_cntrl.responseToCore.master = network.slave

    dir_cntrl.reqFromRegBuf = MessageBuffer()
    dir_cntrl.reqFromRegBuf.slave = network.master

    dir_cntrl.reqToRegDir = MessageBuffer(ordered = True)
    dir_cntrl.reqToRegDir.master = network.slave

    dir_cntrl.reqFromRegDir = MessageBuffer(ordered = True)
    dir_cntrl.reqFromRegDir.slave = network.master

    dir_cntrl.unblockToRegDir = MessageBuffer()
    dir_cntrl.unblockToRegDir.master = network.slave

    dir_cntrl.triggerQueue = MessageBuffer(ordered = True)
    dir_cntrl.L3triggerQueue = MessageBuffer(ordered = True)
    dir_cntrl.requestToMemory = MessageBuffer()
    dir_cntrl.responseFromMemory = MessageBuffer()

    # Exactly one directory exists (asserted above), so it is index 0.
    # The name used to be built from the loop variable left over from the
    # TCC loop, which made it depend on --num-tccs.
    system.dir_cntrl0 = dir_cntrl
    dir_cntrl_nodes.append(dir_cntrl)

    mainCluster.add(dir_cntrl)

    reg_cntrl = RegionCntrl(noTCCdir=True,TCC_select_num_bits = TCC_bits)
    reg_cntrl.create(options, ruby_system, system)
    reg_cntrl.number_of_TBEs = options.num_tbes
    # The region directory is pointed at the first CPU-side and the first
    # GPU-side region buffer.
    reg_cntrl.cpuRegionBufferNum = system.rb_cntrl0.version
    reg_cntrl.gpuRegionBufferNum = system.tcc_rb_cntrl0.version

    # Connect the Region Dir controllers to the ruby network
    reg_cntrl.requestToDir = MessageBuffer(ordered = True)
    reg_cntrl.requestToDir.master = network.slave

    reg_cntrl.notifyToRBuffer = MessageBuffer()
    reg_cntrl.notifyToRBuffer.master = network.slave

    reg_cntrl.probeToRBuffer = MessageBuffer()
    reg_cntrl.probeToRBuffer.master = network.slave

    reg_cntrl.responseFromRBuffer = MessageBuffer()
    reg_cntrl.responseFromRBuffer.slave = network.master

    reg_cntrl.requestFromRegBuf = MessageBuffer()
    reg_cntrl.requestFromRegBuf.slave = network.master

    reg_cntrl.triggerQueue = MessageBuffer(ordered = True)

    # Single region directory, paired with the single directory above.
    system.reg_cntrl0 = reg_cntrl

    mainCluster.add(reg_cntrl)

    # Assuming no DMA devices
    assert(len(dma_devices) == 0)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 10

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)