mem-ruby: Sequencer can be used without cache
[gem5.git] / configs / ruby / GPU_VIPER.py
1 # Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
2 # All rights reserved.
3 #
4 # For use for simulation and test purposes only
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
8 #
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
11 #
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
15 #
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
31
32 import six
33 import math
34 import m5
35 from m5.objects import *
36 from m5.defines import buildEnv
37 from m5.util import addToPath
38 from .Ruby import create_topology
39 from .Ruby import send_evicts
40 from common import FileSystemConfig
41
42 addToPath('../')
43
44 from topologies.Cluster import Cluster
45 from topologies.Crossbar import Crossbar
46
# Python 3 has no separate ``long`` type; alias the name to ``int`` so the
# ``long(...)`` calls used for cache-size arithmetic in this file keep working.
if six.PY3:
    long = int
49
class CntrlBase:
    """Mixin that hands out consecutive, zero-based ids.

    Sequencer and controller ids are drawn from counters stored on
    ``CntrlBase`` itself, so they are shared by every subclass.  Version
    ids are stored on the calling class, so each controller type gets its
    own 0, 1, 2, ... series.
    """

    _seqs = 0
    _cntrls = 0
    _version = 0

    @classmethod
    def seqCount(cls):
        """Return the next sequencer id (global across all subclasses)."""
        # Deliberately update CntrlBase rather than cls: the count is global.
        allocated = CntrlBase._seqs
        CntrlBase._seqs = allocated + 1
        return allocated

    @classmethod
    def cntrlCount(cls):
        """Return the next controller id (global across all subclasses)."""
        # Deliberately update CntrlBase rather than cls: the count is global.
        allocated = CntrlBase._cntrls
        CntrlBase._cntrls = allocated + 1
        return allocated

    @classmethod
    def versionCount(cls):
        """Return the next version id for this particular controller type."""
        # Writing through cls gives every subclass its own counter.
        allocated = cls._version
        cls._version = allocated + 1
        return allocated
70
class L1Cache(RubyCache):
    """RubyCache preset used for the CPU-side L1 instruction/data caches."""

    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 2
    dataArrayBanks = 2
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set the cache size, associativity and a tree-PLRU policy.

        ``options`` is accepted for signature parity with the other cache
        classes in this file; it is not read here.
        """
        self.replacement_policy = TreePLRURP()
        self.assoc = assoc
        self.size = MemorySize(size)
80 self.replacement_policy = TreePLRURP()
81
class L2Cache(RubyCache):
    """RubyCache preset used for the CPU-side L2 cache."""

    assoc = 16
    tagArrayBanks = 16
    dataArrayBanks = 16
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set the cache size, associativity and a tree-PLRU policy.

        ``options`` is accepted for signature parity with the other cache
        classes in this file; it is not read here.
        """
        self.replacement_policy = TreePLRURP()
        self.assoc = assoc
        self.size = MemorySize(size)
91
class CPCntrl(CorePair_Controller, CntrlBase):
    """CorePair controller with one L1I, two L1Ds, one L2 and two sequencers."""

    def create(self, options, ruby_system, system):
        """Build the caches and the two CPU sequencers for this core pair.

        Sizes and associativities come from ``options``; ``system`` is not
        used by this method.
        """
        self.version = self.versionCount()

        # (attribute name, cache class, size, associativity)
        cache_specs = (
            ("L1Icache", L1Cache, options.l1i_size, options.l1i_assoc),
            ("L1D0cache", L1Cache, options.l1d_size, options.l1d_assoc),
            ("L1D1cache", L1Cache, options.l1d_size, options.l1d_assoc),
            ("L2cache", L2Cache, options.l2_size, options.l2_assoc),
        )
        for attr, cache_cls, size, assoc in cache_specs:
            cache = cache_cls()
            setattr(self, attr, cache)
            cache.create(size, assoc, options)

        # One sequencer per core; each one is backed by its own L1D and the
        # core id matches the position in this tuple.
        seq_specs = (
            ("sequencer", self.L1D0cache),
            ("sequencer1", self.L1D1cache),
        )
        for core_id, (attr, l1d) in enumerate(seq_specs):
            seq = RubySequencer()
            setattr(self, attr, seq)
            seq.version = self.seqCount()
            seq.dcache = l1d
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
127
class TCPCache(RubyCache):
    """RubyCache preset for the GPU TCP (per-compute-unit L1) cache."""

    size = "16kB"
    assoc = 16
    tagArrayBanks = 16      # number of tag banks
    dataArrayBanks = 16     # number of data banks
    tagAccessLatency = 1
    dataAccessLatency = 4

    def create(self, options):
        """Apply the ``--tcp-size`` / ``--tcp-assoc`` options to this cache."""
        self.replacement_policy = TreePLRURP()
        self.assoc = options.tcp_assoc
        self.size = MemorySize(options.tcp_size)
        # NOTE(review): this reads the *TCC* stall option, and it overwrites
        # whatever resourceStalls value the caller set before create() --
        # confirm that no_tcc_resource_stalls is really intended here.
        self.resourceStalls = options.no_tcc_resource_stalls
140
class TCPCntrl(TCP_Controller, CntrlBase):
    """TCP controller owning one L1 cache, a VIPER coalescer and a sequencer.

    ``create`` configures the controller to use the coalescer, ``createCP``
    configures it to use the plain sequencer (``use_seq_not_coal``).
    """

    def _createCommon(self, options, ruby_system, use_seq_not_coal):
        """Shared construction used by both ``create`` and ``createCP``.

        Allocates ids in a fixed order (controller version, coalescer,
        sequencer) so the numbering is the same whichever entry point is
        used.
        """
        self.version = self.versionCount()

        self.L1cache = TCPCache(tagAccessLatency=options.TCP_latency,
                                dataAccessLatency=options.TCP_latency)
        # NOTE(review): TCPCache.create() assigns resourceStalls again, so
        # this value does not survive the next call -- confirm intent.
        self.L1cache.resourceStalls = options.no_resource_stalls
        self.L1cache.create(options)
        self.issue_latency = 1

        # The coalescer uses the single L1 for both its icache and dcache.
        self.coalescer = VIPERCoalescer()
        self.coalescer.version = self.seqCount()
        self.coalescer.icache = self.L1cache
        self.coalescer.dcache = self.L1cache
        self.coalescer.ruby_system = ruby_system
        self.coalescer.support_inst_reqs = False
        self.coalescer.is_cpu_sequencer = False

        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = use_seq_not_coal

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def create(self, options, ruby_system, system):
        """Build a TCP that issues through the coalescer.

        In addition to the common setup this applies the coalescer tuning
        options (``tcp_deadlock_threshold``, ``max_coalesces_per_cycle``).
        ``system`` is not used.
        """
        self._createCommon(options, ruby_system, use_seq_not_coal=False)

        if options.tcp_deadlock_threshold:
            self.coalescer.deadlock_threshold = \
                options.tcp_deadlock_threshold
        self.coalescer.max_coalesces_per_cycle = \
            options.max_coalesces_per_cycle

    def createCP(self, options, ruby_system, system):
        """Build a TCP that issues through the sequencer, not the coalescer.

        The coalescer is still created but keeps its default deadlock
        threshold and coalescing rate.  ``system`` is not used.
        """
        self._createCommon(options, ruby_system, use_seq_not_coal=True)
207
class SQCCache(RubyCache):
    """RubyCache preset for the GPU SQC cache."""

    tagAccessLatency = 1
    dataAccessLatency = 1
    tagArrayBanks = 8
    dataArrayBanks = 8

    def create(self, options):
        """Apply the ``--sqc-size`` / ``--sqc-assoc`` options to this cache."""
        self.replacement_policy = TreePLRURP()
        self.assoc = options.sqc_assoc
        self.size = MemorySize(options.sqc_size)
218
class SQCCntrl(SQC_Controller, CntrlBase):
    """SQC controller owning one SQCCache and one sequencer."""

    def create(self, options, ruby_system, system):
        """Build the L1 cache and sequencer for this SQC.

        The sequencer is created with data requests disabled and is not
        flagged as a CPU sequencer.  ``system`` is not used.
        """
        self.version = self.versionCount()

        l1 = SQCCache()
        self.L1cache = l1
        l1.create(options)
        l1.resourceStalls = options.no_resource_stalls

        seq = RubySequencer()
        self.sequencer = seq
        seq.version = self.seqCount()
        seq.dcache = l1
        seq.ruby_system = ruby_system
        seq.support_data_reqs = False
        seq.is_cpu_sequencer = False
        # Keep the sequencer default unless a threshold was given.
        if options.sqc_deadlock_threshold:
            seq.deadlock_threshold = options.sqc_deadlock_threshold

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
243
class TCC(RubyCache):
    """RubyCache preset for one bank of the GPU TCC (shared L2)."""

    size = MemorySize("256kB")
    assoc = 16
    tagAccessLatency = 2
    dataAccessLatency = 8
    resourceStalls = True

    def create(self, options):
        """Size this TCC bank from ``options``.

        With a positive ``bw_scalor`` the aggregate size is 128kB per
        compute unit and the bank counts are fixed at 64; otherwise the
        aggregate size is ``tcc_size`` and 256 banks are shared between the
        TCCs.  Either way the aggregate size is then divided by
        ``num_tccs``.
        """
        self.assoc = options.tcc_assoc

        scaled = hasattr(options, 'bw_scalor') and options.bw_scalor > 0
        if scaled:
            aggregate = str(options.num_compute_units * 128) + 'kB'
            self.size = MemorySize(aggregate)
            self.dataArrayBanks = 64
            self.tagArrayBanks = 64
        else:
            self.size = MemorySize(options.tcc_size)
            banks_per_tcc = 256 / options.num_tccs
            self.dataArrayBanks = banks_per_tcc  # number of data banks
            self.tagArrayBanks = banks_per_tcc   # number of tag banks

        # Each TCC holds an equal share of the aggregate capacity ...
        self.size.value = self.size.value / options.num_tccs
        # ... but never less than 128 units per way.
        if (self.size.value / long(self.assoc)) < 128:
            self.size.value = long(128 * self.assoc)

        # Index starts above the block-offset bits and the TCC-select bits.
        self.start_index_bit = math.log(options.cacheline_size, 2) + \
                               math.log(options.num_tccs, 2)
        self.replacement_policy = TreePLRURP()
269
270
class TCCCntrl(TCC_Controller, CntrlBase):
    """TCC controller owning one TCC cache bank."""

    def create(self, options, ruby_system, system):
        """Build the L2 cache for this TCC; ``system`` is not used."""
        self.version = self.versionCount()

        l2 = TCC()
        self.L2cache = l2
        l2.create(options)
        l2.resourceStalls = options.no_tcc_resource_stalls

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
282
class L3Cache(RubyCache):
    """RubyCache preset for the L3 slice attached to each directory."""

    tagArrayBanks = 16
    dataArrayBanks = 16

    def create(self, options, ruby_system, system):
        """Size this L3 slice from ``options``.

        The aggregate ``l3_size`` is divided by ``num_dirs``.
        ``ruby_system`` and ``system`` are not used.
        """
        self.size = MemorySize(options.l3_size)
        self.size.value /= options.num_dirs
        self.assoc = options.l3_assoc

        # NOTE(review): both bank counts are divided by num_dirs twice, so
        # they scale with 1/num_dirs**2.  This looks like an accidental
        # duplication but is kept as-is to preserve behaviour -- confirm.
        self.dataArrayBanks /= options.num_dirs
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs

        self.tagAccessLatency = options.l3_tag_latency
        self.dataAccessLatency = options.l3_data_latency
        self.resourceStalls = False
        self.replacement_policy = TreePLRURP()
299
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """Stand-alone L3 controller owning one L3Cache."""

    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the controller response latency."""
        self.version = self.versionCount()

        l3 = L3Cache()
        self.L3cache = l3
        l3.create(options, ruby_system, system)

        # Respond no faster than the slower of the data and tag arrays.
        self.l3_response_latency = max(self.L3cache.dataAccessLatency,
                                       self.L3cache.tagAccessLatency)
        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six directory<->L3 wire buffers on this controller."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr, wire_buffer in wiring:
            setattr(self, attr, wire_buffer)
320
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller with its own directory memory and L3 slice."""

    def create(self, options, dir_ranges, ruby_system, system):
        """Build the directory memory and L3 for the given address ranges.

        ``dir_ranges`` becomes the controller's ``addr_ranges``.
        """
        self.version = self.versionCount()

        self.response_latency = 30

        self.addr_ranges = dir_ranges
        self.directory = RubyDirectoryMemory()

        l3 = L3Cache()
        self.L3CacheMemory = l3
        l3.create(options, ruby_system, system)

        # An L3 hit costs the slower of the data and tag array accesses.
        self.l3_hit_latency = max(self.L3CacheMemory.dataAccessLatency,
                                  self.L3CacheMemory.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        # Only override the controller default when a value was supplied.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six directory<->L3 wire buffers on this controller."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr, wire_buffer in wiring:
            setattr(self, attr, wire_buffer)
351
def define_options(parser):
    """Register the GPU_VIPER command-line options on ``parser``.

    ``parser`` must provide an optparse-style ``add_option`` method.  The
    options are added in the order listed below.

    Note that ``--no-resource-stalls`` and ``--no-tcc-resource-stalls`` are
    ``store_false`` flags with a default of True, so the stored value is
    True unless the flag is given.
    """
    option_table = (
        ("--num-subcaches", dict(type="int", default=4)),
        ("--l3-data-latency", dict(type="int", default=20)),
        ("--l3-tag-latency", dict(type="int", default=15)),
        ("--cpu-to-dir-latency", dict(type="int", default=120)),
        ("--gpu-to-dir-latency", dict(type="int", default=120)),
        ("--no-resource-stalls", dict(action="store_false", default=True)),
        ("--no-tcc-resource-stalls",
         dict(action="store_false", default=True)),
        ("--use-L3-on-WT", dict(action="store_true", default=False)),
        ("--num-tbes", dict(type="int", default=256)),
        # load to use
        ("--l2-latency", dict(type="int", default=50)),
        ("--num-tccs", dict(type="int", default=1,
                            help="number of TCC banks in the GPU")),
        ("--sqc-size", dict(type='string', default='32kB',
                            help="SQC cache size")),
        ("--sqc-assoc", dict(type='int', default=8,
                             help="SQC cache assoc")),
        ("--sqc-deadlock-threshold",
         dict(type='int',
              help="Set the SQC deadlock threshold to some value")),

        ("--WB_L1", dict(action="store_true", default=False,
                         help="writeback L1")),
        ("--WB_L2", dict(action="store_true", default=False,
                         help="writeback L2")),
        ("--TCP_latency", dict(type="int", default=4,
                               help="TCP latency")),
        ("--TCC_latency", dict(type="int", default=16,
                               help="TCC latency")),
        ("--tcc-size", dict(type='string', default='256kB',
                            help="agregate tcc size")),
        ("--tcc-assoc", dict(type='int', default=16,
                             help="tcc assoc")),
        ("--tcp-size", dict(type='string', default='16kB',
                            help="tcp size")),
        ("--tcp-assoc", dict(type='int', default=16,
                             help="tcp assoc")),
        ("--tcp-deadlock-threshold",
         dict(type='int',
              help="Set the TCP deadlock threshold to some value")),
        ("--max-coalesces-per-cycle",
         dict(type="int", default=1,
              help="Maximum insts that may coalesce in a cycle")),

        ("--noL1", dict(action="store_true", default=False,
                        help="bypassL1")),
        ("--scalar-buffer-size",
         dict(type='int', default=128,
              help="Size of the mandatory queue in the GPU scalar "
                   "cache controller")),
    )
    for flag, settings in option_table:
        parser.add_option(flag, **settings)
400
def _bw_scaling_enabled(options):
    """Return True when a positive ``bw_scalor`` option was supplied."""
    return hasattr(options, 'bw_scalor') and options.bw_scalor > 0


def _new_sub_cluster(options, crossbar_bw):
    """Build a CPU- or GPU-side cluster with equal ext/int bandwidth."""
    if _bw_scaling_enabled(options):
        return Cluster(extBW=crossbar_bw, intBW=crossbar_bw)
    return Cluster(extBW=8, intBW=8)  # 16 GB/s


def _to_network(cntrl, name, network, **buffer_args):
    """Create ``cntrl.<name>`` and bind its master side to the network."""
    setattr(cntrl, name, MessageBuffer(**buffer_args))
    getattr(cntrl, name).master = network.slave


def _from_network(cntrl, name, network, **buffer_args):
    """Create ``cntrl.<name>`` and bind its slave side to the network."""
    setattr(cntrl, name, MessageBuffer(**buffer_args))
    getattr(cntrl, name).slave = network.master


def _new_tcp_cntrl(options, ruby_system, system, TCC_bits, for_cp):
    """Create and configure one TCP controller (CU flavour or CP flavour)."""
    # TBEs set to max outstanding requests
    tcp_cntrl = TCPCntrl(TCC_select_num_bits=TCC_bits,
                         issue_latency=1,
                         number_of_TBEs=2560)
    if for_cp:
        tcp_cntrl.createCP(options, ruby_system, system)
    else:
        tcp_cntrl.create(options, ruby_system, system)
    tcp_cntrl.WB = options.WB_L1
    tcp_cntrl.disableL1 = options.noL1
    tcp_cntrl.L1cache.tagAccessLatency = options.TCP_latency
    tcp_cntrl.L1cache.dataAccessLatency = options.TCP_latency
    return tcp_cntrl


def _wire_tcp(tcp_cntrl, network, ordered_unblock):
    """Attach the TCP message buffers; only unblockFromCore ordering varies."""
    _to_network(tcp_cntrl, 'requestFromTCP', network, ordered=True)
    _to_network(tcp_cntrl, 'responseFromTCP', network, ordered=True)
    if ordered_unblock:
        _to_network(tcp_cntrl, 'unblockFromCore', network, ordered=True)
    else:
        _to_network(tcp_cntrl, 'unblockFromCore', network)
    _from_network(tcp_cntrl, 'probeToTCP', network, ordered=True)
    _from_network(tcp_cntrl, 'responseToTCP', network, ordered=True)
    tcp_cntrl.mandatoryQueue = MessageBuffer()


def _wire_sqc(sqc_cntrl, network, mandatory_queue):
    """Attach the SQC message buffers and the given mandatory queue."""
    _to_network(sqc_cntrl, 'requestFromSQC', network, ordered=True)
    _from_network(sqc_cntrl, 'probeToSQC', network, ordered=True)
    _from_network(sqc_cntrl, 'responseToSQC', network, ordered=True)
    sqc_cntrl.mandatoryQueue = mandatory_queue


def _register_se_topology(options):
    """Register CPUs and caches for each CorePair and directory (SE mode)."""
    for pair in range((options.num_cpus + 1) // 2):
        core0 = pair * 2
        core1 = pair * 2 + 1

        for core_id in (core0, core1):
            FileSystemConfig.register_cpu(
                physical_package_id=0,
                core_siblings=range(options.num_cpus),
                core_id=core_id,
                thread_siblings=[])

        # Shared L1I, then one private L1D per core, then the shared L2.
        FileSystemConfig.register_cache(level=0,
                                        idu_type='Instruction',
                                        size=options.l1i_size,
                                        line_size=options.cacheline_size,
                                        assoc=options.l1i_assoc,
                                        cpus=[core0, core1])
        for core_id in (core0, core1):
            FileSystemConfig.register_cache(level=0,
                                            idu_type='Data',
                                            size=options.l1d_size,
                                            line_size=options.cacheline_size,
                                            assoc=options.l1d_assoc,
                                            cpus=[core_id])
        FileSystemConfig.register_cache(level=1,
                                        idu_type='Unified',
                                        size=options.l2_size,
                                        line_size=options.cacheline_size,
                                        assoc=options.l2_assoc,
                                        cpus=[core0, core1])

    # One L3 entry per directory, visible to every CPU.
    for _ in range(options.num_dirs):
        FileSystemConfig.register_cache(level=2,
                                        idu_type='Unified',
                                        size=options.l3_size,
                                        line_size=options.cacheline_size,
                                        assoc=options.l3_assoc,
                                        cpus=list(range(options.num_cpus)))


def create_system(options, full_system, system, dma_devices, bootmem,
                  ruby_system):
    """Instantiate every GPU_VIPER controller and group them into clusters.

    Controllers are created in this order: directories, CorePairs, TCPs,
    SQCs, scalar caches, command-processor TCP/SQC pairs, TCCs and DMA
    controllers.  Each one is attached to ``ruby_system`` (DMA controllers
    to ``system``) under a numbered attribute name and added to the main,
    CPU or GPU cluster.

    Args:
        options: parsed command-line options.
        full_system: when false, CPUs and caches are also registered with
            ``FileSystemConfig``.
        system: provides ``mem_ranges`` and receives the DMA controllers.
        dma_devices: iterable of devices, one DMA controller is built each.
        bootmem: not used by this protocol.
        ruby_system: owns the network and receives the controllers.

    Returns:
        ``(cpu_sequencers, dir_cntrl_nodes, mainCluster)``.
    """
    if buildEnv['PROTOCOL'] != 'GPU_VIPER':
        panic("This script requires the GPU_VIPER protocol to be built.")

    cpu_sequencers = []

    #
    # The ruby network creation expects the list of nodes in the system to be
    # consistent with the NetDest list.  Therefore the l1 controller nodes
    # must be listed before the directory nodes and directory nodes before
    # dma nodes, etc.
    #
    dir_cntrl_nodes = []

    network = ruby_system.network

    #
    # Must create the individual controllers before the network to ensure the
    # controller constructors are called before the network constructor
    #

    TCC_bits = int(math.log(options.num_tccs, 2))

    # dir_bits is needed for the address interleaving below whether or not
    # the numa bit was given explicitly, so compute it unconditionally.
    # (It used to be assigned only in the else branch, which raised a
    # NameError whenever --numa-high-bit was set.)
    dir_bits = int(math.log(options.num_dirs, 2))
    if options.numa_high_bit:
        numa_bit = options.numa_high_bit
    else:
        # if the numa_bit is not specified, set the directory bits as the
        # lowest bits above the block offset bits, and the numa_bit as the
        # highest of those directory bits
        block_size_bits = int(math.log(options.cacheline_size, 2))
        numa_bit = block_size_bits + dir_bits - 1

    # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
    # Clusters
    crossbar_bw = None
    if _bw_scaling_enabled(options):
        # Assuming a 2GHz clock
        crossbar_bw = 16 * options.num_compute_units * options.bw_scalor
        mainCluster = Cluster(intBW=crossbar_bw)
    else:
        mainCluster = Cluster(intBW=8)  # 16 GB/s

    # ---- Directories -----------------------------------------------------
    for i in range(options.num_dirs):
        dir_ranges = [
            m5.objects.AddrRange(r.start, size=r.size(),
                                 intlvHighBit=numa_bit,
                                 intlvBits=dir_bits,
                                 intlvMatch=i)
            for r in system.mem_ranges
        ]

        dir_cntrl = DirCntrl(noTCCdir=True, TCC_select_num_bits=TCC_bits)
        dir_cntrl.create(options, dir_ranges, ruby_system, system)
        # the number_of_TBEs is inclusive of TBEs below
        dir_cntrl.number_of_TBEs = options.num_tbes
        dir_cntrl.useL3OnWT = options.use_L3_on_WT

        # Connect the Directory controller to the ruby network
        _from_network(dir_cntrl, 'requestFromCores', network, ordered=True)
        _from_network(dir_cntrl, 'responseFromCores', network)
        _from_network(dir_cntrl, 'unblockFromCores', network)
        _to_network(dir_cntrl, 'probeToCore', network)
        _to_network(dir_cntrl, 'responseToCore', network)

        dir_cntrl.triggerQueue = MessageBuffer(ordered=True)
        dir_cntrl.L3triggerQueue = MessageBuffer(ordered=True)

        _from_network(dir_cntrl, 'requestFromDMA', network, ordered=True)
        _to_network(dir_cntrl, 'responseToDMA', network)

        # Memory-side queues (previously created twice; once is enough).
        dir_cntrl.requestToMemory = MessageBuffer()
        dir_cntrl.responseFromMemory = MessageBuffer()

        setattr(ruby_system, "dir_cntrl%d" % i, dir_cntrl)
        dir_cntrl_nodes.append(dir_cntrl)

        mainCluster.add(dir_cntrl)

    # ---- CPU CorePairs ---------------------------------------------------
    cpuCluster = _new_sub_cluster(options, crossbar_bw)
    # For an odd number of CPUs, still create the right number of controllers
    for i in range((options.num_cpus + 1) // 2):
        cp_cntrl = CPCntrl()
        cp_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "cp_cntrl%d" % i, cp_cntrl)
        cpu_sequencers.extend([cp_cntrl.sequencer, cp_cntrl.sequencer1])

        # Connect the CP controllers and the network
        _to_network(cp_cntrl, 'requestFromCore', network)
        _to_network(cp_cntrl, 'responseFromCore', network)
        _to_network(cp_cntrl, 'unblockFromCore', network)
        _from_network(cp_cntrl, 'probeToCore', network)
        _from_network(cp_cntrl, 'responseToCore', network)

        cp_cntrl.mandatoryQueue = MessageBuffer()
        cp_cntrl.triggerQueue = MessageBuffer(ordered=True)

        cpuCluster.add(cp_cntrl)

    # Register CPUs and caches for each CorePair and directory (SE mode only)
    if not full_system:
        _register_se_topology(options)

    # ---- GPU TCPs --------------------------------------------------------
    gpuCluster = _new_sub_cluster(options, crossbar_bw)
    for i in range(options.num_compute_units):
        tcp_cntrl = _new_tcp_cntrl(options, ruby_system, system, TCC_bits,
                                   for_cp=False)

        setattr(ruby_system, "tcp_cntrl%d" % i, tcp_cntrl)
        # Compute units issue through the coalescer.
        cpu_sequencers.append(tcp_cntrl.coalescer)

        # Connect the TCP controller to the ruby network
        _wire_tcp(tcp_cntrl, network, ordered_unblock=False)

        gpuCluster.add(tcp_cntrl)

    # ---- GPU SQCs --------------------------------------------------------
    for i in range(options.num_sqc):
        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "sqc_cntrl%d" % i, sqc_cntrl)
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # Connect the SQC controller to the ruby network
        _wire_sqc(sqc_cntrl, network, MessageBuffer())

        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    # ---- GPU scalar caches (SQC controllers with a sized queue) ----------
    # range, not xrange: xrange does not exist on Python 3.
    for i in range(options.num_scalar_cache):
        scalar_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        scalar_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "scalar_cntrl%d" % i, scalar_cntrl)
        cpu_sequencers.append(scalar_cntrl.sequencer)

        _wire_sqc(scalar_cntrl, network,
                  MessageBuffer(buffer_size=options.scalar_buffer_size))

        gpuCluster.add(scalar_cntrl)

    # ---- Command processors: one TCP + one SQC each ----------------------
    for i in range(options.num_cp):
        # Number these after the compute-unit TCPs and the regular SQCs.
        tcp_ID = options.num_compute_units + i
        sqc_ID = options.num_sqc + i

        tcp_cntrl = _new_tcp_cntrl(options, ruby_system, system, TCC_bits,
                                   for_cp=True)

        setattr(ruby_system, "tcp_cntrl%d" % tcp_ID, tcp_cntrl)
        # Command processors issue through the plain sequencer.
        cpu_sequencers.append(tcp_cntrl.sequencer)

        # Connect the CP (TCP) controllers to the ruby network
        _wire_tcp(tcp_cntrl, network, ordered_unblock=True)

        gpuCluster.add(tcp_cntrl)

        sqc_cntrl = SQCCntrl(TCC_select_num_bits=TCC_bits)
        sqc_cntrl.create(options, ruby_system, system)

        setattr(ruby_system, "sqc_cntrl%d" % sqc_ID, sqc_cntrl)
        cpu_sequencers.append(sqc_cntrl.sequencer)

        # NOTE(review): unlike the regular SQCs, no message buffers are
        # attached to this controller here -- confirm that is intended.
        # SQC also in GPU cluster
        gpuCluster.add(sqc_cntrl)

    # ---- TCCs ------------------------------------------------------------
    for i in range(options.num_tccs):
        tcc_cntrl = TCCCntrl(l2_response_latency=options.TCC_latency)
        tcc_cntrl.create(options, ruby_system, system)
        tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency
        tcc_cntrl.l2_response_latency = options.TCC_latency
        tcc_cntrl.WB = options.WB_L2
        # the number_of_TBEs is inclusive of TBEs below
        tcc_cntrl.number_of_TBEs = 2560 * options.num_compute_units

        # Connect the TCC controllers to the ruby network
        _from_network(tcc_cntrl, 'requestFromTCP', network, ordered=True)
        _to_network(tcc_cntrl, 'responseToCore', network, ordered=True)
        _from_network(tcc_cntrl, 'probeFromNB', network)
        _from_network(tcc_cntrl, 'responseFromNB', network)
        _to_network(tcc_cntrl, 'requestToNB', network, ordered=True)
        _to_network(tcc_cntrl, 'responseToNB', network)
        _to_network(tcc_cntrl, 'unblockToNB', network)

        tcc_cntrl.triggerQueue = MessageBuffer(ordered=True)

        setattr(ruby_system, "tcc_cntrl%d" % i, tcc_cntrl)

        # TCC cntrls added to the GPU cluster
        gpuCluster.add(tcc_cntrl)

    # ---- DMA controllers -------------------------------------------------
    for i, dma_device in enumerate(dma_devices):
        dma_seq = DMASequencer(version=i, ruby_system=ruby_system)
        dma_cntrl = DMA_Controller(version=i, dma_sequencer=dma_seq,
                                   ruby_system=ruby_system)
        setattr(system, 'dma_cntrl%d' % i, dma_cntrl)
        if dma_device.type == 'MemTest':
            # Use this device's port; the code used to read ``.test`` off
            # the dma_devices list itself.
            dma_cntrl.dma_sequencer.slave = dma_device.test
        else:
            dma_cntrl.dma_sequencer.slave = dma_device.dma
        _to_network(dma_cntrl, 'requestToDir', network, buffer_size=0)
        _from_network(dma_cntrl, 'responseFromDir', network, buffer_size=0)
        dma_cntrl.mandatoryQueue = MessageBuffer(buffer_size=0)
        gpuCluster.add(dma_cntrl)

    # Add cpu/gpu clusters to main cluster
    mainCluster.add(cpuCluster)
    mainCluster.add(gpuCluster)

    ruby_system.network.number_of_virtual_networks = 11

    return (cpu_sequencers, dir_cntrl_nodes, mainCluster)