2 # Copyright (c) 2015 Advanced Micro Devices, Inc.
5 # For use for simulation and test purposes only
7 # Redistribution and use in source and binary forms, with or without
8 # modification, are permitted provided that the following conditions are met:
10 # 1. Redistributions of source code must retain the above copyright notice,
11 # this list of conditions and the following disclaimer.
13 # 2. Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
17 # 3. Neither the name of the copyright holder nor the names of its contributors
18 # may be used to endorse or promote products derived from this software
19 # without specific prior written permission.
21 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 # POSSIBILITY OF SUCH DAMAGE.
33 # Author: Sooraj Puthoor
38 from m5
.objects
import *
39 from m5
.defines
import buildEnv
40 from Ruby
import send_evicts
42 from topologies
.Cluster
import Cluster
48 # Use SeqCount not class since we need global count
50 return CntrlBase
._seqs
- 1
55 # Use CntlCount not class since we need global count
56 CntrlBase
._cntrls
+= 1
57 return CntrlBase
._cntrls
- 1
def versionCount(cls):
    """Hand out the next version number for this particular type.

    Reads the current ``_version`` counter through ``cls``, stores the
    incremented value back on ``cls`` and returns the value the counter
    held before the increment.
    """
    # Use count for this particular type
    assigned = cls._version
    cls._version = assigned + 1
    return assigned
66 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
# RubyCache subclass used for the L1 caches; resourceStalls is set to False.
class L1Cache(RubyCache):
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set the cache capacity and install a pseudo-LRU replacement policy.

        size    -- capacity value handed directly to MemorySize()
        assoc   -- NOTE(review): not referenced in the lines shown here;
                   confirm where the associativity is applied
        options -- parsed option object; not read in the lines shown here
        """
        self.size = MemorySize(size)
        self.replacement_policy = PseudoLRUReplacementPolicy()
# RubyCache subclass used for the L2 cache; resourceStalls is set to False.
class L2Cache(RubyCache):
    resourceStalls = False

    def create(self, size, assoc, options):
        """Set the cache capacity and install a pseudo-LRU replacement policy.

        size    -- capacity value handed directly to MemorySize()
        assoc   -- NOTE(review): not referenced in the lines shown here;
                   confirm where the associativity is applied
        options -- parsed option object; not read in the lines shown here
        """
        self.size = MemorySize(size)
        self.replacement_policy = PseudoLRUReplacementPolicy()
# Core-pair controller: owns one L1 instruction cache, two L1 data caches,
# one L2 cache and two sequencers (coreid 0 and coreid 1).
class CPCntrl(CorePair_Controller, CntrlBase):

    def create(self, options, ruby_system, system):
        """Build the caches and the two sequencers of this controller.

        options     -- parsed options; reads the l1i/l1d/l2 size and assoc
                       values and recycle_latency
        ruby_system -- stored on the controller and on both sequencers
        system      -- not referenced in this method
        """
        self.version = self.versionCount()

        # One instruction cache, two data caches and one L2 cache.
        self.L1Icache = L1Cache()
        self.L1Icache.create(options.l1i_size, options.l1i_assoc, options)
        self.L1D0cache = L1Cache()
        self.L1D0cache.create(options.l1d_size, options.l1d_assoc, options)
        self.L1D1cache = L1Cache()
        self.L1D1cache.create(options.l1d_size, options.l1d_assoc, options)
        self.L2cache = L2Cache()
        self.L2cache.create(options.l2_size, options.l2_assoc, options)

        # First sequencer: shared L1I, data cache L1D0, coreid 0.
        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1Icache
        self.sequencer.dcache = self.L1D0cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.coreid = 0
        self.sequencer.is_cpu_sequencer = True

        # Second sequencer: same L1I, data cache L1D1, coreid 1.
        self.sequencer1 = RubySequencer()
        self.sequencer1.version = self.seqCount()
        self.sequencer1.icache = self.L1Icache
        self.sequencer1.dcache = self.L1D1cache
        self.sequencer1.ruby_system = ruby_system
        self.sequencer1.coreid = 1
        self.sequencer1.is_cpu_sequencer = True

        self.issue_latency = 1
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override recycle_latency when the option holds a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
# RubyCache subclass used as the TCP controller's L1 cache.
class TCPCache(RubyCache):
    # Default data-array access latency. A caller may supply a different
    # value through the constructor keyword, e.g.
    # TCPCache(dataAccessLatency = ...).
    dataAccessLatency = 4

    def create(self, options):
        """Apply size, banking, latency and replacement-policy settings.

        options -- parsed options; reads tcp_size and no_tcc_resource_stalls

        dataAccessLatency is intentionally NOT reassigned here: resetting it
        to the hard-coded 4 would discard a latency supplied when the cache
        was constructed.  Instances built without that keyword still get the
        class default of 4 declared above.
        """
        self.size = MemorySize(options.tcp_size)
        self.dataArrayBanks = 16
        self.tagArrayBanks = 16
        self.tagAccessLatency = 1
        self.resourceStalls = options.no_tcc_resource_stalls
        self.replacement_policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
# TCP controller: owns one TCPCache that is shared by a VIPERCoalescer and a
# RubySequencer.
class TCPCntrl(TCP_Controller, CntrlBase):

    def create(self, options, ruby_system, system):
        """Build the L1 cache, the coalescer and the sequencer.

        options     -- parsed options; reads TCP_latency and recycle_latency
                       here and is forwarded to TCPCache.create()
        ruby_system -- stored on the controller, coalescer and sequencer
        system      -- not referenced in this method
        """
        self.version = self.versionCount()
        # The TCP_latency option is passed as the cache's dataAccessLatency
        # at construction time.
        self.L1cache = TCPCache(dataAccessLatency = options.TCP_latency)
        self.L1cache.create(options)
        self.issue_latency = 1

        # Coalescer: uses the single L1 cache for both icache and dcache.
        self.coalescer = VIPERCoalescer()
        self.coalescer.version = self.seqCount()
        self.coalescer.icache = self.L1cache
        self.coalescer.dcache = self.L1cache
        self.coalescer.ruby_system = ruby_system
        self.coalescer.support_inst_reqs = False
        self.coalescer.is_cpu_sequencer = False

        # Sequencer attached to the same L1 cache.
        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.is_cpu_sequencer = True

        self.use_seq_not_coal = False

        self.ruby_system = ruby_system
        # Only override recycle_latency when the option holds a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
# RubyCache subclass used as the SQC controller's cache.
class SQCCache(RubyCache):
    dataAccessLatency = 1

    def create(self, options):
        """Apply size, associativity and replacement policy from options.

        options -- parsed options; reads sqc_size and sqc_assoc
        """
        self.size = MemorySize(options.sqc_size)
        self.assoc = options.sqc_assoc
        # The policy is built with the associativity assigned just above.
        self.replacement_policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
# SQC controller: owns one SQCCache and one RubySequencer bound to it.
class SQCCntrl(SQC_Controller, CntrlBase):

    def create(self, options, ruby_system, system):
        """Build the cache and sequencer of this controller.

        options     -- parsed options; forwarded to SQCCache.create() and
                       read for recycle_latency
        ruby_system -- stored on the controller and on the sequencer
        system      -- not referenced in this method
        """
        self.version = self.versionCount()
        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = False
        # The sequencer uses the one cache as both icache and dcache and has
        # support_data_reqs switched off.
        self.sequencer = RubySequencer()
        self.sequencer.version = self.seqCount()
        self.sequencer.icache = self.L1cache
        self.sequencer.dcache = self.L1cache
        self.sequencer.ruby_system = ruby_system
        self.sequencer.support_data_reqs = False
        self.sequencer.is_cpu_sequencer = False
        self.ruby_system = ruby_system
        # Only override recycle_latency when the option holds a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
201 class TCC(RubyCache
):
202 size
= MemorySize("256kB")
204 dataAccessLatency
= 8
206 resourceStalls
= False
207 def create(self
, options
):
208 self
.assoc
= options
.tcc_assoc
209 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
210 s
= options
.num_compute_units
212 tcc_size
= str(tcc_size
)+'kB'
213 self
.size
= MemorySize(tcc_size
)
214 self
.dataArrayBanks
= 64
215 self
.tagArrayBanks
= 64
217 self
.size
= MemorySize(options
.tcc_size
)
218 self
.dataArrayBanks
= 256 / options
.num_tccs
#number of data banks
219 self
.tagArrayBanks
= 256 / options
.num_tccs
#number of tag banks
220 self
.size
.value
= self
.size
.value
/ options
.num_tccs
221 if ((self
.size
.value
/ long(self
.assoc
)) < 128):
222 self
.size
.value
= long(128 * self
.assoc
)
223 self
.start_index_bit
= math
.log(options
.cacheline_size
, 2) + \
224 math
.log(options
.num_tccs
, 2)
225 self
.replacement_policy
= PseudoLRUReplacementPolicy(assoc
= self
.assoc
)
# TCC controller wrapper around TCC_Controller.
class TCCCntrl(TCC_Controller, CntrlBase):
    def create(self, options, ruby_system, system):
        """Assign the version, configure the L2 cache and bind ruby_system.

        options     -- parsed options; forwarded to L2cache.create() and
                       read for recycle_latency
        ruby_system -- stored on the controller
        system      -- not referenced in this method
        """
        self.version = self.versionCount()
        # NOTE(review): self.L2cache is not assigned in the lines shown here;
        # confirm it is created before this call.
        self.L2cache.create(options)
        self.ruby_system = ruby_system
        # Only override recycle_latency when the option holds a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
# RubyCache subclass used for the L3 cache.
class L3Cache(RubyCache):

    def create(self, options, ruby_system, system):
        """Configure one L3 slice from the command-line options.

        options     -- parsed options; reads l3_size, l3_assoc, num_dirs,
                       l3_data_latency and l3_tag_latency
        ruby_system -- not referenced in this method
        system      -- not referenced in this method

        The capacity and both bank counts are split evenly across the
        directories.  Each bank count is divided by num_dirs exactly once;
        dividing a second time would scale the banks by num_dirs squared.
        """
        self.size = MemorySize(options.l3_size)
        self.size.value /= options.num_dirs
        self.assoc = options.l3_assoc
        self.dataArrayBanks /= options.num_dirs
        self.tagArrayBanks /= options.num_dirs
        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = False
        # The policy is built with the associativity assigned above.
        self.replacement_policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
# L3 cache controller: owns one L3Cache and exposes six wire-buffer hooks.
class L3Cntrl(L3Cache_Controller, CntrlBase):
    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the controller response latency.

        options     -- parsed options; forwarded to L3Cache.create() and
                       read for recycle_latency
        ruby_system -- stored on the controller and forwarded to the cache
        system      -- forwarded to L3Cache.create()
        """
        self.version = self.versionCount()
        self.L3cache = L3Cache()
        self.L3cache.create(options, ruby_system, system)
        # Response latency is the larger of the data and tag access latencies.
        self.l3_response_latency = \
            max(self.L3cache.dataAccessLatency, self.L3cache.tagAccessLatency)
        self.ruby_system = ruby_system
        # Only override recycle_latency when the option holds a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six supplied buffers on the matching attributes."""
        self.reqToDir = req_to_dir
        self.respToDir = resp_to_dir
        self.l3UnblockToDir = l3_unblock_to_dir
        self.reqToL3 = req_to_l3
        self.probeToL3 = probe_to_l3
        self.respToL3 = resp_to_l3
273 # Directory memory: Directory memory of infinite size which is
274 # used by directory controller to store the "states" of the
275 # state machine. The state machine is implemented per cache block
class DirMem(RubyDirectoryMemory, CntrlBase):
    def create(self, options, ruby_system, system):
        """Assign the version and compute the per-directory memory size.

        options     -- parsed options; reads mem_size and num_dirs
        ruby_system -- not referenced in this method
        system      -- not referenced in this method
        """
        self.version = self.versionCount()
        # Total physical memory size, split evenly across the directories.
        phys_mem_size = AddrRange(options.mem_size).size()
        mem_module_size = phys_mem_size / options.num_dirs
        dir_size = MemorySize('0B')
        dir_size.value = mem_module_size
        # NOTE(review): dir_size is not used after this point in the lines
        # shown here; confirm it is assigned to the directory's size.
285 # Directory controller: Contains directory memory, L3 cache and associated state
286 # machine which is used to accurately redirect a data request to L3 cache or to
287 # memory. The permissions requests do not come to this directory for region
288 # based protocols as they are handled exclusively by the region directory.
289 # However, region directory controller uses this directory controller for
290 # sending probe requests and receiving probe responses.
class DirCntrl(Directory_Controller, CntrlBase):
    def create(self, options, ruby_system, system):
        """Build the directory memory and L3 cache and set the latencies.

        options     -- parsed options; forwarded to DirMem.create() and
                       L3Cache.create(), and read for recycle_latency
        ruby_system -- stored on the controller and forwarded to both children
        system      -- forwarded to both children
        """
        self.version = self.versionCount()
        self.response_latency = 25
        self.response_latency_regionDir = 1
        self.directory = DirMem()
        self.directory.create(options, ruby_system, system)
        self.L3CacheMemory = L3Cache()
        self.L3CacheMemory.create(options, ruby_system, system)
        # Hit latency is the larger of the L3 data and tag access latencies.
        self.l3_hit_latency = \
            max(self.L3CacheMemory.dataAccessLatency,
                self.L3CacheMemory.tagAccessLatency)

        self.ruby_system = ruby_system
        # Only override recycle_latency when the option holds a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six supplied buffers on the matching attributes."""
        self.reqToDir = req_to_dir
        self.respToDir = resp_to_dir
        self.l3UnblockToDir = l3_unblock_to_dir
        self.reqToL3 = req_to_l3
        self.probeToL3 = probe_to_l3
        self.respToL3 = resp_to_l3
317 # Region directory : Stores region permissions
class RegionDir(RubyCache):

    def create(self, options, ruby_system, system):
        """Size and configure the region directory cache from the options.

        options     -- parsed options; reads blocks_per_region,
                       region_dir_entries, num_compute_units,
                       dir_tag_latency and no_resource_stalls
        ruby_system -- not referenced in this method
        system      -- not referenced in this method
        """
        # One entry covers blocks_per_region units of 64 bytes.
        self.block_size = "%dB" % (64 * options.blocks_per_region)
        # NOTE(review): block_size was assigned from a string just above;
        # this product presumably relies on the parameter being converted to
        # a numeric size on assignment -- confirm.
        self.size = options.region_dir_entries * \
            self.block_size * options.num_compute_units
        self.tagArrayBanks = 8
        self.tagAccessLatency = options.dir_tag_latency
        self.dataAccessLatency = 1
        self.resourceStalls = options.no_resource_stalls
        # 6 == log2(64), matching the 64-byte factor in block_size; the
        # log2(blocks_per_region) term is added on top of it.
        self.start_index_bit = 6 + int(math.log(options.blocks_per_region, 2))
        self.replacement_policy = PseudoLRUReplacementPolicy(assoc = self.assoc)
331 # Region directory controller : Contains region directory and associated state
332 # machine for dealing with region coherence requests.
333 class RegionCntrl(RegionDir_Controller
, CntrlBase
):
334 def create(self
, options
, ruby_system
, system
):
335 self
.version
= self
.versionCount()
336 self
.cacheMemory
= RegionDir()
337 self
.cacheMemory
.create(options
, ruby_system
, system
)
338 self
.blocksPerRegion
= options
.blocks_per_region
339 self
.toDirLatency
= \
340 max(self
.cacheMemory
.dataAccessLatency
,
341 self
.cacheMemory
.tagAccessLatency
)
342 self
.ruby_system
= ruby_system
343 self
.always_migrate
= options
.always_migrate
344 self
.sym_migrate
= options
.symmetric_migrate
345 self
.asym_migrate
= options
.asymmetric_migrate
346 if self
.always_migrate
:
347 assert(not self
.asym_migrate
and not self
.sym_migrate
)
349 assert(not self
.always_migrate
and not self
.asym_migrate
)
350 if self
.asym_migrate
:
351 assert(not self
.always_migrate
and not self
.sym_migrate
)
352 if options
.recycle_latency
:
353 self
.recycle_latency
= options
.recycle_latency
355 # Region Buffer: A region directory cache which avoids some potential
356 # long latency lookup of region directory for getting region permissions
# RubyCache subclass used as the cacheMemory of the region buffer controller.
class RegionBuffer(RubyCache):
    # Class-level defaults for this cache type.
    dataAccessLatency = 1
    resourceStalls = True
# Region buffer controller: owns one RegionBuffer cache.
class RBCntrl(RegionBuffer_Controller, CntrlBase):
    def create(self, options, ruby_system, system):
        """Build and size the region buffer cache and set the latencies.

        options     -- parsed options; reads no_tcc_resource_stalls,
                       blocks_per_region, region_buffer_entries,
                       num_compute_units, gpu_to_dir_latency,
                       cpu_to_dir_latency, num_tccs and recycle_latency
        ruby_system -- stored on the controller
        system      -- not referenced in this method
        """
        self.version = self.versionCount()
        self.cacheMemory = RegionBuffer()
        self.cacheMemory.resourceStalls = options.no_tcc_resource_stalls
        self.cacheMemory.dataArrayBanks = 64
        self.cacheMemory.tagArrayBanks = 64
        self.blocksPerRegion = options.blocks_per_region
        # One entry covers blocksPerRegion units of 64 bytes.
        self.cacheMemory.block_size = "%dB" % (64 * self.blocksPerRegion)
        # 6 == log2(64), matching the 64-byte factor in block_size.
        self.cacheMemory.start_index_bit = \
            6 + int(math.log(self.blocksPerRegion, 2))
        self.cacheMemory.size = options.region_buffer_entries * \
            self.cacheMemory.block_size * options.num_compute_units
        self.toDirLatency = options.gpu_to_dir_latency
        self.toRegionDirLatency = options.cpu_to_dir_latency
        # Number of select bits is log2 of the TCC count, truncated to an int.
        TCC_bits = int(math.log(options.num_tccs, 2))
        self.TCC_select_num_bits = TCC_bits
        self.ruby_system = ruby_system

        # Only override recycle_latency when the option holds a truthy value.
        if options.recycle_latency:
            self.recycle_latency = options.recycle_latency
        self.cacheMemory.replacement_policy = \
            PseudoLRUReplacementPolicy(assoc = self.cacheMemory.assoc)
390 def define_options(parser
):
391 parser
.add_option("--num-subcaches", type="int", default
=4)
392 parser
.add_option("--l3-data-latency", type="int", default
=20)
393 parser
.add_option("--l3-tag-latency", type="int", default
=15)
394 parser
.add_option("--cpu-to-dir-latency", type="int", default
=120)
395 parser
.add_option("--gpu-to-dir-latency", type="int", default
=60)
396 parser
.add_option("--no-resource-stalls", action
="store_false",
398 parser
.add_option("--no-tcc-resource-stalls", action
="store_false",
400 parser
.add_option("--num-tbes", type="int", default
=32)
401 parser
.add_option("--l2-latency", type="int", default
=50) # load to use
402 parser
.add_option("--num-tccs", type="int", default
=1,
403 help="number of TCC banks in the GPU")
405 parser
.add_option("--sqc-size", type='string', default
='32kB',
406 help="SQC cache size")
407 parser
.add_option("--sqc-assoc", type='int', default
=8,
408 help="SQC cache assoc")
410 parser
.add_option("--WB_L1", action
="store_true",
411 default
=False, help="L2 Writeback Cache")
412 parser
.add_option("--WB_L2", action
="store_true",
413 default
=False, help="L2 Writeback Cache")
414 parser
.add_option("--TCP_latency",
415 type="int", default
=4, help="TCP latency")
416 parser
.add_option("--TCC_latency",
417 type="int", default
=16, help="TCC latency")
418 parser
.add_option("--tcc-size", type='string', default
='2MB',
419 help="agregate tcc size")
420 parser
.add_option("--tcc-assoc", type='int', default
=16,
422 parser
.add_option("--tcp-size", type='string', default
='16kB',
425 parser
.add_option("--dir-tag-latency", type="int", default
=4)
426 parser
.add_option("--dir-tag-banks", type="int", default
=4)
427 parser
.add_option("--blocks-per-region", type="int", default
=16)
428 parser
.add_option("--dir-entries", type="int", default
=8192)
# The region buffer is a cache of the region directory. Hence the region
# directory is inclusive with respect to the region buffer.
# However, the region directory is non-inclusive with respect to
# the caches in the system.
434 parser
.add_option("--region-dir-entries", type="int", default
=1024)
435 parser
.add_option("--region-buffer-entries", type="int", default
=512)
437 parser
.add_option("--always-migrate",
438 action
="store_true", default
=False)
439 parser
.add_option("--symmetric-migrate",
440 action
="store_true", default
=False)
441 parser
.add_option("--asymmetric-migrate",
442 action
="store_true", default
=False)
443 parser
.add_option("--use-L3-on-WT", action
="store_true", default
=False)
445 def create_system(options
, full_system
, system
, dma_devices
, bootmem
,
447 if buildEnv
['PROTOCOL'] != 'GPU_VIPER_Region':
448 panic("This script requires the GPU_VIPER_Region protocol to be built.")
453 # The ruby network creation expects the list of nodes in the system to be
454 # consistent with the NetDest list. Therefore the l1 controller nodes
455 # must be listed before the directory nodes and directory nodes before
460 # For an odd number of CPUs, still create the right number of controllers
461 TCC_bits
= int(math
.log(options
.num_tccs
, 2))
464 # Must create the individual controllers before the network to ensure the
465 # controller constructors are called before the network constructor
468 # For an odd number of CPUs, still create the right number of controllers
469 crossbar_bw
= 16 * options
.num_compute_units
#Assuming a 2GHz clock
470 cpuCluster
= Cluster(extBW
= (crossbar_bw
), intBW
=crossbar_bw
)
471 for i
in xrange((options
.num_cpus
+ 1) / 2):
474 cp_cntrl
.create(options
, ruby_system
, system
)
477 rb_cntrl
.create(options
, ruby_system
, system
)
478 rb_cntrl
.number_of_TBEs
= 256
479 rb_cntrl
.isOnCPU
= True
481 cp_cntrl
.regionBufferNum
= rb_cntrl
.version
483 exec("system.cp_cntrl%d = cp_cntrl" % i
)
484 exec("system.rb_cntrl%d = rb_cntrl" % i
)
486 # Add controllers and sequencers to the appropriate lists
488 cpu_sequencers
.extend([cp_cntrl
.sequencer
, cp_cntrl
.sequencer1
])
490 # Connect the CP controllers and the network
491 cp_cntrl
.requestFromCore
= MessageBuffer()
492 cp_cntrl
.requestFromCore
.master
= ruby_system
.network
.slave
494 cp_cntrl
.responseFromCore
= MessageBuffer()
495 cp_cntrl
.responseFromCore
.master
= ruby_system
.network
.slave
497 cp_cntrl
.unblockFromCore
= MessageBuffer()
498 cp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
500 cp_cntrl
.probeToCore
= MessageBuffer()
501 cp_cntrl
.probeToCore
.slave
= ruby_system
.network
.master
503 cp_cntrl
.responseToCore
= MessageBuffer()
504 cp_cntrl
.responseToCore
.slave
= ruby_system
.network
.master
506 cp_cntrl
.mandatoryQueue
= MessageBuffer()
507 cp_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
509 # Connect the RB controllers to the ruby network
510 rb_cntrl
.requestFromCore
= MessageBuffer(ordered
= True)
511 rb_cntrl
.requestFromCore
.slave
= ruby_system
.network
.master
513 rb_cntrl
.responseFromCore
= MessageBuffer()
514 rb_cntrl
.responseFromCore
.slave
= ruby_system
.network
.master
516 rb_cntrl
.requestToNetwork
= MessageBuffer()
517 rb_cntrl
.requestToNetwork
.master
= ruby_system
.network
.slave
519 rb_cntrl
.notifyFromRegionDir
= MessageBuffer()
520 rb_cntrl
.notifyFromRegionDir
.slave
= ruby_system
.network
.master
522 rb_cntrl
.probeFromRegionDir
= MessageBuffer()
523 rb_cntrl
.probeFromRegionDir
.slave
= ruby_system
.network
.master
525 rb_cntrl
.unblockFromDir
= MessageBuffer()
526 rb_cntrl
.unblockFromDir
.slave
= ruby_system
.network
.master
528 rb_cntrl
.responseToRegDir
= MessageBuffer()
529 rb_cntrl
.responseToRegDir
.master
= ruby_system
.network
.slave
531 rb_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
533 cpuCluster
.add(cp_cntrl
)
534 cpuCluster
.add(rb_cntrl
)
536 gpuCluster
= Cluster(extBW
= (crossbar_bw
), intBW
= crossbar_bw
)
537 for i
in xrange(options
.num_compute_units
):
539 tcp_cntrl
= TCPCntrl(TCC_select_num_bits
= TCC_bits
,
541 number_of_TBEs
= 2560)
542 # TBEs set to max outstanding requests
543 tcp_cntrl
.create(options
, ruby_system
, system
)
544 tcp_cntrl
.WB
= options
.WB_L1
545 tcp_cntrl
.disableL1
= False
547 exec("system.tcp_cntrl%d = tcp_cntrl" % i
)
549 # Add controllers and sequencers to the appropriate lists
551 cpu_sequencers
.append(tcp_cntrl
.coalescer
)
553 # Connect the CP (TCP) controllers to the ruby network
554 tcp_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
555 tcp_cntrl
.requestFromTCP
.master
= ruby_system
.network
.slave
557 tcp_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
558 tcp_cntrl
.responseFromTCP
.master
= ruby_system
.network
.slave
560 tcp_cntrl
.unblockFromCore
= MessageBuffer()
561 tcp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
563 tcp_cntrl
.probeToTCP
= MessageBuffer(ordered
= True)
564 tcp_cntrl
.probeToTCP
.slave
= ruby_system
.network
.master
566 tcp_cntrl
.responseToTCP
= MessageBuffer(ordered
= True)
567 tcp_cntrl
.responseToTCP
.slave
= ruby_system
.network
.master
569 tcp_cntrl
.mandatoryQueue
= MessageBuffer()
571 gpuCluster
.add(tcp_cntrl
)
573 for i
in xrange(options
.num_sqc
):
575 sqc_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
576 sqc_cntrl
.create(options
, ruby_system
, system
)
578 exec("system.sqc_cntrl%d = sqc_cntrl" % i
)
580 # Add controllers and sequencers to the appropriate lists
582 cpu_sequencers
.append(sqc_cntrl
.sequencer
)
584 # Connect the SQC controller to the ruby network
585 sqc_cntrl
.requestFromSQC
= MessageBuffer(ordered
= True)
586 sqc_cntrl
.requestFromSQC
.master
= ruby_system
.network
.slave
588 sqc_cntrl
.probeToSQC
= MessageBuffer(ordered
= True)
589 sqc_cntrl
.probeToSQC
.slave
= ruby_system
.network
.master
591 sqc_cntrl
.responseToSQC
= MessageBuffer(ordered
= True)
592 sqc_cntrl
.responseToSQC
.slave
= ruby_system
.network
.master
594 sqc_cntrl
.mandatoryQueue
= MessageBuffer()
596 # SQC also in GPU cluster
597 gpuCluster
.add(sqc_cntrl
)
601 for i
in xrange(options
.num_tccs
):
603 tcc_cntrl
= TCCCntrl()
604 tcc_cntrl
.create(options
, ruby_system
, system
)
605 tcc_cntrl
.l2_request_latency
= 1
606 tcc_cntrl
.l2_response_latency
= options
.TCC_latency
607 tcc_cntrl
.WB
= options
.WB_L2
608 tcc_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
610 # Connect the TCC controllers to the ruby network
611 tcc_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
612 tcc_cntrl
.requestFromTCP
.slave
= ruby_system
.network
.master
614 tcc_cntrl
.responseToCore
= MessageBuffer(ordered
= True)
615 tcc_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
617 tcc_cntrl
.probeFromNB
= MessageBuffer()
618 tcc_cntrl
.probeFromNB
.slave
= ruby_system
.network
.master
620 tcc_cntrl
.responseFromNB
= MessageBuffer()
621 tcc_cntrl
.responseFromNB
.slave
= ruby_system
.network
.master
623 tcc_cntrl
.requestToNB
= MessageBuffer(ordered
= True)
624 tcc_cntrl
.requestToNB
.master
= ruby_system
.network
.slave
626 tcc_cntrl
.responseToNB
= MessageBuffer()
627 tcc_cntrl
.responseToNB
.master
= ruby_system
.network
.slave
629 tcc_cntrl
.unblockToNB
= MessageBuffer()
630 tcc_cntrl
.unblockToNB
.master
= ruby_system
.network
.slave
632 tcc_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
635 rb_cntrl
.create(options
, ruby_system
, system
)
636 rb_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
637 rb_cntrl
.isOnCPU
= False
639 # Connect the RB controllers to the ruby network
640 rb_cntrl
.requestFromCore
= MessageBuffer(ordered
= True)
641 rb_cntrl
.requestFromCore
.slave
= ruby_system
.network
.master
643 rb_cntrl
.responseFromCore
= MessageBuffer()
644 rb_cntrl
.responseFromCore
.slave
= ruby_system
.network
.master
646 rb_cntrl
.requestToNetwork
= MessageBuffer()
647 rb_cntrl
.requestToNetwork
.master
= ruby_system
.network
.slave
649 rb_cntrl
.notifyFromRegionDir
= MessageBuffer()
650 rb_cntrl
.notifyFromRegionDir
.slave
= ruby_system
.network
.master
652 rb_cntrl
.probeFromRegionDir
= MessageBuffer()
653 rb_cntrl
.probeFromRegionDir
.slave
= ruby_system
.network
.master
655 rb_cntrl
.unblockFromDir
= MessageBuffer()
656 rb_cntrl
.unblockFromDir
.slave
= ruby_system
.network
.master
658 rb_cntrl
.responseToRegDir
= MessageBuffer()
659 rb_cntrl
.responseToRegDir
.master
= ruby_system
.network
.slave
661 rb_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
663 tcc_cntrl
.regionBufferNum
= rb_cntrl
.version
665 exec("system.tcc_cntrl%d = tcc_cntrl" % i
)
666 exec("system.tcc_rb_cntrl%d = rb_cntrl" % i
)
668 # TCC cntrls added to the GPU cluster
669 gpuCluster
.add(tcc_cntrl
)
670 gpuCluster
.add(rb_cntrl
)
672 # Because of wire buffers, num_l3caches must equal num_dirs
673 # Region coherence only works with 1 dir
674 assert(options
.num_l3caches
== options
.num_dirs
== 1)
676 # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
678 mainCluster
= Cluster(intBW
= crossbar_bw
)
680 dir_cntrl
= DirCntrl()
681 dir_cntrl
.create(options
, ruby_system
, system
)
682 dir_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
683 dir_cntrl
.useL3OnWT
= options
.use_L3_on_WT
685 # Connect the Directory controller to the ruby network
686 dir_cntrl
.requestFromCores
= MessageBuffer()
687 dir_cntrl
.requestFromCores
.slave
= ruby_system
.network
.master
689 dir_cntrl
.responseFromCores
= MessageBuffer()
690 dir_cntrl
.responseFromCores
.slave
= ruby_system
.network
.master
692 dir_cntrl
.unblockFromCores
= MessageBuffer()
693 dir_cntrl
.unblockFromCores
.slave
= ruby_system
.network
.master
695 dir_cntrl
.probeToCore
= MessageBuffer()
696 dir_cntrl
.probeToCore
.master
= ruby_system
.network
.slave
698 dir_cntrl
.responseToCore
= MessageBuffer()
699 dir_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
701 dir_cntrl
.reqFromRegBuf
= MessageBuffer()
702 dir_cntrl
.reqFromRegBuf
.slave
= ruby_system
.network
.master
704 dir_cntrl
.reqToRegDir
= MessageBuffer(ordered
= True)
705 dir_cntrl
.reqToRegDir
.master
= ruby_system
.network
.slave
707 dir_cntrl
.reqFromRegDir
= MessageBuffer(ordered
= True)
708 dir_cntrl
.reqFromRegDir
.slave
= ruby_system
.network
.master
710 dir_cntrl
.unblockToRegDir
= MessageBuffer()
711 dir_cntrl
.unblockToRegDir
.master
= ruby_system
.network
.slave
713 dir_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
714 dir_cntrl
.L3triggerQueue
= MessageBuffer(ordered
= True)
715 dir_cntrl
.responseFromMemory
= MessageBuffer()
717 exec("system.dir_cntrl%d = dir_cntrl" % i
)
718 dir_cntrl_nodes
.append(dir_cntrl
)
720 mainCluster
.add(dir_cntrl
)
722 reg_cntrl
= RegionCntrl(noTCCdir
=True,TCC_select_num_bits
= TCC_bits
)
723 reg_cntrl
.create(options
, ruby_system
, system
)
724 reg_cntrl
.number_of_TBEs
= options
.num_tbes
725 reg_cntrl
.cpuRegionBufferNum
= system
.rb_cntrl0
.version
726 reg_cntrl
.gpuRegionBufferNum
= system
.tcc_rb_cntrl0
.version
728 # Connect the Region Dir controllers to the ruby network
729 reg_cntrl
.requestToDir
= MessageBuffer(ordered
= True)
730 reg_cntrl
.requestToDir
.master
= ruby_system
.network
.slave
732 reg_cntrl
.notifyToRBuffer
= MessageBuffer()
733 reg_cntrl
.notifyToRBuffer
.master
= ruby_system
.network
.slave
735 reg_cntrl
.probeToRBuffer
= MessageBuffer()
736 reg_cntrl
.probeToRBuffer
.master
= ruby_system
.network
.slave
738 reg_cntrl
.responseFromRBuffer
= MessageBuffer()
739 reg_cntrl
.responseFromRBuffer
.slave
= ruby_system
.network
.master
741 reg_cntrl
.requestFromRegBuf
= MessageBuffer()
742 reg_cntrl
.requestFromRegBuf
.slave
= ruby_system
.network
.master
744 reg_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
746 exec("system.reg_cntrl%d = reg_cntrl" % i
)
748 mainCluster
.add(reg_cntrl
)
750 # Assuming no DMA devices
751 assert(len(dma_devices
) == 0)
753 # Add cpu/gpu clusters to main cluster
754 mainCluster
.add(cpuCluster
)
755 mainCluster
.add(gpuCluster
)
757 ruby_system
.network
.number_of_virtual_networks
= 10
759 return (cpu_sequencers
, dir_cntrl_nodes
, mainCluster
)