1 # Copyright (c) 2015 Advanced Micro Devices, Inc.
4 # For use for simulation and test purposes only
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
35 from m5
.objects
import *
36 from m5
.defines
import buildEnv
37 from m5
.util
import addToPath
38 from .Ruby
import send_evicts
42 from topologies
.Cluster
import Cluster
51 # Use SeqCount not class since we need global count
53 return CntrlBase
._seqs
- 1
58 # Use CntlCount not class since we need global count
59 CntrlBase
._cntrls
+= 1
60 return CntrlBase
._cntrls
- 1
def versionCount(cls):
    """Return the next unused version number for this controller type.

    The counter is kept in ``cls._version``. Each call stores the counter
    plus one back on ``cls`` and returns the value the counter held before
    the call, so successive calls on the same class yield consecutive
    numbers.
    """
    issued = cls._version
    # Use count for this particular type
    cls._version = issued + 1
    return issued
69 # Note: the L1 Cache latency is only used by the sequencer on fast path hits
71 class L1Cache(RubyCache
):
72 resourceStalls
= False
def create(self, size, assoc, options):
    """Apply the requested capacity and associativity to this L1 cache.

    size    -- capacity; wrapped in MemorySize() before being stored.
    assoc   -- associativity to store on the cache.
    options -- parsed option object; kept for signature parity with the
               callers, not read by this method.
    """
    self.size = MemorySize(size)
    # The assoc argument used to be accepted and then ignored, so the
    # value handed in by CPCntrl (options.l1i_assoc / options.l1d_assoc)
    # never reached the cache. Store it like the other caches do.
    self.assoc = assoc
    self.replacement_policy = TreePLRURP()
82 class L2Cache(RubyCache
):
83 resourceStalls
= False
def create(self, size, assoc, options):
    """Apply the requested capacity and associativity to this L2 cache.

    size    -- capacity; wrapped in MemorySize() before being stored.
    assoc   -- associativity to store on the cache.
    options -- parsed option object; kept for signature parity with the
               callers, not read by this method.
    """
    self.size = MemorySize(size)
    # The assoc argument used to be accepted and then ignored, so the
    # value handed in by CPCntrl (options.l2_assoc) never reached the
    # cache. Store it like the other caches do.
    self.assoc = assoc
    self.replacement_policy = TreePLRURP()
class CPCntrl(CorePair_Controller, CntrlBase):
    # Core-pair controller: one shared L1I, one L1D per core, a shared L2
    # and one RubySequencer per core.

    def create(self, options, ruby_system, system):
        """Build the caches and both sequencers of this core pair.

        options     -- parsed options; supplies the cache sizes and
                       associativities and the optional recycle latency.
        ruby_system -- stored on the controller and on both sequencers.
        system      -- not read by this method.
        """
        self.version = self.versionCount()

        def attach_cache(attr_name, cache, capacity, ways):
            # Attach first, then configure through the attribute, exactly
            # as a plain "self.X = ...; self.X.create(...)" pair would.
            setattr(self, attr_name, cache)
            getattr(self, attr_name).create(capacity, ways, options)

        attach_cache("L1Icache", L1Cache(),
                     options.l1i_size, options.l1i_assoc)
        attach_cache("L1D0cache", L1Cache(),
                     options.l1d_size, options.l1d_assoc)
        attach_cache("L1D1cache", L1Cache(),
                     options.l1d_size, options.l1d_assoc)
        attach_cache("L2cache", L2Cache(),
                     options.l2_size, options.l2_assoc)

        # Both sequencers share the instruction cache; each one gets its
        # own data cache and core id.
        per_core = (
            ("sequencer", 0, "L1D0cache"),
            ("sequencer1", 1, "L1D1cache"),
        )
        for seq_attr, core_id, dcache_attr in per_core:
            setattr(self, seq_attr, RubySequencer())
            seq = getattr(self, seq_attr)
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = getattr(self, dcache_attr)
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        self.issue_latency = 1
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the recycle latency when the option is truthy.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
130 class TCPCache(RubyCache
):
135 dataAccessLatency
= 4
def create(self, options):
    """Configure the TCP cache from the parsed options.

    options -- supplies tcp_size (capacity) and no_tcc_resource_stalls
               (stored as resourceStalls).

    The replacement policy is a TreePLRURP with one leaf per way, read
    from self.assoc, so the associativity must already be set on the
    object when this runs.
    """
    self.size = MemorySize(options.tcp_size)
    self.dataArrayBanks = 16
    self.tagArrayBanks = 16
    # dataAccessLatency is intentionally not assigned here. TCPCntrl
    # builds this cache as TCPCache(dataAccessLatency=options.TCP_latency)
    # and then calls create(); resetting the latency to 4 at this point
    # threw the requested value away. The class-level default stays 4.
    self.tagAccessLatency = 1
    self.resourceStalls = options.no_tcc_resource_stalls
    self.replacement_policy = TreePLRURP(num_leaves=self.assoc)
class TCPCntrl(TCP_Controller, CntrlBase):
    # TCP controller: one TCPCache shared by a VIPERCoalescer and a
    # RubySequencer.

    def create(self, options, ruby_system, system):
        """Build the L1 cache, the coalescer and the sequencer.

        options     -- parsed options; supplies TCP_latency, the TCP cache
                       settings, the coalescer limits and the optional
                       recycle latency.
        ruby_system -- stored on the controller, coalescer and sequencer.
        system      -- not read by this method.
        """
        self.version = self.versionCount()

        self.L1cache = TCPCache(dataAccessLatency=options.TCP_latency)
        self.L1cache.create(options)
        self.issue_latency = 1

        # Coalescer: instruction and data side both point at the one L1.
        self.coalescer = VIPERCoalescer()
        coal = self.coalescer
        coal.version = self.seqCount()
        coal.icache = self.L1cache
        coal.dcache = self.L1cache
        coal.ruby_system = ruby_system
        coal.support_inst_reqs = False
        coal.is_cpu_sequencer = False
        deadlock_limit = options.tcp_deadlock_threshold
        if deadlock_limit:
            coal.deadlock_threshold = deadlock_limit
        coal.max_coalesces_per_cycle = options.max_coalesces_per_cycle

        # Sequencer backed by the same L1.
        self.sequencer = RubySequencer()
        seq = self.sequencer
        seq.version = self.seqCount()
        seq.icache = self.L1cache
        seq.dcache = self.L1cache
        seq.ruby_system = ruby_system
        seq.is_cpu_sequencer = True

        self.use_seq_not_coal = False

        self.ruby_system = ruby_system
        # Only override the recycle latency when the option is truthy.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
180 class SQCCache(RubyCache
):
183 dataAccessLatency
= 1
def create(self, options):
    """Size the SQC cache and pick its replacement policy.

    options -- supplies sqc_size (capacity) and sqc_assoc (associativity).
    """
    capacity = MemorySize(options.sqc_size)
    self.size = capacity
    self.assoc = options.sqc_assoc
    # One tree leaf per way; read back from the attribute just stored.
    plru = TreePLRURP(num_leaves=self.assoc)
    self.replacement_policy = plru
class SQCCntrl(SQC_Controller, CntrlBase):
    # SQC controller: one SQCCache driven by a single RubySequencer.

    def create(self, options, ruby_system, system):
        """Build the SQC cache and its sequencer.

        options     -- parsed options; supplies the SQC cache settings, the
                       optional SQC deadlock threshold and the optional
                       recycle latency.
        ruby_system -- stored on the controller and on the sequencer.
        system      -- not read by this method.
        """
        self.version = self.versionCount()

        self.L1cache = SQCCache()
        self.L1cache.create(options)
        self.L1cache.resourceStalls = False

        self.sequencer = RubySequencer()
        seq = self.sequencer
        seq.version = self.seqCount()
        # Instruction and data side both point at the one L1.
        seq.icache = self.L1cache
        seq.dcache = self.L1cache
        seq.ruby_system = ruby_system
        seq.support_data_reqs = False
        seq.is_cpu_sequencer = False
        deadlock_limit = options.sqc_deadlock_threshold
        if deadlock_limit:
            seq.deadlock_threshold = deadlock_limit

        self.ruby_system = ruby_system
        # Only override the recycle latency when the option is truthy.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
213 class TCC(RubyCache
):
214 size
= MemorySize("256kB")
216 dataAccessLatency
= 8
218 resourceStalls
= False
219 def create(self
, options
):
220 self
.assoc
= options
.tcc_assoc
221 if hasattr(options
, 'bw_scalor') and options
.bw_scalor
> 0:
222 s
= options
.num_compute_units
224 tcc_size
= str(tcc_size
)+'kB'
225 self
.size
= MemorySize(tcc_size
)
226 self
.dataArrayBanks
= 64
227 self
.tagArrayBanks
= 64
229 self
.size
= MemorySize(options
.tcc_size
)
230 self
.dataArrayBanks
= 256 / options
.num_tccs
#number of data banks
231 self
.tagArrayBanks
= 256 / options
.num_tccs
#number of tag banks
232 self
.size
.value
= self
.size
.value
/ options
.num_tccs
233 if ((self
.size
.value
/ long(self
.assoc
)) < 128):
234 self
.size
.value
= long(128 * self
.assoc
)
235 self
.start_index_bit
= math
.log(options
.cacheline_size
, 2) + \
236 math
.log(options
.num_tccs
, 2)
237 self
.replacement_policy
= TreePLRURP(num_leaves
= self
.assoc
)
239 class TCCCntrl(TCC_Controller
, CntrlBase
):
240 def create(self
, options
, ruby_system
, system
):
241 self
.version
= self
.versionCount()
243 self
.L2cache
.create(options
)
244 self
.ruby_system
= ruby_system
245 if options
.recycle_latency
:
246 self
.recycle_latency
= options
.recycle_latency
248 class L3Cache(RubyCache
):
def create(self, options, ruby_system, system):
    """Configure this L3 slice from the parsed options.

    options     -- supplies l3_size, l3_assoc, num_dirs, l3_data_latency
                   and l3_tag_latency.
    ruby_system -- not read by this method.
    system      -- not read by this method.

    The total L3 capacity and the bank counts already present on the
    object are split across options.num_dirs.
    """
    self.size = MemorySize(options.l3_size)
    self.size.value /= options.num_dirs
    self.assoc = options.l3_assoc
    # Each bank count is divided by num_dirs exactly once. The division
    # used to be repeated for both attributes, which scaled the banks by
    # num_dirs squared whenever more than one directory was configured.
    self.dataArrayBanks /= options.num_dirs
    self.tagArrayBanks /= options.num_dirs
    self.dataAccessLatency = options.l3_data_latency
    self.tagAccessLatency = options.l3_tag_latency
    self.resourceStalls = False
    # One tree leaf per way.
    self.replacement_policy = TreePLRURP(num_leaves=self.assoc)
class L3Cntrl(L3Cache_Controller, CntrlBase):
    # L3 controller: owns one L3Cache and six wire buffers that are
    # supplied by the caller.

    def create(self, options, ruby_system, system):
        """Build the L3 cache and derive the response latency from it.

        options     -- parsed options; forwarded to L3Cache.create() and
                       read for the optional recycle latency.
        ruby_system -- stored on the controller, forwarded to the cache.
        system      -- forwarded to the cache.
        """
        self.version = self.versionCount()

        self.L3cache = L3Cache()
        l3 = self.L3cache
        l3.create(options, ruby_system, system)

        # The response latency is the slower of the data and tag arrays.
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)

        self.ruby_system = ruby_system
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers on their controller attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
285 # Directory memory: Directory memory of infinite size which is
286 # used by directory controller to store the "states" of the
287 # state machine. The state machine is implemented per cache block
288 class DirMem(RubyDirectoryMemory
, CntrlBase
):
def create(self, options, ruby_system, system):
    """Assign this directory memory a version and compute its share of memory.

    options     -- read for mem_size and num_dirs.
    ruby_system -- not read in the lines shown here.
    system      -- not read in the lines shown here.
    """
    self.version = self.versionCount()
    # Size of the address range described by options.mem_size.
    phys_mem_size = AddrRange(options.mem_size).size()
    # Split the physical memory evenly across the directories.
    mem_module_size = phys_mem_size / options.num_dirs
    # Start from a zero-byte MemorySize and overwrite its raw value with
    # the per-directory share.
    # NOTE(review): dir_size is not consumed in the lines visible here;
    # confirm against the full file where it is stored.
    dir_size = MemorySize('0B')
    dir_size.value = mem_module_size
297 # Directory controller: Contains directory memory, L3 cache and associated state
298 # machine which is used to accurately redirect a data request to L3 cache or to
299 # memory. The permissions requests do not come to this directory for region
300 # based protocols as they are handled exclusively by the region directory.
301 # However, region directory controller uses this directory controller for
302 # sending probe requests and receiving probe responses.
class DirCntrl(Directory_Controller, CntrlBase):
    # Directory controller: owns a DirMem, an L3Cache and six wire buffers
    # that are supplied by the caller.

    def create(self, options, ruby_system, system):
        """Build the directory memory and L3, then derive the hit latency.

        options     -- parsed options; forwarded to the sub-objects and
                       read for the optional recycle latency.
        ruby_system -- stored on the controller, forwarded to sub-objects.
        system      -- forwarded to the sub-objects.
        """
        self.version = self.versionCount()

        self.response_latency = 25
        self.response_latency_regionDir = 1

        self.directory = DirMem()
        self.directory.create(options, ruby_system, system)

        self.L3CacheMemory = L3Cache()
        l3 = self.L3CacheMemory
        l3.create(options, ruby_system, system)

        # An L3 hit costs the slower of the data and tag arrays.
        self.l3_hit_latency = max(l3.dataAccessLatency,
                                  l3.tagAccessLatency)

        self.ruby_system = ruby_system
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Store the six wire buffers on their controller attributes."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for attr_name, wire_buffer in wiring:
            setattr(self, attr_name, wire_buffer)
329 # Region directory : Stores region permissions
330 class RegionDir(RubyCache
):
332 def create(self
, options
, ruby_system
, system
):
333 self
.block_size
= "%dB" % (64 * options
.blocks_per_region
)
334 self
.size
= options
.region_dir_entries
* \
335 self
.block_size
* options
.num_compute_units
337 self
.tagArrayBanks
= 8
338 self
.tagAccessLatency
= options
.dir_tag_latency
339 self
.dataAccessLatency
= 1
340 self
.resourceStalls
= options
.no_resource_stalls
341 self
.start_index_bit
= 6 + int(math
.log(options
.blocks_per_region
, 2))
342 self
.replacement_policy
= TreePLRURP(num_leaves
= self
.assoc
)
343 # Region directory controller : Contains region directory and associated state
344 # machine for dealing with region coherence requests.
345 class RegionCntrl(RegionDir_Controller
, CntrlBase
):
346 def create(self
, options
, ruby_system
, system
):
347 self
.version
= self
.versionCount()
348 self
.cacheMemory
= RegionDir()
349 self
.cacheMemory
.create(options
, ruby_system
, system
)
350 self
.blocksPerRegion
= options
.blocks_per_region
351 self
.toDirLatency
= \
352 max(self
.cacheMemory
.dataAccessLatency
,
353 self
.cacheMemory
.tagAccessLatency
)
354 self
.ruby_system
= ruby_system
355 self
.always_migrate
= options
.always_migrate
356 self
.sym_migrate
= options
.symmetric_migrate
357 self
.asym_migrate
= options
.asymmetric_migrate
358 if self
.always_migrate
:
359 assert(not self
.asym_migrate
and not self
.sym_migrate
)
361 assert(not self
.always_migrate
and not self
.asym_migrate
)
362 if self
.asym_migrate
:
363 assert(not self
.always_migrate
and not self
.sym_migrate
)
364 if options
.recycle_latency
:
365 self
.recycle_latency
= options
.recycle_latency
# Region Buffer: A cache of the region directory which avoids some
# potentially long-latency lookups of the region directory when fetching
# region permissions
369 class RegionBuffer(RubyCache
):
373 dataAccessLatency
= 1
375 resourceStalls
= True
class RBCntrl(RegionBuffer_Controller, CntrlBase):
    # Region-buffer controller: owns one RegionBuffer cache whose block
    # size covers a whole region.

    def create(self, options, ruby_system, system):
        """Build and size the region buffer and set the controller latencies.

        options     -- parsed options; supplies blocks_per_region,
                       region_buffer_entries, num_compute_units, num_tccs,
                       the two directory latencies, no_tcc_resource_stalls
                       and the optional recycle latency.
        ruby_system -- stored on the controller.
        system      -- not read by this method.
        """
        self.version = self.versionCount()

        self.cacheMemory = RegionBuffer()
        region_buf = self.cacheMemory
        region_buf.resourceStalls = options.no_tcc_resource_stalls
        region_buf.dataArrayBanks = 64
        region_buf.tagArrayBanks = 64

        self.blocksPerRegion = options.blocks_per_region
        # One buffer block spans blocksPerRegion 64-byte units, and the
        # index starts above the 6 offset bits plus the in-region bits.
        region_buf.block_size = "%dB" % (64 * self.blocksPerRegion)
        region_buf.start_index_bit = \
            6 + int(math.log(self.blocksPerRegion, 2))
        region_buf.size = (options.region_buffer_entries
                           * region_buf.block_size
                           * options.num_compute_units)

        self.toDirLatency = options.gpu_to_dir_latency
        self.toRegionDirLatency = options.cpu_to_dir_latency

        self.TCC_select_num_bits = int(math.log(options.num_tccs, 2))
        self.ruby_system = ruby_system

        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
        # One tree leaf per way of the region buffer.
        region_buf.replacement_policy = \
            TreePLRURP(num_leaves=region_buf.assoc)
402 def define_options(parser
):
403 parser
.add_option("--num-subcaches", type="int", default
=4)
404 parser
.add_option("--l3-data-latency", type="int", default
=20)
405 parser
.add_option("--l3-tag-latency", type="int", default
=15)
406 parser
.add_option("--cpu-to-dir-latency", type="int", default
=120)
407 parser
.add_option("--gpu-to-dir-latency", type="int", default
=60)
408 parser
.add_option("--no-resource-stalls", action
="store_false",
410 parser
.add_option("--no-tcc-resource-stalls", action
="store_false",
412 parser
.add_option("--num-tbes", type="int", default
=32)
413 parser
.add_option("--l2-latency", type="int", default
=50) # load to use
414 parser
.add_option("--num-tccs", type="int", default
=1,
415 help="number of TCC banks in the GPU")
417 parser
.add_option("--sqc-size", type='string', default
='32kB',
418 help="SQC cache size")
419 parser
.add_option("--sqc-assoc", type='int', default
=8,
420 help="SQC cache assoc")
421 parser
.add_option("--sqc-deadlock-threshold", type='int',
422 help="Set the SQC deadlock threshold to some value")
424 parser
.add_option("--WB_L1", action
="store_true",
425 default
=False, help="L2 Writeback Cache")
426 parser
.add_option("--WB_L2", action
="store_true",
427 default
=False, help="L2 Writeback Cache")
428 parser
.add_option("--TCP_latency",
429 type="int", default
=4, help="TCP latency")
430 parser
.add_option("--TCC_latency",
431 type="int", default
=16, help="TCC latency")
432 parser
.add_option("--tcc-size", type='string', default
='2MB',
433 help="agregate tcc size")
434 parser
.add_option("--tcc-assoc", type='int', default
=16,
436 parser
.add_option("--tcp-size", type='string', default
='16kB',
438 parser
.add_option("--tcp-deadlock-threshold", type='int',
439 help="Set the TCP deadlock threshold to some value")
440 parser
.add_option("--max-coalesces-per-cycle", type="int", default
=1,
441 help="Maximum insts that may coalesce in a cycle");
443 parser
.add_option("--dir-tag-latency", type="int", default
=4)
444 parser
.add_option("--dir-tag-banks", type="int", default
=4)
445 parser
.add_option("--blocks-per-region", type="int", default
=16)
446 parser
.add_option("--dir-entries", type="int", default
=8192)
# Region buffer is a cache of region directory. Hence region
# directory is inclusive with respect to region buffer.
# However, region directory is non-inclusive with respect to
# the caches in the system
452 parser
.add_option("--region-dir-entries", type="int", default
=1024)
453 parser
.add_option("--region-buffer-entries", type="int", default
=512)
455 parser
.add_option("--always-migrate",
456 action
="store_true", default
=False)
457 parser
.add_option("--symmetric-migrate",
458 action
="store_true", default
=False)
459 parser
.add_option("--asymmetric-migrate",
460 action
="store_true", default
=False)
461 parser
.add_option("--use-L3-on-WT", action
="store_true", default
=False)
463 def create_system(options
, full_system
, system
, dma_devices
, bootmem
,
465 if buildEnv
['PROTOCOL'] != 'GPU_VIPER_Region':
466 panic("This script requires the GPU_VIPER_Region protocol to be built.")
471 # The ruby network creation expects the list of nodes in the system to be
472 # consistent with the NetDest list. Therefore the l1 controller nodes
473 # must be listed before the directory nodes and directory nodes before
478 # For an odd number of CPUs, still create the right number of controllers
479 TCC_bits
= int(math
.log(options
.num_tccs
, 2))
482 # Must create the individual controllers before the network to ensure the
483 # controller constructors are called before the network constructor
486 # For an odd number of CPUs, still create the right number of controllers
487 crossbar_bw
= 16 * options
.num_compute_units
#Assuming a 2GHz clock
488 cpuCluster
= Cluster(extBW
= (crossbar_bw
), intBW
=crossbar_bw
)
489 for i
in range((options
.num_cpus
+ 1) // 2):
492 cp_cntrl
.create(options
, ruby_system
, system
)
495 rb_cntrl
.create(options
, ruby_system
, system
)
496 rb_cntrl
.number_of_TBEs
= 256
497 rb_cntrl
.isOnCPU
= True
499 cp_cntrl
.regionBufferNum
= rb_cntrl
.version
501 exec("system.cp_cntrl%d = cp_cntrl" % i
)
502 exec("system.rb_cntrl%d = rb_cntrl" % i
)
504 # Add controllers and sequencers to the appropriate lists
506 cpu_sequencers
.extend([cp_cntrl
.sequencer
, cp_cntrl
.sequencer1
])
508 # Connect the CP controllers and the network
509 cp_cntrl
.requestFromCore
= MessageBuffer()
510 cp_cntrl
.requestFromCore
.master
= ruby_system
.network
.slave
512 cp_cntrl
.responseFromCore
= MessageBuffer()
513 cp_cntrl
.responseFromCore
.master
= ruby_system
.network
.slave
515 cp_cntrl
.unblockFromCore
= MessageBuffer()
516 cp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
518 cp_cntrl
.probeToCore
= MessageBuffer()
519 cp_cntrl
.probeToCore
.slave
= ruby_system
.network
.master
521 cp_cntrl
.responseToCore
= MessageBuffer()
522 cp_cntrl
.responseToCore
.slave
= ruby_system
.network
.master
524 cp_cntrl
.mandatoryQueue
= MessageBuffer()
525 cp_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
527 # Connect the RB controllers to the ruby network
528 rb_cntrl
.requestFromCore
= MessageBuffer(ordered
= True)
529 rb_cntrl
.requestFromCore
.slave
= ruby_system
.network
.master
531 rb_cntrl
.responseFromCore
= MessageBuffer()
532 rb_cntrl
.responseFromCore
.slave
= ruby_system
.network
.master
534 rb_cntrl
.requestToNetwork
= MessageBuffer()
535 rb_cntrl
.requestToNetwork
.master
= ruby_system
.network
.slave
537 rb_cntrl
.notifyFromRegionDir
= MessageBuffer()
538 rb_cntrl
.notifyFromRegionDir
.slave
= ruby_system
.network
.master
540 rb_cntrl
.probeFromRegionDir
= MessageBuffer()
541 rb_cntrl
.probeFromRegionDir
.slave
= ruby_system
.network
.master
543 rb_cntrl
.unblockFromDir
= MessageBuffer()
544 rb_cntrl
.unblockFromDir
.slave
= ruby_system
.network
.master
546 rb_cntrl
.responseToRegDir
= MessageBuffer()
547 rb_cntrl
.responseToRegDir
.master
= ruby_system
.network
.slave
549 rb_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
551 cpuCluster
.add(cp_cntrl
)
552 cpuCluster
.add(rb_cntrl
)
554 gpuCluster
= Cluster(extBW
= (crossbar_bw
), intBW
= crossbar_bw
)
555 for i
in range(options
.num_compute_units
):
557 tcp_cntrl
= TCPCntrl(TCC_select_num_bits
= TCC_bits
,
559 number_of_TBEs
= 2560)
560 # TBEs set to max outstanding requests
561 tcp_cntrl
.create(options
, ruby_system
, system
)
562 tcp_cntrl
.WB
= options
.WB_L1
563 tcp_cntrl
.disableL1
= False
565 exec("system.tcp_cntrl%d = tcp_cntrl" % i
)
567 # Add controllers and sequencers to the appropriate lists
569 cpu_sequencers
.append(tcp_cntrl
.coalescer
)
571 # Connect the CP (TCP) controllers to the ruby network
572 tcp_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
573 tcp_cntrl
.requestFromTCP
.master
= ruby_system
.network
.slave
575 tcp_cntrl
.responseFromTCP
= MessageBuffer(ordered
= True)
576 tcp_cntrl
.responseFromTCP
.master
= ruby_system
.network
.slave
578 tcp_cntrl
.unblockFromCore
= MessageBuffer()
579 tcp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
581 tcp_cntrl
.probeToTCP
= MessageBuffer(ordered
= True)
582 tcp_cntrl
.probeToTCP
.slave
= ruby_system
.network
.master
584 tcp_cntrl
.responseToTCP
= MessageBuffer(ordered
= True)
585 tcp_cntrl
.responseToTCP
.slave
= ruby_system
.network
.master
587 tcp_cntrl
.mandatoryQueue
= MessageBuffer()
589 gpuCluster
.add(tcp_cntrl
)
591 for i
in range(options
.num_sqc
):
593 sqc_cntrl
= SQCCntrl(TCC_select_num_bits
= TCC_bits
)
594 sqc_cntrl
.create(options
, ruby_system
, system
)
596 exec("system.sqc_cntrl%d = sqc_cntrl" % i
)
598 # Add controllers and sequencers to the appropriate lists
600 cpu_sequencers
.append(sqc_cntrl
.sequencer
)
602 # Connect the SQC controller to the ruby network
603 sqc_cntrl
.requestFromSQC
= MessageBuffer(ordered
= True)
604 sqc_cntrl
.requestFromSQC
.master
= ruby_system
.network
.slave
606 sqc_cntrl
.probeToSQC
= MessageBuffer(ordered
= True)
607 sqc_cntrl
.probeToSQC
.slave
= ruby_system
.network
.master
609 sqc_cntrl
.responseToSQC
= MessageBuffer(ordered
= True)
610 sqc_cntrl
.responseToSQC
.slave
= ruby_system
.network
.master
612 sqc_cntrl
.mandatoryQueue
= MessageBuffer()
614 # SQC also in GPU cluster
615 gpuCluster
.add(sqc_cntrl
)
619 for i
in range(options
.num_tccs
):
621 tcc_cntrl
= TCCCntrl()
622 tcc_cntrl
.create(options
, ruby_system
, system
)
623 tcc_cntrl
.l2_request_latency
= 1
624 tcc_cntrl
.l2_response_latency
= options
.TCC_latency
625 tcc_cntrl
.WB
= options
.WB_L2
626 tcc_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
628 # Connect the TCC controllers to the ruby network
629 tcc_cntrl
.requestFromTCP
= MessageBuffer(ordered
= True)
630 tcc_cntrl
.requestFromTCP
.slave
= ruby_system
.network
.master
632 tcc_cntrl
.responseToCore
= MessageBuffer(ordered
= True)
633 tcc_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
635 tcc_cntrl
.probeFromNB
= MessageBuffer()
636 tcc_cntrl
.probeFromNB
.slave
= ruby_system
.network
.master
638 tcc_cntrl
.responseFromNB
= MessageBuffer()
639 tcc_cntrl
.responseFromNB
.slave
= ruby_system
.network
.master
641 tcc_cntrl
.requestToNB
= MessageBuffer(ordered
= True)
642 tcc_cntrl
.requestToNB
.master
= ruby_system
.network
.slave
644 tcc_cntrl
.responseToNB
= MessageBuffer()
645 tcc_cntrl
.responseToNB
.master
= ruby_system
.network
.slave
647 tcc_cntrl
.unblockToNB
= MessageBuffer()
648 tcc_cntrl
.unblockToNB
.master
= ruby_system
.network
.slave
650 tcc_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
653 rb_cntrl
.create(options
, ruby_system
, system
)
654 rb_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
655 rb_cntrl
.isOnCPU
= False
657 # Connect the RB controllers to the ruby network
658 rb_cntrl
.requestFromCore
= MessageBuffer(ordered
= True)
659 rb_cntrl
.requestFromCore
.slave
= ruby_system
.network
.master
661 rb_cntrl
.responseFromCore
= MessageBuffer()
662 rb_cntrl
.responseFromCore
.slave
= ruby_system
.network
.master
664 rb_cntrl
.requestToNetwork
= MessageBuffer()
665 rb_cntrl
.requestToNetwork
.master
= ruby_system
.network
.slave
667 rb_cntrl
.notifyFromRegionDir
= MessageBuffer()
668 rb_cntrl
.notifyFromRegionDir
.slave
= ruby_system
.network
.master
670 rb_cntrl
.probeFromRegionDir
= MessageBuffer()
671 rb_cntrl
.probeFromRegionDir
.slave
= ruby_system
.network
.master
673 rb_cntrl
.unblockFromDir
= MessageBuffer()
674 rb_cntrl
.unblockFromDir
.slave
= ruby_system
.network
.master
676 rb_cntrl
.responseToRegDir
= MessageBuffer()
677 rb_cntrl
.responseToRegDir
.master
= ruby_system
.network
.slave
679 rb_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
681 tcc_cntrl
.regionBufferNum
= rb_cntrl
.version
683 exec("system.tcc_cntrl%d = tcc_cntrl" % i
)
684 exec("system.tcc_rb_cntrl%d = rb_cntrl" % i
)
686 # TCC cntrls added to the GPU cluster
687 gpuCluster
.add(tcc_cntrl
)
688 gpuCluster
.add(rb_cntrl
)
690 # Because of wire buffers, num_l3caches must equal num_dirs
691 # Region coherence only works with 1 dir
692 assert(options
.num_l3caches
== options
.num_dirs
== 1)
694 # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu
696 mainCluster
= Cluster(intBW
= crossbar_bw
)
698 dir_cntrl
= DirCntrl()
699 dir_cntrl
.create(options
, ruby_system
, system
)
700 dir_cntrl
.number_of_TBEs
= 2560 * options
.num_compute_units
701 dir_cntrl
.useL3OnWT
= options
.use_L3_on_WT
703 # Connect the Directory controller to the ruby network
704 dir_cntrl
.requestFromCores
= MessageBuffer()
705 dir_cntrl
.requestFromCores
.slave
= ruby_system
.network
.master
707 dir_cntrl
.responseFromCores
= MessageBuffer()
708 dir_cntrl
.responseFromCores
.slave
= ruby_system
.network
.master
710 dir_cntrl
.unblockFromCores
= MessageBuffer()
711 dir_cntrl
.unblockFromCores
.slave
= ruby_system
.network
.master
713 dir_cntrl
.probeToCore
= MessageBuffer()
714 dir_cntrl
.probeToCore
.master
= ruby_system
.network
.slave
716 dir_cntrl
.responseToCore
= MessageBuffer()
717 dir_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
719 dir_cntrl
.reqFromRegBuf
= MessageBuffer()
720 dir_cntrl
.reqFromRegBuf
.slave
= ruby_system
.network
.master
722 dir_cntrl
.reqToRegDir
= MessageBuffer(ordered
= True)
723 dir_cntrl
.reqToRegDir
.master
= ruby_system
.network
.slave
725 dir_cntrl
.reqFromRegDir
= MessageBuffer(ordered
= True)
726 dir_cntrl
.reqFromRegDir
.slave
= ruby_system
.network
.master
728 dir_cntrl
.unblockToRegDir
= MessageBuffer()
729 dir_cntrl
.unblockToRegDir
.master
= ruby_system
.network
.slave
731 dir_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
732 dir_cntrl
.L3triggerQueue
= MessageBuffer(ordered
= True)
733 dir_cntrl
.requestToMemory
= MessageBuffer()
734 dir_cntrl
.responseFromMemory
= MessageBuffer()
736 exec("system.dir_cntrl%d = dir_cntrl" % i
)
737 dir_cntrl_nodes
.append(dir_cntrl
)
739 mainCluster
.add(dir_cntrl
)
741 reg_cntrl
= RegionCntrl(noTCCdir
=True,TCC_select_num_bits
= TCC_bits
)
742 reg_cntrl
.create(options
, ruby_system
, system
)
743 reg_cntrl
.number_of_TBEs
= options
.num_tbes
744 reg_cntrl
.cpuRegionBufferNum
= system
.rb_cntrl0
.version
745 reg_cntrl
.gpuRegionBufferNum
= system
.tcc_rb_cntrl0
.version
747 # Connect the Region Dir controllers to the ruby network
748 reg_cntrl
.requestToDir
= MessageBuffer(ordered
= True)
749 reg_cntrl
.requestToDir
.master
= ruby_system
.network
.slave
751 reg_cntrl
.notifyToRBuffer
= MessageBuffer()
752 reg_cntrl
.notifyToRBuffer
.master
= ruby_system
.network
.slave
754 reg_cntrl
.probeToRBuffer
= MessageBuffer()
755 reg_cntrl
.probeToRBuffer
.master
= ruby_system
.network
.slave
757 reg_cntrl
.responseFromRBuffer
= MessageBuffer()
758 reg_cntrl
.responseFromRBuffer
.slave
= ruby_system
.network
.master
760 reg_cntrl
.requestFromRegBuf
= MessageBuffer()
761 reg_cntrl
.requestFromRegBuf
.slave
= ruby_system
.network
.master
763 reg_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
765 exec("system.reg_cntrl%d = reg_cntrl" % i
)
767 mainCluster
.add(reg_cntrl
)
769 # Assuming no DMA devices
770 assert(len(dma_devices
) == 0)
772 # Add cpu/gpu clusters to main cluster
773 mainCluster
.add(cpuCluster
)
774 mainCluster
.add(gpuCluster
)
776 ruby_system
.network
.number_of_virtual_networks
= 10
778 return (cpu_sequencers
, dir_cntrl_nodes
, mainCluster
)