1 # Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
4 # For use for simulation and test purposes only
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are met:
9 # 1. Redistributions of source code must retain the above copyright notice,
10 # this list of conditions and the following disclaimer.
12 # 2. Redistributions in binary form must reproduce the above copyright notice,
13 # this list of conditions and the following disclaimer in the documentation
14 # and/or other materials provided with the distribution.
16 # 3. Neither the name of the copyright holder nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 # POSSIBILITY OF SUCH DAMAGE.
36 from m5
.objects
import *
37 from m5
.defines
import buildEnv
38 from m5
.util
import addToPath
39 from Ruby
import create_topology
40 from Ruby
import send_evicts
41 from common
import FileSystemConfig
45 from topologies
.Cluster
import Cluster
46 from topologies
.Crossbar
import Crossbar
52 # Use SeqCount not class since we need global count
54 return CntrlBase
._seqs
- 1
59 # Use CntlCount not class since we need global count
60 CntrlBase
._cntrls
+= 1
61 return CntrlBase
._cntrls
- 1
def versionCount(cls):
    """Hand out the next version number for this particular controller type.

    The counter is stored on ``cls`` itself (not on a shared base), so the
    numbering is tracked per type.  Returns the value the counter held
    before it was advanced.
    """
    issued = cls._version
    # Use count for this particular type
    cls._version = issued + 1
    return issued
class L1DCache(RubyCache):
    """RubyCache variant configured from the L1 data-cache options."""

    # Class-level setting applied to every instance of this cache type.
    resourceStalls = False

    def create(self, options):
        """Apply size, associativity and replacement policy.

        Reads ``options.l1d_size`` and ``options.l1d_assoc`` and installs a
        ``TreePLRURP`` replacement policy.
        """
        capacity = MemorySize(options.l1d_size)
        self.size = capacity

        ways = options.l1d_assoc
        self.assoc = ways

        self.replacement_policy = TreePLRURP()
class L1ICache(RubyCache):
    """RubyCache variant configured from the L1 instruction-cache options."""

    # Class-level setting applied to every instance of this cache type.
    resourceStalls = False

    def create(self, options):
        """Apply size, associativity and replacement policy.

        Reads ``options.l1i_size`` and ``options.l1i_assoc`` and installs a
        ``TreePLRURP`` replacement policy.
        """
        capacity = MemorySize(options.l1i_size)
        self.size = capacity

        ways = options.l1i_assoc
        self.assoc = ways

        self.replacement_policy = TreePLRURP()
class L2Cache(RubyCache):
    """RubyCache variant configured from the L2 cache options."""

    # Class-level setting applied to every instance of this cache type.
    resourceStalls = False

    def create(self, options):
        """Apply size, associativity and replacement policy.

        Reads ``options.l2_size`` and ``options.l2_assoc`` and installs a
        ``TreePLRURP`` replacement policy.
        """
        capacity = MemorySize(options.l2_size)
        self.size = capacity

        ways = options.l2_assoc
        self.assoc = ways

        self.replacement_policy = TreePLRURP()
class CPCntrl(CorePair_Controller, CntrlBase):
    """Core-pair controller: one L1I, two L1Ds, one L2 and two sequencers."""

    def create(self, options, ruby_system, system):
        """Build the caches and sequencers and set the controller latencies.

        ``options`` supplies the cache geometry (through each cache's own
        ``create``), ``cpu_to_dir_latency`` and the optional
        ``recycle_latency``.  ``ruby_system`` is attached to the controller
        and to both sequencers.  ``system`` is accepted but not used here.
        """
        self.version = self.versionCount()

        # Instantiate and configure the caches, in the same order as before:
        # shared L1I, per-core L1D0 / L1D1, then the L2.
        cache_plan = (
            ("L1Icache", L1ICache),
            ("L1D0cache", L1DCache),
            ("L1D1cache", L1DCache),
            ("L2cache", L2Cache),
        )
        for cache_attr, cache_cls in cache_plan:
            setattr(self, cache_attr, cache_cls())
            getattr(self, cache_attr).create(options)

        # One CPU sequencer per core; both use the same L1I but each has
        # its own L1D.  The position in this tuple is the core id.
        sequencer_plan = (
            ("sequencer", self.L1D0cache),
            ("sequencer1", self.L1D1cache),
        )
        for core_id, (seq_attr, data_cache) in enumerate(sequencer_plan):
            setattr(self, seq_attr, RubySequencer())
            seq = getattr(self, seq_attr)
            seq.version = self.seqCount()
            seq.icache = self.L1Icache
            seq.dcache = data_cache
            seq.ruby_system = ruby_system
            seq.coreid = core_id
            seq.is_cpu_sequencer = True

        # Defines icache/dcache hit latency
        self.mandatory_queue_latency = 2

        self.issue_latency = options.cpu_to_dir_latency
        self.send_evictions = send_evicts(options)

        self.ruby_system = ruby_system

        # Only override the recycle latency when one was actually supplied.
        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle
class L3Cache(RubyCache):
    """RubyCache variant configured from the L3 options, split per directory."""

    def create(self, options, ruby_system, system):
        """Size the cache and set its latencies from ``options``.

        The total ``options.l3_size`` is divided by ``options.num_dirs``.
        The data/tag bank counts are scaled down by the same divisor.
        ``ruby_system`` and ``system`` are accepted but not used here.
        """
        dir_count = options.num_dirs

        self.size = MemorySize(options.l3_size)
        self.size.value /= dir_count

        # NOTE(review): each bank count ends up divided by num_dirs twice
        # (data, tag, data, tag).  This mirrors the existing behaviour
        # exactly; confirm whether the second division is intentional.
        for _ in range(2):
            self.dataArrayBanks /= dir_count
            self.tagArrayBanks /= dir_count

        self.dataAccessLatency = options.l3_data_latency
        self.tagAccessLatency = options.l3_tag_latency
        self.resourceStalls = options.no_resource_stalls
        self.replacement_policy = TreePLRURP()
class L3Cntrl(L3Cache_Controller, CntrlBase):
    """L3 cache controller that owns a single ``L3Cache`` instance."""

    def create(self, options, ruby_system, system):
        """Create the L3 cache and derive the controller's response latency.

        The response latency is the larger of the cache's data and tag
        access latencies.  ``options.recycle_latency`` is applied only when
        it is truthy.
        """
        self.version = self.versionCount()

        self.L3cache = L3Cache()
        self.L3cache.create(options, ruby_system, system)

        l3 = self.L3cache
        self.l3_response_latency = max(l3.dataAccessLatency,
                                       l3.tagAccessLatency)
        self.ruby_system = ruby_system

        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Attach the six wire buffers shared between directory and L3."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for buffer_attr, wire in wiring:
            setattr(self, buffer_attr, wire)
class DirCntrl(Directory_Controller, CntrlBase):
    """Directory controller with its directory memory and an L3 cache."""

    def create(self, options, dir_ranges, ruby_system, system):
        """Configure the directory, its L3 cache memory and its latencies.

        ``dir_ranges`` becomes the controller's address ranges.  The L3 hit
        latency is the larger of the L3's data and tag access latencies.
        ``options.num_tbes`` sets the TBE count, and
        ``options.recycle_latency`` is applied only when it is truthy.
        """
        self.version = self.versionCount()

        self.response_latency = 30

        self.addr_ranges = dir_ranges
        self.directory = RubyDirectoryMemory()

        self.L3CacheMemory = L3Cache()
        self.L3CacheMemory.create(options, ruby_system, system)

        l3_mem = self.L3CacheMemory
        self.l3_hit_latency = max(l3_mem.dataAccessLatency,
                                  l3_mem.tagAccessLatency)

        self.number_of_TBEs = options.num_tbes

        self.ruby_system = ruby_system

        recycle = options.recycle_latency
        if recycle:
            self.recycle_latency = recycle

    def connectWireBuffers(self, req_to_dir, resp_to_dir, l3_unblock_to_dir,
                           req_to_l3, probe_to_l3, resp_to_l3):
        """Attach the six wire buffers shared between directory and L3."""
        wiring = (
            ("reqToDir", req_to_dir),
            ("respToDir", resp_to_dir),
            ("l3UnblockToDir", l3_unblock_to_dir),
            ("reqToL3", req_to_l3),
            ("probeToL3", probe_to_l3),
            ("respToL3", resp_to_l3),
        )
        for buffer_attr, wire in wiring:
            setattr(self, buffer_attr, wire)
203 def define_options(parser
):
204 parser
.add_option("--num-subcaches", type="int", default
=4)
205 parser
.add_option("--l3-data-latency", type="int", default
=20)
206 parser
.add_option("--l3-tag-latency", type="int", default
=15)
207 parser
.add_option("--cpu-to-dir-latency", type="int", default
=15)
208 parser
.add_option("--no-resource-stalls", action
="store_false",
210 parser
.add_option("--num-tbes", type="int", default
=256)
211 parser
.add_option("--l2-latency", type="int", default
=50) # load to use
213 def create_system(options
, full_system
, system
, dma_devices
, bootmem
,
215 if buildEnv
['PROTOCOL'] != 'MOESI_AMD_Base':
216 panic("This script requires the MOESI_AMD_Base protocol.")
221 # The ruby network creation expects the list of nodes in the system to
222 # be consistent with the NetDest list. Therefore the l1 controller
223 # nodes must be listed before the directory nodes and directory nodes
224 # before dma nodes, etc.
233 # Must create the individual controllers before the network to ensure
234 # the controller constructors are called before the network constructor
237 # This is the base crossbar that connects the L3s, Dirs, and cpu
239 mainCluster
= Cluster(extBW
= 512, intBW
= 512) # 1 TB/s
241 if options
.numa_high_bit
:
242 numa_bit
= options
.numa_high_bit
244 # if the numa_bit is not specified, set the directory bits as the
245 # lowest bits above the block offset bits, and the numa_bit as the
246 # highest of those directory bits
247 dir_bits
= int(math
.log(options
.num_dirs
, 2))
248 block_size_bits
= int(math
.log(options
.cacheline_size
, 2))
249 numa_bit
= block_size_bits
+ dir_bits
- 1
251 for i
in range(options
.num_dirs
):
253 for r
in system
.mem_ranges
:
254 addr_range
= m5
.objects
.AddrRange(r
.start
, size
= r
.size(),
255 intlvHighBit
= numa_bit
,
256 intlvBits
= dir_bits
,
258 dir_ranges
.append(addr_range
)
261 dir_cntrl
= DirCntrl(TCC_select_num_bits
= 0)
262 dir_cntrl
.create(options
, dir_ranges
, ruby_system
, system
)
264 # Connect the Directory controller to the ruby network
265 dir_cntrl
.requestFromCores
= MessageBuffer(ordered
= True)
266 dir_cntrl
.requestFromCores
.slave
= ruby_system
.network
.master
268 dir_cntrl
.responseFromCores
= MessageBuffer()
269 dir_cntrl
.responseFromCores
.slave
= ruby_system
.network
.master
271 dir_cntrl
.unblockFromCores
= MessageBuffer()
272 dir_cntrl
.unblockFromCores
.slave
= ruby_system
.network
.master
274 dir_cntrl
.probeToCore
= MessageBuffer()
275 dir_cntrl
.probeToCore
.master
= ruby_system
.network
.slave
277 dir_cntrl
.responseToCore
= MessageBuffer()
278 dir_cntrl
.responseToCore
.master
= ruby_system
.network
.slave
280 dir_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
281 dir_cntrl
.L3triggerQueue
= MessageBuffer(ordered
= True)
282 dir_cntrl
.responseFromMemory
= MessageBuffer()
284 exec("system.dir_cntrl%d = dir_cntrl" % i
)
285 dir_cntrl_nodes
.append(dir_cntrl
)
287 mainCluster
.add(dir_cntrl
)
289 # Technically this config can support an odd number of cpus, but the top
290 # level config files, such as the ruby_random_tester, will get confused if
291 # the number of cpus does not equal the number of sequencers. Thus make
292 # sure that an even number of cpus is specified.
293 assert((options
.num_cpus
% 2) == 0)
295 # For an odd number of CPUs, still create the right number of controllers
296 cpuCluster
= Cluster(extBW
= 512, intBW
= 512) # 1 TB/s
297 for i
in range((options
.num_cpus
+ 1) // 2):
300 cp_cntrl
.create(options
, ruby_system
, system
)
302 exec("system.cp_cntrl%d = cp_cntrl" % i
)
304 # Add controllers and sequencers to the appropriate lists
306 cpu_sequencers
.extend([cp_cntrl
.sequencer
, cp_cntrl
.sequencer1
])
308 # Connect the CP controllers and the network
309 cp_cntrl
.requestFromCore
= MessageBuffer()
310 cp_cntrl
.requestFromCore
.master
= ruby_system
.network
.slave
312 cp_cntrl
.responseFromCore
= MessageBuffer()
313 cp_cntrl
.responseFromCore
.master
= ruby_system
.network
.slave
315 cp_cntrl
.unblockFromCore
= MessageBuffer()
316 cp_cntrl
.unblockFromCore
.master
= ruby_system
.network
.slave
318 cp_cntrl
.probeToCore
= MessageBuffer()
319 cp_cntrl
.probeToCore
.slave
= ruby_system
.network
.master
321 cp_cntrl
.responseToCore
= MessageBuffer()
322 cp_cntrl
.responseToCore
.slave
= ruby_system
.network
.master
324 cp_cntrl
.mandatoryQueue
= MessageBuffer()
325 cp_cntrl
.triggerQueue
= MessageBuffer(ordered
= True)
327 cpuCluster
.add(cp_cntrl
)
329 # Register CPUs and caches for each CorePair and directory (SE mode only)
331 for i
in xrange((options
.num_cpus
+ 1) // 2):
332 FileSystemConfig
.register_cpu(physical_package_id
= 0,
334 xrange(options
.num_cpus
),
336 thread_siblings
= [])
338 FileSystemConfig
.register_cpu(physical_package_id
= 0,
340 xrange(options
.num_cpus
),
342 thread_siblings
= [])
344 FileSystemConfig
.register_cache(level
= 0,
345 idu_type
= 'Instruction',
346 size
= options
.l1i_size
,
347 line_size
= options
.cacheline_size
,
348 assoc
= options
.l1i_assoc
,
351 FileSystemConfig
.register_cache(level
= 0,
353 size
= options
.l1d_size
,
354 line_size
= options
.cacheline_size
,
355 assoc
= options
.l1d_assoc
,
358 FileSystemConfig
.register_cache(level
= 0,
360 size
= options
.l1d_size
,
361 line_size
= options
.cacheline_size
,
362 assoc
= options
.l1d_assoc
,
365 FileSystemConfig
.register_cache(level
= 1,
366 idu_type
= 'Unified',
367 size
= options
.l2_size
,
368 line_size
= options
.cacheline_size
,
369 assoc
= options
.l2_assoc
,
372 for i
in range(options
.num_dirs
):
373 FileSystemConfig
.register_cache(level
= 2,
374 idu_type
= 'Unified',
375 size
= options
.l3_size
,
376 line_size
= options
.cacheline_size
,
377 assoc
= options
.l3_assoc
,
379 xrange(options
.num_cpus
)])
381 # Assuming no DMA devices
382 assert(len(dma_devices
) == 0)
384 # Add cpu/gpu clusters to main cluster
385 mainCluster
.add(cpuCluster
)
387 ruby_system
.network
.number_of_virtual_networks
= 10
389 return (cpu_sequencers
, dir_cntrl_nodes
, mainCluster
)