# Copyright (c) 2015, 2018 ARM Limited
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2006-2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ron Dreslinski
from __future__ import print_function

import optparse
import random
import sys

import m5
from m5.objects import *

# This example script stress tests the memory system by creating false
# sharing in a tree topology. At the bottom of the tree is a shared
# memory, and then at each level a number of testers are attached,
# along with a number of caches that themselves fan out to subtrees
# of testers and caches. Thus, it is possible to create a system with
# arbitrarily deep cache hierarchies, sharing or no sharing of caches,
# and testers not only at the L1s, but also at the L2s, L3s etc.

# Command line interface of the script. All options are optional; the
# script takes no positional arguments.
#
# NOTE(review): this block was rebuilt from a listing in which several
# physical lines were dropped. The metavar values for -f/-u/--progress,
# the --sys-clock default and the tail of its help string are
# reconstructions; confirm them against the upstream script.
parser = optparse.OptionParser()

parser.add_option("-a", "--atomic", action="store_true",
                  help="Use atomic (non-timing) mode")
parser.add_option("-b", "--blocking", action="store_true",
                  help="Use blocking caches")
parser.add_option("-l", "--maxloads", metavar="N", default=0,
                  help="Stop after N loads")
parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,
                  metavar="T",
                  help="Stop after T ticks")

# The tree specification consists of two colon-separated lists of one
# or more integers, one for the caches, and one for the testers. The
# first integer is the number of caches/testers closest to main
# memory. Each cache then fans out to a subtree. The last integer in
# the list is the number of caches/testers associated with the
# uppermost level of memory. The other integers (if any) specify the
# number of caches/testers connected at each level of the crossbar
# hierarchy. The tester string should have one element more than the
# cache string as there should always be testers attached to the
# uppermost caches.

parser.add_option("-c", "--caches", type="string", default="2:2:1",
                  help="Colon-separated cache hierarchy specification, "
                  "see script comments for details "
                  "[default: %default]")
parser.add_option("--noncoherent-cache", action="store_true",
                  help="Adds a non-coherent, last-level cache")
parser.add_option("-t", "--testers", type="string", default="1:1:0:2",
                  help="Colon-separated tester hierarchy specification, "
                  "see script comments for details "
                  "[default: %default]")
parser.add_option("-f", "--functional", type="int", default=10,
                  metavar="PCT",
                  help="Target percentage of functional accesses "
                  "[default: %default]")
parser.add_option("-u", "--uncacheable", type="int", default=10,
                  metavar="PCT",
                  help="Target percentage of uncacheable accesses "
                  "[default: %default]")
parser.add_option("-r", "--random", action="store_true",
                  help="Generate a random tree topology")
parser.add_option("--progress", type="int", default=100000,
                  metavar="NLOADS",
                  help="Progress message interval "
                  "[default: %default]")
parser.add_option("--sys-clock", action="store", type="string",
                  default='1GHz',
                  help="Top-level clock for blocks running at system "
                  "speed")

(options, args) = parser.parse_args()

# Anything left over after option parsing is a usage error.
if args:
    print("Error: script doesn't take any positional arguments")
    sys.exit(1)

# Get the total number of testers
def numtesters(cachespec, testerspec):
    """Return the total number of testers in the whole tree.

    Both arguments are lists of per-subsystem counts, ordered from the
    level closest to memory upwards. The counts in ``testerspec`` are
    per subsystem, and each cache fans out into its own subtree, so the
    tester count at a level is scaled by the product of the cache counts
    of all levels below it.

    cachespec  -- number of caches per subsystem at each level
    testerspec -- number of testers per subsystem at each level
                  (the script expects one element more than cachespec)
    """
    # Determine the tester multiplier for each level as the
    # elements are per subsystem and it fans out
    multiplier = [1]
    for c in cachespec:
        multiplier.append(multiplier[-1] * c)

    # zip() stops at the shorter list, so surplus entries in either
    # specification are ignored rather than raising.
    return sum(t * m for t, m in zip(testerspec, multiplier))

# Cache line size in bytes. It is used as the system cache line size
# and as the upper bound on the number of testers.
# NOTE(review): the assignment was dropped from the listing; the value
# 64 is a reconstruction -- confirm against the upstream script.
block_size = 64

# Start by parsing the command line options and do some basic sanity
# checking
if options.random:
    # Generate a tree with a valid number of testers, retrying until
    # the total stays below the block size
    while True:
        tree_depth = random.randint(1, 4)
        cachespec = [random.randint(1, 3) for i in range(tree_depth)]
        testerspec = [random.randint(1, 3) for i in range(tree_depth + 1)]
        if numtesters(cachespec, testerspec) < block_size:
            break

    print("Generated random tree -c", ':'.join(map(str, cachespec)),
          "-t", ':'.join(map(str, testerspec)))
else:
    # int() raises ValueError for anything that is not an integer,
    # including the empty string produced by a stray ':'
    try:
        cachespec = [int(x) for x in options.caches.split(':')]
        testerspec = [int(x) for x in options.testers.split(':')]
    except ValueError:
        print("Error: Unable to parse caches or testers option")
        sys.exit(1)

    if len(cachespec) < 1:
        print("Error: Must have at least one level of caches")
        sys.exit(1)

    if len(cachespec) != len(testerspec) - 1:
        print("Error: Testers must have one element more than caches")
        sys.exit(1)

    if testerspec[-1] == 0:
        print("Error: Must have testers at the uppermost level")
        sys.exit(1)

    if any(t < 0 for t in testerspec):
        print("Error: Cannot have a negative number of testers")
        sys.exit(1)

    if any(c < 1 for c in cachespec):
        print("Error: Must have 1 or more caches at each level")
        sys.exit(1)

    if numtesters(cachespec, testerspec) > block_size:
        print("Error: Limited to %s testers because of false sharing"
              % block_size)
        sys.exit(1)

# Define a prototype L1 cache that we scale for all successive levels
proto_l1 = Cache(size='32kB', assoc=4,
                 tag_latency=1, data_latency=1, response_latency=1,
                 tgts_per_mshr=8, clusivity='mostly_incl',
                 writeback_clean=True)

# A blocking cache is modelled with a single MSHR.
# NOTE(review): these statements were dropped from the listing; the
# non-blocking MSHR count of 4 is a reconstruction -- confirm upstream.
if options.blocking:
    proto_l1.mshrs = 1
else:
    proto_l1.mshrs = 4

# Prototype list ordered from the level closest to memory (index 0)
# to the L1 (last element)
cache_proto = [proto_l1]

# Now add additional cache levels (if any) by scaling L1 params, the
# first element is Ln, and the last element L1
for scale in cachespec[:-1]:
    # Clone previous level and update params. Calling the prototype
    # produces the clone, as done for the caches elsewhere in the script.
    prev = cache_proto[0]
    next_level = prev()
    next_level.size = prev.size * scale
    next_level.tag_latency = prev.tag_latency * 10
    next_level.data_latency = prev.data_latency * 10
    next_level.response_latency = prev.response_latency * 10
    next_level.assoc = prev.assoc * scale
    next_level.mshrs = prev.mshrs * scale

    # Swap the inclusivity/exclusivity at each level. L2 is mostly
    # exclusive with respect to L1, L3 mostly inclusive, L4 mostly
    # exclusive etc.
    next_level.writeback_clean = not prev.writeback_clean
    if prev.clusivity.value == 'mostly_incl':
        next_level.clusivity = 'mostly_excl'
    else:
        next_level.clusivity = 'mostly_incl'

    cache_proto.insert(0, next_level)

# Make a prototype for the tester to be used throughout
proto_tester = MemTest(max_loads=options.maxloads,
                       percent_functional=options.functional,
                       percent_uncacheable=options.uncacheable,
                       progress_interval=options.progress)

# Set up the system along with a simple memory and reference memory
system = System(physmem=SimpleMemory(),
                cache_line_size=block_size)

system.voltage_domain = VoltageDomain(voltage='1V')

system.clk_domain = SrcClockDomain(clock=options.sys_clock,
                                   voltage_domain=system.voltage_domain)

# For each level, track the next subsys index to use. Levels run from
# 0 up to len(cachespec), hence the extra element.
next_subsys_index = [0] * (len(cachespec) + 1)

# Recursive function to create a sub-tree of the cache and tester
# hierarchy
def make_cache_level(ncaches, prototypes, level, next_cache):
    """Create one subsystem of the tree and recurse into its sub-trees.

    The subsystem is attached to the global ``system`` under the name
    ``l<level>subsys<index>``, where ``index`` counts the subsystems
    already created at that level.

    ncaches    -- cache counts for this level and the levels above it;
                  element 0 is the number of tree caches created here
    prototypes -- cache prototypes aligned with ``ncaches``
    level      -- level being built; the top-level call passes
                  len(cachespec) and each recursion passes level - 1
    next_cache -- cache on the memory side of this subsystem, or None
                  for the top-level call

    NOTE(review): the listing this was rebuilt from dropped the
    subsystem/crossbar construction lines and every branch header. The
    SubSystem and L2XBar class names and the level/ntesters conditions
    are reconstructions -- confirm against the upstream script.
    """
    # Only the contents of next_subsys_index are modified, so no
    # 'global' declaration is required.
    index = next_subsys_index[level]
    next_subsys_index[level] += 1

    # Create a subsystem to contain the crossbar and caches, and
    # any testers
    subsys = SubSystem()
    setattr(system, 'l%dsubsys%d' % (level, index), subsys)

    # The levels are indexing backwards through the list
    ntesters = testerspec[len(cachespec) - level]

    # Scale the progress threshold as testers higher up in the tree
    # (smaller level) get a smaller portion of the overall bandwidth,
    # and also make the interval of packet injection longer for the
    # testers closer to the memory (larger level) to prevent them
    # hogging all the bandwidth
    limit = (len(cachespec) - level + 1) * 100000000
    testers = [proto_tester(interval=10 * (level * level + 1),
                            progress_check=limit)
               for _ in range(ntesters)]
    if ntesters:
        subsys.tester = testers

    if level != 0:
        # Create a crossbar and add it to the subsystem, note that
        # we do this even with a single element on this level
        xbar = L2XBar()
        subsys.xbar = xbar
        if next_cache:
            xbar.master = next_cache.cpu_side

        # Create and connect the caches, both the ones fanning out
        # to create the tree, and the ones used to connect testers
        # on this level
        tree_caches = [prototypes[0]() for _ in range(ncaches[0])]
        tester_caches = [proto_l1() for _ in range(ntesters)]

        subsys.cache = tester_caches + tree_caches
        for cache in tree_caches:
            cache.mem_side = xbar.slave
            make_cache_level(ncaches[1:], prototypes[1:], level - 1, cache)
        for tester, cache in zip(testers, tester_caches):
            tester.port = cache.cpu_side
            cache.mem_side = xbar.slave
    else:
        if not next_cache:
            print("Error: No next-level cache at top level")
            sys.exit(1)

        if ntesters > 1:
            # Create a crossbar and add it to the subsystem
            xbar = L2XBar()
            subsys.xbar = xbar
            xbar.master = next_cache.cpu_side
            for tester in testers:
                tester.port = xbar.slave
        else:
            # Single tester: connect it directly to the cache
            testers[0].port = next_cache.cpu_side

# Top level call to create the cache hierarchy, bottom up
make_cache_level(cachespec, cache_proto, len(cachespec), None)

# Connect the lowest level crossbar to the last-level cache and memory
# controller
last_subsys = getattr(system, 'l%dsubsys0' % len(cachespec))
last_subsys.xbar.point_of_coherency = True
if options.noncoherent_cache:
    # NOTE(review): one constructor argument line was dropped from the
    # listing; mshrs=64 is a reconstruction -- confirm upstream.
    system.llc = NoncoherentCache(size='16MB', assoc=16, tag_latency=10,
                                  data_latency=10, sequential_access=True,
                                  response_latency=20, tgts_per_mshr=8,
                                  mshrs=64)
    last_subsys.xbar.master = system.llc.cpu_side
    system.llc.mem_side = system.physmem.port
else:
    last_subsys.xbar.master = system.physmem.port

root = Root(full_system=False, system=system)
# NOTE(review): the condition line was dropped from the listing; it is
# inferred from the --atomic option ("Use atomic (non-timing) mode").
if options.atomic:
    root.system.mem_mode = 'atomic'
else:
    root.system.mem_mode = 'timing'

# The system port is never used in the tester so merely connect it
# to avoid problems
root.system.system_port = last_subsys.xbar.slave

# Instantiate configuration
m5.instantiate()

# Simulate until program terminates
exit_event = m5.simulate(options.maxtick)

print('Exiting @ tick', m5.curTick(), 'because', exit_event.getCause())