a1aa77df41de4e709aa8871a3103c02679d40f44
1 # Copyright (c) 2015-2016 ARM Limited
4 # The license below extends only to copyright in the software and shall
5 # not be construed as granting a license to any other intellectual
6 # property including but not limited to intellectual property relating
7 # to a hardware implementation of the functionality of the software
8 # licensed hereunder. You may use the software subject to the license
9 # terms below provided that you ensure that this notice is replicated
10 # unmodified and in its entirety in all distributions of the software,
11 # modified or unmodified, in source code or in binary form.
13 # Redistribution and use in source and binary forms, with or without
14 # modification, are permitted provided that the following conditions are
15 # met: redistributions of source code must retain the above copyright
16 # notice, this list of conditions and the following disclaimer;
17 # redistributions in binary form must reproduce the above copyright
18 # notice, this list of conditions and the following disclaimer in the
19 # documentation and/or other materials provided with the distribution;
20 # neither the name of the copyright holders nor the names of its
21 # contributors may be used to endorse or promote products derived from
22 # this software without specific prior written permission.
24 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
31 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
32 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
33 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 # Authors: Andreas Hansson
38 from __future__
import print_function
45 from m5
.objects
import *
46 from m5
.util
import addToPath
47 from m5
.stats
import periodicStatDump
50 from common
import MemConfig
52 addToPath('../../util')
55 # this script is helpful to observe the memory latency for various
56 # levels in a cache hierarchy, and various cache and memory
57 # configurations, in essence replicating the lmbench lat_mem_rd thrash behaviour
60 # import the packet proto definitions, and if they are not found,
61 # attempt to generate them automatically
65 print("Did not find packet proto definitions, attempting to generate")
66 from subprocess
import call
67 error
= call(['protoc', '--python_out=configs/dram',
68 '--proto_path=src/proto', 'src/proto/packet.proto'])
70 print("Generated packet proto definitions")
73 import google
.protobuf
75 print("Please install the Python protobuf module")
80 print("Failed to import packet proto definitions")
83 parser
= optparse
.OptionParser()
85 parser
.add_option("--mem-type", type="choice", default
="DDR3_1600_8x8",
86 choices
=MemConfig
.mem_names(),
87 help = "type of memory to use")
88 parser
.add_option("--mem-size", action
="store", type="string",
90 help="Specify the memory size")
91 parser
.add_option("--reuse-trace", action
="store_true",
92 help="Prevent generation of traces and reuse existing")
94 (options
, args
) = parser
.parse_args()
97 print("Error: script doesn't take any positional arguments")
100 # start by creating the system itself, using a multi-layer 2.0 GHz
101 # crossbar, delivering 64 bytes / 3 cycles (one header cycle) which
102 # amounts to 42.7 GByte/s per layer and thus per port
103 system
= System(membus
= SystemXBar(width
= 32))
104 system
.clk_domain
= SrcClockDomain(clock
= '2.0GHz',
106 VoltageDomain(voltage
= '1V'))
108 mem_range
= AddrRange(options
.mem_size
)
109 system
.mem_ranges
= [mem_range
]
111 # do not worry about reserving space for the backing store
112 system
.mmap_using_noreserve
= True
114 # currently not exposed as command-line options, set here for now
115 options
.mem_channels
= 1
116 options
.mem_ranks
= 1
117 options
.external_memory_system
= 0
118 options
.tlm_memory
= 0
119 options
.elastic_trace_en
= 0
121 MemConfig
.config_mem(options
, system
)
123 # there is no point slowing things down by saving any data
124 for ctrl
in system
.mem_ctrls
:
127 # the following assumes that we are using the native DRAM
128 # controller, check to be sure
129 if isinstance(ctrl
, m5
.objects
.DRAMCtrl
):
130 # make the DRAM refresh interval sufficiently long to avoid refresh-induced latency spikes
134 # use the same concept as the utilisation sweep, and print the config
135 # so that we can later read it in
136 cfg_file_name
= os
.path
.join(m5
.options
.outdir
, "lat_mem_rd.cfg")
137 cfg_file
= open(cfg_file_name
, 'w')
139 # set an appropriate burst length in bytes
141 system
.cache_line_size
= burst_size
143 # lazy version to check if an integer is a power of two
145 return num
!= 0 and ((num
& (num
- 1)) == 0)
147 # assume we start every range at 0
148 max_range
= int(mem_range
.end
)
150 # start at a size of 4 kByte, and go up till we hit the max, increase
151 # the step every time we hit a power of two
156 while ranges
[-1] < max_range
:
157 new_range
= ranges
[-1] + step
158 if is_pow2(new_range
):
160 ranges
.append(new_range
)
162 # how many times to repeat the measurement for each data point
165 # 150 ns in ticks, this is chosen to be high enough that transactions
166 # do not pile up in the system, adjust if needed
169 # for every data point, we create a trace containing a random address
170 # sequence, so that we can play back the same sequence for warming and
171 # the actual measurement
172 def create_trace(filename
, max_addr
, burst_size
, itt
):
174 proto_out
= gzip
.open(filename
, 'wb')
176 print("Failed to open ", filename
, " for writing")
179 # write the magic number in 4-byte Little Endian, similar to what
180 # is done in src/proto/protoio.cc
181 proto_out
.write("gem5")
183 # add the packet header
184 header
= packet_pb2
.PacketHeader()
185 header
.obj_id
= "lat_mem_rd for range 0:" + str(max_addr
)
186 # assume the default tick rate (1 ps)
187 header
.tick_freq
= 1000000000000
188 protolib
.encodeMessage(proto_out
, header
)
190 # create a list of every single address to touch
191 addrs
= list(range(0, max_addr
, burst_size
))
194 random
.shuffle(addrs
)
198 # create a packet we can re-use for all the addresses
199 packet
= packet_pb2
.Packet()
200 # ReadReq is 1 in src/mem/packet.hh Command enum
202 packet
.size
= int(burst_size
)
205 packet
.tick
= long(tick
)
206 packet
.addr
= long(addr
)
207 protolib
.encodeMessage(proto_out
, packet
)
212 # this will take a while, so keep the user informed
213 print("Generating traces, please wait...")
217 period
= long(itt
* (max_range
/ burst_size
))
219 # now we create the states for each range
221 filename
= os
.path
.join(m5
.options
.outdir
,
222 'lat_mem_rd%d.trc.gz' % nxt_range
)
224 if not options
.reuse_trace
:
225 # create the actual random trace for this range
226 create_trace(filename
, r
, burst_size
, itt
)
229 cfg_file
.write("STATE %d %d TRACE %s 0\n" %
230 (nxt_state
, period
, filename
))
231 nxt_state
= nxt_state
+ 1
233 # the measuring states
234 for i
in range(iterations
):
235 cfg_file
.write("STATE %d %d TRACE %s 0\n" %
236 (nxt_state
, period
, filename
))
237 nxt_state
= nxt_state
+ 1
239 nxt_range
= nxt_range
+ 1
241 cfg_file
.write("INIT 0\n")
243 # go through the states one by one
244 for state
in range(1, nxt_state
):
245 cfg_file
.write("TRANSITION %d %d 1\n" % (state
- 1, state
))
247 cfg_file
.write("TRANSITION %d %d 1\n" % (nxt_state
- 1, nxt_state
- 1))
251 # create a traffic generator, and point it to the file we just created
252 system
.tgen
= TrafficGen(config_file
= cfg_file_name
,
253 progress_check
= '10s')
255 # add a communication monitor
256 system
.monitor
= CommMonitor()
257 system
.monitor
.footprint
= MemFootprintProbe()
259 # connect the traffic generator to the system
260 system
.tgen
.port
= system
.monitor
.slave
262 # create the actual cache hierarchy, for now just go with something
263 # basic to explore some of the options
264 from common
.Caches
import *
266 # a starting point for an L3 cache
267 class L3Cache(Cache
):
271 sequential_access
= True
272 response_latency
= 40
277 # note that everything is in the same clock domain, 2.0 GHz as set on system.clk_domain above
279 system
.l1cache
= L1_DCache(size
= '64kB')
280 system
.monitor
.master
= system
.l1cache
.cpu_side
282 system
.l2cache
= L2Cache(size
= '512kB', writeback_clean
= True)
283 system
.l2cache
.xbar
= L2XBar()
284 system
.l1cache
.mem_side
= system
.l2cache
.xbar
.slave
285 system
.l2cache
.cpu_side
= system
.l2cache
.xbar
.master
287 # make the L3 mostly exclusive, and correspondingly ensure that the L2
288 # writes back also clean lines to the L3
289 system
.l3cache
= L3Cache(size
= '4MB', clusivity
= 'mostly_excl')
290 system
.l3cache
.xbar
= L2XBar()
291 system
.l2cache
.mem_side
= system
.l3cache
.xbar
.slave
292 system
.l3cache
.cpu_side
= system
.l3cache
.xbar
.master
293 system
.l3cache
.mem_side
= system
.membus
.slave
295 # connect the system port even if it is not used in this example
296 system
.system_port
= system
.membus
.slave
298 # every period, dump and reset all stats
299 periodicStatDump(period
)
302 root
= Root(full_system
= False, system
= system
)
303 root
.system
.mem_mode
= 'timing'
306 m5
.simulate(nxt_state
* period
)
308 # print all we need to make sense of the stats output
309 print("lat_mem_rd with %d iterations, ranges:" % iterations
)