#
# Authors: Ron Dreslinski
+import optparse
+import sys
+
import m5
from m5.objects import *
-import os, optparse, sys
-m5.AddToPath('../common')
parser = optparse.OptionParser()  # command-line options for the memory tester script
-parser.add_option("-c", "--cache-levels", type="int", default=2,
- metavar="LEVELS",
- help="Number of cache levels [default: %default]")
parser.add_option("-a", "--atomic", action="store_true",  # passed to MemTest(atomic=...) and selects mem_mode 'atomic' vs 'timing'
help="Use atomic (non-timing) mode")
parser.add_option("-b", "--blocking", action="store_true",  # when set, the prototype L1 cache gets a single MSHR
help="Use blocking caches")
-parser.add_option("-l", "--maxloads", default="1G", metavar="N",
- help="Stop after N loads [default: %default]")
+parser.add_option("-l", "--maxloads", metavar="N", default=0,  # forwarded as MemTest(max_loads=...); NOTE(review): presumably 0 means "no limit" - confirm
+ help="Stop after N loads")
parser.add_option("-m", "--maxtick", type="int", default=m5.MaxTick,  # forwarded to m5.simulate()
metavar="T",
help="Stop after T ticks")
-parser.add_option("-n", "--numtesters", type="int", default=8,
- metavar="N",
- help="Number of tester pseudo-CPUs [default: %default]")
-parser.add_option("-p", "--protocol", default="moesi",
- help="Coherence protocol [default: %default]")
+
+#
+# The "tree" specification is a colon-separated list of one or more
+# integers, ordered from the level nearest main memory to the testers.
+# The first integer is the number of caches/testers connected directly
+# to main memory. The last integer in the list is the number of testers
+# attached to each object at the uppermost level of memory (each L1
+# cache, if there are caches, or main memory if no caches). Thus if
+# there is only one integer, there are no caches, and the integer
+# specifies the number of testers connected directly to main memory.
+# The other integers (if any) specify the number of caches behind each
+# cache of the next level closer to memory, for each level of the
+# hierarchy between. The total number of testers is the product of all
+# the integers.
+#
+# Examples:
+#
+# "2:1" Two caches connected to memory with a single tester behind each
+# (single-level hierarchy, two testers total)
+#
+# "2:2:1" Two-level hierarchy, 2 L1s behind each of 2 L2s, 4 testers total
+#
+parser.add_option("-t", "--treespec", type="string", default="8:1",  # parsed below into a list of per-level fanouts
+ help="Colon-separated multilevel tree specification, "
+ "see script comments for details "
+ "[default: %default]")
+
+parser.add_option("--force-bus", action="store_true",  # make_level() then inserts a crossbar even when a cache level has fanout 1
+ help="Use bus between levels even with single cache")
parser.add_option("-f", "--functional", type="int", default=0,  # forwarded as MemTest(percent_functional=...)
metavar="PCT",
metavar="NLOADS",  # NOTE(review): lines between the -f option and this one appear to be elided from this excerpt (help text below belongs to a progress option)
help="Progress message interval "
"[default: %default]")
+parser.add_option("--sys-clock", action="store", type="string",  # used as the clock of system.clk_domain
+ default='1GHz',
+ help = """Top-level clock for blocks running at system
+ speed""")
(options, args) = parser.parse_args()  # options: parsed flags; args: leftover positional arguments
print "Error: script doesn't take any positional arguments"  # NOTE(review): the guard for this error (presumably a check on args) is not visible in this excerpt
sys.exit(1)
-# Should generalize this someday... would be cool to have a loop that
-# just iterates, adding a level of caching each time.
-#if options.cache_levels != 2 and options.cache_levels != 0:
-# print "Error: number of cache levels must be 0 or 2"
-# sys.exit(1)
-
-if options.blocking:
- num_l1_mshrs = 1
- num_l2_mshrs = 1
-else:
- num_l1_mshrs = 12
- num_l2_mshrs = 92
-
block_size = 64
-# --------------------
-# Base L1 Cache
-# ====================
-
-class L1(BaseCache):
- latency = '1ns'
- block_size = block_size
- mshrs = num_l1_mshrs
- tgts_per_mshr = 8
- protocol = CoherenceProtocol(protocol=options.protocol)
-
-# ----------------------
-# Base L2 Cache
-# ----------------------
-
-class L2(BaseCache):
- block_size = block_size
- latency = '10ns'
- mshrs = num_l2_mshrs
- tgts_per_mshr = 16
- write_buffers = 8
- protocol = CoherenceProtocol(protocol=options.protocol)
-
-if options.numtesters > block_size:
+try:
+ treespec = [int(x) for x in options.treespec.split(':')]
+ numtesters = reduce(lambda x,y: x*y, treespec)
+except:
+ print "Error parsing treespec option"
+ sys.exit(1)
+
+if numtesters > block_size:  # numtesters is the product of all treespec entries
print "Error: Number of testers limited to %s because of false sharing" \
% (block_size)
sys.exit(1)
-cpus = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,
- percent_functional=options.functional,
- percent_uncacheable=options.uncacheable,
- progress_interval=options.progress)
- for i in xrange(options.numtesters) ]
+if len(treespec) < 1:  # NOTE(review): str.split(':') always returns at least one item, so this branch looks unreachable - confirm
+ print "Error parsing treespec"
+ sys.exit(1)
-# system simulated
-system = System(cpu = cpus, funcmem = PhysicalMemory(),
- physmem = PhysicalMemory(latency = "100ns"),
- membus = Bus(clock="500MHz", width=16))
-
-# l2cache & bus
-if options.cache_levels == 2:
- system.toL2Bus = Bus(clock="500MHz", width=16)
- system.l2c = L2(size='64kB', assoc=8)
- system.l2c.cpu_side = system.toL2Bus.port
-
- # connect l2c to membus
- system.l2c.mem_side = system.membus.port
-
-# add L1 caches
-for cpu in cpus:
- if options.cache_levels == 2:
- cpu.l1c = L1(size = '32kB', assoc = 4)
- cpu.test = cpu.l1c.cpu_side
- cpu.l1c.mem_side = system.toL2Bus.port
- elif options.cache_levels == 1:
- cpu.l1c = L1(size = '32kB', assoc = 4)
- cpu.test = cpu.l1c.cpu_side
- cpu.l1c.mem_side = system.membus.port
- else:
- cpu.test = system.membus.port
- system.funcmem.port = cpu.functional
-
-# connect memory to membus
-system.physmem.port = system.membus.port
+# define the prototype L1 cache; make_level() calls prototypes to create the actual objects
+proto_l1 = BaseCache(size = '32kB', assoc = 4,
+ hit_latency = 1, response_latency = 1,
+ tgts_per_mshr = 8)
+if options.blocking:  # --blocking: limit the L1 prototype to a single MSHR
+ proto_l1.mshrs = 1
+else:
+ proto_l1.mshrs = 4
+
+# build a list of prototypes, one for each level of treespec, ordered
+# memory side first; it is filled from the end (last entry is the tester prototype)
+prototypes = [ MemTest(atomic=options.atomic, max_loads=options.maxloads,  # tester prototype configured from the command-line options
+ percent_functional=options.functional,
+ percent_uncacheable=options.uncacheable,
+ progress_interval=options.progress) ]
+
+# next comes the L1 cache, present only when treespec has more than one entry
+if len(treespec) > 1:
+ prototypes.insert(0, proto_l1)
+
+# now add additional cache levels (if any) by scaling L1 params; one new level per entry of treespec[:-2]
+for scale in treespec[:-2]:  # NOTE(review): entries are read memory side first while levels are added from L2 outward - confirm the scale/level pairing is intended
+ # clone previous level and update params
+ prev = prototypes[0]  # current memory-side-most prototype
+ next = prev()  # NOTE(review): this local name shadows the builtin next()
+ next.size = prev.size * scale
+ next.hit_latency = prev.hit_latency * 10  # latencies grow 10x per added level
+ next.response_latency = prev.response_latency * 10
+ next.assoc = prev.assoc * scale
+ next.mshrs = prev.mshrs * scale
+ prototypes.insert(0, next)  # the new level becomes the head (memory side) of the list
+
+# system simulated: a reference memory behind its own bus plus the physical memory that roots the cache tree
+system = System(funcmem = SimpleMemory(in_addr_map = False),  # reference memory, connected to funcbus further down
+ funcbus = NoncoherentXBar(),  # testers' functional ports and the system port attach here
+ physmem = SimpleMemory(latency = "100ns"),  # make_level() builds the hierarchy on this memory's port
+ cache_line_size = block_size)
+
+
+system.voltage_domain = VoltageDomain(voltage = '1V')
+
+system.clk_domain = SrcClockDomain(clock = options.sys_clock,  # clock comes from the --sys-clock option
+ voltage_domain = system.voltage_domain)
+
+def make_level(spec, prototypes, attach_obj, attach_port):  # recursively build the levels listed in spec, hanging them off attach_obj's port named attach_port
+ fanout = spec[0]  # number of objects to create at this level
+ parent = attach_obj # use attach obj as config parent too
+ if len(spec) > 1 and (fanout > 1 or options.force_bus):  # cache levels only: insert a crossbar when fanning out or when --force-bus is given
+ port = getattr(attach_obj, attach_port)  # the parent port this level connects to
+ new_bus = CoherentXBar(width=16)
+ if (port.role == 'MASTER'):  # orient the crossbar according to the parent port's role
+ new_bus.slave = port
+ attach_port = "master"  # children now attach to the crossbar's "master" port
+ else:
+ new_bus.master = port
+ attach_port = "slave"  # children now attach to the crossbar's "slave" port
+ parent.cpu_side_bus = new_bus  # the crossbar is stored as an attribute of the original parent
+ attach_obj = new_bus  # connections below go through the crossbar instead of the parent
+ objs = [prototypes[0]() for i in xrange(fanout)]  # one new object per fanout slot, made by calling this level's prototype
+ if len(spec) > 1:
+ # we just built caches, more levels to go
+ parent.cache = objs
+ for cache in objs:
+ cache.mem_side = getattr(attach_obj, attach_port)
+ make_level(spec[1:], prototypes[1:], cache, "cpu_side")  # recurse: the remaining levels hang off this cache's cpu_side
+ else:
+ # we just built the MemTest objects
+ parent.cpu = objs
+ for t in objs:
+ t.test = getattr(attach_obj, attach_port)
+ t.functional = system.funcbus.slave  # every tester's functional port goes to the shared funcbus
+
+make_level(treespec, prototypes, system.physmem, "port")  # build the whole tree, rooted at physmem's "port"
+
+# connect reference memory to funcbus
+system.funcbus.master = system.funcmem.port
# -----------------------
# run simulation
# -----------------------
-root = Root( system = system )
+root = Root( full_system = False, system = system )
if options.atomic:  # --atomic selects the memory mode
root.system.mem_mode = 'atomic'
else:
root.system.mem_mode = 'timing'
+# The system port is never used in the tester so merely connect it
+# to avoid problems
+root.system.system_port = root.system.funcbus.slave
+
# Not much point in this being higher than the L1 latency
m5.ticks.setGlobalFrequency('1ns')
# instantiate configuration
-m5.instantiate(root)
+m5.instantiate()
# simulate until program terminates
exit_event = m5.simulate(options.maxtick)  # options.maxtick comes from -m/--maxtick (default m5.MaxTick)