From c3268f882029c7501867540ccf04db054fdff084 Mon Sep 17 00:00:00 2001 From: Curtis Dunham Date: Wed, 8 Apr 2015 15:56:06 -0500 Subject: [PATCH] config: Support full-system with SST's memory system This patch adds an example configuration in ext/sst/tests/ that allows an SST/gem5 instance to simulate a 4-core AArch64 system with SST's memHierarchy components providing all the caches and memories. --- configs/common/CacheConfig.py | 49 ++++++++- configs/common/FSConfig.py | 34 +++++- configs/common/MemConfig.py | 8 ++ configs/common/Options.py | 2 + configs/example/fs.py | 5 +- ext/sst/tests/test6_arm_4c.py | 199 ++++++++++++++++++++++++++++++++++ src/dev/arm/RealView.py | 33 +++--- 7 files changed, 307 insertions(+), 23 deletions(-) create mode 100644 ext/sst/tests/test6_arm_4c.py diff --git a/configs/common/CacheConfig.py b/configs/common/CacheConfig.py index 66fe491e1..899090af5 100644 --- a/configs/common/CacheConfig.py +++ b/configs/common/CacheConfig.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2013 ARM Limited +# Copyright (c) 2012-2013, 2015 ARM Limited # All rights reserved # # The license below extends only to copyright in the software and shall @@ -46,6 +46,13 @@ from m5.objects import * from Caches import * def config_cache(options, system): + if options.external_memory_system and (options.caches or options.l2cache): + print "External caches and internal caches are exclusive options.\n" + sys.exit(1) + + if options.external_memory_system: + ExternalCache = ExternalCacheFactory(options.external_memory_system) + if options.cpu_type == "arm_detailed": try: from O3_ARM_v7a import * @@ -114,10 +121,50 @@ def config_cache(options, system): system.cpu[i].dcache = dcache_real system.cpu[i].dcache_mon = dcache_mon + elif options.external_memory_system: + # These port names are presented to whatever 'external' system + # gem5 is connecting to. Its configuration will likely depend + # on these names. For simplicity, we would advise configuring + # it to use this naming scheme; if this isn't possible, change + # the names below. + if buildEnv['TARGET_ISA'] in ['x86', 'arm']: + system.cpu[i].addPrivateSplitL1Caches( + ExternalCache("cpu%d.icache" % i), + ExternalCache("cpu%d.dcache" % i), + ExternalCache("cpu%d.itb_walker_cache" % i), + ExternalCache("cpu%d.dtb_walker_cache" % i)) + else: + system.cpu[i].addPrivateSplitL1Caches( + ExternalCache("cpu%d.icache" % i), + ExternalCache("cpu%d.dcache" % i)) + system.cpu[i].createInterruptController() if options.l2cache: system.cpu[i].connectAllPorts(system.tol2bus, system.membus) + elif options.external_memory_system: + system.cpu[i].connectUncachedPorts(system.membus) else: system.cpu[i].connectAllPorts(system.membus) return system + +# ExternalSlave provides a "port", but when that port connects to a cache, +# the connecting CPU SimObject wants to refer to its "cpu_side". +# The 'ExternalCache' class provides this adaptation by rewriting the name, +# eliminating distracting changes elsewhere in the config code. +class ExternalCache(ExternalSlave): + def __getattr__(cls, attr): + if (attr == "cpu_side"): + attr = "port" + return super(ExternalSlave, cls).__getattr__(attr) + + def __setattr__(cls, attr, value): + if (attr == "cpu_side"): + attr = "port" + return super(ExternalSlave, cls).__setattr__(attr, value) + +def ExternalCacheFactory(port_type): + def make(name): + return ExternalCache(port_data=name, port_type=port_type, + addr_ranges=[AllMemory]) + return make diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py index cfc156649..17f1f7641 100644 --- a/configs/common/FSConfig.py +++ b/configs/common/FSConfig.py @@ -1,4 +1,4 @@ -# Copyright (c) 2010-2012 ARM Limited +# Copyright (c) 2010-2012, 2015 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -203,7 +203,8 @@ def makeSparcSystem(mem_mode, mdesc=None): return self def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None, - dtb_filename=None, bare_metal=False, cmdline=None): + dtb_filename=None, bare_metal=False, cmdline=None, + external_memory=""): assert machine_type if bare_metal: @@ -293,7 +294,15 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None, 'lpj=19988480 norandmaps rw loglevel=8 ' + \ 'mem=%(mem)s root=%(rootdev)s' - self.realview.setupBootLoader(self.membus, self, binary) + # When using external memory, gem5 writes the boot loader to nvmem + # and then SST will read from it, but SST can only get to nvmem from + # iobus, as gem5's membus is only used for initialization and + # SST doesn't use it. Attaching nvmem to iobus solves this issue. + # During initialization, system_port -> membus -> iobus -> nvmem. + if external_memory: + self.realview.setupBootLoader(self.iobus, self, binary) + else: + self.realview.setupBootLoader(self.membus, self, binary) self.gic_cpu_addr = self.realview.gic.cpu_addr self.flags_addr = self.realview.realview_io.pio_addr + 0x30 @@ -322,7 +331,24 @@ def makeArmSystem(mem_mode, machine_type, num_cpus=1, mdesc=None, self.boot_osflags = fillInCmdline(mdesc, cmdline) - self.realview.attachOnChipIO(self.membus, self.bridge) + if external_memory: + # I/O traffic enters iobus + self.external_io = ExternalMaster(port_data="external_io", + port_type=external_memory) + self.external_io.port = self.iobus.slave + + # Ensure iocache only receives traffic destined for (actual) memory. + self.iocache = ExternalSlave(port_data="iocache", + port_type=external_memory, + addr_ranges=self.mem_ranges) + self.iocache.port = self.iobus.master + + # Let system_port get to nvmem and nothing else. + self.bridge.ranges = [self.realview.nvmem.range] + + self.realview.attachOnChipIO(self.iobus) + else: + self.realview.attachOnChipIO(self.membus, self.bridge) self.realview.attachIO(self.iobus) self.intrctrl = IntrControl() self.terminal = Terminal() diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py index b0ac44406..5266667ec 100644 --- a/configs/common/MemConfig.py +++ b/configs/common/MemConfig.py @@ -189,6 +189,14 @@ def config_mem(options, system): them. """ + if options.external_memory_system: + system.external_memory = m5.objects.ExternalSlave( + port_type=options.external_memory_system, + port_data="init_mem0", port=system.membus.master, + addr_ranges=system.mem_ranges) + system.kernel_addr_check = False + return + nbr_mem_ctrls = options.mem_channels import math from m5.util import fatal diff --git a/configs/common/Options.py b/configs/common/Options.py index f110f7dfb..a383b40ca 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -104,6 +104,8 @@ def addCommonOptions(parser): parser.add_option("--memchecker", action="store_true") # Cache Options + parser.add_option("--external-memory-system", type="string", + help="use external ports of this port_type for caches") parser.add_option("--caches", action="store_true") parser.add_option("--l2cache", action="store_true") parser.add_option("--fastmem", action="store_true") diff --git a/configs/example/fs.py b/configs/example/fs.py index 98c7db480..70a3b950e 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -98,7 +98,8 @@ def build_test_system(np): test_sys = makeArmSystem(test_mem_mode, options.machine_type, options.num_cpus, bm[0], options.dtb_filename, bare_metal=options.bare_metal, - cmdline=cmdline) + cmdline=cmdline, + external_memory=options.external_memory_system) if options.enable_context_switch_stats_dump: test_sys.enable_context_switch_stats_dump = True else: @@ -185,7 +186,7 @@ def build_test_system(np): test_sys.iocache = IOCache(addr_ranges = test_sys.mem_ranges) test_sys.iocache.cpu_side = test_sys.iobus.master test_sys.iocache.mem_side = test_sys.membus.slave - else: + elif not options.external_memory_system: test_sys.iobridge = Bridge(delay='50ns', ranges = test_sys.mem_ranges) test_sys.iobridge.slave = test_sys.iobus.master test_sys.iobridge.master = test_sys.membus.slave diff --git a/ext/sst/tests/test6_arm_4c.py b/ext/sst/tests/test6_arm_4c.py new file mode 100644 index 000000000..e8683145e --- /dev/null +++ b/ext/sst/tests/test6_arm_4c.py @@ -0,0 +1,199 @@ +# Copyright (c)2015 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Curtis Dunham + +import sst +import sys +import os + +lat="1 ns" +buslat="2 ns" +clockRate = "1GHz" + + +def getenv(name): + res = "" + try: + res = os.environ[name] + except KeyError: + pass + return res + +baseCacheParams = ({ + "debug" :getenv("DEBUG"), + "debug_level" : 6, + "coherence_protocol" : "MSI", + "replacement_policy" : "LRU", + "cache_line_size" : 64, + "cache_frequency" : clockRate, + "statistics" : 1 + }) + +l1CacheParams = ({ + "debug" : getenv("DEBUG"), + "debug_level" : 6, + "L1" : 1, + "cache_size" : "64 KB", + "associativity" : 4, + "access_latency_cycles" : 2, + "low_network_links" : 1 + }) + +l2CacheParams = ({ + "debug" : getenv("DEBUG"), + "debug_level" : 6, + "L1" : 0, + "cache_size" : "256 KB", + "associativity" : 8, + "access_latency_cycles" : 8, + "high_network_links" : 1, + "mshr_num_entries" : 4096, + "low_network_links" : 1 + }) + + +GEM5 = sst.Component("system", "gem5.gem5") +GEM5.addParams({ + "comp_debug" : getenv("GEM5_DEBUG"), + "gem5DebugFlags" : getenv("M5_DEBUG"), + "frequency" : clockRate, + "cmd" : "configs/example/fs.py --num-cpus 4 --disk-image=vexpress64-openembedded_minimal-armv8_20130623-376.img --root-device=/dev/sda2 --kernel=vmlinux.aarch64.20140821 --dtb-filename=vexpress.aarch64.20140821.dtb --mem-size=256MB --machine-type=VExpress_EMM64 --cpu-type=timing --external-memory-system=sst --initialize-only" + }) + +bus = sst.Component("membus", "memHierarchy.Bus") +bus.addParams({ + "bus_frequency": "2GHz", + "debug" : getenv("DEBUG"), + "debug_level" : 8 + }) + +def buildL1(name, m5, connector): + cache = sst.Component(name, "memHierarchy.Cache") + cache.addParams(baseCacheParams) + cache.addParams(l1CacheParams) + link = sst.Link("cpu_%s_link"%name) + link.connect((m5, connector, lat), (cache, "high_network_0", lat)) + return cache + +SysBusConn = buildL1("gem5SystemBus", GEM5, "system.external_memory.port") +bus_port = 0 +link = sst.Link("sysbus_bus_link") +link.connect((SysBusConn, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat)) + +bus_port = bus_port + 1 +ioCache = buildL1("ioCache", GEM5, "system.iocache.port") +ioCache.addParams({ + "debug" : 0, + "debug_level" : 6, + "cache_size" : "16 KB", + "associativity" : 4 + }) +link = sst.Link("ioCache_bus_link") +link.connect((ioCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat)) + +def buildCPU(m5, num): + l1iCache = buildL1("cpu%u.l1iCache" % num, m5, "system.cpu%u.icache.port" % num) + l1dCache = buildL1("cpu%u.l1dCache" % num, m5, "system.cpu%u.dcache.port" % num) + itlbCache = buildL1("cpu%u.itlbCache" % num, m5, "system.cpu%u.itb_walker_cache.port" % num) + dtlbCache = buildL1("cpu%u.dtlbCache" % num, m5, "system.cpu%u.dtb_walker_cache.port" % num) + l1dCache.addParams({ + "debug" : 0, + "debug_level" : 10, + "snoop_l1_invalidations" : 1 + }) + + global bus_port + link = sst.Link("cpu%u.l1iCache_bus_link" % num) ; bus_port = bus_port + 1 + link.connect((l1iCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat)) + link = sst.Link("cpu%u.l1dCache_bus_link" % num) ; bus_port = bus_port + 1 + link.connect((l1dCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat)) + link = sst.Link("cpu%u.itlbCache_bus_link" % num) ; bus_port = bus_port + 1 + link.connect((itlbCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat)) + link = sst.Link("cpu%u.dtlbCache_bus_link" % num) ; bus_port = bus_port + 1 + link.connect((dtlbCache, "low_network_0", buslat), (bus, "high_network_%u" % bus_port, buslat)) + +buildCPU(GEM5, 0) +buildCPU(GEM5, 1) +buildCPU(GEM5, 2) +buildCPU(GEM5, 3) + +l2cache = sst.Component("l2cache", "memHierarchy.Cache") +l2cache.addParams(baseCacheParams) +l2cache.addParams(l2CacheParams) +l2cache.addParams({ + "network_address" : "2", + "directory_at_next_level" : "1" +}) + +link = sst.Link("l2cache_bus_link") +link.connect((l2cache, "high_network_0", buslat), (bus, "low_network_0", buslat)) + +memory = sst.Component("memory", "memHierarchy.MemController") +memory.addParams({ + "request_width" : 64, + "coherence_protocol" : "MSI", + "access_time" : "25 ns", + "backend.mem_size" : 256, + "clock" : "2GHz", + "debug" : getenv("DEBUG"), + "range_start" : 0, # 2 * (1024 ** 3), # it's behind a directory controller. + }) + +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams({ + "xbar_bw" : "16GB/s", + "link_bw" : "16GB/s", + "input_buf_size" : "1KB", + "num_ports" : "3", + "flit_size" : "72B", + "output_buf_size" : "1KB", + "id" : "0", + "topology" : "merlin.singlerouter" +}) +comp_dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +comp_dirctrl.addParams({ + "coherence_protocol" : "MSI", + "network_address" : "1", + "entry_cache_size" : "16384", + "network_bw" : "1GB/s", + "addr_range_start" : 2 * (1024 ** 3), + "addr_range_end" : 2 * (1024 ** 3) + 256 * (1024 ** 2) +}) + +sst.Link("link_cache_net_0").connect((l2cache, "directory", "10ns"), (comp_chiprtr, "port2", "2ns")) +sst.Link("link_dir_net_0").connect((comp_chiprtr, "port1", "2ns"), (comp_dirctrl, "network", "2ns")) +sst.Link("l2cache_io_link").connect((comp_chiprtr, "port0", "2ns"), (GEM5, "network", buslat)) +sst.Link("link_dir_mem_link").connect((comp_dirctrl, "memory", "10ns"), (memory, "direct_link", "10ns")) diff --git a/src/dev/arm/RealView.py b/src/dev/arm/RealView.py index 9c9eff710..95edb9d53 100644 --- a/src/dev/arm/RealView.py +++ b/src/dev/arm/RealView.py @@ -1,4 +1,4 @@ -# Copyright (c) 2009-2014 ARM Limited +# Copyright (c) 2009-2015 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -509,21 +509,22 @@ class VExpress_EMM(RealView): # Attach I/O devices that are on chip and also set the appropriate # ranges for the bridge - def attachOnChipIO(self, bus, bridge): - self.gic.pio = bus.master - self.local_cpu_timer.pio = bus.master - if hasattr(self, "gicv2m"): - self.gicv2m.pio = bus.master - self.hdlcd.dma = bus.slave - # Bridge ranges based on excluding what is part of on-chip I/O - # (gic, a9scu) - bridge.ranges = [AddrRange(0x2F000000, size='16MB'), - AddrRange(0x2B000000, size='4MB'), - AddrRange(0x30000000, size='256MB'), - AddrRange(0x40000000, size='512MB'), - AddrRange(0x18000000, size='64MB'), - AddrRange(0x1C000000, size='64MB')] - self.vgic.pio = bus.master + def attachOnChipIO(self, bus, bridge=None): + self.gic.pio = bus.master + self.vgic.pio = bus.master + self.local_cpu_timer.pio = bus.master + if hasattr(self, "gicv2m"): + self.gicv2m.pio = bus.master + self.hdlcd.dma = bus.slave + if bridge: + # Bridge ranges based on excluding what is part of on-chip I/O + # (gic, a9scu) + bridge.ranges = [AddrRange(0x2F000000, size='16MB'), + AddrRange(0x2B000000, size='4MB'), + AddrRange(0x30000000, size='256MB'), + AddrRange(0x40000000, size='512MB'), + AddrRange(0x18000000, size='64MB'), + AddrRange(0x1C000000, size='64MB')] # Set the clock domain for IO objects that are considered -- 2.30.2