From: Radhika Jagtap Date: Mon, 7 Dec 2015 22:42:16 +0000 (-0600) Subject: config: Enable elastic trace capture and replay in se/fs X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9bd5051b6022249f95364ef30b100b69ac7e7c37;p=gem5.git config: Enable elastic trace capture and replay in se/fs This patch adds changes to the configuration scripts to support elastic tracing and replay. The patch adds a command line option to enable elastic tracing in SE mode and FS mode. When enabled the Elastic Trace cpu probe is attached to O3CPU and a few O3 CPU parameters are tuned. The Elastic Trace probe writes out both instruction fetch and data dependency traces. The patch also enables configuring the TraceCPU to replay traces using the SE and FS script. The replay run is designed to resume from checkpoint using atomic cpu to restore state keeping it consistent with FS run flow. It then switches to TraceCPU to replay the input traces. --- diff --git a/configs/common/CacheConfig.py b/configs/common/CacheConfig.py index d54df7490..b96a7011c 100644 --- a/configs/common/CacheConfig.py +++ b/configs/common/CacheConfig.py @@ -69,6 +69,13 @@ def config_cache(options, system): # Set the cache line size of the system system.cache_line_size = options.cacheline_size + # If elastic trace generation is enabled, make sure the memory system is + # minimal so that compute delays do not include memory access latencies. + # Configure the compulsory L1 caches for the O3CPU, do not configure + # any more caches. + if options.l2cache and options.elastic_trace_en: + fatal("When elastic trace is enabled, do not configure L2 caches.") + if options.l2cache: # Provide a clock for the L2 and the L1-to-L2 bus here as they # are not connected using addTwoLevelCacheHierarchy. 
Use the diff --git a/configs/common/CpuConfig.py b/configs/common/CpuConfig.py index 6b530623d..3220acecd 100644 --- a/configs/common/CpuConfig.py +++ b/configs/common/CpuConfig.py @@ -53,6 +53,7 @@ _cpu_aliases_all = [ ("minor", "MinorCPU"), ("detailed", "DerivO3CPU"), ("kvm", ("ArmKvmCPU", "ArmV8KvmCPU", "X86KvmCPU")), + ("trace", "TraceCPU"), ] # Filtered list of aliases. Only aliases for existing CPUs exist in @@ -108,6 +109,30 @@ def cpu_names(): """Return a list of valid CPU names.""" return _cpu_classes.keys() + _cpu_aliases.keys() +def config_etrace(cpu_cls, cpu_list, options): + if issubclass(cpu_cls, m5.objects.DerivO3CPU): + # Assign the same file name to all cpus for now. This must be + # revisited when creating elastic traces for multi processor systems. + for cpu in cpu_list: + # Attach the elastic trace probe listener. Set the protobuf trace + # file names. Set the dependency window size to three times the + # number of ROB entries of the cpu it is attached to. + cpu.traceListener = m5.objects.ElasticTrace( + instFetchTraceFile = options.inst_trace_file, + dataDepTraceFile = options.data_trace_file, + depWindowSize = 3 * cpu.numROBEntries) + # Make the number of entries in the ROB, LQ and SQ very + # large so that there are no stalls due to resource + # limitation as such stalls will get captured in the trace + # as compute delay. For replay, ROB, LQ and SQ sizes are + # modelled in the Trace CPU. + cpu.numROBEntries = 512; + cpu.LQEntries = 128; + cpu.SQEntries = 128; + else: + fatal("%s does not support data dependency tracing. Use a CPU model of" + " type or inherited from DerivO3CPU.", cpu_cls) + # The ARM detailed CPU is special in the sense that it doesn't exist # in the normal object hierarchy, so we have to add it manually. 
try: diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py index 286898798..4685cd5d1 100644 --- a/configs/common/MemConfig.py +++ b/configs/common/MemConfig.py @@ -187,6 +187,11 @@ def config_mem(options, system): cls = get(options.mem_type) mem_ctrls = [] + if options.elastic_trace_en and not issubclass(cls, \ + m5.objects.SimpleMemory): + fatal("When elastic trace is enabled, configure mem-type as " + "simple-mem.") + # The default behaviour is to interleave memory channels on 128 # byte granularity, or cache line granularity if larger than 128 # byte. This value is based on the locality seen across a large @@ -206,6 +211,11 @@ def config_mem(options, system): options.mem_ranks: mem_ctrl.ranks_per_channel = options.mem_ranks + if options.elastic_trace_en: + mem_ctrl.latency = '1ns' + print "For elastic trace, over-riding Simple Memory " \ + "latency to 1ns." + mem_ctrls.append(mem_ctrl) subsystem.mem_ctrls = mem_ctrls diff --git a/configs/common/Options.py b/configs/common/Options.py index f4cf9fbd6..45be8e2f8 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -87,6 +87,19 @@ def addCommonOptions(parser): Only used if multiple programs are specified. 
If true, then the number of threads per cpu is same as the number of programs.""") + parser.add_option("--elastic-trace-en", action="store_true", + help="""Enable capture of data dependency and instruction + fetch traces using elastic trace probe.""") + # Trace file paths input to trace probe in a capture simulation and input + # to Trace CPU in a replay simulation + parser.add_option("--inst-trace-file", action="store", type="string", + help="""Instruction fetch trace file input to + Elastic Trace probe in a capture simulation and + Trace CPU in a replay simulation""", default="") + parser.add_option("--data-trace-file", action="store", type="string", + help="""Data dependency trace file input to + Elastic Trace probe in a capture simulation and + Trace CPU in a replay simulation""", default="") # Memory Options parser.add_option("--list-mem-types", diff --git a/configs/common/Simulation.py b/configs/common/Simulation.py index 73874674c..13edf601b 100644 --- a/configs/common/Simulation.py +++ b/configs/common/Simulation.py @@ -459,10 +459,11 @@ def run(options, root, testsys, cpu_class): for i in xrange(np): if options.fast_forward: testsys.cpu[i].max_insts_any_thread = int(options.fast_forward) - switch_cpus[i].system = testsys + switch_cpus[i].system = testsys switch_cpus[i].workload = testsys.cpu[i].workload switch_cpus[i].clk_domain = testsys.cpu[i].clk_domain - switch_cpus[i].progress_interval = testsys.cpu[i].progress_interval + switch_cpus[i].progress_interval = \ + testsys.cpu[i].progress_interval # simulation period if options.maxinsts: switch_cpus[i].max_insts_any_thread = options.maxinsts @@ -470,6 +471,11 @@ def run(options, root, testsys, cpu_class): if options.checker: switch_cpus[i].addCheckerCpu() + # If elastic tracing is enabled attach the elastic trace probe + # to the switch CPUs + if options.elastic_trace_en: + CpuConfig.config_etrace(cpu_class, switch_cpus, options) + testsys.switch_cpus = switch_cpus switch_cpu_list = [(testsys.cpu[i], 
switch_cpus[i]) for i in xrange(np)] diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py index 8459f9d00..368e39488 100644 --- a/configs/dram/sweep.py +++ b/configs/dram/sweep.py @@ -101,6 +101,7 @@ system.mmap_using_noreserve = True options.mem_channels = 1 options.external_memory_system = 0 options.tlm_memory = 0 +options.elastic_trace_en = 0 MemConfig.config_mem(options, system) # the following assumes that we are using the native DRAM diff --git a/configs/example/etrace_replay.py b/configs/example/etrace_replay.py new file mode 100644 index 000000000..e39024f0f --- /dev/null +++ b/configs/example/etrace_replay.py @@ -0,0 +1,119 @@ +# Copyright (c) 2015 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Radhika Jagtap + +# Basic elastic traces replay script that configures a Trace CPU + +import optparse + +from m5.util import addToPath, fatal + +addToPath('../common') + +import Options +import Simulation +import CacheConfig +import MemConfig +from Caches import * + +parser = optparse.OptionParser() +Options.addCommonOptions(parser) + +if '--ruby' in sys.argv: + print "This script does not support Ruby configuration, mainly"\ + " because Trace CPU has been tested only with classic memory system" + sys.exit(1) + +(options, args) = parser.parse_args() + +if args: + print "Error: script doesn't take any positional arguments" + sys.exit(1) + +numThreads = 1 + +if options.cpu_type != "trace": + fatal("This is a script for elastic trace replay simulation, use "\ + "--cpu-type=trace\n"); + +if options.num_cpus > 1: + fatal("This script does not support multi-processor trace replay.\n") + +# In this case FutureClass will be None as there is no fast forwarding or +# switching +(CPUClass, test_mem_mode, FutureClass) = Simulation.setCPUClass(options) +CPUClass.numThreads = numThreads + +system = System(cpu = CPUClass(cpu_id=0), + mem_mode = test_mem_mode, + mem_ranges = 
[AddrRange(options.mem_size)], + cache_line_size = options.cacheline_size) + +# Create a top-level voltage domain +system.voltage_domain = VoltageDomain(voltage = options.sys_voltage) + +# Create a source clock for the system. This is used as the clock period for +# xbar and memory +system.clk_domain = SrcClockDomain(clock = options.sys_clock, + voltage_domain = system.voltage_domain) + +# Create a CPU voltage domain +system.cpu_voltage_domain = VoltageDomain() + +# Create a separate clock domain for the CPUs. In case of Trace CPUs this clock +# is actually used only by the caches connected to the CPU. +system.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock, + voltage_domain = + system.cpu_voltage_domain) + +# All cpus belong to a common cpu_clk_domain, therefore running at a common +# frequency. +for cpu in system.cpu: + cpu.clk_domain = system.cpu_clk_domain + +# Assign input trace files to the Trace CPU +system.cpu.instTraceFile=options.inst_trace_file +system.cpu.dataTraceFile=options.data_trace_file + +# Configure the classic memory system options +MemClass = Simulation.setMemClass(options) +system.membus = SystemXBar() +system.system_port = system.membus.slave +CacheConfig.config_cache(options, system) +MemConfig.config_mem(options, system) + +root = Root(full_system = False, system = system) +Simulation.run(options, root, system, FutureClass) diff --git a/configs/example/fs.py b/configs/example/fs.py index 69b2e970a..dddb2ea3c 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -214,6 +214,17 @@ def build_test_system(np): test_sys.cpu[i].addCheckerCpu() test_sys.cpu[i].createThreads() + # If elastic tracing is enabled when not restoring from checkpoint and + # when not fast forwarding using the atomic cpu, then check that the + # TestCPUClass is DerivO3CPU or inherits from DerivO3CPU. If the check + # passes then attach the elastic trace probe. 
+ # If restoring from checkpoint or fast forwarding, the code that does this for + # FutureCPUClass is in the Simulation module. If the check passes then the + # elastic trace probe is attached to the switch CPUs. + if options.elastic_trace_en and options.checkpoint_restore == None and \ + not options.fast_forward: + CpuConfig.config_etrace(TestCPUClass, test_sys.cpu, options) + CacheConfig.config_cache(options, test_sys) MemConfig.config_mem(options, test_sys) diff --git a/configs/example/se.py b/configs/example/se.py index 0928482b7..1b3458709 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -58,6 +58,7 @@ import Options import Ruby import Simulation import CacheConfig +import CpuConfig import MemConfig from Caches import * from cpu2000 import * @@ -196,6 +197,11 @@ system.cpu_clk_domain = SrcClockDomain(clock = options.cpu_clock, voltage_domain = system.cpu_voltage_domain) +# If elastic tracing is enabled, then configure the cpu and attach the elastic +# trace probe +if options.elastic_trace_en: + CpuConfig.config_etrace(CPUClass, system.cpu, options) + # All cpus belong to a common cpu_clk_domain, therefore running at a common # frequency. for cpu in system.cpu: