From 5bf2a86c6a8a1fc68422053e1a782d3efe0e6f3c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Pierre-Yves=20P=C3=A9neau?= Date: Wed, 22 Mar 2017 18:36:13 +0100 Subject: [PATCH] arm, config: added support for ex5 model of big.LITTLE MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This patch enables using calibrated big and LITTLE cores, ex5_big and ex5_LITTLE instead of the default 'arm_detailed' and 'minor' cpus. The ex5 model is based on the Samsung Exynos 5 Octa (5422) SoC. Operation and memory hierarchy latencies have been calibrated using the lmbench micro-benchmark suite. The preliminary validation results have been published as: 'Full-System Simulation of big.LITTLE Multicore Architecture for Performance and Energy Exploration', in International Symposium on Embedded Multicore/Many-core Systems-on-Chip (MCSoC'16), Lyon, France (Sep, 2016). From http://reviews.gem5.org/r/3666 Change-Id: I4935dee0a9222bd1bf7adfccb9443014945bb2d7 Signed-off-by: Anastasiia Butko Signed-off-by: Pierre-Yves Péneau Reviewed-on: https://gem5-review.googlesource.com/2464 Reviewed-by: Gabor Dozsa Maintainer: Jason Lowe-Power --- configs/common/CpuConfig.py | 14 ++ configs/common/ex5_LITTLE.py | 154 +++++++++++++++++++++ configs/common/ex5_big.py | 206 ++++++++++++++++++++++++++++ configs/example/arm/fs_bigLITTLE.py | 18 +++ 4 files changed, 392 insertions(+) create mode 100644 configs/common/ex5_LITTLE.py create mode 100644 configs/common/ex5_big.py diff --git a/configs/common/CpuConfig.py b/configs/common/CpuConfig.py index 8810622aa..28267c7dd 100644 --- a/configs/common/CpuConfig.py +++ b/configs/common/CpuConfig.py @@ -117,6 +117,20 @@ try: except: pass +# The calibrated ex5-model cores +try: + from ex5_LITTLE import ex5_LITTLE + _cpu_classes["ex5_LITTLE"] = ex5_LITTLE +except: + pass + +try: + from ex5_big import ex5_big + _cpu_classes["ex5_big"] = ex5_big +except: + pass + + # Add all CPUs in the object hierarchy. 
for name, cls in inspect.getmembers(m5.objects, is_cpu_class): _cpu_classes[name] = cls diff --git a/configs/common/ex5_LITTLE.py b/configs/common/ex5_LITTLE.py new file mode 100644 index 000000000..c9c419f45 --- /dev/null +++ b/configs/common/ex5_LITTLE.py @@ -0,0 +1,154 @@ +# Copyright (c) 2012 The Regents of The University of Michigan +# Copyright (c) 2016 Centre National de la Recherche Scientifique +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Ron Dreslinski +# Anastasiia Butko +# Louisa Bessad + +from m5.objects import * +from O3_ARM_v7a import * +from Caches import * + +#----------------------------------------------------------------------- +# ex5 LITTLE core (based on the ARM Cortex-A7) +#----------------------------------------------------------------------- + +# Simple ALU Instructions have a latency of 3 +class ex5_LITTLE_Simple_Int(MinorDefaultIntFU): + opList = [ OpDesc(opClass='IntAlu', opLat=4) ] + +# Complex ALU instructions have variable latencies +class ex5_LITTLE_Complex_IntMul(MinorDefaultIntMulFU): + opList = [ OpDesc(opClass='IntMult', opLat=7) ] + +class ex5_LITTLE_Complex_IntDiv(MinorDefaultIntDivFU): + opList = [ OpDesc(opClass='IntDiv', opLat=9) ] + +# Floating point and SIMD instructions +class ex5_LITTLE_FP(MinorDefaultFloatSimdFU): + opList = [ OpDesc(opClass='SimdAdd', opLat=6), + OpDesc(opClass='SimdAddAcc', opLat=4), + OpDesc(opClass='SimdAlu', opLat=4), + OpDesc(opClass='SimdCmp', opLat=1), + OpDesc(opClass='SimdCvt', opLat=3), + OpDesc(opClass='SimdMisc', opLat=3), + OpDesc(opClass='SimdMult',opLat=4), + OpDesc(opClass='SimdMultAcc',opLat=5), + OpDesc(opClass='SimdShift',opLat=3), + OpDesc(opClass='SimdShiftAcc', opLat=3), + OpDesc(opClass='SimdSqrt', opLat=9), + OpDesc(opClass='SimdFloatAdd',opLat=8), + OpDesc(opClass='SimdFloatAlu',opLat=6), + OpDesc(opClass='SimdFloatCmp', opLat=6), + OpDesc(opClass='SimdFloatCvt', opLat=6), + OpDesc(opClass='SimdFloatDiv', opLat=20, pipelined=False), + OpDesc(opClass='SimdFloatMisc', opLat=6), + OpDesc(opClass='SimdFloatMult', opLat=15), + OpDesc(opClass='SimdFloatMultAcc',opLat=6), + OpDesc(opClass='SimdFloatSqrt', opLat=17), + OpDesc(opClass='FloatAdd', opLat=8), + OpDesc(opClass='FloatCmp', opLat=6), + OpDesc(opClass='FloatCvt', opLat=6), + OpDesc(opClass='FloatDiv', opLat=15, pipelined=False), + OpDesc(opClass='FloatSqrt', opLat=33), + OpDesc(opClass='FloatMult', opLat=6) ] + +# Load/Store Units +class 
ex5_LITTLE_MemFU(MinorDefaultMemFU): + opList = [ OpDesc(opClass='MemRead',opLat=1), + OpDesc(opClass='MemWrite',opLat=1) ] + +# Misc Unit +class ex5_LITTLE_MiscFU(MinorDefaultMiscFU): + opList = [ OpDesc(opClass='IprAccess',opLat=1), + OpDesc(opClass='InstPrefetch',opLat=1) ] + +# Functional Units for this CPU +class ex5_LITTLE_FUP(MinorFUPool): + funcUnits = [ex5_LITTLE_Simple_Int(), ex5_LITTLE_Simple_Int(), + ex5_LITTLE_Complex_IntMul(), ex5_LITTLE_Complex_IntDiv(), + ex5_LITTLE_FP(), ex5_LITTLE_MemFU(), + ex5_LITTLE_MiscFU()] + +class ex5_LITTLE(MinorCPU): + executeFuncUnits = ex5_LITTLE_FUP() + +class L1I(L1Cache): + tag_latency = 2 + data_latency = 2 + response_latency = 2 + mshrs = 2 + size = '32kB' + assoc = 2 + is_read_only = True + # Writeback clean lines as well + writeback_clean = True + +class L1D(L1Cache): + tag_latency = 2 + data_latency = 2 + response_latency = 2 + mshrs = 4 + tgts_per_mshr = 8 + size = '32kB' + assoc = 4 + write_buffers = 4 + # Consider the L2 a victim cache also for clean lines + writeback_clean = True + +# TLB Cache +# Use a cache as a L2 TLB +class WalkCache(PageTableWalkerCache): + tag_latency = 2 + data_latency = 2 + response_latency = 2 + mshrs = 6 + tgts_per_mshr = 8 + size = '1kB' + assoc = 2 + write_buffers = 16 + is_read_only = True + # Writeback clean lines as well + writeback_clean = True + +# L2 Cache +class L2(L2Cache): + tag_latency = 9 + data_latency = 9 + response_latency = 9 + mshrs = 8 + tgts_per_mshr = 12 + size = '512kB' + assoc = 8 + write_buffers = 16 + prefetch_on_access = True + clusivity = 'mostly_excl' + # Simple stride prefetcher + prefetcher = StridePrefetcher(degree=1, latency = 1) + tags = RandomRepl() + + diff --git a/configs/common/ex5_big.py b/configs/common/ex5_big.py new file mode 100644 index 000000000..94b35ae0b --- /dev/null +++ b/configs/common/ex5_big.py @@ -0,0 +1,206 @@ +# Copyright (c) 2012 The Regents of The University of Michigan +# Copyright (c) 2016 Centre National de la Recherche 
Scientifique +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Ron Dreslinski +# Anastasiia Butko +# Louisa Bessad + +from m5.objects import * +from O3_ARM_v7a import * +from Caches import * + +#----------------------------------------------------------------------- +# ex5 big core (based on the ARM Cortex-A15) +#----------------------------------------------------------------------- + +# Simple ALU Instructions have a latency of 1 +class ex5_big_Simple_Int(O3_ARM_v7a_Simple_Int): + opList = [ OpDesc(opClass='IntAlu', opLat=1) ] + count = 2 + +# Complex ALU instructions have variable latencies +class ex5_big_Complex_Int(O3_ARM_v7a_Complex_Int): + opList = [ OpDesc(opClass='IntMult', opLat=4, pipelined=True), + OpDesc(opClass='IntDiv', opLat=11, pipelined=False), + OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ] + count = 1 + +# Floating point and SIMD instructions +class ex5_big_FP(O3_ARM_v7a_FP): + opList = [ OpDesc(opClass='SimdAdd', opLat=3), + OpDesc(opClass='SimdAddAcc', opLat=4), + OpDesc(opClass='SimdAlu', opLat=4), + OpDesc(opClass='SimdCmp', opLat=4), + OpDesc(opClass='SimdCvt', opLat=3), + OpDesc(opClass='SimdMisc', opLat=3), + OpDesc(opClass='SimdMult',opLat=6), + OpDesc(opClass='SimdMultAcc',opLat=5), + OpDesc(opClass='SimdShift',opLat=3), + OpDesc(opClass='SimdShiftAcc', opLat=3), + OpDesc(opClass='SimdSqrt', opLat=9), + OpDesc(opClass='SimdFloatAdd',opLat=6), + OpDesc(opClass='SimdFloatAlu',opLat=5), + OpDesc(opClass='SimdFloatCmp', opLat=3), + OpDesc(opClass='SimdFloatCvt', opLat=3), + OpDesc(opClass='SimdFloatDiv', opLat=21), + OpDesc(opClass='SimdFloatMisc', opLat=3), + OpDesc(opClass='SimdFloatMult', opLat=6), + OpDesc(opClass='SimdFloatMultAcc',opLat=1), + OpDesc(opClass='SimdFloatSqrt', opLat=9), + OpDesc(opClass='FloatAdd', opLat=6), + OpDesc(opClass='FloatCmp', opLat=5), + OpDesc(opClass='FloatCvt', opLat=5), + OpDesc(opClass='FloatDiv', opLat=12, pipelined=False), + OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False), + OpDesc(opClass='FloatMult', opLat=8) ] + count = 2 + + +# 
Load/Store Units +class ex5_big_Load(O3_ARM_v7a_Load): + opList = [ OpDesc(opClass='MemRead',opLat=2) ] + count = 1 + +class ex5_big_Store(O3_ARM_v7a_Store): + opList = [OpDesc(opClass='MemWrite',opLat=2) ] + count = 1 + +# Functional Units for this CPU +class ex5_big_FUP(O3_ARM_v7a_FUP): + FUList = [ex5_big_Simple_Int(), ex5_big_Complex_Int(), + ex5_big_Load(), ex5_big_Store(), ex5_big_FP()] + +# Bi-Mode Branch Predictor +class ex5_big_BP(O3_ARM_v7a_BP): + globalPredictorSize = 4096 + globalCtrBits = 2 + choicePredictorSize = 1024 + choiceCtrBits = 3 + BTBEntries = 4096 + BTBTagSize = 18 + RASSize = 48 + instShiftAmt = 2 + +class ex5_big(O3_ARM_v7a_3): + LQEntries = 16 + SQEntries = 16 + LSQDepCheckShift = 0 + LFSTSize = 1024 + SSITSize = 1024 + decodeToFetchDelay = 1 + renameToFetchDelay = 1 + iewToFetchDelay = 1 + commitToFetchDelay = 1 + renameToDecodeDelay = 1 + iewToDecodeDelay = 1 + commitToDecodeDelay = 1 + iewToRenameDelay = 1 + commitToRenameDelay = 1 + commitToIEWDelay = 1 + fetchWidth = 3 + fetchBufferSize = 16 + fetchToDecodeDelay = 3 + decodeWidth = 3 + decodeToRenameDelay = 2 + renameWidth = 3 + renameToIEWDelay = 1 + issueToExecuteDelay = 1 + dispatchWidth = 6 + issueWidth = 8 + wbWidth = 8 + fuPool = ex5_big_FUP() + iewToCommitDelay = 1 + renameToROBDelay = 1 + commitWidth = 8 + squashWidth = 8 + trapLatency = 13 + backComSize = 5 + forwardComSize = 5 + numPhysIntRegs = 90 + numPhysFloatRegs = 256 + numIQEntries = 48 + numROBEntries = 60 + + switched_out = False + branchPred = ex5_big_BP() + +# Instruction Cache +class L1I(O3_ARM_v7a_ICache): + tag_latency = 2 + data_latency = 2 + response_latency = 2 + mshrs = 2 + tgts_per_mshr = 8 + size = '32kB' + assoc = 2 + is_read_only = True + # Writeback clean lines as well + writeback_clean = True + +# Data Cache +class L1D(O3_ARM_v7a_DCache): + tag_latency = 2 + data_latency = 2 + response_latency = 2 + mshrs = 6 + tgts_per_mshr = 8 + size = '32kB' + assoc = 2 + write_buffers = 16 + # Consider the L2 a 
victim cache also for clean lines + writeback_clean = True + +# TLB Cache +# Use a cache as a L2 TLB +class WalkCache(O3_ARM_v7aWalkCache): + tag_latency = 4 + data_latency = 4 + response_latency = 4 + mshrs = 6 + tgts_per_mshr = 8 + size = '1kB' + assoc = 8 + write_buffers = 16 + is_read_only = True + # Writeback clean lines as well + writeback_clean = True + +# L2 Cache +class L2(O3_ARM_v7aL2): + tag_latency = 15 + data_latency = 15 + response_latency = 15 + mshrs = 16 + tgts_per_mshr = 8 + size = '2MB' + assoc = 16 + write_buffers = 8 + prefetch_on_access = True + clusivity = 'mostly_excl' + # Simple stride prefetcher + prefetcher = StridePrefetcher(degree=8, latency = 1) + tags = RandomRepl() diff --git a/configs/example/arm/fs_bigLITTLE.py b/configs/example/arm/fs_bigLITTLE.py index d6825dfaa..3e7ac2f76 100644 --- a/configs/example/arm/fs_bigLITTLE.py +++ b/configs/example/arm/fs_bigLITTLE.py @@ -51,6 +51,8 @@ m5.util.addToPath("../../") from common import SysPaths from common import CpuConfig +from common import ex5_big +from common import ex5_LITTLE import devices from devices import AtomicCluster, KvmCluster @@ -95,6 +97,21 @@ class LittleCluster(devices.CpuCluster): super(LittleCluster, self).__init__(system, num_cpus, cpu_clock, cpu_voltage, *cpu_config) +class Ex5BigCluster(devices.CpuCluster): + def __init__(self, system, num_cpus, cpu_clock, + cpu_voltage="1.0V"): + cpu_config = [ CpuConfig.get("ex5_big"), ex5_big.L1I, ex5_big.L1D, + ex5_big.WalkCache, ex5_big.L2 ] + super(Ex5BigCluster, self).__init__(system, num_cpus, cpu_clock, + cpu_voltage, *cpu_config) + +class Ex5LittleCluster(devices.CpuCluster): + def __init__(self, system, num_cpus, cpu_clock, + cpu_voltage="1.0V"): + cpu_config = [ CpuConfig.get("ex5_LITTLE"), ex5_LITTLE.L1I, + ex5_LITTLE.L1D, ex5_LITTLE.WalkCache, ex5_LITTLE.L2 ] + super(Ex5LittleCluster, self).__init__(system, num_cpus, cpu_clock, + cpu_voltage, *cpu_config) def createSystem(caches, kernel, bootscript, disks=[]): sys = 
devices.SimpleSystem(caches, default_mem_size, @@ -127,6 +144,7 @@ def createSystem(caches, kernel, bootscript, disks=[]): cpu_types = { "atomic" : (AtomicCluster, AtomicCluster), "timing" : (BigCluster, LittleCluster), + "exynos" : (Ex5BigCluster, Ex5LittleCluster), } # Only add the KVM CPU if it has been compiled into gem5 -- 2.30.2