arm, config: added support for ex5 model of big.LITTLE
author Pierre-Yves Péneau <pierre-yves.peneau@lirmm.fr>
Wed, 22 Mar 2017 17:36:13 +0000 (18:36 +0100)
committer Pierre-Yves Péneau <pierre-yves.peneau@lirmm.fr>
Thu, 18 May 2017 14:56:08 +0000 (14:56 +0000)
This patch enables using calibrated big and LITTLE cores, ex5_big and
ex5_LITTLE instead of the default 'arm_detailed' and 'minor' cpus. The ex5
model is based on the Samsung Exynos 5 Octa (5422) SoC. Operation and memory
hierarchy latencies have been calibrated using the lmbench micro-benchmark
suite. The preliminary validation results have been published as: 'Full-System
Simulation of big.LITTLE Multicore Architecture for Performance and Energy
Exploration', in International Symposium on Embedded Multicore/Many-core
Systems-on-Chip (MCSoC'16), Lyon, France (Sep, 2016).

From http://reviews.gem5.org/r/3666

Change-Id: I4935dee0a9222bd1bf7adfccb9443014945bb2d7
Signed-off-by: Anastasiia Butko <abutko@lbl.gov>
Signed-off-by: Pierre-Yves Péneau <pierre-yves.peneau@lirmm.fr>
Reviewed-on: https://gem5-review.googlesource.com/2464
Reviewed-by: Gabor Dozsa <gabor.dozsa@arm.com>
Maintainer: Jason Lowe-Power <jason@lowepower.com>

configs/common/CpuConfig.py
configs/common/ex5_LITTLE.py [new file with mode: 0644]
configs/common/ex5_big.py [new file with mode: 0644]
configs/example/arm/fs_bigLITTLE.py

diff --git a/configs/common/CpuConfig.py b/configs/common/CpuConfig.py
index 8810622aafb917904cf391acea64b51a4626c6f1..28267c7dd9d43a4e7a73ba2366de375643025675 100644 (file)
@@ -117,6 +117,20 @@ try:
 except:
     pass
 
+# The calibrated ex5-model cores
+try:
+    from ex5_LITTLE import ex5_LITTLE
+    _cpu_classes["ex5_LITTLE"] = ex5_LITTLE
+except:
+     pass
+
+try:
+    from ex5_big import ex5_big
+    _cpu_classes["ex5_big"] = ex5_big
+except:
+     pass
+
+
 # Add all CPUs in the object hierarchy.
 for name, cls in inspect.getmembers(m5.objects, is_cpu_class):
     _cpu_classes[name] = cls
diff --git a/configs/common/ex5_LITTLE.py b/configs/common/ex5_LITTLE.py
new file mode 100644 (file)
index 0000000..c9c419f
--- /dev/null
@@ -0,0 +1,154 @@
+# Copyright (c) 2012 The Regents of The University of Michigan
+# Copyright (c) 2016 Centre National de la Recherche Scientifique
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Ron Dreslinski
+#          Anastasiia Butko
+#          Louisa Bessad
+
+from m5.objects import *
+from O3_ARM_v7a import *
+from Caches import *
+
+#-----------------------------------------------------------------------
+#                ex5 LITTLE core (based on the ARM Cortex-A7)
+#-----------------------------------------------------------------------
+
+# Simple ALU Instructions have a latency of 4
+class ex5_LITTLE_Simple_Int(MinorDefaultIntFU):
+    opList = [ OpDesc(opClass='IntAlu', opLat=4) ]
+
+# Complex ALU instructions have variable latencies
+class ex5_LITTLE_Complex_IntMul(MinorDefaultIntMulFU):
+    opList = [ OpDesc(opClass='IntMult', opLat=7) ]
+
+class ex5_LITTLE_Complex_IntDiv(MinorDefaultIntDivFU):
+    opList = [ OpDesc(opClass='IntDiv', opLat=9) ]
+
+# Floating point and SIMD instructions
+class ex5_LITTLE_FP(MinorDefaultFloatSimdFU):
+    opList = [ OpDesc(opClass='SimdAdd', opLat=6),
+               OpDesc(opClass='SimdAddAcc', opLat=4),
+               OpDesc(opClass='SimdAlu', opLat=4),
+               OpDesc(opClass='SimdCmp', opLat=1),
+               OpDesc(opClass='SimdCvt', opLat=3),
+               OpDesc(opClass='SimdMisc', opLat=3),
+               OpDesc(opClass='SimdMult',opLat=4),
+               OpDesc(opClass='SimdMultAcc',opLat=5),
+               OpDesc(opClass='SimdShift',opLat=3),
+               OpDesc(opClass='SimdShiftAcc', opLat=3),
+               OpDesc(opClass='SimdSqrt', opLat=9),
+               OpDesc(opClass='SimdFloatAdd',opLat=8),
+               OpDesc(opClass='SimdFloatAlu',opLat=6),
+               OpDesc(opClass='SimdFloatCmp', opLat=6),
+               OpDesc(opClass='SimdFloatCvt', opLat=6),
+               OpDesc(opClass='SimdFloatDiv', opLat=20, pipelined=False),
+               OpDesc(opClass='SimdFloatMisc', opLat=6),
+               OpDesc(opClass='SimdFloatMult', opLat=15),
+               OpDesc(opClass='SimdFloatMultAcc',opLat=6),
+               OpDesc(opClass='SimdFloatSqrt', opLat=17),
+               OpDesc(opClass='FloatAdd', opLat=8),
+               OpDesc(opClass='FloatCmp', opLat=6),
+               OpDesc(opClass='FloatCvt', opLat=6),
+               OpDesc(opClass='FloatDiv', opLat=15, pipelined=False),
+               OpDesc(opClass='FloatSqrt', opLat=33),
+               OpDesc(opClass='FloatMult', opLat=6) ]
+
+# Load/Store Units
+class ex5_LITTLE_MemFU(MinorDefaultMemFU):
+    opList = [ OpDesc(opClass='MemRead',opLat=1),
+               OpDesc(opClass='MemWrite',opLat=1) ]
+
+# Misc Unit
+class ex5_LITTLE_MiscFU(MinorDefaultMiscFU):
+    opList = [ OpDesc(opClass='IprAccess',opLat=1),
+               OpDesc(opClass='InstPrefetch',opLat=1) ]
+
+# Functional Units for this CPU
+class ex5_LITTLE_FUP(MinorFUPool):
+    funcUnits = [ex5_LITTLE_Simple_Int(), ex5_LITTLE_Simple_Int(),
+        ex5_LITTLE_Complex_IntMul(), ex5_LITTLE_Complex_IntDiv(),
+        ex5_LITTLE_FP(), ex5_LITTLE_MemFU(),
+        ex5_LITTLE_MiscFU()]
+
+class ex5_LITTLE(MinorCPU):
+    executeFuncUnits = ex5_LITTLE_FUP()
+
+class L1I(L1Cache):
+    tag_latency = 2
+    data_latency = 2
+    response_latency = 2
+    mshrs = 2
+    size = '32kB'
+    assoc = 2
+    is_read_only = True
+    # Writeback clean lines as well
+    writeback_clean = True
+
+class L1D(L1Cache):
+    tag_latency = 2
+    data_latency = 2
+    response_latency = 2
+    mshrs = 4
+    tgts_per_mshr = 8
+    size = '32kB'
+    assoc = 4
+    write_buffers = 4
+    # Consider the L2 a victim cache also for clean lines
+    writeback_clean = True
+
+# TLB Cache
+# Use a cache as an L2 TLB
+class WalkCache(PageTableWalkerCache):
+    tag_latency = 2
+    data_latency = 2
+    response_latency = 2
+    mshrs = 6
+    tgts_per_mshr = 8
+    size = '1kB'
+    assoc = 2
+    write_buffers = 16
+    is_read_only = True
+    # Writeback clean lines as well
+    writeback_clean = True
+
+# L2 Cache
+class L2(L2Cache):
+    tag_latency = 9
+    data_latency = 9
+    response_latency = 9
+    mshrs = 8
+    tgts_per_mshr = 12
+    size = '512kB'
+    assoc = 8
+    write_buffers = 16
+    prefetch_on_access = True
+    clusivity = 'mostly_excl'
+    # Simple stride prefetcher
+    prefetcher = StridePrefetcher(degree=1, latency = 1)
+    tags = RandomRepl()
+
+
diff --git a/configs/common/ex5_big.py b/configs/common/ex5_big.py
new file mode 100644 (file)
index 0000000..94b35ae
--- /dev/null
@@ -0,0 +1,206 @@
+# Copyright (c) 2012 The Regents of The University of Michigan
+# Copyright (c) 2016 Centre National de la Recherche Scientifique
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Ron Dreslinski
+#          Anastasiia Butko
+#          Louisa Bessad
+
+from m5.objects import *
+from O3_ARM_v7a import *
+from Caches import *
+
+#-----------------------------------------------------------------------
+#                ex5 big core (based on the ARM Cortex-A15)
+#-----------------------------------------------------------------------
+
+# Simple ALU Instructions have a latency of 1
+class ex5_big_Simple_Int(O3_ARM_v7a_Simple_Int):
+    opList = [ OpDesc(opClass='IntAlu', opLat=1) ]
+    count = 2
+
+# Complex ALU instructions have variable latencies
+class ex5_big_Complex_Int(O3_ARM_v7a_Complex_Int):
+    opList = [ OpDesc(opClass='IntMult', opLat=4, pipelined=True),
+               OpDesc(opClass='IntDiv', opLat=11, pipelined=False),
+               OpDesc(opClass='IprAccess', opLat=3, pipelined=True) ]
+    count = 1
+
+# Floating point and SIMD instructions
+class ex5_big_FP(O3_ARM_v7a_FP):
+    opList = [ OpDesc(opClass='SimdAdd', opLat=3),
+               OpDesc(opClass='SimdAddAcc', opLat=4),
+               OpDesc(opClass='SimdAlu', opLat=4),
+               OpDesc(opClass='SimdCmp', opLat=4),
+               OpDesc(opClass='SimdCvt', opLat=3),
+               OpDesc(opClass='SimdMisc', opLat=3),
+               OpDesc(opClass='SimdMult',opLat=6),
+               OpDesc(opClass='SimdMultAcc',opLat=5),
+               OpDesc(opClass='SimdShift',opLat=3),
+               OpDesc(opClass='SimdShiftAcc', opLat=3),
+               OpDesc(opClass='SimdSqrt', opLat=9),
+               OpDesc(opClass='SimdFloatAdd',opLat=6),
+               OpDesc(opClass='SimdFloatAlu',opLat=5),
+               OpDesc(opClass='SimdFloatCmp', opLat=3),
+               OpDesc(opClass='SimdFloatCvt', opLat=3),
+               OpDesc(opClass='SimdFloatDiv', opLat=21),
+               OpDesc(opClass='SimdFloatMisc', opLat=3),
+               OpDesc(opClass='SimdFloatMult', opLat=6),
+               OpDesc(opClass='SimdFloatMultAcc',opLat=1),
+               OpDesc(opClass='SimdFloatSqrt', opLat=9),
+               OpDesc(opClass='FloatAdd', opLat=6),
+               OpDesc(opClass='FloatCmp', opLat=5),
+               OpDesc(opClass='FloatCvt', opLat=5),
+               OpDesc(opClass='FloatDiv', opLat=12, pipelined=False),
+               OpDesc(opClass='FloatSqrt', opLat=33, pipelined=False),
+               OpDesc(opClass='FloatMult', opLat=8) ]
+    count = 2
+
+
+# Load/Store Units
+class ex5_big_Load(O3_ARM_v7a_Load):
+    opList = [ OpDesc(opClass='MemRead',opLat=2) ]
+    count = 1
+
+class ex5_big_Store(O3_ARM_v7a_Store):
+    opList = [OpDesc(opClass='MemWrite',opLat=2) ]
+    count = 1
+
+# Functional Units for this CPU
+class ex5_big_FUP(O3_ARM_v7a_FUP):
+    FUList = [ex5_big_Simple_Int(), ex5_big_Complex_Int(),
+              ex5_big_Load(), ex5_big_Store(), ex5_big_FP()]
+
+# Bi-Mode Branch Predictor
+class ex5_big_BP(O3_ARM_v7a_BP):
+    globalPredictorSize = 4096
+    globalCtrBits = 2
+    choicePredictorSize = 1024
+    choiceCtrBits = 3
+    BTBEntries = 4096
+    BTBTagSize = 18
+    RASSize = 48
+    instShiftAmt = 2
+
+class ex5_big(O3_ARM_v7a_3):
+    LQEntries = 16
+    SQEntries = 16
+    LSQDepCheckShift = 0
+    LFSTSize = 1024
+    SSITSize = 1024
+    decodeToFetchDelay = 1
+    renameToFetchDelay = 1
+    iewToFetchDelay = 1
+    commitToFetchDelay = 1
+    renameToDecodeDelay = 1
+    iewToDecodeDelay = 1
+    commitToDecodeDelay = 1
+    iewToRenameDelay = 1
+    commitToRenameDelay = 1
+    commitToIEWDelay = 1
+    fetchWidth = 3
+    fetchBufferSize = 16
+    fetchToDecodeDelay = 3
+    decodeWidth = 3
+    decodeToRenameDelay = 2
+    renameWidth = 3
+    renameToIEWDelay = 1
+    issueToExecuteDelay = 1
+    dispatchWidth = 6
+    issueWidth = 8
+    wbWidth = 8
+    fuPool = ex5_big_FUP()
+    iewToCommitDelay = 1
+    renameToROBDelay = 1
+    commitWidth = 8
+    squashWidth = 8
+    trapLatency = 13
+    backComSize = 5
+    forwardComSize = 5
+    numPhysIntRegs = 90
+    numPhysFloatRegs = 256
+    numIQEntries = 48
+    numROBEntries = 60
+
+    switched_out = False
+    branchPred = ex5_big_BP()
+
+# Instruction Cache
+class L1I(O3_ARM_v7a_ICache):
+    tag_latency = 2
+    data_latency = 2
+    response_latency = 2
+    mshrs = 2
+    tgts_per_mshr = 8
+    size = '32kB'
+    assoc = 2
+    is_read_only = True
+    # Writeback clean lines as well
+    writeback_clean = True
+
+# Data Cache
+class L1D(O3_ARM_v7a_DCache):
+    tag_latency = 2
+    data_latency = 2
+    response_latency = 2
+    mshrs = 6
+    tgts_per_mshr = 8
+    size = '32kB'
+    assoc = 2
+    write_buffers = 16
+    # Consider the L2 a victim cache also for clean lines
+    writeback_clean = True
+
+# TLB Cache
+# Use a cache as an L2 TLB
+class WalkCache(O3_ARM_v7aWalkCache):
+    tag_latency = 4
+    data_latency = 4
+    response_latency = 4
+    mshrs = 6
+    tgts_per_mshr = 8
+    size = '1kB'
+    assoc = 8
+    write_buffers = 16
+    is_read_only = True
+    # Writeback clean lines as well
+    writeback_clean = True
+
+# L2 Cache
+class L2(O3_ARM_v7aL2):
+    tag_latency = 15
+    data_latency = 15
+    response_latency = 15
+    mshrs = 16
+    tgts_per_mshr = 8
+    size = '2MB'
+    assoc = 16
+    write_buffers = 8
+    prefetch_on_access = True
+    clusivity = 'mostly_excl'
+    # Simple stride prefetcher
+    prefetcher = StridePrefetcher(degree=8, latency = 1)
+    tags = RandomRepl()
diff --git a/configs/example/arm/fs_bigLITTLE.py b/configs/example/arm/fs_bigLITTLE.py
index d6825dfaafa37d0e7ed64d7a2d6f688589dedc5b..3e7ac2f766d0c189e76a918999b83f568a1d9c34 100644 (file)
@@ -51,6 +51,8 @@ m5.util.addToPath("../../")
 
 from common import SysPaths
 from common import CpuConfig
+from common import ex5_big
+from common import ex5_LITTLE
 
 import devices
 from devices import AtomicCluster, KvmCluster
@@ -95,6 +97,21 @@ class LittleCluster(devices.CpuCluster):
         super(LittleCluster, self).__init__(system, num_cpus, cpu_clock,
                                          cpu_voltage, *cpu_config)
 
+class Ex5BigCluster(devices.CpuCluster):
+    def __init__(self, system, num_cpus, cpu_clock,
+                 cpu_voltage="1.0V"):
+        cpu_config = [ CpuConfig.get("ex5_big"), ex5_big.L1I, ex5_big.L1D,
+                    ex5_big.WalkCache, ex5_big.L2 ]
+        super(Ex5BigCluster, self).__init__(system, num_cpus, cpu_clock,
+                                         cpu_voltage, *cpu_config)
+
+class Ex5LittleCluster(devices.CpuCluster):
+    def __init__(self, system, num_cpus, cpu_clock,
+                 cpu_voltage="1.0V"):
+        cpu_config = [ CpuConfig.get("ex5_LITTLE"), ex5_LITTLE.L1I,
+                    ex5_LITTLE.L1D, ex5_LITTLE.WalkCache, ex5_LITTLE.L2 ]
+        super(Ex5LittleCluster, self).__init__(system, num_cpus, cpu_clock,
+                                         cpu_voltage, *cpu_config)
 
 def createSystem(caches, kernel, bootscript, disks=[]):
     sys = devices.SimpleSystem(caches, default_mem_size,
@@ -127,6 +144,7 @@ def createSystem(caches, kernel, bootscript, disks=[]):
 cpu_types = {
     "atomic" : (AtomicCluster, AtomicCluster),
     "timing" : (BigCluster, LittleCluster),
+    "exynos" : (Ex5BigCluster, Ex5LittleCluster),
 }
 
 # Only add the KVM CPU if it has been compiled into gem5