From fb349aa984daf39831cde68611bd196057cb1b48 Mon Sep 17 00:00:00 2001 From: Gabor Dozsa Date: Thu, 15 Sep 2016 18:00:59 +0100 Subject: [PATCH] arm, config: Fixups for the example big.LITTLE(tm) configuration This patch refactors the configuration file to use a more object-oriented design. Change-Id: I44ac2d063c2b5901f385544fb6ce3f259459cb05 Reviewed-by: Andreas Sandberg Reviewed-by: Gabor Dozsa --- configs/example/arm/devices.py | 147 +++++++++++++++++++--- configs/example/arm/fs_bigLITTLE.py | 183 ++++++++-------------------- 2 files changed, 182 insertions(+), 148 deletions(-) diff --git a/configs/example/arm/devices.py b/configs/example/arm/devices.py index 3415fad8c..65892d9e6 100644 --- a/configs/example/arm/devices.py +++ b/configs/example/arm/devices.py @@ -42,6 +42,7 @@ import m5 from m5.objects import * m5.util.addToPath('../../common') from Caches import * +import CpuConfig class L1I(L1_ICache): hit_latency = 1 @@ -98,33 +99,104 @@ class MemBus(SystemXBar): default = Self.badaddr_responder.pio +class CpuCluster(SubSystem): + def __init__(self, system, num_cpus, cpu_clock, cpu_voltage, + cpu_type, l1i_type, l1d_type, wcache_type, l2_type): + super(CpuCluster, self).__init__() + self._cpu_type = cpu_type + self._l1i_type = l1i_type + self._l1d_type = l1d_type + self._wcache_type = wcache_type + self._l2_type = l2_type + + assert num_cpus > 0 + + self.voltage_domain = VoltageDomain(voltage=cpu_voltage) + self.clk_domain = SrcClockDomain(clock=cpu_clock, + voltage_domain=self.voltage_domain) + + self.cpus = [ self._cpu_type(cpu_id=system.numCpus() + idx, + clk_domain=self.clk_domain) + for idx in range(num_cpus) ] + + for cpu in self.cpus: + cpu.createThreads() + cpu.createInterruptController() + cpu.socket_id = system.numCpuClusters() + system.addCpuCluster(self, num_cpus) + + def requireCaches(self): + return self._cpu_type.require_caches() + + def memoryMode(self): + return self._cpu_type.memory_mode() + + def addL1(self): + for cpu in self.cpus: + l1i = None if self._l1i_type is None else self._l1i_type() + l1d = None if self._l1d_type is None else self._l1d_type() + iwc = None if self._wcache_type is None else self._wcache_type() + dwc = None if self._wcache_type is None else self._wcache_type() + cpu.addPrivateSplitL1Caches(l1i, l1d, iwc, dwc) + + def addL2(self, clk_domain): + if self._l2_type is None: + return + self.toL2Bus = L2XBar(width=64, clk_domain=clk_domain) + self.l2 = self._l2_type() + for cpu in self.cpus: + cpu.connectAllPorts(self.toL2Bus) + self.toL2Bus.master = self.l2.cpu_side + + def connectMemSide(self, bus): + bus.slave + try: + self.l2.mem_side = bus.slave + except AttributeError: + for cpu in self.cpus: + cpu.connectAllPorts(bus) + + +class AtomicCluster(CpuCluster): + def __init__(self, system, num_cpus, cpu_clock, cpu_voltage="1.0V"): + cpu_config = [ CpuConfig.get("atomic"), None, None, None, None ] + super(AtomicCluster, self).__init__(system, num_cpus, cpu_clock, + cpu_voltage, *cpu_config) + def addL1(self): + pass + + class SimpleSystem(LinuxArmSystem): cache_line_size = 64 - voltage_domain = VoltageDomain(voltage="1.0V") - clk_domain = SrcClockDomain(clock="1GHz", - voltage_domain=Parent.voltage_domain) + def __init__(self, **kwargs): + super(SimpleSystem, self).__init__(**kwargs) - realview = VExpress_GEM5_V1() + self.voltage_domain = VoltageDomain(voltage="1.0V") + self.clk_domain = SrcClockDomain(clock="1GHz", + voltage_domain=Parent.voltage_domain) - gic_cpu_addr = realview.gic.cpu_addr - flags_addr = realview.realview_io.pio_addr + 0x30 + self.realview = VExpress_GEM5_V1() - membus = MemBus() + self.gic_cpu_addr = self.realview.gic.cpu_addr + self.flags_addr = self.realview.realview_io.pio_addr + 0x30 - intrctrl = IntrControl() - terminal = Terminal() - vncserver = VncServer() + self.membus = MemBus() - iobus = IOXBar() - # CPUs->PIO - iobridge = Bridge(delay='50ns') - # Device DMA -> MEM - dmabridge = Bridge(delay='50ns', ranges=realview._mem_regions) + self.intrctrl = IntrControl() + self.terminal = Terminal() + self.vncserver = VncServer() - _pci_devices = 0 - _clusters = [] - _cpus = [] + self.iobus = IOXBar() + # CPUs->PIO + self.iobridge = Bridge(delay='50ns') + # Device DMA -> MEM + self.dmabridge = Bridge(delay='50ns', + ranges=self.realview._mem_regions) + + self._pci_devices = 0 + self._clusters = [] + self._num_cpus = 0 def attach_pci(self, dev): dev.pci_bus, dev.pci_dev, dev.pci_func = (0, self._pci_devices + 1, 0) @@ -142,3 +214,42 @@ class SimpleSystem(LinuxArmSystem): self.realview.attachOnChipIO(self.membus, self.iobridge) self.realview.attachIO(self.iobus) self.system_port = self.membus.slave + + def numCpuClusters(self): + return len(self._clusters) + + def addCpuCluster(self, cpu_cluster, num_cpus): + assert cpu_cluster not in self._clusters + assert num_cpus > 0 + self._clusters.append(cpu_cluster) + self._num_cpus += num_cpus + + def numCpus(self): + return self._num_cpus + + def addCaches(self, need_caches, last_cache_level): + if not need_caches: + # connect each cluster to the memory hierarchy + for cluster in self._clusters: + cluster.connectMemSide(self.membus) + return + + cluster_mem_bus = self.membus + assert last_cache_level >= 1 and last_cache_level <= 3 + for cluster in self._clusters: + cluster.addL1() + if last_cache_level > 1: + for cluster in self._clusters: + cluster.addL2(cluster.clk_domain) + if last_cache_level > 2: + max_clock_cluster = max(self._clusters, + key=lambda c: c.clk_domain.clock[0]) + self.l3 = L3(clk_domain=max_clock_cluster.clk_domain) + self.toL3Bus = L2XBar(width=64) + self.toL3Bus.master = self.l3.cpu_side + self.l3.mem_side = self.membus.slave + cluster_mem_bus = self.toL3Bus + + # connect each cluster to the memory hierarchy + for cluster in self._clusters: + cluster.connectMemSide(cluster_mem_bus) diff --git a/configs/example/arm/fs_bigLITTLE.py b/configs/example/arm/fs_bigLITTLE.py index ed71e15dd..80e0817eb 100644 --- a/configs/example/arm/fs_bigLITTLE.py +++ b/configs/example/arm/fs_bigLITTLE.py @@ -60,10 +60,27 @@ default_rcs = 'bootscript.rcS' default_mem_size= "2GB" -def createSystem(kernel, mem_mode, bootscript, disks=[]): + +class BigCluster(devices.CpuCluster): + def __init__(self, system, num_cpus, cpu_clock, + cpu_voltage="1.0V"): + cpu_config = [ CpuConfig.get("arm_detailed"), devices.L1I, devices.L1D, + devices.WalkCache, devices.L2 ] + super(BigCluster, self).__init__(system, num_cpus, cpu_clock, + cpu_voltage, *cpu_config) + +class LittleCluster(devices.CpuCluster): + def __init__(self, system, num_cpus, cpu_clock, + cpu_voltage="1.0V"): + cpu_config = [ CpuConfig.get("minor"), devices.L1I, devices.L1D, + devices.WalkCache, devices.L2 ] + super(LittleCluster, self).__init__(system, num_cpus, cpu_clock, + cpu_voltage, *cpu_config) + + +def createSystem(kernel, bootscript, disks=[]): sys = devices.SimpleSystem(kernel=SysPaths.binary(kernel), readfile=bootscript, - mem_mode=mem_mode, machine_type="DTOnly") mem_region = sys.realview._mem_regions[0] @@ -91,90 +108,6 @@ def createSystem(kernel, mem_mode, bootscript, disks=[]): return sys -class CpuCluster(SubSystem): - def addCPUs(self, cpu_config, num_cpus, cpu_clock, cpu_voltage="1.0V"): - try: - self._cluster_id - m5.util.panic("CpuCluster.addCPUs() must be called exactly once") - except AttributeError: - pass - - assert num_cpus > 0 - system = self._parent - self._cluster_id = len(system._clusters) - system._clusters.append(self) - self._config = cpu_config - - self.voltage_domain = VoltageDomain(voltage=cpu_voltage) - self.clk_domain = SrcClockDomain(clock=cpu_clock, - voltage_domain=self.voltage_domain) - - cpu_class = cpu_config['cpu'] - self.cpus = [ cpu_class(cpu_id=len(system._cpus) + idx, - clk_domain=self.clk_domain) - for idx in range(num_cpus) ] - - for cpu in self.cpus: - cpu.createThreads() - cpu.createInterruptController() - cpu.socket_id = self._cluster_id - system._cpus.append(cpu) - - def createCache(self, key): - try: - return self._config[key]() - except KeyError: - return None - - def addL1(self): - self._cluster_id - for cpu in self.cpus: - l1i = self.createCache('l1i') - l1d = self.createCache('l1d') - iwc = self.createCache('wcache') - dwc = self.createCache('wcache') - cpu.addPrivateSplitL1Caches(l1i, l1d, iwc, dwc) - - def addL2(self, clk_domain): - self._cluster_id - self.toL2Bus = L2XBar(width=64, clk_domain=clk_domain) - #self.toL2Bus = L2XBar(width=64, clk_domain=clk_domain, - #snoop_filter=NULL) - self.l2 = self._config['l2']() - for cpu in self.cpus: - cpu.connectAllPorts(self.toL2Bus) - self.toL2Bus.master = self.l2.cpu_side - - def connectMemSide(self, bus): - self._cluster_id - bus.slave - try: - self.l2.mem_side = bus.slave - except AttributeError: - for cpu in self.cpus: - cpu.connectAllPorts(bus) - - -def addCaches(system, last_cache_level): - cluster_mem_bus = system.membus - assert last_cache_level >= 1 and last_cache_level <= 3 - for cluster in system._clusters: - cluster.addL1() - if last_cache_level > 1: - for cluster in system._clusters: - cluster.addL2(cluster.clk_domain) - if last_cache_level > 2: - max_clock_cluster = max(system._clusters, - key=lambda c: c.clk_domain.clock[0]) - system.l3 = devices.L3(clk_domain=max_clock_cluster.clk_domain) - system.toL3Bus = L2XBar(width=64) - system.toL3Bus.master = system.l3.cpu_side - system.l3.mem_side = system.membus.slave - cluster_mem_bus = system.toL3Bus - - return cluster_mem_bus - - def main(): parser = argparse.ArgumentParser( description="Generic ARM big.LITTLE configuration") @@ -210,24 +143,6 @@ def main(): options = parser.parse_args() - if options.atomic: - cpu_config = { 'cpu' : AtomicSimpleCPU } - big_cpu_config, little_cpu_config = cpu_config, cpu_config - else: - big_cpu_config = { 'cpu' : CpuConfig.get("arm_detailed"), - 'l1i' : devices.L1I, - 'l1d' : devices.L1D, - 'wcache' : devices.WalkCache, - 'l2' : devices.L2 } - little_cpu_config = { 'cpu' : MinorCPU, - 'l1i' : devices.L1I, - 'l1d' : devices.L1D, - 'wcache' : devices.WalkCache, - 'l2' : devices.L2 } - - big_cpu_class = big_cpu_config['cpu'] - little_cpu_class = little_cpu_config['cpu'] - kernel_cmd = [ "earlyprintk=pl011,0x1c090000", "console=ttyAMA0", @@ -243,41 +158,49 @@ def main(): root = Root(full_system=True) - assert big_cpu_class.memory_mode() == little_cpu_class.memory_mode() disks = default_disk if len(options.disk) == 0 else options.disk - system = createSystem(options.kernel, big_cpu_class.memory_mode(), - options.bootscript, disks=disks) + system = createSystem(options.kernel, options.bootscript, disks=disks) root.system = system system.boot_osflags = " ".join(kernel_cmd) - # big cluster - if options.big_cpus > 0: - system.bigCluster = CpuCluster() - system.bigCluster.addCPUs(big_cpu_config, options.big_cpus, - options.big_cpu_clock) + AtomicCluster = devices.AtomicCluster + if options.big_cpus + options.little_cpus == 0: + m5.util.panic("Empty CPU clusters") - # LITTLE cluster + # big cluster + if options.big_cpus > 0: + if options.atomic: + system.bigCluster = AtomicCluster(system, options.big_cpus, + options.big_cpu_clock) + else: + system.bigCluster = BigCluster(system, options.big_cpus, + options.big_cpu_clock) + mem_mode = system.bigCluster.memoryMode() + # little cluster if options.little_cpus > 0: - system.littleCluster = CpuCluster() - system.littleCluster.addCPUs(little_cpu_config, options.little_cpus, - options.little_cpu_clock) + if options.atomic: + system.littleCluster = AtomicCluster(system, options.little_cpus, + options.little_cpu_clock) - # add caches - if options.caches: - cluster_mem_bus = addCaches(system, options.last_cache_level) - else: - if big_cpu_class.require_caches(): - m5.util.panic("CPU model %s requires caches" % str(big_cpu_class)) - if little_cpu_class.require_caches(): - m5.util.panic("CPU model %s requires caches" % - str(little_cpu_class)) - cluster_mem_bus = system.membus - - # connect each cluster to the memory hierarchy - for cluster in system._clusters: - cluster.connectMemSide(cluster_mem_bus) + else: + system.littleCluster = LittleCluster(system, options.little_cpus, + options.little_cpu_clock) + mem_mode = system.littleCluster.memoryMode() + + if options.big_cpus > 0 and options.little_cpus > 0: + if system.bigCluster.memoryMode() != system.littleCluster.memoryMode(): + m5.util.panic("Memory mode missmatch among CPU clusters") + system.mem_mode = mem_mode + + # create caches + system.addCaches(options.caches, options.last_cache_level) + if not options.caches: + if options.big_cpus > 0 and system.bigCluster.requireCaches(): + m5.util.panic("Big CPU model requires caches") + if options.little_cpus > 0 and system.littleCluster.requireCaches(): + m5.util.panic("Little CPU model requires caches") # Linux device tree system.dtb_filename = SysPaths.binary(options.dtb) -- 2.30.2