if options.l2cache:
# Provide a clock for the L2 and the L1-to-L2 bus here as they
# are not connected using addTwoLevelCacheHierarchy. Use the
- # same clock as the CPUs, and set the L1-to-L2 bus width to 32
- # bytes (256 bits).
+ # same clock as the CPUs.
system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain,
size=options.l2_size,
assoc=options.l2_assoc)
- system.tol2bus = CoherentXBar(clk_domain = system.cpu_clk_domain,
- width = 32)
+ system.tol2bus = L2XBar(clk_domain = system.cpu_clk_domain)
system.l2.cpu_side = system.tol2bus.master
system.l2.mem_side = system.membus.slave
def childImage(self, ci):
self.image.child.image_file = ci
-class MemBus(CoherentXBar):
+class MemBus(SystemXBar):
badaddr_responder = BadAddr()
default = Self.badaddr_responder.pio
self.tsunami = BaseTsunami()
# Create the io bus to connect all device ports
- self.iobus = NoncoherentXBar()
+ self.iobus = IOXBar()
self.tsunami.attachIO(self.iobus)
self.tsunami.ide.pio = self.iobus.master
# generic system
mdesc = SysConfig()
self.readfile = mdesc.script()
- self.iobus = NoncoherentXBar()
+ self.iobus = IOXBar()
self.membus = MemBus()
self.bridge = Bridge(delay='50ns')
self.t1000 = T1000()
mdesc = SysConfig()
self.readfile = mdesc.script()
- self.iobus = NoncoherentXBar()
+ self.iobus = IOXBar()
self.membus = MemBus()
self.membus.badaddr_responder.warn_access = "warn"
self.bridge = Bridge(delay='50ns')
# generic system
mdesc = SysConfig()
self.readfile = mdesc.script()
- self.iobus = NoncoherentXBar()
+ self.iobus = IOXBar()
self.membus = MemBus()
self.bridge = Bridge(delay='50ns')
self.mem_ranges = [AddrRange('1GB')]
x86_sys.membus = MemBus()
# North Bridge
- x86_sys.iobus = NoncoherentXBar()
+ x86_sys.iobus = IOXBar()
x86_sys.bridge = Bridge(delay='50ns')
x86_sys.bridge.master = x86_sys.iobus.slave
x86_sys.bridge.slave = x86_sys.membus.master
def connectX86RubySystem(x86_sys):
# North Bridge
- x86_sys.iobus = NoncoherentXBar()
+ x86_sys.iobus = IOXBar()
# add the ide to the list of dma devices that later need to attach to
# dma controllers
# start with the system itself, using a multi-layer 1.5 GHz
# crossbar, delivering 64 bytes / 5 cycles (one header cycle)
# which amounts to 19.2 GByte/s per layer and thus per port
-system = System(membus = NoncoherentXBar(width = 16))
+system = System(membus = IOXBar(width = 16))
system.clk_domain = SrcClockDomain(clock = '1.5GHz',
voltage_domain =
VoltageDomain(voltage = '1V'))
if level != 0:
# Create a crossbar and add it to the subsystem, note that
# we do this even with a single element on this level
- xbar = CoherentXBar(width = 32)
+ xbar = L2XBar(width = 32)
subsys.xbar = xbar
if next_cache:
xbar.master = next_cache.cpu_side
if ntesters > 1:
# Create a crossbar and add it to the subsystem
- xbar = CoherentXBar(width = 32)
+ xbar = L2XBar(width = 32)
subsys.xbar = xbar
xbar.master = next_cache.cpu_side
for tester, checker in zip(testers, checkers):
if level != 0:
# Create a crossbar and add it to the subsystem, note that
# we do this even with a single element on this level
- xbar = CoherentXBar(width = 32)
+ xbar = L2XBar()
subsys.xbar = xbar
if next_cache:
xbar.master = next_cache.cpu_side
if ntesters > 1:
# Create a crossbar and add it to the subsystem
- xbar = CoherentXBar(width = 32)
+ xbar = L2XBar()
subsys.xbar = xbar
xbar.master = next_cache.cpu_side
for tester in testers:
system = System(cpu = cpus,
funcmem = SimpleMemory(in_addr_map = False),
- funcbus = NoncoherentXBar(),
+ funcbus = IOXBar(),
clk_domain = SrcClockDomain(clock = options.sys_clock),
mem_ranges = [AddrRange(options.mem_size)])
system.cpu[i].dtb.walker.port = ruby_port.slave
else:
MemClass = Simulation.setMemClass(options)
- system.membus = CoherentXBar()
+ system.membus = SystemXBar()
system.system_port = system.membus.slave
CacheConfig.config_cache(options, system)
MemConfig.config_mem(options, system)
crossbar = None
if len(system.mem_ranges) > 1:
- crossbar = NoncoherentXBar()
+ crossbar = IOXBar()
crossbars.append(crossbar)
dir_cntrl.memory = crossbar.slave
for j in xrange(options.numclusters):
clusters[j].id = j
for cluster in clusters:
- cluster.clusterbus = CoherentXBar(clock=busFrequency)
+ cluster.clusterbus = L2XBar(clock=busFrequency)
all_l1buses += [cluster.clusterbus]
cluster.cpus = [TimingSimpleCPU(cpu_id = i + cluster.id,
clock=options.frequency)
for j in xrange(options.numclusters):
clusters[j].id = j
for cluster in clusters:
- cluster.clusterbus = CoherentXBar(clock=busFrequency)
+ cluster.clusterbus = L2XBar(clock=busFrequency)
all_l1buses += [cluster.clusterbus]
cluster.cpus = [DerivO3CPU(cpu_id = i + cluster.id,
clock=options.frequency)
for j in xrange(options.numclusters):
clusters[j].id = j
for cluster in clusters:
- cluster.clusterbus = CoherentXBar(clock=busFrequency)
+ cluster.clusterbus = L2XBar(clock=busFrequency)
all_l1buses += [cluster.clusterbus]
cluster.cpus = [AtomicSimpleCPU(cpu_id = i + cluster.id,
clock=options.frequency)
# ----------------------
system = System(cpu = all_cpus, l1_ = all_l1s, l1bus_ = all_l1buses,
physmem = SimpleMemory(),
- membus = CoherentXBar(clock = busFrequency))
+ membus = SystemXBar(clock = busFrequency))
system.clock = '1GHz'
-system.toL2bus = CoherentXBar(clock = busFrequency)
+system.toL2bus = L2XBar(clock = busFrequency)
system.l2 = L2(size = options.l2size, assoc = 8)
# ----------------------
# Create a system, and add system wide objects
# ----------------------
system = System(cpu = cpus, physmem = SimpleMemory(),
- membus = CoherentXBar(clock = busFrequency))
+ membus = SystemXBar(clock = busFrequency))
system.clock = '1GHz'
-system.toL2bus = CoherentXBar(clock = busFrequency)
+system.toL2bus = L2XBar(clock = busFrequency)
system.l2 = L2(size = options.l2size, assoc = 8)
# ----------------------
from m5.params import *
from m5.proxy import *
-from XBar import CoherentXBar
+from XBar import L2XBar
from InstTracer import InstTracer
from CPUTracers import ExeTracer
from MemObject import MemObject
def addTwoLevelCacheHierarchy(self, ic, dc, l2c, iwc = None, dwc = None):
self.addPrivateSplitL1Caches(ic, dc, iwc, dwc)
- # Set a width of 32 bytes (256-bits), which is four times that
- # of the default bus. The clock of the CPU is inherited by
- # default.
- self.toL2Bus = CoherentXBar(width = 32)
+ self.toL2Bus = L2XBar()
self.connectCachedPorts(self.toL2Bus)
self.l2cache = l2c
self.toL2Bus.master = self.l2cache.cpu_side
# is the latency involved once a decision is made to forward the
# request. The response latency, is similar to the forward
# latency, but for responses rather than requests.
- frontend_latency = Param.Cycles(3, "Frontend latency")
- forward_latency = Param.Cycles(4, "Forward latency")
- response_latency = Param.Cycles(2, "Response latency")
+ frontend_latency = Param.Cycles("Frontend latency")
+ forward_latency = Param.Cycles("Forward latency")
+ response_latency = Param.Cycles("Response latency")
# Width governing the throughput of the crossbar
- width = Param.Unsigned(8, "Datapath width per port (bytes)")
+ width = Param.Unsigned("Datapath width per port (bytes)")
# The default port can be left unconnected, or be used to connect
# a default slave port
# The coherent crossbar additionally has snoop responses that are
# forwarded after a specific latency.
- snoop_response_latency = Param.Cycles(4, "Snoop response latency")
+ snoop_response_latency = Param.Cycles("Snoop response latency")
# An optional snoop filter
snoop_filter = Param.SnoopFilter(NULL, "Selected snoop filter")
lookup_latency = Param.Cycles(1, "Lookup latency")
system = Param.System(Parent.any, "System that the crossbar belongs to.")
+
+# We use a coherent crossbar to connect multiple masters to the L2
+# caches. Normally this crossbar would be part of the cache itself.
+class L2XBar(CoherentXBar):
+ # 256-bit crossbar by default
+ width = 32
+
+ # Assume that most of this is covered by the cache latencies, with
+ # no more than a single pipeline stage for any packet.
+ frontend_latency = 1
+ forward_latency = 0
+ response_latency = 1
+ snoop_response_latency = 1
+
+# One of the key coherent crossbar instances is the system
+# interconnect, tying together the CPU clusters, GPUs, and any I/O
+# coherent masters, and DRAM controllers.
+class SystemXBar(CoherentXBar):
+ # 128-bit crossbar by default
+ width = 16
+
+ # A handful pipeline stages for each portion of the latency
+ # contributions.
+ frontend_latency = 3
+ forward_latency = 4
+ response_latency = 2
+ snoop_response_latency = 4
+
+# In addition to the system interconnect, we typically also have one
+# or more on-chip I/O crossbars. Note that at some point we might want
+# to also define an off-chip I/O crossbar such as PCIe.
+class IOXBar(NoncoherentXBar):
+ # 128-bit crossbar by default
+ width = 16
+
+ # Assume a simpler datapath than a coherent crossbar, incuring
+ # less pipeline stages for decision making and forwarding of
+ # requests.
+ frontend_latency = 2
+ forward_latency = 1
+ response_latency = 2
Returns:
A bus that CPUs should use to connect to the shared cache.
"""
- system.toL2Bus = CoherentXBar(clk_domain=system.cpu_clk_domain)
+ system.toL2Bus = L2XBar(clk_domain=system.cpu_clk_domain)
system.l2c = L2Cache(clk_domain=system.cpu_clk_domain,
size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.master
def create_system(self):
system = System(physmem = self.mem_class(),
- membus = CoherentXBar(),
+ membus = SystemXBar(),
mem_mode = self.mem_mode)
system.system_port = system.membus.slave
system.physmem.port = system.membus.master
# system simulated
system = System(cpu = cpus,
physmem = SimpleMemory(),
- membus = CoherentXBar(width=16, snoop_filter = SnoopFilter()))
+ membus = SystemXBar(width=16, snoop_filter = SnoopFilter()))
# Dummy voltage domain for all our clock domains
system.voltage_domain = VoltageDomain()
system.clk_domain = SrcClockDomain(clock = '1GHz',
system.cpu_clk_domain = SrcClockDomain(clock = '2GHz',
voltage_domain = system.voltage_domain)
-system.toL2Bus = CoherentXBar(clk_domain = system.cpu_clk_domain, width=16,
- snoop_filter = SnoopFilter())
+system.toL2Bus = L2XBar(clk_domain = system.cpu_clk_domain,
+ snoop_filter = SnoopFilter())
system.l2c = L2Cache(clk_domain = system.cpu_clk_domain, size='64kB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.master
# system simulated
system = System(cpu = cpus,
physmem = SimpleMemory(),
- membus = CoherentXBar(width=16))
+ membus = SystemXBar())
# Dummy voltage domain for all our clock domains
system.voltage_domain = VoltageDomain()
system.clk_domain = SrcClockDomain(clock = '1GHz',
system.cpu_clk_domain = SrcClockDomain(clock = '2GHz',
voltage_domain = system.voltage_domain)
-system.toL2Bus = CoherentXBar(clk_domain = system.cpu_clk_domain, width=16)
+system.toL2Bus = L2XBar(clk_domain = system.cpu_clk_domain)
system.l2c = L2Cache(clk_domain = system.cpu_clk_domain, size='64kB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.master
ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores)
# system simulated
-system = System(cpu = cpus, physmem = ruby_memory, membus = CoherentXBar(),
+system = System(cpu = cpus, physmem = ruby_memory, membus = SystemXBar(),
mem_mode = "timing",
clk_domain = SrcClockDomain(clock = '1GHz'))
system = System(cpu = cpu,
physmem = ruby_memory,
- membus = CoherentXBar(),
+ membus = SystemXBar(),
mem_mode = "timing",
clk_domain = SrcClockDomain(clock = '1GHz'))
ruby_memory = ruby_config.generate("TwoLevel_SplitL1UnifiedL2.rb", nb_cores)
# system simulated
-system = System(cpu = cpus, physmem = ruby_memory, membus = CoherentXBar(),
+system = System(cpu = cpus, physmem = ruby_memory, membus = SystemXBar(),
clk_domain = SrcClockDomain(clock = '1GHz'))
# Create a seperate clock domain for components that should run at
# system simulated
system = System(cpu = cpu, physmem = DDR3_1600_x64(),
- membus = NoncoherentXBar(width = 16),
+ membus = IOXBar(width = 16),
clk_domain = SrcClockDomain(clock = '1GHz',
voltage_domain =
VoltageDomain()))
# system simulated
system = System(cpu = cpu, physmem = SimpleMemory(),
- membus = NoncoherentXBar(width = 16),
+ membus = IOXBar(width = 16),
clk_domain = SrcClockDomain(clock = '1GHz',
voltage_domain =
VoltageDomain()))