From 496d5ed3e1f7dad42b0c2ebe0050d84621be8f99 Mon Sep 17 00:00:00 2001 From: Tiago Muck Date: Tue, 19 Feb 2019 15:58:33 -0600 Subject: [PATCH] mem-ruby: Hit latencies defined by the controllers Removed the icache/dcache hit latency parameters from the Sequencer. They were replaced by the mandatory queue enqueue latency that is now defined by the top-level cache controller. By default, the latency is defined by the mandatory_queue_latency parameter. When the latency depends on specific protocol states or on the request type, the protocol may override the mandatoryQueueLatency function. Change-Id: I72e57a7ea49501ef81dc7f591bef14134274647c Signed-off-by: Tiago Muck Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/18413 Tested-by: kokoro Reviewed-by: Nikos Nikoleris Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power --- configs/ruby/GPU_RfO.py | 7 +++---- configs/ruby/MOESI_AMD_Base.py | 7 +++---- .../slicc_interface/AbstractController.cc | 3 ++- .../slicc_interface/AbstractController.hh | 10 ++++++++- src/mem/ruby/slicc_interface/Controller.py | 10 ++++++++- src/mem/ruby/system/GPUCoalescer.cc | 10 ++++----- src/mem/ruby/system/GPUCoalescer.hh | 5 ----- src/mem/ruby/system/GPUCoalescer.py | 1 - src/mem/ruby/system/Sequencer.cc | 21 +++---------------- src/mem/ruby/system/Sequencer.py | 7 +------ 10 files changed, 34 insertions(+), 47 deletions(-) diff --git a/configs/ruby/GPU_RfO.py b/configs/ruby/GPU_RfO.py index c9bda0bc9..1f4df38ea 100644 --- a/configs/ruby/GPU_RfO.py +++ b/configs/ruby/GPU_RfO.py @@ -115,8 +115,6 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache.create(options) self.sequencer = RubySequencer() - self.sequencer.icache_hit_latency = 2 - self.sequencer.dcache_hit_latency = 2 self.sequencer.version = self.seqCount() self.sequencer.icache = self.L1Icache self.sequencer.dcache = self.L1D0cache @@ -128,12 +126,13 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.sequencer1.version = self.seqCount() self.sequencer1.icache = self.L1Icache self.sequencer1.dcache = self.L1D1cache - self.sequencer1.icache_hit_latency = 2 - self.sequencer1.dcache_hit_latency = 2 self.sequencer1.ruby_system = ruby_system self.sequencer1.coreid = 1 self.sequencer1.is_cpu_sequencer = True + # Defines icache/dcache hit latency + self.mandatory_queue_latency = 2 + self.issue_latency = options.cpu_to_dir_latency self.send_evictions = send_evicts(options) diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py index 5c4bbe09c..a1faf1dfd 100644 --- a/configs/ruby/MOESI_AMD_Base.py +++ b/configs/ruby/MOESI_AMD_Base.py @@ -102,8 +102,6 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.L2cache.create(options) self.sequencer = RubySequencer() - self.sequencer.icache_hit_latency = 2 - self.sequencer.dcache_hit_latency = 2 self.sequencer.version = self.seqCount() self.sequencer.icache = self.L1Icache self.sequencer.dcache = self.L1D0cache @@ -115,12 +113,13 @@ class CPCntrl(CorePair_Controller, CntrlBase): self.sequencer1.version = self.seqCount() self.sequencer1.icache = self.L1Icache self.sequencer1.dcache = self.L1D1cache - self.sequencer1.icache_hit_latency = 2 - self.sequencer1.dcache_hit_latency = 2 self.sequencer1.ruby_system = ruby_system self.sequencer1.coreid = 1 self.sequencer1.is_cpu_sequencer = True + # Defines icache/dcache hit latency + self.mandatory_queue_latency = 2 + self.issue_latency = options.cpu_to_dir_latency self.send_evictions = send_evicts(options) diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc index 68edcba59..c953e8257 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited + * Copyright (c) 2017,2019 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -55,6 +55,7 @@ AbstractController::AbstractController(const Params *p) m_number_of_TBEs(p->number_of_TBEs), m_transitions_per_cycle(p->transitions_per_cycle), m_buffer_size(p->buffer_size), m_recycle_latency(p->recycle_latency), + m_mandatory_queue_latency(p->mandatory_queue_latency), memoryPort(csprintf("%s.memory", name()), this, ""), addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()) { diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh index 4d0654698..8888bd0a7 100644 --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited + * Copyright (c) 2017,2019 ARM Limited * All rights reserved. * * The license below extends only to copyright in the software and shall @@ -102,6 +102,13 @@ class AbstractController : public ClockedObject, public Consumer virtual Sequencer* getCPUSequencer() const = 0; virtual GPUCoalescer* getGPUCoalescer() const = 0; + // This latency is used by the sequencer when enqueueing requests. + // Different latencies may be used depending on the request type. + // This is the hit latency unless the top-level cache controller + // introduces additional cycles in the response path. + virtual Cycles mandatoryQueueLatency(const RubyRequestType& param_type) + { return m_mandatory_queue_latency; } + //! These functions are used by ruby system to read/write the data blocks //! that exist with in the controller. virtual void functionalRead(const Addr &addr, PacketPtr) = 0; @@ -195,6 +202,7 @@ class AbstractController : public ClockedObject, public Consumer const int m_transitions_per_cycle; const unsigned int m_buffer_size; Cycles m_recycle_latency; + const Cycles m_mandatory_queue_latency; //! Counter for the number of cycles when the transitions carried out //! were equal to the maximum allowed diff --git a/src/mem/ruby/slicc_interface/Controller.py b/src/mem/ruby/slicc_interface/Controller.py index 4d3c1900e..de48929b6 100644 --- a/src/mem/ruby/slicc_interface/Controller.py +++ b/src/mem/ruby/slicc_interface/Controller.py @@ -1,4 +1,4 @@ -# Copyright (c) 2017 ARM Limited +# Copyright (c) 2017,2019 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -61,5 +61,13 @@ class RubyController(ClockedObject): number_of_TBEs = Param.Int(256, "") ruby_system = Param.RubySystem("") + # This is typically a proxy to the icache/dcache hit latency. + # If the latency depends on the request type or protocol-specific states, + # the protocol may ignore this parameter by overriding the + # mandatoryQueueLatency function + mandatory_queue_latency = \ + Param.Cycles(1, "Default latency for requests added to the " \ + "mandatory queue on top-level controllers") + memory = MasterPort("Port for attaching a memory controller") system = Param.System(Parent.any, "system object parameter") diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index 8b8c9566f..5f8725249 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -143,8 +143,6 @@ GPUCoalescer::GPUCoalescer(const Params *p) assert(m_instCache_ptr); assert(m_dataCache_ptr); - m_data_cache_hit_latency = p->dcache_hit_latency; - m_runningGarnetStandalone = p->garnet_standalone; assumingRfOCoherence = p->assume_rfo; } @@ -950,12 +948,12 @@ GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) fatal_if(secondary_type == RubyRequestType_IFETCH, "there should not be any I-Fetch requests in the GPU Coalescer"); - // Send the message to the cache controller - fatal_if(m_data_cache_hit_latency == 0, - "should not have a latency of zero"); + Tick latency = cyclesToTicks( + m_controller->mandatoryQueueLatency(secondary_type)); + assert(latency > 0); assert(m_mandatory_q_ptr); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); } template diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index 6576ecb36..6e40238c1 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -266,11 +266,6 @@ class GPUCoalescer : public RubyPort CacheMemory* m_dataCache_ptr; CacheMemory* m_instCache_ptr; - // The cache access latency for this GPU data cache. This is assessed at the - // beginning of each access. This should be very similar to the - // implementation in Sequencer() as this is very much like a Sequencer - Cycles m_data_cache_hit_latency; - // We need to track both the primary and secondary request types. // The secondary request type comprises a subset of RubyRequestTypes that // are understood by the L1 Controller. A primary request type can be any diff --git a/src/mem/ruby/system/GPUCoalescer.py b/src/mem/ruby/system/GPUCoalescer.py index ec6429342..eeb05c42a 100644 --- a/src/mem/ruby/system/GPUCoalescer.py +++ b/src/mem/ruby/system/GPUCoalescer.py @@ -54,4 +54,3 @@ class RubyGPUCoalescer(RubyPort): "max outstanding cycles for a request before " \ "deadlock/livelock declared") garnet_standalone = Param.Bool(False, "") - dcache_hit_latency = Param.Cycles(1, "Data cache hit latency") diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc index 41ec6ea6c..a282995da 100644 --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -60,8 +60,6 @@ Sequencer::Sequencer(const Params *p) m_instCache_ptr = p->icache; m_dataCache_ptr = p->dcache; - m_data_cache_hit_latency = p->dcache_hit_latency; - m_inst_cache_hit_latency = p->icache_hit_latency; m_max_outstanding_requests = p->max_outstanding_requests; m_deadlock_threshold = p->deadlock_threshold; @@ -70,8 +68,6 @@ Sequencer::Sequencer(const Params *p) assert(m_deadlock_threshold > 0); assert(m_instCache_ptr != NULL); assert(m_dataCache_ptr != NULL); - assert(m_data_cache_hit_latency > 0); - assert(m_inst_cache_hit_latency > 0); m_runningGarnetStandalone = p->garnet_standalone; } @@ -650,23 +646,12 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type) printAddress(msg->getPhysicalAddress()), RubyRequestType_to_string(secondary_type)); - // The Sequencer currently assesses instruction and data cache hit latency - // for the top-level caches at the beginning of a memory access. - // TODO: Eventually, this latency should be moved to represent the actual - // cache access latency portion of the memory access. This will require - // changing cache controller protocol files to assess the latency on the - // access response path. - Cycles latency(0); // Initialize to zero to catch misconfigured latency - if (secondary_type == RubyRequestType_IFETCH) - latency = m_inst_cache_hit_latency; - else - latency = m_data_cache_hit_latency; - - // Send the message to the cache controller + Tick latency = cyclesToTicks( + m_controller->mandatoryQueueLatency(secondary_type)); assert(latency > 0); assert(m_mandatory_q_ptr != NULL); - m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(latency)); + m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency); } template diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py index 2aede349d..47f51462b 100644 --- a/src/mem/ruby/system/Sequencer.py +++ b/src/mem/ruby/system/Sequencer.py @@ -63,12 +63,7 @@ class RubySequencer(RubyPort): icache = Param.RubyCache("") dcache = Param.RubyCache("") - # Cache latencies currently assessed at the beginning of each access - # NOTE: Setting these values to a value greater than one will result in - # O3 CPU pipeline bubbles and negatively impact performance - # TODO: Latencies should be migrated into each top-level cache controller - icache_hit_latency = Param.Cycles(1, "Inst cache hit latency") - dcache_hit_latency = Param.Cycles(1, "Data cache hit latency") + max_outstanding_requests = Param.Int(16, "max requests (incl. prefetches) outstanding") deadlock_threshold = Param.Cycles(500000, -- 2.30.2