mem-ruby: Hit latencies defined by the controllers

author Tiago Muck <tiago.muck@arm.com>

Tue, 19 Feb 2019 21:58:33 +0000 (15:58 -0600)

committer Tiago Mück <tiago.muck@arm.com>

Tue, 14 May 2019 22:01:12 +0000 (22:01 +0000)
author Tiago Muck <tiago.muck@arm.com>
Tue, 19 Feb 2019 21:58:33 +0000 (15:58 -0600)
committer Tiago Mück <tiago.muck@arm.com>
Tue, 14 May 2019 22:01:12 +0000 (22:01 +0000)
diff --git a/configs/ruby/GPU_RfO.py b/configs/ruby/GPU_RfO.py

index c9bda0bc998415c770b0858db7a281ac642b9e61..1f4df38eaa956846a9caf4e1bb99a57b17c74d1f 100644 (file)
--- a/configs/ruby/GPU_RfO.py
+++ b/configs/ruby/GPU_RfO.py
@@ -115,8 +115,6 @@ class CPCntrl(CorePair_Controller, CntrlBase):
          self.L2cache.create(options)
  
          self.sequencer = RubySequencer()
-        self.sequencer.icache_hit_latency = 2
-        self.sequencer.dcache_hit_latency = 2
          self.sequencer.version = self.seqCount()
          self.sequencer.icache = self.L1Icache
          self.sequencer.dcache = self.L1D0cache
@@ -128,12 +126,13 @@ class CPCntrl(CorePair_Controller, CntrlBase):
          self.sequencer1.version = self.seqCount()
          self.sequencer1.icache = self.L1Icache
          self.sequencer1.dcache = self.L1D1cache
-        self.sequencer1.icache_hit_latency = 2
-        self.sequencer1.dcache_hit_latency = 2
          self.sequencer1.ruby_system = ruby_system
          self.sequencer1.coreid = 1
          self.sequencer1.is_cpu_sequencer = True
  
+        # Defines icache/dcache hit latency
+        self.mandatory_queue_latency = 2
+
          self.issue_latency = options.cpu_to_dir_latency
          self.send_evictions = send_evicts(options)
  
diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py

index 5c4bbe09c61e182bc7d1bd07be9cf8f5efd830e2..a1faf1dfd89eae3238741896ba6f8c51b1b3fdea 100644 (file)
--- a/configs/ruby/MOESI_AMD_Base.py
+++ b/configs/ruby/MOESI_AMD_Base.py
@@ -102,8 +102,6 @@ class CPCntrl(CorePair_Controller, CntrlBase):
          self.L2cache.create(options)
  
          self.sequencer = RubySequencer()
-        self.sequencer.icache_hit_latency = 2
-        self.sequencer.dcache_hit_latency = 2
          self.sequencer.version = self.seqCount()
          self.sequencer.icache = self.L1Icache
          self.sequencer.dcache = self.L1D0cache
@@ -115,12 +113,13 @@ class CPCntrl(CorePair_Controller, CntrlBase):
          self.sequencer1.version = self.seqCount()
          self.sequencer1.icache = self.L1Icache
          self.sequencer1.dcache = self.L1D1cache
-        self.sequencer1.icache_hit_latency = 2
-        self.sequencer1.dcache_hit_latency = 2
          self.sequencer1.ruby_system = ruby_system
          self.sequencer1.coreid = 1
          self.sequencer1.is_cpu_sequencer = True
  
+        # Defines icache/dcache hit latency
+        self.mandatory_queue_latency = 2
+
          self.issue_latency = options.cpu_to_dir_latency
          self.send_evictions = send_evicts(options)
  
diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc

index 68edcba595b56cb69495de01e405f61d70246362..c953e82571c78489b12e2d8e92debb22456c8962 100644 (file)
--- a/src/mem/ruby/slicc_interface/AbstractController.cc
+++ b/src/mem/ruby/slicc_interface/AbstractController.cc
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017,2019 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
@@ -55,6 +55,7 @@ AbstractController::AbstractController(const Params *p)
        m_number_of_TBEs(p->number_of_TBEs),
        m_transitions_per_cycle(p->transitions_per_cycle),
        m_buffer_size(p->buffer_size), m_recycle_latency(p->recycle_latency),
+      m_mandatory_queue_latency(p->mandatory_queue_latency),
        memoryPort(csprintf("%s.memory", name()), this, ""),
        addrRanges(p->addr_ranges.begin(), p->addr_ranges.end())
  {
diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh

index 4d065469802e6dc83d5747f952b7e1b4ad305417..8888bd0a7df0d7c1298104e009a2e3f4cee67ee4 100644 (file)
--- a/src/mem/ruby/slicc_interface/AbstractController.hh
+++ b/src/mem/ruby/slicc_interface/AbstractController.hh
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2017 ARM Limited
+ * Copyright (c) 2017,2019 ARM Limited
   * All rights reserved.
   *
   * The license below extends only to copyright in the software and shall
@@ -102,6 +102,13 @@ class AbstractController : public ClockedObject, public Consumer
      virtual Sequencer* getCPUSequencer() const = 0;
      virtual GPUCoalescer* getGPUCoalescer() const = 0;
  
+    // This latency is used by the sequencer when enqueueing requests.
+    // Different latencies may be used depending on the request type.
+    // This is the hit latency unless the top-level cache controller
+    // introduces additional cycles in the response path.
+    virtual Cycles mandatoryQueueLatency(const RubyRequestType& param_type)
+    { return m_mandatory_queue_latency; }
+
      //! These functions are used by ruby system to read/write the data blocks
      //! that exist with in the controller.
      virtual void functionalRead(const Addr &addr, PacketPtr) = 0;
@@ -195,6 +202,7 @@ class AbstractController : public ClockedObject, public Consumer
      const int m_transitions_per_cycle;
      const unsigned int m_buffer_size;
      Cycles m_recycle_latency;
+    const Cycles m_mandatory_queue_latency;
  
      //! Counter for the number of cycles when the transitions carried out
      //! were equal to the maximum allowed
diff --git a/src/mem/ruby/slicc_interface/Controller.py b/src/mem/ruby/slicc_interface/Controller.py

index 4d3c1900e1a9304a90a36a082670bc0bf3249034..de48929b6eb189818ca1198f6e42cdb57edf3800 100644 (file)
--- a/src/mem/ruby/slicc_interface/Controller.py
+++ b/src/mem/ruby/slicc_interface/Controller.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2017 ARM Limited
+# Copyright (c) 2017,2019 ARM Limited
  # All rights reserved.
  #
  # The license below extends only to copyright in the software and shall
@@ -61,5 +61,13 @@ class RubyController(ClockedObject):
      number_of_TBEs = Param.Int(256, "")
      ruby_system = Param.RubySystem("")
  
+    # This is typically a proxy to the icache/dcache hit latency.
+    # If the latency depends on the request type or protocol-specific states,
+    # the protocol may ignore this parameter by overriding the
+    # mandatoryQueueLatency function
+    mandatory_queue_latency = \
+        Param.Cycles(1, "Default latency for requests added to the " \
+                        "mandatory queue on top-level controllers")
+
      memory = MasterPort("Port for attaching a memory controller")
      system = Param.System(Parent.any, "system object parameter")
diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc

index 8b8c9566f6c36147b4a2df318dd080ef0e0940c3..5f8725249a204065917f2329fbf921e85a25318a 100644 (file)
--- a/src/mem/ruby/system/GPUCoalescer.cc
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -143,8 +143,6 @@ GPUCoalescer::GPUCoalescer(const Params *p)
      assert(m_instCache_ptr);
      assert(m_dataCache_ptr);
  
-    m_data_cache_hit_latency = p->dcache_hit_latency;
-
      m_runningGarnetStandalone = p->garnet_standalone;
      assumingRfOCoherence = p->assume_rfo;
  }
@@ -950,12 +948,12 @@ GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
      fatal_if(secondary_type == RubyRequestType_IFETCH,
               "there should not be any I-Fetch requests in the GPU Coalescer");
  
-    // Send the message to the cache controller
-    fatal_if(m_data_cache_hit_latency == 0,
-             "should not have a latency of zero");
+    Tick latency = cyclesToTicks(
+                        m_controller->mandatoryQueueLatency(secondary_type));
+    assert(latency > 0);
  
      assert(m_mandatory_q_ptr);
-    m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
+    m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
  }
  
  template <class KEY, class VALUE>
diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh

index 6576ecb36c24b846ffc6449dbf98dc461ce08442..6e40238c13c95885c1f7c7a8fcf987ed731fe5a8 100644 (file)
--- a/src/mem/ruby/system/GPUCoalescer.hh
+++ b/src/mem/ruby/system/GPUCoalescer.hh
@@ -266,11 +266,6 @@ class GPUCoalescer : public RubyPort
      CacheMemory* m_dataCache_ptr;
      CacheMemory* m_instCache_ptr;
  
-    // The cache access latency for this GPU data cache. This is assessed at the
-    // beginning of each access. This should be very similar to the
-    // implementation in Sequencer() as this is very much like a Sequencer
-    Cycles m_data_cache_hit_latency;
-
      // We need to track both the primary and secondary request types.
      // The secondary request type comprises a subset of RubyRequestTypes that
      // are understood by the L1 Controller. A primary request type can be any
diff --git a/src/mem/ruby/system/GPUCoalescer.py b/src/mem/ruby/system/GPUCoalescer.py

index ec6429342f22b11f55976a0404188afb89d93cac..eeb05c42a1f0454cfa43e09828fc52a560c235bf 100644 (file)
--- a/src/mem/ruby/system/GPUCoalescer.py
+++ b/src/mem/ruby/system/GPUCoalescer.py
@@ -54,4 +54,3 @@ class RubyGPUCoalescer(RubyPort):
         "max outstanding cycles for a request before " \
         "deadlock/livelock declared")
     garnet_standalone = Param.Bool(False, "")
-   dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc

index 41ec6ea6c9c1ae6be6c6b789b0ebb32ac926996f..a282995daa7aeb78a52ba6137b4eb751436002a6 100644 (file)
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -60,8 +60,6 @@ Sequencer::Sequencer(const Params *p)
  
      m_instCache_ptr = p->icache;
      m_dataCache_ptr = p->dcache;
-    m_data_cache_hit_latency = p->dcache_hit_latency;
-    m_inst_cache_hit_latency = p->icache_hit_latency;
      m_max_outstanding_requests = p->max_outstanding_requests;
      m_deadlock_threshold = p->deadlock_threshold;
  
@@ -70,8 +68,6 @@ Sequencer::Sequencer(const Params *p)
      assert(m_deadlock_threshold > 0);
      assert(m_instCache_ptr != NULL);
      assert(m_dataCache_ptr != NULL);
-    assert(m_data_cache_hit_latency > 0);
-    assert(m_inst_cache_hit_latency > 0);
  
      m_runningGarnetStandalone = p->garnet_standalone;
  }
@@ -650,23 +646,12 @@ Sequencer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
              printAddress(msg->getPhysicalAddress()),
              RubyRequestType_to_string(secondary_type));
  
-    // The Sequencer currently assesses instruction and data cache hit latency
-    // for the top-level caches at the beginning of a memory access.
-    // TODO: Eventually, this latency should be moved to represent the actual
-    // cache access latency portion of the memory access. This will require
-    // changing cache controller protocol files to assess the latency on the
-    // access response path.
-    Cycles latency(0);  // Initialize to zero to catch misconfigured latency
-    if (secondary_type == RubyRequestType_IFETCH)
-        latency = m_inst_cache_hit_latency;
-    else
-        latency = m_data_cache_hit_latency;
-
-    // Send the message to the cache controller
+    Tick latency = cyclesToTicks(
+                        m_controller->mandatoryQueueLatency(secondary_type));
      assert(latency > 0);
  
      assert(m_mandatory_q_ptr != NULL);
-    m_mandatory_q_ptr->enqueue(msg, clockEdge(), cyclesToTicks(latency));
+    m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
  }
  
  template <class KEY, class VALUE>
diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py

index 2aede349d2c42b7a61e42c60f491e4abb53b0385..47f51462b02fda6f66161eec7690e66daf18b7c8 100644 (file)
--- a/src/mem/ruby/system/Sequencer.py
+++ b/src/mem/ruby/system/Sequencer.py
@@ -63,12 +63,7 @@ class RubySequencer(RubyPort):
  
     icache = Param.RubyCache("")
     dcache = Param.RubyCache("")
-   # Cache latencies currently assessed at the beginning of each access
-   # NOTE: Setting these values to a value greater than one will result in
-   # O3 CPU pipeline bubbles and negatively impact performance
-   # TODO: Latencies should be migrated into each top-level cache controller
-   icache_hit_latency = Param.Cycles(1, "Inst cache hit latency")
-   dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")
+
     max_outstanding_requests = Param.Int(16,
         "max requests (incl. prefetches) outstanding")
     deadlock_threshold = Param.Cycles(500000,
author	Tiago Muck <tiago.muck@arm.com>
	Tue, 19 Feb 2019 21:58:33 +0000 (15:58 -0600)
committer	Tiago Mück <tiago.muck@arm.com>
	Tue, 14 May 2019 22:01:12 +0000 (22:01 +0000)
configs/ruby/GPU_RfO.py		patch | blob | history
configs/ruby/MOESI_AMD_Base.py		patch | blob | history
src/mem/ruby/slicc_interface/AbstractController.cc		patch | blob | history
src/mem/ruby/slicc_interface/AbstractController.hh		patch | blob | history
src/mem/ruby/slicc_interface/Controller.py		patch | blob | history
src/mem/ruby/system/GPUCoalescer.cc		patch | blob | history
src/mem/ruby/system/GPUCoalescer.hh		patch | blob | history
src/mem/ruby/system/GPUCoalescer.py		patch | blob | history
src/mem/ruby/system/Sequencer.cc		patch | blob | history
src/mem/ruby/system/Sequencer.py		patch | blob | history