gpu-compute: Fix Python/C++ object hierarchy discrepancies
author Andreas Sandberg <andreas.sandberg@arm.com>
Mon, 27 Feb 2017 13:17:51 +0000 (13:17 +0000)
committer Andreas Sandberg <andreas.sandberg@arm.com>
Tue, 7 Mar 2017 11:50:35 +0000 (11:50 +0000)
The GPUCoalescer and the Shader classes have different base classes in
C++ and Python. This causes subtle bugs in SWIG and compilation errors
for PyBind.

Change-Id: I1ddd2a8ea43f083470538ddfea891347b21d14d8
Reviewed-by: Andreas Hansson <andreas.hansson@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/2228
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
Reviewed-by: Tony Gutierrez <anthony.gutierrez@amd.com>
Reviewed-by: Pierre-Yves Péneau <pierre-yves.peneau@lirmm.fr>
Reviewed-by: Bradford Beckmann <brad.beckmann@amd.com>
src/gpu-compute/shader.cc
src/gpu-compute/shader.hh
src/mem/ruby/system/GPUCoalescer.hh
src/mem/ruby/system/GPUCoalescer.py

index 6deaaab9462a22bec5f4c5503e6a5559cbce67bb..6c328a7fd84bcb98578e2219cd7e5ea7d11b74ef 100644 (file)
@@ -50,7 +50,7 @@
 #include "mem/ruby/system/RubySystem.hh"
 #include "sim/sim_exit.hh"
 
-Shader::Shader(const Params *p) : SimObject(p),
+Shader::Shader(const Params *p) : ClockedObject(p),
     clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr),
     cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing),
     hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync),
index 13afab977d48d7cf8fe6c66c498dd45601b11dbe..55c3feef9a819fbb63654067d188620891c0c206 100644 (file)
@@ -73,7 +73,7 @@ static const int LDS_SIZE = 65536;
 // Class Shader: This describes a single shader instance. Most
 // configurations will only have a single shader.
 
-class Shader : public SimObject
+class Shader : public ClockedObject
 {
   protected:
       // Shader's clock period in terms of number of ticks of curTime,
index 557d39235bdc718eca0b313454dcd584bd731b46..2b42e19331f083664efbfb99a3da014c0697f6dc 100644 (file)
@@ -49,7 +49,7 @@
 #include "mem/request.hh"
 #include "mem/ruby/common/Address.hh"
 #include "mem/ruby/common/Consumer.hh"
-#include "mem/ruby/system/RubyPort.hh"
+#include "mem/ruby/system/Sequencer.hh"
 
 class DataBlock;
 class CacheMsg;
@@ -255,10 +255,6 @@ class GPUCoalescer : public RubyPort
 
     bool handleLlsc(Addr address, GPUCoalescerRequest* request);
 
-    // Private copy constructor and assignment operator
-    GPUCoalescer(const GPUCoalescer& obj);
-    GPUCoalescer& operator=(const GPUCoalescer& obj);
-
     class IssueEvent : public Event
     {
       private:
@@ -370,6 +366,11 @@ class GPUCoalescer : public RubyPort
     std::vector<Stats::Histogram *> m_InitialToForwardDelayHist;
     std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist;
     std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist;
+
+private:
+    // Private copy constructor and assignment operator
+    GPUCoalescer(const GPUCoalescer& obj);
+    GPUCoalescer& operator=(const GPUCoalescer& obj);
 };
 
 inline std::ostream&
index 0c19f875d1164523e1de3fabaa2db871684baa78..101a5fe30f30dd2884a7251cb07077e62d4a72be 100644 (file)
@@ -36,7 +36,7 @@ from m5.params import *
 from m5.proxy import *
 from Sequencer import *
 
-class RubyGPUCoalescer(RubySequencer):
+class RubyGPUCoalescer(RubyPort):
    type = 'RubyGPUCoalescer'
    cxx_class = 'GPUCoalescer'
    cxx_header = "mem/ruby/system/GPUCoalescer.hh"
@@ -46,3 +46,11 @@ class RubyGPUCoalescer(RubySequencer):
                                 "max requests (incl. prefetches) outstanding")
    assume_rfo = Param.Bool(True, "assume protocol implementes Read for "
                            "Ownership coherence");
+
+   icache = Param.RubyCache("")
+   dcache = Param.RubyCache("")
+   deadlock_threshold = Param.Cycles(500000,
+       "max outstanding cycles for a request before " \
+       "deadlock/livelock declared")
+   garnet_standalone = Param.Bool(False, "")
+   dcache_hit_latency = Param.Cycles(1, "Data cache hit latency")