ruby: Detect garnet network-level deadlock.
author Jieming Yin <jieming.yin@amd.com>
Thu, 15 Dec 2016 21:59:17 +0000 (16:59 -0500)
committer Jieming Yin <jieming.yin@amd.com>
Thu, 15 Dec 2016 21:59:17 +0000 (16:59 -0500)
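This patch detects Garnet network-level deadlock by monitoring the
network interfaces. Each network interface counts, per virtual network,
the consecutive failed attempts to allocate a virtual channel for a
message; the count is reset whenever an idle virtual channel is found.
If the count exceeds the configurable threshold
(--garnet-deadlock-threshold, default 50000), a possible deadlock is
reported and the simulation panics.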

configs/network/Network.py
src/mem/ruby/network/garnet2.0/GarnetNetwork.py
src/mem/ruby/network/garnet2.0/NetworkInterface.cc
src/mem/ruby/network/garnet2.0/NetworkInterface.hh

index 3c15a4f79acdf961b3b3b4d474190fcea7151ea1..1fbb38167060ed3cb670586d41f8fad415ae3960 100644 (file)
@@ -70,6 +70,9 @@ def define_options(parser):
                       default=False,
                       help="""enable network fault model:
                             see src/mem/ruby/network/fault_model/""")
+    parser.add_option("--garnet-deadlock-threshold", action="store",
+                      type="int", default=50000,
+                      help="network-level deadlock threshold.")
 
 
 def create_network(options, ruby):
@@ -103,6 +106,7 @@ def init_network(options, network, InterfaceClass):
         network.vcs_per_vnet = options.vcs_per_vnet
         network.ni_flit_size = options.link_width_bits / 8
         network.routing_algorithm = options.routing_algorithm
+        network.garnet_deadlock_threshold = options.garnet_deadlock_threshold
 
     if options.network == "simple":
         network.setup_buffers()
index 70453278280ef81e9db1fa50b3ad4fcd06a39d60..00213d60f44c385adb1f1213e2d128bb81fb2d06 100644 (file)
@@ -46,6 +46,8 @@ class GarnetNetwork(RubyNetwork):
         "0: Weight-based Table, 1: XY, 2: Custom");
     enable_fault_model = Param.Bool(False, "enable network fault model");
     fault_model = Param.FaultModel(NULL, "network fault model");
+    garnet_deadlock_threshold = Param.UInt32(50000,
+                              "network-level deadlock threshold")
 
 class GarnetNetworkInterface(ClockedObject):
     type = 'GarnetNetworkInterface'
@@ -57,6 +59,8 @@ class GarnetNetworkInterface(ClockedObject):
                              "virtual channels per virtual network")
     virt_nets = Param.UInt32(Parent.number_of_virtual_networks,
                           "number of virtual networks")
+    garnet_deadlock_threshold = Param.UInt32(Parent.garnet_deadlock_threshold,
+                                      "network-level deadlock threshold")
 
 class GarnetRouter(BasicRouter):
     type = 'GarnetRouter'
index 6bdaf39af0cfe440d818690126dae0324d2d245e..0ac55d2605fb255d423cdaea18eda9edc1a9973c 100644 (file)
@@ -50,7 +50,9 @@ using m5::stl_helpers::deletePointers;
 NetworkInterface::NetworkInterface(const Params *p)
     : ClockedObject(p), Consumer(this), m_id(p->id),
       m_virtual_networks(p->virt_nets), m_vc_per_vnet(p->vcs_per_vnet),
-      m_num_vcs(m_vc_per_vnet * m_virtual_networks)
+      m_num_vcs(m_vc_per_vnet * m_virtual_networks),
+      m_deadlock_threshold(p->garnet_deadlock_threshold),
+      vc_busy_counter(m_virtual_networks, 0)
 {
     m_router_id = -1;
     m_vc_round_robin = 0;
@@ -314,9 +316,16 @@ NetworkInterface::calculateVC(int vnet)
 
         if (m_out_vc_state[(vnet*m_vc_per_vnet) + delta]->isInState(
                     IDLE_, curCycle())) {
+            vc_busy_counter[vnet] = 0;
             return ((vnet*m_vc_per_vnet) + delta);
         }
     }
+
+    vc_busy_counter[vnet] += 1;
+    panic_if(vc_busy_counter[vnet] > m_deadlock_threshold,
+        "%s: Possible network deadlock in vnet: %d at time: %llu \n",
+        name(), vnet, curTick());
+
     return -1;
 }
 
index 85e0145af1a2c115b6234a3582c63382ae377203..f1d1fd50507e87538ad08d36b4afd5fe4d29b608 100644 (file)
@@ -83,6 +83,7 @@ class NetworkInterface : public ClockedObject, public Consumer
     int m_vc_round_robin; // For round robin scheduling
     flitBuffer *outFlitQueue; // For modeling link contention
     flitBuffer *outCreditQueue;
+    int m_deadlock_threshold;
 
     NetworkLink *inNetLink;
     NetworkLink *outNetLink;
@@ -98,6 +99,8 @@ class NetworkInterface : public ClockedObject, public Consumer
     std::vector<MessageBuffer *> inNode_ptr;
     // The Message buffers that provides messages to the protocol
     std::vector<MessageBuffer *> outNode_ptr;
+    // When a vc stays busy for a long time, it indicates a deadlock
+    std::vector<int> vc_busy_counter;
 
     bool flitisizeMessage(MsgPtr msg_ptr, int vnet);
     int calculateVC(int vnet);