ruby: made Locked read/write atomic requests within ruby
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index db60bb11a1986f33061da4a12f9931037a7bdb0e..a342550eff7498ad62a7cc10771dee00bfd8d062 100644
@@ -43,6 +43,8 @@
 
 //Sequencer::Sequencer(int core_id, MessageBuffer* mandatory_q)
 
+#define LLSC_FAIL -2
+
 Sequencer::Sequencer(const string & name)
   :RubyPort(name)
 {
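
LLSC_FAIL names the bare -2 that makeRequest (below) returns when a store-conditional loses its reservation; the old "-2 means that the LLSC failed" comment is deleted further down. A sketch of makeRequest's full return-code vocabulary after this patch, with the other two cases inferred from the code below:

    // Return codes of Sequencer::makeRequest (sketch; only LLSC_FAIL is
    // actually #defined, the others are returned as bare literals).
    //   LLSC_FAIL (-2) : store-conditional failed, reservation was lost
    //   -1             : sequencer not ready, caller must retry
    //   >= 0           : unique request id, echoed to the hit callback
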
@@ -59,6 +61,8 @@ void Sequencer::init(const vector<string> & argv)
   m_instCache_ptr = NULL;
   m_dataCache_ptr = NULL;
   m_controller = NULL;
+  m_servicing_atomic = 200;  // sentinel: no processor is mid-atomic (see isReady)
+  m_atomics_counter = 0;     // RMW reads not yet matched by their RMW write
   for (size_t i=0; i<argv.size(); i+=2) {
     if ( argv[i] == "controller") {
       m_controller = RubySystem::getController(argv[i+1]); // args[i] = "L1Cache"
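
The two new members implement a simple ownership scheme: m_servicing_atomic records which processor's read-modify-write sequence currently owns the sequencer, with 200 as the "no owner" sentinel, and m_atomics_counter counts that sequence's outstanding RMW reads. isReady below also asserts proc_id != 100, apparently guarding against an invalid id; both literals assume real processor ids stay well under 100. A hedged sketch naming the magic numbers (the patch itself writes them inline):

    // Hypothetical named constants for the bare literals in this patch.
    static const int NO_PROC_SERVICING_ATOMIC = 200;  // idle m_servicing_atomic
    static const int INVALID_PROC_ID = 100;           // asserted in isReady()
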
@@ -104,6 +108,7 @@ void Sequencer::wakeup() {
       WARN_MSG("Possible Deadlock detected");
       WARN_EXPR(request);
       WARN_EXPR(m_version);
+      WARN_EXPR(request->ruby_request.paddr);
       WARN_EXPR(keys.size());
       WARN_EXPR(current_time);
       WARN_EXPR(request->issue_time);
@@ -202,7 +207,9 @@ bool Sequencer::insertRequest(SequencerRequest* request) {
   line_addr.makeLineAddress();
   if ((request->ruby_request.type == RubyRequestType_ST) ||
       (request->ruby_request.type == RubyRequestType_RMW_Read) ||  
-      (request->ruby_request.type == RubyRequestType_RMW_Write)) {
+      (request->ruby_request.type == RubyRequestType_RMW_Write) ||
+      (request->ruby_request.type == RubyRequestType_Locked_Read) ||
+      (request->ruby_request.type == RubyRequestType_Locked_Write)) {
     if (m_writeRequestTable.exist(line_addr)) {
       m_writeRequestTable.lookup(line_addr) = request;
       //      return true;
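
Locked (LL/SC) requests are now tracked in the write-request table, keyed by line address, exactly like stores and RMW requests. The same five-way type test recurs in removeRequest and writeCallback below; a hypothetical helper (not in the patch) would keep the three sites in sync:

    // Sketch: factor out the test repeated in insertRequest,
    // removeRequest and writeCallback.
    static bool isWriteLikeRequest(RubyRequestType type)
    {
        return (type == RubyRequestType_ST) ||
               (type == RubyRequestType_RMW_Read) ||
               (type == RubyRequestType_RMW_Write) ||
               (type == RubyRequestType_Locked_Read) ||
               (type == RubyRequestType_Locked_Write);
    }
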
@@ -238,8 +245,10 @@ void Sequencer::removeRequest(SequencerRequest* srequest) {
   Address line_addr(ruby_request.paddr);
   line_addr.makeLineAddress();
   if ((ruby_request.type == RubyRequestType_ST) ||
-      (ruby_request.type == RubyRequestType_RMW_Read) ||
-      (ruby_request.type == RubyRequestType_RMW_Write)) {
+      (ruby_request.type == RubyRequestType_RMW_Read) ||
+      (ruby_request.type == RubyRequestType_RMW_Write) ||
+      (ruby_request.type == RubyRequestType_Locked_Read) ||
+      (ruby_request.type == RubyRequestType_Locked_Write)) {
     m_writeRequestTable.deallocate(line_addr);
   } else {
     m_readRequestTable.deallocate(line_addr);
@@ -258,12 +267,20 @@ void Sequencer::writeCallback(const Address& address, DataBlock& data) {
   removeRequest(request);
 
   assert((request->ruby_request.type == RubyRequestType_ST) ||
-         (request->ruby_request.type == RubyRequestType_RMW_Read) ||
-         (request->ruby_request.type == RubyRequestType_RMW_Write));
+         (request->ruby_request.type == RubyRequestType_RMW_Read) ||
+         (request->ruby_request.type == RubyRequestType_RMW_Write) ||
+         (request->ruby_request.type == RubyRequestType_Locked_Read) ||
+         (request->ruby_request.type == RubyRequestType_Locked_Write));
   // POLINA: the assumption is that atomics are only on data cache and not instruction cache
-  if (request->ruby_request.type == RubyRequestType_RMW_Read) {
+  if (request->ruby_request.type == RubyRequestType_Locked_Read) {
     m_dataCache_ptr->setLocked(address, m_version);
   }
+  else if (request->ruby_request.type == RubyRequestType_RMW_Read) {
+    m_controller->set_atomic(address);
+  }
+  else if (request->ruby_request.type == RubyRequestType_RMW_Write) {
+    m_controller->clear_atomic();
+  }
 
   hitCallback(request, data);
 }
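
writeCallback now distinguishes the two flavors of atomic. A completing Locked_Read (load-linked) plants a reservation on the line via the data cache's lock bit; the matching Locked_Write in makeRequest (below) tests and clears it. A completing RMW_Read instead tells the cache controller to hold the line until the RMW_Write releases it (set_atomic and clear_atomic are assumed to be added to the generated controller interface elsewhere in this patch series). An illustrative model of the reservation the LL/SC path relies on; the real bookkeeping lives in CacheMemory, not in this sketch:

    #include <stdint.h>
    #include <map>

    // Toy model of setLocked/isLocked/clearLocked: at most one sequencer
    // version holds a reservation per line address.
    struct ReservationTable {
        std::map<uint64_t, int> owner;                 // line addr -> version
        void setLocked(uint64_t line, int version)  { owner[line] = version; }
        void clearLocked(uint64_t line)             { owner.erase(line); }
        bool isLocked(uint64_t line, int version) const {
            std::map<uint64_t, int>::const_iterator it = owner.find(line);
            return it != owner.end() && it->second == version;
        }
    };
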
@@ -277,6 +294,7 @@ void Sequencer::readCallback(const Address& address, DataBlock& data) {
   removeRequest(request);
 
   assert((request->ruby_request.type == RubyRequestType_LD) ||
+        (request->ruby_request.type == RubyRequestType_RMW_Read) ||
          (request->ruby_request.type == RubyRequestType_IFETCH));
 
   hitCallback(request, data);
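
Because the controller now pins the line between the two halves of an RMW, the read half can also complete through the ordinary load path; readCallback's assertion therefore admits RMW_Read, and hitCallback (next hunk) copies the data out to the CPU for RMW_Read just as it does for loads and instruction fetches.
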
@@ -320,19 +338,29 @@ void Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data) {
   // update the data
   if (ruby_request.data != NULL) {
     if ((type == RubyRequestType_LD) ||
-        (type == RubyRequestType_IFETCH)) {
+        (type == RubyRequestType_IFETCH) ||
+        (type == RubyRequestType_RMW_Read)) {
       memcpy(ruby_request.data, data.getData(request_address.getOffset(), ruby_request.len), ruby_request.len);
     } else {
       data.setData(ruby_request.data, request_address.getOffset(), ruby_request.len);
     }
   }
-
+  if (type == RubyRequestType_RMW_Write) {
+    if (m_servicing_atomic != ruby_request.proc_id) {
+      assert(0);
+    }
+    assert(m_atomics_counter > 0);
+    m_atomics_counter--;
+    if (m_atomics_counter == 0) {
+      m_servicing_atomic = 200;
+    }
+  }
   m_hit_callback(srequest->id);
   delete srequest;
 }
 
 // Returns true if the sequencer already has a load or store outstanding
-bool Sequencer::isReady(const RubyRequest& request) const {
+bool Sequencer::isReady(const RubyRequest& request, bool dont_set) {
   // POLINA: check if we are currently flushing the write buffer, if so Ruby is returned as not ready
   // to simulate stalling of the front-end
   // Do we stall all the sequencers? If it is atomic instruction - yes!
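
The write half of an RMW closes out the sequence in hitCallback: it must come from the processor currently holding the sequencer (the if/assert(0) pair is a verbose assertion), the count of unmatched RMW reads is decremented, and ownership reverts to the 200 sentinel once nothing is outstanding. The same bookkeeping spelled more directly, as a sketch:

    // Equivalent to the RMW_Write branch above.
    assert(m_servicing_atomic == ruby_request.proc_id);
    assert(m_atomics_counter > 0);
    if (--m_atomics_counter == 0)
        m_servicing_atomic = 200;  // idle again: any processor may proceed
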
@@ -347,6 +375,34 @@ bool Sequencer::isReady(const RubyRequest& request) const {
     return false;
   }
 
+  assert(request.proc_id != 100);
+  if (m_servicing_atomic != 200 && m_servicing_atomic != request.proc_id) {
+    assert(m_atomics_counter > 0);
+    return false;
+  }
+  else {
+    if (!dont_set) {
+      if (request.type == RubyRequestType_RMW_Read) {
+        if (m_servicing_atomic == 200) {
+          assert(m_atomics_counter == 0);
+          m_servicing_atomic = request.proc_id;
+        }
+        else {
+          assert(m_servicing_atomic == request.proc_id);
+        }
+        m_atomics_counter++;
+      }
+      else {
+        if (m_servicing_atomic == request.proc_id) {
+          if (request.type != RubyRequestType_RMW_Write) {
+            m_servicing_atomic = 200;
+            m_atomics_counter = 0;
+          }
+        }
+      }
+    }
+  }
+
   return true;
 }
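
This is the heart of the patch: isReady serializes atomic sequences across processors. While one processor owns the sequencer, every other processor's requests are refused and must retry. An RMW_Read claims ownership when the sequencer is idle, or extends it when the owner issues another, bumping the counter; any request from the owner other than the closing RMW_Write abandons the sequence and resets the state. The new dont_set flag lets callers probe readiness without mutating that state, which is also why the method loses its const qualifier. A standalone model of the gate, with hypothetical names but logic mirroring the code above:

    #include <cassert>

    // Sketch of the ownership gate implemented inline in isReady().
    struct AtomicGate {
        static const int NO_OWNER = 200;   // sentinel from the patch
        int owner;                         // m_servicing_atomic
        int pending;                       // m_atomics_counter
        AtomicGate() : owner(NO_OWNER), pending(0) {}

        bool ready(int proc, bool rmw_read, bool rmw_write, bool probe_only) {
            if (owner != NO_OWNER && owner != proc) {
                assert(pending > 0);
                return false;              // another processor is mid-atomic
            }
            if (!probe_only) {
                if (rmw_read) {
                    if (owner == NO_OWNER) owner = proc;  // claim the sequencer
                    else assert(owner == proc);           // or extend ownership
                    ++pending;
                } else if (owner == proc && !rmw_write) {
                    owner = NO_OWNER;      // non-atomic request ends the run
                    pending = 0;
                }
            }
            return true;
        }
    };
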
 
@@ -355,7 +411,6 @@ bool Sequencer::empty() const {
 }
 
 
-// -2 means that the LLSC failed
 int64_t Sequencer::makeRequest(const RubyRequest & request)
 {
   assert(Address(request.paddr).getOffset() + request.len <= RubySystem::getBlockSizeBytes());
@@ -363,19 +418,28 @@ int64_t Sequencer::makeRequest(const RubyRequest & request)
     int64_t id = makeUniqueRequestID();
     SequencerRequest *srequest = new SequencerRequest(request, id, g_eventQueue_ptr->getTime());
     bool found = insertRequest(srequest);
-    if (!found)
-      if (request.type == RubyRequestType_RMW_Write) {
+    if (!found) {
+      if (request.type == RubyRequestType_Locked_Write) {
+        // NOTE: it is OK to check the locked flag here as the mandatory queue will be checked first
+        // ensuring that nothing comes between checking the flag and servicing the store
         if (!m_dataCache_ptr->isLocked(line_address(Address(request.paddr)), m_version)) {
-          return -2;
+          return LLSC_FAIL;
         }
         else {
           m_dataCache_ptr->clearLocked(line_address(Address(request.paddr)));
         }
       }
+      if (request.type == RubyRequestType_RMW_Write) {
+        m_controller->started_writes();
+      }
       issueRequest(request);
 
     // TODO: issue hardware prefetches here
     return id;
+    }
+    else {
+      assert(0);
+    }
   }
   else {
     return -1;
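
makeRequest now implements the store-conditional: a Locked_Write succeeds only if the reservation planted by its Locked_Read is still held, and otherwise returns LLSC_FAIL without issuing anything; on success the reservation is consumed. The in-code comment explains why the check is race-free: the mandatory queue is serviced first, so nothing can slip in between testing the flag and performing the store. An RMW_Write additionally notifies the controller that the write phase has begun (started_writes, assumed added alongside set_atomic/clear_atomic). A hypothetical CPU-side caller, using only the return codes defined in this file:

    // Sketch: interpreting Sequencer::makeRequest's return value.
    int64_t handle = sequencer->makeRequest(req);
    if (handle == -1) {
        // not ready (e.g. another core is mid-atomic): retry later
    } else if (handle == LLSC_FAIL) {
        // store-conditional lost its reservation: report SC failure
    } else {
        // issued; 'handle' identifies the request in the hit callback
    }
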
@@ -396,9 +460,9 @@ void Sequencer::issueRequest(const RubyRequest& request) {
   case RubyRequestType_ST:
     ctype = CacheRequestType_ST;
     break;
+  case RubyRequestType_Locked_Read:
+  case RubyRequestType_Locked_Write:
   case RubyRequestType_RMW_Read:
-    ctype = CacheRequestType_ATOMIC;
-    break;
   case RubyRequestType_RMW_Write:
     ctype = CacheRequestType_ATOMIC;
     break;
@@ -421,7 +485,7 @@ void Sequencer::issueRequest(const RubyRequest& request) {
   }
   Address line_addr(request.paddr);
   line_addr.makeLineAddress();
-  CacheMsg msg(line_addr, Address(request.paddr), ctype, Address(request.pc), amtype, request.len, PrefetchBit_No);
+  CacheMsg msg(line_addr, Address(request.paddr), ctype, Address(request.pc), amtype, request.len, PrefetchBit_No, request.proc_id);
 
   if (Debug::getProtocolTrace()) {
     g_system_ptr->getProfiler()->profileTransition("Seq", m_version, Address(request.paddr),
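
In issueRequest the four request flavors now share a single case via fall-through, all mapping to CacheRequestType_ATOMIC, and the requesting processor's id is threaded into the CacheMsg so the controller can attribute each message to the sequence tracked by set_atomic/started_writes/clear_atomic (the CacheMsg constructor is assumed to gain its proc_id parameter elsewhere in this patch series).
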
@@ -475,183 +539,3 @@ void Sequencer::checkCoherence(const Address& addr) {
 #endif
 }
 
-/*
-bool Sequencer::getRubyMemoryValue(const Address& addr, char* value,
-                                   unsigned int size_in_bytes )
-{
-    bool found = false;
-    const Address lineAddr = line_address(addr);
-    DataBlock data;
-    PhysAddress paddr(addr);
-    DataBlock* dataPtr = &data;
-
-    MachineID l2_mach = map_L2ChipId_to_L2Cache(addr, m_chip_ptr->getID() );
-    int l2_ver = l2_mach.num%RubyConfig::numberOfL2CachePerChip();
-
-    if (Protocol::m_TwoLevelCache) {
-      if(Protocol::m_CMP){
-        assert(n->m_L2Cache_L2cacheMemory_vec[l2_ver] != NULL);
-      }
-      else{
-        assert(n->m_L1Cache_cacheMemory_vec[m_version] != NULL);
-      }
-    }
-
-    if (n->m_L1Cache_L1IcacheMemory_vec[m_version]->tryCacheAccess(lineAddr, CacheRequestType_IFETCH, dataPtr)){
-      n->m_L1Cache_L1IcacheMemory_vec[m_version]->getMemoryValue(addr, value, size_in_bytes);
-      found = true;
-    } else if (n->m_L1Cache_L1DcacheMemory_vec[m_version]->tryCacheAccess(lineAddr, CacheRequestType_LD, dataPtr)){
-      n->m_L1Cache_L1DcacheMemory_vec[m_version]->getMemoryValue(addr, value, size_in_bytes);
-      found = true;
-    } else if (Protocol::m_CMP && n->m_L2Cache_L2cacheMemory_vec[l2_ver]->tryCacheAccess(lineAddr, CacheRequestType_LD, dataPtr)){
-      n->m_L2Cache_L2cacheMemory_vec[l2_ver]->getMemoryValue(addr, value, size_in_bytes);
-      found = true;
-    // } else if (n->TBE_TABLE_MEMBER_VARIABLE->isPresent(lineAddr)){
-//       ASSERT(n->TBE_TABLE_MEMBER_VARIABLE->isPresent(lineAddr));
-//       L1Cache_TBE tbeEntry = n->TBE_TABLE_MEMBER_VARIABLE->lookup(lineAddr);
-
-//       int offset = addr.getOffset();
-//       for(int i=0; i<size_in_bytes; ++i){
-//         value[i] = tbeEntry.getDataBlk().getByte(offset + i);
-//       }
-
-//       found = true;
-    } else {
-      // Address not found
-      //cout << "  " << m_chip_ptr->getID() << " NOT IN CACHE, Value at Directory is: " << (int) value[0] << endl;
-      n = dynamic_cast<Chip*>(g_system_ptr->getChip(map_Address_to_DirectoryNode(addr)/RubyConfig::numberOfDirectoryPerChip()));
-      int dir_version = map_Address_to_DirectoryNode(addr)%RubyConfig::numberOfDirectoryPerChip();
-      for(unsigned int i=0; i<size_in_bytes; ++i){
-        int offset = addr.getOffset();
-        value[i] = n->m_Directory_directory_vec[dir_version]->lookup(lineAddr).m_DataBlk.getByte(offset + i);
-      }
-      // Address not found
-      //WARN_MSG("Couldn't find address");
-      //WARN_EXPR(addr);
-      found = false;
-    }
-    return true;
-}
-
-bool Sequencer::setRubyMemoryValue(const Address& addr, char *value,
-                                   unsigned int size_in_bytes) {
-  char test_buffer[64];
-
-  // idea here is that coherent cache should find the
-  // latest data, the update it
-  bool found = false;
-  const Address lineAddr = line_address(addr);
-  PhysAddress paddr(addr);
-  DataBlock data;
-  DataBlock* dataPtr = &data;
-  Chip* n = dynamic_cast<Chip*>(m_chip_ptr);
-
-  MachineID l2_mach = map_L2ChipId_to_L2Cache(addr, m_chip_ptr->getID() );
-  int l2_ver = l2_mach.num%RubyConfig::numberOfL2CachePerChip();
-
-  assert(n->m_L1Cache_L1IcacheMemory_vec[m_version] != NULL);
-  assert(n->m_L1Cache_L1DcacheMemory_vec[m_version] != NULL);
-  if (Protocol::m_TwoLevelCache) {
-    if(Protocol::m_CMP){
-      assert(n->m_L2Cache_L2cacheMemory_vec[l2_ver] != NULL);
-    }
-    else{
-      assert(n->m_L1Cache_cacheMemory_vec[m_version] != NULL);
-    }
-  }
-
-  if (n->m_L1Cache_L1IcacheMemory_vec[m_version]->tryCacheAccess(lineAddr, CacheRequestType_IFETCH, dataPtr)){
-    n->m_L1Cache_L1IcacheMemory_vec[m_version]->setMemoryValue(addr, value, size_in_bytes);
-    found = true;
-  } else if (n->m_L1Cache_L1DcacheMemory_vec[m_version]->tryCacheAccess(lineAddr, CacheRequestType_LD, dataPtr)){
-    n->m_L1Cache_L1DcacheMemory_vec[m_version]->setMemoryValue(addr, value, size_in_bytes);
-    found = true;
-  } else if (Protocol::m_CMP && n->m_L2Cache_L2cacheMemory_vec[l2_ver]->tryCacheAccess(lineAddr, CacheRequestType_LD, dataPtr)){
-    n->m_L2Cache_L2cacheMemory_vec[l2_ver]->setMemoryValue(addr, value, size_in_bytes);
-    found = true;
-  } else {
-    // Address not found
-    n = dynamic_cast<Chip*>(g_system_ptr->getChip(map_Address_to_DirectoryNode(addr)/RubyConfig::numberOfDirectoryPerChip()));
-    int dir_version = map_Address_to_DirectoryNode(addr)%RubyConfig::numberOfDirectoryPerChip();
-    for(unsigned int i=0; i<size_in_bytes; ++i){
-      int offset = addr.getOffset();
-      n->m_Directory_directory_vec[dir_version]->lookup(lineAddr).m_DataBlk.setByte(offset + i, value[i]);
-    }
-    found = false;
-  }
-
-  if (found){
-    found = getRubyMemoryValue(addr, test_buffer, size_in_bytes);
-    assert(found);
-    if(value[0] != test_buffer[0]){
-      WARN_EXPR((int) value[0]);
-      WARN_EXPR((int) test_buffer[0]);
-      ERROR_MSG("setRubyMemoryValue failed to set value.");
-    }
-  }
-
-  return true;
-}
-*/
-/*
-
-void
-Sequencer::rubyMemAccess(const uint64 paddr, char* data, const int len, const AccessType type)
-{
-  if ( type == AccessType_Read || type == AccessType_Write ) {
-    // need to break up the packet data
-    uint64 guest_ptr = paddr;
-    Vector<DataBlock*> datablocks;
-    while (paddr + len != guest_ptr) {
-      Address addr(guest_ptr);
-      Address line_addr = line_address(addr);
-
-      int bytes_copied;
-      if (addr.getOffset() == 0) {
-        bytes_copied = (guest_ptr + RubyConfig::dataBlockBytes() > paddr + len)?
-          (paddr + len - guest_ptr):
-          RubyConfig::dataBlockBytes();
-      } else {
-        bytes_copied = RubyConfig::dataBlockBytes() - addr.getOffset();
-        if (guest_ptr + bytes_copied > paddr + len)
-          bytes_copied = paddr + len - guest_ptr;
-      }
-
-      // first we need to find all data blocks that have to be updated for a write
-      // and the highest block for a read
-     for(int i=0;i<RubyConfig::numberOfProcessors();i++) {
-        if (Protocol::m_TwoLevelCache){
-          if(m_chip_ptr->m_L1Cache_L1IcacheMemory_vec[i]->isTagPresent(line_address(addr)))
-            datablocks.insertAtBottom(&m_chip_ptr->m_L1Cache_L1IcacheMemory_vec[i]->lookup(line_addr).getDataBlk());
-          if(m_chip_ptr->m_L1Cache_L1DcacheMemory_vec[i]->isTagPresent(line_address(addr)))
-            datablocks.insertAtBottom(&m_chip_ptr->m_L1Cache_L1DcacheMemory_vec[i]->lookup(line_addr).getDataBlk());
-        } else {
-          if(m_chip_ptr->m_L1Cache_cacheMemory_vec[i]->isTagPresent(line_address(addr)))
-            datablocks.insertAtBottom(&m_chip_ptr->m_L1Cache_cacheMemory_vec[i]->lookup(line_addr).getDataBlk());
-        }
-      }
-      if (Protocol::m_TwoLevelCache){
-        int l2_bank = map_L2ChipId_to_L2Cache(addr, 0).num; // TODO: ONLY WORKS WITH CMP!!!
-        if (m_chip_ptr->m_L2Cache_L2cacheMemory_vec[l2_bank]->isTagPresent(line_address(Address(paddr)))) {
-          datablocks.insertAtBottom(&m_chip_ptr->m_L2Cache_L2cacheMemory_vec[l2_bank]->lookup(addr).getDataBlk());
-        }
-      }
-      assert(dynamic_cast<Chip*>(m_chip_ptr)->m_Directory_directory_vec.size() > map_Address_to_DirectoryNode(addr));
-      DirectoryMemory* dir = dynamic_cast<Chip*>(m_chip_ptr)->m_Directory_directory_vec[map_Address_to_DirectoryNode(addr)];
-      Directory_Entry& entry = dir->lookup(line_addr);
-      datablocks.insertAtBottom(&entry.getDataBlk());
-
-      if (pkt->isRead()){
-        datablocks[0]->copyData(pkt_data, addr.getOffset(), bytes_copied);
-      } else {// pkt->isWrite() {
-        for (int i=0;i<datablocks.size();i++)
-          datablocks[i]->setData(pkt_data, addr.getOffset(), bytes_copied);
-      }
-
-      guest_ptr += bytes_copied;
-      pkt_data += bytes_copied;
-      datablocks.clear();
-    }
-}
-
-*/