mem: Perform write merging in the DRAM write queue
author Andreas Hansson <andreas.hansson@arm.com>
Mon, 19 Aug 2013 07:52:31 +0000 (03:52 -0400)
committer Andreas Hansson <andreas.hansson@arm.com>
Mon, 19 Aug 2013 07:52:31 +0000 (03:52 -0400)
This patch implements basic write merging in the DRAM to avoid
redundant bursts. When a new access is added to the queue it is
compared against the existing entries, and if it either intersects
or immediately succeeds/precedes an existing item, the two are
merged.

No attempt is currently made to avoid iterating over all of the
existing items when determining whether merging is possible.

src/mem/simple_dram.cc
src/mem/simple_dram.hh

index faeedbb2b9cf0d539b3691874e78827d1351a309..e8c1dfbcd93bcbd348b9ab18223cf91b7a5b24a3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2012 ARM Limited
+ * Copyright (c) 2010-2013 ARM Limited
  * All rights reserved
  *
  * The license below extends only to copyright in the software and shall
@@ -312,7 +312,10 @@ SimpleDRAM::addToReadQueue(PacketPtr pkt, unsigned int pktCount)
         bool foundInWrQ = false;
         list<DRAMPacket*>::const_iterator i;
         for (i = writeQueue.begin(); i != writeQueue.end(); ++i) {
-            if ((*i)->addr == addr && (*i)->size >= size){
+            // check if the read is subsumed in the write entry we are
+            // looking at
+            if ((*i)->addr <= addr &&
+                (addr + size) <= ((*i)->addr + (*i)->size)) {
                 foundInWrQ = true;
                 servicedByWrQ++;
                 pktsServicedByWrQ++;
@@ -394,6 +397,10 @@ SimpleDRAM::processWriteEvent()
 
         chooseNextWrite();
         DRAMPacket* dram_pkt = writeQueue.front();
+
+        // sanity check
+        assert(dram_pkt->size <= burstSize);
+
         // What's the earliest the request can be put on the bus
         Tick schedTime = std::max(curTick(), busBusyUntil);
 
@@ -513,23 +520,79 @@ SimpleDRAM::addToWriteQueue(PacketPtr pkt, unsigned int pktCount)
         writePktSize[ceilLog2(size)]++;
         writeBursts++;
 
-        DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
+        // see if we can merge with an existing item in the write
+        // queue and keep track of whether we have merged or not, as
+        // there is only ever one item to merge with
+        bool merged = false;
+        auto w = writeQueue.begin();
+
+        while(!merged && w != writeQueue.end()) {
+            // either of the two could be first, if they are the same
+            // it does not matter which way we go
+            if ((*w)->addr >= addr) {
+                if ((addr + size) >= ((*w)->addr + (*w)->size)) {
+                    // check if the existing one is completely
+                    // subsumed in the new one
+                    DPRINTF(DRAM, "Merging write covering existing burst\n");
+                    merged = true;
+                    // update both the address and the size
+                    (*w)->addr = addr;
+                    (*w)->size = size;
+                } else if ((addr + size) >= (*w)->addr &&
+                           ((*w)->addr + (*w)->size - addr) <= burstSize) {
+                    // the new one is just before or partially
+                    // overlapping with the existing one, and together
+                    // they fit within a burst
+                    DPRINTF(DRAM, "Merging write before existing burst\n");
+                    merged = true;
+                    // grow the size first, while the old start address is
+                    // still available, and only then move the start address
+                    (*w)->size = (*w)->addr + (*w)->size - addr;
+                    (*w)->addr = addr;
+                }
+            } else {
+                if (((*w)->addr + (*w)->size) >= (addr + size)) {
+                    // check if the new one is completely subsumed in the
+                    // existing one
+                    DPRINTF(DRAM, "Merging write into existing burst\n");
+                    merged = true;
+                    // no adjustments necessary
+                } else if (((*w)->addr + (*w)->size) >= addr &&
+                           (addr + size - (*w)->addr) <= burstSize) {
+                    // the existing one is just before or partially
+                    // overlapping with the new one, and together
+                    // they fit within a burst
+                    DPRINTF(DRAM, "Merging write after existing burst\n");
+                    merged = true;
+                    // the address is right, and only the size has
+                    // to be adjusted
+                    (*w)->size = addr + size - (*w)->addr;
+                }
+            }
+            ++w;
+        }
 
-        assert(writeQueue.size() < writeBufferSize);
-        wrQLenPdf[writeQueue.size()]++;
+        // if the item was not merged we need to create a new write
+        // and enqueue it
+        if (!merged) {
+            DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
 
-        DPRINTF(DRAM, "Adding to write queue\n");
+            assert(writeQueue.size() < writeBufferSize);
+            wrQLenPdf[writeQueue.size()]++;
 
-        writeQueue.push_back(dram_pkt);
+            DPRINTF(DRAM, "Adding to write queue\n");
 
-        // Update stats
-        uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
-        assert(bank_id < ranksPerChannel * banksPerRank);
-        perBankWrReqs[bank_id]++;
+            writeQueue.push_back(dram_pkt);
 
-        avgWrQLen = writeQueue.size();
+            // Update stats
+            uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
+            assert(bank_id < ranksPerChannel * banksPerRank);
+            perBankWrReqs[bank_id]++;
+
+            avgWrQLen = writeQueue.size();
+        }
 
-        bytesConsumedWr += dram_pkt->size;
+        bytesConsumedWr += size;
         bytesWritten += burstSize;
 
         // Starting address of next dram pkt (aligend to burstSize boundary)
@@ -1077,6 +1140,9 @@ SimpleDRAM::moveToRespQ()
     DRAMPacket* dram_pkt = readQueue.front();
     readQueue.pop_front();
 
+    // sanity check
+    assert(dram_pkt->size <= burstSize);
+
     // Insert into response queue sorted by readyTime
     // It will be sent back to the requestor at its
     // readyTime
index 313ad067b5ff3b3b5d31a44c5293741b6776a0f0..9473f010fd83dc11d0bc94b171e1d6eabe8a10c5 100644 (file)
@@ -209,13 +209,13 @@ class SimpleDRAM : public AbstractMemory
          * reason is to keep the address offset so we can accurately check
          * incoming read packets with packets in the write queue.
          */
-        const Addr addr;
+        Addr addr;
 
         /**
          * The size of this dram packet in bytes
          * It is always equal or smaller than DRAM burst size
          */
-        const unsigned int size;
+        unsigned int size;
 
         /**
          * A pointer to the BurstHelper if this DRAMPacket is a split packet