mem: Replacing bytesPerCacheLine with DRAM burstLength in SimpleDRAM

author Amin Farmahini <aminfar@gmail.com>

Mon, 19 Aug 2013 07:52:30 +0000 (03:52 -0400)

committer Amin Farmahini <aminfar@gmail.com>

Mon, 19 Aug 2013 07:52:30 +0000 (03:52 -0400)
author Amin Farmahini <aminfar@gmail.com>
Mon, 19 Aug 2013 07:52:30 +0000 (03:52 -0400)
committer Amin Farmahini <aminfar@gmail.com>
Mon, 19 Aug 2013 07:52:30 +0000 (03:52 -0400)
diff --git a/src/mem/SimpleDRAM.py b/src/mem/SimpleDRAM.py

index ec76542d8c2fd0301fd8c60681a3817094b19fea..b066b27def5ba194256bc868849a6355fbb33f0a 100644 (file)
--- a/src/mem/SimpleDRAM.py
+++ b/src/mem/SimpleDRAM.py
@@ -10,6 +10,9 @@
  # unmodified and in its entirety in all distributions of the software,
  # modified or unmodified, in source code or in binary form.
  #
+# Copyright (c) 2013 Amin Farmahini-Farahani
+# All rights reserved.
+#
  # Redistribution and use in source and binary forms, with or without
  # modification, are permitted provided that the following conditions are
  # met: redistributions of source code must retain the above copyright
@@ -118,7 +121,12 @@ class SimpleDRAM(AbstractMemory):
      static_backend_latency = Param.Latency("10ns", "Static backend latency")
  
      # the physical organisation of the DRAM
-    lines_per_rowbuffer = Param.Unsigned("Row buffer size in cache lines")
+    device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "\
+                                      "device/chip")
+    burst_length = Param.Unsigned("Burst lenght (BL) in beats")
+    device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
+                                           "device/chip")
+    devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
      ranks_per_channel = Param.Unsigned("Number of ranks per channel")
      banks_per_rank = Param.Unsigned("Number of banks per rank")
      # only used for the address mapping as the controller by
@@ -141,9 +149,9 @@ class SimpleDRAM(AbstractMemory):
      # time to complete a burst transfer, typically the burst length
      # divided by two due to the DDR bus, but by making it a parameter
      # it is easier to also evaluate SDR memories like WideIO.
-    # This parameter has to account for bus width and burst length.
-    # Adjustment also necessary if cache line size is greater than
-    # data size read/written by one full burst.
+    # This parameter has to account for burst length.
+    # Read/Write requests with data size larger than one full burst are broken
+    # down into multiple requests in the SimpleDRAM controller
      tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")
  
      # time taken to complete one refresh cycle (N rows in all banks)
@@ -170,15 +178,22 @@ class SimpleDRAM(AbstractMemory):
  
      # tRC  - assumed to be 4 * tRP
  
-    # burst length for an access derived from the cache line size
-
  # A single DDR3 x64 interface (one command and address bus), with
  # default timings based on DDR3-1600 4 Gbit parts in an 8x8
  # configuration, which would amount to 4 Gbyte of memory.
  class DDR3_1600_x64(SimpleDRAM):
-    # Assuming 64 byte cache lines, and a 1kbyte page size per module
+    # 8x8 configuration, 8 devices each with an 8-bit interface
+    device_bus_width = 8
+
+    # DDR3 is a BL8 device
+    burst_length = 8
+
+    # Each device has a page (row buffer) size of 1KB
      # (this depends on the memory density)
-    lines_per_rowbuffer = 128
+    device_rowbuffer_size = '1kB'
+
+    # 8x8 configuration, so 8 devices
+    devices_per_rank = 8
  
      # Use two ranks
      ranks_per_channel = 2
@@ -191,8 +206,8 @@ class DDR3_1600_x64(SimpleDRAM):
      tCL = '13.75ns'
      tRP = '13.75ns'
  
-    # Assuming 64 byte cache lines, across an x64
-    # interface, translates to BL8, 4 clocks @ 800 MHz
+    # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz.
+    # Note this is a BL8 DDR device.
      tBURST = '5ns'
  
      # DDR3, 4 Gbit has a tRFC of 240 CK and tCK = 1.25 ns
@@ -213,9 +228,18 @@ class DDR3_1600_x64(SimpleDRAM):
  # default timings based on a LPDDR2-1066 4 Gbit part in a 1x32
  # configuration.
  class LPDDR2_S4_1066_x32(SimpleDRAM):
-    # Assuming 64 byte cache lines, use a 1kbyte page size, this
-    # depends on the memory density
-    lines_per_rowbuffer = 16
+    # 1x32 configuration, 1 device with a 32-bit interface
+    device_bus_width = 32
+
+    # LPDDR2_S4 is a BL4 and BL8 device
+    burst_length = 8
+
+    # Each device has a page (row buffer) size of 1KB
+    # (this depends on the memory density)
+    device_rowbuffer_size = '1kB'
+
+    # 1x32 configuration, so 1 device
+    devices_per_rank = 1
  
      # Use a single rank
      ranks_per_channel = 1
@@ -232,10 +256,11 @@ class LPDDR2_S4_1066_x32(SimpleDRAM):
      # Pre-charge one bank 15 ns (all banks 18 ns)
      tRP = '15ns'
  
-    # Assuming 64 byte cache lines, across a x32 DDR interface
-    # translates to two BL8, 8 clocks @ 533 MHz. Note that this is a
-    # simplification
-    tBURST = '15ns'
+    # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
+    # Note this is a BL8 DDR device.
+    # Requests larger than 32 bytes are broken down into multiple requests
+    # in the SimpleDRAM controller
+    tBURST = '7.5ns'
  
      # LPDDR2-S4, 4 Gbit
      tRFC = '130ns'
@@ -251,9 +276,18 @@ class LPDDR2_S4_1066_x32(SimpleDRAM):
  # A single WideIO x128 interface (one command and address bus), with
  # default timings based on an estimated WIO-200 8 Gbit part.
  class WideIO_200_x128(SimpleDRAM):
-    # Assuming 64 byte cache lines, use a 4kbyte page size, this
-    # depends on the memory density
-    lines_per_rowbuffer = 64
+    # 1x128 configuration, 1 device with a 128-bit interface
+    device_bus_width = 128
+
+    # This is a BL4 device
+    burst_length = 4
+
+    # Each device has a page (row buffer) size of 4KB
+    # (this depends on the memory density)
+    device_rowbuffer_size = '4kB'
+
+    # 1x128 configuration, so 1 device
+    devices_per_rank = 1
  
      # Use one rank for a one-high die stack
      ranks_per_channel = 1
@@ -266,8 +300,8 @@ class WideIO_200_x128(SimpleDRAM):
      tCL = '18ns'
      tRP = '18ns'
  
-    # Assuming 64 byte cache lines, across an x128 SDR interface,
-    # translates to BL4, 4 clocks @ 200 MHz
+    # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz.
+    # Note this is a BL4 SDR device.
      tBURST = '20ns'
  
      # WIO 8 Gb
@@ -287,9 +321,18 @@ class WideIO_200_x128(SimpleDRAM):
  # default timings based on a LPDDR3-1600 4 Gbit part in a 1x32
  # configuration
  class LPDDR3_1600_x32(SimpleDRAM):
-    # 4 Gbit and 8 Gbit devices use a 1 kByte page size, so ssuming 64
-    # byte cache lines, that is 16 lines
-    lines_per_rowbuffer = 16
+    # 1x32 configuration, 1 device with a 32-bit interface
+    device_bus_width = 32
+
+    # LPDDR3 is a BL8 device
+    burst_length = 8
+
+    # Each device has a page (row buffer) size of 1KB
+    # (this depends on the memory density)
+    device_rowbuffer_size = '1kB'
+
+    # 1x32 configuration, so 1 device
+    devices_per_rank = 1
  
      # Use a single rank
      ranks_per_channel = 1
@@ -306,9 +349,11 @@ class LPDDR3_1600_x32(SimpleDRAM):
      # Pre-charge one bank 15 ns (all banks 18 ns)
      tRP = '15ns'
  
-    # Assuming 64 byte cache lines, across a x32 DDR interface
-    # translates to two bursts of BL8, 8 clocks @ 800 MHz
-    tBURST = '10ns'
+    # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
+    # Note this is a BL8 DDR device.
+    # Requests larger than 32 bytes are broken down into multiple requests
+    # in the SimpleDRAM controller
+    tBURST = '5ns'
  
      # LPDDR3, 4 Gb
      tRFC = '130ns'
diff --git a/src/mem/simple_dram.cc b/src/mem/simple_dram.cc

index 9091288ec82e9142051bd31ca0f964cd45006eb4..faeedbb2b9cf0d539b3691874e78827d1351a309 100644 (file)
--- a/src/mem/simple_dram.cc
+++ b/src/mem/simple_dram.cc
@@ -11,6 +11,9 @@
   * unmodified and in its entirety in all distributions of the software,
   * modified or unmodified, in source code or in binary form.
   *
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are
   * met: redistributions of source code must retain the above copyright
@@ -54,8 +57,11 @@ SimpleDRAM::SimpleDRAM(const SimpleDRAMParams* p) :
      rowHitFlag(false), stopReads(false), actTicks(p->activation_limit, 0),
      writeEvent(this), respondEvent(this),
      refreshEvent(this), nextReqEvent(this), drainManager(NULL),
-    bytesPerCacheLine(0),
-    linesPerRowBuffer(p->lines_per_rowbuffer),
+    deviceBusWidth(p->device_bus_width), burstLength(p->burst_length),
+    deviceRowBufferSize(p->device_rowbuffer_size),
+    devicesPerRank(p->devices_per_rank),
+    burstSize((devicesPerRank * burstLength * deviceBusWidth) / 8),
+    rowBufferSize(devicesPerRank * deviceRowBufferSize),
      ranksPerChannel(p->ranks_per_channel),
      banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
      readBufferSize(p->read_buffer_size),
@@ -93,22 +99,22 @@ SimpleDRAM::init()
          port.sendRangeChange();
      }
  
-    // get the burst size from the connected port as it is currently
-    // assumed to be equal to the cache line size
-    bytesPerCacheLine = _system->cacheLineSize();
-
      // we could deal with plenty options here, but for now do a quick
      // sanity check
-    if (bytesPerCacheLine != 64 && bytesPerCacheLine != 32)
-        panic("Unexpected burst size %d", bytesPerCacheLine);
+    DPRINTF(DRAM, "Burst size %d bytes\n", burstSize);
  
      // determine the rows per bank by looking at the total capacity
      uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
  
      DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
              AbstractMemory::size());
-    rowsPerBank = capacity / (bytesPerCacheLine * linesPerRowBuffer *
-                              banksPerRank * ranksPerChannel);
+
+    columnsPerRowBuffer = rowBufferSize / burstSize;
+
+    DPRINTF(DRAM, "Row buffer size %d bytes with %d columns per row buffer\n",
+            rowBufferSize, columnsPerRowBuffer);
+
+    rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
  
      if (range.interleaved()) {
          if (channels != range.stripes())
@@ -116,18 +122,17 @@ SimpleDRAM::init()
                    name(), range.stripes(), channels);
  
          if (addrMapping == Enums::RaBaChCo) {
-            if (bytesPerCacheLine * linesPerRowBuffer !=
-                range.granularity()) {
+            if (rowBufferSize != range.granularity()) {
                  panic("Interleaving of %s doesn't match RaBaChCo address map\n",
                        name());
              }
          } else if (addrMapping == Enums::RaBaCoCh) {
-            if (bytesPerCacheLine != range.granularity()) {
+            if (burstSize != range.granularity()) {
                  panic("Interleaving of %s doesn't match RaBaCoCh address map\n",
                        name());
              }
          } else if (addrMapping == Enums::CoRaBaCh) {
-            if (bytesPerCacheLine != range.granularity())
+            if (burstSize != range.granularity())
                  panic("Interleaving of %s doesn't match CoRaBaCh address map\n",
                        name());
          }
@@ -162,24 +167,26 @@ SimpleDRAM::recvAtomic(PacketPtr pkt)
  }
  
  bool
-SimpleDRAM::readQueueFull() const
+SimpleDRAM::readQueueFull(unsigned int neededEntries) const
  {
-    DPRINTF(DRAM, "Read queue limit %d current size %d\n",
-            readBufferSize, readQueue.size() + respQueue.size());
+    DPRINTF(DRAM, "Read queue limit %d, current size %d, entries needed %d\n",
+            readBufferSize, readQueue.size() + respQueue.size(),
+            neededEntries);
  
-    return (readQueue.size() + respQueue.size()) == readBufferSize;
+    return
+        (readQueue.size() + respQueue.size() + neededEntries) > readBufferSize;
  }
  
  bool
-SimpleDRAM::writeQueueFull() const
+SimpleDRAM::writeQueueFull(unsigned int neededEntries) const
  {
-    DPRINTF(DRAM, "Write queue limit %d current size %d\n",
-            writeBufferSize, writeQueue.size());
-    return writeQueue.size() == writeBufferSize;
+    DPRINTF(DRAM, "Write queue limit %d, current size %d, entries needed %d\n",
+            writeBufferSize, writeQueue.size(), neededEntries);
+    return (writeQueue.size() + neededEntries) > writeBufferSize;
  }
  
  SimpleDRAM::DRAMPacket*
-SimpleDRAM::decodeAddr(PacketPtr pkt)
+SimpleDRAM::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size)
  {
      // decode the address based on the address mapping scheme, with
      // Ra, Co, Ba and Ch denoting rank, column, bank and channel,
@@ -188,17 +195,15 @@ SimpleDRAM::decodeAddr(PacketPtr pkt)
      uint16_t bank;
      uint16_t row;
  
-    Addr addr = pkt->getAddr();
-
      // truncate the address to the access granularity
-    addr = addr / bytesPerCacheLine;
+    Addr addr = dramPktAddr / burstSize;
  
      // we have removed the lowest order address bits that denote the
-    // position within the cache line
+    // position within the column
      if (addrMapping == Enums::RaBaChCo) {
          // the lowest order bits denote the column to ensure that
          // sequential cache lines occupy the same row
-        addr = addr / linesPerRowBuffer;
+        addr = addr / columnsPerRowBuffer;
  
          // take out the channel part of the address
          addr = addr / channels;
@@ -221,7 +226,7 @@ SimpleDRAM::decodeAddr(PacketPtr pkt)
          addr = addr / channels;
  
          // next, the column
-        addr = addr / linesPerRowBuffer;
+        addr = addr / columnsPerRowBuffer;
  
          // after the column bits, we get the bank bits to interleave
          // over the banks
@@ -256,7 +261,7 @@ SimpleDRAM::decodeAddr(PacketPtr pkt)
  
          // next the column bits which we do not need to keep track of
          // and simply skip past
-        addr = addr / linesPerRowBuffer;
+        addr = addr / columnsPerRowBuffer;
  
          // lastly, get the row bits
          row = addr % rowsPerBank;
@@ -269,54 +274,98 @@ SimpleDRAM::decodeAddr(PacketPtr pkt)
      assert(row < rowsPerBank);
  
      DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
-            pkt->getAddr(), rank, bank, row);
+            dramPktAddr, rank, bank, row);
  
      // create the corresponding DRAM packet with the entry time and
      // ready time set to the current tick, the latter will be updated
      // later
-    return new DRAMPacket(pkt, rank, bank, row, pkt->getAddr(),
+    return new DRAMPacket(pkt, rank, bank, row, dramPktAddr, size,
                            banks[rank][bank]);
  }
  
  void
-SimpleDRAM::addToReadQueue(PacketPtr pkt)
+SimpleDRAM::addToReadQueue(PacketPtr pkt, unsigned int pktCount)
  {
      // only add to the read queue here. whenever the request is
      // eventually done, set the readyTime, and call schedule()
      assert(!pkt->isWrite());
  
-    // First check write buffer to see if the data is already at
-    // the controller
-    list<DRAMPacket*>::const_iterator i;
-    Addr addr = pkt->getAddr();
+    assert(pktCount != 0);
  
-    // @todo: add size check
-    for (i = writeQueue.begin(); i != writeQueue.end(); ++i) {
-        if ((*i)->addr == addr){
-            servicedByWrQ++;
-            DPRINTF(DRAM, "Read to %lld serviced by write queue\n", addr);
-            bytesRead += bytesPerCacheLine;
-            bytesConsumedRd += pkt->getSize();
-            accessAndRespond(pkt, frontendLatency);
-            return;
+    // if the request size is larger than burst size, the pkt is split into
+    // multiple DRAM packets
+    // Note if the pkt starting address is not aligned to burst size, the
+    // address of first DRAM packet is kept unaligned. Subsequent DRAM packets
+    // are aligned to burst size boundaries. This is to ensure we accurately
+    // check read packets against packets in write queue.
+    Addr addr = pkt->getAddr();
+    unsigned pktsServicedByWrQ = 0;
+    BurstHelper* burst_helper = NULL;
+    for (int cnt = 0; cnt < pktCount; ++cnt) {
+        unsigned size = std::min((addr | (burstSize - 1)) + 1,
+                        pkt->getAddr() + pkt->getSize()) - addr;
+        readPktSize[ceilLog2(size)]++;
+        readBursts++;
+
+        // First check write buffer to see if the data is already at
+        // the controller
+        bool foundInWrQ = false;
+        list<DRAMPacket*>::const_iterator i;
+        for (i = writeQueue.begin(); i != writeQueue.end(); ++i) {
+            if ((*i)->addr == addr && (*i)->size >= size){
+                foundInWrQ = true;
+                servicedByWrQ++;
+                pktsServicedByWrQ++;
+                DPRINTF(DRAM, "Read to addr %lld with size %d serviced by "
+                        "write queue\n", addr, size);
+                bytesRead += burstSize;
+                bytesConsumedRd += size;
+                break;
+            }
          }
-    }
  
-    DRAMPacket* dram_pkt = decodeAddr(pkt);
+        // If not found in the write q, make a DRAM packet and
+        // push it onto the read queue
+        if (!foundInWrQ) {
  
-    assert(readQueue.size() + respQueue.size() < readBufferSize);
-    rdQLenPdf[readQueue.size() + respQueue.size()]++;
+            // Make the burst helper for split packets
+            if (pktCount > 1 && burst_helper == NULL) {
+                DPRINTF(DRAM, "Read to addr %lld translates to %d "
+                        "dram requests\n", pkt->getAddr(), pktCount);
+                burst_helper = new BurstHelper(pktCount);
+            }
  
-    DPRINTF(DRAM, "Adding to read queue\n");
+            DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
+            dram_pkt->burstHelper = burst_helper;
  
-    readQueue.push_back(dram_pkt);
+            assert(!readQueueFull(1));
+            rdQLenPdf[readQueue.size() + respQueue.size()]++;
  
-    // Update stats
-    uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
-    assert(bank_id < ranksPerChannel * banksPerRank);
-    perBankRdReqs[bank_id]++;
+            DPRINTF(DRAM, "Adding to read queue\n");
  
-    avgRdQLen = readQueue.size() + respQueue.size();
+            readQueue.push_back(dram_pkt);
+
+            // Update stats
+            uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
+            assert(bank_id < ranksPerChannel * banksPerRank);
+            perBankRdReqs[bank_id]++;
+
+            avgRdQLen = readQueue.size() + respQueue.size();
+        }
+
+        // Starting address of next dram pkt (aligned to burstSize boundary)
+        addr = (addr | (burstSize - 1)) + 1;
+    }
+
+    // If all packets are serviced by write queue, we send the response back
+    if (pktsServicedByWrQ == pktCount) {
+        accessAndRespond(pkt, frontendLatency);
+        return;
+    }
+
+    // Update how many split packets are serviced by write queue
+    if (burst_helper != NULL)
+        burst_helper->burstsServiced = pktsServicedByWrQ;
  
      // If we are not already scheduled to get the read request out of
      // the queue, do so now
@@ -364,7 +413,7 @@ SimpleDRAM::processWriteEvent()
              bank.openRow = dram_pkt->row;
              bank.freeAt = schedTime + tBURST + std::max(accessLat, tCL);
              busBusyUntil = bank.freeAt - tCL;
-            bank.bytesAccessed += bytesPerCacheLine;
+            bank.bytesAccessed += burstSize;
  
              if (!rowHitFlag) {
                  bank.tRASDoneAt = bank.freeAt + tRP;
@@ -385,7 +434,7 @@ SimpleDRAM::processWriteEvent()
                      "banks_id %d is %lld\n",
                      dram_pkt->rank * banksPerRank + dram_pkt->bank,
                      bank.freeAt);
-            bytesPerActivate.sample(bytesPerCacheLine);
+            bytesPerActivate.sample(burstSize);
          } else
              panic("Unknown page management policy chosen\n");
  
@@ -449,34 +498,49 @@ SimpleDRAM::triggerWrites()
  }
  
  void
-SimpleDRAM::addToWriteQueue(PacketPtr pkt)
+SimpleDRAM::addToWriteQueue(PacketPtr pkt, unsigned int pktCount)
  {
      // only add to the write queue here. whenever the request is
      // eventually done, set the readyTime, and call schedule()
      assert(pkt->isWrite());
  
-    DRAMPacket* dram_pkt = decodeAddr(pkt);
+    // if the request size is larger than burst size, the pkt is split into
+    // multiple DRAM packets
+    Addr addr = pkt->getAddr();
+    for (int cnt = 0; cnt < pktCount; ++cnt) {
+        unsigned size = std::min((addr | (burstSize - 1)) + 1,
+                        pkt->getAddr() + pkt->getSize()) - addr;
+        writePktSize[ceilLog2(size)]++;
+        writeBursts++;
  
-    assert(writeQueue.size() < writeBufferSize);
-    wrQLenPdf[writeQueue.size()]++;
+        DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
  
-    DPRINTF(DRAM, "Adding to write queue\n");
+        assert(writeQueue.size() < writeBufferSize);
+        wrQLenPdf[writeQueue.size()]++;
  
-    writeQueue.push_back(dram_pkt);
+        DPRINTF(DRAM, "Adding to write queue\n");
  
-    // Update stats
-    uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
-    assert(bank_id < ranksPerChannel * banksPerRank);
-    perBankWrReqs[bank_id]++;
+        writeQueue.push_back(dram_pkt);
  
-    avgWrQLen = writeQueue.size();
+        // Update stats
+        uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
+        assert(bank_id < ranksPerChannel * banksPerRank);
+        perBankWrReqs[bank_id]++;
+
+        avgWrQLen = writeQueue.size();
+
+        bytesConsumedWr += dram_pkt->size;
+        bytesWritten += burstSize;
+
+        // Starting address of next dram pkt (aligned to burstSize boundary)
+        addr = (addr | (burstSize - 1)) + 1;
+    }
  
      // we do not wait for the writes to be send to the actual memory,
      // but instead take responsibility for the consistency here and
      // snoop the write queue for any upcoming reads
-
-    bytesConsumedWr += pkt->getSize();
-    bytesWritten += bytesPerCacheLine;
+    // @todo, if a pkt size is larger than burst size, we might need a
+    // different front end latency
      accessAndRespond(pkt, frontendLatency);
  
      // If your write buffer is starting to fill up, drain it!
@@ -491,15 +555,18 @@ SimpleDRAM::printParams() const
      // Sanity check print of important parameters
      DPRINTF(DRAM,
              "Memory controller %s physical organization\n"      \
-            "Bytes per cacheline  %d\n"                         \
-            "Lines per row buffer %d\n"                         \
-            "Rows  per bank       %d\n"                         \
-            "Banks per rank       %d\n"                         \
-            "Ranks per channel    %d\n"                         \
-            "Total mem capacity   %u\n",
-            name(), bytesPerCacheLine, linesPerRowBuffer, rowsPerBank,
-            banksPerRank, ranksPerChannel, bytesPerCacheLine *
-            linesPerRowBuffer * rowsPerBank * banksPerRank * ranksPerChannel);
+            "Number of devices per rank   %d\n"                 \
+            "Device bus width (in bits)   %d\n"                 \
+            "DRAM data bus burst          %d\n"                 \
+            "Row buffer size              %d\n"                 \
+            "Columns per row buffer       %d\n"                 \
+            "Rows    per bank             %d\n"                 \
+            "Banks   per rank             %d\n"                 \
+            "Ranks   per channel          %d\n"                 \
+            "Total mem capacity           %u\n",
+            name(), devicesPerRank, deviceBusWidth, burstSize, rowBufferSize,
+            columnsPerRowBuffer, rowsPerBank, banksPerRank, ranksPerChannel,
+            rowBufferSize * rowsPerBank * banksPerRank * ranksPerChannel);
  
      string scheduler =  memSchedPolicy == Enums::fcfs ? "FCFS" : "FR-FCFS";
      string address_mapping = addrMapping == Enums::RaBaChCo ? "RaBaChCo" :
@@ -560,7 +627,7 @@ SimpleDRAM::recvTimingReq(PacketPtr pkt)
  
      // This is where we enter from the outside world
      DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n",
-            pkt->cmdString(),pkt->getAddr(), pkt->getSize());
+            pkt->cmdString(), pkt->getAddr(), pkt->getSize());
  
      // simply drop inhibited packets for now
      if (pkt->memInhibitAsserted()) {
@@ -569,9 +636,6 @@ SimpleDRAM::recvTimingReq(PacketPtr pkt)
          return true;
      }
  
-   if (pkt->getSize() == bytesPerCacheLine)
-       cpuReqs++;
-
     // Every million accesses, print the state of the queues
     if (numReqs % 1000000 == 0)
         printQs();
@@ -582,37 +646,39 @@ SimpleDRAM::recvTimingReq(PacketPtr pkt)
      }
      prevArrival = curTick();
  
+
+    // Find out how many dram packets a pkt translates to
+    // If the burst size is equal or larger than the pkt size, then a pkt
+    // translates to only one dram packet. Otherwise, a pkt translates to
+    // multiple dram packets
      unsigned size = pkt->getSize();
-    if (size > bytesPerCacheLine)
-        panic("Request size %d is greater than burst size %d",
-              size, bytesPerCacheLine);
+    unsigned offset = pkt->getAddr() & (burstSize - 1);
+    unsigned int dram_pkt_count = divCeil(offset + size, burstSize);
  
      // check local buffers and do not accept if full
      if (pkt->isRead()) {
          assert(size != 0);
-        if (readQueueFull()) {
+        if (readQueueFull(dram_pkt_count)) {
              DPRINTF(DRAM, "Read queue full, not accepting\n");
              // remember that we have to retry this port
              retryRdReq = true;
              numRdRetry++;
              return false;
          } else {
-            readPktSize[ceilLog2(size)]++;
-            addToReadQueue(pkt);
+            addToReadQueue(pkt, dram_pkt_count);
              readReqs++;
              numReqs++;
          }
      } else if (pkt->isWrite()) {
          assert(size != 0);
-        if (writeQueueFull()) {
+        if (writeQueueFull(dram_pkt_count)) {
              DPRINTF(DRAM, "Write queue full, not accepting\n");
              // remember that we have to retry this port
              retryWrReq = true;
              numWrRetry++;
              return false;
          } else {
-            writePktSize[ceilLog2(size)]++;
-            addToWriteQueue(pkt);
+            addToWriteQueue(pkt, dram_pkt_count);
              writeReqs++;
              numReqs++;
          }
@@ -633,38 +699,54 @@ SimpleDRAM::processRespondEvent()
      DPRINTF(DRAM,
              "processRespondEvent(): Some req has reached its readyTime\n");
  
-     PacketPtr pkt = respQueue.front()->pkt;
-
-     // Actually responds to the requestor
-     bytesConsumedRd += pkt->getSize();
-     bytesRead += bytesPerCacheLine;
-     accessAndRespond(pkt, frontendLatency + backendLatency);
-
-     delete respQueue.front();
-     respQueue.pop_front();
-
-     // Update stats
-     avgRdQLen = readQueue.size() + respQueue.size();
-
-     if (!respQueue.empty()) {
-         assert(respQueue.front()->readyTime >= curTick());
-         assert(!respondEvent.scheduled());
-         schedule(respondEvent, respQueue.front()->readyTime);
-     } else {
-         // if there is nothing left in any queue, signal a drain
-         if (writeQueue.empty() && readQueue.empty() &&
-             drainManager) {
-             drainManager->signalDrainDone();
-             drainManager = NULL;
-         }
-     }
-
-     // We have made a location in the queue available at this point,
-     // so if there is a read that was forced to wait, retry now
-     if (retryRdReq) {
-         retryRdReq = false;
-         port.sendRetry();
-     }
+    DRAMPacket* dram_pkt = respQueue.front();
+
+    // Actually responds to the requestor
+    bytesConsumedRd += dram_pkt->size;
+    bytesRead += burstSize;
+    if (dram_pkt->burstHelper) {
+        // it is a split packet
+        dram_pkt->burstHelper->burstsServiced++;
+        if (dram_pkt->burstHelper->burstsServiced ==
+                                  dram_pkt->burstHelper->burstCount) {
+            // we have now serviced all children packets of a system packet
+            // so we can now respond to the requester
+            // @todo we probably want to have a different front end and back
+            // end latency for split packets
+            accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
+            delete dram_pkt->burstHelper;
+            dram_pkt->burstHelper = NULL;
+        }
+    } else {
+        // it is not a split packet
+        accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
+    }
+
+    delete respQueue.front();
+    respQueue.pop_front();
+
+    // Update stats
+    avgRdQLen = readQueue.size() + respQueue.size();
+
+    if (!respQueue.empty()) {
+        assert(respQueue.front()->readyTime >= curTick());
+        assert(!respondEvent.scheduled());
+        schedule(respondEvent, respQueue.front()->readyTime);
+    } else {
+        // if there is nothing left in any queue, signal a drain
+        if (writeQueue.empty() && readQueue.empty() &&
+            drainManager) {
+            drainManager->signalDrainDone();
+            drainManager = NULL;
+        }
+    }
+
+    // We have made a location in the queue available at this point,
+    // so if there is a read that was forced to wait, retry now
+    if (retryRdReq) {
+        retryRdReq = false;
+        port.sendRetry();
+    }
  }
  
  void
@@ -911,7 +993,7 @@ SimpleDRAM::doDRAMAccess(DRAMPacket* dram_pkt)
      if (pageMgmt == Enums::open) {
          bank.openRow = dram_pkt->row;
          bank.freeAt = curTick() + addDelay + accessLat;
-        bank.bytesAccessed += bytesPerCacheLine;
+        bank.bytesAccessed += burstSize;
  
          // If you activated a new row do to this access, the next access
          // will have to respect tRAS for this bank. Assume tRAS ~= 3 * tRP.
@@ -931,7 +1013,7 @@ SimpleDRAM::doDRAMAccess(DRAMPacket* dram_pkt)
          bank.freeAt = curTick() + addDelay + accessLat + tRP + tRP;
          recordActivate(bank.freeAt - tRP - tRP - tCL - tRCD); //essentially (freeAt - tRC)
          DPRINTF(DRAM,"doDRAMAccess::bank.freeAt is %lld\n",bank.freeAt);
-        bytesPerActivate.sample(bytesPerCacheLine);
+        bytesPerActivate.sample(burstSize);
      } else
          panic("No page management policy chosen\n");
  
@@ -1080,19 +1162,27 @@ SimpleDRAM::regStats()
  
      readReqs
          .name(name() + ".readReqs")
-        .desc("Total number of read requests seen");
+        .desc("Total number of read requests accepted by DRAM controller");
  
      writeReqs
          .name(name() + ".writeReqs")
-        .desc("Total number of write requests seen");
+        .desc("Total number of write requests accepted by DRAM controller");
+
+    readBursts
+        .name(name() + ".readBursts")
+        .desc("Total number of DRAM read bursts. "
+              "Each DRAM read request translates to either one or multiple "
+              "DRAM read bursts");
+
+    writeBursts
+        .name(name() + ".writeBursts")
+        .desc("Total number of DRAM write bursts. "
+              "Each DRAM write request translates to either one or multiple "
+              "DRAM write bursts");
  
      servicedByWrQ
          .name(name() + ".servicedByWrQ")
-        .desc("Number of read reqs serviced by write Q");
-
-    cpuReqs
-        .name(name() + ".cpureqs")
-        .desc("Reqs generatd by CPU via cache - shady");
+        .desc("Number of DRAM read bursts serviced by write Q");
  
      neitherReadNorWrite
          .name(name() + ".neitherReadNorWrite")
@@ -1139,28 +1229,28 @@ SimpleDRAM::regStats()
          .desc("Average queueing delay per request")
          .precision(2);
  
-    avgQLat = totQLat / (readReqs - servicedByWrQ);
+    avgQLat = totQLat / (readBursts - servicedByWrQ);
  
      avgBankLat
          .name(name() + ".avgBankLat")
          .desc("Average bank access latency per request")
          .precision(2);
  
-    avgBankLat = totBankLat / (readReqs - servicedByWrQ);
+    avgBankLat = totBankLat / (readBursts - servicedByWrQ);
  
      avgBusLat
          .name(name() + ".avgBusLat")
          .desc("Average bus latency per request")
          .precision(2);
  
-    avgBusLat = totBusLat / (readReqs - servicedByWrQ);
+    avgBusLat = totBusLat / (readBursts - servicedByWrQ);
  
      avgMemAccLat
          .name(name() + ".avgMemAccLat")
          .desc("Average memory access latency")
          .precision(2);
  
-    avgMemAccLat = totMemAccLat / (readReqs - servicedByWrQ);
+    avgMemAccLat = totMemAccLat / (readBursts - servicedByWrQ);
  
      numRdRetry
          .name(name() + ".numRdRetry")
@@ -1183,22 +1273,22 @@ SimpleDRAM::regStats()
          .desc("Row buffer hit rate for reads")
          .precision(2);
  
-    readRowHitRate = (readRowHits / (readReqs - servicedByWrQ)) * 100;
+    readRowHitRate = (readRowHits / (readBursts - servicedByWrQ)) * 100;
  
      writeRowHitRate
          .name(name() + ".writeRowHitRate")
          .desc("Row buffer hit rate for writes")
          .precision(2);
  
-    writeRowHitRate = (writeRowHits / writeReqs) * 100;
+    writeRowHitRate = (writeRowHits / writeBursts) * 100;
  
      readPktSize
-        .init(ceilLog2(bytesPerCacheLine) + 1)
+        .init(ceilLog2(burstSize) + 1)
          .name(name() + ".readPktSize")
          .desc("Categorize read packet sizes");
  
       writePktSize
-        .init(ceilLog2(bytesPerCacheLine) + 1)
+        .init(ceilLog2(burstSize) + 1)
          .name(name() + ".writePktSize")
          .desc("Categorize write packet sizes");
  
@@ -1213,7 +1303,7 @@ SimpleDRAM::regStats()
          .desc("What write queue length does an incoming req see");
  
       bytesPerActivate
-         .init(bytesPerCacheLine * linesPerRowBuffer)
+         .init(rowBufferSize)
           .name(name() + ".bytesPerActivate")
           .desc("Bytes accessed per row activation")
           .flags(nozero);
@@ -1267,7 +1357,7 @@ SimpleDRAM::regStats()
          .desc("Theoretical peak bandwidth in MB/s")
          .precision(2);
  
-    peakBW = (SimClock::Frequency / tBURST) * bytesPerCacheLine / 1000000;
+    peakBW = (SimClock::Frequency / tBURST) * burstSize / 1000000;
  
      busUtil
          .name(name() + ".busUtil")
diff --git a/src/mem/simple_dram.hh b/src/mem/simple_dram.hh

index e4d20163a65ce9347d3e19ad9c60f8a46d038dcb..313ad067b5ff3b3b5d31a44c5293741b6776a0f0 100644 (file)
--- a/src/mem/simple_dram.hh
+++ b/src/mem/simple_dram.hh
@@ -11,6 +11,9 @@
   * unmodified and in its entirety in all distributions of the software,
   * modified or unmodified, in source code or in binary form.
   *
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are
   * met: redistributions of source code must retain the above copyright
@@ -157,6 +160,27 @@ class SimpleDRAM : public AbstractMemory
          { }
      };
  
+    /**
+     * A burst helper helps organize and manage a packet that is larger than
+     * the DRAM burst size. A system packet that is larger than the burst size
+     * is split into multiple DRAM packets and all those DRAM packets point to
+     * a single burst helper such that we know when the whole packet is served.
+     */
+    class BurstHelper {
+
+      public:
+
+        /** Number of DRAM bursts required for a system packet **/
+        const unsigned int burstCount;
+
+        /** Number of DRAM bursts serviced so far for a system packet **/
+        unsigned int burstsServiced;
+
+        BurstHelper(unsigned int _burstCount)
+            : burstCount(_burstCount), burstsServiced(0)
+            { }
+    };
+
      /**
       * A DRAM packet stores packets along with the timestamp of when
       * the packet entered the queue, and also the decoded address.
@@ -178,14 +202,34 @@ class SimpleDRAM : public AbstractMemory
          const uint8_t rank;
          const uint16_t bank;
          const uint16_t row;
+
+        /**
+         * The starting address of the DRAM packet.
+         * This address could be unaligned to burst size boundaries. The
+         * reason is to keep the address offset so we can accurately check
+         * incoming read packets with packets in the write queue.
+         */
          const Addr addr;
+
+        /**
+         * The size of this DRAM packet in bytes.
+         * It is always equal to or smaller than the DRAM burst size.
+         */
+        const unsigned int size;
+
+        /**
+         * A pointer to the BurstHelper if this DRAMPacket is a split packet
+         * If not a split packet (common case), this is set to NULL
+         */
+        BurstHelper* burstHelper;
          Bank& bank_ref;
  
-        DRAMPacket(PacketPtr _pkt, uint8_t _rank,
-                   uint16_t _bank, uint16_t _row, Addr _addr, Bank& _bank_ref)
+        DRAMPacket(PacketPtr _pkt, uint8_t _rank, uint16_t _bank,
+                   uint16_t _row, Addr _addr, unsigned int _size,
+                   Bank& _bank_ref)
              : entryTime(curTick()), readyTime(curTick()),
                pkt(_pkt), rank(_rank), bank(_bank), row(_row), addr(_addr),
-              bank_ref(_bank_ref)
+              size(_size), burstHelper(NULL), bank_ref(_bank_ref)
          { }
  
      };
@@ -212,28 +256,34 @@ class SimpleDRAM : public AbstractMemory
      /**
       * Check if the read queue has room for more entries
       *
+     * @param pktCount The number of entries needed in the read queue
       * @return true if read queue is full, false otherwise
       */
-    bool readQueueFull() const;
+    bool readQueueFull(unsigned int pktCount) const;
  
      /**
       * Check if the write queue has room for more entries
       *
+     * @param pktCount The number of entries needed in the write queue
       * @return true if write queue is full, false otherwise
       */
-    bool writeQueueFull() const;
+    bool writeQueueFull(unsigned int pktCount) const;
  
      /**
       * When a new read comes in, first check if the write q has a
       * pending request to the same address.\ If not, decode the
-     * address to populate rank/bank/row, create a "dram_pkt", and
-     * push it to the back of the read queue.\ If this is the only
+     * address to populate rank/bank/row, create one or multiple
+     * "dram_pkt", and push them to the back of the read queue.\
+     * If this is the only
       * read request in the system, schedule an event to start
       * servicing it.
       *
       * @param pkt The request packet from the outside world
+     * @param pktCount The number of DRAM bursts the pkt
+     * translates to. If pkt size is larger than one full burst,
+     * then pktCount is greater than one.
       */
-    void addToReadQueue(PacketPtr pkt);
+    void addToReadQueue(PacketPtr pkt, unsigned int pktCount);
  
      /**
       * Decode the incoming pkt, create a dram_pkt and push to the
@@ -242,8 +292,11 @@ class SimpleDRAM : public AbstractMemory
       * to get full, stop reads, and start draining writes.
       *
       * @param pkt The request packet from the outside world
+     * @param pktCount The number of DRAM bursts the pkt
+     * translates to. If pkt size is larger than one full burst,
+     * then pktCount is greater than one.
       */
-    void addToWriteQueue(PacketPtr pkt);
+    void addToWriteQueue(PacketPtr pkt, unsigned int pktCount);
  
      /**
       * Actually do the DRAM access - figure out the latency it
@@ -276,12 +329,16 @@ class SimpleDRAM : public AbstractMemory
  
      /**
       * Address decoder to figure out physical mapping onto ranks,
-     * banks, and rows.
+     * banks, and rows. This function is called multiple times on the same
+     * system packet if the packet is larger than the memory burst size. The
+     * dramPktAddr is used for the offset within the packet.
       *
       * @param pkt The packet from the outside world
+     * @param dramPktAddr The starting address of the DRAM packet
+     * @param size The size of the DRAM packet in bytes
       * @return A DRAMPacket pointer with the decoded information
       */
-    DRAMPacket* decodeAddr(PacketPtr pkt);
+    DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size);
  
      /**
       * The memory schduler/arbiter - picks which read request needs to
@@ -376,18 +433,21 @@ class SimpleDRAM : public AbstractMemory
  
      /**
       * The following are basic design parameters of the memory
-     * controller, and are initialized based on parameter values. The
-     * bytesPerCacheLine is based on the neighbouring ports cache line
-     * size and thus determined outside the constructor. Similarly,
-     * the rowsPerBank is determined based on the capacity, number of
-     * ranks and banks, the cache line size, and the row buffer size.
-     */
-    uint32_t bytesPerCacheLine;
-    const uint32_t linesPerRowBuffer;
+     * controller, and are initialized based on parameter values.
+     * The rowsPerBank is determined based on the capacity, number of
+     * ranks and banks, the burst size, and the row buffer size.
+     */
+    const uint32_t deviceBusWidth;
+    const uint32_t burstLength;
+    const uint32_t deviceRowBufferSize;
+    const uint32_t devicesPerRank;
+    const uint32_t burstSize;
+    const uint32_t rowBufferSize;
      const uint32_t ranksPerChannel;
      const uint32_t banksPerRank;
      const uint32_t channels;
      uint32_t rowsPerBank;
+    uint32_t columnsPerRowBuffer;
      const uint32_t readBufferSize;
      const uint32_t writeBufferSize;
      const double writeThresholdPerc;
@@ -441,7 +501,8 @@ class SimpleDRAM : public AbstractMemory
      // All statistics that the model needs to capture
      Stats::Scalar readReqs;
      Stats::Scalar writeReqs;
-    Stats::Scalar cpuReqs;
+    Stats::Scalar readBursts;
+    Stats::Scalar writeBursts;
      Stats::Scalar bytesRead;
      Stats::Scalar bytesWritten;
      Stats::Scalar bytesConsumedRd;
author	Amin Farmahini <aminfar@gmail.com>
	Mon, 19 Aug 2013 07:52:30 +0000 (03:52 -0400)
committer	Amin Farmahini <aminfar@gmail.com>
	Mon, 19 Aug 2013 07:52:30 +0000 (03:52 -0400)
src/mem/SimpleDRAM.py		patch \| blob \| history
src/mem/simple_dram.cc		patch \| blob \| history
src/mem/simple_dram.hh		patch \| blob \| history