# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
+# Copyright (c) 2013 Amin Farmahini-Farahani
+# All rights reserved.
+#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
static_backend_latency = Param.Latency("10ns", "Static backend latency")
# the physical organisation of the DRAM
- lines_per_rowbuffer = Param.Unsigned("Row buffer size in cache lines")
+ device_bus_width = Param.Unsigned("Data bus width in bits for each DRAM "\
+ "device/chip")
+ burst_length = Param.Unsigned("Burst length (BL) in beats")
+ device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
+ "device/chip")
+ devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
ranks_per_channel = Param.Unsigned("Number of ranks per channel")
banks_per_rank = Param.Unsigned("Number of banks per rank")
# only used for the address mapping as the controller by
# time to complete a burst transfer, typically the burst length
# divided by two due to the DDR bus, but by making it a parameter
# it is easier to also evaluate SDR memories like WideIO.
- # This parameter has to account for bus width and burst length.
- # Adjustment also necessary if cache line size is greater than
- # data size read/written by one full burst.
+ # This parameter has to account for burst length.
+ # Read/Write requests with data size larger than one full burst are broken
+ # down into multiple requests in the SimpleDRAM controller
tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")
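# Illustrative sketch (annotation, not part of the patch): the burst size
# in bytes follows from the parameters above as
#   burst_size = devices_per_rank * burst_length * device_bus_width / 8
# For the DDR3-1600 x64 configuration below (8 devices, BL8, x8 interface)
# this is 8 * 8 * 8 / 8 = 64 bytes, and tBURST covers the 8 beats on the
# DDR bus, i.e. 4 clocks @ 800 MHz = 5ns.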
# time taken to complete one refresh cycle (N rows in all banks)
# tRC - assumed to be 4 * tRP
- # burst length for an access derived from the cache line size
-
# A single DDR3 x64 interface (one command and address bus), with
# default timings based on DDR3-1600 4 Gbit parts in an 8x8
# configuration, which would amount to 4 Gbyte of memory.
class DDR3_1600_x64(SimpleDRAM):
- # Assuming 64 byte cache lines, and a 1kbyte page size per module
+ # 8x8 configuration, 8 devices each with an 8-bit interface
+ device_bus_width = 8
+
+ # DDR3 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 1KB
# (this depends on the memory density)
- lines_per_rowbuffer = 128
+ device_rowbuffer_size = '1kB'
+
+ # 8x8 configuration, so 8 devices
+ devices_per_rank = 8
# Use two ranks
ranks_per_channel = 2
tCL = '13.75ns'
tRP = '13.75ns'
- # Assuming 64 byte cache lines, across an x64
- # interface, translates to BL8, 4 clocks @ 800 MHz
+ # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz.
+ # Note this is a BL8 DDR device.
tBURST = '5ns'
# DDR3, 4 Gbit has a tRFC of 240 CK and tCK = 1.25 ns
# default timings based on a LPDDR2-1066 4 Gbit part in a 1x32
# configuration.
class LPDDR2_S4_1066_x32(SimpleDRAM):
- # Assuming 64 byte cache lines, use a 1kbyte page size, this
- # depends on the memory density
- lines_per_rowbuffer = 16
+ # 1x32 configuration, 1 device with a 32-bit interface
+ device_bus_width = 32
+
+ # LPDDR2_S4 is a BL4 and BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 1KB
+ # (this depends on the memory density)
+ device_rowbuffer_size = '1kB'
+
+ # 1x32 configuration, so 1 device
+ devices_per_rank = 1
# Use a single rank
ranks_per_channel = 1
# Pre-charge one bank 15 ns (all banks 18 ns)
tRP = '15ns'
- # Assuming 64 byte cache lines, across a x32 DDR interface
- # translates to two BL8, 8 clocks @ 533 MHz. Note that this is a
- # simplification
- tBURST = '15ns'
+ # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
+ # Note this is a BL8 DDR device.
+ # Requests larger than 32 bytes are broken down into multiple requests
+ # in the SimpleDRAM controller
+ tBURST = '7.5ns'
# LPDDR2-S4, 4 Gbit
tRFC = '130ns'
# A single WideIO x128 interface (one command and address bus), with
# default timings based on an estimated WIO-200 8 Gbit part.
class WideIO_200_x128(SimpleDRAM):
- # Assuming 64 byte cache lines, use a 4kbyte page size, this
- # depends on the memory density
- lines_per_rowbuffer = 64
+ # 1x128 configuration, 1 device with a 128-bit interface
+ device_bus_width = 128
+
+ # This is a BL4 device
+ burst_length = 4
+
+ # Each device has a page (row buffer) size of 4KB
+ # (this depends on the memory density)
+ device_rowbuffer_size = '4kB'
+
+ # 1x128 configuration, so 1 device
+ devices_per_rank = 1
# Use one rank for a one-high die stack
ranks_per_channel = 1
tCL = '18ns'
tRP = '18ns'
- # Assuming 64 byte cache lines, across an x128 SDR interface,
- # translates to BL4, 4 clocks @ 200 MHz
+ # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz.
+ # Note this is a BL4 SDR device.
tBURST = '20ns'
# WIO 8 Gb
# default timings based on a LPDDR3-1600 4 Gbit part in a 1x32
# configuration
class LPDDR3_1600_x32(SimpleDRAM):
- # 4 Gbit and 8 Gbit devices use a 1 kByte page size, so ssuming 64
- # byte cache lines, that is 16 lines
- lines_per_rowbuffer = 16
+ # 1x32 configuration, 1 device with a 32-bit interface
+ device_bus_width = 32
+
+ # LPDDR3 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 1KB
+ # (this depends on the memory density)
+ device_rowbuffer_size = '1kB'
+
+ # 1x32 configuration, so 1 device
+ devices_per_rank = 1
# Use a single rank
ranks_per_channel = 1
# Pre-charge one bank 15 ns (all banks 18 ns)
tRP = '15ns'
- # Assuming 64 byte cache lines, across a x32 DDR interface
- # translates to two bursts of BL8, 8 clocks @ 800 MHz
- tBURST = '10ns'
+ # 8 beats across an x32 DDR interface translates to 4 clocks @ 800 MHz.
+ # Note this is a BL8 DDR device.
+ # Requests larger than 32 bytes are broken down into multiple requests
+ # in the SimpleDRAM controller
+ tBURST = '5ns'
# LPDDR3, 4 Gb
tRFC = '130ns'
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
rowHitFlag(false), stopReads(false), actTicks(p->activation_limit, 0),
writeEvent(this), respondEvent(this),
refreshEvent(this), nextReqEvent(this), drainManager(NULL),
- bytesPerCacheLine(0),
- linesPerRowBuffer(p->lines_per_rowbuffer),
+ deviceBusWidth(p->device_bus_width), burstLength(p->burst_length),
+ deviceRowBufferSize(p->device_rowbuffer_size),
+ devicesPerRank(p->devices_per_rank),
+ burstSize((devicesPerRank * burstLength * deviceBusWidth) / 8),
+ rowBufferSize(devicesPerRank * deviceRowBufferSize),
ranksPerChannel(p->ranks_per_channel),
banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
readBufferSize(p->read_buffer_size),
port.sendRangeChange();
}
- // get the burst size from the connected port as it is currently
- // assumed to be equal to the cache line size
- bytesPerCacheLine = _system->cacheLineSize();
-
// we could deal with plenty options here, but for now do a quick
// sanity check
- if (bytesPerCacheLine != 64 && bytesPerCacheLine != 32)
- panic("Unexpected burst size %d", bytesPerCacheLine);
+ DPRINTF(DRAM, "Burst size %d bytes\n", burstSize);
// determine the rows per bank by looking at the total capacity
uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
AbstractMemory::size());
- rowsPerBank = capacity / (bytesPerCacheLine * linesPerRowBuffer *
- banksPerRank * ranksPerChannel);
+
+ columnsPerRowBuffer = rowBufferSize / burstSize;
+
+ DPRINTF(DRAM, "Row buffer size %d bytes with %d columns per row buffer\n",
+ rowBufferSize, columnsPerRowBuffer);
+
+ rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
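// Illustrative example (annotation, not part of the patch): for the
// DDR3-1600 x64 configuration (8 x8 devices, 1kB row buffer per device,
// 2 ranks, and assuming 8 banks per rank), rowBufferSize = 8 * 1kB = 8kB,
// columnsPerRowBuffer = 8192 / 64 = 128, and a 4 GByte capacity gives
// rowsPerBank = 2^32 / (8192 * 8 * 2) = 32768.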
if (range.interleaved()) {
if (channels != range.stripes())
name(), range.stripes(), channels);
if (addrMapping == Enums::RaBaChCo) {
- if (bytesPerCacheLine * linesPerRowBuffer !=
- range.granularity()) {
+ if (rowBufferSize != range.granularity()) {
panic("Interleaving of %s doesn't match RaBaChCo address map\n",
name());
}
} else if (addrMapping == Enums::RaBaCoCh) {
- if (bytesPerCacheLine != range.granularity()) {
+ if (burstSize != range.granularity()) {
panic("Interleaving of %s doesn't match RaBaCoCh address map\n",
name());
}
} else if (addrMapping == Enums::CoRaBaCh) {
- if (bytesPerCacheLine != range.granularity())
+ if (burstSize != range.granularity())
panic("Interleaving of %s doesn't match CoRaBaCh address map\n",
name());
}
}
bool
-SimpleDRAM::readQueueFull() const
+SimpleDRAM::readQueueFull(unsigned int neededEntries) const
{
- DPRINTF(DRAM, "Read queue limit %d current size %d\n",
- readBufferSize, readQueue.size() + respQueue.size());
+ DPRINTF(DRAM, "Read queue limit %d, current size %d, entries needed %d\n",
+ readBufferSize, readQueue.size() + respQueue.size(),
+ neededEntries);
- return (readQueue.size() + respQueue.size()) == readBufferSize;
+ return
+ (readQueue.size() + respQueue.size() + neededEntries) > readBufferSize;
}
bool
-SimpleDRAM::writeQueueFull() const
+SimpleDRAM::writeQueueFull(unsigned int neededEntries) const
{
- DPRINTF(DRAM, "Write queue limit %d current size %d\n",
- writeBufferSize, writeQueue.size());
- return writeQueue.size() == writeBufferSize;
+ DPRINTF(DRAM, "Write queue limit %d, current size %d, entries needed %d\n",
+ writeBufferSize, writeQueue.size(), neededEntries);
+ return (writeQueue.size() + neededEntries) > writeBufferSize;
}
SimpleDRAM::DRAMPacket*
-SimpleDRAM::decodeAddr(PacketPtr pkt)
+SimpleDRAM::decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned size)
{
// decode the address based on the address mapping scheme, with
// Ra, Co, Ba and Ch denoting rank, column, bank and channel,
uint16_t bank;
uint16_t row;
- Addr addr = pkt->getAddr();
-
// truncate the address to the access granularity
- addr = addr / bytesPerCacheLine;
+ Addr addr = dramPktAddr / burstSize;
// we have removed the lowest order address bits that denote the
- // position within the cache line
+ // position within the column
if (addrMapping == Enums::RaBaChCo) {
// the lowest order bits denote the column to ensure that
// sequential cache lines occupy the same row
- addr = addr / linesPerRowBuffer;
+ addr = addr / columnsPerRowBuffer;
// take out the channel part of the address
addr = addr / channels;
addr = addr / channels;
// next, the column
- addr = addr / linesPerRowBuffer;
+ addr = addr / columnsPerRowBuffer;
// after the column bits, we get the bank bits to interleave
// over the banks
// next the column bits which we do not need to keep track of
// and simply skip past
- addr = addr / linesPerRowBuffer;
+ addr = addr / columnsPerRowBuffer;
// lastly, get the row bits
row = addr % rowsPerBank;
assert(row < rowsPerBank);
DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
- pkt->getAddr(), rank, bank, row);
+ dramPktAddr, rank, bank, row);
// create the corresponding DRAM packet with the entry time and
// ready time set to the current tick, the latter will be updated
// later
- return new DRAMPacket(pkt, rank, bank, row, pkt->getAddr(),
+ return new DRAMPacket(pkt, rank, bank, row, dramPktAddr, size,
banks[rank][bank]);
}
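// Illustrative example (annotation, not part of the patch): under
// RaBaChCo with burstSize = 64 and columnsPerRowBuffer = 128, the low
// 6 address bits are the offset within a burst and the next 7 bits
// select the column, so any two addresses in the same aligned 8kB
// region map to the same row buffer (assuming a single channel).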
void
-SimpleDRAM::addToReadQueue(PacketPtr pkt)
+SimpleDRAM::addToReadQueue(PacketPtr pkt, unsigned int pktCount)
{
// only add to the read queue here. whenever the request is
// eventually done, set the readyTime, and call schedule()
assert(!pkt->isWrite());
- // First check write buffer to see if the data is already at
- // the controller
- list<DRAMPacket*>::const_iterator i;
- Addr addr = pkt->getAddr();
+ assert(pktCount != 0);
- // @todo: add size check
- for (i = writeQueue.begin(); i != writeQueue.end(); ++i) {
- if ((*i)->addr == addr){
- servicedByWrQ++;
- DPRINTF(DRAM, "Read to %lld serviced by write queue\n", addr);
- bytesRead += bytesPerCacheLine;
- bytesConsumedRd += pkt->getSize();
- accessAndRespond(pkt, frontendLatency);
- return;
+ // if the request size is larger than burst size, the pkt is split into
+ // multiple DRAM packets
+ // Note if the pkt starting address is not aligned to the burst size,
+ // the address of the first DRAM packet is kept unaligned. Subsequent
+ // DRAM packets are aligned to burst size boundaries. This is to ensure
+ // we accurately check read packets against packets in the write queue.
+ Addr addr = pkt->getAddr();
+ unsigned pktsServicedByWrQ = 0;
+ BurstHelper* burst_helper = NULL;
+ for (int cnt = 0; cnt < pktCount; ++cnt) {
+ unsigned size = std::min((addr | (burstSize - 1)) + 1,
+ pkt->getAddr() + pkt->getSize()) - addr;
+ readPktSize[ceilLog2(size)]++;
+ readBursts++;
+
+ // First check write buffer to see if the data is already at
+ // the controller
+ bool foundInWrQ = false;
+ list<DRAMPacket*>::const_iterator i;
+ for (i = writeQueue.begin(); i != writeQueue.end(); ++i) {
+ if ((*i)->addr == addr && (*i)->size >= size) {
+ foundInWrQ = true;
+ servicedByWrQ++;
+ pktsServicedByWrQ++;
+ DPRINTF(DRAM, "Read to addr %lld with size %d serviced by "
+ "write queue\n", addr, size);
+ bytesRead += burstSize;
+ bytesConsumedRd += size;
+ break;
+ }
}
- }
- DRAMPacket* dram_pkt = decodeAddr(pkt);
+ // If not found in the write q, make a DRAM packet and
+ // push it onto the read queue
+ if (!foundInWrQ) {
- assert(readQueue.size() + respQueue.size() < readBufferSize);
- rdQLenPdf[readQueue.size() + respQueue.size()]++;
+ // Make the burst helper for split packets
+ if (pktCount > 1 && burst_helper == NULL) {
+ DPRINTF(DRAM, "Read to addr %lld translates to %d "
+ "dram requests\n", pkt->getAddr(), pktCount);
+ burst_helper = new BurstHelper(pktCount);
+ }
- DPRINTF(DRAM, "Adding to read queue\n");
+ DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
+ dram_pkt->burstHelper = burst_helper;
- readQueue.push_back(dram_pkt);
+ assert(!readQueueFull(1));
+ rdQLenPdf[readQueue.size() + respQueue.size()]++;
- // Update stats
- uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
- assert(bank_id < ranksPerChannel * banksPerRank);
- perBankRdReqs[bank_id]++;
+ DPRINTF(DRAM, "Adding to read queue\n");
- avgRdQLen = readQueue.size() + respQueue.size();
+ readQueue.push_back(dram_pkt);
+
+ // Update stats
+ uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
+ assert(bank_id < ranksPerChannel * banksPerRank);
+ perBankRdReqs[bank_id]++;
+
+ avgRdQLen = readQueue.size() + respQueue.size();
+ }
+
+ // Starting address of the next dram pkt (aligned to burstSize boundary)
+ addr = (addr | (burstSize - 1)) + 1;
+ }
+
+ // If all packets are serviced by the write queue, send the response back
+ if (pktsServicedByWrQ == pktCount) {
+ accessAndRespond(pkt, frontendLatency);
+ return;
+ }
+
+ // Update how many split packets are serviced by write queue
+ if (burst_helper != NULL)
+ burst_helper->burstsServiced = pktsServicedByWrQ;
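// Illustrative walk-through (annotation, not part of the patch): with
// burstSize = 64, a 150-byte read starting at address 100 is split into
// three DRAM packets:
//   [100, 128) size 28 (the first packet keeps the unaligned address)
//   [128, 192) size 64
//   [192, 250) size 58
// since size = min((addr | 63) + 1, 100 + 150) - addr at each step and
// addr then advances to (addr | 63) + 1, the next 64-byte boundary.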
// If we are not already scheduled to get the read request out of
// the queue, do so now
bank.openRow = dram_pkt->row;
bank.freeAt = schedTime + tBURST + std::max(accessLat, tCL);
busBusyUntil = bank.freeAt - tCL;
- bank.bytesAccessed += bytesPerCacheLine;
+ bank.bytesAccessed += burstSize;
if (!rowHitFlag) {
bank.tRASDoneAt = bank.freeAt + tRP;
"banks_id %d is %lld\n",
dram_pkt->rank * banksPerRank + dram_pkt->bank,
bank.freeAt);
- bytesPerActivate.sample(bytesPerCacheLine);
+ bytesPerActivate.sample(burstSize);
} else
panic("Unknown page management policy chosen\n");
}
void
-SimpleDRAM::addToWriteQueue(PacketPtr pkt)
+SimpleDRAM::addToWriteQueue(PacketPtr pkt, unsigned int pktCount)
{
// only add to the write queue here. whenever the request is
// eventually done, set the readyTime, and call schedule()
assert(pkt->isWrite());
- DRAMPacket* dram_pkt = decodeAddr(pkt);
+ // if the request size is larger than burst size, the pkt is split into
+ // multiple DRAM packets
+ Addr addr = pkt->getAddr();
+ for (int cnt = 0; cnt < pktCount; ++cnt) {
+ unsigned size = std::min((addr | (burstSize - 1)) + 1,
+ pkt->getAddr() + pkt->getSize()) - addr;
+ writePktSize[ceilLog2(size)]++;
+ writeBursts++;
- assert(writeQueue.size() < writeBufferSize);
- wrQLenPdf[writeQueue.size()]++;
+ DRAMPacket* dram_pkt = decodeAddr(pkt, addr, size);
- DPRINTF(DRAM, "Adding to write queue\n");
+ assert(writeQueue.size() < writeBufferSize);
+ wrQLenPdf[writeQueue.size()]++;
- writeQueue.push_back(dram_pkt);
+ DPRINTF(DRAM, "Adding to write queue\n");
- // Update stats
- uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
- assert(bank_id < ranksPerChannel * banksPerRank);
- perBankWrReqs[bank_id]++;
+ writeQueue.push_back(dram_pkt);
- avgWrQLen = writeQueue.size();
+ // Update stats
+ uint32_t bank_id = banksPerRank * dram_pkt->rank + dram_pkt->bank;
+ assert(bank_id < ranksPerChannel * banksPerRank);
+ perBankWrReqs[bank_id]++;
+
+ avgWrQLen = writeQueue.size();
+
+ bytesConsumedWr += dram_pkt->size;
+ bytesWritten += burstSize;
+
+ // Starting address of the next dram pkt (aligned to burstSize boundary)
+ addr = (addr | (burstSize - 1)) + 1;
+ }
// we do not wait for the writes to be send to the actual memory,
// but instead take responsibility for the consistency here and
// snoop the write queue for any upcoming reads
-
- bytesConsumedWr += pkt->getSize();
- bytesWritten += bytesPerCacheLine;
+ // @todo: if a pkt size is larger than the burst size, we might need a
+ // different front end latency
accessAndRespond(pkt, frontendLatency);
// If your write buffer is starting to fill up, drain it!
// Sanity check print of important parameters
DPRINTF(DRAM,
"Memory controller %s physical organization\n" \
- "Bytes per cacheline %d\n" \
- "Lines per row buffer %d\n" \
- "Rows per bank %d\n" \
- "Banks per rank %d\n" \
- "Ranks per channel %d\n" \
- "Total mem capacity %u\n",
- name(), bytesPerCacheLine, linesPerRowBuffer, rowsPerBank,
- banksPerRank, ranksPerChannel, bytesPerCacheLine *
- linesPerRowBuffer * rowsPerBank * banksPerRank * ranksPerChannel);
+ "Number of devices per rank %d\n" \
+ "Device bus width (in bits) %d\n" \
+ "DRAM data bus burst %d\n" \
+ "Row buffer size %d\n" \
+ "Columns per row buffer %d\n" \
+ "Rows per bank %d\n" \
+ "Banks per rank %d\n" \
+ "Ranks per channel %d\n" \
+ "Total mem capacity %u\n",
+ name(), devicesPerRank, deviceBusWidth, burstSize, rowBufferSize,
+ columnsPerRowBuffer, rowsPerBank, banksPerRank, ranksPerChannel,
+ rowBufferSize * rowsPerBank * banksPerRank * ranksPerChannel);
string scheduler = memSchedPolicy == Enums::fcfs ? "FCFS" : "FR-FCFS";
string address_mapping = addrMapping == Enums::RaBaChCo ? "RaBaChCo" :
// This is where we enter from the outside world
DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n",
- pkt->cmdString(),pkt->getAddr(), pkt->getSize());
+ pkt->cmdString(), pkt->getAddr(), pkt->getSize());
// simply drop inhibited packets for now
if (pkt->memInhibitAsserted()) {
return true;
}
- if (pkt->getSize() == bytesPerCacheLine)
- cpuReqs++;
-
// Every million accesses, print the state of the queues
if (numReqs % 1000000 == 0)
printQs();
}
prevArrival = curTick();
+
+ // Find out how many dram packets a pkt translates to
+ // If the burst size is equal to or larger than the pkt size, then a pkt
+ // translates to only one dram packet. Otherwise, a pkt translates to
+ // multiple dram packets
unsigned size = pkt->getSize();
- if (size > bytesPerCacheLine)
- panic("Request size %d is greater than burst size %d",
- size, bytesPerCacheLine);
+ unsigned offset = pkt->getAddr() & (burstSize - 1);
+ unsigned int dram_pkt_count = divCeil(offset + size, burstSize);
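// Illustrative example (annotation, not part of the patch): a 150-byte
// read at address 100 with a 64-byte burst size has offset = 36, so
// dram_pkt_count = divCeil(36 + 150, 64) = 3, matching the three bursts
// that addToReadQueue creates for it.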
// check local buffers and do not accept if full
if (pkt->isRead()) {
assert(size != 0);
- if (readQueueFull()) {
+ if (readQueueFull(dram_pkt_count)) {
DPRINTF(DRAM, "Read queue full, not accepting\n");
// remember that we have to retry this port
retryRdReq = true;
numRdRetry++;
return false;
} else {
- readPktSize[ceilLog2(size)]++;
- addToReadQueue(pkt);
+ addToReadQueue(pkt, dram_pkt_count);
readReqs++;
numReqs++;
}
} else if (pkt->isWrite()) {
assert(size != 0);
- if (writeQueueFull()) {
+ if (writeQueueFull(dram_pkt_count)) {
DPRINTF(DRAM, "Write queue full, not accepting\n");
// remember that we have to retry this port
retryWrReq = true;
numWrRetry++;
return false;
} else {
- writePktSize[ceilLog2(size)]++;
- addToWriteQueue(pkt);
+ addToWriteQueue(pkt, dram_pkt_count);
writeReqs++;
numReqs++;
}
DPRINTF(DRAM,
"processRespondEvent(): Some req has reached its readyTime\n");
- PacketPtr pkt = respQueue.front()->pkt;
-
- // Actually responds to the requestor
- bytesConsumedRd += pkt->getSize();
- bytesRead += bytesPerCacheLine;
- accessAndRespond(pkt, frontendLatency + backendLatency);
-
- delete respQueue.front();
- respQueue.pop_front();
-
- // Update stats
- avgRdQLen = readQueue.size() + respQueue.size();
-
- if (!respQueue.empty()) {
- assert(respQueue.front()->readyTime >= curTick());
- assert(!respondEvent.scheduled());
- schedule(respondEvent, respQueue.front()->readyTime);
- } else {
- // if there is nothing left in any queue, signal a drain
- if (writeQueue.empty() && readQueue.empty() &&
- drainManager) {
- drainManager->signalDrainDone();
- drainManager = NULL;
- }
- }
-
- // We have made a location in the queue available at this point,
- // so if there is a read that was forced to wait, retry now
- if (retryRdReq) {
- retryRdReq = false;
- port.sendRetry();
- }
+ DRAMPacket* dram_pkt = respQueue.front();
+
+ // Actually responds to the requestor
+ bytesConsumedRd += dram_pkt->size;
+ bytesRead += burstSize;
+ if (dram_pkt->burstHelper) {
+ // it is a split packet
+ dram_pkt->burstHelper->burstsServiced++;
+ if (dram_pkt->burstHelper->burstsServiced ==
+ dram_pkt->burstHelper->burstCount) {
+ // we have now serviced all child packets of a system packet
+ // so we can now respond to the requester
+ // @todo we probably want to have a different front end and back
+ // end latency for split packets
+ accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
+ delete dram_pkt->burstHelper;
+ dram_pkt->burstHelper = NULL;
+ }
+ } else {
+ // it is not a split packet
+ accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
+ }
+
+ delete respQueue.front();
+ respQueue.pop_front();
+
+ // Update stats
+ avgRdQLen = readQueue.size() + respQueue.size();
+
+ if (!respQueue.empty()) {
+ assert(respQueue.front()->readyTime >= curTick());
+ assert(!respondEvent.scheduled());
+ schedule(respondEvent, respQueue.front()->readyTime);
+ } else {
+ // if there is nothing left in any queue, signal a drain
+ if (writeQueue.empty() && readQueue.empty() &&
+ drainManager) {
+ drainManager->signalDrainDone();
+ drainManager = NULL;
+ }
+ }
+
+ // We have made a location in the queue available at this point,
+ // so if there is a read that was forced to wait, retry now
+ if (retryRdReq) {
+ retryRdReq = false;
+ port.sendRetry();
+ }
}
void
if (pageMgmt == Enums::open) {
bank.openRow = dram_pkt->row;
bank.freeAt = curTick() + addDelay + accessLat;
- bank.bytesAccessed += bytesPerCacheLine;
+ bank.bytesAccessed += burstSize;
// If you activated a new row do to this access, the next access
// will have to respect tRAS for this bank. Assume tRAS ~= 3 * tRP.
bank.freeAt = curTick() + addDelay + accessLat + tRP + tRP;
recordActivate(bank.freeAt - tRP - tRP - tCL - tRCD); //essentially (freeAt - tRC)
DPRINTF(DRAM,"doDRAMAccess::bank.freeAt is %lld\n",bank.freeAt);
- bytesPerActivate.sample(bytesPerCacheLine);
+ bytesPerActivate.sample(burstSize);
} else
panic("No page management policy chosen\n");
readReqs
.name(name() + ".readReqs")
- .desc("Total number of read requests seen");
+ .desc("Total number of read requests accepted by DRAM controller");
writeReqs
.name(name() + ".writeReqs")
- .desc("Total number of write requests seen");
+ .desc("Total number of write requests accepted by DRAM controller");
+
+ readBursts
+ .name(name() + ".readBursts")
+ .desc("Total number of DRAM read bursts. "
+ "Each DRAM read request translates to either one or multiple "
+ "DRAM read bursts");
+
+ writeBursts
+ .name(name() + ".writeBursts")
+ .desc("Total number of DRAM write bursts. "
+ "Each DRAM write request translates to either one or multiple "
+ "DRAM write bursts");
servicedByWrQ
.name(name() + ".servicedByWrQ")
- .desc("Number of read reqs serviced by write Q");
-
- cpuReqs
- .name(name() + ".cpureqs")
- .desc("Reqs generatd by CPU via cache - shady");
+ .desc("Number of DRAM read bursts serviced by write Q");
neitherReadNorWrite
.name(name() + ".neitherReadNorWrite")
.desc("Average queueing delay per request")
.precision(2);
- avgQLat = totQLat / (readReqs - servicedByWrQ);
+ avgQLat = totQLat / (readBursts - servicedByWrQ);
avgBankLat
.name(name() + ".avgBankLat")
.desc("Average bank access latency per request")
.precision(2);
- avgBankLat = totBankLat / (readReqs - servicedByWrQ);
+ avgBankLat = totBankLat / (readBursts - servicedByWrQ);
avgBusLat
.name(name() + ".avgBusLat")
.desc("Average bus latency per request")
.precision(2);
- avgBusLat = totBusLat / (readReqs - servicedByWrQ);
+ avgBusLat = totBusLat / (readBursts - servicedByWrQ);
avgMemAccLat
.name(name() + ".avgMemAccLat")
.desc("Average memory access latency")
.precision(2);
- avgMemAccLat = totMemAccLat / (readReqs - servicedByWrQ);
+ avgMemAccLat = totMemAccLat / (readBursts - servicedByWrQ);
numRdRetry
.name(name() + ".numRdRetry")
.desc("Row buffer hit rate for reads")
.precision(2);
- readRowHitRate = (readRowHits / (readReqs - servicedByWrQ)) * 100;
+ readRowHitRate = (readRowHits / (readBursts - servicedByWrQ)) * 100;
writeRowHitRate
.name(name() + ".writeRowHitRate")
.desc("Row buffer hit rate for writes")
.precision(2);
- writeRowHitRate = (writeRowHits / writeReqs) * 100;
+ writeRowHitRate = (writeRowHits / writeBursts) * 100;
readPktSize
- .init(ceilLog2(bytesPerCacheLine) + 1)
+ .init(ceilLog2(burstSize) + 1)
.name(name() + ".readPktSize")
.desc("Categorize read packet sizes");
writePktSize
- .init(ceilLog2(bytesPerCacheLine) + 1)
+ .init(ceilLog2(burstSize) + 1)
.name(name() + ".writePktSize")
.desc("Categorize write packet sizes");
.desc("What write queue length does an incoming req see");
bytesPerActivate
- .init(bytesPerCacheLine * linesPerRowBuffer)
+ .init(rowBufferSize)
.name(name() + ".bytesPerActivate")
.desc("Bytes accessed per row activation")
.flags(nozero);
.desc("Theoretical peak bandwidth in MB/s")
.precision(2);
- peakBW = (SimClock::Frequency / tBURST) * bytesPerCacheLine / 1000000;
+ peakBW = (SimClock::Frequency / tBURST) * burstSize / 1000000;
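// Illustrative example (annotation, not part of the patch): for the
// DDR3-1600 x64 configuration, burstSize = 64 bytes and tBURST = 5ns,
// so peakBW = 64 B / 5 ns = 12800 MB/s.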
busUtil
.name(name() + ".busUtil")
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
{ }
};
+ /**
+ * A burst helper helps organize and manage a packet that is larger than
+ * the DRAM burst size. A system packet that is larger than the burst size
+ * is split into multiple DRAM packets and all those DRAM packets point to
+ * a single burst helper such that we know when the whole packet is served.
+ */
+ class BurstHelper {
+
+ public:
+
+ /** Number of DRAM bursts required for a system packet **/
+ const unsigned int burstCount;
+
+ /** Number of DRAM bursts serviced so far for a system packet **/
+ unsigned int burstsServiced;
+
+ BurstHelper(unsigned int _burstCount)
+ : burstCount(_burstCount), burstsServiced(0)
+ { }
+ };
+
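/**
 * Illustrative usage (annotation, not part of the patch): a packet that
 * splits into three bursts gets a single BurstHelper(3); each completed
 * burst increments burstsServiced, and the original packet is answered
 * once burstsServiced == burstCount (see processRespondEvent).
 */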
/**
* A DRAM packet stores packets along with the timestamp of when
* the packet entered the queue, and also the decoded address.
const uint8_t rank;
const uint16_t bank;
const uint16_t row;
+
+ /**
+ * The starting address of the DRAM packet.
+ * This address could be unaligned to burst size boundaries. The
+ * reason is to keep the address offset so we can accurately check
+ * incoming read packets against packets in the write queue.
+ */
const Addr addr;
+
+ /**
+ * The size of this dram packet in bytes
+ * It is always equal to or smaller than the DRAM burst size
+ */
+ const unsigned int size;
+
+ /**
+ * A pointer to the BurstHelper if this DRAMPacket is a split packet
+ * If not a split packet (common case), this is set to NULL
+ */
+ BurstHelper* burstHelper;
Bank& bank_ref;
- DRAMPacket(PacketPtr _pkt, uint8_t _rank,
- uint16_t _bank, uint16_t _row, Addr _addr, Bank& _bank_ref)
+ DRAMPacket(PacketPtr _pkt, uint8_t _rank, uint16_t _bank,
+ uint16_t _row, Addr _addr, unsigned int _size,
+ Bank& _bank_ref)
: entryTime(curTick()), readyTime(curTick()),
pkt(_pkt), rank(_rank), bank(_bank), row(_row), addr(_addr),
- bank_ref(_bank_ref)
+ size(_size), burstHelper(NULL), bank_ref(_bank_ref)
{ }
};
/**
* Check if the read queue has room for more entries
*
+ * @param pktCount The number of entries needed in the read queue
* @return true if read queue is full, false otherwise
*/
- bool readQueueFull() const;
+ bool readQueueFull(unsigned int pktCount) const;
/**
* Check if the write queue has room for more entries
*
+ * @param pktCount The number of entries needed in the write queue
* @return true if write queue is full, false otherwise
*/
- bool writeQueueFull() const;
+ bool writeQueueFull(unsigned int pktCount) const;
/**
* When a new read comes in, first check if the write q has a
* pending request to the same address.\ If not, decode the
- * address to populate rank/bank/row, create a "dram_pkt", and
- * push it to the back of the read queue.\ If this is the only
+ * address to populate rank/bank/row, create one or multiple
+ * "dram_pkt", and push them to the back of the read queue.\
+ * If this is the only
* read request in the system, schedule an event to start
* servicing it.
*
* @param pkt The request packet from the outside world
+ * @param pktCount The number of DRAM bursts the pkt
+ * translates to. If the pkt size is larger than one full burst,
+ * then pktCount is greater than one.
*/
- void addToReadQueue(PacketPtr pkt);
+ void addToReadQueue(PacketPtr pkt, unsigned int pktCount);
/**
* Decode the incoming pkt, create a dram_pkt and push to the
* to get full, stop reads, and start draining writes.
*
* @param pkt The request packet from the outside world
+ * @param pktCount The number of DRAM bursts the pkt
+ * translates to. If the pkt size is larger than one full burst,
+ * then pktCount is greater than one.
*/
- void addToWriteQueue(PacketPtr pkt);
+ void addToWriteQueue(PacketPtr pkt, unsigned int pktCount);
/**
* Actually do the DRAM access - figure out the latency it
/**
* Address decoder to figure out physical mapping onto ranks,
- * banks, and rows.
+ * banks, and rows. This function is called multiple times on the same
+ * system packet if the packet is larger than the burst size of the
+ * memory. The dramPktAddr is used for the offset within the packet.
*
* @param pkt The packet from the outside world
+ * @param dramPktAddr The starting address of the DRAM packet
+ * @param size The size of the DRAM packet in bytes
* @return A DRAMPacket pointer with the decoded information
*/
- DRAMPacket* decodeAddr(PacketPtr pkt);
+ DRAMPacket* decodeAddr(PacketPtr pkt, Addr dramPktAddr, unsigned int size);
/**
* The memory schduler/arbiter - picks which read request needs to
/**
* The following are basic design parameters of the memory
- * controller, and are initialized based on parameter values. The
- * bytesPerCacheLine is based on the neighbouring ports cache line
- * size and thus determined outside the constructor. Similarly,
- * the rowsPerBank is determined based on the capacity, number of
- * ranks and banks, the cache line size, and the row buffer size.
- */
- uint32_t bytesPerCacheLine;
- const uint32_t linesPerRowBuffer;
+ * controller, and are initialized based on parameter values.
+ * The rowsPerBank is determined based on the capacity, number of
+ * ranks and banks, the burst size, and the row buffer size.
+ */
+ const uint32_t deviceBusWidth;
+ const uint32_t burstLength;
+ const uint32_t deviceRowBufferSize;
+ const uint32_t devicesPerRank;
+ const uint32_t burstSize;
+ const uint32_t rowBufferSize;
const uint32_t ranksPerChannel;
const uint32_t banksPerRank;
const uint32_t channels;
uint32_t rowsPerBank;
+ uint32_t columnsPerRowBuffer;
const uint32_t readBufferSize;
const uint32_t writeBufferSize;
const double writeThresholdPerc;
// All statistics that the model needs to capture
Stats::Scalar readReqs;
Stats::Scalar writeReqs;
- Stats::Scalar cpuReqs;
+ Stats::Scalar readBursts;
+ Stats::Scalar writeBursts;
Stats::Scalar bytesRead;
Stats::Scalar bytesWritten;
Stats::Scalar bytesConsumedRd;