if opt_mem_type == "HMC_2500_1x32":
# The static latency of the vault controllers is estimated
# to be smaller than a full DRAM channel controller
- mem_ctrl = m5.objects.DRAMCtrl(min_writes_per_switch = 8,
+ mem_ctrl = m5.objects.MemCtrl(min_writes_per_switch = 8,
static_backend_latency = '4ns',
static_frontend_latency = '4ns')
else:
- mem_ctrl = m5.objects.DRAMCtrl()
+ mem_ctrl = m5.objects.MemCtrl()
# Hookup the controller to the interface and add to the list
mem_ctrl.dram = dram_intf
# Create a controller if not sharing a channel with DRAM
# in which case the controller has already been created
if not opt_hybrid_channel:
- mem_ctrl = m5.objects.DRAMCtrl()
+ mem_ctrl = m5.objects.MemCtrl()
mem_ctrl.nvm = nvm_intf
mem_ctrls.append(mem_ctrl)
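A minimal sketch of the hybrid-channel case implied above (illustrative only, not part of the patch): when opt_hybrid_channel is set, the DRAM and NVM interfaces share a single channel by attaching both to the same controller. The dram_intf and nvm_intf names follow the snippet above; hybrid_ctrl is a hypothetical name used only for this sketch.
# one controller hosting both media interfaces on a shared channel
hybrid_ctrl = m5.objects.MemCtrl()
hybrid_ctrl.dram = dram_intf
hybrid_ctrl.nvm = nvm_intf
mem_ctrls.append(hybrid_ctrl)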
default="512MB",
help="Specify the physical memory size (single memory)")
parser.add_option("--enable-dram-powerdown", action="store_true",
- help="Enable low-power states in DRAMCtrl")
+ help="Enable low-power states in DRAMInterface")
parser.add_option("--mem-channels-intlv", type="int", default=0,
help="Memory channels interleave")
# the following assumes that we are using the native DRAM
# controller, check to be sure
- if isinstance(ctrl, m5.objects.DRAMCtrl):
+ if isinstance(ctrl, m5.objects.MemCtrl):
# make the DRAM refresh interval sufficiently infinite to avoid
# latency spikes
- ctrl.tREFI = '100s'
+ ctrl.dram.tREFI = '100s'
MemConfig.config_mem(args, system)
# Sanity check for memory controller class.
-if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl):
+ fatal("This script assumes the controller is a MemCtrl subclass")
if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
fatal("This script assumes the memory is a DRAMInterface subclass")
# the following assumes that we are using the native DRAM
# controller, check to be sure
-if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl):
+ fatal("This script assumes the controller is a MemCtrl subclass")
if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
fatal("This script assumes the memory is a DRAMInterface subclass")
proto_tester = TrafficGen(config_file = cfg_file_path)
# Set up the system along with a DRAM controller
-system = System(physmem = DRAMCtrl(dram = DDR3_1600_8x8()))
+system = System(physmem = MemCtrl(dram = DDR3_1600_8x8()))
system.voltage_domain = VoltageDomain(voltage = '1V')
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl = MemCtrl()
system.mem_ctrl.dram = DDR3_1600_8x8()
system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
system.system_port = system.membus.slave
# Create a DDR3 memory controller
-system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl = MemCtrl()
system.mem_ctrl.dram = DDR3_1600_8x8()
system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl = MemCtrl()
system.mem_ctrl.dram = DDR3_1600_8x8()
system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl = MemCtrl()
system.mem_ctrl.dram = DDR3_1600_8x8()
system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
system.cpu = [TimingSimpleCPU() for i in range(2)]
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl = MemCtrl()
system.mem_ctrl.dram = DDR3_1600_8x8()
system.mem_ctrl.dram.range = system.mem_ranges[0]
# the following assumes that we are using the native memory
# controller with an NVM interface, check to be sure
-if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl):
+ fatal("This script assumes the controller is a MemCtrl subclass")
if not isinstance(system.mem_ctrls[0].nvm, m5.objects.NVMInterface):
fatal("This script assumes the memory is a NVMInterface class")
# the following assumes that we are using the native controller
# with NVM and DRAM interfaces, check to be sure
-if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl):
+ fatal("This script assumes the controller is a MemCtrl subclass")
if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
fatal("This script assumes the first memory is a DRAMInterface subclass")
if not isinstance(system.mem_ctrls[0].nvm, m5.objects.NVMInterface):
dram_intf = MemConfig.create_mem_intf(mem_type, r, index,
options.num_dirs, int(math.log(options.num_dirs, 2)),
intlv_size, options.xor_low_bit)
- mem_ctrl = m5.objects.DRAMCtrl(dram = dram_intf)
+ mem_ctrl = m5.objects.MemCtrl(dram = dram_intf)
if options.access_backing_store:
mem_ctrl.kvm_map=False
+++ /dev/null
-# Copyright (c) 2012-2020 ARM Limited
-# All rights reserved.
-#
-# The license below extends only to copyright in the software and shall
-# not be construed as granting a license to any other intellectual
-# property including but not limited to intellectual property relating
-# to a hardware implementation of the functionality of the software
-# licensed hereunder. You may use the software subject to the license
-# terms below provided that you ensure that this notice is replicated
-# unmodified and in its entirety in all distributions of the software,
-# modified or unmodified, in source code or in binary form.
-#
-# Copyright (c) 2013 Amin Farmahini-Farahani
-# Copyright (c) 2015 University of Kaiserslautern
-# Copyright (c) 2015 The University of Bologna
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met: redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer;
-# redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution;
-# neither the name of the copyright holders nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-from m5.params import *
-from m5.proxy import *
-from m5.objects.QoSMemCtrl import *
-
-# Enum for memory scheduling algorithms, currently First-Come
-# First-Served and a First-Row Hit then First-Come First-Served
-class MemSched(Enum): vals = ['fcfs', 'frfcfs']
-
-# DRAMCtrl is a single-channel single-ported DRAM controller model
-# that aims to model the most important system-level performance
-# effects of a DRAM without getting into too much detail of the DRAM
-# itself.
-class DRAMCtrl(QoSMemCtrl):
- type = 'DRAMCtrl'
- cxx_header = "mem/dram_ctrl.hh"
-
- # single-ported on the system interface side, instantiate with a
- # bus in front of the controller for multiple ports
- port = SlavePort("Slave port")
-
- # Interface to volatile, DRAM media
- dram = Param.DRAMInterface(NULL, "DRAM interface")
-
- # Interface to non-volatile media
- nvm = Param.NVMInterface(NULL, "NVM interface")
-
- # read and write buffer depths are set in the interface
- # the controller will read these values when instantiated
-
- # threshold in percent for when to forcefully trigger writes and
- # start emptying the write buffer
- write_high_thresh_perc = Param.Percent(85, "Threshold to force writes")
-
- # threshold in percentage for when to start writes if the read
- # queue is empty
- write_low_thresh_perc = Param.Percent(50, "Threshold to start writes")
-
- # minimum write bursts to schedule before switching back to reads
- min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before "
- "switching to reads")
-
- # scheduler, address map and page policy
- mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
-
- # pipeline latency of the controller and PHY, split into a
- # frontend part and a backend part, with reads and writes serviced
- # by the queues only seeing the frontend contribution, and reads
- # serviced by the memory seeing the sum of the two
- static_frontend_latency = Param.Latency("10ns", "Static frontend latency")
- static_backend_latency = Param.Latency("10ns", "Static backend latency")
-
- command_window = Param.Latency("10ns", "Static backend latency")
class DRAMInterface(MemInterface):
type = 'DRAMInterface'
- cxx_header = "mem/dram_ctrl.hh"
+ cxx_header = "mem/mem_interface.hh"
# scheduler page policy
page_policy = Param.PageManage('open_adaptive', "Page management policy")
--- /dev/null
+# Copyright (c) 2012-2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2013 Amin Farmahini-Farahani
+# Copyright (c) 2015 University of Kaiserslautern
+# Copyright (c) 2015 The University of Bologna
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import *
+from m5.proxy import *
+from m5.objects.QoSMemCtrl import *
+
+# Enum for memory scheduling algorithms, currently First-Come
+# First-Served and a First-Row Hit then First-Come First-Served
+class MemSched(Enum): vals = ['fcfs', 'frfcfs']
+
+# MemCtrl is a single-channel, single-ported memory controller model
+# that aims to model the most important system-level performance
+# effects of a memory controller, interfacing with media-specific
+# interfaces such as DRAM and NVM
+class MemCtrl(QoSMemCtrl):
+ type = 'MemCtrl'
+ cxx_header = "mem/mem_ctrl.hh"
+
+ # single-ported on the system interface side, instantiate with a
+ # bus in front of the controller for multiple ports
+ port = SlavePort("Slave port")
+
+ # Interface to volatile, DRAM media
+ dram = Param.DRAMInterface(NULL, "DRAM interface")
+
+ # Interface to non-volatile media
+ nvm = Param.NVMInterface(NULL, "NVM interface")
+
+ # read and write buffer depths are set in the interface
+ # the controller will read these values when instantiated
+
+ # threshold in percent for when to forcefully trigger writes and
+ # start emptying the write buffer
+ write_high_thresh_perc = Param.Percent(85, "Threshold to force writes")
+
+ # threshold in percentage for when to start writes if the read
+ # queue is empty
+ write_low_thresh_perc = Param.Percent(50, "Threshold to start writes")
+
+ # minimum write bursts to schedule before switching back to reads
+ min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before "
+ "switching to reads")
+
+ # scheduler, address map and page policy
+ mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
+
+ # pipeline latency of the controller and PHY, split into a
+ # frontend part and a backend part, with reads and writes serviced
+ # by the queues only seeing the frontend contribution, and reads
+ # serviced by the memory seeing the sum of the two
+ static_frontend_latency = Param.Latency("10ns", "Static frontend latency")
+ static_backend_latency = Param.Latency("10ns", "Static backend latency")
+
+ command_window = Param.Latency("10ns", "Duration of the command window")
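A short usage sketch for the new controller (illustrative parameter values; DDR3_1600_8x8 is the interface already used in the config snippets above, and the overrides shown are examples, not recommended settings):
from m5.objects import MemCtrl, DDR3_1600_8x8

ctrl = MemCtrl()
ctrl.dram = DDR3_1600_8x8()
# override two of the parameters defined above (example values only)
ctrl.mem_sched_policy = 'fcfs'
ctrl.write_high_thresh_perc = 80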
class MemInterface(AbstractMemory):
type = 'MemInterface'
abstract = True
- cxx_header = "mem/dram_ctrl.hh"
+ cxx_header = "mem/mem_interface.hh"
# Allow the interface to set required controller buffer sizes
# each entry corresponds to a burst for the specific memory channel
# are modeled without getting into too much detail of the media itself.
class NVMInterface(MemInterface):
type = 'NVMInterface'
- cxx_header = "mem/dram_ctrl.hh"
+ cxx_header = "mem/mem_interface.hh"
# NVM DIMM could have write buffer to offload writes
# define buffer depth, which will limit the number of pending writes
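An NVM-only channel follows the same pattern; a brief sketch (the NVM_2400_1x64 interface name is an assumption for illustration, not taken from this excerpt):
from m5.objects import MemCtrl, NVM_2400_1x64

# attach only the non-volatile interface to the controller
nvm_ctrl = MemCtrl()
nvm_ctrl.nvm = NVM_2400_1x64()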
SimObject('AbstractMemory.py')
SimObject('AddrMapper.py')
SimObject('Bridge.py')
-SimObject('DRAMCtrl.py')
+SimObject('MemCtrl.py')
SimObject('MemInterface.py')
SimObject('DRAMInterface.py')
SimObject('NVMInterface.py')
Source('bridge.cc')
Source('coherent_xbar.cc')
Source('drampower.cc')
-Source('dram_ctrl.cc')
Source('external_master.cc')
Source('external_slave.cc')
+Source('mem_ctrl.cc')
+Source('mem_interface.cc')
Source('noncoherent_xbar.cc')
Source('packet.cc')
Source('port.cc')
DebugFlag('ExternalPort')
DebugFlag('HtmMem', 'Hardware Transactional Memory (Mem side)')
DebugFlag('LLSC')
+DebugFlag('MemCtrl')
DebugFlag('MMU')
DebugFlag('MemoryAccess')
DebugFlag('PacketQueue')
+++ /dev/null
-/*
- * Copyright (c) 2010-2020 ARM Limited
- * All rights reserved
- *
- * The license below extends only to copyright in the software and shall
- * not be construed as granting a license to any other intellectual
- * property including but not limited to intellectual property relating
- * to a hardware implementation of the functionality of the software
- * licensed hereunder. You may use the software subject to the license
- * terms below provided that you ensure that this notice is replicated
- * unmodified and in its entirety in all distributions of the software,
- * modified or unmodified, in source code or in binary form.
- *
- * Copyright (c) 2013 Amin Farmahini-Farahani
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "mem/dram_ctrl.hh"
-
-#include "base/bitfield.hh"
-#include "base/trace.hh"
-#include "debug/DRAM.hh"
-#include "debug/DRAMPower.hh"
-#include "debug/DRAMState.hh"
-#include "debug/Drain.hh"
-#include "debug/NVM.hh"
-#include "debug/QOS.hh"
-#include "params/DRAMInterface.hh"
-#include "params/NVMInterface.hh"
-#include "sim/system.hh"
-
-using namespace std;
-using namespace Data;
-
-DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
- QoS::MemCtrl(p),
- port(name() + ".port", *this), isTimingMode(false),
- retryRdReq(false), retryWrReq(false),
- nextReqEvent([this]{ processNextReqEvent(); }, name()),
- respondEvent([this]{ processRespondEvent(); }, name()),
- dram(p->dram), nvm(p->nvm),
- readBufferSize((dram ? dram->readBufferSize : 0) +
- (nvm ? nvm->readBufferSize : 0)),
- writeBufferSize((dram ? dram->writeBufferSize : 0) +
- (nvm ? nvm->writeBufferSize : 0)),
- writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0),
- writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
- minWritesPerSwitch(p->min_writes_per_switch),
- writesThisTime(0), readsThisTime(0),
- memSchedPolicy(p->mem_sched_policy),
- frontendLatency(p->static_frontend_latency),
- backendLatency(p->static_backend_latency),
- commandWindow(p->command_window),
- nextBurstAt(0), prevArrival(0),
- nextReqTime(0),
- stats(*this)
-{
- DPRINTF(DRAM, "Setting up controller\n");
- readQueue.resize(p->qos_priorities);
- writeQueue.resize(p->qos_priorities);
-
- // Hook up interfaces to the controller
- if (dram)
- dram->setCtrl(this, commandWindow);
- if (nvm)
- nvm->setCtrl(this, commandWindow);
-
- fatal_if(!dram && !nvm, "Memory controller must have an interface");
-
- // perform a basic check of the write thresholds
- if (p->write_low_thresh_perc >= p->write_high_thresh_perc)
- fatal("Write buffer low threshold %d must be smaller than the "
- "high threshold %d\n", p->write_low_thresh_perc,
- p->write_high_thresh_perc);
-}
-
-void
-DRAMCtrl::init()
-{
- if (!port.isConnected()) {
- fatal("DRAMCtrl %s is unconnected!\n", name());
- } else {
- port.sendRangeChange();
- }
-}
-
-void
-DRAMCtrl::startup()
-{
- // remember the memory system mode of operation
- isTimingMode = system()->isTimingMode();
-
- if (isTimingMode) {
- // shift the bus busy time sufficiently far ahead that we never
- // have to worry about negative values when computing the time for
- // the next request, this will add an insignificant bubble at the
- // start of simulation
- nextBurstAt = curTick() + (dram ? dram->commandOffset() :
- nvm->commandOffset());
- }
-}
-
-Tick
-DRAMCtrl::recvAtomic(PacketPtr pkt)
-{
- DPRINTF(DRAM, "recvAtomic: %s 0x%x\n", pkt->cmdString(), pkt->getAddr());
-
- panic_if(pkt->cacheResponding(), "Should not see packets where cache "
- "is responding");
-
- Tick latency = 0;
- // do the actual memory access and turn the packet into a response
- if (dram && dram->getAddrRange().contains(pkt->getAddr())) {
- dram->access(pkt);
-
- if (pkt->hasData()) {
- // this value is not supposed to be accurate, just enough to
- // keep things going, mimic a closed page
- latency = dram->accessLatency();
- }
- } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) {
- nvm->access(pkt);
-
- if (pkt->hasData()) {
- // this value is not supposed to be accurate, just enough to
- // keep things going, mimic a closed page
- latency = nvm->accessLatency();
- }
- } else {
- panic("Can't handle address range for packet %s\n",
- pkt->print());
- }
-
- return latency;
-}
-
-bool
-DRAMCtrl::readQueueFull(unsigned int neededEntries) const
-{
- DPRINTF(DRAM, "Read queue limit %d, current size %d, entries needed %d\n",
- readBufferSize, totalReadQueueSize + respQueue.size(),
- neededEntries);
-
- auto rdsize_new = totalReadQueueSize + respQueue.size() + neededEntries;
- return rdsize_new > readBufferSize;
-}
-
-bool
-DRAMCtrl::writeQueueFull(unsigned int neededEntries) const
-{
- DPRINTF(DRAM, "Write queue limit %d, current size %d, entries needed %d\n",
- writeBufferSize, totalWriteQueueSize, neededEntries);
-
- auto wrsize_new = (totalWriteQueueSize + neededEntries);
- return wrsize_new > writeBufferSize;
-}
-
-DRAMPacket*
-MemInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr,
- unsigned size, bool is_read, bool is_dram)
-{
- // decode the address based on the address mapping scheme, with
- // Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and
- // channel, respectively
- uint8_t rank;
- uint8_t bank;
- // use a 64-bit unsigned during the computations as the row is
- // always the top bits, and check before creating the packet
- uint64_t row;
-
- // Get packed address, starting at 0
- Addr addr = getCtrlAddr(pkt_addr);
-
- // truncate the address to a memory burst, which makes it unique to
- // a specific buffer, row, bank, rank and channel
- addr = addr / burstSize;
-
- // we have removed the lowest order address bits that denote the
- // position within the column
- if (addrMapping == Enums::RoRaBaChCo || addrMapping == Enums::RoRaBaCoCh) {
- // the lowest order bits denote the column to ensure that
- // sequential cache lines occupy the same row
- addr = addr / burstsPerRowBuffer;
-
- // after the channel bits, get the bank bits to interleave
- // over the banks
- bank = addr % banksPerRank;
- addr = addr / banksPerRank;
-
- // after the bank, we get the rank bits which thus interleaves
- // over the ranks
- rank = addr % ranksPerChannel;
- addr = addr / ranksPerChannel;
-
- // lastly, get the row bits, no need to remove them from addr
- row = addr % rowsPerBank;
- } else if (addrMapping == Enums::RoCoRaBaCh) {
- // with emerging technologies, could have small page size with
- // interleaving granularity greater than row buffer
- if (burstsPerStripe > burstsPerRowBuffer) {
- // remove column bits which are a subset of burstsPerStripe
- addr = addr / burstsPerRowBuffer;
- } else {
- // remove lower column bits below channel bits
- addr = addr / burstsPerStripe;
- }
-
- // start with the bank bits, as this provides the maximum
- // opportunity for parallelism between requests
- bank = addr % banksPerRank;
- addr = addr / banksPerRank;
-
- // next get the rank bits
- rank = addr % ranksPerChannel;
- addr = addr / ranksPerChannel;
-
- // next, the higher-order column bites
- if (burstsPerStripe < burstsPerRowBuffer) {
- addr = addr / (burstsPerRowBuffer / burstsPerStripe);
- }
-
- // lastly, get the row bits, no need to remove them from addr
- row = addr % rowsPerBank;
- } else
- panic("Unknown address mapping policy chosen!");
-
- assert(rank < ranksPerChannel);
- assert(bank < banksPerRank);
- assert(row < rowsPerBank);
- assert(row < Bank::NO_ROW);
-
- DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
- pkt_addr, rank, bank, row);
-
- // create the corresponding memory packet with the entry time and
- // ready time set to the current tick, the latter will be updated
- // later
- uint16_t bank_id = banksPerRank * rank + bank;
-
- return new DRAMPacket(pkt, is_read, is_dram, rank, bank, row, bank_id,
- pkt_addr, size);
-}
-
-void
-DRAMCtrl::addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram)
-{
- // only add to the read queue here. whenever the request is
- // eventually done, set the readyTime, and call schedule()
- assert(!pkt->isWrite());
-
- assert(pkt_count != 0);
-
- // if the request size is larger than burst size, the pkt is split into
- // multiple packets
- // Note if the pkt starting address is not aligened to burst size, the
- // address of first packet is kept unaliged. Subsequent packets
- // are aligned to burst size boundaries. This is to ensure we accurately
- // check read packets against packets in write queue.
- const Addr base_addr = pkt->getAddr();
- Addr addr = base_addr;
- unsigned pktsServicedByWrQ = 0;
- BurstHelper* burst_helper = NULL;
-
- uint32_t burst_size = is_dram ? dram->bytesPerBurst() :
- nvm->bytesPerBurst();
- for (int cnt = 0; cnt < pkt_count; ++cnt) {
- unsigned size = std::min((addr | (burst_size - 1)) + 1,
- base_addr + pkt->getSize()) - addr;
- stats.readPktSize[ceilLog2(size)]++;
- stats.readBursts++;
- stats.masterReadAccesses[pkt->masterId()]++;
-
- // First check write buffer to see if the data is already at
- // the controller
- bool foundInWrQ = false;
- Addr burst_addr = burstAlign(addr, is_dram);
- // if the burst address is not present then there is no need
- // looking any further
- if (isInWriteQueue.find(burst_addr) != isInWriteQueue.end()) {
- for (const auto& vec : writeQueue) {
- for (const auto& p : vec) {
- // check if the read is subsumed in the write queue
- // packet we are looking at
- if (p->addr <= addr &&
- ((addr + size) <= (p->addr + p->size))) {
-
- foundInWrQ = true;
- stats.servicedByWrQ++;
- pktsServicedByWrQ++;
- DPRINTF(DRAM,
- "Read to addr %lld with size %d serviced by "
- "write queue\n",
- addr, size);
- stats.bytesReadWrQ += burst_size;
- break;
- }
- }
- }
- }
-
- // If not found in the write q, make a DRAM packet and
- // push it onto the read queue
- if (!foundInWrQ) {
-
- // Make the burst helper for split packets
- if (pkt_count > 1 && burst_helper == NULL) {
- DPRINTF(DRAM, "Read to addr %lld translates to %d "
- "memory requests\n", pkt->getAddr(), pkt_count);
- burst_helper = new BurstHelper(pkt_count);
- }
-
- DRAMPacket* dram_pkt;
- if (is_dram) {
- dram_pkt = dram->decodePacket(pkt, addr, size, true, true);
- // increment read entries of the rank
- dram->setupRank(dram_pkt->rank, true);
- } else {
- dram_pkt = nvm->decodePacket(pkt, addr, size, true, false);
- // Increment count to trigger issue of non-deterministic read
- nvm->setupRank(dram_pkt->rank, true);
- // Default readyTime to Max; will be reset once read is issued
- dram_pkt->readyTime = MaxTick;
- }
- dram_pkt->burstHelper = burst_helper;
-
- assert(!readQueueFull(1));
- stats.rdQLenPdf[totalReadQueueSize + respQueue.size()]++;
-
- DPRINTF(DRAM, "Adding to read queue\n");
-
- readQueue[dram_pkt->qosValue()].push_back(dram_pkt);
-
- // log packet
- logRequest(MemCtrl::READ, pkt->masterId(), pkt->qosValue(),
- dram_pkt->addr, 1);
-
- // Update stats
- stats.avgRdQLen = totalReadQueueSize + respQueue.size();
- }
-
- // Starting address of next memory pkt (aligned to burst boundary)
- addr = (addr | (burst_size - 1)) + 1;
- }
-
- // If all packets are serviced by write queue, we send the repsonse back
- if (pktsServicedByWrQ == pkt_count) {
- accessAndRespond(pkt, frontendLatency);
- return;
- }
-
- // Update how many split packets are serviced by write queue
- if (burst_helper != NULL)
- burst_helper->burstsServiced = pktsServicedByWrQ;
-
- // If we are not already scheduled to get a request out of the
- // queue, do so now
- if (!nextReqEvent.scheduled()) {
- DPRINTF(DRAM, "Request scheduled immediately\n");
- schedule(nextReqEvent, curTick());
- }
-}
-
-void
-DRAMCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram)
-{
- // only add to the write queue here. whenever the request is
- // eventually done, set the readyTime, and call schedule()
- assert(pkt->isWrite());
-
- // if the request size is larger than burst size, the pkt is split into
- // multiple packets
- const Addr base_addr = pkt->getAddr();
- Addr addr = base_addr;
- uint32_t burst_size = is_dram ? dram->bytesPerBurst() :
- nvm->bytesPerBurst();
- for (int cnt = 0; cnt < pkt_count; ++cnt) {
- unsigned size = std::min((addr | (burst_size - 1)) + 1,
- base_addr + pkt->getSize()) - addr;
- stats.writePktSize[ceilLog2(size)]++;
- stats.writeBursts++;
- stats.masterWriteAccesses[pkt->masterId()]++;
-
- // see if we can merge with an existing item in the write
- // queue and keep track of whether we have merged or not
- bool merged = isInWriteQueue.find(burstAlign(addr, is_dram)) !=
- isInWriteQueue.end();
-
- // if the item was not merged we need to create a new write
- // and enqueue it
- if (!merged) {
- DRAMPacket* dram_pkt;
- if (is_dram) {
- dram_pkt = dram->decodePacket(pkt, addr, size, false, true);
- dram->setupRank(dram_pkt->rank, false);
- } else {
- dram_pkt = nvm->decodePacket(pkt, addr, size, false, false);
- nvm->setupRank(dram_pkt->rank, false);
- }
- assert(totalWriteQueueSize < writeBufferSize);
- stats.wrQLenPdf[totalWriteQueueSize]++;
-
- DPRINTF(DRAM, "Adding to write queue\n");
-
- writeQueue[dram_pkt->qosValue()].push_back(dram_pkt);
- isInWriteQueue.insert(burstAlign(addr, is_dram));
-
- // log packet
- logRequest(MemCtrl::WRITE, pkt->masterId(), pkt->qosValue(),
- dram_pkt->addr, 1);
-
- assert(totalWriteQueueSize == isInWriteQueue.size());
-
- // Update stats
- stats.avgWrQLen = totalWriteQueueSize;
-
- } else {
- DPRINTF(DRAM, "Merging write burst with existing queue entry\n");
-
- // keep track of the fact that this burst effectively
- // disappeared as it was merged with an existing one
- stats.mergedWrBursts++;
- }
-
- // Starting address of next memory pkt (aligned to burst_size boundary)
- addr = (addr | (burst_size - 1)) + 1;
- }
-
- // we do not wait for the writes to be send to the actual memory,
- // but instead take responsibility for the consistency here and
- // snoop the write queue for any upcoming reads
- // @todo, if a pkt size is larger than burst size, we might need a
- // different front end latency
- accessAndRespond(pkt, frontendLatency);
-
- // If we are not already scheduled to get a request out of the
- // queue, do so now
- if (!nextReqEvent.scheduled()) {
- DPRINTF(DRAM, "Request scheduled immediately\n");
- schedule(nextReqEvent, curTick());
- }
-}
-
-void
-DRAMCtrl::printQs() const
-{
-#if TRACING_ON
- DPRINTF(DRAM, "===READ QUEUE===\n\n");
- for (const auto& queue : readQueue) {
- for (const auto& packet : queue) {
- DPRINTF(DRAM, "Read %lu\n", packet->addr);
- }
- }
-
- DPRINTF(DRAM, "\n===RESP QUEUE===\n\n");
- for (const auto& packet : respQueue) {
- DPRINTF(DRAM, "Response %lu\n", packet->addr);
- }
-
- DPRINTF(DRAM, "\n===WRITE QUEUE===\n\n");
- for (const auto& queue : writeQueue) {
- for (const auto& packet : queue) {
- DPRINTF(DRAM, "Write %lu\n", packet->addr);
- }
- }
-#endif // TRACING_ON
-}
-
-bool
-DRAMCtrl::recvTimingReq(PacketPtr pkt)
-{
- // This is where we enter from the outside world
- DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n",
- pkt->cmdString(), pkt->getAddr(), pkt->getSize());
-
- panic_if(pkt->cacheResponding(), "Should not see packets where cache "
- "is responding");
-
- panic_if(!(pkt->isRead() || pkt->isWrite()),
- "Should only see read and writes at memory controller\n");
-
- // Calc avg gap between requests
- if (prevArrival != 0) {
- stats.totGap += curTick() - prevArrival;
- }
- prevArrival = curTick();
-
- // What type of media does this packet access?
- bool is_dram;
- if (dram && dram->getAddrRange().contains(pkt->getAddr())) {
- is_dram = true;
- } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) {
- is_dram = false;
- } else {
- panic("Can't handle address range for packet %s\n",
- pkt->print());
- }
-
-
- // Find out how many memory packets a pkt translates to
- // If the burst size is equal or larger than the pkt size, then a pkt
- // translates to only one memory packet. Otherwise, a pkt translates to
- // multiple memory packets
- unsigned size = pkt->getSize();
- uint32_t burst_size = is_dram ? dram->bytesPerBurst() :
- nvm->bytesPerBurst();
- unsigned offset = pkt->getAddr() & (burst_size - 1);
- unsigned int pkt_count = divCeil(offset + size, burst_size);
-
- // run the QoS scheduler and assign a QoS priority value to the packet
- qosSchedule( { &readQueue, &writeQueue }, burst_size, pkt);
-
- // check local buffers and do not accept if full
- if (pkt->isWrite()) {
- assert(size != 0);
- if (writeQueueFull(pkt_count)) {
- DPRINTF(DRAM, "Write queue full, not accepting\n");
- // remember that we have to retry this port
- retryWrReq = true;
- stats.numWrRetry++;
- return false;
- } else {
- addToWriteQueue(pkt, pkt_count, is_dram);
- stats.writeReqs++;
- stats.bytesWrittenSys += size;
- }
- } else {
- assert(pkt->isRead());
- assert(size != 0);
- if (readQueueFull(pkt_count)) {
- DPRINTF(DRAM, "Read queue full, not accepting\n");
- // remember that we have to retry this port
- retryRdReq = true;
- stats.numRdRetry++;
- return false;
- } else {
- addToReadQueue(pkt, pkt_count, is_dram);
- stats.readReqs++;
- stats.bytesReadSys += size;
- }
- }
-
- return true;
-}
-
-void
-DRAMCtrl::processRespondEvent()
-{
- DPRINTF(DRAM,
- "processRespondEvent(): Some req has reached its readyTime\n");
-
- DRAMPacket* dram_pkt = respQueue.front();
-
- if (dram_pkt->isDram()) {
- // media specific checks and functions when read response is complete
- dram->respondEvent(dram_pkt->rank);
- }
-
- if (dram_pkt->burstHelper) {
- // it is a split packet
- dram_pkt->burstHelper->burstsServiced++;
- if (dram_pkt->burstHelper->burstsServiced ==
- dram_pkt->burstHelper->burstCount) {
- // we have now serviced all children packets of a system packet
- // so we can now respond to the requester
- // @todo we probably want to have a different front end and back
- // end latency for split packets
- accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
- delete dram_pkt->burstHelper;
- dram_pkt->burstHelper = NULL;
- }
- } else {
- // it is not a split packet
- accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency);
- }
-
- assert(respQueue.front() == dram_pkt);
- respQueue.pop_front();
-
- if (!respQueue.empty()) {
- assert(respQueue.front()->readyTime >= curTick());
- assert(!respondEvent.scheduled());
- schedule(respondEvent, respQueue.front()->readyTime);
- } else {
- // if there is nothing left in any queue, signal a drain
- if (drainState() == DrainState::Draining &&
- !totalWriteQueueSize && !totalReadQueueSize &&
- allIntfDrained()) {
-
- DPRINTF(Drain, "DRAM controller done draining\n");
- signalDrainDone();
- } else if (dram_pkt->isDram()) {
- // check the refresh state and kick the refresh event loop
- // into action again if banks already closed and just waiting
- // for read to complete
- dram->checkRefreshState(dram_pkt->rank);
- }
- }
-
- delete dram_pkt;
-
- // We have made a location in the queue available at this point,
- // so if there is a read that was forced to wait, retry now
- if (retryRdReq) {
- retryRdReq = false;
- port.sendRetryReq();
- }
-}
-
-DRAMPacketQueue::iterator
-DRAMCtrl::chooseNext(DRAMPacketQueue& queue, Tick extra_col_delay)
-{
- // This method does the arbitration between requests.
-
- DRAMPacketQueue::iterator ret = queue.end();
-
- if (!queue.empty()) {
- if (queue.size() == 1) {
- // available rank corresponds to state refresh idle
- DRAMPacket* dram_pkt = *(queue.begin());
- if (packetReady(dram_pkt)) {
- ret = queue.begin();
- DPRINTF(DRAM, "Single request, going to a free rank\n");
- } else {
- DPRINTF(DRAM, "Single request, going to a busy rank\n");
- }
- } else if (memSchedPolicy == Enums::fcfs) {
- // check if there is a packet going to a free rank
- for (auto i = queue.begin(); i != queue.end(); ++i) {
- DRAMPacket* dram_pkt = *i;
- if (packetReady(dram_pkt)) {
- ret = i;
- break;
- }
- }
- } else if (memSchedPolicy == Enums::frfcfs) {
- ret = chooseNextFRFCFS(queue, extra_col_delay);
- } else {
- panic("No scheduling policy chosen\n");
- }
- }
- return ret;
-}
-
-DRAMPacketQueue::iterator
-DRAMCtrl::chooseNextFRFCFS(DRAMPacketQueue& queue, Tick extra_col_delay)
-{
- auto selected_pkt_it = queue.end();
- Tick col_allowed_at = MaxTick;
-
- // time we need to issue a column command to be seamless
- const Tick min_col_at = std::max(nextBurstAt + extra_col_delay, curTick());
-
- // find optimal packet for each interface
- if (dram && nvm) {
- // create 2nd set of parameters for NVM
- auto nvm_pkt_it = queue.end();
- Tick nvm_col_at = MaxTick;
-
- // Select DRAM packet by default to give priority if both
- // can issue at the same time or seamlessly
- std::tie(selected_pkt_it, col_allowed_at) =
- dram->chooseNextFRFCFS(queue, min_col_at);
- if (selected_pkt_it == queue.end()) {
- DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__);
- }
-
- std::tie(nvm_pkt_it, nvm_col_at) =
- nvm->chooseNextFRFCFS(queue, min_col_at);
- if (nvm_pkt_it == queue.end()) {
- DPRINTF(NVM, "%s no available NVM ranks found\n", __func__);
- }
-
- // Compare DRAM and NVM and select NVM if it can issue
- // earlier than the DRAM packet
- if (col_allowed_at > nvm_col_at) {
- selected_pkt_it = nvm_pkt_it;
- }
- } else if (dram) {
- std::tie(selected_pkt_it, col_allowed_at) =
- dram->chooseNextFRFCFS(queue, min_col_at);
-
- if (selected_pkt_it == queue.end()) {
- DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__);
- }
- } else if (nvm) {
- std::tie(selected_pkt_it, col_allowed_at) =
- nvm->chooseNextFRFCFS(queue, min_col_at);
-
- if (selected_pkt_it == queue.end()) {
- DPRINTF(NVM, "%s no available NVM ranks found\n", __func__);
- }
- }
-
- return selected_pkt_it;
-}
-
-pair<DRAMPacketQueue::iterator, Tick>
-DRAMInterface::chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const
-{
- vector<uint32_t> earliest_banks(ranksPerChannel, 0);
-
- // Has minBankPrep been called to populate earliest_banks?
- bool filled_earliest_banks = false;
- // can the PRE/ACT sequence be done without impacting utlization?
- bool hidden_bank_prep = false;
-
- // search for seamless row hits first, if no seamless row hit is
- // found then determine if there are other packets that can be issued
- // without incurring additional bus delay due to bank timing
- // Will select closed rows first to enable more open row possibilies
- // in future selections
- bool found_hidden_bank = false;
-
- // remember if we found a row hit, not seamless, but bank prepped
- // and ready
- bool found_prepped_pkt = false;
-
- // if we have no row hit, prepped or not, and no seamless packet,
- // just go for the earliest possible
- bool found_earliest_pkt = false;
-
- Tick selected_col_at = MaxTick;
- auto selected_pkt_it = queue.end();
-
- for (auto i = queue.begin(); i != queue.end() ; ++i) {
- DRAMPacket* pkt = *i;
-
- // select optimal DRAM packet in Q
- if (pkt->isDram()) {
- const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
- const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
- bank.wrAllowedAt;
-
- DPRINTF(DRAM, "%s checking DRAM packet in bank %d, row %d\n",
- __func__, pkt->bank, pkt->row);
-
- // check if rank is not doing a refresh and thus is available,
- // if not, jump to the next packet
- if (burstReady(pkt)) {
-
- DPRINTF(DRAM,
- "%s bank %d - Rank %d available\n", __func__,
- pkt->bank, pkt->rank);
-
- // check if it is a row hit
- if (bank.openRow == pkt->row) {
- // no additional rank-to-rank or same bank-group
- // delays, or we switched read/write and might as well
- // go for the row hit
- if (col_allowed_at <= min_col_at) {
- // FCFS within the hits, giving priority to
- // commands that can issue seamlessly, without
- // additional delay, such as same rank accesses
- // and/or different bank-group accesses
- DPRINTF(DRAM, "%s Seamless buffer hit\n", __func__);
- selected_pkt_it = i;
- selected_col_at = col_allowed_at;
- // no need to look through the remaining queue entries
- break;
- } else if (!found_hidden_bank && !found_prepped_pkt) {
- // if we did not find a packet to a closed row that can
- // issue the bank commands without incurring delay, and
- // did not yet find a packet to a prepped row, remember
- // the current one
- selected_pkt_it = i;
- selected_col_at = col_allowed_at;
- found_prepped_pkt = true;
- DPRINTF(DRAM, "%s Prepped row buffer hit\n", __func__);
- }
- } else if (!found_earliest_pkt) {
- // if we have not initialised the bank status, do it
- // now, and only once per scheduling decisions
- if (!filled_earliest_banks) {
- // determine entries with earliest bank delay
- std::tie(earliest_banks, hidden_bank_prep) =
- minBankPrep(queue, min_col_at);
- filled_earliest_banks = true;
- }
-
- // bank is amongst first available banks
- // minBankPrep will give priority to packets that can
- // issue seamlessly
- if (bits(earliest_banks[pkt->rank],
- pkt->bank, pkt->bank)) {
- found_earliest_pkt = true;
- found_hidden_bank = hidden_bank_prep;
-
- // give priority to packets that can issue
- // bank commands 'behind the scenes'
- // any additional delay if any will be due to
- // col-to-col command requirements
- if (hidden_bank_prep || !found_prepped_pkt) {
- selected_pkt_it = i;
- selected_col_at = col_allowed_at;
- }
- }
- }
- } else {
- DPRINTF(DRAM, "%s bank %d - Rank %d not available\n", __func__,
- pkt->bank, pkt->rank);
- }
- }
- }
-
- if (selected_pkt_it == queue.end()) {
- DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__);
- }
-
- return make_pair(selected_pkt_it, selected_col_at);
-}
-
-void
-DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
-{
- DPRINTF(DRAM, "Responding to Address %lld.. \n",pkt->getAddr());
-
- bool needsResponse = pkt->needsResponse();
- // do the actual memory access which also turns the packet into a
- // response
- if (dram && dram->getAddrRange().contains(pkt->getAddr())) {
- dram->access(pkt);
- } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) {
- nvm->access(pkt);
- } else {
- panic("Can't handle address range for packet %s\n",
- pkt->print());
- }
-
- // turn packet around to go back to requester if response expected
- if (needsResponse) {
- // access already turned the packet into a response
- assert(pkt->isResponse());
- // response_time consumes the static latency and is charged also
- // with headerDelay that takes into account the delay provided by
- // the xbar and also the payloadDelay that takes into account the
- // number of data beats.
- Tick response_time = curTick() + static_latency + pkt->headerDelay +
- pkt->payloadDelay;
- // Here we reset the timing of the packet before sending it out.
- pkt->headerDelay = pkt->payloadDelay = 0;
-
- // queue the packet in the response queue to be sent out after
- // the static latency has passed
- port.schedTimingResp(pkt, response_time);
- } else {
- // @todo the packet is going to be deleted, and the DRAMPacket
- // is still having a pointer to it
- pendingDelete.reset(pkt);
- }
-
- DPRINTF(DRAM, "Done\n");
-
- return;
-}
-
-void
-DRAMCtrl::pruneBurstTick()
-{
- auto it = burstTicks.begin();
- while (it != burstTicks.end()) {
- auto current_it = it++;
- if (curTick() > *current_it) {
- DPRINTF(DRAM, "Removing burstTick for %d\n", *current_it);
- burstTicks.erase(current_it);
- }
- }
-}
-
-Tick
-DRAMCtrl::getBurstWindow(Tick cmd_tick)
-{
- // get tick aligned to burst window
- Tick burst_offset = cmd_tick % commandWindow;
- return (cmd_tick - burst_offset);
-}
-
-Tick
-DRAMCtrl::verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst)
-{
- // start with assumption that there is no contention on command bus
- Tick cmd_at = cmd_tick;
-
- // get tick aligned to burst window
- Tick burst_tick = getBurstWindow(cmd_tick);
-
- // verify that we have command bandwidth to issue the command
- // if not, iterate over next window(s) until slot found
- while (burstTicks.count(burst_tick) >= max_cmds_per_burst) {
- DPRINTF(DRAM, "Contention found on command bus at %d\n", burst_tick);
- burst_tick += commandWindow;
- cmd_at = burst_tick;
- }
-
- // add command into burst window and return corresponding Tick
- burstTicks.insert(burst_tick);
- return cmd_at;
-}
-
-Tick
-DRAMCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst,
- Tick max_multi_cmd_split)
-{
- // start with assumption that there is no contention on command bus
- Tick cmd_at = cmd_tick;
-
- // get tick aligned to burst window
- Tick burst_tick = getBurstWindow(cmd_tick);
-
- // Command timing requirements are from 2nd command
- // Start with assumption that 2nd command will issue at cmd_at and
- // find prior slot for 1st command to issue
- // Given a maximum latency of max_multi_cmd_split between the commands,
- // find the burst at the maximum latency prior to cmd_at
- Tick burst_offset = 0;
- Tick first_cmd_offset = cmd_tick % commandWindow;
- while (max_multi_cmd_split > (first_cmd_offset + burst_offset)) {
- burst_offset += commandWindow;
- }
- // get the earliest burst aligned address for first command
- // ensure that the time does not go negative
- Tick first_cmd_tick = burst_tick - std::min(burst_offset, burst_tick);
-
- // Can required commands issue?
- bool first_can_issue = false;
- bool second_can_issue = false;
- // verify that we have command bandwidth to issue the command(s)
- while (!first_can_issue || !second_can_issue) {
- bool same_burst = (burst_tick == first_cmd_tick);
- auto first_cmd_count = burstTicks.count(first_cmd_tick);
- auto second_cmd_count = same_burst ? first_cmd_count + 1 :
- burstTicks.count(burst_tick);
-
- first_can_issue = first_cmd_count < max_cmds_per_burst;
- second_can_issue = second_cmd_count < max_cmds_per_burst;
-
- if (!second_can_issue) {
- DPRINTF(DRAM, "Contention (cmd2) found on command bus at %d\n",
- burst_tick);
- burst_tick += commandWindow;
- cmd_at = burst_tick;
- }
-
- // Verify max_multi_cmd_split isn't violated when command 2 is shifted
- // If commands initially were issued in same burst, they are
- // now in consecutive bursts and can still issue B2B
- bool gap_violated = !same_burst &&
- ((burst_tick - first_cmd_tick) > max_multi_cmd_split);
-
- if (!first_can_issue || (!second_can_issue && gap_violated)) {
- DPRINTF(DRAM, "Contention (cmd1) found on command bus at %d\n",
- first_cmd_tick);
- first_cmd_tick += commandWindow;
- }
- }
-
- // Add command to burstTicks
- burstTicks.insert(burst_tick);
- burstTicks.insert(first_cmd_tick);
-
- return cmd_at;
-}
-
-void
-DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
- Tick act_tick, uint32_t row)
-{
- assert(rank_ref.actTicks.size() == activationLimit);
-
- // verify that we have command bandwidth to issue the activate
- // if not, shift to next burst window
- Tick act_at;
- if (twoCycleActivate)
- act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD);
- else
- act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow);
-
- DPRINTF(DRAM, "Activate at tick %d\n", act_at);
-
- // update the open row
- assert(bank_ref.openRow == Bank::NO_ROW);
- bank_ref.openRow = row;
-
- // start counting anew, this covers both the case when we
- // auto-precharged, and when this access is forced to
- // precharge
- bank_ref.bytesAccessed = 0;
- bank_ref.rowAccesses = 0;
-
- ++rank_ref.numBanksActive;
- assert(rank_ref.numBanksActive <= banksPerRank);
-
- DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got "
- "%d active\n", bank_ref.bank, rank_ref.rank, act_at,
- ranks[rank_ref.rank]->numBanksActive);
-
- rank_ref.cmdList.push_back(Command(MemCommand::ACT, bank_ref.bank,
- act_at));
-
- DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_at, tCK) -
- timeStampOffset, bank_ref.bank, rank_ref.rank);
-
- // The next access has to respect tRAS for this bank
- bank_ref.preAllowedAt = act_at + tRAS;
-
- // Respect the row-to-column command delay for both read and write cmds
- bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt);
- bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt);
-
- // start by enforcing tRRD
- for (int i = 0; i < banksPerRank; i++) {
- // next activate to any bank in this rank must not happen
- // before tRRD
- if (bankGroupArch && (bank_ref.bankgr == rank_ref.banks[i].bankgr)) {
- // bank group architecture requires longer delays between
- // ACT commands within the same bank group. Use tRRD_L
- // in this case
- rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD_L,
- rank_ref.banks[i].actAllowedAt);
- } else {
- // use shorter tRRD value when either
- // 1) bank group architecture is not supportted
- // 2) bank is in a different bank group
- rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD,
- rank_ref.banks[i].actAllowedAt);
- }
- }
-
- // next, we deal with tXAW, if the activation limit is disabled
- // then we directly schedule an activate power event
- if (!rank_ref.actTicks.empty()) {
- // sanity check
- if (rank_ref.actTicks.back() &&
- (act_at - rank_ref.actTicks.back()) < tXAW) {
- panic("Got %d activates in window %d (%llu - %llu) which "
- "is smaller than %llu\n", activationLimit, act_at -
- rank_ref.actTicks.back(), act_at,
- rank_ref.actTicks.back(), tXAW);
- }
-
- // shift the times used for the book keeping, the last element
- // (highest index) is the oldest one and hence the lowest value
- rank_ref.actTicks.pop_back();
-
- // record an new activation (in the future)
- rank_ref.actTicks.push_front(act_at);
-
- // cannot activate more than X times in time window tXAW, push the
- // next one (the X + 1'st activate) to be tXAW away from the
- // oldest in our window of X
- if (rank_ref.actTicks.back() &&
- (act_at - rank_ref.actTicks.back()) < tXAW) {
- DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate "
- "no earlier than %llu\n", activationLimit,
- rank_ref.actTicks.back() + tXAW);
- for (int j = 0; j < banksPerRank; j++)
- // next activate must not happen before end of window
- rank_ref.banks[j].actAllowedAt =
- std::max(rank_ref.actTicks.back() + tXAW,
- rank_ref.banks[j].actAllowedAt);
- }
- }
-
- // at the point when this activate takes place, make sure we
- // transition to the active power state
- if (!rank_ref.activateEvent.scheduled())
- schedule(rank_ref.activateEvent, act_at);
- else if (rank_ref.activateEvent.when() > act_at)
- // move it sooner in time
- reschedule(rank_ref.activateEvent, act_at);
-}
-
-void
-DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick,
- bool auto_or_preall, bool trace)
-{
- // make sure the bank has an open row
- assert(bank.openRow != Bank::NO_ROW);
-
- // sample the bytes per activate here since we are closing
- // the page
- stats.bytesPerActivate.sample(bank.bytesAccessed);
-
- bank.openRow = Bank::NO_ROW;
-
- Tick pre_at = pre_tick;
- if (auto_or_preall) {
- // no precharge allowed before this one
- bank.preAllowedAt = pre_at;
- } else {
- // Issuing an explicit PRE command
- // Verify that we have command bandwidth to issue the precharge
- // if not, shift to next burst window
- pre_at = ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow);
- // enforce tPPD
- for (int i = 0; i < banksPerRank; i++) {
- rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
- rank_ref.banks[i].preAllowedAt);
- }
- }
-
- Tick pre_done_at = pre_at + tRP;
-
- bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at);
-
- assert(rank_ref.numBanksActive != 0);
- --rank_ref.numBanksActive;
-
- DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got "
- "%d active\n", bank.bank, rank_ref.rank, pre_at,
- rank_ref.numBanksActive);
-
- if (trace) {
-
- rank_ref.cmdList.push_back(Command(MemCommand::PRE, bank.bank,
- pre_at));
- DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) -
- timeStampOffset, bank.bank, rank_ref.rank);
- }
-
- // if we look at the current number of active banks we might be
- // tempted to think the DRAM is now idle, however this can be
- // undone by an activate that is scheduled to happen before we
- // would have reached the idle state, so schedule an event and
- // rather check once we actually make it to the point in time when
- // the (last) precharge takes place
- if (!rank_ref.prechargeEvent.scheduled()) {
- schedule(rank_ref.prechargeEvent, pre_done_at);
- // New event, increment count
- ++rank_ref.outstandingEvents;
- } else if (rank_ref.prechargeEvent.when() < pre_done_at) {
- reschedule(rank_ref.prechargeEvent, pre_done_at);
- }
-}
-
-pair<Tick, Tick>
-DRAMInterface::doBurstAccess(DRAMPacket* dram_pkt, Tick next_burst_at,
- const std::vector<DRAMPacketQueue>& queue)
-{
- DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
- dram_pkt->addr, dram_pkt->rank, dram_pkt->bank, dram_pkt->row);
-
- // get the rank
- Rank& rank_ref = *ranks[dram_pkt->rank];
-
- assert(rank_ref.inRefIdleState());
-
- // are we in or transitioning to a low-power state and have not scheduled
- // a power-up event?
- // if so, wake up from power down to issue RD/WR burst
- if (rank_ref.inLowPowerState) {
- assert(rank_ref.pwrState != PWR_SREF);
- rank_ref.scheduleWakeUpEvent(tXP);
- }
-
- // get the bank
- Bank& bank_ref = rank_ref.banks[dram_pkt->bank];
-
- // for the state we need to track if it is a row hit or not
- bool row_hit = true;
-
- // Determine the access latency and update the bank state
- if (bank_ref.openRow == dram_pkt->row) {
- // nothing to do
- } else {
- row_hit = false;
-
- // If there is a page open, precharge it.
- if (bank_ref.openRow != Bank::NO_ROW) {
- prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt,
- curTick()));
- }
-
- // next we need to account for the delay in activating the page
- Tick act_tick = std::max(bank_ref.actAllowedAt, curTick());
-
- // Record the activation and deal with all the global timing
- // constraints caused be a new activation (tRRD and tXAW)
- activateBank(rank_ref, bank_ref, act_tick, dram_pkt->row);
- }
-
- // respect any constraints on the command (e.g. tRCD or tCCD)
- const Tick col_allowed_at = dram_pkt->isRead() ?
- bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;
-
- // we need to wait until the bus is available before we can issue
- // the command; need to ensure minimum bus delay requirement is met
- Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()});
-
- // verify that we have command bandwidth to issue the burst
- // if not, shift to next burst window
- if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay))
- cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
- else
- cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
-
- // if we are interleaving bursts, ensure that
- // 1) we don't double interleave on next burst issue
- // 2) we are at an interleave boundary; if not, shift to next boundary
- Tick burst_gap = tBURST_MIN;
- if (burstInterleave) {
- if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) {
- // already interleaving, push next command to end of full burst
- burst_gap = tBURST;
- } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) {
- // not at an interleave boundary after bandwidth check
- // Shift command to tBURST boundary to avoid data contention
- // Command will remain in the same burst window given that
- // tBURST is less than tBURST_MAX
- cmd_at = rank_ref.lastBurstTick + tBURST;
- }
- }
- DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at);
-
- // update the packet ready time
- dram_pkt->readyTime = cmd_at + tCL + tBURST;
-
- rank_ref.lastBurstTick = cmd_at;
-
- // update the time for the next read/write burst for each
- // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here)
- Tick dly_to_rd_cmd;
- Tick dly_to_wr_cmd;
- for (int j = 0; j < ranksPerChannel; j++) {
- for (int i = 0; i < banksPerRank; i++) {
- if (dram_pkt->rank == j) {
- if (bankGroupArch &&
- (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) {
- // bank group architecture requires longer delays between
- // RD/WR burst commands to the same bank group.
- // tCCD_L is default requirement for same BG timing
- // tCCD_L_WR is required for write-to-write
- // Need to also take bus turnaround delays into account
- dly_to_rd_cmd = dram_pkt->isRead() ?
- tCCD_L : std::max(tCCD_L, wrToRdDlySameBG);
- dly_to_wr_cmd = dram_pkt->isRead() ?
- std::max(tCCD_L, rdToWrDlySameBG) :
- tCCD_L_WR;
- } else {
- // tBURST is default requirement for diff BG timing
- // Need to also take bus turnaround delays into account
- dly_to_rd_cmd = dram_pkt->isRead() ? burst_gap :
- writeToReadDelay();
- dly_to_wr_cmd = dram_pkt->isRead() ? readToWriteDelay() :
- burst_gap;
- }
- } else {
- // different rank is by default in a different bank group and
- // doesn't require longer tCCD or additional RTW, WTR delays
- // Need to account for rank-to-rank switching
- dly_to_wr_cmd = rankToRankDelay();
- dly_to_rd_cmd = rankToRankDelay();
- }
- ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
- ranks[j]->banks[i].rdAllowedAt);
- ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
- ranks[j]->banks[i].wrAllowedAt);
- }
- }
-
- // Save rank of current access
- activeRank = dram_pkt->rank;
-
- // If this is a write, we also need to respect the write recovery
- // time before a precharge, in the case of a read, respect the
- // read to precharge constraint
- bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt,
- dram_pkt->isRead() ? cmd_at + tRTP :
- dram_pkt->readyTime + tWR);
-
- // increment the bytes accessed and the accesses per row
- bank_ref.bytesAccessed += burstSize;
- ++bank_ref.rowAccesses;
-
- // if we reached the max, then issue with an auto-precharge
- bool auto_precharge = pageMgmt == Enums::close ||
- bank_ref.rowAccesses == maxAccessesPerRow;
-
- // if we did not hit the limit, we might still want to
- // auto-precharge
- if (!auto_precharge &&
- (pageMgmt == Enums::open_adaptive ||
- pageMgmt == Enums::close_adaptive)) {
- // a twist on the open and close page policies:
- // 1) open_adaptive page policy does not blindly keep the
- // page open, but close it if there are no row hits, and there
- // are bank conflicts in the queue
- // 2) close_adaptive page policy does not blindly close the
- // page, but closes it only if there are no row hits in the queue.
- // In this case, only force an auto precharge when there
- // are no same page hits in the queue
- bool got_more_hits = false;
- bool got_bank_conflict = false;
-
- for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) {
- auto p = queue[i].begin();
- // keep on looking until we find a hit or reach the end of the
- // queue
- // 1) if a hit is found, then both open and close adaptive
- // policies keep the page open
- // 2) if no hit is found, got_bank_conflict is set to true if a
- // bank conflict request is waiting in the queue
- // 3) make sure we are not considering the packet that we are
- // currently dealing with
- while (!got_more_hits && p != queue[i].end()) {
- if (dram_pkt != (*p)) {
- bool same_rank_bank = (dram_pkt->rank == (*p)->rank) &&
- (dram_pkt->bank == (*p)->bank);
-
- bool same_row = dram_pkt->row == (*p)->row;
- got_more_hits |= same_rank_bank && same_row;
- got_bank_conflict |= same_rank_bank && !same_row;
- }
- ++p;
- }
-
- if (got_more_hits)
- break;
- }
-
- // auto pre-charge when either
- // 1) open_adaptive policy, we have not got any more hits, and
- // have a bank conflict
- // 2) close_adaptive policy and we have not got any more hits
- auto_precharge = !got_more_hits &&
- (got_bank_conflict || pageMgmt == Enums::close_adaptive);
- }
-
- // DRAMPower trace command to be written
- std::string mem_cmd = dram_pkt->isRead() ? "RD" : "WR";
-
- // MemCommand required for DRAMPower library
- MemCommand::cmds command = (mem_cmd == "RD") ? MemCommand::RD :
- MemCommand::WR;
-
- rank_ref.cmdList.push_back(Command(command, dram_pkt->bank, cmd_at));
-
- DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) -
- timeStampOffset, mem_cmd, dram_pkt->bank, dram_pkt->rank);
-
- // if this access should use auto-precharge, then we are
- // closing the row after the read/write burst
- if (auto_precharge) {
- // if auto-precharge push a PRE command at the correct tick to the
- // list used by DRAMPower library to calculate power
- prechargeBank(rank_ref, bank_ref, std::max(curTick(),
- bank_ref.preAllowedAt), true);
-
- DPRINTF(DRAM, "Auto-precharged bank: %d\n", dram_pkt->bankId);
- }
-
- // Update the stats and schedule the next request
- if (dram_pkt->isRead()) {
- // Every respQueue which will generate an event, increment count
- ++rank_ref.outstandingEvents;
-
- stats.readBursts++;
- if (row_hit)
- stats.readRowHits++;
- stats.bytesRead += burstSize;
- stats.perBankRdBursts[dram_pkt->bankId]++;
-
- // Update latency stats
- stats.totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
- stats.totQLat += cmd_at - dram_pkt->entryTime;
- stats.totBusLat += tBURST;
- } else {
- // Schedule write done event to decrement event count
- // after the readyTime has been reached
- // Only schedule latest write event to minimize events
- // required; only need to ensure that final event scheduled covers
- // the time that writes are outstanding and bus is active
-        // to hold off power-down entry events
- if (!rank_ref.writeDoneEvent.scheduled()) {
- schedule(rank_ref.writeDoneEvent, dram_pkt->readyTime);
- // New event, increment count
- ++rank_ref.outstandingEvents;
-
- } else if (rank_ref.writeDoneEvent.when() < dram_pkt->readyTime) {
- reschedule(rank_ref.writeDoneEvent, dram_pkt->readyTime);
- }
- // will remove write from queue when returned to parent function
- // decrement count for DRAM rank
- --rank_ref.writeEntries;
-
- stats.writeBursts++;
- if (row_hit)
- stats.writeRowHits++;
- stats.bytesWritten += burstSize;
- stats.perBankWrBursts[dram_pkt->bankId]++;
-
- }
- // Update bus state to reflect when previous command was issued
- return make_pair(cmd_at, cmd_at + burst_gap);
-}
-
-void
-DRAMInterface::addRankToRankDelay(Tick cmd_at)
-{
- // update timing for DRAM ranks due to bursts issued
- // to ranks on other media interfaces
- for (auto n : ranks) {
- for (int i = 0; i < banksPerRank; i++) {
- // different rank by default
- // Need to only account for rank-to-rank switching
- n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
- n->banks[i].rdAllowedAt);
- n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
- n->banks[i].wrAllowedAt);
- }
- }
-}
-
-bool
-DRAMCtrl::inReadBusState(bool next_state) const
-{
- // check the bus state
- if (next_state) {
- // use busStateNext to get the state that will be used
- // for the next burst
- return (busStateNext == DRAMCtrl::READ);
- } else {
- return (busState == DRAMCtrl::READ);
- }
-}
-
-bool
-DRAMCtrl::inWriteBusState(bool next_state) const
-{
- // check the bus state
- if (next_state) {
- // use busStateNext to get the state that will be used
- // for the next burst
- return (busStateNext == DRAMCtrl::WRITE);
- } else {
- return (busState == DRAMCtrl::WRITE);
- }
-}
-
-void
-DRAMCtrl::doBurstAccess(DRAMPacket* dram_pkt)
-{
- // first clean up the burstTick set, removing old entries
- // before adding new entries for next burst
- pruneBurstTick();
-
- // When was command issued?
- Tick cmd_at;
-
- // Issue the next burst and update bus state to reflect
- // when previous command was issued
- if (dram_pkt->isDram()) {
- std::vector<DRAMPacketQueue>& queue = selQueue(dram_pkt->isRead());
- std::tie(cmd_at, nextBurstAt) =
- dram->doBurstAccess(dram_pkt, nextBurstAt, queue);
-
- // Update timing for NVM ranks if NVM is configured on this channel
- if (nvm)
- nvm->addRankToRankDelay(cmd_at);
-
- } else {
- std::tie(cmd_at, nextBurstAt) =
- nvm->doBurstAccess(dram_pkt, nextBurstAt);
-
-        // Update timing for DRAM ranks if DRAM is configured on this channel
- if (dram)
- dram->addRankToRankDelay(cmd_at);
-
- }
-
- DPRINTF(DRAM, "Access to %lld, ready at %lld next burst at %lld.\n",
- dram_pkt->addr, dram_pkt->readyTime, nextBurstAt);
-
-    // Update the minimum timing between the requests; this is a
-    // conservative estimate of when we have to schedule the next
-    // request to not introduce any unnecessary bubbles. In most cases
- // we will wake up sooner than we have to.
- nextReqTime = nextBurstAt - (dram ? dram->commandOffset() :
- nvm->commandOffset());
-
-
- // Update the common bus stats
- if (dram_pkt->isRead()) {
- ++readsThisTime;
- // Update latency stats
- stats.masterReadTotalLat[dram_pkt->masterId()] +=
- dram_pkt->readyTime - dram_pkt->entryTime;
- stats.masterReadBytes[dram_pkt->masterId()] += dram_pkt->size;
- } else {
- ++writesThisTime;
- stats.masterWriteBytes[dram_pkt->masterId()] += dram_pkt->size;
- stats.masterWriteTotalLat[dram_pkt->masterId()] +=
- dram_pkt->readyTime - dram_pkt->entryTime;
- }
-}
-
-void
-DRAMCtrl::processNextReqEvent()
-{
- // transition is handled by QoS algorithm if enabled
- if (turnPolicy) {
- // select bus state - only done if QoS algorithms are in use
- busStateNext = selectNextBusState();
- }
-
- // detect bus state change
- bool switched_cmd_type = (busState != busStateNext);
- // record stats
- recordTurnaroundStats();
-
- DPRINTF(DRAM, "QoS Turnarounds selected state %s %s\n",
- (busState==MemCtrl::READ)?"READ":"WRITE",
- switched_cmd_type?"[turnaround triggered]":"");
-
- if (switched_cmd_type) {
- if (busState == MemCtrl::READ) {
- DPRINTF(DRAM,
- "Switching to writes after %d reads with %d reads "
- "waiting\n", readsThisTime, totalReadQueueSize);
- stats.rdPerTurnAround.sample(readsThisTime);
- readsThisTime = 0;
- } else {
- DPRINTF(DRAM,
- "Switching to reads after %d writes with %d writes "
- "waiting\n", writesThisTime, totalWriteQueueSize);
- stats.wrPerTurnAround.sample(writesThisTime);
- writesThisTime = 0;
- }
- }
-
- // updates current state
- busState = busStateNext;
-
- if (nvm) {
- for (auto queue = readQueue.rbegin();
- queue != readQueue.rend(); ++queue) {
- // select non-deterministic NVM read to issue
-            // assume that we have the command bandwidth to issue this along
-            // with an additional RD/WR burst and any needed bank operations
- if (nvm->readsWaitingToIssue()) {
- // select non-deterministic NVM read to issue
- nvm->chooseRead(*queue);
- }
- }
- }
-
- // check ranks for refresh/wakeup - uses busStateNext, so done after
- // turnaround decisions
- // Default to busy status and update based on interface specifics
- bool dram_busy = dram ? dram->isBusy() : true;
- bool nvm_busy = true;
- bool all_writes_nvm = false;
- if (nvm) {
- all_writes_nvm = nvm->numWritesQueued == totalWriteQueueSize;
- bool read_queue_empty = totalReadQueueSize == 0;
- nvm_busy = nvm->isBusy(read_queue_empty, all_writes_nvm);
- }
- // Default state of unused interface is 'true'
- // Simply AND the busy signals to determine if system is busy
- if (dram_busy && nvm_busy) {
- // if all ranks are refreshing wait for them to finish
- // and stall this state machine without taking any further
- // action, and do not schedule a new nextReqEvent
- return;
- }
-
- // when we get here it is either a read or a write
- if (busState == READ) {
-
- // track if we should switch or not
- bool switch_to_writes = false;
-
- if (totalReadQueueSize == 0) {
- // In the case there is no read request to go next,
- // trigger writes if we have passed the low threshold (or
- // if we are draining)
- if (!(totalWriteQueueSize == 0) &&
- (drainState() == DrainState::Draining ||
- totalWriteQueueSize > writeLowThreshold)) {
-
- DPRINTF(DRAM, "Switching to writes due to read queue empty\n");
- switch_to_writes = true;
- } else {
- // check if we are drained
- // not done draining until in PWR_IDLE state
- // ensuring all banks are closed and
- // have exited low power states
- if (drainState() == DrainState::Draining &&
- respQueue.empty() && allIntfDrained()) {
-
- DPRINTF(Drain, "DRAM controller done draining\n");
- signalDrainDone();
- }
-
- // nothing to do, not even any point in scheduling an
- // event for the next request
- return;
- }
- } else {
-
- bool read_found = false;
- DRAMPacketQueue::iterator to_read;
- uint8_t prio = numPriorities();
-
- for (auto queue = readQueue.rbegin();
- queue != readQueue.rend(); ++queue) {
-
- prio--;
-
- DPRINTF(QOS,
- "DRAM controller checking READ queue [%d] priority [%d elements]\n",
- prio, queue->size());
-
- // Figure out which read request goes next
- // If we are changing command type, incorporate the minimum
- // bus turnaround delay which will be rank to rank delay
- to_read = chooseNext((*queue), switched_cmd_type ?
- minWriteToReadDataGap() : 0);
-
- if (to_read != queue->end()) {
- // candidate read found
- read_found = true;
- break;
- }
- }
-
- // if no read to an available rank is found then return
- // at this point. There could be writes to the available ranks
- // which are above the required threshold. However, to
- // avoid adding more complexity to the code, return and wait
- // for a refresh event to kick things into action again.
- if (!read_found) {
- DPRINTF(DRAM, "No Reads Found - exiting\n");
- return;
- }
-
- auto dram_pkt = *to_read;
-
- doBurstAccess(dram_pkt);
-
- // sanity check
- assert(dram_pkt->size <= (dram_pkt->isDram() ?
- dram->bytesPerBurst() :
- nvm->bytesPerBurst()) );
- assert(dram_pkt->readyTime >= curTick());
-
- // log the response
- logResponse(MemCtrl::READ, (*to_read)->masterId(),
- dram_pkt->qosValue(), dram_pkt->getAddr(), 1,
- dram_pkt->readyTime - dram_pkt->entryTime);
-
-
- // Insert into response queue. It will be sent back to the
- // requester at its readyTime
- if (respQueue.empty()) {
- assert(!respondEvent.scheduled());
- schedule(respondEvent, dram_pkt->readyTime);
- } else {
- assert(respQueue.back()->readyTime <= dram_pkt->readyTime);
- assert(respondEvent.scheduled());
- }
-
- respQueue.push_back(dram_pkt);
-
- // we have so many writes that we have to transition
- // don't transition if the writeRespQueue is full and
- // there are no other writes that can issue
- if ((totalWriteQueueSize > writeHighThreshold) &&
- !(nvm && all_writes_nvm && nvm->writeRespQueueFull())) {
- switch_to_writes = true;
- }
-
-            // remove the request from the queue - the iterator is no longer valid
- readQueue[dram_pkt->qosValue()].erase(to_read);
- }
-
- // switching to writes, either because the read queue is empty
- // and the writes have passed the low threshold (or we are
-        // draining), or because the writes hit the high threshold
- if (switch_to_writes) {
- // transition to writing
- busStateNext = WRITE;
- }
- } else {
-
- bool write_found = false;
- DRAMPacketQueue::iterator to_write;
- uint8_t prio = numPriorities();
-
- for (auto queue = writeQueue.rbegin();
- queue != writeQueue.rend(); ++queue) {
-
- prio--;
-
- DPRINTF(QOS,
- "DRAM controller checking WRITE queue [%d] priority [%d elements]\n",
- prio, queue->size());
-
- // If we are changing command type, incorporate the minimum
- // bus turnaround delay
- to_write = chooseNext((*queue),
- switched_cmd_type ? minReadToWriteDataGap() : 0);
-
- if (to_write != queue->end()) {
- write_found = true;
- break;
- }
- }
-
-        // if no writes to a rank that is available to service requests
-        // (i.e. a rank in the refresh idle state) are found, then return.
-        // There could be reads to the available ranks. However, to
- // avoid adding more complexity to the code, return at this point and
- // wait for a refresh event to kick things into action again.
- if (!write_found) {
- DPRINTF(DRAM, "No Writes Found - exiting\n");
- return;
- }
-
- auto dram_pkt = *to_write;
-
- // sanity check
- assert(dram_pkt->size <= (dram_pkt->isDram() ?
- dram->bytesPerBurst() :
- nvm->bytesPerBurst()) );
-
- doBurstAccess(dram_pkt);
-
- isInWriteQueue.erase(burstAlign(dram_pkt->addr, dram_pkt->isDram()));
-
- // log the response
- logResponse(MemCtrl::WRITE, dram_pkt->masterId(),
- dram_pkt->qosValue(), dram_pkt->getAddr(), 1,
- dram_pkt->readyTime - dram_pkt->entryTime);
-
-
- // remove the request from the queue - the iterator is no longer valid
- writeQueue[dram_pkt->qosValue()].erase(to_write);
-
- delete dram_pkt;
-
- // If we emptied the write queue, or got sufficiently below the
- // threshold (using the minWritesPerSwitch as the hysteresis) and
- // are not draining, or we have reads waiting and have done enough
- // writes, then switch to reads.
- // If we are interfacing to NVM and have filled the writeRespQueue,
- // with only NVM writes in Q, then switch to reads
- bool below_threshold =
- totalWriteQueueSize + minWritesPerSwitch < writeLowThreshold;
-
- if (totalWriteQueueSize == 0 ||
- (below_threshold && drainState() != DrainState::Draining) ||
- (totalReadQueueSize && writesThisTime >= minWritesPerSwitch) ||
- (totalReadQueueSize && nvm && nvm->writeRespQueueFull() &&
- all_writes_nvm)) {
-
- // turn the bus back around for reads again
- busStateNext = MemCtrl::READ;
-
-            // note that we switch back to reads also in the idle
- // case, which eventually will check for any draining and
- // also pause any further scheduling if there is really
- // nothing to do
- }
- }
- // It is possible that a refresh to another rank kicks things back into
- // action before reaching this point.
- if (!nextReqEvent.scheduled())
- schedule(nextReqEvent, std::max(nextReqTime, curTick()));
-
- // If there is space available and we have writes waiting then let
- // them retry. This is done here to ensure that the retry does not
- // cause a nextReqEvent to be scheduled before we do so as part of
- // the next request processing
- if (retryWrReq && totalWriteQueueSize < writeBufferSize) {
- retryWrReq = false;
- port.sendRetryReq();
- }
-}
-
-MemInterface::MemInterface(const MemInterfaceParams* _p)
- : AbstractMemory(_p),
- addrMapping(_p->addr_mapping),
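-      // burst size in bytes: device bus width (bits) x burst length x
-      // devices per rank, divided by 8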
- burstSize((_p->devices_per_rank * _p->burst_length *
- _p->device_bus_width) / 8),
- deviceSize(_p->device_size),
- deviceRowBufferSize(_p->device_rowbuffer_size),
- devicesPerRank(_p->devices_per_rank),
- rowBufferSize(devicesPerRank * deviceRowBufferSize),
- burstsPerRowBuffer(rowBufferSize / burstSize),
- burstsPerStripe(range.interleaved() ?
- range.granularity() / burstSize : 1),
- ranksPerChannel(_p->ranks_per_channel),
- banksPerRank(_p->banks_per_rank), rowsPerBank(0),
- tCK(_p->tCK), tCS(_p->tCS), tBURST(_p->tBURST),
- tRTW(_p->tRTW),
- tWTR(_p->tWTR)
-{}
-
-void
-MemInterface::setCtrl(DRAMCtrl* _ctrl, unsigned int command_window)
-{
- ctrl = _ctrl;
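-    // the command window is given in ticks; dividing by tCK yields the
-    // number of command slots (memory clock cycles) per window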
- maxCommandsPerWindow = command_window / tCK;
-}
-
-DRAMInterface::DRAMInterface(const DRAMInterfaceParams* _p)
- : MemInterface(_p),
- bankGroupsPerRank(_p->bank_groups_per_rank),
- bankGroupArch(_p->bank_groups_per_rank > 0),
- tCL(_p->tCL),
- tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX),
- tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD),
- tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP),
- tRFC(_p->tRFC), tREFI(_p->tREFI), tRRD(_p->tRRD), tRRD_L(_p->tRRD_L),
- tPPD(_p->tPPD), tAAD(_p->tAAD),
- tXAW(_p->tXAW), tXP(_p->tXP), tXS(_p->tXS),
- clkResyncDelay(tCL + _p->tBURST_MAX),
- dataClockSync(_p->data_clock_sync),
- burstInterleave(tBURST != tBURST_MIN),
- twoCycleActivate(_p->two_cycle_activate),
- activationLimit(_p->activation_limit),
- wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L),
- rdToWrDlySameBG(_p->tRTW + _p->tBURST_MAX),
- pageMgmt(_p->page_policy),
- maxAccessesPerRow(_p->max_accesses_per_row),
- timeStampOffset(0), activeRank(0),
- enableDRAMPowerdown(_p->enable_dram_powerdown),
- lastStatsResetTick(0),
- stats(*this),
- readBufferSize(_p->read_buffer_size),
- writeBufferSize(_p->write_buffer_size)
-{
- DPRINTF(DRAM, "Setting up DRAM Interface\n");
-
- fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
- "must be a power of two\n", burstSize);
-
- // sanity check the ranks since we rely on bit slicing for the
- // address decoding
- fatal_if(!isPowerOf2(ranksPerChannel), "DRAM rank count of %d is "
- "not allowed, must be a power of two\n", ranksPerChannel);
-
- for (int i = 0; i < ranksPerChannel; i++) {
- DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
- Rank* rank = new Rank(_p, i, *this);
- ranks.push_back(rank);
- }
-
-    // determine the actual DRAM capacity from the DRAM config in Mbytes
- uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
- ranksPerChannel;
-
- uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
-
- DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
- AbstractMemory::size());
-
-    // if the actual DRAM size does not match the memory capacity in the system, warn!
- if (deviceCapacity != capacity / (1024 * 1024))
- warn("DRAM device capacity (%d Mbytes) does not match the "
- "address range assigned (%d Mbytes)\n", deviceCapacity,
- capacity / (1024 * 1024));
-
- DPRINTF(DRAM, "Row buffer size %d bytes with %d bursts per row buffer\n",
- rowBufferSize, burstsPerRowBuffer);
-
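-    // derive rows per bank from the total capacity and the row-buffer,
-    // bank and rank geometry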
- rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
-
- // some basic sanity checks
- if (tREFI <= tRP || tREFI <= tRFC) {
- fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
- tREFI, tRP, tRFC);
- }
-
- // basic bank group architecture checks ->
- if (bankGroupArch) {
- // must have at least one bank per bank group
- if (bankGroupsPerRank > banksPerRank) {
- fatal("banks per rank (%d) must be equal to or larger than "
- "banks groups per rank (%d)\n",
- banksPerRank, bankGroupsPerRank);
- }
- // must have same number of banks in each bank group
- if ((banksPerRank % bankGroupsPerRank) != 0) {
- fatal("Banks per rank (%d) must be evenly divisible by bank "
- "groups per rank (%d) for equal banks per bank group\n",
- banksPerRank, bankGroupsPerRank);
- }
- // tCCD_L should be greater than minimal, back-to-back burst delay
- if (tCCD_L <= tBURST) {
- fatal("tCCD_L (%d) should be larger than the minimum bus delay "
- "(%d) when bank groups per rank (%d) is greater than 1\n",
- tCCD_L, tBURST, bankGroupsPerRank);
- }
- // tCCD_L_WR should be greater than minimal, back-to-back burst delay
- if (tCCD_L_WR <= tBURST) {
- fatal("tCCD_L_WR (%d) should be larger than the minimum bus delay "
- " (%d) when bank groups per rank (%d) is greater than 1\n",
- tCCD_L_WR, tBURST, bankGroupsPerRank);
- }
- // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay
- // some datasheets might specify it equal to tRRD
- if (tRRD_L < tRRD) {
- fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
- "bank groups per rank (%d) is greater than 1\n",
- tRRD_L, tRRD, bankGroupsPerRank);
- }
- }
-}
-
-void
-DRAMInterface::init()
-{
- AbstractMemory::init();
-
-    // a few sanity checks on the interleaving, done here to
-    // ensure that the system pointer is initialised
- if (range.interleaved()) {
- if (addrMapping == Enums::RoRaBaChCo) {
- if (rowBufferSize != range.granularity()) {
- fatal("Channel interleaving of %s doesn't match RoRaBaChCo "
- "address map\n", name());
- }
- } else if (addrMapping == Enums::RoRaBaCoCh ||
- addrMapping == Enums::RoCoRaBaCh) {
- // for the interleavings with channel bits in the bottom,
- // if the system uses a channel striping granularity that
- // is larger than the DRAM burst size, then map the
- // sequential accesses within a stripe to a number of
- // columns in the DRAM, effectively placing some of the
- // lower-order column bits as the least-significant bits
- // of the address (above the ones denoting the burst size)
- assert(burstsPerStripe >= 1);
-
- // channel striping has to be done at a granularity that
- // is equal or larger to a cache line
- if (system()->cacheLineSize() > range.granularity()) {
- fatal("Channel interleaving of %s must be at least as large "
- "as the cache line size\n", name());
- }
-
-            // ...and equal to or smaller than the row-buffer size
- if (rowBufferSize < range.granularity()) {
- fatal("Channel interleaving of %s must be at most as large "
- "as the row-buffer size\n", name());
- }
- // this is essentially the check above, so just to be sure
- assert(burstsPerStripe <= burstsPerRowBuffer);
- }
- }
-}
-
-void
-DRAMInterface::startup()
-{
- if (system()->isTimingMode()) {
- // timestamp offset should be in clock cycles for DRAMPower
- timeStampOffset = divCeil(curTick(), tCK);
-
- for (auto r : ranks) {
- r->startup(curTick() + tREFI - tRP);
- }
- }
-}
-
-bool
-DRAMInterface::isBusy()
-{
- int busy_ranks = 0;
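-    // count ranks that cannot currently accept commands; the interface is
-    // only reported busy when every rank is unavailable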
- for (auto r : ranks) {
- if (!r->inRefIdleState()) {
- if (r->pwrState != PWR_SREF) {
- // rank is busy refreshing
- DPRINTF(DRAMState, "Rank %d is not available\n", r->rank);
- busy_ranks++;
-
- // let the rank know that if it was waiting to drain, it
- // is now done and ready to proceed
- r->checkDrainDone();
- }
-
- // check if we were in self-refresh and haven't started
- // to transition out
- if ((r->pwrState == PWR_SREF) && r->inLowPowerState) {
- DPRINTF(DRAMState, "Rank %d is in self-refresh\n", r->rank);
- // if we have commands queued to this rank and we don't have
- // a minimum number of active commands enqueued,
- // exit self-refresh
- if (r->forceSelfRefreshExit()) {
- DPRINTF(DRAMState, "rank %d was in self refresh and"
- " should wake up\n", r->rank);
- //wake up from self-refresh
- r->scheduleWakeUpEvent(tXS);
- // things are brought back into action once a refresh is
- // performed after self-refresh
- // continue with selection for other ranks
- }
- }
- }
- }
- return (busy_ranks == ranksPerChannel);
-}
-
-void DRAMInterface::setupRank(const uint8_t rank, const bool is_read)
-{
- // increment entry count of the rank based on packet type
- if (is_read) {
- ++ranks[rank]->readEntries;
- } else {
- ++ranks[rank]->writeEntries;
- }
-}
-
-void
-DRAMInterface::respondEvent(uint8_t rank)
-{
- Rank& rank_ref = *ranks[rank];
-
- // if a read has reached its ready-time, decrement the number of reads
- // At this point the packet has been handled and there is a possibility
- // to switch to low-power mode if no other packet is available
- --rank_ref.readEntries;
- DPRINTF(DRAM, "number of read entries for rank %d is %d\n",
- rank, rank_ref.readEntries);
-
- // counter should at least indicate one outstanding request
- // for this read
- assert(rank_ref.outstandingEvents > 0);
- // read response received, decrement count
- --rank_ref.outstandingEvents;
-
- // at this moment should not have transitioned to a low-power state
- assert((rank_ref.pwrState != PWR_SREF) &&
- (rank_ref.pwrState != PWR_PRE_PDN) &&
- (rank_ref.pwrState != PWR_ACT_PDN));
-
- // track if this is the last packet before idling
- // and that there are no outstanding commands to this rank
- if (rank_ref.isQueueEmpty() && rank_ref.outstandingEvents == 0 &&
- rank_ref.inRefIdleState() && enableDRAMPowerdown) {
- // verify that there are no events scheduled
- assert(!rank_ref.activateEvent.scheduled());
- assert(!rank_ref.prechargeEvent.scheduled());
-
- // if coming from active state, schedule power event to
- // active power-down else go to precharge power-down
- DPRINTF(DRAMState, "Rank %d sleep at tick %d; current power state is "
- "%d\n", rank, curTick(), rank_ref.pwrState);
-
- // default to ACT power-down unless already in IDLE state
- // could be in IDLE if PRE issued before data returned
- PowerState next_pwr_state = PWR_ACT_PDN;
- if (rank_ref.pwrState == PWR_IDLE) {
- next_pwr_state = PWR_PRE_PDN;
- }
-
- rank_ref.powerDownSleep(next_pwr_state, curTick());
- }
-}
-
-void
-DRAMInterface::checkRefreshState(uint8_t rank)
-{
- Rank& rank_ref = *ranks[rank];
-
- if ((rank_ref.refreshState == REF_PRE) &&
- !rank_ref.prechargeEvent.scheduled()) {
- // kick the refresh event loop into action again if banks already
- // closed and just waiting for read to complete
- schedule(rank_ref.refreshEvent, curTick());
- }
-}
-
-void
-DRAMInterface::drainRanks()
-{
- // also need to kick off events to exit self-refresh
- for (auto r : ranks) {
- // force self-refresh exit, which in turn will issue auto-refresh
- if (r->pwrState == PWR_SREF) {
- DPRINTF(DRAM,"Rank%d: Forcing self-refresh wakeup in drain\n",
- r->rank);
- r->scheduleWakeUpEvent(tXS);
- }
- }
-}
-
-bool
-DRAMInterface::allRanksDrained() const
-{
- // true until proven false
- bool all_ranks_drained = true;
- for (auto r : ranks) {
- // then verify that the power state is IDLE ensuring all banks are
- // closed and rank is not in a low power state. Also verify that rank
- // is idle from a refresh point of view.
- all_ranks_drained = r->inPwrIdleState() && r->inRefIdleState() &&
- all_ranks_drained;
- }
- return all_ranks_drained;
-}
-
-void
-DRAMInterface::suspend()
-{
- for (auto r : ranks) {
- r->suspend();
- }
-}
-
-pair<vector<uint32_t>, bool>
-DRAMInterface::minBankPrep(const DRAMPacketQueue& queue,
- Tick min_col_at) const
-{
- Tick min_act_at = MaxTick;
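-    // one mask word per rank, with one bit set per candidate bank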
- vector<uint32_t> bank_mask(ranksPerChannel, 0);
-
-    // latest Tick for which ACT can occur without incurring additional
- // delay on the data bus
- const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick());
-
- // Flag condition when burst can issue back-to-back with previous burst
- bool found_seamless_bank = false;
-
- // Flag condition when bank can be opened without incurring additional
- // delay on the data bus
- bool hidden_bank_prep = false;
-
-    // determine if we have queued transactions targeting the
- // bank in question
- vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
- for (const auto& p : queue) {
- if (p->isDram() && ranks[p->rank]->inRefIdleState())
- got_waiting[p->bankId] = true;
- }
-
- // Find command with optimal bank timing
- // Will prioritize commands that can issue seamlessly.
- for (int i = 0; i < ranksPerChannel; i++) {
- for (int j = 0; j < banksPerRank; j++) {
- uint16_t bank_id = i * banksPerRank + j;
-
- // if we have waiting requests for the bank, and it is
- // amongst the first available, update the mask
- if (got_waiting[bank_id]) {
- // make sure this rank is not currently refreshing.
- assert(ranks[i]->inRefIdleState());
- // simplistic approximation of when the bank can issue
- // an activate, ignoring any rank-to-rank switching
- // cost in this calculation
- Tick act_at = ranks[i]->banks[j].openRow == Bank::NO_ROW ?
- std::max(ranks[i]->banks[j].actAllowedAt, curTick()) :
- std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP;
-
- // When is the earliest the R/W burst can issue?
- const Tick col_allowed_at = ctrl->inReadBusState(false) ?
- ranks[i]->banks[j].rdAllowedAt :
- ranks[i]->banks[j].wrAllowedAt;
- Tick col_at = std::max(col_allowed_at, act_at + tRCD);
-
- // bank can issue burst back-to-back (seamlessly) with
- // previous burst
- bool new_seamless_bank = col_at <= min_col_at;
-
- // if we found a new seamless bank or we have no
- // seamless banks, and got a bank with an earlier
- // activate time, it should be added to the bit mask
- if (new_seamless_bank ||
- (!found_seamless_bank && act_at <= min_act_at)) {
- // if we did not have a seamless bank before, and
- // we do now, reset the bank mask, also reset it
- // if we have not yet found a seamless bank and
- // the activate time is smaller than what we have
- // seen so far
- if (!found_seamless_bank &&
- (new_seamless_bank || act_at < min_act_at)) {
- std::fill(bank_mask.begin(), bank_mask.end(), 0);
- }
-
- found_seamless_bank |= new_seamless_bank;
-
- // ACT can occur 'behind the scenes'
- hidden_bank_prep = act_at <= hidden_act_max;
-
- // set the bit corresponding to the available bank
- replaceBits(bank_mask[i], j, j, 1);
- min_act_at = act_at;
- }
- }
- }
- }
-
- return make_pair(bank_mask, hidden_bank_prep);
-}
-
-DRAMInterface*
-DRAMInterfaceParams::create()
-{
- return new DRAMInterface(this);
-}
-
-DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p,
- int _rank, DRAMInterface& _dram)
- : EventManager(&_dram), dram(_dram),
- pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE),
- pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE),
- refreshState(REF_IDLE), inLowPowerState(false), rank(_rank),
- readEntries(0), writeEntries(0), outstandingEvents(0),
- wakeUpAllowedAt(0), power(_p, false), banks(_p->banks_per_rank),
- numBanksActive(0), actTicks(_p->activation_limit, 0), lastBurstTick(0),
- writeDoneEvent([this]{ processWriteDoneEvent(); }, name()),
- activateEvent([this]{ processActivateEvent(); }, name()),
- prechargeEvent([this]{ processPrechargeEvent(); }, name()),
- refreshEvent([this]{ processRefreshEvent(); }, name()),
- powerEvent([this]{ processPowerEvent(); }, name()),
- wakeUpEvent([this]{ processWakeUpEvent(); }, name()),
- stats(_dram, *this)
-{
- for (int b = 0; b < _p->banks_per_rank; b++) {
- banks[b].bank = b;
- // GDDR addressing of banks to BG is linear.
- // Here we assume that all DRAM generations address bank groups as
- // follows:
- if (_p->bank_groups_per_rank > 0) {
- // Simply assign lower bits to bank group in order to
- // rotate across bank groups as banks are incremented
- // e.g. with 4 banks per bank group and 16 banks total:
- // banks 0,4,8,12 are in bank group 0
- // banks 1,5,9,13 are in bank group 1
- // banks 2,6,10,14 are in bank group 2
- // banks 3,7,11,15 are in bank group 3
- banks[b].bankgr = b % _p->bank_groups_per_rank;
- } else {
- // No bank groups; simply assign to bank number
- banks[b].bankgr = b;
- }
- }
-}
-
-void
-DRAMInterface::Rank::startup(Tick ref_tick)
-{
- assert(ref_tick > curTick());
-
- pwrStateTick = curTick();
-
- // kick off the refresh, and give ourselves enough time to
- // precharge
- schedule(refreshEvent, ref_tick);
-}
-
-void
-DRAMInterface::Rank::suspend()
-{
- deschedule(refreshEvent);
-
- // Update the stats
- updatePowerStats();
-
- // don't automatically transition back to LP state after next REF
- pwrStatePostRefresh = PWR_IDLE;
-}
-
-bool
-DRAMInterface::Rank::isQueueEmpty() const
-{
-    // check commands in Q based on current bus direction
- bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
- (readEntries == 0))
- || (dram.ctrl->inWriteBusState(true) &&
- (writeEntries == 0));
- return no_queued_cmds;
-}
-
-void
-DRAMInterface::Rank::checkDrainDone()
-{
- // if this rank was waiting to drain it is now able to proceed to
- // precharge
- if (refreshState == REF_DRAIN) {
- DPRINTF(DRAM, "Refresh drain done, now precharging\n");
-
- refreshState = REF_PD_EXIT;
-
- // hand control back to the refresh event loop
- schedule(refreshEvent, curTick());
- }
-}
-
-void
-DRAMInterface::Rank::flushCmdList()
-{
- // at the moment sort the list of commands and update the counters
-    // for the DRAMPower library when doing a refresh
- sort(cmdList.begin(), cmdList.end(), DRAMInterface::sortTime);
-
- auto next_iter = cmdList.begin();
-    // push the commands to DRAMPower
- for ( ; next_iter != cmdList.end() ; ++next_iter) {
- Command cmd = *next_iter;
- if (cmd.timeStamp <= curTick()) {
- // Move all commands at or before curTick to DRAMPower
- power.powerlib.doCommand(cmd.type, cmd.bank,
- divCeil(cmd.timeStamp, dram.tCK) -
- dram.timeStampOffset);
- } else {
- // done - found all commands at or before curTick()
- // next_iter references the 1st command after curTick
- break;
- }
- }
- // reset cmdList to only contain commands after curTick
- // if there are no commands after curTick, updated cmdList will be empty
- // in this case, next_iter is cmdList.end()
- cmdList.assign(next_iter, cmdList.end());
-}
-
-void
-DRAMInterface::Rank::processActivateEvent()
-{
- // we should transition to the active state as soon as any bank is active
- if (pwrState != PWR_ACT)
- // note that at this point numBanksActive could be back at
- // zero again due to a precharge scheduled in the future
- schedulePowerEvent(PWR_ACT, curTick());
-}
-
-void
-DRAMInterface::Rank::processPrechargeEvent()
-{
- // counter should at least indicate one outstanding request
- // for this precharge
- assert(outstandingEvents > 0);
- // precharge complete, decrement count
- --outstandingEvents;
-
- // if we reached zero, then special conditions apply as we track
- // if all banks are precharged for the power models
- if (numBanksActive == 0) {
- // no reads to this rank in the Q and no pending
- // RD/WR or refresh commands
- if (isQueueEmpty() && outstandingEvents == 0 &&
- dram.enableDRAMPowerdown) {
- // should still be in ACT state since bank still open
- assert(pwrState == PWR_ACT);
-
- // All banks closed - switch to precharge power down state.
- DPRINTF(DRAMState, "Rank %d sleep at tick %d\n",
- rank, curTick());
- powerDownSleep(PWR_PRE_PDN, curTick());
- } else {
- // we should transition to the idle state when the last bank
- // is precharged
- schedulePowerEvent(PWR_IDLE, curTick());
- }
- }
-}
-
-void
-DRAMInterface::Rank::processWriteDoneEvent()
-{
- // counter should at least indicate one outstanding request
- // for this write
- assert(outstandingEvents > 0);
- // Write transfer on bus has completed
- // decrement per rank counter
- --outstandingEvents;
-}
-
-void
-DRAMInterface::Rank::processRefreshEvent()
-{
- // when first preparing the refresh, remember when it was due
- if ((refreshState == REF_IDLE) || (refreshState == REF_SREF_EXIT)) {
- // remember when the refresh is due
- refreshDueAt = curTick();
-
- // proceed to drain
- refreshState = REF_DRAIN;
-
- // make nonzero while refresh is pending to ensure
- // power down and self-refresh are not entered
- ++outstandingEvents;
-
- DPRINTF(DRAM, "Refresh due\n");
- }
-
-    // let any scheduled read or write to the same rank go ahead, after
-    // which it will hand control back to this event loop
- if (refreshState == REF_DRAIN) {
- // if a request is at the moment being handled and this request is
- // accessing the current rank then wait for it to finish
- if ((rank == dram.activeRank)
- && (dram.ctrl->requestEventScheduled())) {
- // hand control over to the request loop until it is
- // evaluated next
- DPRINTF(DRAM, "Refresh awaiting draining\n");
-
- return;
- } else {
- refreshState = REF_PD_EXIT;
- }
- }
-
- // at this point, ensure that rank is not in a power-down state
- if (refreshState == REF_PD_EXIT) {
-        // if rank was sleeping and we haven't started the exit process,
- // wake-up for refresh
- if (inLowPowerState) {
- DPRINTF(DRAM, "Wake Up for refresh\n");
- // save state and return after refresh completes
- scheduleWakeUpEvent(dram.tXP);
- return;
- } else {
- refreshState = REF_PRE;
- }
- }
-
- // at this point, ensure that all banks are precharged
- if (refreshState == REF_PRE) {
- // precharge any active bank
- if (numBanksActive != 0) {
- // at the moment, we use a precharge all even if there is
- // only a single bank open
- DPRINTF(DRAM, "Precharging all\n");
-
- // first determine when we can precharge
- Tick pre_at = curTick();
-
- for (auto &b : banks) {
- // respect both causality and any existing bank
- // constraints, some banks could already have a
- // (auto) precharge scheduled
- pre_at = std::max(b.preAllowedAt, pre_at);
- }
-
- // make sure all banks per rank are precharged, and for those that
- // already are, update their availability
- Tick act_allowed_at = pre_at + dram.tRP;
-
- for (auto &b : banks) {
- if (b.openRow != Bank::NO_ROW) {
- dram.prechargeBank(*this, b, pre_at, true, false);
- } else {
- b.actAllowedAt = std::max(b.actAllowedAt, act_allowed_at);
- b.preAllowedAt = std::max(b.preAllowedAt, pre_at);
- }
- }
-
- // precharge all banks in rank
- cmdList.push_back(Command(MemCommand::PREA, 0, pre_at));
-
- DPRINTF(DRAMPower, "%llu,PREA,0,%d\n",
- divCeil(pre_at, dram.tCK) -
- dram.timeStampOffset, rank);
- } else if ((pwrState == PWR_IDLE) && (outstandingEvents == 1)) {
- // Banks are closed, have transitioned to IDLE state, and
- // no outstanding ACT,RD/WR,Auto-PRE sequence scheduled
- DPRINTF(DRAM, "All banks already precharged, starting refresh\n");
-
- // go ahead and kick the power state machine into gear since
- // we are already idle
- schedulePowerEvent(PWR_REF, curTick());
- } else {
-            // banks are closed but pwrState hasn't transitioned to IDLE,
-            // or an ACT,RD/WR,Auto-PRE sequence is scheduled;
- // should have outstanding precharge or read response event
- assert(prechargeEvent.scheduled() ||
- dram.ctrl->respondEventScheduled());
- // will start refresh when pwrState transitions to IDLE
- }
-
- assert(numBanksActive == 0);
-
- // wait for all banks to be precharged or read to complete
- // When precharge commands are done, power state machine will
- // transition to the idle state, and automatically move to a
- // refresh, at that point it will also call this method to get
- // the refresh event loop going again
- // Similarly, when read response completes, if all banks are
- // precharged, will call this method to get loop re-started
- return;
- }
-
- // last but not least we perform the actual refresh
- if (refreshState == REF_START) {
- // should never get here with any banks active
- assert(numBanksActive == 0);
- assert(pwrState == PWR_REF);
-
- Tick ref_done_at = curTick() + dram.tRFC;
-
- for (auto &b : banks) {
- b.actAllowedAt = ref_done_at;
- }
-
- // at the moment this affects all ranks
- cmdList.push_back(Command(MemCommand::REF, 0, curTick()));
-
- // Update the stats
- updatePowerStats();
-
- DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), dram.tCK) -
- dram.timeStampOffset, rank);
-
- // Update for next refresh
- refreshDueAt += dram.tREFI;
-
- // make sure we did not wait so long that we cannot make up
- // for it
- if (refreshDueAt < ref_done_at) {
- fatal("Refresh was delayed so long we cannot catch up\n");
- }
-
- // Run the refresh and schedule event to transition power states
- // when refresh completes
- refreshState = REF_RUN;
- schedule(refreshEvent, ref_done_at);
- return;
- }
-
- if (refreshState == REF_RUN) {
- // should never get here with any banks active
- assert(numBanksActive == 0);
- assert(pwrState == PWR_REF);
-
- assert(!powerEvent.scheduled());
-
- if ((dram.ctrl->drainState() == DrainState::Draining) ||
- (dram.ctrl->drainState() == DrainState::Drained)) {
- // if draining, do not re-enter low-power mode.
- // simply go to IDLE and wait
- schedulePowerEvent(PWR_IDLE, curTick());
- } else {
- // At the moment, we sleep when the refresh ends and wait to be
- // woken up again if previously in a low-power state.
- if (pwrStatePostRefresh != PWR_IDLE) {
- // power State should be power Refresh
- assert(pwrState == PWR_REF);
- DPRINTF(DRAMState, "Rank %d sleeping after refresh and was in "
- "power state %d before refreshing\n", rank,
- pwrStatePostRefresh);
- powerDownSleep(pwrState, curTick());
-
- // Force PRE power-down if there are no outstanding commands
- // in Q after refresh.
- } else if (isQueueEmpty() && dram.enableDRAMPowerdown) {
- // still have refresh event outstanding but there should
- // be no other events outstanding
- assert(outstandingEvents == 1);
- DPRINTF(DRAMState, "Rank %d sleeping after refresh but was NOT"
- " in a low power state before refreshing\n", rank);
- powerDownSleep(PWR_PRE_PDN, curTick());
-
- } else {
- // move to the idle power state once the refresh is done, this
- // will also move the refresh state machine to the refresh
- // idle state
- schedulePowerEvent(PWR_IDLE, curTick());
- }
- }
-
- // At this point, we have completed the current refresh.
- // In the SREF bypass case, we do not get to this state in the
- // refresh STM and therefore can always schedule next event.
- // Compensate for the delay in actually performing the refresh
- // when scheduling the next one
- schedule(refreshEvent, refreshDueAt - dram.tRP);
-
- DPRINTF(DRAMState, "Refresh done at %llu and next refresh"
- " at %llu\n", curTick(), refreshDueAt);
- }
-}
-
-void
-DRAMInterface::Rank::schedulePowerEvent(PowerState pwr_state, Tick tick)
-{
- // respect causality
- assert(tick >= curTick());
-
- if (!powerEvent.scheduled()) {
- DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n",
- tick, pwr_state);
-
- // insert the new transition
- pwrStateTrans = pwr_state;
-
- schedule(powerEvent, tick);
- } else {
- panic("Scheduled power event at %llu to state %d, "
- "with scheduled event at %llu to %d\n", tick, pwr_state,
- powerEvent.when(), pwrStateTrans);
- }
-}
-
-void
-DRAMInterface::Rank::powerDownSleep(PowerState pwr_state, Tick tick)
-{
-    // if the low power state is active power-down, schedule the transition
-    // to that state. In reality tCKE is needed to enter active power-down;
-    // this is neglected here and could be added in the future.
- if (pwr_state == PWR_ACT_PDN) {
- schedulePowerEvent(pwr_state, tick);
- // push command to DRAMPower
- cmdList.push_back(Command(MemCommand::PDN_F_ACT, 0, tick));
- DPRINTF(DRAMPower, "%llu,PDN_F_ACT,0,%d\n", divCeil(tick,
- dram.tCK) - dram.timeStampOffset, rank);
- } else if (pwr_state == PWR_PRE_PDN) {
-        // if the low power state is precharge power-down, schedule the
-        // transition to that state. In reality tCKE is needed to enter
-        // precharge power-down; this is neglected here.
- schedulePowerEvent(pwr_state, tick);
- //push Command to DRAMPower
- cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
- DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
- dram.tCK) - dram.timeStampOffset, rank);
- } else if (pwr_state == PWR_REF) {
- // if a refresh just occurred
- // transition to PRE_PDN now that all banks are closed
- // precharge power down requires tCKE to enter. For simplicity
- // this is not considered.
- schedulePowerEvent(PWR_PRE_PDN, tick);
- //push Command to DRAMPower
- cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
- DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
- dram.tCK) - dram.timeStampOffset, rank);
- } else if (pwr_state == PWR_SREF) {
- // should only enter SREF after PRE-PD wakeup to do a refresh
- assert(pwrStatePostRefresh == PWR_PRE_PDN);
- // self refresh requires time tCKESR to enter. For simplicity,
- // this is not considered.
- schedulePowerEvent(PWR_SREF, tick);
- // push Command to DRAMPower
- cmdList.push_back(Command(MemCommand::SREN, 0, tick));
- DPRINTF(DRAMPower, "%llu,SREN,0,%d\n", divCeil(tick,
- dram.tCK) - dram.timeStampOffset, rank);
- }
- // Ensure that we don't power-down and back up in same tick
- // Once we commit to PD entry, do it and wait for at least 1tCK
- // This could be replaced with tCKE if/when that is added to the model
- wakeUpAllowedAt = tick + dram.tCK;
-
- // Transitioning to a low power state, set flag
- inLowPowerState = true;
-}
-
-void
-DRAMInterface::Rank::scheduleWakeUpEvent(Tick exit_delay)
-{
- Tick wake_up_tick = std::max(curTick(), wakeUpAllowedAt);
-
- DPRINTF(DRAMState, "Scheduling wake-up for rank %d at tick %d\n",
- rank, wake_up_tick);
-
- // if waking for refresh, hold previous state
- // else reset state back to IDLE
- if (refreshState == REF_PD_EXIT) {
- pwrStatePostRefresh = pwrState;
- } else {
- // don't automatically transition back to LP state after next REF
- pwrStatePostRefresh = PWR_IDLE;
- }
-
- // schedule wake-up with event to ensure entry has completed before
- // we try to wake-up
- schedule(wakeUpEvent, wake_up_tick);
-
- for (auto &b : banks) {
- // respect both causality and any existing bank
- // constraints, some banks could already have a
- // (auto) precharge scheduled
- b.wrAllowedAt = std::max(wake_up_tick + exit_delay, b.wrAllowedAt);
- b.rdAllowedAt = std::max(wake_up_tick + exit_delay, b.rdAllowedAt);
- b.preAllowedAt = std::max(wake_up_tick + exit_delay, b.preAllowedAt);
- b.actAllowedAt = std::max(wake_up_tick + exit_delay, b.actAllowedAt);
- }
- // Transitioning out of low power state, clear flag
- inLowPowerState = false;
-
- // push to DRAMPower
- // use pwrStateTrans for cases where we have a power event scheduled
- // to enter low power that has not yet been processed
- if (pwrStateTrans == PWR_ACT_PDN) {
- cmdList.push_back(Command(MemCommand::PUP_ACT, 0, wake_up_tick));
- DPRINTF(DRAMPower, "%llu,PUP_ACT,0,%d\n", divCeil(wake_up_tick,
- dram.tCK) - dram.timeStampOffset, rank);
-
- } else if (pwrStateTrans == PWR_PRE_PDN) {
- cmdList.push_back(Command(MemCommand::PUP_PRE, 0, wake_up_tick));
- DPRINTF(DRAMPower, "%llu,PUP_PRE,0,%d\n", divCeil(wake_up_tick,
- dram.tCK) - dram.timeStampOffset, rank);
- } else if (pwrStateTrans == PWR_SREF) {
- cmdList.push_back(Command(MemCommand::SREX, 0, wake_up_tick));
- DPRINTF(DRAMPower, "%llu,SREX,0,%d\n", divCeil(wake_up_tick,
- dram.tCK) - dram.timeStampOffset, rank);
- }
-}
-
-void
-DRAMInterface::Rank::processWakeUpEvent()
-{
- // Should be in a power-down or self-refresh state
- assert((pwrState == PWR_ACT_PDN) || (pwrState == PWR_PRE_PDN) ||
- (pwrState == PWR_SREF));
-
- // Check current state to determine transition state
- if (pwrState == PWR_ACT_PDN) {
- // banks still open, transition to PWR_ACT
- schedulePowerEvent(PWR_ACT, curTick());
- } else {
- // transitioning from a precharge power-down or self-refresh state
- // banks are closed - transition to PWR_IDLE
- schedulePowerEvent(PWR_IDLE, curTick());
- }
-}
-
-void
-DRAMInterface::Rank::processPowerEvent()
-{
- assert(curTick() >= pwrStateTick);
- // remember where we were, and for how long
- Tick duration = curTick() - pwrStateTick;
- PowerState prev_state = pwrState;
-
- // update the accounting
- stats.pwrStateTime[prev_state] += duration;
-
-    // track the total idle time
- if ((prev_state == PWR_PRE_PDN) || (prev_state == PWR_ACT_PDN) ||
- (prev_state == PWR_SREF)) {
- stats.totalIdleTime += duration;
- }
-
- pwrState = pwrStateTrans;
- pwrStateTick = curTick();
-
- // if rank was refreshing, make sure to start scheduling requests again
- if (prev_state == PWR_REF) {
- // bus IDLED prior to REF
- // counter should be one for refresh command only
- assert(outstandingEvents == 1);
- // REF complete, decrement count and go back to IDLE
- --outstandingEvents;
- refreshState = REF_IDLE;
-
- DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration);
- // if moving back to power-down after refresh
- if (pwrState != PWR_IDLE) {
- assert(pwrState == PWR_PRE_PDN);
- DPRINTF(DRAMState, "Switching to power down state after refreshing"
- " rank %d at %llu tick\n", rank, curTick());
- }
-
- // completed refresh event, ensure next request is scheduled
- if (!dram.ctrl->requestEventScheduled()) {
- DPRINTF(DRAM, "Scheduling next request after refreshing"
- " rank %d\n", rank);
- dram.ctrl->restartScheduler(curTick());
- }
- }
-
- if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) {
- // have exited ACT PD
- assert(prev_state == PWR_ACT_PDN);
-
- // go back to REF event and close banks
- refreshState = REF_PRE;
- schedule(refreshEvent, curTick());
- } else if (pwrState == PWR_IDLE) {
- DPRINTF(DRAMState, "All banks precharged\n");
- if (prev_state == PWR_SREF) {
- // set refresh state to REF_SREF_EXIT, ensuring inRefIdleState
- // continues to return false during tXS after SREF exit
- // Schedule a refresh which kicks things back into action
- // when it finishes
- refreshState = REF_SREF_EXIT;
- schedule(refreshEvent, curTick() + dram.tXS);
- } else {
- // if we have a pending refresh, and are now moving to
- // the idle state, directly transition to, or schedule refresh
- if ((refreshState == REF_PRE) || (refreshState == REF_PD_EXIT)) {
- // ensure refresh is restarted only after final PRE command.
- // do not restart refresh if controller is in an intermediate
- // state, after PRE_PDN exit, when banks are IDLE but an
- // ACT is scheduled.
- if (!activateEvent.scheduled()) {
- // there should be nothing waiting at this point
- assert(!powerEvent.scheduled());
- if (refreshState == REF_PD_EXIT) {
- // exiting PRE PD, will be in IDLE until tXP expires
- // and then should transition to PWR_REF state
- assert(prev_state == PWR_PRE_PDN);
- schedulePowerEvent(PWR_REF, curTick() + dram.tXP);
- } else if (refreshState == REF_PRE) {
- // can directly move to PWR_REF state and proceed below
- pwrState = PWR_REF;
- }
- } else {
- // must have PRE scheduled to transition back to IDLE
- // and re-kick off refresh
- assert(prechargeEvent.scheduled());
- }
- }
- }
- }
-
- // transition to the refresh state and re-start refresh process
- // refresh state machine will schedule the next power state transition
- if (pwrState == PWR_REF) {
- // completed final PRE for refresh or exiting power-down
- assert(refreshState == REF_PRE || refreshState == REF_PD_EXIT);
-
- // exited PRE PD for refresh, with no pending commands
- // bypass auto-refresh and go straight to SREF, where memory
- // will issue refresh immediately upon entry
- if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
- (dram.ctrl->drainState() != DrainState::Draining) &&
- (dram.ctrl->drainState() != DrainState::Drained) &&
- dram.enableDRAMPowerdown) {
- DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning "
- "to self refresh at %11u tick\n", rank, curTick());
- powerDownSleep(PWR_SREF, curTick());
-
- // Since refresh was bypassed, remove event by decrementing count
- assert(outstandingEvents == 1);
- --outstandingEvents;
-
- // reset state back to IDLE temporarily until SREF is entered
- pwrState = PWR_IDLE;
-
- // Not bypassing refresh for SREF entry
- } else {
- DPRINTF(DRAMState, "Refreshing\n");
-
- // there should be nothing waiting at this point
- assert(!powerEvent.scheduled());
-
- // kick the refresh event loop into action again, and that
- // in turn will schedule a transition to the idle power
- // state once the refresh is done
- schedule(refreshEvent, curTick());
-
- // Banks transitioned to IDLE, start REF
- refreshState = REF_START;
- }
- }
-
-}
-
-void
-DRAMInterface::Rank::updatePowerStats()
-{
- // All commands up to refresh have completed
- // flush cmdList to DRAMPower
- flushCmdList();
-
- // Call the function that calculates window energy at intermediate update
- // events like at refresh, stats dump as well as at simulation exit.
- // Window starts at the last time the calcWindowEnergy function was called
-    // and extends up to the current time.
- power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
- dram.timeStampOffset);
-
- // Get the energy from DRAMPower
- Data::MemoryPowerModel::Energy energy = power.powerlib.getEnergy();
-
- // The energy components inside the power lib are calculated over
- // the window so accumulate into the corresponding gem5 stat
- stats.actEnergy += energy.act_energy * dram.devicesPerRank;
- stats.preEnergy += energy.pre_energy * dram.devicesPerRank;
- stats.readEnergy += energy.read_energy * dram.devicesPerRank;
- stats.writeEnergy += energy.write_energy * dram.devicesPerRank;
- stats.refreshEnergy += energy.ref_energy * dram.devicesPerRank;
- stats.actBackEnergy += energy.act_stdby_energy * dram.devicesPerRank;
- stats.preBackEnergy += energy.pre_stdby_energy * dram.devicesPerRank;
- stats.actPowerDownEnergy += energy.f_act_pd_energy * dram.devicesPerRank;
- stats.prePowerDownEnergy += energy.f_pre_pd_energy * dram.devicesPerRank;
- stats.selfRefreshEnergy += energy.sref_energy * dram.devicesPerRank;
-
- // Accumulate window energy into the total energy.
- stats.totalEnergy += energy.window_energy * dram.devicesPerRank;
-    // Average power must not be accumulated but calculated over the time
-    // since the last stats reset. SimClock::Frequency is the number of
-    // ticks per second, so the conversion from pJ per tick to mW is:
-    //
-    //              energy (pJ)     SimClock::Frequency (ticks/s)
-    // power (mW) = ----------- * -------------------------------
-    //              time (tick)                1e9
- stats.averagePower = (stats.totalEnergy.value() /
- (curTick() - dram.lastStatsResetTick)) *
- (SimClock::Frequency / 1000000000.0);
-}
-
-void
-DRAMInterface::Rank::computeStats()
-{
- DPRINTF(DRAM,"Computing stats due to a dump callback\n");
-
- // Update the stats
- updatePowerStats();
-
- // final update of power state times
- stats.pwrStateTime[pwrState] += (curTick() - pwrStateTick);
- pwrStateTick = curTick();
-}
-
-void
-DRAMInterface::Rank::resetStats()
-{
- // The only way to clear the counters in DRAMPower is to call
- // calcWindowEnergy function as that then calls clearCounters. The
- // clearCounters method itself is private.
- power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
- dram.timeStampOffset);
-}
-
-bool
-DRAMInterface::Rank::forceSelfRefreshExit() const
-{
- return (readEntries != 0) ||
- (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
-}
-
-DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl)
- : Stats::Group(&_ctrl),
- ctrl(_ctrl),
-
- ADD_STAT(readReqs, "Number of read requests accepted"),
- ADD_STAT(writeReqs, "Number of write requests accepted"),
-
- ADD_STAT(readBursts,
- "Number of controller read bursts, "
- "including those serviced by the write queue"),
- ADD_STAT(writeBursts,
- "Number of controller write bursts, "
- "including those merged in the write queue"),
- ADD_STAT(servicedByWrQ,
- "Number of controller read bursts serviced by the write queue"),
- ADD_STAT(mergedWrBursts,
- "Number of controller write bursts merged with an existing one"),
-
- ADD_STAT(neitherReadNorWriteReqs,
- "Number of requests that are neither read nor write"),
-
- ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"),
- ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"),
-
- ADD_STAT(numRdRetry, "Number of times read queue was full causing retry"),
- ADD_STAT(numWrRetry, "Number of times write queue was full causing retry"),
-
- ADD_STAT(readPktSize, "Read request sizes (log2)"),
- ADD_STAT(writePktSize, "Write request sizes (log2)"),
-
- ADD_STAT(rdQLenPdf, "What read queue length does an incoming req see"),
- ADD_STAT(wrQLenPdf, "What write queue length does an incoming req see"),
-
- ADD_STAT(rdPerTurnAround,
- "Reads before turning the bus around for writes"),
- ADD_STAT(wrPerTurnAround,
- "Writes before turning the bus around for reads"),
-
- ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"),
- ADD_STAT(bytesReadSys, "Total read bytes from the system interface side"),
- ADD_STAT(bytesWrittenSys,
- "Total written bytes from the system interface side"),
-
- ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"),
- ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"),
-
- ADD_STAT(totGap, "Total gap between requests"),
- ADD_STAT(avgGap, "Average gap between requests"),
-
- ADD_STAT(masterReadBytes, "Per-master bytes read from memory"),
- ADD_STAT(masterWriteBytes, "Per-master bytes write to memory"),
- ADD_STAT(masterReadRate,
- "Per-master bytes read from memory rate (Bytes/sec)"),
- ADD_STAT(masterWriteRate,
- "Per-master bytes write to memory rate (Bytes/sec)"),
- ADD_STAT(masterReadAccesses,
- "Per-master read serviced memory accesses"),
- ADD_STAT(masterWriteAccesses,
- "Per-master write serviced memory accesses"),
- ADD_STAT(masterReadTotalLat,
- "Per-master read total memory access latency"),
- ADD_STAT(masterWriteTotalLat,
- "Per-master write total memory access latency"),
- ADD_STAT(masterReadAvgLat,
- "Per-master read average memory access latency"),
- ADD_STAT(masterWriteAvgLat,
- "Per-master write average memory access latency")
-
-{
-}
-
-void
-DRAMCtrl::CtrlStats::regStats()
-{
- using namespace Stats;
-
- assert(ctrl.system());
- const auto max_masters = ctrl.system()->maxMasters();
-
- avgRdQLen.precision(2);
- avgWrQLen.precision(2);
-
- readPktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1);
- writePktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1);
-
- rdQLenPdf.init(ctrl.readBufferSize);
- wrQLenPdf.init(ctrl.writeBufferSize);
-
- rdPerTurnAround
- .init(ctrl.readBufferSize)
- .flags(nozero);
- wrPerTurnAround
- .init(ctrl.writeBufferSize)
- .flags(nozero);
-
- avgRdBWSys.precision(2);
- avgWrBWSys.precision(2);
- avgGap.precision(2);
-
- // per-master bytes read and written to memory
- masterReadBytes
- .init(max_masters)
- .flags(nozero | nonan);
-
- masterWriteBytes
- .init(max_masters)
- .flags(nozero | nonan);
-
- // per-master bytes read and written to memory rate
- masterReadRate
- .flags(nozero | nonan)
- .precision(12);
-
- masterReadAccesses
- .init(max_masters)
- .flags(nozero);
-
- masterWriteAccesses
- .init(max_masters)
- .flags(nozero);
-
- masterReadTotalLat
- .init(max_masters)
- .flags(nozero | nonan);
-
- masterReadAvgLat
- .flags(nonan)
- .precision(2);
-
- masterWriteRate
- .flags(nozero | nonan)
- .precision(12);
-
- masterWriteTotalLat
- .init(max_masters)
- .flags(nozero | nonan);
-
- masterWriteAvgLat
- .flags(nonan)
- .precision(2);
-
- for (int i = 0; i < max_masters; i++) {
- const std::string master = ctrl.system()->getMasterName(i);
- masterReadBytes.subname(i, master);
- masterReadRate.subname(i, master);
- masterWriteBytes.subname(i, master);
- masterWriteRate.subname(i, master);
- masterReadAccesses.subname(i, master);
- masterWriteAccesses.subname(i, master);
- masterReadTotalLat.subname(i, master);
- masterReadAvgLat.subname(i, master);
- masterWriteTotalLat.subname(i, master);
- masterWriteAvgLat.subname(i, master);
- }
-
- // Formula stats
- avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
- avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
-
- avgGap = totGap / (readReqs + writeReqs);
-
- masterReadRate = masterReadBytes / simSeconds;
- masterWriteRate = masterWriteBytes / simSeconds;
- masterReadAvgLat = masterReadTotalLat / masterReadAccesses;
- masterWriteAvgLat = masterWriteTotalLat / masterWriteAccesses;
-}
-
-void
-DRAMInterface::DRAMStats::resetStats()
-{
- dram.lastStatsResetTick = curTick();
-}
-
-DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
- : Stats::Group(&_dram),
- dram(_dram),
-
- ADD_STAT(readBursts, "Number of DRAM read bursts"),
- ADD_STAT(writeBursts, "Number of DRAM write bursts"),
-
-    ADD_STAT(perBankRdBursts, "Per bank read bursts"),
- ADD_STAT(perBankWrBursts, "Per bank write bursts"),
-
- ADD_STAT(totQLat, "Total ticks spent queuing"),
- ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
- ADD_STAT(totMemAccLat,
- "Total ticks spent from burst creation until serviced "
- "by the DRAM"),
-
- ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"),
- ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
- ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"),
-
- ADD_STAT(readRowHits, "Number of row buffer hits during reads"),
- ADD_STAT(writeRowHits, "Number of row buffer hits during writes"),
- ADD_STAT(readRowHitRate, "Row buffer hit rate for reads"),
- ADD_STAT(writeRowHitRate, "Row buffer hit rate for writes"),
-
- ADD_STAT(bytesPerActivate, "Bytes accessed per row activation"),
- ADD_STAT(bytesRead, "Total number of bytes read from DRAM"),
- ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
- ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"),
- ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"),
- ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
-
- ADD_STAT(busUtil, "Data bus utilization in percentage"),
- ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
- ADD_STAT(busUtilWrite, "Data bus utilization in percentage for writes"),
-
- ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined")
-
-{
-}
-
-void
-DRAMInterface::DRAMStats::regStats()
-{
- using namespace Stats;
-
- avgQLat.precision(2);
- avgBusLat.precision(2);
- avgMemAccLat.precision(2);
-
- readRowHitRate.precision(2);
- writeRowHitRate.precision(2);
-
- perBankRdBursts.init(dram.banksPerRank * dram.ranksPerChannel);
- perBankWrBursts.init(dram.banksPerRank * dram.ranksPerChannel);
-
- bytesPerActivate
- .init(dram.maxAccessesPerRow ?
- dram.maxAccessesPerRow : dram.rowBufferSize)
- .flags(nozero);
-
- peakBW.precision(2);
- busUtil.precision(2);
- busUtilWrite.precision(2);
- busUtilRead.precision(2);
-
- pageHitRate.precision(2);
-
- // Formula stats
- avgQLat = totQLat / readBursts;
- avgBusLat = totBusLat / readBursts;
- avgMemAccLat = totMemAccLat / readBursts;
-
- readRowHitRate = (readRowHits / readBursts) * 100;
- writeRowHitRate = (writeRowHits / writeBursts) * 100;
-
- avgRdBW = (bytesRead / 1000000) / simSeconds;
- avgWrBW = (bytesWritten / 1000000) / simSeconds;
- peakBW = (SimClock::Frequency / dram.burstDelay()) *
- dram.bytesPerBurst() / 1000000;
-
- busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
- busUtilRead = avgRdBW / peakBW * 100;
- busUtilWrite = avgWrBW / peakBW * 100;
-
- pageHitRate = (writeRowHits + readRowHits) /
- (writeBursts + readBursts) * 100;
-}
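To make the peakBW and busUtil formula stats above concrete, here is a minimal sketch that evaluates them with assumed DDR3-1600-like numbers (64-byte bursts, a 5 ns burst delay, 1 ps ticks); the values are illustrative and not taken from any gem5 config.

// Illustrative sketch only: evaluates the peakBW and busUtil formulas with
// assumed DDR3-1600-like numbers; not read from any gem5 configuration.
#include <cstdio>

int main()
{
    const double tick_frequency  = 1e12;  // ticks per second (1 ps ticks)
    const double burst_delay     = 5000;  // assumed tBURST of 5 ns, in ticks
    const double bytes_per_burst = 64;    // assumed 64-byte bursts

    // mirrors: peakBW = (SimClock::Frequency / burstDelay()) * bytesPerBurst() / 1e6
    const double peak_bw = (tick_frequency / burst_delay) *
                           bytes_per_burst / 1e6;               // 12800 MB/s

    // assumed 6400 MB/s of read plus 3200 MB/s of write traffic
    const double bus_util = (6400.0 + 3200.0) / peak_bw * 100;  // 75 %

    std::printf("peakBW = %.0f MB/s, busUtil = %.0f%%\n", peak_bw, bus_util);
    return 0;
}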
-
-DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
- : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
- rank(_rank),
-
- ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"),
- ADD_STAT(preEnergy, "Energy for precharge commands per rank (pJ)"),
- ADD_STAT(readEnergy, "Energy for read commands per rank (pJ)"),
- ADD_STAT(writeEnergy, "Energy for write commands per rank (pJ)"),
- ADD_STAT(refreshEnergy, "Energy for refresh commands per rank (pJ)"),
- ADD_STAT(actBackEnergy, "Energy for active background per rank (pJ)"),
- ADD_STAT(preBackEnergy, "Energy for precharge background per rank (pJ)"),
- ADD_STAT(actPowerDownEnergy,
- "Energy for active power-down per rank (pJ)"),
- ADD_STAT(prePowerDownEnergy,
- "Energy for precharge power-down per rank (pJ)"),
- ADD_STAT(selfRefreshEnergy, "Energy for self refresh per rank (pJ)"),
-
- ADD_STAT(totalEnergy, "Total energy per rank (pJ)"),
- ADD_STAT(averagePower, "Core power per rank (mW)"),
-
- ADD_STAT(totalIdleTime, "Total Idle time Per DRAM Rank"),
- ADD_STAT(pwrStateTime, "Time in different power states")
-{
-}
-
-void
-DRAMInterface::RankStats::regStats()
-{
- Stats::Group::regStats();
-
- pwrStateTime
- .init(6)
- .subname(0, "IDLE")
- .subname(1, "REF")
- .subname(2, "SREF")
- .subname(3, "PRE_PDN")
- .subname(4, "ACT")
- .subname(5, "ACT_PDN");
-}
-
-void
-DRAMInterface::RankStats::resetStats()
-{
- Stats::Group::resetStats();
-
- rank.resetStats();
-}
-
-void
-DRAMInterface::RankStats::preDumpStats()
-{
- Stats::Group::preDumpStats();
-
- rank.computeStats();
-}
-
-void
-DRAMCtrl::recvFunctional(PacketPtr pkt)
-{
- if (dram && dram->getAddrRange().contains(pkt->getAddr())) {
- // rely on the abstract memory
- dram->functionalAccess(pkt);
- } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) {
- // rely on the abstract memory
- nvm->functionalAccess(pkt);
- } else {
- panic("Can't handle address range for packet %s\n",
- pkt->print());
- }
-}
-
-Port &
-DRAMCtrl::getPort(const string &if_name, PortID idx)
-{
- if (if_name != "port") {
- return QoS::MemCtrl::getPort(if_name, idx);
- } else {
- return port;
- }
-}
-
-bool
-DRAMCtrl::allIntfDrained() const
-{
- // ensure dram is in power down and refresh IDLE states
- bool dram_drained = !dram || dram->allRanksDrained();
- // No outstanding NVM writes
- // All other queues verified as needed with calling logic
- bool nvm_drained = !nvm || nvm->allRanksDrained();
- return (dram_drained && nvm_drained);
-}
-
-DrainState
-DRAMCtrl::drain()
-{
- // if there is anything in any of our internal queues, keep track
- // of that as well
- if (!(!totalWriteQueueSize && !totalReadQueueSize && respQueue.empty() &&
- allIntfDrained())) {
-
- DPRINTF(Drain, "DRAM controller not drained, write: %d, read: %d,"
- " resp: %d\n", totalWriteQueueSize, totalReadQueueSize,
- respQueue.size());
-
- // the only queue that is not drained automatically over time
- // is the write queue, thus kick things into action if needed
- if (!totalWriteQueueSize && !nextReqEvent.scheduled()) {
- schedule(nextReqEvent, curTick());
- }
-
- if (dram)
- dram->drainRanks();
-
- return DrainState::Draining;
- } else {
- return DrainState::Drained;
- }
-}
-
-void
-DRAMCtrl::drainResume()
-{
- if (!isTimingMode && system()->isTimingMode()) {
- // if we switched to timing mode, kick things into action,
- // and behave as if we restored from a checkpoint
- startup();
- dram->startup();
- } else if (isTimingMode && !system()->isTimingMode()) {
- // if we switch from timing mode, stop the refresh events to
- // not cause issues with KVM
- if (dram)
- dram->suspend();
- }
-
- // update the mode
- isTimingMode = system()->isTimingMode();
-}
-
-DRAMCtrl::MemoryPort::MemoryPort(const std::string& name, DRAMCtrl& _ctrl)
- : QueuedSlavePort(name, &_ctrl, queue), queue(_ctrl, *this, true),
- ctrl(_ctrl)
-{ }
-
-AddrRangeList
-DRAMCtrl::MemoryPort::getAddrRanges() const
-{
- AddrRangeList ranges;
- if (ctrl.dram) {
- DPRINTF(DRAM, "Pushing DRAM ranges to port\n");
- ranges.push_back(ctrl.dram->getAddrRange());
- }
- if (ctrl.nvm) {
- DPRINTF(DRAM, "Pushing NVM ranges to port\n");
- ranges.push_back(ctrl.nvm->getAddrRange());
- }
- return ranges;
-}
-
-void
-DRAMCtrl::MemoryPort::recvFunctional(PacketPtr pkt)
-{
- pkt->pushLabel(ctrl.name());
-
- if (!queue.trySatisfyFunctional(pkt)) {
- // Default implementation of SimpleTimingPort::recvFunctional()
- // calls recvAtomic() and throws away the latency; we can save a
- // little here by just not calculating the latency.
- ctrl.recvFunctional(pkt);
- }
-
- pkt->popLabel();
-}
-
-Tick
-DRAMCtrl::MemoryPort::recvAtomic(PacketPtr pkt)
-{
- return ctrl.recvAtomic(pkt);
-}
-
-bool
-DRAMCtrl::MemoryPort::recvTimingReq(PacketPtr pkt)
-{
- // pass it to the memory controller
- return ctrl.recvTimingReq(pkt);
-}
-
-DRAMCtrl*
-DRAMCtrlParams::create()
-{
- return new DRAMCtrl(this);
-}
-
-NVMInterface::NVMInterface(const NVMInterfaceParams* _p)
- : MemInterface(_p),
- maxPendingWrites(_p->max_pending_writes),
- maxPendingReads(_p->max_pending_reads),
- twoCycleRdWr(_p->two_cycle_rdwr),
- tREAD(_p->tREAD), tWRITE(_p->tWRITE), tSEND(_p->tSEND),
- stats(*this),
- writeRespondEvent([this]{ processWriteRespondEvent(); }, name()),
- readReadyEvent([this]{ processReadReadyEvent(); }, name()),
- nextReadAt(0), numPendingReads(0), numReadDataReady(0),
- numReadsToIssue(0), numWritesQueued(0),
- readBufferSize(_p->read_buffer_size),
- writeBufferSize(_p->write_buffer_size)
-{
- DPRINTF(NVM, "Setting up NVM Interface\n");
-
- fatal_if(!isPowerOf2(burstSize), "NVM burst size %d is not allowed, "
- "must be a power of two\n", burstSize);
-
- // sanity check the ranks since we rely on bit slicing for the
- // address decoding
- fatal_if(!isPowerOf2(ranksPerChannel), "NVM rank count of %d is "
- "not allowed, must be a power of two\n", ranksPerChannel);
-
-    for (int i = 0; i < ranksPerChannel; i++) {
- // Add NVM ranks to the system
- DPRINTF(NVM, "Creating NVM rank %d \n", i);
- Rank* rank = new Rank(_p, i, *this);
- ranks.push_back(rank);
- }
-
- uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
-
- DPRINTF(NVM, "NVM capacity %lld (%lld) bytes\n", capacity,
- AbstractMemory::size());
-
- rowsPerBank = capacity / (rowBufferSize *
- banksPerRank * ranksPerChannel);
-
-}
-
-NVMInterface*
-NVMInterfaceParams::create()
-{
- return new NVMInterface(this);
-}
-
-NVMInterface::Rank::Rank(const NVMInterfaceParams* _p,
- int _rank, NVMInterface& _nvm)
- : EventManager(&_nvm), nvm(_nvm), rank(_rank),
- banks(_p->banks_per_rank)
-{
- for (int b = 0; b < _p->banks_per_rank; b++) {
- banks[b].bank = b;
- // No bank groups; simply assign to bank number
- banks[b].bankgr = b;
- }
-}
-
-void
-NVMInterface::init()
-{
- AbstractMemory::init();
-}
-
-void NVMInterface::setupRank(const uint8_t rank, const bool is_read)
-{
- if (is_read) {
- // increment count to trigger read and track number of reads in Q
- numReadsToIssue++;
- } else {
- // increment count to track number of writes in Q
- numWritesQueued++;
- }
-}
-
-pair<DRAMPacketQueue::iterator, Tick>
-NVMInterface::chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const
-{
-    // remember if we found a hit, but one that cannot issue seamlessly
- bool found_prepped_pkt = false;
-
- auto selected_pkt_it = queue.end();
- Tick selected_col_at = MaxTick;
-
- for (auto i = queue.begin(); i != queue.end() ; ++i) {
- DRAMPacket* pkt = *i;
-
- // select optimal NVM packet in Q
- if (!pkt->isDram()) {
- const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
- const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
- bank.wrAllowedAt;
-
- // check if rank is not doing a refresh and thus is available,
- // if not, jump to the next packet
- if (burstReady(pkt)) {
- DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__,
- pkt->bank, pkt->rank);
-
- // no additional rank-to-rank or media delays
- if (col_allowed_at <= min_col_at) {
- // FCFS within entries that can issue without
- // additional delay, such as same rank accesses
- // or media delay requirements
- selected_pkt_it = i;
- selected_col_at = col_allowed_at;
- // no need to look through the remaining queue entries
- DPRINTF(NVM, "%s Seamless buffer hit\n", __func__);
- break;
- } else if (!found_prepped_pkt) {
-                    // packet is to a prepped region but cannot issue
- // seamlessly; remember this one and continue
- selected_pkt_it = i;
- selected_col_at = col_allowed_at;
- DPRINTF(NVM, "%s Prepped packet found \n", __func__);
- found_prepped_pkt = true;
- }
- } else {
- DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__,
- pkt->bank, pkt->rank);
- }
- }
- }
-
- if (selected_pkt_it == queue.end()) {
- DPRINTF(NVM, "%s no available NVM ranks found\n", __func__);
- }
-
- return make_pair(selected_pkt_it, selected_col_at);
-}
-
-void
-NVMInterface::chooseRead(DRAMPacketQueue& queue)
-{
- Tick cmd_at = std::max(curTick(), nextReadAt);
-
- // This method does the arbitration between non-deterministic read
- // requests to NVM. The chosen packet is not removed from the queue
- // at this time. Removal from the queue will occur when the data is
- // ready and a separate SEND command is issued to retrieve it via the
- // chooseNext function in the top-level controller.
- assert(!queue.empty());
-
- assert(numReadsToIssue > 0);
- numReadsToIssue--;
- // For simplicity, issue non-deterministic reads in order (fcfs)
- for (auto i = queue.begin(); i != queue.end() ; ++i) {
- DRAMPacket* pkt = *i;
-
- // Find 1st NVM read packet that hasn't issued read command
- if (pkt->readyTime == MaxTick && !pkt->isDram() && pkt->isRead()) {
- // get the bank
- Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];
-
-            // issuing a read, inc counter and verify we haven't overrun
- numPendingReads++;
- assert(numPendingReads <= maxPendingReads);
-
- // increment the bytes accessed and the accesses per row
- bank_ref.bytesAccessed += burstSize;
-
-            // Verify command bandwidth to issue
-            // Host can issue read immediately with buffering closer
- // to the NVM. The actual execution at the NVM may be delayed
- // due to busy resources
- if (twoCycleRdWr) {
- cmd_at = ctrl->verifyMultiCmd(cmd_at,
- maxCommandsPerWindow, tCK);
- } else {
- cmd_at = ctrl->verifySingleCmd(cmd_at,
- maxCommandsPerWindow);
- }
-
- // Update delay to next read
- // Ensures single read command issued per cycle
- nextReadAt = cmd_at + tCK;
-
- // If accessing a new location in this bank, update timing
- // and stats
- if (bank_ref.openRow != pkt->row) {
- // update the open bank, re-using row field
- bank_ref.openRow = pkt->row;
-
- // sample the bytes accessed to a buffer in this bank
- // here when we are re-buffering the data
- stats.bytesPerBank.sample(bank_ref.bytesAccessed);
- // start counting anew
- bank_ref.bytesAccessed = 0;
-
- // holdoff next command to this bank until the read completes
- // and the data has been successfully buffered
- // can pipeline accesses to the same bank, sending them
- // across the interface B2B, but will incur full access
- // delay between data ready responses to different buffers
- // in a bank
- bank_ref.actAllowedAt = std::max(cmd_at,
- bank_ref.actAllowedAt) + tREAD;
- }
- // update per packet readyTime to holdoff burst read operation
- // overloading readyTime, which will be updated again when the
- // burst is issued
- pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt);
-
- DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. "
- "Data ready at %d\n",
- bank_ref.bank, cmd_at, pkt->readyTime);
-
- // Insert into read ready queue. It will be handled after
- // the media delay has been met
- if (readReadyQueue.empty()) {
- assert(!readReadyEvent.scheduled());
- schedule(readReadyEvent, pkt->readyTime);
- } else if (readReadyEvent.when() > pkt->readyTime) {
- // move it sooner in time, to the first read with data
- reschedule(readReadyEvent, pkt->readyTime);
- } else {
- assert(readReadyEvent.scheduled());
- }
- readReadyQueue.push_back(pkt->readyTime);
-
- // found an NVM read to issue - break out
- break;
- }
- }
-}
-
-void
-NVMInterface::processReadReadyEvent()
-{
- // signal that there is read data ready to be transmitted
- numReadDataReady++;
-
- DPRINTF(NVM,
- "processReadReadyEvent(): Data for an NVM read is ready. "
- "numReadDataReady is %d\t numPendingReads is %d\n",
- numReadDataReady, numPendingReads);
-
- // Find lowest ready time and verify it is equal to curTick
- // also find the next lowest to schedule next event
- // Done with this response, erase entry
- auto ready_it = readReadyQueue.begin();
- Tick next_ready_at = MaxTick;
- for (auto i = readReadyQueue.begin(); i != readReadyQueue.end() ; ++i) {
- if (*ready_it > *i) {
- next_ready_at = *ready_it;
- ready_it = i;
- } else if ((next_ready_at > *i) && (i != ready_it)) {
- next_ready_at = *i;
- }
- }
-
- // Verify we found the time of this event and remove it
- assert(*ready_it == curTick());
- readReadyQueue.erase(ready_it);
-
- if (!readReadyQueue.empty()) {
- assert(readReadyQueue.front() >= curTick());
- assert(!readReadyEvent.scheduled());
- schedule(readReadyEvent, next_ready_at);
- }
-
- // It is possible that a new command kicks things back into
- // action before reaching this point but need to ensure that we
- // continue to process new commands as read data becomes ready
- // This will also trigger a drain if needed
- if (!ctrl->requestEventScheduled()) {
- DPRINTF(NVM, "Restart controller scheduler immediately\n");
- ctrl->restartScheduler(curTick());
- }
-}
-
-
-bool
-NVMInterface::burstReady(DRAMPacket* pkt) const {
- bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true)) &&
- (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
- bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true) &&
- !writeRespQueueFull();
- return (read_rdy || write_rdy);
-}
-
-pair<Tick, Tick>
-NVMInterface::doBurstAccess(DRAMPacket* pkt, Tick next_burst_at)
-{
- DPRINTF(NVM, "NVM Timing access to addr %lld, rank/bank/row %d %d %d\n",
- pkt->addr, pkt->rank, pkt->bank, pkt->row);
-
- // get the bank
- Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];
-
- // respect any constraints on the command
- const Tick bst_allowed_at = pkt->isRead() ?
- bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;
-
- // we need to wait until the bus is available before we can issue
- // the command; need minimum of tBURST between commands
- Tick cmd_at = std::max(bst_allowed_at, curTick());
-
- // we need to wait until the bus is available before we can issue
- // the command; need minimum of tBURST between commands
- cmd_at = std::max(cmd_at, next_burst_at);
-
- // Verify there is command bandwidth to issue
- // Read burst (send command) is a simple data access and only requires
- // one command cycle
- // Write command may require multiple cycles to enable larger address space
- if (pkt->isRead() || !twoCycleRdWr) {
- cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
- } else {
- cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
- }
- // update the packet ready time to reflect when data will be transferred
- // Use the same bus delays defined for NVM
- pkt->readyTime = cmd_at + tSEND + tBURST;
-
- Tick dly_to_rd_cmd;
- Tick dly_to_wr_cmd;
- for (auto n : ranks) {
- for (int i = 0; i < banksPerRank; i++) {
- // base delay is a function of tBURST and bus turnaround
- dly_to_rd_cmd = pkt->isRead() ? tBURST : writeToReadDelay();
- dly_to_wr_cmd = pkt->isRead() ? readToWriteDelay() : tBURST;
-
- if (pkt->rank != n->rank) {
- // adjust timing for different ranks
- // Need to account for rank-to-rank switching with tCS
- dly_to_wr_cmd = rankToRankDelay();
- dly_to_rd_cmd = rankToRankDelay();
- }
- n->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
- n->banks[i].rdAllowedAt);
-
- n->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
- n->banks[i].wrAllowedAt);
- }
- }
-
- DPRINTF(NVM, "NVM Access to %lld, ready at %lld.\n",
- pkt->addr, pkt->readyTime);
-
- if (pkt->isRead()) {
- // completed the read, decrement counters
- assert(numPendingReads != 0);
- assert(numReadDataReady != 0);
-
- numPendingReads--;
- numReadDataReady--;
- } else {
- // Adjust number of NVM writes in Q
- assert(numWritesQueued > 0);
- numWritesQueued--;
-
- // increment the bytes accessed and the accesses per row
- // only increment for writes as the reads are handled when
- // the non-deterministic read is issued, before the data transfer
- bank_ref.bytesAccessed += burstSize;
-
- // Commands will be issued serially when accessing the same bank
- // Commands can issue in parallel to different banks
- if ((bank_ref.bank == pkt->bank) &&
- (bank_ref.openRow != pkt->row)) {
- // update the open buffer, re-using row field
- bank_ref.openRow = pkt->row;
-
- // sample the bytes accessed to a buffer in this bank
- // here when we are re-buffering the data
- stats.bytesPerBank.sample(bank_ref.bytesAccessed);
- // start counting anew
- bank_ref.bytesAccessed = 0;
- }
-
- // Determine when write will actually complete, assuming it is
- // scheduled to push to NVM immediately
- // update actAllowedAt to serialize next command completion that
- // accesses this bank; must wait until this write completes
- // Data accesses to the same buffer in this bank
- // can issue immediately after actAllowedAt expires, without
- // waiting additional delay of tWRITE. Can revisit this
- // assumption/simplification in the future.
- bank_ref.actAllowedAt = std::max(pkt->readyTime,
- bank_ref.actAllowedAt) + tWRITE;
-
- // Need to track number of outstanding writes to
- // ensure 'buffer' on media controller does not overflow
- assert(!writeRespQueueFull());
-
- // Insert into write done queue. It will be handled after
- // the media delay has been met
- if (writeRespQueueEmpty()) {
- assert(!writeRespondEvent.scheduled());
- schedule(writeRespondEvent, bank_ref.actAllowedAt);
- } else {
- assert(writeRespondEvent.scheduled());
- }
- writeRespQueue.push_back(bank_ref.actAllowedAt);
- writeRespQueue.sort();
- if (writeRespondEvent.when() > bank_ref.actAllowedAt) {
-            DPRINTF(NVM, "Rescheduled respond event from %lld to %lld\n",
-                    writeRespondEvent.when(), bank_ref.actAllowedAt);
-            DPRINTF(NVM, "Front of response queue is %lld\n",
- writeRespQueue.front());
- reschedule(writeRespondEvent, bank_ref.actAllowedAt);
- }
-
- }
-
- // Update the stats
- if (pkt->isRead()) {
- stats.readBursts++;
- stats.bytesRead += burstSize;
- stats.perBankRdBursts[pkt->bankId]++;
- stats.pendingReads.sample(numPendingReads);
-
- // Update latency stats
- stats.totMemAccLat += pkt->readyTime - pkt->entryTime;
- stats.totBusLat += tBURST;
- stats.totQLat += cmd_at - pkt->entryTime;
- } else {
- stats.writeBursts++;
- stats.bytesWritten += burstSize;
- stats.perBankWrBursts[pkt->bankId]++;
- }
-
- return make_pair(cmd_at, cmd_at + tBURST);
-}
-
-void
-NVMInterface::processWriteRespondEvent()
-{
- DPRINTF(NVM,
-        "processWriteRespondEvent(): An NVM write reached its readyTime. "
- "%d remaining pending NVM writes\n", writeRespQueue.size());
-
- // Update stat to track histogram of pending writes
- stats.pendingWrites.sample(writeRespQueue.size());
-
- // Done with this response, pop entry
- writeRespQueue.pop_front();
-
- if (!writeRespQueue.empty()) {
- assert(writeRespQueue.front() >= curTick());
- assert(!writeRespondEvent.scheduled());
- schedule(writeRespondEvent, writeRespQueue.front());
- }
-
- // It is possible that a new command kicks things back into
- // action before reaching this point but need to ensure that we
- // continue to process new commands as writes complete at the media and
- // credits become available. This will also trigger a drain if needed
- if (!ctrl->requestEventScheduled()) {
- DPRINTF(NVM, "Restart controller scheduler immediately\n");
- ctrl->restartScheduler(curTick());
- }
-}
-
-void
-NVMInterface::addRankToRankDelay(Tick cmd_at)
-{
- // update timing for NVM ranks due to bursts issued
- // to ranks for other media interfaces
- for (auto n : ranks) {
- for (int i = 0; i < banksPerRank; i++) {
- // different rank by default
- // Need to only account for rank-to-rank switching
- n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
- n->banks[i].rdAllowedAt);
- n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
- n->banks[i].wrAllowedAt);
- }
- }
-}
-
-bool
-NVMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm)
-{
- DPRINTF(NVM,"isBusy: numReadDataReady = %d\n", numReadDataReady);
-    // Determine if the NVM is busy and cannot issue a burst
- // A read burst cannot issue when data is not ready from the NVM
- // Also check that we have reads queued to ensure we can change
- // bus direction to service potential write commands.
- // A write cannot issue once we've reached MAX pending writes
- // Only assert busy for the write case when there are also
- // no reads in Q and the write queue only contains NVM commands
- // This allows the bus state to switch and service reads
- return (ctrl->inReadBusState(true) ?
- (numReadDataReady == 0) && !read_queue_empty :
- writeRespQueueFull() && read_queue_empty &&
- all_writes_nvm);
-}
-
-
-NVMInterface::NVMStats::NVMStats(NVMInterface &_nvm)
- : Stats::Group(&_nvm),
- nvm(_nvm),
-
- ADD_STAT(readBursts, "Number of NVM read bursts"),
- ADD_STAT(writeBursts, "Number of NVM write bursts"),
-
-    ADD_STAT(perBankRdBursts, "Per bank read bursts"),
- ADD_STAT(perBankWrBursts, "Per bank write bursts"),
-
- ADD_STAT(totQLat, "Total ticks spent queuing"),
- ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
- ADD_STAT(totMemAccLat,
- "Total ticks spent from burst creation until serviced "
- "by the NVM"),
- ADD_STAT(avgQLat, "Average queueing delay per NVM burst"),
- ADD_STAT(avgBusLat, "Average bus latency per NVM burst"),
- ADD_STAT(avgMemAccLat, "Average memory access latency per NVM burst"),
-
-    ADD_STAT(bytesRead, "Total number of bytes read from NVM"),
-    ADD_STAT(bytesWritten, "Total number of bytes written to NVM"),
-    ADD_STAT(avgRdBW, "Average NVM read bandwidth in MiBytes/s"),
-    ADD_STAT(avgWrBW, "Average NVM write bandwidth in MiBytes/s"),
- ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
- ADD_STAT(busUtil, "NVM Data bus utilization in percentage"),
- ADD_STAT(busUtilRead, "NVM Data bus read utilization in percentage"),
- ADD_STAT(busUtilWrite, "NVM Data bus write utilization in percentage"),
-
- ADD_STAT(pendingReads, "Reads issued to NVM for which data has not been "
- "transferred"),
- ADD_STAT(pendingWrites, "Number of outstanding writes to NVM"),
- ADD_STAT(bytesPerBank, "Bytes read within a bank before loading "
- "new bank")
-{
-}
-
-void
-NVMInterface::NVMStats::regStats()
-{
- using namespace Stats;
-
- perBankRdBursts.init(nvm.ranksPerChannel == 0 ? 1 :
- nvm.banksPerRank * nvm.ranksPerChannel);
-
- perBankWrBursts.init(nvm.ranksPerChannel == 0 ? 1 :
- nvm.banksPerRank * nvm.ranksPerChannel);
-
- avgQLat.precision(2);
- avgBusLat.precision(2);
- avgMemAccLat.precision(2);
-
- avgRdBW.precision(2);
- avgWrBW.precision(2);
- peakBW.precision(2);
-
- busUtil.precision(2);
- busUtilRead.precision(2);
- busUtilWrite.precision(2);
-
- pendingReads
- .init(nvm.maxPendingReads)
- .flags(nozero);
-
- pendingWrites
- .init(nvm.maxPendingWrites)
- .flags(nozero);
-
- bytesPerBank
- .init(nvm.rowBufferSize)
- .flags(nozero);
-
- avgQLat = totQLat / readBursts;
- avgBusLat = totBusLat / readBursts;
- avgMemAccLat = totMemAccLat / readBursts;
-
- avgRdBW = (bytesRead / 1000000) / simSeconds;
- avgWrBW = (bytesWritten / 1000000) / simSeconds;
- peakBW = (SimClock::Frequency / nvm.tBURST) *
- nvm.burstSize / 1000000;
-
- busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
- busUtilRead = avgRdBW / peakBW * 100;
- busUtilWrite = avgWrBW / peakBW * 100;
-}
+++ /dev/null
-/*
- * Copyright (c) 2012-2020 ARM Limited
- * All rights reserved
- *
- * The license below extends only to copyright in the software and shall
- * not be construed as granting a license to any other intellectual
- * property including but not limited to intellectual property relating
- * to a hardware implementation of the functionality of the software
- * licensed hereunder. You may use the software subject to the license
- * terms below provided that you ensure that this notice is replicated
- * unmodified and in its entirety in all distributions of the software,
- * modified or unmodified, in source code or in binary form.
- *
- * Copyright (c) 2013 Amin Farmahini-Farahani
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met: redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer;
- * redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution;
- * neither the name of the copyright holders nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/**
- * @file
- * DRAMCtrl declaration
- */
-
-#ifndef __MEM_DRAM_CTRL_HH__
-#define __MEM_DRAM_CTRL_HH__
-
-#include <deque>
-#include <string>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "base/statistics.hh"
-#include "enums/AddrMap.hh"
-#include "enums/MemSched.hh"
-#include "enums/PageManage.hh"
-#include "mem/abstract_mem.hh"
-#include "mem/drampower.hh"
-#include "mem/qos/mem_ctrl.hh"
-#include "mem/qport.hh"
-#include "params/DRAMCtrl.hh"
-#include "params/DRAMInterface.hh"
-#include "params/MemInterface.hh"
-#include "params/NVMInterface.hh"
-#include "sim/eventq.hh"
-
-class DRAMInterfaceParams;
-class NVMInterfaceParams;
-
-/**
- * A burst helper helps organize and manage a packet that is larger than
- * the DRAM burst size. A system packet that is larger than the burst size
- * is split into multiple DRAM packets and all those DRAM packets point to
- * a single burst helper such that we know when the whole packet is served.
- */
-class BurstHelper
-{
- public:
-
-    /** Number of DRAM bursts required for a system packet **/
- const unsigned int burstCount;
-
- /** Number of DRAM bursts serviced so far for a system packet **/
- unsigned int burstsServiced;
-
- BurstHelper(unsigned int _burstCount)
- : burstCount(_burstCount), burstsServiced(0)
- { }
-};
-
-/**
- * A DRAM packet stores packets along with the timestamp of when
- * the packet entered the queue, and also the decoded address.
- */
-class DRAMPacket
-{
- public:
-
- /** When did request enter the controller */
- const Tick entryTime;
-
- /** When will request leave the controller */
- Tick readyTime;
-
- /** This comes from the outside world */
- const PacketPtr pkt;
-
- /** MasterID associated with the packet */
- const MasterID _masterId;
-
- const bool read;
-
- /** Does this packet access DRAM?*/
- const bool dram;
-
- /** Will be populated by address decoder */
- const uint8_t rank;
- const uint8_t bank;
- const uint32_t row;
-
- /**
- * Bank id is calculated considering banks in all the ranks
- * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and
- * bankId = 8 --> rank1, bank0
- */
- const uint16_t bankId;
-
- /**
- * The starting address of the DRAM packet.
- * This address could be unaligned to burst size boundaries. The
- * reason is to keep the address offset so we can accurately check
- * incoming read packets with packets in the write queue.
- */
- Addr addr;
-
- /**
- * The size of this dram packet in bytes
-     * The size of this DRAM packet in bytes
-     * It is always equal to or smaller than the DRAM burst size
- unsigned int size;
-
- /**
- * A pointer to the BurstHelper if this DRAMPacket is a split packet
- * If not a split packet (common case), this is set to NULL
- */
- BurstHelper* burstHelper;
-
- /**
- * QoS value of the encapsulated packet read at queuing time
- */
- uint8_t _qosValue;
-
- /**
- * Set the packet QoS value
- * (interface compatibility with Packet)
- */
- inline void qosValue(const uint8_t qv) { _qosValue = qv; }
-
- /**
- * Get the packet QoS value
- * (interface compatibility with Packet)
- */
- inline uint8_t qosValue() const { return _qosValue; }
-
- /**
- * Get the packet MasterID
- * (interface compatibility with Packet)
- */
- inline MasterID masterId() const { return _masterId; }
-
- /**
- * Get the packet size
- * (interface compatibility with Packet)
- */
- inline unsigned int getSize() const { return size; }
-
- /**
- * Get the packet address
- * (interface compatibility with Packet)
- */
- inline Addr getAddr() const { return addr; }
-
- /**
-     * Return true if it is a read packet
- * (interface compatibility with Packet)
- */
- inline bool isRead() const { return read; }
-
- /**
-     * Return true if it is a write packet
- * (interface compatibility with Packet)
- */
- inline bool isWrite() const { return !read; }
-
- /**
-     * Return true if it is a DRAM access
- */
- inline bool isDram() const { return dram; }
-
- DRAMPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _rank,
- uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr,
- unsigned int _size)
- : entryTime(curTick()), readyTime(curTick()), pkt(_pkt),
- _masterId(pkt->masterId()),
- read(is_read), dram(is_dram), rank(_rank), bank(_bank), row(_row),
- bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL),
- _qosValue(_pkt->qosValue())
- { }
-
-};
-
-// The DRAM packets are stored in multiple deques,
-// based on their QoS priority
-typedef std::deque<DRAMPacket*> DRAMPacketQueue;
-
-
-/**
- * General interface to memory device
- * Includes functions and parameters shared across media types
- */
-class MemInterface : public AbstractMemory
-{
- protected:
- /**
- * A basic class to track the bank state, i.e. what row is
- * currently open (if any), when is the bank free to accept a new
- * column (read/write) command, when can it be precharged, and
- * when can it be activated.
- *
- * The bank also keeps track of how many bytes have been accessed
- * in the open row since it was opened.
- */
- class Bank
- {
-
- public:
-
- static const uint32_t NO_ROW = -1;
-
- uint32_t openRow;
- uint8_t bank;
- uint8_t bankgr;
-
- Tick rdAllowedAt;
- Tick wrAllowedAt;
- Tick preAllowedAt;
- Tick actAllowedAt;
-
- uint32_t rowAccesses;
- uint32_t bytesAccessed;
-
- Bank() :
- openRow(NO_ROW), bank(0), bankgr(0),
- rdAllowedAt(0), wrAllowedAt(0), preAllowedAt(0), actAllowedAt(0),
- rowAccesses(0), bytesAccessed(0)
- { }
- };
-
- /**
- * A pointer to the parent DRAMCtrl instance
- */
- DRAMCtrl* ctrl;
-
- /**
- * Number of commands that can issue in the defined controller
- * command window, used to verify command bandwidth
- */
- unsigned int maxCommandsPerWindow;
-
- /**
- * Memory controller configuration initialized based on parameter
- * values.
- */
- Enums::AddrMap addrMapping;
-
- /**
- * General device and channel characteristics
- * The rowsPerBank is determined based on the capacity, number of
- * ranks and banks, the burst size, and the row buffer size.
- */
- const uint32_t burstSize;
- const uint32_t deviceSize;
- const uint32_t deviceRowBufferSize;
- const uint32_t devicesPerRank;
- const uint32_t rowBufferSize;
- const uint32_t burstsPerRowBuffer;
- const uint32_t burstsPerStripe;
- const uint32_t ranksPerChannel;
- const uint32_t banksPerRank;
- uint32_t rowsPerBank;
-
- /**
- * General timing requirements
- */
- const Tick M5_CLASS_VAR_USED tCK;
- const Tick tCS;
- const Tick tBURST;
- const Tick tRTW;
- const Tick tWTR;
-
- /*
- * @return delay between write and read commands
- */
- virtual Tick writeToReadDelay() const { return tBURST + tWTR; }
-
- /*
-     * @return delay between read and write commands
- */
- Tick readToWriteDelay() const { return tBURST + tRTW; }
-
- /*
- * @return delay between accesses to different ranks
- */
- Tick rankToRankDelay() const { return tBURST + tCS; }
-
-
- public:
- /** Set a pointer to the controller and initialize
- * interface based on controller parameters
- * @param _ctrl pointer to the parent controller
- * @param command_window size of command window used to
- * check command bandwidth
- */
- void setCtrl(DRAMCtrl* _ctrl, unsigned int command_window);
-
- /**
- * Get an address in a dense range which starts from 0. The input
- * address is the physical address of the request in an address
- * space that contains other SimObjects apart from this
- * controller.
- *
-     * @param addr The input address which should be in the addrRange
-     * @return An address in the contiguous range [0, max)
- */
- Addr getCtrlAddr(Addr addr) { return range.getOffset(addr); }
-
- /**
- * Setup the rank based on packet received
-     * Set up the rank based on the packet received
-     *
-     * @param rank Index of the rank to set up, used to index the ranks vector
-     * @param is_read Are we setting up the rank for a read or a write packet?
- virtual void setupRank(const uint8_t rank, const bool is_read) = 0;
-
- /**
- * Check drain state of interface
- *
- * @return true if all ranks are drained and idle
- *
- */
- virtual bool allRanksDrained() const = 0;
-
- /**
- * For FR-FCFS policy, find first command that can issue
- * Function will be overriden by interface to select based
- * on media characteristics, used to determine when read
- * or write can issue.
- *
- * @param queue Queued requests to consider
- * @param min_col_at Minimum tick for 'seamless' issue
- * @return an iterator to the selected packet, else queue.end()
- * @return the tick when the packet selected will issue
- */
- virtual std::pair<DRAMPacketQueue::iterator, Tick>
- chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const = 0;
-
- /*
-     * Function to calculate unloaded latency
- */
- virtual Tick accessLatency() const = 0;
-
- /**
- * @return number of bytes in a burst for this interface
- */
- uint32_t bytesPerBurst() const { return burstSize; }
-
- /*
- * @return time to offset next command
- */
- virtual Tick commandOffset() const = 0;
-
- /**
- * Check if a burst operation can be issued to the interface
- *
-     * @return true if RD/WR can issue
- */
- virtual bool burstReady(DRAMPacket* pkt) const = 0;
-
- /**
- * Determine the required delay for an access to a different rank
- *
- * @return required rank to rank delay
- */
- Tick rankDelay() const { return tCS; }
-
- /**
- *
- * @return minimum additional bus turnaround required for read-to-write
- */
- Tick minReadToWriteDataGap() const { return std::min(tRTW, tCS); }
-
- /**
- *
- * @return minimum additional bus turnaround required for write-to-read
- */
- Tick minWriteToReadDataGap() const { return std::min(tWTR, tCS); }
-
- /**
- * Address decoder to figure out physical mapping onto ranks,
- * banks, and rows. This function is called multiple times on the same
-     * system packet if the packet is larger than a burst of the memory. The
- * pkt_addr is used for the offset within the packet.
- *
- * @param pkt The packet from the outside world
- * @param pkt_addr The starting address of the DRAM packet
- * @param size The size of the DRAM packet in bytes
- * @param is_read Is the request for a read or a write to memory
- * @param is_dram Is the request to a DRAM interface
- * @return A DRAMPacket pointer with the decoded information
- */
- DRAMPacket* decodePacket(const PacketPtr pkt, Addr pkt_addr,
- unsigned int size, bool is_read, bool is_dram);
-
- /**
- * Add rank to rank delay to bus timing to all banks in all ranks
- * when access to an alternate interface is issued
- *
- * param cmd_at Time of current command used as starting point for
- * addition of rank-to-rank delay
- */
- virtual void addRankToRankDelay(Tick cmd_at) = 0;
-
- typedef MemInterfaceParams Params;
- MemInterface(const Params* _p);
-};
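A minimal sketch of how the MemInterface turnaround helpers just declared (writeToReadDelay, readToWriteDelay, rankToRankDelay) compose, using hypothetical timings of tBURST = 5 ns, tWTR = 7.5 ns, tRTW = 2.5 ns and tCS = 2.5 ns at 1 ps per tick; illustrative only, not values from a real interface.

// Illustrative sketch only: composes the turnaround helpers with hypothetical
// timings; none of these values come from a real interface parameter set.
#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t tBURST = 5000;  // assumed 5 ns in 1 ps ticks
    const uint64_t tWTR   = 7500;  // assumed 7.5 ns
    const uint64_t tRTW   = 2500;  // assumed 2.5 ns
    const uint64_t tCS    = 2500;  // assumed 2.5 ns

    const uint64_t write_to_read = tBURST + tWTR;  // writeToReadDelay()
    const uint64_t read_to_write = tBURST + tRTW;  // readToWriteDelay()
    const uint64_t rank_to_rank  = tBURST + tCS;   // rankToRankDelay()

    std::printf("W->R %llu, R->W %llu, rank->rank %llu ticks\n",
                (unsigned long long)write_to_read,
                (unsigned long long)read_to_write,
                (unsigned long long)rank_to_rank);
    return 0;
}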
-
-/**
- * Interface to DRAM devices with media specific parameters,
- * statistics, and functions.
- * The DRAMInterface includes a class for individual ranks
- * and per rank functions.
- */
-class DRAMInterface : public MemInterface
-{
- private:
- /**
- * Simple structure to hold the values needed to keep track of
- * commands for DRAMPower
- */
- struct Command
- {
- Data::MemCommand::cmds type;
- uint8_t bank;
- Tick timeStamp;
-
- constexpr Command(Data::MemCommand::cmds _type, uint8_t _bank,
- Tick time_stamp)
- : type(_type), bank(_bank), timeStamp(time_stamp)
- { }
- };
-
- /**
- * The power state captures the different operational states of
- * the DRAM and interacts with the bus read/write state machine,
- * and the refresh state machine.
- *
- * PWR_IDLE : The idle state in which all banks are closed
- * From here can transition to: PWR_REF, PWR_ACT,
- * PWR_PRE_PDN
- *
- * PWR_REF : Auto-refresh state. Will transition when refresh is
- * complete based on power state prior to PWR_REF
- * From here can transition to: PWR_IDLE, PWR_PRE_PDN,
- * PWR_SREF
- *
- * PWR_SREF : Self-refresh state. Entered after refresh if
- * previous state was PWR_PRE_PDN
- * From here can transition to: PWR_IDLE
- *
- * PWR_PRE_PDN : Precharge power down state
- * From here can transition to: PWR_REF, PWR_IDLE
- *
- * PWR_ACT : Activate state in which one or more banks are open
- * From here can transition to: PWR_IDLE, PWR_ACT_PDN
- *
- * PWR_ACT_PDN : Activate power down state
- * From here can transition to: PWR_ACT
- */
- enum PowerState
- {
- PWR_IDLE = 0,
- PWR_REF,
- PWR_SREF,
- PWR_PRE_PDN,
- PWR_ACT,
- PWR_ACT_PDN
- };
-
- /**
- * The refresh state is used to control the progress of the
- * refresh scheduling. When normal operation is in progress the
-     * refresh state is idle. Once tREFI has elapsed, a refresh event
-     * is triggered to start the following state machine transitions, which
-     * are used to issue a refresh and return to normal operation
- *
- * REF_IDLE : IDLE state used during normal operation
- * From here can transition to: REF_DRAIN
- *
- * REF_SREF_EXIT : Exiting a self-refresh; refresh event scheduled
- * after self-refresh exit completes
- * From here can transition to: REF_DRAIN
- *
- * REF_DRAIN : Drain state in which on going accesses complete.
- * From here can transition to: REF_PD_EXIT
- *
- * REF_PD_EXIT : Evaluate pwrState and issue wakeup if needed
- * Next state dependent on whether banks are open
- * From here can transition to: REF_PRE, REF_START
- *
- * REF_PRE : Close (precharge) all open banks
- * From here can transition to: REF_START
- *
- * REF_START : Issue refresh command and update DRAMPower stats
- * From here can transition to: REF_RUN
- *
- * REF_RUN : Refresh running, waiting for tRFC to expire
- * From here can transition to: REF_IDLE, REF_SREF_EXIT
- */
- enum RefreshState
- {
- REF_IDLE = 0,
- REF_DRAIN,
- REF_PD_EXIT,
- REF_SREF_EXIT,
- REF_PRE,
- REF_START,
- REF_RUN
- };
-
- class Rank;
- struct RankStats : public Stats::Group
- {
- RankStats(DRAMInterface &dram, Rank &rank);
-
- void regStats() override;
- void resetStats() override;
- void preDumpStats() override;
-
- Rank &rank;
-
- /*
- * Command energies
- */
- Stats::Scalar actEnergy;
- Stats::Scalar preEnergy;
- Stats::Scalar readEnergy;
- Stats::Scalar writeEnergy;
- Stats::Scalar refreshEnergy;
-
- /*
- * Active Background Energy
- */
- Stats::Scalar actBackEnergy;
-
- /*
- * Precharge Background Energy
- */
- Stats::Scalar preBackEnergy;
-
- /*
- * Active Power-Down Energy
- */
- Stats::Scalar actPowerDownEnergy;
-
- /*
- * Precharge Power-Down Energy
- */
- Stats::Scalar prePowerDownEnergy;
-
- /*
- * self Refresh Energy
- */
- Stats::Scalar selfRefreshEnergy;
-
- Stats::Scalar totalEnergy;
- Stats::Scalar averagePower;
-
- /**
- * Stat to track total DRAM idle time
- *
- */
- Stats::Scalar totalIdleTime;
-
- /**
- * Track time spent in each power state.
- */
- Stats::Vector pwrStateTime;
- };
-
- /**
- * Rank class includes a vector of banks. Refresh and Power state
- * machines are defined per rank. Events required to change the
- * state of the refresh and power state machine are scheduled per
- * rank. This class allows the implementation of rank-wise refresh
- * and rank-wise power-down.
- */
- class Rank : public EventManager
- {
- private:
-
- /**
- * A reference to the parent DRAMInterface instance
- */
- DRAMInterface& dram;
-
- /**
- * Since we are taking decisions out of order, we need to keep
- * track of what power transition is happening at what time
- */
- PowerState pwrStateTrans;
-
- /**
- * Previous low-power state, which will be re-entered after refresh.
- */
- PowerState pwrStatePostRefresh;
-
- /**
- * Track when we transitioned to the current power state
- */
- Tick pwrStateTick;
-
- /**
- * Keep track of when a refresh is due.
- */
- Tick refreshDueAt;
-
- /**
- * Function to update Power Stats
- */
- void updatePowerStats();
-
- /**
- * Schedule a power state transition in the future, and
- * potentially override an already scheduled transition.
- *
- * @param pwr_state Power state to transition to
- * @param tick Tick when transition should take place
- */
- void schedulePowerEvent(PowerState pwr_state, Tick tick);
-
- public:
-
- /**
- * Current power state.
- */
- PowerState pwrState;
-
- /**
- * current refresh state
- */
- RefreshState refreshState;
-
- /**
- * rank is in or transitioning to power-down or self-refresh
- */
- bool inLowPowerState;
-
- /**
- * Current Rank index
- */
- uint8_t rank;
-
- /**
- * Track number of packets in read queue going to this rank
- */
- uint32_t readEntries;
-
- /**
- * Track number of packets in write queue going to this rank
- */
- uint32_t writeEntries;
-
- /**
- * Number of ACT, RD, and WR events currently scheduled
- * Incremented when a refresh event is started as well
- * Used to determine when a low-power state can be entered
- */
- uint8_t outstandingEvents;
-
- /**
- * delay low-power exit until this requirement is met
- */
- Tick wakeUpAllowedAt;
-
- /**
- * One DRAMPower instance per rank
- */
- DRAMPower power;
-
- /**
-         * List of commands issued, to be sent to DRAMPower at refresh
- * and stats dump. Keep commands here since commands to different
- * banks are added out of order. Will only pass commands up to
- * curTick() to DRAMPower after sorting.
- */
- std::vector<Command> cmdList;
-
- /**
-         * Vector of Banks. Each rank is made of several devices which in
-         * turn are made from several banks.
- */
- std::vector<Bank> banks;
-
- /**
- * To track number of banks which are currently active for
- * this rank.
- */
- unsigned int numBanksActive;
-
- /** List to keep track of activate ticks */
- std::deque<Tick> actTicks;
-
- /**
- * Track when we issued the last read/write burst
- */
- Tick lastBurstTick;
-
- Rank(const DRAMInterfaceParams* _p, int _rank,
- DRAMInterface& _dram);
-
- const std::string name() const { return csprintf("%d", rank); }
-
- /**
- * Kick off accounting for power and refresh states and
- * schedule initial refresh.
- *
- * @param ref_tick Tick for first refresh
- */
- void startup(Tick ref_tick);
-
- /**
- * Stop the refresh events.
- */
- void suspend();
-
- /**
-         * Check if there is no refresh and no preparation of refresh ongoing,
-         * i.e. the refresh state machine is idle
-         *
-         * @return true if the rank is idle from a refresh point of view
- */
- bool inRefIdleState() const { return refreshState == REF_IDLE; }
-
- /**
- * Check if the current rank has all banks closed and is not
- * in a low power state
- *
-         * @return true if the rank is idle from a bank
-         * and power point of view
- */
- bool inPwrIdleState() const { return pwrState == PWR_IDLE; }
-
- /**
- * Trigger a self-refresh exit if there are entries enqueued
- * Exit if there are any read entries regardless of the bus state.
- * If we are currently issuing write commands, exit if we have any
- * write commands enqueued as well.
- * Could expand this in the future to analyze state of entire queue
- * if needed.
- *
- * @return boolean indicating self-refresh exit should be scheduled
- */
- bool forceSelfRefreshExit() const;
-
- /**
- * Check if the command queue of current rank is idle
- *
-         * @return true if there are no commands in the queue.
-         * Bus direction determines which queue is checked.
- */
- bool isQueueEmpty() const;
-
- /**
- * Let the rank check if it was waiting for requests to drain
- * to allow it to transition states.
- */
- void checkDrainDone();
-
- /**
- * Push command out of cmdList queue that are scheduled at
- * or before curTick() to DRAMPower library
- * All commands before curTick are guaranteed to be complete
- * and can safely be flushed.
- */
- void flushCmdList();
-
- /**
- * Computes stats just prior to dump event
- */
- void computeStats();
-
- /**
- * Reset stats on a stats event
- */
- void resetStats();
-
- /**
- * Schedule a transition to power-down (sleep)
- *
- * @param pwr_state Power state to transition to
- * @param tick Absolute tick when transition should take place
- */
- void powerDownSleep(PowerState pwr_state, Tick tick);
-
- /**
-         * Schedule an event to wake up from power-down or self-refresh
- * and update bank timing parameters
- *
- * @param exit_delay Relative tick defining the delay required between
- * low-power exit and the next command
- */
- void scheduleWakeUpEvent(Tick exit_delay);
-
- void processWriteDoneEvent();
- EventFunctionWrapper writeDoneEvent;
-
- void processActivateEvent();
- EventFunctionWrapper activateEvent;
-
- void processPrechargeEvent();
- EventFunctionWrapper prechargeEvent;
-
- void processRefreshEvent();
- EventFunctionWrapper refreshEvent;
-
- void processPowerEvent();
- EventFunctionWrapper powerEvent;
-
- void processWakeUpEvent();
- EventFunctionWrapper wakeUpEvent;
-
- protected:
- RankStats stats;
- };
-
- /**
- * Function for sorting Command structures based on timeStamp
- *
- * @param a Memory Command
- * @param next Memory Command
- * @return true if timeStamp of Command 1 < timeStamp of Command 2
- */
- static bool
- sortTime(const Command& cmd, const Command& cmd_next)
- {
- return cmd.timeStamp < cmd_next.timeStamp;
- }
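sortTime enables the sort-then-flush pattern used when handing cmdList entries to DRAMPower; the standalone sketch below shows that pattern with stand-in types (the Command struct and the printf are placeholders, not the DRAMPower API).

// Illustrative sketch only: the Command struct and printf below are
// stand-ins, not the DRAMPower bookkeeping used by the interface.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Command { int bank; uint64_t timeStamp; };

static bool sortTime(const Command& a, const Command& b)
{
    return a.timeStamp < b.timeStamp;
}

int main()
{
    // commands appended out of order by different banks
    std::vector<Command> cmdList = {{1, 300}, {0, 100}, {2, 900}};
    const uint64_t now = 500;  // stand-in for curTick()

    std::sort(cmdList.begin(), cmdList.end(), sortTime);

    // hand everything at or before "now" to the power model (printed here)
    auto it = cmdList.begin();
    for (; it != cmdList.end() && it->timeStamp <= now; ++it) {
        std::printf("flush bank %d @ %llu\n", it->bank,
                    (unsigned long long)it->timeStamp);
    }

    // keep only the commands that are still in the future
    cmdList.erase(cmdList.begin(), it);
    return 0;
}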
-
- /**
- * DRAM specific device characteristics
- */
- const uint32_t bankGroupsPerRank;
- const bool bankGroupArch;
-
- /**
- * DRAM specific timing requirements
- */
- const Tick tCL;
- const Tick tBURST_MIN;
- const Tick tBURST_MAX;
- const Tick tCCD_L_WR;
- const Tick tCCD_L;
- const Tick tRCD;
- const Tick tRP;
- const Tick tRAS;
- const Tick tWR;
- const Tick tRTP;
- const Tick tRFC;
- const Tick tREFI;
- const Tick tRRD;
- const Tick tRRD_L;
- const Tick tPPD;
- const Tick tAAD;
- const Tick tXAW;
- const Tick tXP;
- const Tick tXS;
- const Tick clkResyncDelay;
- const bool dataClockSync;
- const bool burstInterleave;
- const uint8_t twoCycleActivate;
- const uint32_t activationLimit;
- const Tick wrToRdDlySameBG;
- const Tick rdToWrDlySameBG;
-
- Enums::PageManage pageMgmt;
- /**
-     * Max column accesses (read and write) per row, before forcefully
- * closing it.
- */
- const uint32_t maxAccessesPerRow;
-
- // timestamp offset
- uint64_t timeStampOffset;
-
- // Holds the value of the DRAM rank of burst issued
- uint8_t activeRank;
-
- /** Enable or disable DRAM powerdown states. */
- bool enableDRAMPowerdown;
-
- /** The time when stats were last reset used to calculate average power */
- Tick lastStatsResetTick;
-
- /**
- * Keep track of when row activations happen, in order to enforce
- * the maximum number of activations in the activation window. The
- * method updates the time that the banks become available based
- * on the current limits.
- *
- * @param rank_ref Reference to the rank
- * @param bank_ref Reference to the bank
- * @param act_tick Time when the activation takes place
- * @param row Index of the row
- */
- void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick,
- uint32_t row);
-
- /**
- * Precharge a given bank and also update when the precharge is
- * done. This will also deal with any stats related to the
- * accesses to the open page.
- *
- * @param rank_ref The rank to precharge
- * @param bank_ref The bank to precharge
- * @param pre_tick Time when the precharge takes place
- * @param auto_or_preall Is this an auto-precharge or precharge all command
- * @param trace Is this an auto precharge then do not add to trace
- */
- void prechargeBank(Rank& rank_ref, Bank& bank_ref,
- Tick pre_tick, bool auto_or_preall = false,
- bool trace = true);
-
- struct DRAMStats : public Stats::Group
- {
- DRAMStats(DRAMInterface &dram);
-
- void regStats() override;
- void resetStats() override;
-
- DRAMInterface &dram;
-
- /** total number of DRAM bursts serviced */
- Stats::Scalar readBursts;
- Stats::Scalar writeBursts;
-
- /** DRAM per bank stats */
- Stats::Vector perBankRdBursts;
- Stats::Vector perBankWrBursts;
-
- // Latencies summed over all requests
- Stats::Scalar totQLat;
- Stats::Scalar totBusLat;
- Stats::Scalar totMemAccLat;
-
- // Average latencies per request
- Stats::Formula avgQLat;
- Stats::Formula avgBusLat;
- Stats::Formula avgMemAccLat;
-
- // Row hit count and rate
- Stats::Scalar readRowHits;
- Stats::Scalar writeRowHits;
- Stats::Formula readRowHitRate;
- Stats::Formula writeRowHitRate;
- Stats::Histogram bytesPerActivate;
- // Number of bytes transferred to/from DRAM
- Stats::Scalar bytesRead;
- Stats::Scalar bytesWritten;
-
- // Average bandwidth
- Stats::Formula avgRdBW;
- Stats::Formula avgWrBW;
- Stats::Formula peakBW;
- // bus utilization
- Stats::Formula busUtil;
- Stats::Formula busUtilRead;
- Stats::Formula busUtilWrite;
- Stats::Formula pageHitRate;
- };
-
- DRAMStats stats;
-
- /**
- * Vector of dram ranks
- */
- std::vector<Rank*> ranks;
-
- public:
-
- /**
- * Buffer sizes for read and write queues in the controller
- * These are passed to the controller on instantiation
- * Defining them here allows for buffers to be resized based
- * on memory type / configuration.
- */
- const uint32_t readBufferSize;
- const uint32_t writeBufferSize;
-
- /*
- * @return delay between write and read commands
- */
- Tick writeToReadDelay() const override { return tBURST + tWTR + tCL; }
-
- /**
- * Find which are the earliest banks ready to issue an activate
- * for the enqueued requests. Assumes maximum of 32 banks per rank
- * Also checks if the bank is already prepped.
- *
- * @param queue Queued requests to consider
- * @param min_col_at time of seamless burst command
- * @return One-hot encoded mask of bank indices
- * @return boolean indicating burst can issue seamlessly, with no gaps
- */
- std::pair<std::vector<uint32_t>, bool>
- minBankPrep(const DRAMPacketQueue& queue, Tick min_col_at) const;
-
- /*
- * @return time to send a burst of data without gaps
- */
- Tick
- burstDelay() const
- {
- return (burstInterleave ? tBURST_MAX / 2 : tBURST);
- }
-
- public:
- /**
- * Initialize the DRAM interface and verify parameters
- */
- void init() override;
-
- /**
- * Iterate through dram ranks and instantiate per rank startup routine
- */
- void startup() override;
-
- /**
- * Setup the rank based on packet received
- *
- * @param rank Index of the rank to be set up; used to index the ranks vector
- * @param is_read Are we setting up the rank for a read or a write packet?
- */
- void setupRank(const uint8_t rank, const bool is_read) override;
-
- /**
- * Iterate through dram ranks to exit self-refresh in order to drain
- */
- void drainRanks();
-
- /**
- * Return true once refresh is complete for all ranks and there are no
- * additional commands enqueued. (only evaluated when draining)
- * This will ensure that all banks are closed, power state is IDLE, and
- * power stats have been updated
- *
- * @return true if all ranks have refreshed, with no commands enqueued
- *
- */
- bool allRanksDrained() const override;
-
- /**
- * Iterate through DRAM ranks and suspend them
- */
- void suspend();
-
- /*
- * @return time to offset next command
- */
- Tick commandOffset() const override { return (tRP + tRCD); }
-
- /*
- * Function to calculate unloaded, closed bank access latency
- */
- Tick accessLatency() const override { return (tRP + tRCD + tCL); }
-
- /**
- * For FR-FCFS policy, find first DRAM command that can issue
- *
- * @param queue Queued requests to consider
- * @param min_col_at Minimum tick for 'seamless' issue
- * @return an iterator to the selected packet, else queue.end()
- * @return the tick when the packet selected will issue
- */
- std::pair<DRAMPacketQueue::iterator, Tick>
- chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const override;
-
- /**
- * Actually do the burst - figure out the latency it
- * will take to service the req based on bank state, channel state etc
- * and then update those states to account for this request. Based
- * on this, update the packet's "readyTime" and move it to the
- * response q from where it will eventually go back to the outside
- * world.
- *
- * @param dram_pkt The DRAM packet created from the outside world pkt
- * @param next_burst_at Minimum bus timing requirement from controller
- * @param queue Reference to the read or write queue with the packet
- * @return pair, tick when current burst is issued and
- * tick when next burst can issue
- */
- std::pair<Tick, Tick>
- doBurstAccess(DRAMPacket* dram_pkt, Tick next_burst_at,
- const std::vector<DRAMPacketQueue>& queue);
-
- /**
- * Check if a burst operation can be issued to the DRAM
- *
- * @return true if a RD/WR burst can issue; this requires
- * the DRAM to be in the REF IDLE state
- */
- bool
- burstReady(DRAMPacket* pkt) const override
- {
- return ranks[pkt->rank]->inRefIdleState();
- }
-
- /**
- * This function checks if ranks are actively refreshing and
- * therefore busy. The function also checks if ranks are in
- * the self-refresh state, in which case, a self-refresh exit
- * is initiated.
- *
- * @return true if all ranks are in refresh and therefore busy
- */
- bool isBusy();
-
- /**
- * Add rank to rank delay to bus timing to all DRAM banks in all ranks
- * when access to an alternate interface is issued
- *
- * @param cmd_at Time of current command used as starting point for
- * addition of rank-to-rank delay
- */
- void addRankToRankDelay(Tick cmd_at) override;
-
- /**
- * Complete response process for DRAM when read burst is complete
- * This will update the counters and check if a power down state
- * can be entered.
- *
- * @param rank Specifies rank associated with read burst
- */
- void respondEvent(uint8_t rank);
-
- /**
- * Check the refresh state to determine if refresh needs
- * to be kicked back into action after a read response
- *
- * @param rank Specifies rank associated with read burst
- */
- void checkRefreshState(uint8_t rank);
-
- DRAMInterface(const DRAMInterfaceParams* _p);
-};
-
-/**
- * Interface to NVM devices with media specific parameters,
- * statistics, and functions.
- * The NVMInterface includes a class for individual ranks
- * and per rank functions.
- */
-class NVMInterface : public MemInterface
-{
- private:
- /**
- * NVM rank class simply includes a vector of banks.
- */
- class Rank : public EventManager
- {
- private:
-
- /**
- * A reference to the parent NVMInterface instance
- */
- NVMInterface& nvm;
-
- public:
-
- /**
- * Current Rank index
- */
- uint8_t rank;
-
- /**
- * Vector of NVM banks. Each rank is made of several banks
- * that can be accessed in parallel.
- */
- std::vector<Bank> banks;
-
- Rank(const NVMInterfaceParams* _p, int _rank,
- NVMInterface& _nvm);
- };
-
- /**
- * NVM specific device and channel characteristics
- */
- const uint32_t maxPendingWrites;
- const uint32_t maxPendingReads;
- const bool twoCycleRdWr;
-
- /**
- * NVM specific timing requirements
- */
- const Tick tREAD;
- const Tick tWRITE;
- const Tick tSEND;
-
- struct NVMStats : public Stats::Group
- {
- NVMStats(NVMInterface &nvm);
-
- void regStats() override;
-
- NVMInterface &nvm;
-
- /** NVM stats */
- Stats::Scalar readBursts;
- Stats::Scalar writeBursts;
-
- Stats::Vector perBankRdBursts;
- Stats::Vector perBankWrBursts;
-
- // Latencies summed over all requests
- Stats::Scalar totQLat;
- Stats::Scalar totBusLat;
- Stats::Scalar totMemAccLat;
-
- // Average latencies per request
- Stats::Formula avgQLat;
- Stats::Formula avgBusLat;
- Stats::Formula avgMemAccLat;
-
- Stats::Scalar bytesRead;
- Stats::Scalar bytesWritten;
-
- // Average bandwidth
- Stats::Formula avgRdBW;
- Stats::Formula avgWrBW;
- Stats::Formula peakBW;
- Stats::Formula busUtil;
- Stats::Formula busUtilRead;
- Stats::Formula busUtilWrite;
-
- /** NVM stats */
- Stats::Histogram pendingReads;
- Stats::Histogram pendingWrites;
- Stats::Histogram bytesPerBank;
- };
-
- NVMStats stats;
-
- void processWriteRespondEvent();
- EventFunctionWrapper writeRespondEvent;
-
- void processReadReadyEvent();
- EventFunctionWrapper readReadyEvent;
-
- /**
- * Vector of nvm ranks
- */
- std::vector<Rank*> ranks;
-
- /**
- * Holding queue for non-deterministic write commands, which
- * maintains writes that have been issued but have not completed
- * Stored separately mostly to keep the code clean and help with
- * events scheduling.
- * This mimics a buffer on the media controller and therefore is
- * not added to the main write queue for sizing
- */
- std::list<Tick> writeRespQueue;
-
- std::deque<Tick> readReadyQueue;
-
- /**
- * Check if the write response queue is empty
- *
- * @return true if empty
- */
- bool writeRespQueueEmpty() const { return writeRespQueue.empty(); }
-
- /**
- * Till when must we wait before issuing next read command?
- */
- Tick nextReadAt;
-
- // keep track of reads that have issued for which data is either
- // not yet ready or has not yet been transferred to the ctrl
- uint16_t numPendingReads;
- uint16_t numReadDataReady;
-
- public:
- // keep track of the number of reads that have yet to be issued
- uint16_t numReadsToIssue;
-
- // number of writes in the writeQueue for the NVM interface
- uint32_t numWritesQueued;
-
- /**
- * Buffer sizes for read and write queues in the controller
- * These are passed to the controller on instantiation
- * Defining them here allows for buffers to be resized based
- * on memory type / configuration.
- */
- const uint32_t readBufferSize;
- const uint32_t writeBufferSize;
-
- /**
- * Initialize the NVM interface and verify parameters
- */
- void init() override;
-
- /**
- * Setup the rank based on packet received
- *
- * @param rank Index of the rank to be set up; used to index the ranks vector
- * @param is_read Are we setting up the rank for a read or a write packet?
- */
- void setupRank(const uint8_t rank, const bool is_read) override;
-
- /**
- * Check drain state of NVM interface
- *
- * @return true if write response queue is empty
- *
- */
- bool allRanksDrained() const override { return writeRespQueueEmpty(); }
-
- /*
- * @return time to offset next command
- */
- Tick commandOffset() const override { return tBURST; }
-
- /**
- * Check if a burst operation can be issued to the NVM
- *
- * @return true if a RD/WR burst can issue; for reads, also verify
- * that the ready count has been updated to a non-zero value to
- * account for race conditions between events
- */
- bool burstReady(DRAMPacket* pkt) const override;
-
- /**
- * This function checks if ranks are busy.
- * This state is true when either:
- * 1) There is no command with read data ready to transmit or
- * 2) The NVM interface has reached the maximum number of outstanding
- * write commands.
- * @param read_queue_empty There are no reads queued
- * @param all_writes_nvm All writes in the queue are for the NVM interface
- * @return true if NVM is busy
- *
- */
- bool isBusy(bool read_queue_empty, bool all_writes_nvm);
- /**
- * For FR-FCFS policy, find first NVM command that can issue
- * default to first command to prepped region
- *
- * @param queue Queued requests to consider
- * @param min_col_at Minimum tick for 'seamless' issue
- * @return an iterator to the selected packet, else queue.end()
- * @return the tick when the packet selected will issue
- */
- std::pair<DRAMPacketQueue::iterator, Tick>
- chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const override;
-
- /**
- * Add rank to rank delay to bus timing to all NVM banks in all ranks
- * when access to an alternate interface is issued
- *
- * @param cmd_at Time of current command used as starting point for
- * addition of rank-to-rank delay
- */
- void addRankToRankDelay(Tick cmd_at) override;
-
-
- /**
- * Select read command to issue asynchronously
- */
- void chooseRead(DRAMPacketQueue& queue);
-
- /*
- * Function to calculate unloaded access latency
- */
- Tick accessLatency() const override { return (tREAD + tSEND); }
-
- /**
- * Check if the write response queue has reached defined threshold
- *
- * @return true if full
- */
- bool
- writeRespQueueFull() const
- {
- return writeRespQueue.size() == maxPendingWrites;
- }
-
- bool
- readsWaitingToIssue() const
- {
- return ((numReadsToIssue != 0) &&
- (numPendingReads < maxPendingReads));
- }
-
- /**
- * Actually do the burst and update stats.
- *
- * @param pkt The packet created from the outside world pkt
- * @param next_burst_at Minimum bus timing requirement from controller
- * @return pair, tick when current burst is issued and
- * tick when next burst can issue
- */
- std::pair<Tick, Tick>
- doBurstAccess(DRAMPacket* pkt, Tick next_burst_at);
-
- NVMInterface(const NVMInterfaceParams* _p);
-};
-
-/**
- * The DRAM controller is a single-channel memory controller capturing
- * the most important timing constraints associated with a
- * contemporary DRAM. For multi-channel memory systems, the controller
- * is combined with a crossbar model, with the channel address
- * interleaving taking part in the crossbar.
- *
- * As a basic design principle, this controller
- * model is not cycle callable, but instead uses events to: 1) decide
- * when new decisions can be made, 2) when resources become available,
- * 3) when things are to be considered done, and 4) when to send
- * things back. Through these simple principles, the model delivers
- * high performance, and lots of flexibility, allowing users to
- * evaluate the system impact of a wide range of memory technologies,
- * such as DDR3/4, LPDDR2/3/4, WideIO1/2, HBM and HMC.
- *
- * For more details, please see Hansson et al, "Simulating DRAM
- * controllers for future system architecture exploration",
- * Proc. ISPASS, 2014. If you use this model as part of your research
- * please cite the paper.
- *
- * The low-power functionality implements a staggered powerdown
- * similar to that described in "Optimized Active and Power-Down Mode
- * Refresh Control in 3D-DRAMs" by Jung et al, VLSI-SoC, 2014.
- */
-class DRAMCtrl : public QoS::MemCtrl
-{
- private:
-
- // For now, make use of a queued slave port to avoid dealing with
- // flow control for the responses being sent back
- class MemoryPort : public QueuedSlavePort
- {
-
- RespPacketQueue queue;
- DRAMCtrl& ctrl;
-
- public:
-
- MemoryPort(const std::string& name, DRAMCtrl& _ctrl);
-
- protected:
-
- Tick recvAtomic(PacketPtr pkt);
-
- void recvFunctional(PacketPtr pkt);
-
- bool recvTimingReq(PacketPtr);
-
- virtual AddrRangeList getAddrRanges() const;
-
- };
-
- /**
- * Our incoming port, for a multi-ported controller add a crossbar
- * in front of it
- */
- MemoryPort port;
-
- /**
- * Remember if the memory system is in timing mode
- */
- bool isTimingMode;
-
- /**
- * Remember if we have to retry a request when available.
- */
- bool retryRdReq;
- bool retryWrReq;
-
- /**
- * Bunch of things required to set up "events" in gem5
- * When event "respondEvent" occurs for example, the method
- * processRespondEvent is called; no parameters are allowed
- * in these methods
- */
- void processNextReqEvent();
- EventFunctionWrapper nextReqEvent;
-
- void processRespondEvent();
- EventFunctionWrapper respondEvent;
-
- /**
- * Check if the read queue has room for more entries
- *
- * @param pkt_count The number of entries needed in the read queue
- * @return true if read queue is full, false otherwise
- */
- bool readQueueFull(unsigned int pkt_count) const;
-
- /**
- * Check if the write queue has room for more entries
- *
- * @param pkt_count The number of entries needed in the write queue
- * @return true if write queue is full, false otherwise
- */
- bool writeQueueFull(unsigned int pkt_count) const;
-
- /**
- * When a new read comes in, first check if the write q has a
- * pending request to the same address.\ If not, decode the
- * address to populate rank/bank/row, create one or multiple
- * "dram_pkt", and push them to the back of the read queue.\
- * If this is the only
- * read request in the system, schedule an event to start
- * servicing it.
- *
- * @param pkt The request packet from the outside world
- * @param pkt_count The number of DRAM bursts the pkt translates to.
- * If the pkt size is larger than one full burst, then pkt_count
- * is greater than one.
- * @param is_dram Does this packet access DRAM?
- */
- void addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram);
-
- /**
- * Decode the incoming pkt, create a dram_pkt and push to the
- * back of the write queue. \If the write q length is more than
- * the threshold specified by the user, i.e. the queue is beginning
- * to get full, stop reads, and start draining writes.
- *
- * @param pkt The request packet from the outside world
- * @param pkt_count The number of DRAM bursts the pkt translates to.
- * If the pkt size is larger than one full burst, then pkt_count
- * is greater than one.
- * @param is_dram Does this packet access DRAM?
- */
- void addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram);
-
- /**
- * Actually do the burst based on media specific access function.
- * Update bus statistics when complete.
- *
- * @param pkt The DRAM packet created from the outside world pkt
- */
- void doBurstAccess(DRAMPacket* dram_pkt);
-
- /**
- * When a packet reaches its "readyTime" in the response Q,
- * use the "access()" method in AbstractMemory to actually
- * create the response packet, and send it back to the outside
- * world requestor.
- *
- * @param pkt The packet from the outside world
- * @param static_latency Static latency to add before sending the packet
- */
- void accessAndRespond(PacketPtr pkt, Tick static_latency);
-
- /**
- * Determine if there is a packet that can issue.
- *
- * @param pkt The packet to evaluate
- */
- bool
- packetReady(DRAMPacket* pkt)
- {
- return (pkt->isDram() ?
- dram->burstReady(pkt) : nvm->burstReady(pkt));
- }
-
- /**
- * Calculate the minimum delay used when scheduling a read-to-write
- * transition.
- * @return minimum delay
- */
- Tick
- minReadToWriteDataGap()
- {
- Tick dram_min = dram ? dram->minReadToWriteDataGap() : MaxTick;
- Tick nvm_min = nvm ? nvm->minReadToWriteDataGap() : MaxTick;
- return std::min(dram_min, nvm_min);
- }
-
- /**
- * Calculate the minimum delay used when scheduling a write-to-read
- * transition.
- * @return minimum delay
- */
- Tick
- minWriteToReadDataGap()
- {
- Tick dram_min = dram ? dram->minWriteToReadDataGap() : MaxTick;
- Tick nvm_min = nvm ? nvm->minWriteToReadDataGap() : MaxTick;
- return std::min(dram_min, nvm_min);
- }
-
- /**
- * The memory scheduler/arbiter - picks which request needs to
- * go next, based on the specified policy such as FCFS or FR-FCFS
- * and moves it to the head of the queue.
- * Prioritizes accesses to the same rank as previous burst unless
- * controller is switching command type.
- *
- * @param queue Queued requests to consider
- * @param extra_col_delay Any extra delay due to a read/write switch
- * @return an iterator to the selected packet, else queue.end()
- */
- DRAMPacketQueue::iterator chooseNext(DRAMPacketQueue& queue,
- Tick extra_col_delay);
-
- /**
- * For FR-FCFS policy reorder the read/write queue depending on row buffer
- * hits and earliest bursts available in DRAM
- *
- * @param queue Queued requests to consider
- * @param extra_col_delay Any extra delay due to a read/write switch
- * @return an iterator to the selected packet, else queue.end()
- */
- DRAMPacketQueue::iterator chooseNextFRFCFS(DRAMPacketQueue& queue,
- Tick extra_col_delay);
-
- /**
- * Calculate burst window aligned tick
- *
- * @param cmd_tick Initial tick of command
- * @return burst window aligned tick
- */
- Tick getBurstWindow(Tick cmd_tick);
-
- /**
- * Used for debugging to observe the contents of the queues.
- */
- void printQs() const;
-
- /**
- * Burst-align an address.
- *
- * @param addr The potentially unaligned address
- * @param is_dram Does this packet access DRAM?
- *
- * @return An address aligned to a memory burst
- */
- Addr
- burstAlign(Addr addr, bool is_dram) const
- {
- if (is_dram)
- return (addr & ~(Addr(dram->bytesPerBurst() - 1)));
- else
- return (addr & ~(Addr(nvm->bytesPerBurst() - 1)));
- }
-
- /**
- * The controller's main read and write queues, with support for QoS reordering
- */
- std::vector<DRAMPacketQueue> readQueue;
- std::vector<DRAMPacketQueue> writeQueue;
-
- /**
- * To avoid iterating over the write queue to check for
- * overlapping transactions, maintain a set of burst addresses
- * that are currently queued. Since we merge writes to the same
- * location we never have more than one address to the same burst
- * address.
- */
- std::unordered_set<Addr> isInWriteQueue;
-
- /**
- * Response queue where read packets wait after we're done working
- * with them, but it's not time to send the response yet. The
- * responses are stored separately mostly to keep the code clean
- * and help with events scheduling. For all logical purposes such
- * as sizing the read queue, this and the main read queue need to
- * be added together.
- */
- std::deque<DRAMPacket*> respQueue;
-
- /**
- * Holds count of commands issued in burst window starting at
- * defined Tick. This is used to ensure that the command bandwidth
- * does not exceed the allowable media constraints.
- */
- std::unordered_multiset<Tick> burstTicks;
-
- /**
- * Create pointer to interface of the actual dram media when connected
- */
- DRAMInterface* const dram;
-
- /**
- * Create pointer to interface of the actual nvm media when connected
- */
- NVMInterface* const nvm;
-
- /**
- * The following are basic design parameters of the memory
- * controller, and are initialized based on parameter values.
- * The rowsPerBank is determined based on the capacity, number of
- * ranks and banks, the burst size, and the row buffer size.
- */
- const uint32_t readBufferSize;
- const uint32_t writeBufferSize;
- const uint32_t writeHighThreshold;
- const uint32_t writeLowThreshold;
- const uint32_t minWritesPerSwitch;
- uint32_t writesThisTime;
- uint32_t readsThisTime;
-
- /**
- * Memory controller configuration initialized based on parameter
- * values.
- */
- Enums::MemSched memSchedPolicy;
-
- /**
- * Pipeline latency of the controller frontend. The frontend
- * contribution is added to writes (that complete when they are in
- * the write buffer) and reads that are serviced by the write buffer.
- */
- const Tick frontendLatency;
-
- /**
- * Pipeline latency of the backend and PHY. Along with the
- * frontend contribution, this latency is added to reads serviced
- * by the DRAM.
- */
- const Tick backendLatency;
-
- /**
- * Length of a command window, used to check
- * command bandwidth
- */
- const Tick commandWindow;
-
- /**
- * Till when must we wait before issuing next RD/WR burst?
- */
- Tick nextBurstAt;
-
- Tick prevArrival;
-
- /**
- * The soonest you have to start thinking about the next request
- * is the longest access time that can occur before
- * nextBurstAt. Assuming you need to precharge, open a new row,
- * and access, it is tRP + tRCD + tCL.
- */
- Tick nextReqTime;
-
- struct CtrlStats : public Stats::Group
- {
- CtrlStats(DRAMCtrl &ctrl);
-
- void regStats() override;
-
- DRAMCtrl &ctrl;
-
- // All statistics that the model needs to capture
- Stats::Scalar readReqs;
- Stats::Scalar writeReqs;
- Stats::Scalar readBursts;
- Stats::Scalar writeBursts;
- Stats::Scalar servicedByWrQ;
- Stats::Scalar mergedWrBursts;
- Stats::Scalar neitherReadNorWriteReqs;
- // Average queue lengths
- Stats::Average avgRdQLen;
- Stats::Average avgWrQLen;
-
- Stats::Scalar numRdRetry;
- Stats::Scalar numWrRetry;
- Stats::Vector readPktSize;
- Stats::Vector writePktSize;
- Stats::Vector rdQLenPdf;
- Stats::Vector wrQLenPdf;
- Stats::Histogram rdPerTurnAround;
- Stats::Histogram wrPerTurnAround;
-
- Stats::Scalar bytesReadWrQ;
- Stats::Scalar bytesReadSys;
- Stats::Scalar bytesWrittenSys;
- // Average bandwidth
- Stats::Formula avgRdBWSys;
- Stats::Formula avgWrBWSys;
-
- Stats::Scalar totGap;
- Stats::Formula avgGap;
-
- // per-master bytes read and written to memory
- Stats::Vector masterReadBytes;
- Stats::Vector masterWriteBytes;
-
- // per-master bytes read and written to memory rate
- Stats::Formula masterReadRate;
- Stats::Formula masterWriteRate;
-
- // per-master read and write serviced memory accesses
- Stats::Vector masterReadAccesses;
- Stats::Vector masterWriteAccesses;
-
- // per-master read and write total memory access latency
- Stats::Vector masterReadTotalLat;
- Stats::Vector masterWriteTotalLat;
-
- // per-master read and write average memory access latency
- Stats::Formula masterReadAvgLat;
- Stats::Formula masterWriteAvgLat;
- };
-
- CtrlStats stats;
-
- /**
- * Upstream caches need this packet until true is returned, so
- * hold it for deletion until a subsequent call
- */
- std::unique_ptr<Packet> pendingDelete;
-
- /**
- * Select either the read or write queue
- *
- * @param is_read The current burst is a read, select read queue
- * @return a reference to the appropriate queue
- */
- std::vector<DRAMPacketQueue>&
- selQueue(bool is_read)
- {
- return (is_read ? readQueue : writeQueue);
- };
-
- /**
- * Remove commands that have already issued from burstTicks
- */
- void pruneBurstTick();
-
- public:
-
- DRAMCtrl(const DRAMCtrlParams* p);
-
- /**
- * Ensure that all interfaces have drained commands
- *
- * @return bool flag, set once drain complete
- */
- bool allIntfDrained() const;
-
- DrainState drain() override;
-
- /**
- * Check for command bus contention for single cycle command.
- * If there is contention, shift command to next burst.
- * The check verifies that the number of commands issued per burst
- * window is less than a defined maximum, maxCommandsPerWindow.
- * Therefore, contention per cycle is not verified and instead
- * is done based on a burst window.
- *
- * @param cmd_tick Initial tick of command, to be verified
- * @param max_cmds_per_burst Number of commands that can issue
- * in a burst window
- * @return tick for command issue without contention
- */
- Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst);
-
- /**
- * Check for command bus contention for multi-cycle (2 currently)
- * command. If there is contention, shift command(s) to next burst.
- * The check verifies that the number of commands issued per burst
- * window is less than a defined maximum, maxCommandsPerWindow.
- * Therefore, contention per cycle is not verified and instead
- * is done based on a burst window.
- *
- * @param cmd_tick Initial tick of command, to be verified
- * @param max_multi_cmd_split Maximum delay between commands
- * @param max_cmds_per_burst Number of commands that can issue
- * in a burst window
- * @return tick for command issue without contention
- */
- Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst,
- Tick max_multi_cmd_split = 0);
-
- /**
- * Is there a respondEvent scheduled?
- *
- * @return true if event is scheduled
- */
- bool respondEventScheduled() const { return respondEvent.scheduled(); }
-
- /**
- * Is there a read/write burst Event scheduled?
- *
- * @return true if event is scheduled
- */
- bool requestEventScheduled() const { return nextReqEvent.scheduled(); }
-
- /**
- * restart the controller
- * This can be used by interfaces to restart the
- * scheduler after maintenance commands complete
- *
- * @param tick Tick at which to schedule the next event
- */
- void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); }
-
- /**
- * Check the current direction of the memory channel
- *
- * @param next_state Check either the current or next bus state
- * @return True when bus is currently in a read state
- */
- bool inReadBusState(bool next_state) const;
-
- /**
- * Check the current direction of the memory channel
- *
- * @param next_state Check either the current or next bus state
- * @return True when bus is currently in a write state
- */
- bool inWriteBusState(bool next_state) const;
-
- Port &getPort(const std::string &if_name,
- PortID idx=InvalidPortID) override;
-
- virtual void init() override;
- virtual void startup() override;
- virtual void drainResume() override;
-
- protected:
-
- Tick recvAtomic(PacketPtr pkt);
- void recvFunctional(PacketPtr pkt);
- bool recvTimingReq(PacketPtr pkt);
-
-};
-
-#endif //__MEM_DRAM_CTRL_HH__
--- /dev/null
+/*
+ * Copyright (c) 2010-2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mem/mem_ctrl.hh"
+
+#include "base/trace.hh"
+#include "debug/DRAM.hh"
+#include "debug/Drain.hh"
+#include "debug/MemCtrl.hh"
+#include "debug/NVM.hh"
+#include "debug/QOS.hh"
+#include "mem/mem_interface.hh"
+#include "sim/system.hh"
+
+using namespace std;
+
+MemCtrl::MemCtrl(const MemCtrlParams* p) :
+ QoS::MemCtrl(p),
+ port(name() + ".port", *this), isTimingMode(false),
+ retryRdReq(false), retryWrReq(false),
+ nextReqEvent([this]{ processNextReqEvent(); }, name()),
+ respondEvent([this]{ processRespondEvent(); }, name()),
+ dram(p->dram), nvm(p->nvm),
+ readBufferSize((dram ? dram->readBufferSize : 0) +
+ (nvm ? nvm->readBufferSize : 0)),
+ writeBufferSize((dram ? dram->writeBufferSize : 0) +
+ (nvm ? nvm->writeBufferSize : 0)),
+ writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0),
+ writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
+ minWritesPerSwitch(p->min_writes_per_switch),
+ writesThisTime(0), readsThisTime(0),
+ memSchedPolicy(p->mem_sched_policy),
+ frontendLatency(p->static_frontend_latency),
+ backendLatency(p->static_backend_latency),
+ commandWindow(p->command_window),
+ nextBurstAt(0), prevArrival(0),
+ nextReqTime(0),
+ stats(*this)
+{
+ DPRINTF(MemCtrl, "Setting up controller\n");
+ readQueue.resize(p->qos_priorities);
+ writeQueue.resize(p->qos_priorities);
+
+ // Hook up interfaces to the controller
+ if (dram)
+ dram->setCtrl(this, commandWindow);
+ if (nvm)
+ nvm->setCtrl(this, commandWindow);
+
+ fatal_if(!dram && !nvm, "Memory controller must have an interface");
+
+ // perform a basic check of the write thresholds
+ if (p->write_low_thresh_perc >= p->write_high_thresh_perc)
+ fatal("Write buffer low threshold %d must be smaller than the "
+ "high threshold %d\n", p->write_low_thresh_perc,
+ p->write_high_thresh_perc);
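+
+    // Illustrative note (assumed example configuration, not from the
+    // changeset): with a 64-entry combined write buffer and thresholds of
+    // 50 and 85 percent, writeLowThreshold is 32 and writeHighThreshold is
+    // 54; the controller keeps preferring reads until more than 54 writes
+    // are queued, then switches to draining writes until the queue has
+    // dropped back towards the low threshold.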
+}
+
+void
+MemCtrl::init()
+{
+ if (!port.isConnected()) {
+ fatal("MemCtrl %s is unconnected!\n", name());
+ } else {
+ port.sendRangeChange();
+ }
+}
+
+void
+MemCtrl::startup()
+{
+ // remember the memory system mode of operation
+ isTimingMode = system()->isTimingMode();
+
+ if (isTimingMode) {
+ // shift the bus busy time sufficiently far ahead that we never
+ // have to worry about negative values when computing the time for
+ // the next request, this will add an insignificant bubble at the
+ // start of simulation
+ nextBurstAt = curTick() + (dram ? dram->commandOffset() :
+ nvm->commandOffset());
+ }
+}
+
+Tick
+MemCtrl::recvAtomic(PacketPtr pkt)
+{
+ DPRINTF(MemCtrl, "recvAtomic: %s 0x%x\n",
+ pkt->cmdString(), pkt->getAddr());
+
+ panic_if(pkt->cacheResponding(), "Should not see packets where cache "
+ "is responding");
+
+ Tick latency = 0;
+ // do the actual memory access and turn the packet into a response
+ if (dram && dram->getAddrRange().contains(pkt->getAddr())) {
+ dram->access(pkt);
+
+ if (pkt->hasData()) {
+ // this value is not supposed to be accurate, just enough to
+ // keep things going, mimic a closed page
+ latency = dram->accessLatency();
+ }
+ } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) {
+ nvm->access(pkt);
+
+ if (pkt->hasData()) {
+ // this value is not supposed to be accurate, just enough to
+ // keep things going, mimic a closed page
+ latency = nvm->accessLatency();
+ }
+ } else {
+ panic("Can't handle address range for packet %s\n",
+ pkt->print());
+ }
+
+ return latency;
+}
+
+bool
+MemCtrl::readQueueFull(unsigned int neededEntries) const
+{
+ DPRINTF(MemCtrl,
+ "Read queue limit %d, current size %d, entries needed %d\n",
+ readBufferSize, totalReadQueueSize + respQueue.size(),
+ neededEntries);
+
+ auto rdsize_new = totalReadQueueSize + respQueue.size() + neededEntries;
+ return rdsize_new > readBufferSize;
+}
+
+bool
+MemCtrl::writeQueueFull(unsigned int neededEntries) const
+{
+ DPRINTF(MemCtrl,
+ "Write queue limit %d, current size %d, entries needed %d\n",
+ writeBufferSize, totalWriteQueueSize, neededEntries);
+
+ auto wrsize_new = (totalWriteQueueSize + neededEntries);
+ return wrsize_new > writeBufferSize;
+}
+
+void
+MemCtrl::addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram)
+{
+ // only add to the read queue here. whenever the request is
+ // eventually done, set the readyTime, and call schedule()
+ assert(!pkt->isWrite());
+
+ assert(pkt_count != 0);
+
+ // if the request size is larger than burst size, the pkt is split into
+ // multiple packets
+    // Note if the pkt starting address is not aligned to the burst size, the
+    // address of the first packet is kept unaligned. Subsequent packets
+ // are aligned to burst size boundaries. This is to ensure we accurately
+ // check read packets against packets in write queue.
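+    // Worked example (illustrative values): with a 64-byte burst size, a
+    // 16-byte read starting at address 0x7f8 is split into two packets, the
+    // first covering 0x7f8-0x7ff (8 bytes, unaligned start) and the second
+    // covering 0x800-0x807 (8 bytes, aligned to the next burst boundary).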
+ const Addr base_addr = pkt->getAddr();
+ Addr addr = base_addr;
+ unsigned pktsServicedByWrQ = 0;
+ BurstHelper* burst_helper = NULL;
+
+ uint32_t burst_size = is_dram ? dram->bytesPerBurst() :
+ nvm->bytesPerBurst();
+ for (int cnt = 0; cnt < pkt_count; ++cnt) {
+ unsigned size = std::min((addr | (burst_size - 1)) + 1,
+ base_addr + pkt->getSize()) - addr;
+ stats.readPktSize[ceilLog2(size)]++;
+ stats.readBursts++;
+ stats.masterReadAccesses[pkt->masterId()]++;
+
+ // First check write buffer to see if the data is already at
+ // the controller
+ bool foundInWrQ = false;
+ Addr burst_addr = burstAlign(addr, is_dram);
+ // if the burst address is not present then there is no need
+ // looking any further
+ if (isInWriteQueue.find(burst_addr) != isInWriteQueue.end()) {
+ for (const auto& vec : writeQueue) {
+ for (const auto& p : vec) {
+ // check if the read is subsumed in the write queue
+ // packet we are looking at
+ if (p->addr <= addr &&
+ ((addr + size) <= (p->addr + p->size))) {
+
+ foundInWrQ = true;
+ stats.servicedByWrQ++;
+ pktsServicedByWrQ++;
+ DPRINTF(MemCtrl,
+ "Read to addr %lld with size %d serviced by "
+ "write queue\n",
+ addr, size);
+ stats.bytesReadWrQ += burst_size;
+ break;
+ }
+ }
+ }
+ }
+
+ // If not found in the write q, make a memory packet and
+ // push it onto the read queue
+ if (!foundInWrQ) {
+
+ // Make the burst helper for split packets
+ if (pkt_count > 1 && burst_helper == NULL) {
+ DPRINTF(MemCtrl, "Read to addr %lld translates to %d "
+ "memory requests\n", pkt->getAddr(), pkt_count);
+ burst_helper = new BurstHelper(pkt_count);
+ }
+
+ MemPacket* mem_pkt;
+ if (is_dram) {
+ mem_pkt = dram->decodePacket(pkt, addr, size, true, true);
+ // increment read entries of the rank
+ dram->setupRank(mem_pkt->rank, true);
+ } else {
+ mem_pkt = nvm->decodePacket(pkt, addr, size, true, false);
+ // Increment count to trigger issue of non-deterministic read
+ nvm->setupRank(mem_pkt->rank, true);
+ // Default readyTime to Max; will be reset once read is issued
+ mem_pkt->readyTime = MaxTick;
+ }
+ mem_pkt->burstHelper = burst_helper;
+
+ assert(!readQueueFull(1));
+ stats.rdQLenPdf[totalReadQueueSize + respQueue.size()]++;
+
+ DPRINTF(MemCtrl, "Adding to read queue\n");
+
+ readQueue[mem_pkt->qosValue()].push_back(mem_pkt);
+
+ // log packet
+ logRequest(MemCtrl::READ, pkt->masterId(), pkt->qosValue(),
+ mem_pkt->addr, 1);
+
+ // Update stats
+ stats.avgRdQLen = totalReadQueueSize + respQueue.size();
+ }
+
+ // Starting address of next memory pkt (aligned to burst boundary)
+ addr = (addr | (burst_size - 1)) + 1;
+ }
+
+    // If all packets are serviced by the write queue, we send the response back
+ if (pktsServicedByWrQ == pkt_count) {
+ accessAndRespond(pkt, frontendLatency);
+ return;
+ }
+
+ // Update how many split packets are serviced by write queue
+ if (burst_helper != NULL)
+ burst_helper->burstsServiced = pktsServicedByWrQ;
+
+ // If we are not already scheduled to get a request out of the
+ // queue, do so now
+ if (!nextReqEvent.scheduled()) {
+ DPRINTF(MemCtrl, "Request scheduled immediately\n");
+ schedule(nextReqEvent, curTick());
+ }
+}
+
+void
+MemCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram)
+{
+ // only add to the write queue here. whenever the request is
+ // eventually done, set the readyTime, and call schedule()
+ assert(pkt->isWrite());
+
+ // if the request size is larger than burst size, the pkt is split into
+ // multiple packets
+ const Addr base_addr = pkt->getAddr();
+ Addr addr = base_addr;
+ uint32_t burst_size = is_dram ? dram->bytesPerBurst() :
+ nvm->bytesPerBurst();
+ for (int cnt = 0; cnt < pkt_count; ++cnt) {
+ unsigned size = std::min((addr | (burst_size - 1)) + 1,
+ base_addr + pkt->getSize()) - addr;
+ stats.writePktSize[ceilLog2(size)]++;
+ stats.writeBursts++;
+ stats.masterWriteAccesses[pkt->masterId()]++;
+
+ // see if we can merge with an existing item in the write
+ // queue and keep track of whether we have merged or not
+ bool merged = isInWriteQueue.find(burstAlign(addr, is_dram)) !=
+ isInWriteQueue.end();
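+        // Illustrative example: two 32-byte writes falling in the same
+        // 64-byte burst map to the same burst-aligned address, so the
+        // second is merged (counted in mergedWrBursts) instead of taking
+        // another write queue entry.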
+
+ // if the item was not merged we need to create a new write
+ // and enqueue it
+ if (!merged) {
+ MemPacket* mem_pkt;
+ if (is_dram) {
+ mem_pkt = dram->decodePacket(pkt, addr, size, false, true);
+ dram->setupRank(mem_pkt->rank, false);
+ } else {
+ mem_pkt = nvm->decodePacket(pkt, addr, size, false, false);
+ nvm->setupRank(mem_pkt->rank, false);
+ }
+ assert(totalWriteQueueSize < writeBufferSize);
+ stats.wrQLenPdf[totalWriteQueueSize]++;
+
+ DPRINTF(MemCtrl, "Adding to write queue\n");
+
+ writeQueue[mem_pkt->qosValue()].push_back(mem_pkt);
+ isInWriteQueue.insert(burstAlign(addr, is_dram));
+
+ // log packet
+ logRequest(MemCtrl::WRITE, pkt->masterId(), pkt->qosValue(),
+ mem_pkt->addr, 1);
+
+ assert(totalWriteQueueSize == isInWriteQueue.size());
+
+ // Update stats
+ stats.avgWrQLen = totalWriteQueueSize;
+
+ } else {
+ DPRINTF(MemCtrl,
+ "Merging write burst with existing queue entry\n");
+
+ // keep track of the fact that this burst effectively
+ // disappeared as it was merged with an existing one
+ stats.mergedWrBursts++;
+ }
+
+ // Starting address of next memory pkt (aligned to burst_size boundary)
+ addr = (addr | (burst_size - 1)) + 1;
+ }
+
+    // we do not wait for the writes to be sent to the actual memory,
+ // but instead take responsibility for the consistency here and
+ // snoop the write queue for any upcoming reads
+ // @todo, if a pkt size is larger than burst size, we might need a
+ // different front end latency
+ accessAndRespond(pkt, frontendLatency);
+
+ // If we are not already scheduled to get a request out of the
+ // queue, do so now
+ if (!nextReqEvent.scheduled()) {
+ DPRINTF(MemCtrl, "Request scheduled immediately\n");
+ schedule(nextReqEvent, curTick());
+ }
+}
+
+void
+MemCtrl::printQs() const
+{
+#if TRACING_ON
+ DPRINTF(MemCtrl, "===READ QUEUE===\n\n");
+ for (const auto& queue : readQueue) {
+ for (const auto& packet : queue) {
+ DPRINTF(MemCtrl, "Read %lu\n", packet->addr);
+ }
+ }
+
+ DPRINTF(MemCtrl, "\n===RESP QUEUE===\n\n");
+ for (const auto& packet : respQueue) {
+ DPRINTF(MemCtrl, "Response %lu\n", packet->addr);
+ }
+
+ DPRINTF(MemCtrl, "\n===WRITE QUEUE===\n\n");
+ for (const auto& queue : writeQueue) {
+ for (const auto& packet : queue) {
+ DPRINTF(MemCtrl, "Write %lu\n", packet->addr);
+ }
+ }
+#endif // TRACING_ON
+}
+
+bool
+MemCtrl::recvTimingReq(PacketPtr pkt)
+{
+ // This is where we enter from the outside world
+ DPRINTF(MemCtrl, "recvTimingReq: request %s addr %lld size %d\n",
+ pkt->cmdString(), pkt->getAddr(), pkt->getSize());
+
+ panic_if(pkt->cacheResponding(), "Should not see packets where cache "
+ "is responding");
+
+ panic_if(!(pkt->isRead() || pkt->isWrite()),
+ "Should only see read and writes at memory controller\n");
+
+ // Calc avg gap between requests
+ if (prevArrival != 0) {
+ stats.totGap += curTick() - prevArrival;
+ }
+ prevArrival = curTick();
+
+ // What type of media does this packet access?
+ bool is_dram;
+ if (dram && dram->getAddrRange().contains(pkt->getAddr())) {
+ is_dram = true;
+ } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) {
+ is_dram = false;
+ } else {
+ panic("Can't handle address range for packet %s\n",
+ pkt->print());
+ }
+
+
+ // Find out how many memory packets a pkt translates to
+    // If the burst size is equal to or larger than the pkt size, then a pkt
+ // translates to only one memory packet. Otherwise, a pkt translates to
+ // multiple memory packets
+ unsigned size = pkt->getSize();
+ uint32_t burst_size = is_dram ? dram->bytesPerBurst() :
+ nvm->bytesPerBurst();
+ unsigned offset = pkt->getAddr() & (burst_size - 1);
+ unsigned int pkt_count = divCeil(offset + size, burst_size);
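+    // Worked example (illustrative values): a 128-byte access with a
+    // 64-byte burst size starting 16 bytes into a burst gives offset = 16
+    // and pkt_count = divCeil(16 + 128, 64) = 3 memory packets.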
+
+ // run the QoS scheduler and assign a QoS priority value to the packet
+ qosSchedule( { &readQueue, &writeQueue }, burst_size, pkt);
+
+ // check local buffers and do not accept if full
+ if (pkt->isWrite()) {
+ assert(size != 0);
+ if (writeQueueFull(pkt_count)) {
+ DPRINTF(MemCtrl, "Write queue full, not accepting\n");
+ // remember that we have to retry this port
+ retryWrReq = true;
+ stats.numWrRetry++;
+ return false;
+ } else {
+ addToWriteQueue(pkt, pkt_count, is_dram);
+ stats.writeReqs++;
+ stats.bytesWrittenSys += size;
+ }
+ } else {
+ assert(pkt->isRead());
+ assert(size != 0);
+ if (readQueueFull(pkt_count)) {
+ DPRINTF(MemCtrl, "Read queue full, not accepting\n");
+ // remember that we have to retry this port
+ retryRdReq = true;
+ stats.numRdRetry++;
+ return false;
+ } else {
+ addToReadQueue(pkt, pkt_count, is_dram);
+ stats.readReqs++;
+ stats.bytesReadSys += size;
+ }
+ }
+
+ return true;
+}
+
+void
+MemCtrl::processRespondEvent()
+{
+ DPRINTF(MemCtrl,
+ "processRespondEvent(): Some req has reached its readyTime\n");
+
+ MemPacket* mem_pkt = respQueue.front();
+
+ if (mem_pkt->isDram()) {
+ // media specific checks and functions when read response is complete
+ dram->respondEvent(mem_pkt->rank);
+ }
+
+ if (mem_pkt->burstHelper) {
+ // it is a split packet
+ mem_pkt->burstHelper->burstsServiced++;
+ if (mem_pkt->burstHelper->burstsServiced ==
+ mem_pkt->burstHelper->burstCount) {
+ // we have now serviced all children packets of a system packet
+ // so we can now respond to the requester
+ // @todo we probably want to have a different front end and back
+ // end latency for split packets
+ accessAndRespond(mem_pkt->pkt, frontendLatency + backendLatency);
+ delete mem_pkt->burstHelper;
+ mem_pkt->burstHelper = NULL;
+ }
+ } else {
+ // it is not a split packet
+ accessAndRespond(mem_pkt->pkt, frontendLatency + backendLatency);
+ }
+
+ delete respQueue.front();
+ respQueue.pop_front();
+
+ if (!respQueue.empty()) {
+ assert(respQueue.front()->readyTime >= curTick());
+ assert(!respondEvent.scheduled());
+ schedule(respondEvent, respQueue.front()->readyTime);
+ } else {
+ // if there is nothing left in any queue, signal a drain
+ if (drainState() == DrainState::Draining &&
+ !totalWriteQueueSize && !totalReadQueueSize &&
+ allIntfDrained()) {
+
+ DPRINTF(Drain, "Controller done draining\n");
+ signalDrainDone();
+ } else if (mem_pkt->isDram()) {
+ // check the refresh state and kick the refresh event loop
+ // into action again if banks already closed and just waiting
+ // for read to complete
+ dram->checkRefreshState(mem_pkt->rank);
+ }
+ }
+
+ // We have made a location in the queue available at this point,
+ // so if there is a read that was forced to wait, retry now
+ if (retryRdReq) {
+ retryRdReq = false;
+ port.sendRetryReq();
+ }
+}
+
+MemPacketQueue::iterator
+MemCtrl::chooseNext(MemPacketQueue& queue, Tick extra_col_delay)
+{
+ // This method does the arbitration between requests.
+
+ MemPacketQueue::iterator ret = queue.end();
+
+ if (!queue.empty()) {
+ if (queue.size() == 1) {
+ // available rank corresponds to state refresh idle
+ MemPacket* mem_pkt = *(queue.begin());
+ if (packetReady(mem_pkt)) {
+ ret = queue.begin();
+ DPRINTF(MemCtrl, "Single request, going to a free rank\n");
+ } else {
+ DPRINTF(MemCtrl, "Single request, going to a busy rank\n");
+ }
+ } else if (memSchedPolicy == Enums::fcfs) {
+ // check if there is a packet going to a free rank
+ for (auto i = queue.begin(); i != queue.end(); ++i) {
+ MemPacket* mem_pkt = *i;
+ if (packetReady(mem_pkt)) {
+ ret = i;
+ break;
+ }
+ }
+ } else if (memSchedPolicy == Enums::frfcfs) {
+ ret = chooseNextFRFCFS(queue, extra_col_delay);
+ } else {
+ panic("No scheduling policy chosen\n");
+ }
+ }
+ return ret;
+}
+
+MemPacketQueue::iterator
+MemCtrl::chooseNextFRFCFS(MemPacketQueue& queue, Tick extra_col_delay)
+{
+ auto selected_pkt_it = queue.end();
+ Tick col_allowed_at = MaxTick;
+
+ // time we need to issue a column command to be seamless
+ const Tick min_col_at = std::max(nextBurstAt + extra_col_delay, curTick());
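+    // e.g. when the bus direction is switching, extra_col_delay adds the
+    // minimum turnaround gap on top of nextBurstAt, so only packets that
+    // can issue at or after that point count as seamless.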
+
+ // find optimal packet for each interface
+ if (dram && nvm) {
+ // create 2nd set of parameters for NVM
+ auto nvm_pkt_it = queue.end();
+ Tick nvm_col_at = MaxTick;
+
+ // Select packet by default to give priority if both
+ // can issue at the same time or seamlessly
+ std::tie(selected_pkt_it, col_allowed_at) =
+ dram->chooseNextFRFCFS(queue, min_col_at);
+ std::tie(nvm_pkt_it, nvm_col_at) =
+ nvm->chooseNextFRFCFS(queue, min_col_at);
+
+ // Compare DRAM and NVM and select NVM if it can issue
+ // earlier than the DRAM packet
+ if (col_allowed_at > nvm_col_at) {
+ selected_pkt_it = nvm_pkt_it;
+ }
+ } else if (dram) {
+ std::tie(selected_pkt_it, col_allowed_at) =
+ dram->chooseNextFRFCFS(queue, min_col_at);
+ } else if (nvm) {
+ std::tie(selected_pkt_it, col_allowed_at) =
+ nvm->chooseNextFRFCFS(queue, min_col_at);
+ }
+
+ if (selected_pkt_it == queue.end()) {
+ DPRINTF(MemCtrl, "%s no available packets found\n", __func__);
+ }
+
+ return selected_pkt_it;
+}
+
+void
+MemCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
+{
+ DPRINTF(MemCtrl, "Responding to Address %lld.. \n",pkt->getAddr());
+
+ bool needsResponse = pkt->needsResponse();
+ // do the actual memory access which also turns the packet into a
+ // response
+ if (dram && dram->getAddrRange().contains(pkt->getAddr())) {
+ dram->access(pkt);
+ } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) {
+ nvm->access(pkt);
+ } else {
+ panic("Can't handle address range for packet %s\n",
+ pkt->print());
+ }
+
+ // turn packet around to go back to requester if response expected
+ if (needsResponse) {
+ // access already turned the packet into a response
+ assert(pkt->isResponse());
+ // response_time consumes the static latency and is charged also
+ // with headerDelay that takes into account the delay provided by
+ // the xbar and also the payloadDelay that takes into account the
+ // number of data beats.
+ Tick response_time = curTick() + static_latency + pkt->headerDelay +
+ pkt->payloadDelay;
+ // Here we reset the timing of the packet before sending it out.
+ pkt->headerDelay = pkt->payloadDelay = 0;
+
+ // queue the packet in the response queue to be sent out after
+ // the static latency has passed
+ port.schedTimingResp(pkt, response_time);
+ } else {
+ // @todo the packet is going to be deleted, and the MemPacket
+ // is still having a pointer to it
+ pendingDelete.reset(pkt);
+ }
+
+ DPRINTF(MemCtrl, "Done\n");
+
+ return;
+}
+
+void
+MemCtrl::pruneBurstTick()
+{
+ auto it = burstTicks.begin();
+ while (it != burstTicks.end()) {
+ auto current_it = it++;
+ if (curTick() > *current_it) {
+ DPRINTF(MemCtrl, "Removing burstTick for %d\n", *current_it);
+ burstTicks.erase(current_it);
+ }
+ }
+}
+
+Tick
+MemCtrl::getBurstWindow(Tick cmd_tick)
+{
+ // get tick aligned to burst window
+ Tick burst_offset = cmd_tick % commandWindow;
+ return (cmd_tick - burst_offset);
+}
+
+Tick
+MemCtrl::verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst)
+{
+ // start with assumption that there is no contention on command bus
+ Tick cmd_at = cmd_tick;
+
+ // get tick aligned to burst window
+ Tick burst_tick = getBurstWindow(cmd_tick);
+
+ // verify that we have command bandwidth to issue the command
+ // if not, iterate over next window(s) until slot found
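+    // e.g. if the current window already holds max_cmds_per_burst commands,
+    // the command is pushed to the start of the next window, and so on,
+    // until a window with a free slot is found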
+ while (burstTicks.count(burst_tick) >= max_cmds_per_burst) {
+ DPRINTF(MemCtrl, "Contention found on command bus at %d\n",
+ burst_tick);
+ burst_tick += commandWindow;
+ cmd_at = burst_tick;
+ }
+
+ // add command into burst window and return corresponding Tick
+ burstTicks.insert(burst_tick);
+ return cmd_at;
+}
+
+Tick
+MemCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst,
+ Tick max_multi_cmd_split)
+{
+ // start with assumption that there is no contention on command bus
+ Tick cmd_at = cmd_tick;
+
+ // get tick aligned to burst window
+ Tick burst_tick = getBurstWindow(cmd_tick);
+
+ // Command timing requirements are from 2nd command
+ // Start with assumption that 2nd command will issue at cmd_at and
+ // find prior slot for 1st command to issue
+ // Given a maximum latency of max_multi_cmd_split between the commands,
+ // find the burst at the maximum latency prior to cmd_at
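+    // Illustrative example (assumed values): with a 6ns command window and
+    // a 4ns maximum split, a second command at tick 8ns places the first
+    // command in the window starting at 0ns, one window earlier.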
+ Tick burst_offset = 0;
+ Tick first_cmd_offset = cmd_tick % commandWindow;
+ while (max_multi_cmd_split > (first_cmd_offset + burst_offset)) {
+ burst_offset += commandWindow;
+ }
+ // get the earliest burst aligned address for first command
+ // ensure that the time does not go negative
+ Tick first_cmd_tick = burst_tick - std::min(burst_offset, burst_tick);
+
+ // Can required commands issue?
+ bool first_can_issue = false;
+ bool second_can_issue = false;
+ // verify that we have command bandwidth to issue the command(s)
+ while (!first_can_issue || !second_can_issue) {
+ bool same_burst = (burst_tick == first_cmd_tick);
+ auto first_cmd_count = burstTicks.count(first_cmd_tick);
+ auto second_cmd_count = same_burst ? first_cmd_count + 1 :
+ burstTicks.count(burst_tick);
+
+ first_can_issue = first_cmd_count < max_cmds_per_burst;
+ second_can_issue = second_cmd_count < max_cmds_per_burst;
+
+ if (!second_can_issue) {
+ DPRINTF(MemCtrl, "Contention (cmd2) found on command bus at %d\n",
+ burst_tick);
+ burst_tick += commandWindow;
+ cmd_at = burst_tick;
+ }
+
+ // Verify max_multi_cmd_split isn't violated when command 2 is shifted
+ // If commands initially were issued in same burst, they are
+ // now in consecutive bursts and can still issue B2B
+ bool gap_violated = !same_burst &&
+ ((burst_tick - first_cmd_tick) > max_multi_cmd_split);
+
+ if (!first_can_issue || (!second_can_issue && gap_violated)) {
+ DPRINTF(MemCtrl, "Contention (cmd1) found on command bus at %d\n",
+ first_cmd_tick);
+ first_cmd_tick += commandWindow;
+ }
+ }
+
+ // Add command to burstTicks
+ burstTicks.insert(burst_tick);
+ burstTicks.insert(first_cmd_tick);
+
+ return cmd_at;
+}
+
+bool
+MemCtrl::inReadBusState(bool next_state) const
+{
+ // check the bus state
+ if (next_state) {
+ // use busStateNext to get the state that will be used
+ // for the next burst
+ return (busStateNext == MemCtrl::READ);
+ } else {
+ return (busState == MemCtrl::READ);
+ }
+}
+
+bool
+MemCtrl::inWriteBusState(bool next_state) const
+{
+ // check the bus state
+ if (next_state) {
+ // use busStateNext to get the state that will be used
+ // for the next burst
+ return (busStateNext == MemCtrl::WRITE);
+ } else {
+ return (busState == MemCtrl::WRITE);
+ }
+}
+
+void
+MemCtrl::doBurstAccess(MemPacket* mem_pkt)
+{
+ // first clean up the burstTick set, removing old entries
+ // before adding new entries for next burst
+ pruneBurstTick();
+
+ // When was command issued?
+ Tick cmd_at;
+
+ // Issue the next burst and update bus state to reflect
+ // when previous command was issued
+ if (mem_pkt->isDram()) {
+ std::vector<MemPacketQueue>& queue = selQueue(mem_pkt->isRead());
+ std::tie(cmd_at, nextBurstAt) =
+ dram->doBurstAccess(mem_pkt, nextBurstAt, queue);
+
+ // Update timing for NVM ranks if NVM is configured on this channel
+ if (nvm)
+ nvm->addRankToRankDelay(cmd_at);
+
+ } else {
+ std::tie(cmd_at, nextBurstAt) =
+ nvm->doBurstAccess(mem_pkt, nextBurstAt);
+
+        // Update timing for DRAM ranks if DRAM is configured on this channel
+ if (dram)
+ dram->addRankToRankDelay(cmd_at);
+
+ }
+
+ DPRINTF(MemCtrl, "Access to %lld, ready at %lld next burst at %lld.\n",
+ mem_pkt->addr, mem_pkt->readyTime, nextBurstAt);
+
+ // Update the minimum timing between the requests, this is a
+ // conservative estimate of when we have to schedule the next
+    // request to not introduce any unnecessary bubbles. In most cases
+ // we will wake up sooner than we have to.
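+    // For DRAM the offset is tRP + tRCD, i.e. enough time to precharge and
+    // activate a new row before the next burst slot; for NVM it is tBURST.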
+ nextReqTime = nextBurstAt - (dram ? dram->commandOffset() :
+ nvm->commandOffset());
+
+
+ // Update the common bus stats
+ if (mem_pkt->isRead()) {
+ ++readsThisTime;
+ // Update latency stats
+ stats.masterReadTotalLat[mem_pkt->masterId()] +=
+ mem_pkt->readyTime - mem_pkt->entryTime;
+ stats.masterReadBytes[mem_pkt->masterId()] += mem_pkt->size;
+ } else {
+ ++writesThisTime;
+ stats.masterWriteBytes[mem_pkt->masterId()] += mem_pkt->size;
+ stats.masterWriteTotalLat[mem_pkt->masterId()] +=
+ mem_pkt->readyTime - mem_pkt->entryTime;
+ }
+}
+
+void
+MemCtrl::processNextReqEvent()
+{
+ // transition is handled by QoS algorithm if enabled
+ if (turnPolicy) {
+ // select bus state - only done if QoS algorithms are in use
+ busStateNext = selectNextBusState();
+ }
+
+ // detect bus state change
+ bool switched_cmd_type = (busState != busStateNext);
+ // record stats
+ recordTurnaroundStats();
+
+ DPRINTF(MemCtrl, "QoS Turnarounds selected state %s %s\n",
+ (busState==MemCtrl::READ)?"READ":"WRITE",
+ switched_cmd_type?"[turnaround triggered]":"");
+
+ if (switched_cmd_type) {
+ if (busState == MemCtrl::READ) {
+ DPRINTF(MemCtrl,
+ "Switching to writes after %d reads with %d reads "
+ "waiting\n", readsThisTime, totalReadQueueSize);
+ stats.rdPerTurnAround.sample(readsThisTime);
+ readsThisTime = 0;
+ } else {
+ DPRINTF(MemCtrl,
+ "Switching to reads after %d writes with %d writes "
+ "waiting\n", writesThisTime, totalWriteQueueSize);
+ stats.wrPerTurnAround.sample(writesThisTime);
+ writesThisTime = 0;
+ }
+ }
+
+ // updates current state
+ busState = busStateNext;
+
+ if (nvm) {
+ for (auto queue = readQueue.rbegin();
+ queue != readQueue.rend(); ++queue) {
+            // assume that we have the command bandwidth to issue a
+            // non-deterministic NVM read along with an additional RD/WR
+            // burst and the needed bank operations
+ if (nvm->readsWaitingToIssue()) {
+ // select non-deterministic NVM read to issue
+ nvm->chooseRead(*queue);
+ }
+ }
+ }
+
+ // check ranks for refresh/wakeup - uses busStateNext, so done after
+ // turnaround decisions
+ // Default to busy status and update based on interface specifics
+ bool dram_busy = dram ? dram->isBusy() : true;
+ bool nvm_busy = true;
+ bool all_writes_nvm = false;
+ if (nvm) {
+ all_writes_nvm = nvm->numWritesQueued == totalWriteQueueSize;
+ bool read_queue_empty = totalReadQueueSize == 0;
+ nvm_busy = nvm->isBusy(read_queue_empty, all_writes_nvm);
+ }
+ // Default state of unused interface is 'true'
+ // Simply AND the busy signals to determine if system is busy
+ if (dram_busy && nvm_busy) {
+ // if all ranks are refreshing wait for them to finish
+ // and stall this state machine without taking any further
+ // action, and do not schedule a new nextReqEvent
+ return;
+ }
+
+ // when we get here it is either a read or a write
+ if (busState == READ) {
+
+ // track if we should switch or not
+ bool switch_to_writes = false;
+
+ if (totalReadQueueSize == 0) {
+ // In the case there is no read request to go next,
+ // trigger writes if we have passed the low threshold (or
+ // if we are draining)
+ if (!(totalWriteQueueSize == 0) &&
+ (drainState() == DrainState::Draining ||
+ totalWriteQueueSize > writeLowThreshold)) {
+
+ DPRINTF(MemCtrl,
+ "Switching to writes due to read queue empty\n");
+ switch_to_writes = true;
+ } else {
+ // check if we are drained
+ // not done draining until in PWR_IDLE state
+ // ensuring all banks are closed and
+ // have exited low power states
+ if (drainState() == DrainState::Draining &&
+ respQueue.empty() && allIntfDrained()) {
+
+ DPRINTF(Drain, "MemCtrl controller done draining\n");
+ signalDrainDone();
+ }
+
+ // nothing to do, not even any point in scheduling an
+ // event for the next request
+ return;
+ }
+ } else {
+
+ bool read_found = false;
+ MemPacketQueue::iterator to_read;
+ uint8_t prio = numPriorities();
+
+ for (auto queue = readQueue.rbegin();
+ queue != readQueue.rend(); ++queue) {
+
+ prio--;
+
+ DPRINTF(QOS,
+ "Checking READ queue [%d] priority [%d elements]\n",
+ prio, queue->size());
+
+ // Figure out which read request goes next
+ // If we are changing command type, incorporate the minimum
+ // bus turnaround delay which will be rank to rank delay
+ to_read = chooseNext((*queue), switched_cmd_type ?
+ minWriteToReadDataGap() : 0);
+
+ if (to_read != queue->end()) {
+ // candidate read found
+ read_found = true;
+ break;
+ }
+ }
+
+ // if no read to an available rank is found then return
+ // at this point. There could be writes to the available ranks
+ // which are above the required threshold. However, to
+ // avoid adding more complexity to the code, return and wait
+ // for a refresh event to kick things into action again.
+ if (!read_found) {
+ DPRINTF(MemCtrl, "No Reads Found - exiting\n");
+ return;
+ }
+
+ auto mem_pkt = *to_read;
+
+ doBurstAccess(mem_pkt);
+
+ // sanity check
+ assert(mem_pkt->size <= (mem_pkt->isDram() ?
+ dram->bytesPerBurst() :
+ nvm->bytesPerBurst()) );
+ assert(mem_pkt->readyTime >= curTick());
+
+ // log the response
+ logResponse(MemCtrl::READ, (*to_read)->masterId(),
+ mem_pkt->qosValue(), mem_pkt->getAddr(), 1,
+ mem_pkt->readyTime - mem_pkt->entryTime);
+
+
+ // Insert into response queue. It will be sent back to the
+ // requester at its readyTime
+ if (respQueue.empty()) {
+ assert(!respondEvent.scheduled());
+ schedule(respondEvent, mem_pkt->readyTime);
+ } else {
+ assert(respQueue.back()->readyTime <= mem_pkt->readyTime);
+ assert(respondEvent.scheduled());
+ }
+
+ respQueue.push_back(mem_pkt);
+
+            // we have so many writes that we have to transition;
+            // don't transition if the writeRespQueue is full and
+            // there are no other writes that can issue
+ if ((totalWriteQueueSize > writeHighThreshold) &&
+ !(nvm && all_writes_nvm && nvm->writeRespQueueFull())) {
+ switch_to_writes = true;
+ }
+
+            // remove the request from the queue
+            // the iterator is no longer valid.
+ readQueue[mem_pkt->qosValue()].erase(to_read);
+ }
+
+        // switching to writes, either because the read queue is empty
+        // and the writes have passed the low threshold (or we are
+        // draining), or because the writes hit the high threshold
+ if (switch_to_writes) {
+ // transition to writing
+ busStateNext = WRITE;
+ }
+ } else {
+
+ bool write_found = false;
+ MemPacketQueue::iterator to_write;
+ uint8_t prio = numPriorities();
+
+ for (auto queue = writeQueue.rbegin();
+ queue != writeQueue.rend(); ++queue) {
+
+ prio--;
+
+ DPRINTF(QOS,
+ "Checking WRITE queue [%d] priority [%d elements]\n",
+ prio, queue->size());
+
+ // If we are changing command type, incorporate the minimum
+ // bus turnaround delay
+ to_write = chooseNext((*queue),
+ switched_cmd_type ? minReadToWriteDataGap() : 0);
+
+ if (to_write != queue->end()) {
+ write_found = true;
+ break;
+ }
+ }
+
+        // if no writes to a rank that is available to service requests
+        // (i.e. a rank in the refresh idle state) are found, then return.
+        // There could be reads to the available ranks. However, to
+        // avoid adding more complexity to the code, return at this point and
+        // wait for a refresh event to kick things into action again.
+ if (!write_found) {
+ DPRINTF(MemCtrl, "No Writes Found - exiting\n");
+ return;
+ }
+
+ auto mem_pkt = *to_write;
+
+ // sanity check
+ assert(mem_pkt->size <= (mem_pkt->isDram() ?
+ dram->bytesPerBurst() :
+ nvm->bytesPerBurst()) );
+
+ doBurstAccess(mem_pkt);
+
+ isInWriteQueue.erase(burstAlign(mem_pkt->addr, mem_pkt->isDram()));
+
+ // log the response
+ logResponse(MemCtrl::WRITE, mem_pkt->masterId(),
+ mem_pkt->qosValue(), mem_pkt->getAddr(), 1,
+ mem_pkt->readyTime - mem_pkt->entryTime);
+
+
+ // remove the request from the queue - the iterator is no longer valid
+ writeQueue[mem_pkt->qosValue()].erase(to_write);
+
+ delete mem_pkt;
+
+ // If we emptied the write queue, or got sufficiently below the
+ // threshold (using the minWritesPerSwitch as the hysteresis) and
+ // are not draining, or we have reads waiting and have done enough
+ // writes, then switch to reads.
+ // If we are interfacing to NVM and have filled the writeRespQueue,
+ // with only NVM writes in Q, then switch to reads
+ bool below_threshold =
+ totalWriteQueueSize + minWritesPerSwitch < writeLowThreshold;
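+        // Illustrative example (hypothetical thresholds): with
+        // writeLowThreshold = 16 and minWritesPerSwitch = 4, this condition
+        // only holds once fewer than 12 writes remain queued, giving some
+        // hysteresis around the threshold before switching back to reads.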
+
+ if (totalWriteQueueSize == 0 ||
+ (below_threshold && drainState() != DrainState::Draining) ||
+ (totalReadQueueSize && writesThisTime >= minWritesPerSwitch) ||
+ (totalReadQueueSize && nvm && nvm->writeRespQueueFull() &&
+ all_writes_nvm)) {
+
+ // turn the bus back around for reads again
+ busStateNext = MemCtrl::READ;
+
+            // note that we switch back to reads also in the idle
+ // case, which eventually will check for any draining and
+ // also pause any further scheduling if there is really
+ // nothing to do
+ }
+ }
+ // It is possible that a refresh to another rank kicks things back into
+ // action before reaching this point.
+ if (!nextReqEvent.scheduled())
+ schedule(nextReqEvent, std::max(nextReqTime, curTick()));
+
+ // If there is space available and we have writes waiting then let
+ // them retry. This is done here to ensure that the retry does not
+ // cause a nextReqEvent to be scheduled before we do so as part of
+ // the next request processing
+ if (retryWrReq && totalWriteQueueSize < writeBufferSize) {
+ retryWrReq = false;
+ port.sendRetryReq();
+ }
+}
+
+bool
+MemCtrl::packetReady(MemPacket* pkt)
+{
+ return (pkt->isDram() ?
+ dram->burstReady(pkt) : nvm->burstReady(pkt));
+}
+
+Tick
+MemCtrl::minReadToWriteDataGap()
+{
+ Tick dram_min = dram ? dram->minReadToWriteDataGap() : MaxTick;
+ Tick nvm_min = nvm ? nvm->minReadToWriteDataGap() : MaxTick;
+ return std::min(dram_min, nvm_min);
+}
+
+Tick
+MemCtrl::minWriteToReadDataGap()
+{
+ Tick dram_min = dram ? dram->minWriteToReadDataGap() : MaxTick;
+ Tick nvm_min = nvm ? nvm->minWriteToReadDataGap() : MaxTick;
+ return std::min(dram_min, nvm_min);
+}
+
+Addr
+MemCtrl::burstAlign(Addr addr, bool is_dram) const
+{
+ if (is_dram)
+ return (addr & ~(Addr(dram->bytesPerBurst() - 1)));
+ else
+ return (addr & ~(Addr(nvm->bytesPerBurst() - 1)));
+}
+
+MemCtrl::CtrlStats::CtrlStats(MemCtrl &_ctrl)
+ : Stats::Group(&_ctrl),
+ ctrl(_ctrl),
+
+ ADD_STAT(readReqs, "Number of read requests accepted"),
+ ADD_STAT(writeReqs, "Number of write requests accepted"),
+
+ ADD_STAT(readBursts,
+ "Number of controller read bursts, "
+ "including those serviced by the write queue"),
+ ADD_STAT(writeBursts,
+ "Number of controller write bursts, "
+ "including those merged in the write queue"),
+ ADD_STAT(servicedByWrQ,
+ "Number of controller read bursts serviced by the write queue"),
+ ADD_STAT(mergedWrBursts,
+ "Number of controller write bursts merged with an existing one"),
+
+ ADD_STAT(neitherReadNorWriteReqs,
+ "Number of requests that are neither read nor write"),
+
+ ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"),
+ ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"),
+
+ ADD_STAT(numRdRetry, "Number of times read queue was full causing retry"),
+ ADD_STAT(numWrRetry, "Number of times write queue was full causing retry"),
+
+ ADD_STAT(readPktSize, "Read request sizes (log2)"),
+ ADD_STAT(writePktSize, "Write request sizes (log2)"),
+
+ ADD_STAT(rdQLenPdf, "What read queue length does an incoming req see"),
+ ADD_STAT(wrQLenPdf, "What write queue length does an incoming req see"),
+
+ ADD_STAT(rdPerTurnAround,
+ "Reads before turning the bus around for writes"),
+ ADD_STAT(wrPerTurnAround,
+ "Writes before turning the bus around for reads"),
+
+ ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"),
+ ADD_STAT(bytesReadSys, "Total read bytes from the system interface side"),
+ ADD_STAT(bytesWrittenSys,
+ "Total written bytes from the system interface side"),
+
+ ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"),
+ ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"),
+
+ ADD_STAT(totGap, "Total gap between requests"),
+ ADD_STAT(avgGap, "Average gap between requests"),
+
+ ADD_STAT(masterReadBytes, "Per-master bytes read from memory"),
+    ADD_STAT(masterWriteBytes, "Per-master bytes written to memory"),
+ ADD_STAT(masterReadRate,
+ "Per-master bytes read from memory rate (Bytes/sec)"),
+ ADD_STAT(masterWriteRate,
+ "Per-master bytes write to memory rate (Bytes/sec)"),
+ ADD_STAT(masterReadAccesses,
+ "Per-master read serviced memory accesses"),
+ ADD_STAT(masterWriteAccesses,
+ "Per-master write serviced memory accesses"),
+ ADD_STAT(masterReadTotalLat,
+ "Per-master read total memory access latency"),
+ ADD_STAT(masterWriteTotalLat,
+ "Per-master write total memory access latency"),
+ ADD_STAT(masterReadAvgLat,
+ "Per-master read average memory access latency"),
+ ADD_STAT(masterWriteAvgLat,
+ "Per-master write average memory access latency")
+
+{
+}
+
+void
+MemCtrl::CtrlStats::regStats()
+{
+ using namespace Stats;
+
+ assert(ctrl.system());
+ const auto max_masters = ctrl.system()->maxMasters();
+
+ avgRdQLen.precision(2);
+ avgWrQLen.precision(2);
+
+ readPktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1);
+ writePktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1);
+
+ rdQLenPdf.init(ctrl.readBufferSize);
+ wrQLenPdf.init(ctrl.writeBufferSize);
+
+ rdPerTurnAround
+ .init(ctrl.readBufferSize)
+ .flags(nozero);
+ wrPerTurnAround
+ .init(ctrl.writeBufferSize)
+ .flags(nozero);
+
+ avgRdBWSys.precision(2);
+ avgWrBWSys.precision(2);
+ avgGap.precision(2);
+
+ // per-master bytes read and written to memory
+ masterReadBytes
+ .init(max_masters)
+ .flags(nozero | nonan);
+
+ masterWriteBytes
+ .init(max_masters)
+ .flags(nozero | nonan);
+
+ // per-master bytes read and written to memory rate
+ masterReadRate
+ .flags(nozero | nonan)
+ .precision(12);
+
+ masterReadAccesses
+ .init(max_masters)
+ .flags(nozero);
+
+ masterWriteAccesses
+ .init(max_masters)
+ .flags(nozero);
+
+ masterReadTotalLat
+ .init(max_masters)
+ .flags(nozero | nonan);
+
+ masterReadAvgLat
+ .flags(nonan)
+ .precision(2);
+
+ masterWriteRate
+ .flags(nozero | nonan)
+ .precision(12);
+
+ masterWriteTotalLat
+ .init(max_masters)
+ .flags(nozero | nonan);
+
+ masterWriteAvgLat
+ .flags(nonan)
+ .precision(2);
+
+ for (int i = 0; i < max_masters; i++) {
+ const std::string master = ctrl.system()->getMasterName(i);
+ masterReadBytes.subname(i, master);
+ masterReadRate.subname(i, master);
+ masterWriteBytes.subname(i, master);
+ masterWriteRate.subname(i, master);
+ masterReadAccesses.subname(i, master);
+ masterWriteAccesses.subname(i, master);
+ masterReadTotalLat.subname(i, master);
+ masterReadAvgLat.subname(i, master);
+ masterWriteTotalLat.subname(i, master);
+ masterWriteAvgLat.subname(i, master);
+ }
+
+ // Formula stats
+ avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
+ avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
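+    // For example (illustrative numbers): 2,000,000 bytes read over 0.001 s
+    // of simulated time gives avgRdBWSys = (2000000 / 1000000) / 0.001 = 2000.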
+
+ avgGap = totGap / (readReqs + writeReqs);
+
+ masterReadRate = masterReadBytes / simSeconds;
+ masterWriteRate = masterWriteBytes / simSeconds;
+ masterReadAvgLat = masterReadTotalLat / masterReadAccesses;
+ masterWriteAvgLat = masterWriteTotalLat / masterWriteAccesses;
+}
+
+void
+MemCtrl::recvFunctional(PacketPtr pkt)
+{
+ if (dram && dram->getAddrRange().contains(pkt->getAddr())) {
+ // rely on the abstract memory
+ dram->functionalAccess(pkt);
+ } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) {
+ // rely on the abstract memory
+ nvm->functionalAccess(pkt);
+ } else {
+ panic("Can't handle address range for packet %s\n",
+ pkt->print());
+ }
+}
+
+Port &
+MemCtrl::getPort(const string &if_name, PortID idx)
+{
+ if (if_name != "port") {
+ return QoS::MemCtrl::getPort(if_name, idx);
+ } else {
+ return port;
+ }
+}
+
+bool
+MemCtrl::allIntfDrained() const
+{
+ // ensure dram is in power down and refresh IDLE states
+ bool dram_drained = !dram || dram->allRanksDrained();
+ // No outstanding NVM writes
+ // All other queues verified as needed with calling logic
+ bool nvm_drained = !nvm || nvm->allRanksDrained();
+ return (dram_drained && nvm_drained);
+}
+
+DrainState
+MemCtrl::drain()
+{
+ // if there is anything in any of our internal queues, keep track
+ // of that as well
+ if (!(!totalWriteQueueSize && !totalReadQueueSize && respQueue.empty() &&
+ allIntfDrained())) {
+
+ DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d,"
+ " resp: %d\n", totalWriteQueueSize, totalReadQueueSize,
+ respQueue.size());
+
+ // the only queue that is not drained automatically over time
+ // is the write queue, thus kick things into action if needed
+ if (!totalWriteQueueSize && !nextReqEvent.scheduled()) {
+ schedule(nextReqEvent, curTick());
+ }
+
+ if (dram)
+ dram->drainRanks();
+
+ return DrainState::Draining;
+ } else {
+ return DrainState::Drained;
+ }
+}
+
+void
+MemCtrl::drainResume()
+{
+ if (!isTimingMode && system()->isTimingMode()) {
+ // if we switched to timing mode, kick things into action,
+ // and behave as if we restored from a checkpoint
+ startup();
+ dram->startup();
+ } else if (isTimingMode && !system()->isTimingMode()) {
+ // if we switch from timing mode, stop the refresh events to
+ // not cause issues with KVM
+ if (dram)
+ dram->suspend();
+ }
+
+ // update the mode
+ isTimingMode = system()->isTimingMode();
+}
+
+MemCtrl::MemoryPort::MemoryPort(const std::string& name, MemCtrl& _ctrl)
+ : QueuedSlavePort(name, &_ctrl, queue), queue(_ctrl, *this, true),
+ ctrl(_ctrl)
+{ }
+
+AddrRangeList
+MemCtrl::MemoryPort::getAddrRanges() const
+{
+ AddrRangeList ranges;
+ if (ctrl.dram) {
+ DPRINTF(DRAM, "Pushing DRAM ranges to port\n");
+ ranges.push_back(ctrl.dram->getAddrRange());
+ }
+ if (ctrl.nvm) {
+ DPRINTF(NVM, "Pushing NVM ranges to port\n");
+ ranges.push_back(ctrl.nvm->getAddrRange());
+ }
+ return ranges;
+}
+
+void
+MemCtrl::MemoryPort::recvFunctional(PacketPtr pkt)
+{
+ pkt->pushLabel(ctrl.name());
+
+ if (!queue.trySatisfyFunctional(pkt)) {
+ // Default implementation of SimpleTimingPort::recvFunctional()
+ // calls recvAtomic() and throws away the latency; we can save a
+ // little here by just not calculating the latency.
+ ctrl.recvFunctional(pkt);
+ }
+
+ pkt->popLabel();
+}
+
+Tick
+MemCtrl::MemoryPort::recvAtomic(PacketPtr pkt)
+{
+ return ctrl.recvAtomic(pkt);
+}
+
+bool
+MemCtrl::MemoryPort::recvTimingReq(PacketPtr pkt)
+{
+ // pass it to the memory controller
+ return ctrl.recvTimingReq(pkt);
+}
+
+MemCtrl*
+MemCtrlParams::create()
+{
+ return new MemCtrl(this);
+}
--- /dev/null
+/*
+ * Copyright (c) 2012-2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * MemCtrl declaration
+ */
+
+#ifndef __MEM_CTRL_HH__
+#define __MEM_CTRL_HH__
+
+#include <deque>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "base/callback.hh"
+#include "base/statistics.hh"
+#include "enums/MemSched.hh"
+#include "mem/qos/mem_ctrl.hh"
+#include "mem/qport.hh"
+#include "params/MemCtrl.hh"
+#include "sim/eventq.hh"
+
+class DRAMInterface;
+class NVMInterface;
+
+/**
+ * A burst helper helps organize and manage a packet that is larger than
+ * the memory burst size. A system packet that is larger than the burst size
+ * is split into multiple packets and all those packets point to
+ * a single burst helper such that we know when the whole packet is served.
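+ *
+ * For example (illustrative): a 128-byte system packet with a 64-byte
+ * burst size is split into two MemPackets that share one BurstHelper
+ * with burstCount == 2; the full response is only sent back once
+ * burstsServiced reaches 2.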
+ */
+class BurstHelper
+{
+ public:
+
+    /** Number of bursts required for a system packet **/
+ const unsigned int burstCount;
+
+ /** Number of bursts serviced so far for a system packet **/
+ unsigned int burstsServiced;
+
+ BurstHelper(unsigned int _burstCount)
+ : burstCount(_burstCount), burstsServiced(0)
+ { }
+};
+
+/**
+ * A memory packet stores a packet along with the timestamp of when
+ * it entered the queue, and also the decoded address.
+ */
+class MemPacket
+{
+ public:
+
+ /** When did request enter the controller */
+ const Tick entryTime;
+
+ /** When will request leave the controller */
+ Tick readyTime;
+
+ /** This comes from the outside world */
+ const PacketPtr pkt;
+
+ /** MasterID associated with the packet */
+ const MasterID _masterId;
+
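+    /** True if the packet is a read, false if it is a write */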
+ const bool read;
+
+ /** Does this packet access DRAM?*/
+ const bool dram;
+
+ /** Will be populated by address decoder */
+ const uint8_t rank;
+ const uint8_t bank;
+ const uint32_t row;
+
+ /**
+ * Bank id is calculated considering banks in all the ranks
+ * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and
+ * bankId = 8 --> rank1, bank0
+ */
+ const uint16_t bankId;
+
+ /**
+ * The starting address of the packet.
+ * This address could be unaligned to burst size boundaries. The
+ * reason is to keep the address offset so we can accurately check
+ * incoming read packets with packets in the write queue.
+ */
+ Addr addr;
+
+ /**
+     * The size of this dram packet in bytes.
+     * It is always equal to or smaller than the burst size.
+ */
+ unsigned int size;
+
+ /**
+ * A pointer to the BurstHelper if this MemPacket is a split packet
+ * If not a split packet (common case), this is set to NULL
+ */
+ BurstHelper* burstHelper;
+
+ /**
+ * QoS value of the encapsulated packet read at queuing time
+ */
+ uint8_t _qosValue;
+
+ /**
+ * Set the packet QoS value
+ * (interface compatibility with Packet)
+ */
+ inline void qosValue(const uint8_t qv) { _qosValue = qv; }
+
+ /**
+ * Get the packet QoS value
+ * (interface compatibility with Packet)
+ */
+ inline uint8_t qosValue() const { return _qosValue; }
+
+ /**
+ * Get the packet MasterID
+ * (interface compatibility with Packet)
+ */
+ inline MasterID masterId() const { return _masterId; }
+
+ /**
+ * Get the packet size
+ * (interface compatibility with Packet)
+ */
+ inline unsigned int getSize() const { return size; }
+
+ /**
+ * Get the packet address
+ * (interface compatibility with Packet)
+ */
+ inline Addr getAddr() const { return addr; }
+
+ /**
+     * Return true if it's a read packet
+ * (interface compatibility with Packet)
+ */
+ inline bool isRead() const { return read; }
+
+ /**
+     * Return true if it's a write packet
+ * (interface compatibility with Packet)
+ */
+ inline bool isWrite() const { return !read; }
+
+ /**
+     * Return true if it's a DRAM access
+ */
+ inline bool isDram() const { return dram; }
+
+ MemPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _rank,
+ uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr,
+ unsigned int _size)
+ : entryTime(curTick()), readyTime(curTick()), pkt(_pkt),
+ _masterId(pkt->masterId()),
+ read(is_read), dram(is_dram), rank(_rank), bank(_bank), row(_row),
+ bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL),
+ _qosValue(_pkt->qosValue())
+ { }
+
+};
+
+// The memory packets are stored in multiple deques,
+// one per QoS priority
+typedef std::deque<MemPacket*> MemPacketQueue;
+
+
+/**
+ * The memory controller is a single-channel memory controller capturing
+ * the most important timing constraints associated with a
+ * contemporary controller. For multi-channel memory systems, the controller
+ * is combined with a crossbar model, with the channel address
+ * interleaving taking part in the crossbar.
+ *
+ * As a basic design principle, this controller
+ * model is not cycle callable, but instead uses events to: 1) decide
+ * when new decisions can be made, 2) when resources become available,
+ * 3) when things are to be considered done, and 4) when to send
+ * things back. The controller interfaces to media specific interfaces
+ * to enable flexible topologies.
+ * Through these simple principles, the model delivers
+ * high performance, and lots of flexibility, allowing users to
+ * evaluate the system impact of a wide range of memory technologies.
+ *
+ * For more details, please see Hansson et al, "Simulating DRAM
+ * controllers for future system architecture exploration",
+ * Proc. ISPASS, 2014. If you use this model as part of your research
+ * please cite the paper.
+ *
+ */
+class MemCtrl : public QoS::MemCtrl
+{
+ private:
+
+ // For now, make use of a queued slave port to avoid dealing with
+ // flow control for the responses being sent back
+ class MemoryPort : public QueuedSlavePort
+ {
+
+ RespPacketQueue queue;
+ MemCtrl& ctrl;
+
+ public:
+
+ MemoryPort(const std::string& name, MemCtrl& _ctrl);
+
+ protected:
+
+ Tick recvAtomic(PacketPtr pkt);
+
+ void recvFunctional(PacketPtr pkt);
+
+ bool recvTimingReq(PacketPtr);
+
+ virtual AddrRangeList getAddrRanges() const;
+
+ };
+
+ /**
+     * Our incoming port; for a multi-ported controller, add a crossbar
+ * in front of it
+ */
+ MemoryPort port;
+
+ /**
+ * Remember if the memory system is in timing mode
+ */
+ bool isTimingMode;
+
+ /**
+ * Remember if we have to retry a request when available.
+ */
+ bool retryRdReq;
+ bool retryWrReq;
+
+ /**
+     * A number of things are required to set up "events" in gem5.
+     * When the event "respondEvent" occurs, for example, the method
+     * processRespondEvent is called; no parameters are allowed
+     * in these methods
+ */
+ void processNextReqEvent();
+ EventFunctionWrapper nextReqEvent;
+
+ void processRespondEvent();
+ EventFunctionWrapper respondEvent;
+
+ /**
+ * Check if the read queue has room for more entries
+ *
+ * @param pkt_count The number of entries needed in the read queue
+ * @return true if read queue is full, false otherwise
+ */
+ bool readQueueFull(unsigned int pkt_count) const;
+
+ /**
+ * Check if the write queue has room for more entries
+ *
+ * @param pkt_count The number of entries needed in the write queue
+ * @return true if write queue is full, false otherwise
+ */
+ bool writeQueueFull(unsigned int pkt_count) const;
+
+ /**
+ * When a new read comes in, first check if the write q has a
+ * pending request to the same address.\ If not, decode the
+     * address to populate rank/bank/row, create one or multiple
+ * "mem_pkt", and push them to the back of the read queue.\
+ * If this is the only
+ * read request in the system, schedule an event to start
+ * servicing it.
+ *
+ * @param pkt The request packet from the outside world
+     * @param pkt_count The number of memory bursts the pkt
+     * translates to. If pkt size is larger than one full burst,
+     * then pkt_count is greater than one.
+     * @param is_dram Does this packet access DRAM?
+ */
+ void addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram);
+
+ /**
+ * Decode the incoming pkt, create a mem_pkt and push to the
+     * back of the write queue.\ If the write q length is more than
+     * the threshold specified by the user, i.e. the queue is beginning
+ * to get full, stop reads, and start draining writes.
+ *
+ * @param pkt The request packet from the outside world
+     * @param pkt_count The number of memory bursts the pkt
+     * translates to. If pkt size is larger than one full burst,
+     * then pkt_count is greater than one.
+     * @param is_dram Does this packet access DRAM?
+ */
+ void addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram);
+
+ /**
+ * Actually do the burst based on media specific access function.
+ * Update bus statistics when complete.
+ *
+ * @param mem_pkt The memory packet created from the outside world pkt
+ */
+ void doBurstAccess(MemPacket* mem_pkt);
+
+ /**
+ * When a packet reaches its "readyTime" in the response Q,
+ * use the "access()" method in AbstractMemory to actually
+ * create the response packet, and send it back to the outside
+ * world requestor.
+ *
+ * @param pkt The packet from the outside world
+ * @param static_latency Static latency to add before sending the packet
+ */
+ void accessAndRespond(PacketPtr pkt, Tick static_latency);
+
+ /**
+ * Determine if there is a packet that can issue.
+ *
+ * @param pkt The packet to evaluate
+ */
+ bool packetReady(MemPacket* pkt);
+
+ /**
+ * Calculate the minimum delay used when scheduling a read-to-write
+     * transition.
+     * @return minimum delay
+ */
+ Tick minReadToWriteDataGap();
+
+ /**
+ * Calculate the minimum delay used when scheduling a write-to-read
+     * transition.
+     * @return minimum delay
+ */
+ Tick minWriteToReadDataGap();
+
+ /**
+     * The memory scheduler/arbiter - picks which request needs to
+ * go next, based on the specified policy such as FCFS or FR-FCFS
+ * and moves it to the head of the queue.
+ * Prioritizes accesses to the same rank as previous burst unless
+ * controller is switching command type.
+ *
+ * @param queue Queued requests to consider
+ * @param extra_col_delay Any extra delay due to a read/write switch
+ * @return an iterator to the selected packet, else queue.end()
+ */
+ MemPacketQueue::iterator chooseNext(MemPacketQueue& queue,
+ Tick extra_col_delay);
+
+ /**
+ * For FR-FCFS policy reorder the read/write queue depending on row buffer
+ * hits and earliest bursts available in memory
+ *
+ * @param queue Queued requests to consider
+ * @param extra_col_delay Any extra delay due to a read/write switch
+ * @return an iterator to the selected packet, else queue.end()
+ */
+ MemPacketQueue::iterator chooseNextFRFCFS(MemPacketQueue& queue,
+ Tick extra_col_delay);
+
+ /**
+ * Calculate burst window aligned tick
+ *
+ * @param cmd_tick Initial tick of command
+ * @return burst window aligned tick
+ */
+ Tick getBurstWindow(Tick cmd_tick);
+
+ /**
+ * Used for debugging to observe the contents of the queues.
+ */
+ void printQs() const;
+
+ /**
+ * Burst-align an address.
+ *
+ * @param addr The potentially unaligned address
+ * @param is_dram Does this packet access DRAM?
+ *
+ * @return An address aligned to a memory burst
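+     *
+     * For example (illustrative, assuming a hypothetical 64-byte burst):
+     * burstAlign(0x1234567, true) clears the low six bits and returns
+     * 0x1234540, the start of the burst containing that address.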
+ */
+ Addr burstAlign(Addr addr, bool is_dram) const;
+
+ /**
+ * The controller's main read and write queues,
+ * with support for QoS reordering
+ */
+ std::vector<MemPacketQueue> readQueue;
+ std::vector<MemPacketQueue> writeQueue;
+
+ /**
+ * To avoid iterating over the write queue to check for
+ * overlapping transactions, maintain a set of burst addresses
+ * that are currently queued. Since we merge writes to the same
+ * location we never have more than one address to the same burst
+ * address.
+ */
+ std::unordered_set<Addr> isInWriteQueue;
+
+ /**
+ * Response queue where read packets wait after we're done working
+ * with them, but it's not time to send the response yet. The
+ * responses are stored separately mostly to keep the code clean
+ * and help with events scheduling. For all logical purposes such
+ * as sizing the read queue, this and the main read queue need to
+ * be added together.
+ */
+ std::deque<MemPacket*> respQueue;
+
+ /**
+ * Holds count of commands issued in burst window starting at
+ * defined Tick. This is used to ensure that the command bandwidth
+ * does not exceed the allowable media constraints.
+ */
+ std::unordered_multiset<Tick> burstTicks;
+
+ /**
+     * Pointer to the interface of the actual DRAM media, when connected
+ */
+ DRAMInterface* const dram;
+
+ /**
+ * Create pointer to interface of the actual nvm media when connected
+ */
+ NVMInterface* const nvm;
+
+ /**
+ * The following are basic design parameters of the memory
+ * controller, and are initialized based on parameter values.
+ * The rowsPerBank is determined based on the capacity, number of
+ * ranks and banks, the burst size, and the row buffer size.
+ */
+ const uint32_t readBufferSize;
+ const uint32_t writeBufferSize;
+ const uint32_t writeHighThreshold;
+ const uint32_t writeLowThreshold;
+ const uint32_t minWritesPerSwitch;
+ uint32_t writesThisTime;
+ uint32_t readsThisTime;
+
+ /**
+ * Memory controller configuration initialized based on parameter
+ * values.
+ */
+ Enums::MemSched memSchedPolicy;
+
+ /**
+ * Pipeline latency of the controller frontend. The frontend
+ * contribution is added to writes (that complete when they are in
+     * the write buffer) and reads that are serviced by the write buffer.
+ */
+ const Tick frontendLatency;
+
+ /**
+ * Pipeline latency of the backend and PHY. Along with the
+ * frontend contribution, this latency is added to reads serviced
+ * by the memory.
+ */
+ const Tick backendLatency;
+
+ /**
+ * Length of a command window, used to check
+ * command bandwidth
+ */
+ const Tick commandWindow;
+
+ /**
+ * Till when must we wait before issuing next RD/WR burst?
+ */
+ Tick nextBurstAt;
+
+ Tick prevArrival;
+
+ /**
+ * The soonest you have to start thinking about the next request
+ * is the longest access time that can occur before
+ * nextBurstAt. Assuming you need to precharge, open a new row,
+ * and access, it is tRP + tRCD + tCL.
+ */
+ Tick nextReqTime;
+
+ struct CtrlStats : public Stats::Group
+ {
+ CtrlStats(MemCtrl &ctrl);
+
+ void regStats() override;
+
+ MemCtrl &ctrl;
+
+ // All statistics that the model needs to capture
+ Stats::Scalar readReqs;
+ Stats::Scalar writeReqs;
+ Stats::Scalar readBursts;
+ Stats::Scalar writeBursts;
+ Stats::Scalar servicedByWrQ;
+ Stats::Scalar mergedWrBursts;
+ Stats::Scalar neitherReadNorWriteReqs;
+ // Average queue lengths
+ Stats::Average avgRdQLen;
+ Stats::Average avgWrQLen;
+
+ Stats::Scalar numRdRetry;
+ Stats::Scalar numWrRetry;
+ Stats::Vector readPktSize;
+ Stats::Vector writePktSize;
+ Stats::Vector rdQLenPdf;
+ Stats::Vector wrQLenPdf;
+ Stats::Histogram rdPerTurnAround;
+ Stats::Histogram wrPerTurnAround;
+
+ Stats::Scalar bytesReadWrQ;
+ Stats::Scalar bytesReadSys;
+ Stats::Scalar bytesWrittenSys;
+ // Average bandwidth
+ Stats::Formula avgRdBWSys;
+ Stats::Formula avgWrBWSys;
+
+ Stats::Scalar totGap;
+ Stats::Formula avgGap;
+
+ // per-master bytes read and written to memory
+ Stats::Vector masterReadBytes;
+ Stats::Vector masterWriteBytes;
+
+ // per-master bytes read and written to memory rate
+ Stats::Formula masterReadRate;
+ Stats::Formula masterWriteRate;
+
+ // per-master read and write serviced memory accesses
+ Stats::Vector masterReadAccesses;
+ Stats::Vector masterWriteAccesses;
+
+ // per-master read and write total memory access latency
+ Stats::Vector masterReadTotalLat;
+ Stats::Vector masterWriteTotalLat;
+
+        // per-master read and write average memory access latency
+ Stats::Formula masterReadAvgLat;
+ Stats::Formula masterWriteAvgLat;
+ };
+
+ CtrlStats stats;
+
+ /**
+ * Upstream caches need this packet until true is returned, so
+ * hold it for deletion until a subsequent call
+ */
+ std::unique_ptr<Packet> pendingDelete;
+
+ /**
+ * Select either the read or write queue
+ *
+ * @param is_read The current burst is a read, select read queue
+ * @return a reference to the appropriate queue
+ */
+ std::vector<MemPacketQueue>& selQueue(bool is_read)
+ {
+ return (is_read ? readQueue : writeQueue);
+ };
+
+ /**
+ * Remove commands that have already issued from burstTicks
+ */
+ void pruneBurstTick();
+
+ public:
+
+ MemCtrl(const MemCtrlParams* p);
+
+ /**
+     * Ensure that all interfaces have drained commands
+ *
+ * @return bool flag, set once drain complete
+ */
+ bool allIntfDrained() const;
+
+ DrainState drain() override;
+
+ /**
+ * Check for command bus contention for single cycle command.
+ * If there is contention, shift command to next burst.
+ * Check verifies that the commands issued per burst is less
+ * than a defined max number, maxCommandsPerWindow.
+ * Therefore, contention per cycle is not verified and instead
+ * is done based on a burst window.
+ *
+ * @param cmd_tick Initial tick of command, to be verified
+ * @param max_cmds_per_burst Number of commands that can issue
+ * in a burst window
+ * @return tick for command issue without contention
+ */
+ Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst);
+
+ /**
+ * Check for command bus contention for multi-cycle (2 currently)
+ * command. If there is contention, shift command(s) to next burst.
+ * Check verifies that the commands issued per burst is less
+ * than a defined max number, maxCommandsPerWindow.
+ * Therefore, contention per cycle is not verified and instead
+ * is done based on a burst window.
+ *
+ * @param cmd_tick Initial tick of command, to be verified
+ * @param max_multi_cmd_split Maximum delay between commands
+ * @param max_cmds_per_burst Number of commands that can issue
+ * in a burst window
+ * @return tick for command issue without contention
+ */
+ Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst,
+ Tick max_multi_cmd_split = 0);
+
+ /**
+ * Is there a respondEvent scheduled?
+ *
+ * @return true if event is scheduled
+ */
+ bool respondEventScheduled() const { return respondEvent.scheduled(); }
+
+ /**
+ * Is there a read/write burst Event scheduled?
+ *
+ * @return true if event is scheduled
+ */
+ bool requestEventScheduled() const { return nextReqEvent.scheduled(); }
+
+ /**
+     * Restart the controller.
+     * This can be used by interfaces to restart the
+     * scheduler after maintenance commands complete
+ *
+     * @param tick Tick at which to schedule the next event
+ */
+ void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); }
+
+ /**
+ * Check the current direction of the memory channel
+ *
+ * @param next_state Check either the current or next bus state
+ * @return True when bus is currently in a read state
+ */
+ bool inReadBusState(bool next_state) const;
+
+ /**
+ * Check the current direction of the memory channel
+ *
+ * @param next_state Check either the current or next bus state
+ * @return True when bus is currently in a write state
+ */
+ bool inWriteBusState(bool next_state) const;
+
+ Port &getPort(const std::string &if_name,
+ PortID idx=InvalidPortID) override;
+
+ virtual void init() override;
+ virtual void startup() override;
+ virtual void drainResume() override;
+
+ protected:
+
+ Tick recvAtomic(PacketPtr pkt);
+ void recvFunctional(PacketPtr pkt);
+ bool recvTimingReq(PacketPtr pkt);
+
+};
+
+#endif //__MEM_CTRL_HH__
--- /dev/null
+/*
+ * Copyright (c) 2010-2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "mem/mem_interface.hh"
+
+#include "base/bitfield.hh"
+#include "base/trace.hh"
+#include "debug/DRAM.hh"
+#include "debug/DRAMPower.hh"
+#include "debug/DRAMState.hh"
+#include "debug/NVM.hh"
+#include "sim/system.hh"
+
+using namespace std;
+using namespace Data;
+
+MemInterface::MemInterface(const MemInterfaceParams* _p)
+ : AbstractMemory(_p),
+ addrMapping(_p->addr_mapping),
+ burstSize((_p->devices_per_rank * _p->burst_length *
+ _p->device_bus_width) / 8),
+ deviceSize(_p->device_size),
+ deviceRowBufferSize(_p->device_rowbuffer_size),
+ devicesPerRank(_p->devices_per_rank),
+ rowBufferSize(devicesPerRank * deviceRowBufferSize),
+ burstsPerRowBuffer(rowBufferSize / burstSize),
+ burstsPerStripe(range.interleaved() ?
+ range.granularity() / burstSize : 1),
+ ranksPerChannel(_p->ranks_per_channel),
+ banksPerRank(_p->banks_per_rank), rowsPerBank(0),
+ tCK(_p->tCK), tCS(_p->tCS), tBURST(_p->tBURST),
+ tRTW(_p->tRTW),
+ tWTR(_p->tWTR),
+ readBufferSize(_p->read_buffer_size),
+ writeBufferSize(_p->write_buffer_size)
+{}
+
+void
+MemInterface::setCtrl(MemCtrl* _ctrl, unsigned int command_window)
+{
+ ctrl = _ctrl;
+ maxCommandsPerWindow = command_window / tCK;
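+    // For example (illustrative): a command_window of two clock periods
+    // (2 * tCK) allows at most two commands per burst window on the
+    // shared command bus.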
+}
+
+MemPacket*
+MemInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr,
+ unsigned size, bool is_read, bool is_dram)
+{
+ // decode the address based on the address mapping scheme, with
+ // Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and
+ // channel, respectively
+ uint8_t rank;
+ uint8_t bank;
+ // use a 64-bit unsigned during the computations as the row is
+ // always the top bits, and check before creating the packet
+ uint64_t row;
+
+ // Get packed address, starting at 0
+ Addr addr = getCtrlAddr(pkt_addr);
+
+ // truncate the address to a memory burst, which makes it unique to
+ // a specific buffer, row, bank, rank and channel
+ addr = addr / burstSize;
+
+ // we have removed the lowest order address bits that denote the
+ // position within the column
+ if (addrMapping == Enums::RoRaBaChCo || addrMapping == Enums::RoRaBaCoCh) {
+ // the lowest order bits denote the column to ensure that
+ // sequential cache lines occupy the same row
+ addr = addr / burstsPerRowBuffer;
+
+ // after the channel bits, get the bank bits to interleave
+ // over the banks
+ bank = addr % banksPerRank;
+ addr = addr / banksPerRank;
+
+ // after the bank, we get the rank bits which thus interleaves
+ // over the ranks
+ rank = addr % ranksPerChannel;
+ addr = addr / ranksPerChannel;
+
+ // lastly, get the row bits, no need to remove them from addr
+ row = addr % rowsPerBank;
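+
+        // Illustrative decode (hypothetical geometry: 64B bursts, 128 bursts
+        // per row buffer, 8 banks per rank, 2 ranks): byte address 0x7A280
+        // becomes burst 7818; the low bits (column 10 within the row buffer)
+        // are dropped, leaving bank 5, rank 1, row 3.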
+ } else if (addrMapping == Enums::RoCoRaBaCh) {
+ // with emerging technologies, could have small page size with
+ // interleaving granularity greater than row buffer
+ if (burstsPerStripe > burstsPerRowBuffer) {
+ // remove column bits which are a subset of burstsPerStripe
+ addr = addr / burstsPerRowBuffer;
+ } else {
+ // remove lower column bits below channel bits
+ addr = addr / burstsPerStripe;
+ }
+
+ // start with the bank bits, as this provides the maximum
+ // opportunity for parallelism between requests
+ bank = addr % banksPerRank;
+ addr = addr / banksPerRank;
+
+ // next get the rank bits
+ rank = addr % ranksPerChannel;
+ addr = addr / ranksPerChannel;
+
+        // next, the higher-order column bits
+ if (burstsPerStripe < burstsPerRowBuffer) {
+ addr = addr / (burstsPerRowBuffer / burstsPerStripe);
+ }
+
+ // lastly, get the row bits, no need to remove them from addr
+ row = addr % rowsPerBank;
+ } else
+ panic("Unknown address mapping policy chosen!");
+
+ assert(rank < ranksPerChannel);
+ assert(bank < banksPerRank);
+ assert(row < rowsPerBank);
+ assert(row < Bank::NO_ROW);
+
+ DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
+ pkt_addr, rank, bank, row);
+
+ // create the corresponding memory packet with the entry time and
+ // ready time set to the current tick, the latter will be updated
+ // later
+ uint16_t bank_id = banksPerRank * rank + bank;
+
+ return new MemPacket(pkt, is_read, is_dram, rank, bank, row, bank_id,
+ pkt_addr, size);
+}
+
+pair<MemPacketQueue::iterator, Tick>
+DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
+{
+ vector<uint32_t> earliest_banks(ranksPerChannel, 0);
+
+ // Has minBankPrep been called to populate earliest_banks?
+ bool filled_earliest_banks = false;
+    // can the PRE/ACT sequence be done without impacting utilization?
+ bool hidden_bank_prep = false;
+
+ // search for seamless row hits first, if no seamless row hit is
+ // found then determine if there are other packets that can be issued
+ // without incurring additional bus delay due to bank timing
+    // Will select closed rows first to enable more open row possibilities
+ // in future selections
+ bool found_hidden_bank = false;
+
+ // remember if we found a row hit, not seamless, but bank prepped
+ // and ready
+ bool found_prepped_pkt = false;
+
+ // if we have no row hit, prepped or not, and no seamless packet,
+ // just go for the earliest possible
+ bool found_earliest_pkt = false;
+
+ Tick selected_col_at = MaxTick;
+ auto selected_pkt_it = queue.end();
+
+ for (auto i = queue.begin(); i != queue.end() ; ++i) {
+ MemPacket* pkt = *i;
+
+ // select optimal DRAM packet in Q
+ if (pkt->isDram()) {
+ const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
+ const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
+ bank.wrAllowedAt;
+
+ DPRINTF(DRAM, "%s checking DRAM packet in bank %d, row %d\n",
+ __func__, pkt->bank, pkt->row);
+
+ // check if rank is not doing a refresh and thus is available,
+ // if not, jump to the next packet
+ if (burstReady(pkt)) {
+
+ DPRINTF(DRAM,
+ "%s bank %d - Rank %d available\n", __func__,
+ pkt->bank, pkt->rank);
+
+ // check if it is a row hit
+ if (bank.openRow == pkt->row) {
+ // no additional rank-to-rank or same bank-group
+ // delays, or we switched read/write and might as well
+ // go for the row hit
+ if (col_allowed_at <= min_col_at) {
+ // FCFS within the hits, giving priority to
+ // commands that can issue seamlessly, without
+ // additional delay, such as same rank accesses
+ // and/or different bank-group accesses
+ DPRINTF(DRAM, "%s Seamless buffer hit\n", __func__);
+ selected_pkt_it = i;
+ selected_col_at = col_allowed_at;
+ // no need to look through the remaining queue entries
+ break;
+ } else if (!found_hidden_bank && !found_prepped_pkt) {
+ // if we did not find a packet to a closed row that can
+ // issue the bank commands without incurring delay, and
+ // did not yet find a packet to a prepped row, remember
+ // the current one
+ selected_pkt_it = i;
+ selected_col_at = col_allowed_at;
+ found_prepped_pkt = true;
+ DPRINTF(DRAM, "%s Prepped row buffer hit\n", __func__);
+ }
+ } else if (!found_earliest_pkt) {
+ // if we have not initialised the bank status, do it
+                    // now, and only once per scheduling decision
+ if (!filled_earliest_banks) {
+ // determine entries with earliest bank delay
+ std::tie(earliest_banks, hidden_bank_prep) =
+ minBankPrep(queue, min_col_at);
+ filled_earliest_banks = true;
+ }
+
+ // bank is amongst first available banks
+ // minBankPrep will give priority to packets that can
+ // issue seamlessly
+ if (bits(earliest_banks[pkt->rank],
+ pkt->bank, pkt->bank)) {
+ found_earliest_pkt = true;
+ found_hidden_bank = hidden_bank_prep;
+
+ // give priority to packets that can issue
+ // bank commands 'behind the scenes'
+ // any additional delay if any will be due to
+ // col-to-col command requirements
+ if (hidden_bank_prep || !found_prepped_pkt) {
+ selected_pkt_it = i;
+ selected_col_at = col_allowed_at;
+ }
+ }
+ }
+ } else {
+ DPRINTF(DRAM, "%s bank %d - Rank %d not available\n", __func__,
+ pkt->bank, pkt->rank);
+ }
+ }
+ }
+
+ if (selected_pkt_it == queue.end()) {
+ DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__);
+ }
+
+ return make_pair(selected_pkt_it, selected_col_at);
+}
+
+void
+DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
+ Tick act_tick, uint32_t row)
+{
+ assert(rank_ref.actTicks.size() == activationLimit);
+
+ // verify that we have command bandwidth to issue the activate
+ // if not, shift to next burst window
+ Tick act_at;
+ if (twoCycleActivate)
+ act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD);
+ else
+ act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow);
+
+ DPRINTF(DRAM, "Activate at tick %d\n", act_at);
+
+ // update the open row
+ assert(bank_ref.openRow == Bank::NO_ROW);
+ bank_ref.openRow = row;
+
+ // start counting anew, this covers both the case when we
+ // auto-precharged, and when this access is forced to
+ // precharge
+ bank_ref.bytesAccessed = 0;
+ bank_ref.rowAccesses = 0;
+
+ ++rank_ref.numBanksActive;
+ assert(rank_ref.numBanksActive <= banksPerRank);
+
+ DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got "
+ "%d active\n", bank_ref.bank, rank_ref.rank, act_at,
+ ranks[rank_ref.rank]->numBanksActive);
+
+ rank_ref.cmdList.push_back(Command(MemCommand::ACT, bank_ref.bank,
+ act_at));
+
+ DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_at, tCK) -
+ timeStampOffset, bank_ref.bank, rank_ref.rank);
+
+ // The next access has to respect tRAS for this bank
+ bank_ref.preAllowedAt = act_at + tRAS;
+
+ // Respect the row-to-column command delay for both read and write cmds
+ bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt);
+ bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt);
+
+ // start by enforcing tRRD
+ for (int i = 0; i < banksPerRank; i++) {
+ // next activate to any bank in this rank must not happen
+ // before tRRD
+ if (bankGroupArch && (bank_ref.bankgr == rank_ref.banks[i].bankgr)) {
+ // bank group architecture requires longer delays between
+ // ACT commands within the same bank group. Use tRRD_L
+ // in this case
+ rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD_L,
+ rank_ref.banks[i].actAllowedAt);
+ } else {
+ // use shorter tRRD value when either
+            // 1) bank group architecture is not supported
+ // 2) bank is in a different bank group
+ rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD,
+ rank_ref.banks[i].actAllowedAt);
+ }
+ }
+
+ // next, we deal with tXAW, if the activation limit is disabled
+ // then we directly schedule an activate power event
+ if (!rank_ref.actTicks.empty()) {
+ // sanity check
+ if (rank_ref.actTicks.back() &&
+ (act_at - rank_ref.actTicks.back()) < tXAW) {
+ panic("Got %d activates in window %d (%llu - %llu) which "
+ "is smaller than %llu\n", activationLimit, act_at -
+ rank_ref.actTicks.back(), act_at,
+ rank_ref.actTicks.back(), tXAW);
+ }
+
+        // shift the times used for the bookkeeping, the last element
+ // (highest index) is the oldest one and hence the lowest value
+ rank_ref.actTicks.pop_back();
+
+        // record a new activation (in the future)
+ rank_ref.actTicks.push_front(act_at);
+
+ // cannot activate more than X times in time window tXAW, push the
+ // next one (the X + 1'st activate) to be tXAW away from the
+ // oldest in our window of X
+ if (rank_ref.actTicks.back() &&
+ (act_at - rank_ref.actTicks.back()) < tXAW) {
+ DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate "
+ "no earlier than %llu\n", activationLimit,
+ rank_ref.actTicks.back() + tXAW);
+ for (int j = 0; j < banksPerRank; j++)
+ // next activate must not happen before end of window
+ rank_ref.banks[j].actAllowedAt =
+ std::max(rank_ref.actTicks.back() + tXAW,
+ rank_ref.banks[j].actAllowedAt);
+ }
+ }
+
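+    // Illustrative example (hypothetical values): with activationLimit
+    // X = 4 and tXAW = 40ns, the fifth ACT can be scheduled no earlier
+    // than 40ns after the oldest of the four most recent ACTs tracked
+    // in actTicks.
+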
+ // at the point when this activate takes place, make sure we
+ // transition to the active power state
+ if (!rank_ref.activateEvent.scheduled())
+ schedule(rank_ref.activateEvent, act_at);
+ else if (rank_ref.activateEvent.when() > act_at)
+ // move it sooner in time
+ reschedule(rank_ref.activateEvent, act_at);
+}
+
+void
+DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick,
+ bool auto_or_preall, bool trace)
+{
+ // make sure the bank has an open row
+ assert(bank.openRow != Bank::NO_ROW);
+
+ // sample the bytes per activate here since we are closing
+ // the page
+ stats.bytesPerActivate.sample(bank.bytesAccessed);
+
+ bank.openRow = Bank::NO_ROW;
+
+ Tick pre_at = pre_tick;
+ if (auto_or_preall) {
+ // no precharge allowed before this one
+ bank.preAllowedAt = pre_at;
+ } else {
+ // Issuing an explicit PRE command
+ // Verify that we have command bandwidth to issue the precharge
+ // if not, shift to next burst window
+ pre_at = ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow);
+ // enforce tPPD
+ for (int i = 0; i < banksPerRank; i++) {
+ rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
+ rank_ref.banks[i].preAllowedAt);
+ }
+ }
+
+ Tick pre_done_at = pre_at + tRP;
+
+ bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at);
+
+ assert(rank_ref.numBanksActive != 0);
+ --rank_ref.numBanksActive;
+
+ DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got "
+ "%d active\n", bank.bank, rank_ref.rank, pre_at,
+ rank_ref.numBanksActive);
+
+ if (trace) {
+
+ rank_ref.cmdList.push_back(Command(MemCommand::PRE, bank.bank,
+ pre_at));
+ DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) -
+ timeStampOffset, bank.bank, rank_ref.rank);
+ }
+
+ // if we look at the current number of active banks we might be
+ // tempted to think the DRAM is now idle, however this can be
+ // undone by an activate that is scheduled to happen before we
+ // would have reached the idle state, so schedule an event and
+    // instead check once we actually make it to the point in time when
+ // the (last) precharge takes place
+ if (!rank_ref.prechargeEvent.scheduled()) {
+ schedule(rank_ref.prechargeEvent, pre_done_at);
+ // New event, increment count
+ ++rank_ref.outstandingEvents;
+ } else if (rank_ref.prechargeEvent.when() < pre_done_at) {
+ reschedule(rank_ref.prechargeEvent, pre_done_at);
+ }
+}
+
+pair<Tick, Tick>
+DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
+ const std::vector<MemPacketQueue>& queue)
+{
+ DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
+ mem_pkt->addr, mem_pkt->rank, mem_pkt->bank, mem_pkt->row);
+
+ // get the rank
+ Rank& rank_ref = *ranks[mem_pkt->rank];
+
+ assert(rank_ref.inRefIdleState());
+
+ // are we in or transitioning to a low-power state and have not scheduled
+ // a power-up event?
+ // if so, wake up from power down to issue RD/WR burst
+ if (rank_ref.inLowPowerState) {
+ assert(rank_ref.pwrState != PWR_SREF);
+ rank_ref.scheduleWakeUpEvent(tXP);
+ }
+
+ // get the bank
+ Bank& bank_ref = rank_ref.banks[mem_pkt->bank];
+
+ // for the state we need to track if it is a row hit or not
+ bool row_hit = true;
+
+ // Determine the access latency and update the bank state
+ if (bank_ref.openRow == mem_pkt->row) {
+ // nothing to do
+ } else {
+ row_hit = false;
+
+ // If there is a page open, precharge it.
+ if (bank_ref.openRow != Bank::NO_ROW) {
+ prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt,
+ curTick()));
+ }
+
+ // next we need to account for the delay in activating the page
+ Tick act_tick = std::max(bank_ref.actAllowedAt, curTick());
+
+ // Record the activation and deal with all the global timing
+        // constraints caused by a new activation (tRRD and tXAW)
+ activateBank(rank_ref, bank_ref, act_tick, mem_pkt->row);
+ }
+
+ // respect any constraints on the command (e.g. tRCD or tCCD)
+ const Tick col_allowed_at = mem_pkt->isRead() ?
+ bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;
+
+ // we need to wait until the bus is available before we can issue
+ // the command; need to ensure minimum bus delay requirement is met
+ Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()});
+
+ // verify that we have command bandwidth to issue the burst
+ // if not, shift to next burst window
+ if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay))
+ cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
+ else
+ cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
+
+ // if we are interleaving bursts, ensure that
+ // 1) we don't double interleave on next burst issue
+ // 2) we are at an interleave boundary; if not, shift to next boundary
+ Tick burst_gap = tBURST_MIN;
+ if (burstInterleave) {
+ if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) {
+ // already interleaving, push next command to end of full burst
+ burst_gap = tBURST;
+ } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) {
+ // not at an interleave boundary after bandwidth check
+ // Shift command to tBURST boundary to avoid data contention
+ // Command will remain in the same burst window given that
+ // tBURST is less than tBURST_MAX
+ cmd_at = rank_ref.lastBurstTick + tBURST;
+ }
+ }
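+
+    // Illustrative example (hypothetical timings): with tBURST_MIN = 2ns
+    // and tBURST = 4ns, a burst issued exactly 2ns after the previous one
+    // interleaves with it and the gap to the next burst grows to the full
+    // 4ns; a burst that would land between the two boundaries is instead
+    // shifted out to the 4ns boundary to avoid data bus contention.
+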
+ DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at);
+
+ // update the packet ready time
+ mem_pkt->readyTime = cmd_at + tCL + tBURST;
+
+ rank_ref.lastBurstTick = cmd_at;
+
+ // update the time for the next read/write burst for each
+ // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here)
+ Tick dly_to_rd_cmd;
+ Tick dly_to_wr_cmd;
+ for (int j = 0; j < ranksPerChannel; j++) {
+ for (int i = 0; i < banksPerRank; i++) {
+ if (mem_pkt->rank == j) {
+ if (bankGroupArch &&
+ (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) {
+ // bank group architecture requires longer delays between
+ // RD/WR burst commands to the same bank group.
+ // tCCD_L is default requirement for same BG timing
+ // tCCD_L_WR is required for write-to-write
+ // Need to also take bus turnaround delays into account
+ dly_to_rd_cmd = mem_pkt->isRead() ?
+ tCCD_L : std::max(tCCD_L, wrToRdDlySameBG);
+ dly_to_wr_cmd = mem_pkt->isRead() ?
+ std::max(tCCD_L, rdToWrDlySameBG) :
+ tCCD_L_WR;
+ } else {
+ // tBURST is default requirement for diff BG timing
+ // Need to also take bus turnaround delays into account
+ dly_to_rd_cmd = mem_pkt->isRead() ? burst_gap :
+ writeToReadDelay();
+ dly_to_wr_cmd = mem_pkt->isRead() ? readToWriteDelay() :
+ burst_gap;
+ }
+ } else {
+ // different rank is by default in a different bank group and
+ // doesn't require longer tCCD or additional RTW, WTR delays
+ // Need to account for rank-to-rank switching
+ dly_to_wr_cmd = rankToRankDelay();
+ dly_to_rd_cmd = rankToRankDelay();
+ }
+ ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
+ ranks[j]->banks[i].rdAllowedAt);
+ ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
+ ranks[j]->banks[i].wrAllowedAt);
+ }
+ }
+
+ // Save rank of current access
+ activeRank = mem_pkt->rank;
+
+    // If this is a write, we also need to respect the write recovery
+    // time before a precharge; in the case of a read, respect the
+    // read to precharge constraint
+ bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt,
+ mem_pkt->isRead() ? cmd_at + tRTP :
+ mem_pkt->readyTime + tWR);
+
+ // increment the bytes accessed and the accesses per row
+ bank_ref.bytesAccessed += burstSize;
+ ++bank_ref.rowAccesses;
+
+ // if we reached the max, then issue with an auto-precharge
+ bool auto_precharge = pageMgmt == Enums::close ||
+ bank_ref.rowAccesses == maxAccessesPerRow;
+
+ // if we did not hit the limit, we might still want to
+ // auto-precharge
+ if (!auto_precharge &&
+ (pageMgmt == Enums::open_adaptive ||
+ pageMgmt == Enums::close_adaptive)) {
+ // a twist on the open and close page policies:
+ // 1) open_adaptive page policy does not blindly keep the
+ // page open, but close it if there are no row hits, and there
+ // are bank conflicts in the queue
+ // 2) close_adaptive page policy does not blindly close the
+ // page, but closes it only if there are no row hits in the queue.
+ // In this case, only force an auto precharge when there
+ // are no same page hits in the queue
+ bool got_more_hits = false;
+ bool got_bank_conflict = false;
+
+ for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) {
+ auto p = queue[i].begin();
+ // keep on looking until we find a hit or reach the end of the
+ // queue
+ // 1) if a hit is found, then both open and close adaptive
+ // policies keep the page open
+ // 2) if no hit is found, got_bank_conflict is set to true if a
+ // bank conflict request is waiting in the queue
+ // 3) make sure we are not considering the packet that we are
+ // currently dealing with
+ while (!got_more_hits && p != queue[i].end()) {
+ if (mem_pkt != (*p)) {
+ bool same_rank_bank = (mem_pkt->rank == (*p)->rank) &&
+ (mem_pkt->bank == (*p)->bank);
+
+ bool same_row = mem_pkt->row == (*p)->row;
+ got_more_hits |= same_rank_bank && same_row;
+ got_bank_conflict |= same_rank_bank && !same_row;
+ }
+ ++p;
+ }
+
+ if (got_more_hits)
+ break;
+ }
+
+ // auto pre-charge when either
+ // 1) open_adaptive policy, we have not got any more hits, and
+ // have a bank conflict
+ // 2) close_adaptive policy and we have not got any more hits
+ auto_precharge = !got_more_hits &&
+ (got_bank_conflict || pageMgmt == Enums::close_adaptive);
+ }
+
+ // DRAMPower trace command to be written
+ std::string mem_cmd = mem_pkt->isRead() ? "RD" : "WR";
+
+ // MemCommand required for DRAMPower library
+ MemCommand::cmds command = (mem_cmd == "RD") ? MemCommand::RD :
+ MemCommand::WR;
+
+ rank_ref.cmdList.push_back(Command(command, mem_pkt->bank, cmd_at));
+
+ DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) -
+ timeStampOffset, mem_cmd, mem_pkt->bank, mem_pkt->rank);
+
+ // if this access should use auto-precharge, then we are
+ // closing the row after the read/write burst
+ if (auto_precharge) {
+ // if auto-precharge push a PRE command at the correct tick to the
+ // list used by DRAMPower library to calculate power
+ prechargeBank(rank_ref, bank_ref, std::max(curTick(),
+ bank_ref.preAllowedAt), true);
+
+ DPRINTF(DRAM, "Auto-precharged bank: %d\n", mem_pkt->bankId);
+ }
+
+ // Update the stats and schedule the next request
+ if (mem_pkt->isRead()) {
+        // every read burst will generate a response event; increment count
+ ++rank_ref.outstandingEvents;
+
+ stats.readBursts++;
+ if (row_hit)
+ stats.readRowHits++;
+ stats.bytesRead += burstSize;
+ stats.perBankRdBursts[mem_pkt->bankId]++;
+
+ // Update latency stats
+ stats.totMemAccLat += mem_pkt->readyTime - mem_pkt->entryTime;
+ stats.totQLat += cmd_at - mem_pkt->entryTime;
+ stats.totBusLat += tBURST;
+ } else {
+ // Schedule write done event to decrement event count
+ // after the readyTime has been reached
+ // Only schedule latest write event to minimize events
+ // required; only need to ensure that final event scheduled covers
+ // the time that writes are outstanding and bus is active
+        // to hold off power-down entry events
+ if (!rank_ref.writeDoneEvent.scheduled()) {
+ schedule(rank_ref.writeDoneEvent, mem_pkt->readyTime);
+ // New event, increment count
+ ++rank_ref.outstandingEvents;
+
+ } else if (rank_ref.writeDoneEvent.when() < mem_pkt->readyTime) {
+ reschedule(rank_ref.writeDoneEvent, mem_pkt->readyTime);
+ }
+ // will remove write from queue when returned to parent function
+ // decrement count for DRAM rank
+ --rank_ref.writeEntries;
+
+ stats.writeBursts++;
+ if (row_hit)
+ stats.writeRowHits++;
+ stats.bytesWritten += burstSize;
+ stats.perBankWrBursts[mem_pkt->bankId]++;
+
+ }
+    // Return when this command was issued and when the next burst can be
+    // scheduled; the caller uses these to update the bus state
+ return make_pair(cmd_at, cmd_at + burst_gap);
+}
+
+void
+DRAMInterface::addRankToRankDelay(Tick cmd_at)
+{
+ // update timing for DRAM ranks due to bursts issued
+ // to ranks on other media interfaces
+ for (auto n : ranks) {
+ for (int i = 0; i < banksPerRank; i++) {
+ // different rank by default
+ // Need to only account for rank-to-rank switching
+ n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
+ n->banks[i].rdAllowedAt);
+ n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
+ n->banks[i].wrAllowedAt);
+ }
+ }
+}
+
+DRAMInterface::DRAMInterface(const DRAMInterfaceParams* _p)
+ : MemInterface(_p),
+ bankGroupsPerRank(_p->bank_groups_per_rank),
+ bankGroupArch(_p->bank_groups_per_rank > 0),
+ tCL(_p->tCL),
+ tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX),
+ tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD),
+ tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP),
+ tRFC(_p->tRFC), tREFI(_p->tREFI), tRRD(_p->tRRD), tRRD_L(_p->tRRD_L),
+ tPPD(_p->tPPD), tAAD(_p->tAAD),
+ tXAW(_p->tXAW), tXP(_p->tXP), tXS(_p->tXS),
+ clkResyncDelay(tCL + _p->tBURST_MAX),
+ dataClockSync(_p->data_clock_sync),
+ burstInterleave(tBURST != tBURST_MIN),
+ twoCycleActivate(_p->two_cycle_activate),
+ activationLimit(_p->activation_limit),
+ wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L),
+ rdToWrDlySameBG(_p->tRTW + _p->tBURST_MAX),
+ pageMgmt(_p->page_policy),
+ maxAccessesPerRow(_p->max_accesses_per_row),
+ timeStampOffset(0), activeRank(0),
+ enableDRAMPowerdown(_p->enable_dram_powerdown),
+ lastStatsResetTick(0),
+ stats(*this)
+{
+ DPRINTF(DRAM, "Setting up DRAM Interface\n");
+
+ fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
+ "must be a power of two\n", burstSize);
+
+ // sanity check the ranks since we rely on bit slicing for the
+ // address decoding
+ fatal_if(!isPowerOf2(ranksPerChannel), "DRAM rank count of %d is "
+ "not allowed, must be a power of two\n", ranksPerChannel);
+
+ for (int i = 0; i < ranksPerChannel; i++) {
+ DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
+ Rank* rank = new Rank(_p, i, *this);
+ ranks.push_back(rank);
+ }
+
+    // determine the actual DRAM capacity from the DRAM config, in Mbytes
+ uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
+ ranksPerChannel;
+
+ uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
+
+ DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
+ AbstractMemory::size());
+
+ // if actual DRAM size does not match memory capacity in system warn!
+ if (deviceCapacity != capacity / (1024 * 1024))
+ warn("DRAM device capacity (%d Mbytes) does not match the "
+ "address range assigned (%d Mbytes)\n", deviceCapacity,
+ capacity / (1024 * 1024));
+
+ DPRINTF(DRAM, "Row buffer size %d bytes with %d bursts per row buffer\n",
+ rowBufferSize, burstsPerRowBuffer);
+
+ rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
+
+ // some basic sanity checks
+ if (tREFI <= tRP || tREFI <= tRFC) {
+ fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
+ tREFI, tRP, tRFC);
+ }
+
+    // basic bank group architecture checks
+ if (bankGroupArch) {
+ // must have at least one bank per bank group
+ if (bankGroupsPerRank > banksPerRank) {
+ fatal("banks per rank (%d) must be equal to or larger than "
+ "banks groups per rank (%d)\n",
+ banksPerRank, bankGroupsPerRank);
+ }
+ // must have same number of banks in each bank group
+ if ((banksPerRank % bankGroupsPerRank) != 0) {
+ fatal("Banks per rank (%d) must be evenly divisible by bank "
+ "groups per rank (%d) for equal banks per bank group\n",
+ banksPerRank, bankGroupsPerRank);
+ }
+        // tCCD_L should be greater than the minimal back-to-back burst delay
+ if (tCCD_L <= tBURST) {
+ fatal("tCCD_L (%d) should be larger than the minimum bus delay "
+ "(%d) when bank groups per rank (%d) is greater than 1\n",
+ tCCD_L, tBURST, bankGroupsPerRank);
+ }
+        // tCCD_L_WR should be greater than the minimal back-to-back burst delay
+ if (tCCD_L_WR <= tBURST) {
+ fatal("tCCD_L_WR (%d) should be larger than the minimum bus delay "
+ " (%d) when bank groups per rank (%d) is greater than 1\n",
+ tCCD_L_WR, tBURST, bankGroupsPerRank);
+ }
+        // tRRD_L should be at least the minimal same-bank-group ACT-to-ACT delay;
+ // some datasheets might specify it equal to tRRD
+ if (tRRD_L < tRRD) {
+ fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
+ "bank groups per rank (%d) is greater than 1\n",
+ tRRD_L, tRRD, bankGroupsPerRank);
+ }
+ }
+}
+
+void
+DRAMInterface::init()
+{
+ AbstractMemory::init();
+
+    // a few sanity checks on the interleaving; save them for here to
+ // ensure that the system pointer is initialised
+ if (range.interleaved()) {
+ if (addrMapping == Enums::RoRaBaChCo) {
+ if (rowBufferSize != range.granularity()) {
+ fatal("Channel interleaving of %s doesn't match RoRaBaChCo "
+ "address map\n", name());
+ }
+ } else if (addrMapping == Enums::RoRaBaCoCh ||
+ addrMapping == Enums::RoCoRaBaCh) {
+ // for the interleavings with channel bits in the bottom,
+ // if the system uses a channel striping granularity that
+ // is larger than the DRAM burst size, then map the
+ // sequential accesses within a stripe to a number of
+ // columns in the DRAM, effectively placing some of the
+ // lower-order column bits as the least-significant bits
+ // of the address (above the ones denoting the burst size)
+ assert(burstsPerStripe >= 1);
+
+ // channel striping has to be done at a granularity that
+            // is equal to or larger than a cache line
+ if (system()->cacheLineSize() > range.granularity()) {
+ fatal("Channel interleaving of %s must be at least as large "
+ "as the cache line size\n", name());
+ }
+
+            // ...and equal to or smaller than the row-buffer size
+ if (rowBufferSize < range.granularity()) {
+ fatal("Channel interleaving of %s must be at most as large "
+ "as the row-buffer size\n", name());
+ }
+ // this is essentially the check above, so just to be sure
+ assert(burstsPerStripe <= burstsPerRowBuffer);
+ }
+ }
+}
+
+void
+DRAMInterface::startup()
+{
+ if (system()->isTimingMode()) {
+ // timestamp offset should be in clock cycles for DRAMPower
+ timeStampOffset = divCeil(curTick(), tCK);
+
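+        // schedule the first refresh of each rank one tREFI from now,
+        // pulled forward by tRP so there is time to precharge the banks
+        // before the refresh is due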
+ for (auto r : ranks) {
+ r->startup(curTick() + tREFI - tRP);
+ }
+ }
+}
+
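+// the interface is busy only when every rank is actively refreshing; while
+// scanning, also wake up any rank sitting in self-refresh that has pending work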
+bool
+DRAMInterface::isBusy()
+{
+ int busy_ranks = 0;
+ for (auto r : ranks) {
+ if (!r->inRefIdleState()) {
+ if (r->pwrState != PWR_SREF) {
+ // rank is busy refreshing
+ DPRINTF(DRAMState, "Rank %d is not available\n", r->rank);
+ busy_ranks++;
+
+ // let the rank know that if it was waiting to drain, it
+ // is now done and ready to proceed
+ r->checkDrainDone();
+ }
+
+ // check if we were in self-refresh and haven't started
+ // to transition out
+ if ((r->pwrState == PWR_SREF) && r->inLowPowerState) {
+ DPRINTF(DRAMState, "Rank %d is in self-refresh\n", r->rank);
+ // if we have commands queued to this rank and we don't have
+ // a minimum number of active commands enqueued,
+ // exit self-refresh
+ if (r->forceSelfRefreshExit()) {
+ DPRINTF(DRAMState, "rank %d was in self refresh and"
+ " should wake up\n", r->rank);
+                    // wake up from self-refresh
+ r->scheduleWakeUpEvent(tXS);
+ // things are brought back into action once a refresh is
+ // performed after self-refresh
+ // continue with selection for other ranks
+ }
+ }
+ }
+ }
+ return (busy_ranks == ranksPerChannel);
+}
+
+void DRAMInterface::setupRank(const uint8_t rank, const bool is_read)
+{
+ // increment entry count of the rank based on packet type
+ if (is_read) {
+ ++ranks[rank]->readEntries;
+ } else {
+ ++ranks[rank]->writeEntries;
+ }
+}
+
+void
+DRAMInterface::respondEvent(uint8_t rank)
+{
+ Rank& rank_ref = *ranks[rank];
+
+ // if a read has reached its ready-time, decrement the number of reads
+ // At this point the packet has been handled and there is a possibility
+ // to switch to low-power mode if no other packet is available
+ --rank_ref.readEntries;
+ DPRINTF(DRAM, "number of read entries for rank %d is %d\n",
+ rank, rank_ref.readEntries);
+
+ // counter should at least indicate one outstanding request
+ // for this read
+ assert(rank_ref.outstandingEvents > 0);
+ // read response received, decrement count
+ --rank_ref.outstandingEvents;
+
+ // at this moment should not have transitioned to a low-power state
+ assert((rank_ref.pwrState != PWR_SREF) &&
+ (rank_ref.pwrState != PWR_PRE_PDN) &&
+ (rank_ref.pwrState != PWR_ACT_PDN));
+
+ // track if this is the last packet before idling
+ // and that there are no outstanding commands to this rank
+ if (rank_ref.isQueueEmpty() && rank_ref.outstandingEvents == 0 &&
+ rank_ref.inRefIdleState() && enableDRAMPowerdown) {
+ // verify that there are no events scheduled
+ assert(!rank_ref.activateEvent.scheduled());
+ assert(!rank_ref.prechargeEvent.scheduled());
+
+ // if coming from active state, schedule power event to
+ // active power-down else go to precharge power-down
+ DPRINTF(DRAMState, "Rank %d sleep at tick %d; current power state is "
+ "%d\n", rank, curTick(), rank_ref.pwrState);
+
+ // default to ACT power-down unless already in IDLE state
+ // could be in IDLE if PRE issued before data returned
+ PowerState next_pwr_state = PWR_ACT_PDN;
+ if (rank_ref.pwrState == PWR_IDLE) {
+ next_pwr_state = PWR_PRE_PDN;
+ }
+
+ rank_ref.powerDownSleep(next_pwr_state, curTick());
+ }
+}
+
+void
+DRAMInterface::checkRefreshState(uint8_t rank)
+{
+ Rank& rank_ref = *ranks[rank];
+
+ if ((rank_ref.refreshState == REF_PRE) &&
+ !rank_ref.prechargeEvent.scheduled()) {
+ // kick the refresh event loop into action again if banks already
+ // closed and just waiting for read to complete
+ schedule(rank_ref.refreshEvent, curTick());
+ }
+}
+
+void
+DRAMInterface::drainRanks()
+{
+ // also need to kick off events to exit self-refresh
+ for (auto r : ranks) {
+ // force self-refresh exit, which in turn will issue auto-refresh
+ if (r->pwrState == PWR_SREF) {
+ DPRINTF(DRAM,"Rank%d: Forcing self-refresh wakeup in drain\n",
+ r->rank);
+ r->scheduleWakeUpEvent(tXS);
+ }
+ }
+}
+
+bool
+DRAMInterface::allRanksDrained() const
+{
+ // true until proven false
+ bool all_ranks_drained = true;
+ for (auto r : ranks) {
+ // then verify that the power state is IDLE ensuring all banks are
+ // closed and rank is not in a low power state. Also verify that rank
+ // is idle from a refresh point of view.
+ all_ranks_drained = r->inPwrIdleState() && r->inRefIdleState() &&
+ all_ranks_drained;
+ }
+ return all_ranks_drained;
+}
+
+void
+DRAMInterface::suspend()
+{
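+    // forward the suspend to every rank; each rank deschedules its pending
+    // refresh event and updates its power statistics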
+ for (auto r : ranks) {
+ r->suspend();
+ }
+}
+
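+// Scan the queue and build a per-rank bitmask of the banks that can be
+// prepared (activated) the earliest, preferring banks whose column command
+// can then issue seamlessly; the returned bool indicates whether the
+// activate can be hidden behind the current data transfer.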
+pair<vector<uint32_t>, bool>
+DRAMInterface::minBankPrep(const MemPacketQueue& queue,
+ Tick min_col_at) const
+{
+ Tick min_act_at = MaxTick;
+ vector<uint32_t> bank_mask(ranksPerChannel, 0);
+
+    // latest Tick for which ACT can occur without incurring additional
+ // delay on the data bus
+ const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick());
+
+ // Flag condition when burst can issue back-to-back with previous burst
+ bool found_seamless_bank = false;
+
+ // Flag condition when bank can be opened without incurring additional
+ // delay on the data bus
+ bool hidden_bank_prep = false;
+
+    // determine if we have queued transactions targeting the
+ // bank in question
+ vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
+ for (const auto& p : queue) {
+ if (p->isDram() && ranks[p->rank]->inRefIdleState())
+ got_waiting[p->bankId] = true;
+ }
+
+ // Find command with optimal bank timing
+ // Will prioritize commands that can issue seamlessly.
+ for (int i = 0; i < ranksPerChannel; i++) {
+ for (int j = 0; j < banksPerRank; j++) {
+ uint16_t bank_id = i * banksPerRank + j;
+
+ // if we have waiting requests for the bank, and it is
+ // amongst the first available, update the mask
+ if (got_waiting[bank_id]) {
+ // make sure this rank is not currently refreshing.
+ assert(ranks[i]->inRefIdleState());
+ // simplistic approximation of when the bank can issue
+ // an activate, ignoring any rank-to-rank switching
+ // cost in this calculation
+ Tick act_at = ranks[i]->banks[j].openRow == Bank::NO_ROW ?
+ std::max(ranks[i]->banks[j].actAllowedAt, curTick()) :
+ std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP;
+
+ // When is the earliest the R/W burst can issue?
+ const Tick col_allowed_at = ctrl->inReadBusState(false) ?
+ ranks[i]->banks[j].rdAllowedAt :
+ ranks[i]->banks[j].wrAllowedAt;
+ Tick col_at = std::max(col_allowed_at, act_at + tRCD);
+
+ // bank can issue burst back-to-back (seamlessly) with
+ // previous burst
+ bool new_seamless_bank = col_at <= min_col_at;
+
+ // if we found a new seamless bank or we have no
+ // seamless banks, and got a bank with an earlier
+ // activate time, it should be added to the bit mask
+ if (new_seamless_bank ||
+ (!found_seamless_bank && act_at <= min_act_at)) {
+ // if we did not have a seamless bank before, and
+ // we do now, reset the bank mask, also reset it
+ // if we have not yet found a seamless bank and
+ // the activate time is smaller than what we have
+ // seen so far
+ if (!found_seamless_bank &&
+ (new_seamless_bank || act_at < min_act_at)) {
+ std::fill(bank_mask.begin(), bank_mask.end(), 0);
+ }
+
+ found_seamless_bank |= new_seamless_bank;
+
+ // ACT can occur 'behind the scenes'
+ hidden_bank_prep = act_at <= hidden_act_max;
+
+ // set the bit corresponding to the available bank
+ replaceBits(bank_mask[i], j, j, 1);
+ min_act_at = act_at;
+ }
+ }
+ }
+ }
+
+ return make_pair(bank_mask, hidden_bank_prep);
+}
+
+DRAMInterface*
+DRAMInterfaceParams::create()
+{
+ return new DRAMInterface(this);
+}
+
+DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p,
+ int _rank, DRAMInterface& _dram)
+ : EventManager(&_dram), dram(_dram),
+ pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE),
+ pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE),
+ refreshState(REF_IDLE), inLowPowerState(false), rank(_rank),
+ readEntries(0), writeEntries(0), outstandingEvents(0),
+ wakeUpAllowedAt(0), power(_p, false), banks(_p->banks_per_rank),
+ numBanksActive(0), actTicks(_p->activation_limit, 0), lastBurstTick(0),
+ writeDoneEvent([this]{ processWriteDoneEvent(); }, name()),
+ activateEvent([this]{ processActivateEvent(); }, name()),
+ prechargeEvent([this]{ processPrechargeEvent(); }, name()),
+ refreshEvent([this]{ processRefreshEvent(); }, name()),
+ powerEvent([this]{ processPowerEvent(); }, name()),
+ wakeUpEvent([this]{ processWakeUpEvent(); }, name()),
+ stats(_dram, *this)
+{
+ for (int b = 0; b < _p->banks_per_rank; b++) {
+ banks[b].bank = b;
+ // GDDR addressing of banks to BG is linear.
+ // Here we assume that all DRAM generations address bank groups as
+ // follows:
+ if (_p->bank_groups_per_rank > 0) {
+ // Simply assign lower bits to bank group in order to
+ // rotate across bank groups as banks are incremented
+ // e.g. with 4 banks per bank group and 16 banks total:
+ // banks 0,4,8,12 are in bank group 0
+ // banks 1,5,9,13 are in bank group 1
+ // banks 2,6,10,14 are in bank group 2
+ // banks 3,7,11,15 are in bank group 3
+ banks[b].bankgr = b % _p->bank_groups_per_rank;
+ } else {
+ // No bank groups; simply assign to bank number
+ banks[b].bankgr = b;
+ }
+ }
+}
+
+void
+DRAMInterface::Rank::startup(Tick ref_tick)
+{
+ assert(ref_tick > curTick());
+
+ pwrStateTick = curTick();
+
+ // kick off the refresh, and give ourselves enough time to
+ // precharge
+ schedule(refreshEvent, ref_tick);
+}
+
+void
+DRAMInterface::Rank::suspend()
+{
+ deschedule(refreshEvent);
+
+ // Update the stats
+ updatePowerStats();
+
+ // don't automatically transition back to LP state after next REF
+ pwrStatePostRefresh = PWR_IDLE;
+}
+
+bool
+DRAMInterface::Rank::isQueueEmpty() const
+{
+    // check commands in Q based on current bus direction
+ bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
+ (readEntries == 0))
+ || (dram.ctrl->inWriteBusState(true) &&
+ (writeEntries == 0));
+ return no_queued_cmds;
+}
+
+void
+DRAMInterface::Rank::checkDrainDone()
+{
+ // if this rank was waiting to drain it is now able to proceed to
+ // precharge
+ if (refreshState == REF_DRAIN) {
+ DPRINTF(DRAM, "Refresh drain done, now precharging\n");
+
+ refreshState = REF_PD_EXIT;
+
+ // hand control back to the refresh event loop
+ schedule(refreshEvent, curTick());
+ }
+}
+
+void
+DRAMInterface::Rank::flushCmdList()
+{
+ // at the moment sort the list of commands and update the counters
+    // for the DRAMPower library when doing a refresh
+ sort(cmdList.begin(), cmdList.end(), DRAMInterface::sortTime);
+
+ auto next_iter = cmdList.begin();
+    // push commands to DRAMPower
+ for ( ; next_iter != cmdList.end() ; ++next_iter) {
+ Command cmd = *next_iter;
+ if (cmd.timeStamp <= curTick()) {
+ // Move all commands at or before curTick to DRAMPower
+ power.powerlib.doCommand(cmd.type, cmd.bank,
+ divCeil(cmd.timeStamp, dram.tCK) -
+ dram.timeStampOffset);
+ } else {
+ // done - found all commands at or before curTick()
+ // next_iter references the 1st command after curTick
+ break;
+ }
+ }
+ // reset cmdList to only contain commands after curTick
+ // if there are no commands after curTick, updated cmdList will be empty
+ // in this case, next_iter is cmdList.end()
+ cmdList.assign(next_iter, cmdList.end());
+}
+
+void
+DRAMInterface::Rank::processActivateEvent()
+{
+ // we should transition to the active state as soon as any bank is active
+ if (pwrState != PWR_ACT)
+ // note that at this point numBanksActive could be back at
+ // zero again due to a precharge scheduled in the future
+ schedulePowerEvent(PWR_ACT, curTick());
+}
+
+void
+DRAMInterface::Rank::processPrechargeEvent()
+{
+ // counter should at least indicate one outstanding request
+ // for this precharge
+ assert(outstandingEvents > 0);
+ // precharge complete, decrement count
+ --outstandingEvents;
+
+ // if we reached zero, then special conditions apply as we track
+ // if all banks are precharged for the power models
+ if (numBanksActive == 0) {
+ // no reads to this rank in the Q and no pending
+ // RD/WR or refresh commands
+ if (isQueueEmpty() && outstandingEvents == 0 &&
+ dram.enableDRAMPowerdown) {
+ // should still be in ACT state since bank still open
+ assert(pwrState == PWR_ACT);
+
+ // All banks closed - switch to precharge power down state.
+ DPRINTF(DRAMState, "Rank %d sleep at tick %d\n",
+ rank, curTick());
+ powerDownSleep(PWR_PRE_PDN, curTick());
+ } else {
+ // we should transition to the idle state when the last bank
+ // is precharged
+ schedulePowerEvent(PWR_IDLE, curTick());
+ }
+ }
+}
+
+void
+DRAMInterface::Rank::processWriteDoneEvent()
+{
+ // counter should at least indicate one outstanding request
+ // for this write
+ assert(outstandingEvents > 0);
+ // Write transfer on bus has completed
+ // decrement per rank counter
+ --outstandingEvents;
+}
+
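+// refresh state machine: drain any access in flight to this rank, wake the
+// rank if it is powered down, precharge all banks, issue the refresh, and
+// schedule the next refresh (and any power-state transition) on completion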
+void
+DRAMInterface::Rank::processRefreshEvent()
+{
+ // when first preparing the refresh, remember when it was due
+ if ((refreshState == REF_IDLE) || (refreshState == REF_SREF_EXIT)) {
+ // remember when the refresh is due
+ refreshDueAt = curTick();
+
+ // proceed to drain
+ refreshState = REF_DRAIN;
+
+ // make nonzero while refresh is pending to ensure
+ // power down and self-refresh are not entered
+ ++outstandingEvents;
+
+ DPRINTF(DRAM, "Refresh due\n");
+ }
+
+ // let any scheduled read or write to the same rank go ahead,
+ // after which it will
+ // hand control back to this event loop
+ if (refreshState == REF_DRAIN) {
+ // if a request is at the moment being handled and this request is
+ // accessing the current rank then wait for it to finish
+ if ((rank == dram.activeRank)
+ && (dram.ctrl->requestEventScheduled())) {
+ // hand control over to the request loop until it is
+ // evaluated next
+ DPRINTF(DRAM, "Refresh awaiting draining\n");
+
+ return;
+ } else {
+ refreshState = REF_PD_EXIT;
+ }
+ }
+
+ // at this point, ensure that rank is not in a power-down state
+ if (refreshState == REF_PD_EXIT) {
+        // if rank was sleeping and we haven't started the exit process,
+ // wake-up for refresh
+ if (inLowPowerState) {
+ DPRINTF(DRAM, "Wake Up for refresh\n");
+ // save state and return after refresh completes
+ scheduleWakeUpEvent(dram.tXP);
+ return;
+ } else {
+ refreshState = REF_PRE;
+ }
+ }
+
+ // at this point, ensure that all banks are precharged
+ if (refreshState == REF_PRE) {
+ // precharge any active bank
+ if (numBanksActive != 0) {
+ // at the moment, we use a precharge all even if there is
+ // only a single bank open
+ DPRINTF(DRAM, "Precharging all\n");
+
+ // first determine when we can precharge
+ Tick pre_at = curTick();
+
+ for (auto &b : banks) {
+ // respect both causality and any existing bank
+ // constraints, some banks could already have a
+ // (auto) precharge scheduled
+ pre_at = std::max(b.preAllowedAt, pre_at);
+ }
+
+ // make sure all banks per rank are precharged, and for those that
+ // already are, update their availability
+ Tick act_allowed_at = pre_at + dram.tRP;
+
+ for (auto &b : banks) {
+ if (b.openRow != Bank::NO_ROW) {
+ dram.prechargeBank(*this, b, pre_at, true, false);
+ } else {
+ b.actAllowedAt = std::max(b.actAllowedAt, act_allowed_at);
+ b.preAllowedAt = std::max(b.preAllowedAt, pre_at);
+ }
+ }
+
+ // precharge all banks in rank
+ cmdList.push_back(Command(MemCommand::PREA, 0, pre_at));
+
+ DPRINTF(DRAMPower, "%llu,PREA,0,%d\n",
+ divCeil(pre_at, dram.tCK) -
+ dram.timeStampOffset, rank);
+ } else if ((pwrState == PWR_IDLE) && (outstandingEvents == 1)) {
+ // Banks are closed, have transitioned to IDLE state, and
+ // no outstanding ACT,RD/WR,Auto-PRE sequence scheduled
+ DPRINTF(DRAM, "All banks already precharged, starting refresh\n");
+
+ // go ahead and kick the power state machine into gear since
+ // we are already idle
+ schedulePowerEvent(PWR_REF, curTick());
+ } else {
+            // banks are closed but pwrState has not yet transitioned to IDLE,
+            // or an ACT,RD/WR,Auto-PRE sequence is still outstanding
+ // should have outstanding precharge or read response event
+ assert(prechargeEvent.scheduled() ||
+ dram.ctrl->respondEventScheduled());
+ // will start refresh when pwrState transitions to IDLE
+ }
+
+ assert(numBanksActive == 0);
+
+ // wait for all banks to be precharged or read to complete
+ // When precharge commands are done, power state machine will
+ // transition to the idle state, and automatically move to a
+ // refresh, at that point it will also call this method to get
+ // the refresh event loop going again
+ // Similarly, when read response completes, if all banks are
+ // precharged, will call this method to get loop re-started
+ return;
+ }
+
+ // last but not least we perform the actual refresh
+ if (refreshState == REF_START) {
+ // should never get here with any banks active
+ assert(numBanksActive == 0);
+ assert(pwrState == PWR_REF);
+
+ Tick ref_done_at = curTick() + dram.tRFC;
+
+ for (auto &b : banks) {
+ b.actAllowedAt = ref_done_at;
+ }
+
+ // at the moment this affects all ranks
+ cmdList.push_back(Command(MemCommand::REF, 0, curTick()));
+
+ // Update the stats
+ updatePowerStats();
+
+ DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), dram.tCK) -
+ dram.timeStampOffset, rank);
+
+ // Update for next refresh
+ refreshDueAt += dram.tREFI;
+
+ // make sure we did not wait so long that we cannot make up
+ // for it
+ if (refreshDueAt < ref_done_at) {
+ fatal("Refresh was delayed so long we cannot catch up\n");
+ }
+
+ // Run the refresh and schedule event to transition power states
+ // when refresh completes
+ refreshState = REF_RUN;
+ schedule(refreshEvent, ref_done_at);
+ return;
+ }
+
+ if (refreshState == REF_RUN) {
+ // should never get here with any banks active
+ assert(numBanksActive == 0);
+ assert(pwrState == PWR_REF);
+
+ assert(!powerEvent.scheduled());
+
+ if ((dram.ctrl->drainState() == DrainState::Draining) ||
+ (dram.ctrl->drainState() == DrainState::Drained)) {
+ // if draining, do not re-enter low-power mode.
+ // simply go to IDLE and wait
+ schedulePowerEvent(PWR_IDLE, curTick());
+ } else {
+ // At the moment, we sleep when the refresh ends and wait to be
+ // woken up again if previously in a low-power state.
+ if (pwrStatePostRefresh != PWR_IDLE) {
+ // power State should be power Refresh
+ assert(pwrState == PWR_REF);
+ DPRINTF(DRAMState, "Rank %d sleeping after refresh and was in "
+ "power state %d before refreshing\n", rank,
+ pwrStatePostRefresh);
+ powerDownSleep(pwrState, curTick());
+
+ // Force PRE power-down if there are no outstanding commands
+ // in Q after refresh.
+ } else if (isQueueEmpty() && dram.enableDRAMPowerdown) {
+ // still have refresh event outstanding but there should
+ // be no other events outstanding
+ assert(outstandingEvents == 1);
+ DPRINTF(DRAMState, "Rank %d sleeping after refresh but was NOT"
+ " in a low power state before refreshing\n", rank);
+ powerDownSleep(PWR_PRE_PDN, curTick());
+
+ } else {
+ // move to the idle power state once the refresh is done, this
+ // will also move the refresh state machine to the refresh
+ // idle state
+ schedulePowerEvent(PWR_IDLE, curTick());
+ }
+ }
+
+ // At this point, we have completed the current refresh.
+ // In the SREF bypass case, we do not get to this state in the
+        // refresh state machine and therefore can always schedule next event.
+ // Compensate for the delay in actually performing the refresh
+ // when scheduling the next one
+ schedule(refreshEvent, refreshDueAt - dram.tRP);
+
+ DPRINTF(DRAMState, "Refresh done at %llu and next refresh"
+ " at %llu\n", curTick(), refreshDueAt);
+ }
+}
+
+void
+DRAMInterface::Rank::schedulePowerEvent(PowerState pwr_state, Tick tick)
+{
+ // respect causality
+ assert(tick >= curTick());
+
+ if (!powerEvent.scheduled()) {
+ DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n",
+ tick, pwr_state);
+
+ // insert the new transition
+ pwrStateTrans = pwr_state;
+
+ schedule(powerEvent, tick);
+ } else {
+ panic("Scheduled power event at %llu to state %d, "
+ "with scheduled event at %llu to %d\n", tick, pwr_state,
+ powerEvent.when(), pwrStateTrans);
+ }
+}
+
+void
+DRAMInterface::Rank::powerDownSleep(PowerState pwr_state, Tick tick)
+{
+    // if the target low-power state is active power-down, schedule the
+    // transition to it. In reality tCKE is needed to enter active power-down.
+    // This is neglected here and could be added in the future.
+ if (pwr_state == PWR_ACT_PDN) {
+ schedulePowerEvent(pwr_state, tick);
+ // push command to DRAMPower
+ cmdList.push_back(Command(MemCommand::PDN_F_ACT, 0, tick));
+ DPRINTF(DRAMPower, "%llu,PDN_F_ACT,0,%d\n", divCeil(tick,
+ dram.tCK) - dram.timeStampOffset, rank);
+ } else if (pwr_state == PWR_PRE_PDN) {
+        // if the target low-power state is precharge power-down, schedule
+        // the transition to it. In reality tCKE is needed to enter precharge
+        // power-down. This is neglected here.
+ schedulePowerEvent(pwr_state, tick);
+        // push Command to DRAMPower
+ cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
+ DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
+ dram.tCK) - dram.timeStampOffset, rank);
+ } else if (pwr_state == PWR_REF) {
+ // if a refresh just occurred
+ // transition to PRE_PDN now that all banks are closed
+ // precharge power down requires tCKE to enter. For simplicity
+ // this is not considered.
+ schedulePowerEvent(PWR_PRE_PDN, tick);
+        // push Command to DRAMPower
+ cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
+ DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
+ dram.tCK) - dram.timeStampOffset, rank);
+ } else if (pwr_state == PWR_SREF) {
+ // should only enter SREF after PRE-PD wakeup to do a refresh
+ assert(pwrStatePostRefresh == PWR_PRE_PDN);
+ // self refresh requires time tCKESR to enter. For simplicity,
+ // this is not considered.
+ schedulePowerEvent(PWR_SREF, tick);
+ // push Command to DRAMPower
+ cmdList.push_back(Command(MemCommand::SREN, 0, tick));
+ DPRINTF(DRAMPower, "%llu,SREN,0,%d\n", divCeil(tick,
+ dram.tCK) - dram.timeStampOffset, rank);
+ }
+ // Ensure that we don't power-down and back up in same tick
+ // Once we commit to PD entry, do it and wait for at least 1tCK
+ // This could be replaced with tCKE if/when that is added to the model
+ wakeUpAllowedAt = tick + dram.tCK;
+
+ // Transitioning to a low power state, set flag
+ inLowPowerState = true;
+}
+
+void
+DRAMInterface::Rank::scheduleWakeUpEvent(Tick exit_delay)
+{
+ Tick wake_up_tick = std::max(curTick(), wakeUpAllowedAt);
+
+ DPRINTF(DRAMState, "Scheduling wake-up for rank %d at tick %d\n",
+ rank, wake_up_tick);
+
+ // if waking for refresh, hold previous state
+ // else reset state back to IDLE
+ if (refreshState == REF_PD_EXIT) {
+ pwrStatePostRefresh = pwrState;
+ } else {
+ // don't automatically transition back to LP state after next REF
+ pwrStatePostRefresh = PWR_IDLE;
+ }
+
+ // schedule wake-up with event to ensure entry has completed before
+ // we try to wake-up
+ schedule(wakeUpEvent, wake_up_tick);
+
+ for (auto &b : banks) {
+ // respect both causality and any existing bank
+ // constraints, some banks could already have a
+ // (auto) precharge scheduled
+ b.wrAllowedAt = std::max(wake_up_tick + exit_delay, b.wrAllowedAt);
+ b.rdAllowedAt = std::max(wake_up_tick + exit_delay, b.rdAllowedAt);
+ b.preAllowedAt = std::max(wake_up_tick + exit_delay, b.preAllowedAt);
+ b.actAllowedAt = std::max(wake_up_tick + exit_delay, b.actAllowedAt);
+ }
+ // Transitioning out of low power state, clear flag
+ inLowPowerState = false;
+
+ // push to DRAMPower
+ // use pwrStateTrans for cases where we have a power event scheduled
+ // to enter low power that has not yet been processed
+ if (pwrStateTrans == PWR_ACT_PDN) {
+ cmdList.push_back(Command(MemCommand::PUP_ACT, 0, wake_up_tick));
+ DPRINTF(DRAMPower, "%llu,PUP_ACT,0,%d\n", divCeil(wake_up_tick,
+ dram.tCK) - dram.timeStampOffset, rank);
+
+ } else if (pwrStateTrans == PWR_PRE_PDN) {
+ cmdList.push_back(Command(MemCommand::PUP_PRE, 0, wake_up_tick));
+ DPRINTF(DRAMPower, "%llu,PUP_PRE,0,%d\n", divCeil(wake_up_tick,
+ dram.tCK) - dram.timeStampOffset, rank);
+ } else if (pwrStateTrans == PWR_SREF) {
+ cmdList.push_back(Command(MemCommand::SREX, 0, wake_up_tick));
+ DPRINTF(DRAMPower, "%llu,SREX,0,%d\n", divCeil(wake_up_tick,
+ dram.tCK) - dram.timeStampOffset, rank);
+ }
+}
+
+void
+DRAMInterface::Rank::processWakeUpEvent()
+{
+ // Should be in a power-down or self-refresh state
+ assert((pwrState == PWR_ACT_PDN) || (pwrState == PWR_PRE_PDN) ||
+ (pwrState == PWR_SREF));
+
+ // Check current state to determine transition state
+ if (pwrState == PWR_ACT_PDN) {
+ // banks still open, transition to PWR_ACT
+ schedulePowerEvent(PWR_ACT, curTick());
+ } else {
+ // transitioning from a precharge power-down or self-refresh state
+ // banks are closed - transition to PWR_IDLE
+ schedulePowerEvent(PWR_IDLE, curTick());
+ }
+}
+
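+// apply the pending power-state transition, account for the time spent in
+// the previous state, and kick off any refresh or follow-on transitions
+// that are now possible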
+void
+DRAMInterface::Rank::processPowerEvent()
+{
+ assert(curTick() >= pwrStateTick);
+ // remember where we were, and for how long
+ Tick duration = curTick() - pwrStateTick;
+ PowerState prev_state = pwrState;
+
+ // update the accounting
+ stats.pwrStateTime[prev_state] += duration;
+
+ // track to total idle time
+ if ((prev_state == PWR_PRE_PDN) || (prev_state == PWR_ACT_PDN) ||
+ (prev_state == PWR_SREF)) {
+ stats.totalIdleTime += duration;
+ }
+
+ pwrState = pwrStateTrans;
+ pwrStateTick = curTick();
+
+ // if rank was refreshing, make sure to start scheduling requests again
+ if (prev_state == PWR_REF) {
+ // bus IDLED prior to REF
+ // counter should be one for refresh command only
+ assert(outstandingEvents == 1);
+ // REF complete, decrement count and go back to IDLE
+ --outstandingEvents;
+ refreshState = REF_IDLE;
+
+ DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration);
+ // if moving back to power-down after refresh
+ if (pwrState != PWR_IDLE) {
+ assert(pwrState == PWR_PRE_PDN);
+ DPRINTF(DRAMState, "Switching to power down state after refreshing"
+ " rank %d at %llu tick\n", rank, curTick());
+ }
+
+ // completed refresh event, ensure next request is scheduled
+ if (!dram.ctrl->requestEventScheduled()) {
+ DPRINTF(DRAM, "Scheduling next request after refreshing"
+ " rank %d\n", rank);
+ dram.ctrl->restartScheduler(curTick());
+ }
+ }
+
+ if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) {
+ // have exited ACT PD
+ assert(prev_state == PWR_ACT_PDN);
+
+ // go back to REF event and close banks
+ refreshState = REF_PRE;
+ schedule(refreshEvent, curTick());
+ } else if (pwrState == PWR_IDLE) {
+ DPRINTF(DRAMState, "All banks precharged\n");
+ if (prev_state == PWR_SREF) {
+ // set refresh state to REF_SREF_EXIT, ensuring inRefIdleState
+ // continues to return false during tXS after SREF exit
+ // Schedule a refresh which kicks things back into action
+ // when it finishes
+ refreshState = REF_SREF_EXIT;
+ schedule(refreshEvent, curTick() + dram.tXS);
+ } else {
+ // if we have a pending refresh, and are now moving to
+ // the idle state, directly transition to, or schedule refresh
+ if ((refreshState == REF_PRE) || (refreshState == REF_PD_EXIT)) {
+ // ensure refresh is restarted only after final PRE command.
+ // do not restart refresh if controller is in an intermediate
+ // state, after PRE_PDN exit, when banks are IDLE but an
+ // ACT is scheduled.
+ if (!activateEvent.scheduled()) {
+ // there should be nothing waiting at this point
+ assert(!powerEvent.scheduled());
+ if (refreshState == REF_PD_EXIT) {
+ // exiting PRE PD, will be in IDLE until tXP expires
+ // and then should transition to PWR_REF state
+ assert(prev_state == PWR_PRE_PDN);
+ schedulePowerEvent(PWR_REF, curTick() + dram.tXP);
+ } else if (refreshState == REF_PRE) {
+ // can directly move to PWR_REF state and proceed below
+ pwrState = PWR_REF;
+ }
+ } else {
+ // must have PRE scheduled to transition back to IDLE
+ // and re-kick off refresh
+ assert(prechargeEvent.scheduled());
+ }
+ }
+ }
+ }
+
+ // transition to the refresh state and re-start refresh process
+ // refresh state machine will schedule the next power state transition
+ if (pwrState == PWR_REF) {
+ // completed final PRE for refresh or exiting power-down
+ assert(refreshState == REF_PRE || refreshState == REF_PD_EXIT);
+
+ // exited PRE PD for refresh, with no pending commands
+ // bypass auto-refresh and go straight to SREF, where memory
+ // will issue refresh immediately upon entry
+ if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
+ (dram.ctrl->drainState() != DrainState::Draining) &&
+ (dram.ctrl->drainState() != DrainState::Drained) &&
+ dram.enableDRAMPowerdown) {
+ DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning "
+ "to self refresh at %11u tick\n", rank, curTick());
+ powerDownSleep(PWR_SREF, curTick());
+
+ // Since refresh was bypassed, remove event by decrementing count
+ assert(outstandingEvents == 1);
+ --outstandingEvents;
+
+ // reset state back to IDLE temporarily until SREF is entered
+ pwrState = PWR_IDLE;
+
+ // Not bypassing refresh for SREF entry
+ } else {
+ DPRINTF(DRAMState, "Refreshing\n");
+
+ // there should be nothing waiting at this point
+ assert(!powerEvent.scheduled());
+
+ // kick the refresh event loop into action again, and that
+ // in turn will schedule a transition to the idle power
+ // state once the refresh is done
+ schedule(refreshEvent, curTick());
+
+ // Banks transitioned to IDLE, start REF
+ refreshState = REF_START;
+ }
+ }
+
+}
+
+void
+DRAMInterface::Rank::updatePowerStats()
+{
+ // All commands up to refresh have completed
+ // flush cmdList to DRAMPower
+ flushCmdList();
+
+ // Call the function that calculates window energy at intermediate update
+ // events like at refresh, stats dump as well as at simulation exit.
+ // Window starts at the last time the calcWindowEnergy function was called
+    // and extends up to the current time.
+ power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
+ dram.timeStampOffset);
+
+ // Get the energy from DRAMPower
+ Data::MemoryPowerModel::Energy energy = power.powerlib.getEnergy();
+
+ // The energy components inside the power lib are calculated over
+ // the window so accumulate into the corresponding gem5 stat
+ stats.actEnergy += energy.act_energy * dram.devicesPerRank;
+ stats.preEnergy += energy.pre_energy * dram.devicesPerRank;
+ stats.readEnergy += energy.read_energy * dram.devicesPerRank;
+ stats.writeEnergy += energy.write_energy * dram.devicesPerRank;
+ stats.refreshEnergy += energy.ref_energy * dram.devicesPerRank;
+ stats.actBackEnergy += energy.act_stdby_energy * dram.devicesPerRank;
+ stats.preBackEnergy += energy.pre_stdby_energy * dram.devicesPerRank;
+ stats.actPowerDownEnergy += energy.f_act_pd_energy * dram.devicesPerRank;
+ stats.prePowerDownEnergy += energy.f_pre_pd_energy * dram.devicesPerRank;
+ stats.selfRefreshEnergy += energy.sref_energy * dram.devicesPerRank;
+
+ // Accumulate window energy into the total energy.
+ stats.totalEnergy += energy.window_energy * dram.devicesPerRank;
+    // Average power must not be accumulated but calculated over the time
+    // since the last stats reset. SimClock::Frequency is the number of
+    // ticks per second.
+    //
+    //                  energy (pJ)     SimClock::Frequency
+    //    power (mW) = ------------- * ---------------------
+    //                  time (tick)            1e9
+ stats.averagePower = (stats.totalEnergy.value() /
+ (curTick() - dram.lastStatsResetTick)) *
+ (SimClock::Frequency / 1000000000.0);
+}
+
+void
+DRAMInterface::Rank::computeStats()
+{
+ DPRINTF(DRAM,"Computing stats due to a dump callback\n");
+
+ // Update the stats
+ updatePowerStats();
+
+ // final update of power state times
+ stats.pwrStateTime[pwrState] += (curTick() - pwrStateTick);
+ pwrStateTick = curTick();
+}
+
+void
+DRAMInterface::Rank::resetStats() {
+ // The only way to clear the counters in DRAMPower is to call
+ // calcWindowEnergy function as that then calls clearCounters. The
+ // clearCounters method itself is private.
+ power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
+ dram.timeStampOffset);
+
+}
+
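+// a rank in self-refresh must be woken up if reads are pending for it, or
+// if the controller is in the write state and writes are pending for it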
+bool
+DRAMInterface::Rank::forceSelfRefreshExit() const {
+ return (readEntries != 0) ||
+ (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
+}
+
+void
+DRAMInterface::DRAMStats::resetStats()
+{
+ dram.lastStatsResetTick = curTick();
+}
+
+DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
+ : Stats::Group(&_dram),
+ dram(_dram),
+
+ ADD_STAT(readBursts, "Number of DRAM read bursts"),
+ ADD_STAT(writeBursts, "Number of DRAM write bursts"),
+
+      ADD_STAT(perBankRdBursts, "Per bank read bursts"),
+ ADD_STAT(perBankWrBursts, "Per bank write bursts"),
+
+ ADD_STAT(totQLat, "Total ticks spent queuing"),
+ ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
+ ADD_STAT(totMemAccLat,
+ "Total ticks spent from burst creation until serviced "
+ "by the DRAM"),
+
+ ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"),
+ ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
+ ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"),
+
+ ADD_STAT(readRowHits, "Number of row buffer hits during reads"),
+ ADD_STAT(writeRowHits, "Number of row buffer hits during writes"),
+ ADD_STAT(readRowHitRate, "Row buffer hit rate for reads"),
+ ADD_STAT(writeRowHitRate, "Row buffer hit rate for writes"),
+
+ ADD_STAT(bytesPerActivate, "Bytes accessed per row activation"),
+ ADD_STAT(bytesRead, "Total number of bytes read from DRAM"),
+ ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
+ ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"),
+ ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"),
+ ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
+
+ ADD_STAT(busUtil, "Data bus utilization in percentage"),
+ ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
+ ADD_STAT(busUtilWrite, "Data bus utilization in percentage for writes"),
+
+ ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined")
+
+{
+}
+
+void
+DRAMInterface::DRAMStats::regStats()
+{
+ using namespace Stats;
+
+ avgQLat.precision(2);
+ avgBusLat.precision(2);
+ avgMemAccLat.precision(2);
+
+ readRowHitRate.precision(2);
+ writeRowHitRate.precision(2);
+
+ perBankRdBursts.init(dram.banksPerRank * dram.ranksPerChannel);
+ perBankWrBursts.init(dram.banksPerRank * dram.ranksPerChannel);
+
+ bytesPerActivate
+ .init(dram.maxAccessesPerRow ?
+ dram.maxAccessesPerRow : dram.rowBufferSize)
+ .flags(nozero);
+
+ peakBW.precision(2);
+ busUtil.precision(2);
+ busUtilWrite.precision(2);
+ busUtilRead.precision(2);
+
+ pageHitRate.precision(2);
+
+ // Formula stats
+ avgQLat = totQLat / readBursts;
+ avgBusLat = totBusLat / readBursts;
+ avgMemAccLat = totMemAccLat / readBursts;
+
+ readRowHitRate = (readRowHits / readBursts) * 100;
+ writeRowHitRate = (writeRowHits / writeBursts) * 100;
+
+ avgRdBW = (bytesRead / 1000000) / simSeconds;
+ avgWrBW = (bytesWritten / 1000000) / simSeconds;
+ peakBW = (SimClock::Frequency / dram.burstDelay()) *
+ dram.bytesPerBurst() / 1000000;
+
+ busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
+ busUtilRead = avgRdBW / peakBW * 100;
+ busUtilWrite = avgWrBW / peakBW * 100;
+
+ pageHitRate = (writeRowHits + readRowHits) /
+ (writeBursts + readBursts) * 100;
+}
+
+DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
+ : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
+ rank(_rank),
+
+ ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"),
+ ADD_STAT(preEnergy, "Energy for precharge commands per rank (pJ)"),
+ ADD_STAT(readEnergy, "Energy for read commands per rank (pJ)"),
+ ADD_STAT(writeEnergy, "Energy for write commands per rank (pJ)"),
+ ADD_STAT(refreshEnergy, "Energy for refresh commands per rank (pJ)"),
+ ADD_STAT(actBackEnergy, "Energy for active background per rank (pJ)"),
+ ADD_STAT(preBackEnergy, "Energy for precharge background per rank (pJ)"),
+ ADD_STAT(actPowerDownEnergy,
+ "Energy for active power-down per rank (pJ)"),
+ ADD_STAT(prePowerDownEnergy,
+ "Energy for precharge power-down per rank (pJ)"),
+ ADD_STAT(selfRefreshEnergy, "Energy for self refresh per rank (pJ)"),
+
+ ADD_STAT(totalEnergy, "Total energy per rank (pJ)"),
+ ADD_STAT(averagePower, "Core power per rank (mW)"),
+
+      ADD_STAT(totalIdleTime, "Total idle time per DRAM rank"),
+ ADD_STAT(pwrStateTime, "Time in different power states")
+{
+}
+
+void
+DRAMInterface::RankStats::regStats()
+{
+ Stats::Group::regStats();
+
+ pwrStateTime
+ .init(6)
+ .subname(0, "IDLE")
+ .subname(1, "REF")
+ .subname(2, "SREF")
+ .subname(3, "PRE_PDN")
+ .subname(4, "ACT")
+ .subname(5, "ACT_PDN");
+}
+
+void
+DRAMInterface::RankStats::resetStats()
+{
+ Stats::Group::resetStats();
+
+ rank.resetStats();
+}
+
+void
+DRAMInterface::RankStats::preDumpStats()
+{
+ Stats::Group::preDumpStats();
+
+ rank.computeStats();
+}
+
+NVMInterface::NVMInterface(const NVMInterfaceParams* _p)
+ : MemInterface(_p),
+ maxPendingWrites(_p->max_pending_writes),
+ maxPendingReads(_p->max_pending_reads),
+ twoCycleRdWr(_p->two_cycle_rdwr),
+ tREAD(_p->tREAD), tWRITE(_p->tWRITE), tSEND(_p->tSEND),
+ stats(*this),
+ writeRespondEvent([this]{ processWriteRespondEvent(); }, name()),
+ readReadyEvent([this]{ processReadReadyEvent(); }, name()),
+ nextReadAt(0), numPendingReads(0), numReadDataReady(0),
+ numReadsToIssue(0), numWritesQueued(0)
+{
+ DPRINTF(NVM, "Setting up NVM Interface\n");
+
+ fatal_if(!isPowerOf2(burstSize), "NVM burst size %d is not allowed, "
+ "must be a power of two\n", burstSize);
+
+ // sanity check the ranks since we rely on bit slicing for the
+ // address decoding
+ fatal_if(!isPowerOf2(ranksPerChannel), "NVM rank count of %d is "
+ "not allowed, must be a power of two\n", ranksPerChannel);
+
+    for (int i = 0; i < ranksPerChannel; i++) {
+ // Add NVM ranks to the system
+ DPRINTF(NVM, "Creating NVM rank %d \n", i);
+ Rank* rank = new Rank(_p, i, *this);
+ ranks.push_back(rank);
+ }
+
+ uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
+
+ DPRINTF(NVM, "NVM capacity %lld (%lld) bytes\n", capacity,
+ AbstractMemory::size());
+
+ rowsPerBank = capacity / (rowBufferSize *
+ banksPerRank * ranksPerChannel);
+
+}
+
+NVMInterface*
+NVMInterfaceParams::create()
+{
+ return new NVMInterface(this);
+}
+
+NVMInterface::Rank::Rank(const NVMInterfaceParams* _p,
+ int _rank, NVMInterface& _nvm)
+ : EventManager(&_nvm), nvm(_nvm), rank(_rank),
+ banks(_p->banks_per_rank)
+{
+ for (int b = 0; b < _p->banks_per_rank; b++) {
+ banks[b].bank = b;
+ // No bank groups; simply assign to bank number
+ banks[b].bankgr = b;
+ }
+}
+
+void
+NVMInterface::init()
+{
+ AbstractMemory::init();
+}
+
+void NVMInterface::setupRank(const uint8_t rank, const bool is_read)
+{
+ if (is_read) {
+ // increment count to trigger read and track number of reads in Q
+ numReadsToIssue++;
+ } else {
+ // increment count to track number of writes in Q
+ numWritesQueued++;
+ }
+}
+
+pair<MemPacketQueue::iterator, Tick>
+NVMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
+{
+    // remember if we found a hit, but one that cannot issue seamlessly
+ bool found_prepped_pkt = false;
+
+ auto selected_pkt_it = queue.end();
+ Tick selected_col_at = MaxTick;
+
+ for (auto i = queue.begin(); i != queue.end() ; ++i) {
+ MemPacket* pkt = *i;
+
+ // select optimal NVM packet in Q
+ if (!pkt->isDram()) {
+ const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
+ const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
+ bank.wrAllowedAt;
+
+            // check that the rank is not doing a refresh and thus is
+            // available; if not, jump to the next packet
+ if (burstReady(pkt)) {
+ DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__,
+ pkt->bank, pkt->rank);
+
+ // no additional rank-to-rank or media delays
+ if (col_allowed_at <= min_col_at) {
+ // FCFS within entries that can issue without
+ // additional delay, such as same rank accesses
+ // or media delay requirements
+ selected_pkt_it = i;
+ selected_col_at = col_allowed_at;
+ // no need to look through the remaining queue entries
+ DPRINTF(NVM, "%s Seamless buffer hit\n", __func__);
+ break;
+ } else if (!found_prepped_pkt) {
+                // packet is to prepped region but cannot issue
+ // seamlessly; remember this one and continue
+ selected_pkt_it = i;
+ selected_col_at = col_allowed_at;
+ DPRINTF(NVM, "%s Prepped packet found \n", __func__);
+ found_prepped_pkt = true;
+ }
+ } else {
+ DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__,
+ pkt->bank, pkt->rank);
+ }
+ }
+ }
+
+ if (selected_pkt_it == queue.end()) {
+ DPRINTF(NVM, "%s no available NVM ranks found\n", __func__);
+ }
+
+ return make_pair(selected_pkt_it, selected_col_at);
+}
+
+void
+NVMInterface::chooseRead(MemPacketQueue& queue)
+{
+ Tick cmd_at = std::max(curTick(), nextReadAt);
+
+ // This method does the arbitration between non-deterministic read
+ // requests to NVM. The chosen packet is not removed from the queue
+ // at this time. Removal from the queue will occur when the data is
+ // ready and a separate SEND command is issued to retrieve it via the
+ // chooseNext function in the top-level controller.
+ assert(!queue.empty());
+
+ assert(numReadsToIssue > 0);
+ numReadsToIssue--;
+ // For simplicity, issue non-deterministic reads in order (fcfs)
+ for (auto i = queue.begin(); i != queue.end() ; ++i) {
+ MemPacket* pkt = *i;
+
+ // Find 1st NVM read packet that hasn't issued read command
+ if (pkt->readyTime == MaxTick && !pkt->isDram() && pkt->isRead()) {
+ // get the bank
+ Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];
+
+            // issuing a read, increment counter and verify we haven't overrun
+ numPendingReads++;
+ assert(numPendingReads <= maxPendingReads);
+
+ // increment the bytes accessed and the accesses per row
+ bank_ref.bytesAccessed += burstSize;
+
+            // Verify there is command bandwidth to issue
+            // Host can issue the read immediately with buffering closer
+ // to the NVM. The actual execution at the NVM may be delayed
+ // due to busy resources
+ if (twoCycleRdWr) {
+ cmd_at = ctrl->verifyMultiCmd(cmd_at,
+ maxCommandsPerWindow, tCK);
+ } else {
+ cmd_at = ctrl->verifySingleCmd(cmd_at,
+ maxCommandsPerWindow);
+ }
+
+ // Update delay to next read
+ // Ensures single read command issued per cycle
+ nextReadAt = cmd_at + tCK;
+
+ // If accessing a new location in this bank, update timing
+ // and stats
+ if (bank_ref.openRow != pkt->row) {
+ // update the open bank, re-using row field
+ bank_ref.openRow = pkt->row;
+
+ // sample the bytes accessed to a buffer in this bank
+ // here when we are re-buffering the data
+ stats.bytesPerBank.sample(bank_ref.bytesAccessed);
+ // start counting anew
+ bank_ref.bytesAccessed = 0;
+
+                // hold off the next command to this bank until the read completes
+ // and the data has been successfully buffered
+ // can pipeline accesses to the same bank, sending them
+ // across the interface B2B, but will incur full access
+ // delay between data ready responses to different buffers
+ // in a bank
+ bank_ref.actAllowedAt = std::max(cmd_at,
+ bank_ref.actAllowedAt) + tREAD;
+ }
+            // update per packet readyTime to hold off the burst read operation
+ // overloading readyTime, which will be updated again when the
+ // burst is issued
+ pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt);
+
+ DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. "
+ "Data ready at %d\n",
+ bank_ref.bank, cmd_at, pkt->readyTime);
+
+ // Insert into read ready queue. It will be handled after
+ // the media delay has been met
+ if (readReadyQueue.empty()) {
+ assert(!readReadyEvent.scheduled());
+ schedule(readReadyEvent, pkt->readyTime);
+ } else if (readReadyEvent.when() > pkt->readyTime) {
+ // move it sooner in time, to the first read with data
+ reschedule(readReadyEvent, pkt->readyTime);
+ } else {
+ assert(readReadyEvent.scheduled());
+ }
+ readReadyQueue.push_back(pkt->readyTime);
+
+ // found an NVM read to issue - break out
+ break;
+ }
+ }
+}
+
+void
+NVMInterface::processReadReadyEvent()
+{
+ // signal that there is read data ready to be transmitted
+ numReadDataReady++;
+
+ DPRINTF(NVM,
+ "processReadReadyEvent(): Data for an NVM read is ready. "
+ "numReadDataReady is %d\t numPendingReads is %d\n",
+ numReadDataReady, numPendingReads);
+
+ // Find lowest ready time and verify it is equal to curTick
+ // also find the next lowest to schedule next event
+ // Done with this response, erase entry
+ auto ready_it = readReadyQueue.begin();
+ Tick next_ready_at = MaxTick;
+ for (auto i = readReadyQueue.begin(); i != readReadyQueue.end() ; ++i) {
+ if (*ready_it > *i) {
+ next_ready_at = *ready_it;
+ ready_it = i;
+ } else if ((next_ready_at > *i) && (i != ready_it)) {
+ next_ready_at = *i;
+ }
+ }
+
+ // Verify we found the time of this event and remove it
+ assert(*ready_it == curTick());
+ readReadyQueue.erase(ready_it);
+
+ if (!readReadyQueue.empty()) {
+ assert(readReadyQueue.front() >= curTick());
+ assert(!readReadyEvent.scheduled());
+ schedule(readReadyEvent, next_ready_at);
+ }
+
+ // It is possible that a new command kicks things back into
+ // action before reaching this point but need to ensure that we
+ // continue to process new commands as read data becomes ready
+ // This will also trigger a drain if needed
+ if (!ctrl->requestEventScheduled()) {
+ DPRINTF(NVM, "Restart controller scheduler immediately\n");
+ ctrl->restartScheduler(curTick());
+ }
+}
+
+
+bool
+NVMInterface::burstReady(MemPacket* pkt) const {
+ bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true)) &&
+ (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
+ bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true) &&
+ !writeRespQueueFull();
+ return (read_rdy || write_rdy);
+}
+
+pair<Tick, Tick>
+NVMInterface::doBurstAccess(MemPacket* pkt, Tick next_burst_at)
+{
+ DPRINTF(NVM, "NVM Timing access to addr %lld, rank/bank/row %d %d %d\n",
+ pkt->addr, pkt->rank, pkt->bank, pkt->row);
+
+ // get the bank
+ Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];
+
+ // respect any constraints on the command
+ const Tick bst_allowed_at = pkt->isRead() ?
+ bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;
+
+ // we need to wait until the bus is available before we can issue
+ // the command; need minimum of tBURST between commands
+ Tick cmd_at = std::max(bst_allowed_at, curTick());
+
+ // also respect the controller's minimum bus timing requirement
+ // between bursts (next_burst_at)
+ cmd_at = std::max(cmd_at, next_burst_at);
+
+ // Verify there is command bandwidth to issue
+ // Read burst (send command) is a simple data access and only requires
+ // one command cycle
+ // Write command may require multiple cycles to enable larger address space
+ if (pkt->isRead() || !twoCycleRdWr) {
+ cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
+ } else {
+ cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
+ }
+ // update the packet ready time to reflect when data will be transferred
+ // Use the same bus delays defined for NVM
+ pkt->readyTime = cmd_at + tSEND + tBURST;
+
+ Tick dly_to_rd_cmd;
+ Tick dly_to_wr_cmd;
+ for (auto n : ranks) {
+ for (int i = 0; i < banksPerRank; i++) {
+ // base delay is a function of tBURST and bus turnaround
+ dly_to_rd_cmd = pkt->isRead() ? tBURST : writeToReadDelay();
+ dly_to_wr_cmd = pkt->isRead() ? readToWriteDelay() : tBURST;
+
+ if (pkt->rank != n->rank) {
+ // adjust timing for different ranks
+ // Need to account for rank-to-rank switching with tCS
+ dly_to_wr_cmd = rankToRankDelay();
+ dly_to_rd_cmd = rankToRankDelay();
+ }
+ n->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
+ n->banks[i].rdAllowedAt);
+
+ n->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
+ n->banks[i].wrAllowedAt);
+ }
+ }
+
+ DPRINTF(NVM, "NVM Access to %lld, ready at %lld.\n",
+ pkt->addr, pkt->readyTime);
+
+ if (pkt->isRead()) {
+ // completed the read, decrement counters
+ assert(numPendingReads != 0);
+ assert(numReadDataReady != 0);
+
+ numPendingReads--;
+ numReadDataReady--;
+ } else {
+ // Adjust number of NVM writes in Q
+ assert(numWritesQueued > 0);
+ numWritesQueued--;
+
+ // increment the bytes accessed and the accesses per row
+ // only increment for writes as the reads are handled when
+ // the non-deterministic read is issued, before the data transfer
+ bank_ref.bytesAccessed += burstSize;
+
+ // Commands will be issued serially when accessing the same bank
+ // Commands can issue in parallel to different banks
+ if ((bank_ref.bank == pkt->bank) &&
+ (bank_ref.openRow != pkt->row)) {
+ // update the open buffer, re-using row field
+ bank_ref.openRow = pkt->row;
+
+ // sample the bytes accessed to a buffer in this bank
+ // here when we are re-buffering the data
+ stats.bytesPerBank.sample(bank_ref.bytesAccessed);
+ // start counting anew
+ bank_ref.bytesAccessed = 0;
+ }
+
+ // Determine when write will actually complete, assuming it is
+ // scheduled to push to NVM immediately
+ // update actAllowedAt to serialize next command completion that
+ // accesses this bank; must wait until this write completes
+ // Data accesses to the same buffer in this bank
+ // can issue immediately after actAllowedAt expires, without
+ // waiting additional delay of tWRITE. Can revisit this
+ // assumption/simplification in the future.
+ bank_ref.actAllowedAt = std::max(pkt->readyTime,
+ bank_ref.actAllowedAt) + tWRITE;
+
+ // Need to track number of outstanding writes to
+ // ensure 'buffer' on media controller does not overflow
+ assert(!writeRespQueueFull());
+
+ // Insert into write done queue. It will be handled after
+ // the media delay has been met
+ if (writeRespQueueEmpty()) {
+ assert(!writeRespondEvent.scheduled());
+ schedule(writeRespondEvent, bank_ref.actAllowedAt);
+ } else {
+ assert(writeRespondEvent.scheduled());
+ }
+ writeRespQueue.push_back(bank_ref.actAllowedAt);
+ writeRespQueue.sort();
+ if (writeRespondEvent.when() > bank_ref.actAllowedAt) {
+ DPRINTF(NVM, "Rescheduled respond event from %lld to %11d\n",
+ writeRespondEvent.when(), bank_ref.actAllowedAt);
+ DPRINTF(NVM, "Front of response queue is %11d\n",
+ writeRespQueue.front());
+ reschedule(writeRespondEvent, bank_ref.actAllowedAt);
+ }
+
+ }
+
+ // Update the stats
+ if (pkt->isRead()) {
+ stats.readBursts++;
+ stats.bytesRead += burstSize;
+ stats.perBankRdBursts[pkt->bankId]++;
+ stats.pendingReads.sample(numPendingReads);
+
+ // Update latency stats
+ stats.totMemAccLat += pkt->readyTime - pkt->entryTime;
+ stats.totBusLat += tBURST;
+ stats.totQLat += cmd_at - pkt->entryTime;
+ } else {
+ stats.writeBursts++;
+ stats.bytesWritten += burstSize;
+ stats.perBankWrBursts[pkt->bankId]++;
+ }
+
+ return make_pair(cmd_at, cmd_at + tBURST);
+}
+
+void
+NVMInterface::processWriteRespondEvent()
+{
+ DPRINTF(NVM,
+ "processWriteRespondEvent(): A NVM write reached its readyTime. "
+ "%d remaining pending NVM writes\n", writeRespQueue.size());
+
+ // Update stat to track histogram of pending writes
+ stats.pendingWrites.sample(writeRespQueue.size());
+
+ // Done with this response, pop entry
+ writeRespQueue.pop_front();
+
+ if (!writeRespQueue.empty()) {
+ assert(writeRespQueue.front() >= curTick());
+ assert(!writeRespondEvent.scheduled());
+ schedule(writeRespondEvent, writeRespQueue.front());
+ }
+
+ // It is possible that a new command kicks things back into
+ // action before reaching this point but need to ensure that we
+ // continue to process new commands as writes complete at the media and
+ // credits become available. This will also trigger a drain if needed
+ if (!ctrl->requestEventScheduled()) {
+ DPRINTF(NVM, "Restart controller scheduler immediately\n");
+ ctrl->restartScheduler(curTick());
+ }
+}
+
+void
+NVMInterface::addRankToRankDelay(Tick cmd_at)
+{
+ // update timing for NVM ranks due to bursts issued
+ // to ranks for other media interfaces
+ for (auto n : ranks) {
+ for (int i = 0; i < banksPerRank; i++) {
+ // different rank by default
+ // Need to only account for rank-to-rank switching
+ n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
+ n->banks[i].rdAllowedAt);
+ n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
+ n->banks[i].wrAllowedAt);
+ }
+ }
+}
+
+bool
+NVMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm)
+{
+ DPRINTF(NVM,"isBusy: numReadDataReady = %d\n", numReadDataReady);
+ // Determine NVM is busy and cannot issue a burst
+ // A read burst cannot issue when data is not ready from the NVM
+ // Also check that we have reads queued to ensure we can change
+ // bus direction to service potential write commands.
+ // A write cannot issue once we've reached MAX pending writes
+ // Only assert busy for the write case when there are also
+ // no reads in Q and the write queue only contains NVM commands
+ // This allows the bus state to switch and service reads
+ return (ctrl->inReadBusState(true) ?
+ (numReadDataReady == 0) && !read_queue_empty :
+ writeRespQueueFull() && read_queue_empty &&
+ all_writes_nvm);
+}
+
+
+NVMInterface::NVMStats::NVMStats(NVMInterface &_nvm)
+ : Stats::Group(&_nvm),
+ nvm(_nvm),
+
+ ADD_STAT(readBursts, "Number of NVM read bursts"),
+ ADD_STAT(writeBursts, "Number of NVM write bursts"),
+
+ ADD_STAT(perBankRdBursts, "Per bank read bursts"),
+ ADD_STAT(perBankWrBursts, "Per bank write bursts"),
+
+ ADD_STAT(totQLat, "Total ticks spent queuing"),
+ ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
+ ADD_STAT(totMemAccLat,
+ "Total ticks spent from burst creation until serviced "
+ "by the NVM"),
+ ADD_STAT(avgQLat, "Average queueing delay per NVM burst"),
+ ADD_STAT(avgBusLat, "Average bus latency per NVM burst"),
+ ADD_STAT(avgMemAccLat, "Average memory access latency per NVM burst"),
+
+ ADD_STAT(bytesRead, "Total number of bytes read from NVM"),
+ ADD_STAT(bytesWritten, "Total number of bytes written to NVM"),
+ ADD_STAT(avgRdBW, "Average NVM read bandwidth in MiBytes/s"),
+ ADD_STAT(avgWrBW, "Average NVM write bandwidth in MiBytes/s"),
+ ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
+ ADD_STAT(busUtil, "NVM Data bus utilization in percentage"),
+ ADD_STAT(busUtilRead, "NVM Data bus read utilization in percentage"),
+ ADD_STAT(busUtilWrite, "NVM Data bus write utilization in percentage"),
+
+ ADD_STAT(pendingReads, "Reads issued to NVM for which data has not been "
+ "transferred"),
+ ADD_STAT(pendingWrites, "Number of outstanding writes to NVM"),
+ ADD_STAT(bytesPerBank, "Bytes read within a bank before loading "
+ "new bank")
+{
+}
+
+void
+NVMInterface::NVMStats::regStats()
+{
+ using namespace Stats;
+
+ perBankRdBursts.init(nvm.ranksPerChannel == 0 ? 1 :
+ nvm.banksPerRank * nvm.ranksPerChannel);
+
+ perBankWrBursts.init(nvm.ranksPerChannel == 0 ? 1 :
+ nvm.banksPerRank * nvm.ranksPerChannel);
+
+ avgQLat.precision(2);
+ avgBusLat.precision(2);
+ avgMemAccLat.precision(2);
+
+ avgRdBW.precision(2);
+ avgWrBW.precision(2);
+ peakBW.precision(2);
+
+ busUtil.precision(2);
+ busUtilRead.precision(2);
+ busUtilWrite.precision(2);
+
+ pendingReads
+ .init(nvm.maxPendingReads)
+ .flags(nozero);
+
+ pendingWrites
+ .init(nvm.maxPendingWrites)
+ .flags(nozero);
+
+ bytesPerBank
+ .init(nvm.rowBufferSize)
+ .flags(nozero);
+
+ avgQLat = totQLat / readBursts;
+ avgBusLat = totBusLat / readBursts;
+ avgMemAccLat = totMemAccLat / readBursts;
+
+ avgRdBW = (bytesRead / 1000000) / simSeconds;
+ avgWrBW = (bytesWritten / 1000000) / simSeconds;
+ peakBW = (SimClock::Frequency / nvm.tBURST) *
+ nvm.burstSize / 1000000;
+
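+ // Worked example with assumed values: a 64-byte burst every 5 ns
+ // corresponds to 64 B / 5 ns = 1.28e10 bytes/s, which the divide
+ // by 1e6 above reports as roughly 12800; busUtil below then
+ // expresses the measured read plus write bandwidth as a percentage
+ // of this peak.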
+ busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
+ busUtilRead = avgRdBW / peakBW * 100;
+ busUtilWrite = avgWrBW / peakBW * 100;
+}
--- /dev/null
+/*
+ * Copyright (c) 2012-2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2013 Amin Farmahini-Farahani
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * MemInterface declaration
+ */
+
+#ifndef __MEM_INTERFACE_HH__
+#define __MEM_INTERFACE_HH__
+
+#include <deque>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "enums/AddrMap.hh"
+#include "enums/PageManage.hh"
+#include "mem/abstract_mem.hh"
+#include "mem/drampower.hh"
+#include "mem/mem_ctrl.hh"
+#include "params/DRAMInterface.hh"
+#include "params/MemInterface.hh"
+#include "params/NVMInterface.hh"
+#include "sim/eventq.hh"
+
+/**
+ * General interface to memory device
+ * Includes functions and parameters shared across media types
+ */
+class MemInterface : public AbstractMemory
+{
+ protected:
+ /**
+ * A basic class to track the bank state, i.e. what row is
+ * currently open (if any), when is the bank free to accept a new
+ * column (read/write) command, when can it be precharged, and
+ * when can it be activated.
+ *
+ * The bank also keeps track of how many bytes have been accessed
+ * in the open row since it was opened.
+ */
+ class Bank
+ {
+
+ public:
+
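+ // Sentinel meaning no row is open; -1 wraps to the largest
+ // representable uint32_t value, so it cannot collide with a real row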
+ static const uint32_t NO_ROW = -1;
+
+ uint32_t openRow;
+ uint8_t bank;
+ uint8_t bankgr;
+
+ Tick rdAllowedAt;
+ Tick wrAllowedAt;
+ Tick preAllowedAt;
+ Tick actAllowedAt;
+
+ uint32_t rowAccesses;
+ uint32_t bytesAccessed;
+
+ Bank() :
+ openRow(NO_ROW), bank(0), bankgr(0),
+ rdAllowedAt(0), wrAllowedAt(0), preAllowedAt(0), actAllowedAt(0),
+ rowAccesses(0), bytesAccessed(0)
+ { }
+ };
+
+ /**
+ * A pointer to the parent MemCtrl instance
+ */
+ MemCtrl* ctrl;
+
+ /**
+ * Number of commands that can issue in the defined controller
+ * command window, used to verify command bandwidth
+ */
+ unsigned int maxCommandsPerWindow;
+
+ /**
+ * Memory controller configuration initialized based on parameter
+ * values.
+ */
+ Enums::AddrMap addrMapping;
+
+ /**
+ * General device and channel characteristics
+ * The rowsPerBank is determined based on the capacity, number of
+ * ranks and banks, the burst size, and the row buffer size.
+ */
+ const uint32_t burstSize;
+ const uint32_t deviceSize;
+ const uint32_t deviceRowBufferSize;
+ const uint32_t devicesPerRank;
+ const uint32_t rowBufferSize;
+ const uint32_t burstsPerRowBuffer;
+ const uint32_t burstsPerStripe;
+ const uint32_t ranksPerChannel;
+ const uint32_t banksPerRank;
+ uint32_t rowsPerBank;
+
+ /**
+ * General timing requirements
+ */
+ const Tick M5_CLASS_VAR_USED tCK;
+ const Tick tCS;
+ const Tick tBURST;
+ const Tick tRTW;
+ const Tick tWTR;
+
+ /*
+ * @return delay between write and read commands
+ */
+ virtual Tick writeToReadDelay() const { return tBURST + tWTR; }
+
+ /*
+ * @return delay between read and write commands
+ */
+ Tick readToWriteDelay() const { return tBURST + tRTW; }
+
+ /*
+ * @return delay between accesses to different ranks
+ */
+ Tick rankToRankDelay() const { return tBURST + tCS; }
+
+
+ public:
+
+ /**
+ * Buffer sizes for read and write queues in the controller
+ * These are passed to the controller on instantiation
+ * Defining them here allows for buffers to be resized based
+ * on memory type / configuration.
+ */
+ const uint32_t readBufferSize;
+ const uint32_t writeBufferSize;
+
+ /** Set a pointer to the controller and initialize
+ * interface based on controller parameters
+ * @param _ctrl pointer to the parent controller
+ * @param command_window size of command window used to
+ * check command bandwidth
+ */
+ void setCtrl(MemCtrl* _ctrl, unsigned int command_window);
+
+ /**
+ * Get an address in a dense range which starts from 0. The input
+ * address is the physical address of the request in an address
+ * space that contains other SimObjects apart from this
+ * controller.
+ *
+ * @param addr The input address which should be in the addrRange
+ * @return An address in the contiguous range [0, max)
+ */
+ Addr getCtrlAddr(Addr addr) { return range.getOffset(addr); }
+
+ /**
+ * Setup the rank based on packet received
+ *
+ * @param integer value of rank to be setup. used to index ranks vector
+ * @param are we setting up rank for read or write packet?
+ */
+ virtual void setupRank(const uint8_t rank, const bool is_read) = 0;
+
+ /**
+ * Check drain state of interface
+ *
+ * @return true if all ranks are drained and idle
+ *
+ */
+ virtual bool allRanksDrained() const = 0;
+
+ /**
+ * For FR-FCFS policy, find first command that can issue
+ * Function will be overridden by interface to select based
+ * on media characteristics, used to determine when read
+ * or write can issue.
+ *
+ * @param queue Queued requests to consider
+ * @param min_col_at Minimum tick for 'seamless' issue
+ * @return an iterator to the selected packet, else queue.end()
+ * @return the tick when the packet selected will issue
+ */
+ virtual std::pair<MemPacketQueue::iterator, Tick>
+ chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const = 0;
+
+ /*
+ * Function to calculate unloaded latency
+ */
+ virtual Tick accessLatency() const = 0;
+
+ /**
+ * @return number of bytes in a burst for this interface
+ */
+ uint32_t bytesPerBurst() const { return burstSize; }
+
+ /*
+ * @return time to offset next command
+ */
+ virtual Tick commandOffset() const = 0;
+
+ /**
+ * Check if a burst operation can be issued to the interface
+ *
+ * @param Return true if RD/WR can issue
+ */
+ virtual bool burstReady(MemPacket* pkt) const = 0;
+
+ /**
+ * Determine the required delay for an access to a different rank
+ *
+ * @return required rank to rank delay
+ */
+ Tick rankDelay() const { return tCS; }
+
+ /**
+ *
+ * @return minimum additional bus turnaround required for read-to-write
+ */
+ Tick minReadToWriteDataGap() const { return std::min(tRTW, tCS); }
+
+ /**
+ *
+ * @return minimum additional bus turnaround required for write-to-read
+ */
+ Tick minWriteToReadDataGap() const { return std::min(tWTR, tCS); }
+
+ /**
+ * Address decoder to figure out physical mapping onto ranks,
+ * banks, and rows. This function is called multiple times on the same
+ * system packet if the packet is larger than a burst of the memory. The
+ * pkt_addr is used for the offset within the packet.
+ *
+ * @param pkt The packet from the outside world
+ * @param pkt_addr The starting address of the packet
+ * @param size The size of the packet in bytes
+ * @param is_read Is the request for a read or a write to memory
+ * @param is_dram Is the request to a DRAM interface
+ * @return A MemPacket pointer with the decoded information
+ */
+ MemPacket* decodePacket(const PacketPtr pkt, Addr pkt_addr,
+ unsigned int size, bool is_read, bool is_dram);
+
+ /**
+ * Add rank to rank delay to bus timing to all banks in all ranks
+ * when access to an alternate interface is issued
+ *
+ * param cmd_at Time of current command used as starting point for
+ * addition of rank-to-rank delay
+ */
+ virtual void addRankToRankDelay(Tick cmd_at) = 0;
+
+ typedef MemInterfaceParams Params;
+ MemInterface(const Params* _p);
+};
+
+/**
+ * Interface to DRAM devices with media specific parameters,
+ * statistics, and functions.
+ * The DRAMInterface includes a class for individual ranks
+ * and per rank functions.
+ */
+class DRAMInterface : public MemInterface
+{
+ private:
+ /**
+ * Simple structure to hold the values needed to keep track of
+ * commands for DRAMPower
+ */
+ struct Command
+ {
+ Data::MemCommand::cmds type;
+ uint8_t bank;
+ Tick timeStamp;
+
+ constexpr Command(Data::MemCommand::cmds _type, uint8_t _bank,
+ Tick time_stamp)
+ : type(_type), bank(_bank), timeStamp(time_stamp)
+ { }
+ };
+
+ /**
+ * The power state captures the different operational states of
+ * the DRAM and interacts with the bus read/write state machine,
+ * and the refresh state machine.
+ *
+ * PWR_IDLE : The idle state in which all banks are closed
+ * From here can transition to: PWR_REF, PWR_ACT,
+ * PWR_PRE_PDN
+ *
+ * PWR_REF : Auto-refresh state. Will transition when refresh is
+ * complete based on power state prior to PWR_REF
+ * From here can transition to: PWR_IDLE, PWR_PRE_PDN,
+ * PWR_SREF
+ *
+ * PWR_SREF : Self-refresh state. Entered after refresh if
+ * previous state was PWR_PRE_PDN
+ * From here can transition to: PWR_IDLE
+ *
+ * PWR_PRE_PDN : Precharge power down state
+ * From here can transition to: PWR_REF, PWR_IDLE
+ *
+ * PWR_ACT : Activate state in which one or more banks are open
+ * From here can transition to: PWR_IDLE, PWR_ACT_PDN
+ *
+ * PWR_ACT_PDN : Activate power down state
+ * From here can transition to: PWR_ACT
+ */
+ enum PowerState
+ {
+ PWR_IDLE = 0,
+ PWR_REF,
+ PWR_SREF,
+ PWR_PRE_PDN,
+ PWR_ACT,
+ PWR_ACT_PDN
+ };
+
+ /**
+ * The refresh state is used to control the progress of the
+ * refresh scheduling. When normal operation is in progress the
+ * refresh state is idle. Once tREFI has elapsed, a refresh event
+ * is triggered to start the following STM transitions which are
+ * used to issue a refresh and return back to normal operation
+ *
+ * REF_IDLE : IDLE state used during normal operation
+ * From here can transition to: REF_DRAIN
+ *
+ * REF_SREF_EXIT : Exiting a self-refresh; refresh event scheduled
+ * after self-refresh exit completes
+ * From here can transition to: REF_DRAIN
+ *
+ * REF_DRAIN : Drain state in which on going accesses complete.
+ * From here can transition to: REF_PD_EXIT
+ *
+ * REF_PD_EXIT : Evaluate pwrState and issue wakeup if needed
+ * Next state dependent on whether banks are open
+ * From here can transition to: REF_PRE, REF_START
+ *
+ * REF_PRE : Close (precharge) all open banks
+ * From here can transition to: REF_START
+ *
+ * REF_START : Issue refresh command and update DRAMPower stats
+ * From here can transition to: REF_RUN
+ *
+ * REF_RUN : Refresh running, waiting for tRFC to expire
+ * From here can transition to: REF_IDLE, REF_SREF_EXIT
+ */
+ enum RefreshState
+ {
+ REF_IDLE = 0,
+ REF_DRAIN,
+ REF_PD_EXIT,
+ REF_SREF_EXIT,
+ REF_PRE,
+ REF_START,
+ REF_RUN
+ };
+
+ class Rank;
+ struct RankStats : public Stats::Group
+ {
+ RankStats(DRAMInterface &dram, Rank &rank);
+
+ void regStats() override;
+ void resetStats() override;
+ void preDumpStats() override;
+
+ Rank &rank;
+
+ /*
+ * Command energies
+ */
+ Stats::Scalar actEnergy;
+ Stats::Scalar preEnergy;
+ Stats::Scalar readEnergy;
+ Stats::Scalar writeEnergy;
+ Stats::Scalar refreshEnergy;
+
+ /*
+ * Active Background Energy
+ */
+ Stats::Scalar actBackEnergy;
+
+ /*
+ * Precharge Background Energy
+ */
+ Stats::Scalar preBackEnergy;
+
+ /*
+ * Active Power-Down Energy
+ */
+ Stats::Scalar actPowerDownEnergy;
+
+ /*
+ * Precharge Power-Down Energy
+ */
+ Stats::Scalar prePowerDownEnergy;
+
+ /*
+ * self Refresh Energy
+ */
+ Stats::Scalar selfRefreshEnergy;
+
+ Stats::Scalar totalEnergy;
+ Stats::Scalar averagePower;
+
+ /**
+ * Stat to track total DRAM idle time
+ *
+ */
+ Stats::Scalar totalIdleTime;
+
+ /**
+ * Track time spent in each power state.
+ */
+ Stats::Vector pwrStateTime;
+ };
+
+ /**
+ * Rank class includes a vector of banks. Refresh and Power state
+ * machines are defined per rank. Events required to change the
+ * state of the refresh and power state machine are scheduled per
+ * rank. This class allows the implementation of rank-wise refresh
+ * and rank-wise power-down.
+ */
+ class Rank : public EventManager
+ {
+ private:
+
+ /**
+ * A reference to the parent DRAMInterface instance
+ */
+ DRAMInterface& dram;
+
+ /**
+ * Since we are taking decisions out of order, we need to keep
+ * track of what power transition is happening at what time
+ */
+ PowerState pwrStateTrans;
+
+ /**
+ * Previous low-power state, which will be re-entered after refresh.
+ */
+ PowerState pwrStatePostRefresh;
+
+ /**
+ * Track when we transitioned to the current power state
+ */
+ Tick pwrStateTick;
+
+ /**
+ * Keep track of when a refresh is due.
+ */
+ Tick refreshDueAt;
+
+ /**
+ * Function to update Power Stats
+ */
+ void updatePowerStats();
+
+ /**
+ * Schedule a power state transition in the future, and
+ * potentially override an already scheduled transition.
+ *
+ * @param pwr_state Power state to transition to
+ * @param tick Tick when transition should take place
+ */
+ void schedulePowerEvent(PowerState pwr_state, Tick tick);
+
+ public:
+
+ /**
+ * Current power state.
+ */
+ PowerState pwrState;
+
+ /**
+ * current refresh state
+ */
+ RefreshState refreshState;
+
+ /**
+ * rank is in or transitioning to power-down or self-refresh
+ */
+ bool inLowPowerState;
+
+ /**
+ * Current Rank index
+ */
+ uint8_t rank;
+
+ /**
+ * Track number of packets in read queue going to this rank
+ */
+ uint32_t readEntries;
+
+ /**
+ * Track number of packets in write queue going to this rank
+ */
+ uint32_t writeEntries;
+
+ /**
+ * Number of ACT, RD, and WR events currently scheduled
+ * Incremented when a refresh event is started as well
+ * Used to determine when a low-power state can be entered
+ */
+ uint8_t outstandingEvents;
+
+ /**
+ * delay low-power exit until this requirement is met
+ */
+ Tick wakeUpAllowedAt;
+
+ /**
+ * One DRAMPower instance per rank
+ */
+ DRAMPower power;
+
+ /**
+ * List of commands issued, to be sent to DRAMPower at refresh
+ * and stats dump. Keep commands here since commands to different
+ * banks are added out of order. Will only pass commands up to
+ * curTick() to DRAMPower after sorting.
+ */
+ std::vector<Command> cmdList;
+
+ /**
+ * Vector of Banks. Each rank is made of several devices which in
+ * turn are made from several banks.
+ */
+ std::vector<Bank> banks;
+
+ /**
+ * To track number of banks which are currently active for
+ * this rank.
+ */
+ unsigned int numBanksActive;
+
+ /** List to keep track of activate ticks */
+ std::deque<Tick> actTicks;
+
+ /**
+ * Track when we issued the last read/write burst
+ */
+ Tick lastBurstTick;
+
+ Rank(const DRAMInterfaceParams* _p, int _rank,
+ DRAMInterface& _dram);
+
+ const std::string name() const { return csprintf("%d", rank); }
+
+ /**
+ * Kick off accounting for power and refresh states and
+ * schedule initial refresh.
+ *
+ * @param ref_tick Tick for first refresh
+ */
+ void startup(Tick ref_tick);
+
+ /**
+ * Stop the refresh events.
+ */
+ void suspend();
+
+ /**
+ * Check if there is no refresh and no preparation of refresh ongoing
+ * i.e. the refresh state machine is in idle
+ *
+ * @param Return true if the rank is idle from a refresh point of view
+ */
+ bool inRefIdleState() const { return refreshState == REF_IDLE; }
+
+ /**
+ * Check if the current rank has all banks closed and is not
+ * in a low power state
+ *
+ * @param Return true if the rank is idle from a bank
+ * and power point of view
+ */
+ bool inPwrIdleState() const { return pwrState == PWR_IDLE; }
+
+ /**
+ * Trigger a self-refresh exit if there are entries enqueued
+ * Exit if there are any read entries regardless of the bus state.
+ * If we are currently issuing write commands, exit if we have any
+ * write commands enqueued as well.
+ * Could expand this in the future to analyze state of entire queue
+ * if needed.
+ *
+ * @return boolean indicating self-refresh exit should be scheduled
+ */
+ bool forceSelfRefreshExit() const;
+
+ /**
+ * Check if the command queue of current rank is idle
+ *
+ * @param Return true if there are no commands in Q.
+ * Bus direction determines queue checked.
+ */
+ bool isQueueEmpty() const;
+
+ /**
+ * Let the rank check if it was waiting for requests to drain
+ * to allow it to transition states.
+ */
+ void checkDrainDone();
+
+ /**
+ * Push commands out of the cmdList queue that are scheduled at
+ * or before curTick() to the DRAMPower library.
+ * All commands before curTick are guaranteed to be complete
+ * and can safely be flushed.
+ */
+ void flushCmdList();
+
+ /**
+ * Computes stats just prior to dump event
+ */
+ void computeStats();
+
+ /**
+ * Reset stats on a stats event
+ */
+ void resetStats();
+
+ /**
+ * Schedule a transition to power-down (sleep)
+ *
+ * @param pwr_state Power state to transition to
+ * @param tick Absolute tick when transition should take place
+ */
+ void powerDownSleep(PowerState pwr_state, Tick tick);
+
+ /**
+ * Schedule an event to wake up from power-down or self-refresh
+ * and update bank timing parameters
+ *
+ * @param exit_delay Relative tick defining the delay required between
+ * low-power exit and the next command
+ */
+ void scheduleWakeUpEvent(Tick exit_delay);
+
+ void processWriteDoneEvent();
+ EventFunctionWrapper writeDoneEvent;
+
+ void processActivateEvent();
+ EventFunctionWrapper activateEvent;
+
+ void processPrechargeEvent();
+ EventFunctionWrapper prechargeEvent;
+
+ void processRefreshEvent();
+ EventFunctionWrapper refreshEvent;
+
+ void processPowerEvent();
+ EventFunctionWrapper powerEvent;
+
+ void processWakeUpEvent();
+ EventFunctionWrapper wakeUpEvent;
+
+ protected:
+ RankStats stats;
+ };
+
+ /**
+ * Function for sorting Command structures based on timeStamp
+ *
+ * @param cmd Memory Command
+ * @param cmd_next Memory Command
+ * @return true if timeStamp of cmd < timeStamp of cmd_next
+ */
+ static bool
+ sortTime(const Command& cmd, const Command& cmd_next)
+ {
+ return cmd.timeStamp < cmd_next.timeStamp;
+ }
+
+ /**
+ * DRAM specific device characteristics
+ */
+ const uint32_t bankGroupsPerRank;
+ const bool bankGroupArch;
+
+ /**
+ * DRAM specific timing requirements
+ */
+ const Tick tCL;
+ const Tick tBURST_MIN;
+ const Tick tBURST_MAX;
+ const Tick tCCD_L_WR;
+ const Tick tCCD_L;
+ const Tick tRCD;
+ const Tick tRP;
+ const Tick tRAS;
+ const Tick tWR;
+ const Tick tRTP;
+ const Tick tRFC;
+ const Tick tREFI;
+ const Tick tRRD;
+ const Tick tRRD_L;
+ const Tick tPPD;
+ const Tick tAAD;
+ const Tick tXAW;
+ const Tick tXP;
+ const Tick tXS;
+ const Tick clkResyncDelay;
+ const bool dataClockSync;
+ const bool burstInterleave;
+ const uint8_t twoCycleActivate;
+ const uint32_t activationLimit;
+ const Tick wrToRdDlySameBG;
+ const Tick rdToWrDlySameBG;
+
+ Enums::PageManage pageMgmt;
+ /**
+ * Max column accesses (read and write) per row, before forcefully
+ * closing it.
+ */
+ const uint32_t maxAccessesPerRow;
+
+ // timestamp offset
+ uint64_t timeStampOffset;
+
+ // Holds the value of the DRAM rank of burst issued
+ uint8_t activeRank;
+
+ /** Enable or disable DRAM powerdown states. */
+ bool enableDRAMPowerdown;
+
+ /** The time when stats were last reset used to calculate average power */
+ Tick lastStatsResetTick;
+
+ /**
+ * Keep track of when row activations happen, in order to enforce
+ * the maximum number of activations in the activation window. The
+ * method updates the time that the banks become available based
+ * on the current limits.
+ *
+ * @param rank_ref Reference to the rank
+ * @param bank_ref Reference to the bank
+ * @param act_tick Time when the activation takes place
+ * @param row Index of the row
+ */
+ void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick,
+ uint32_t row);
+
+ /**
+ * Precharge a given bank and also update when the precharge is
+ * done. This will also deal with any stats related to the
+ * accesses to the open page.
+ *
+ * @param rank_ref The rank to precharge
+ * @param bank_ref The bank to precharge
+ * @param pre_tick Time when the precharge takes place
+ * @param auto_or_preall Is this an auto-precharge or precharge all command
+ * @param trace If false (e.g. for an auto precharge), do not add to trace
+ */
+ void prechargeBank(Rank& rank_ref, Bank& bank_ref,
+ Tick pre_tick, bool auto_or_preall = false,
+ bool trace = true);
+
+ struct DRAMStats : public Stats::Group
+ {
+ DRAMStats(DRAMInterface &dram);
+
+ void regStats() override;
+ void resetStats() override;
+
+ DRAMInterface &dram;
+
+ /** total number of DRAM bursts serviced */
+ Stats::Scalar readBursts;
+ Stats::Scalar writeBursts;
+
+ /** DRAM per bank stats */
+ Stats::Vector perBankRdBursts;
+ Stats::Vector perBankWrBursts;
+
+ // Latencies summed over all requests
+ Stats::Scalar totQLat;
+ Stats::Scalar totBusLat;
+ Stats::Scalar totMemAccLat;
+
+ // Average latencies per request
+ Stats::Formula avgQLat;
+ Stats::Formula avgBusLat;
+ Stats::Formula avgMemAccLat;
+
+ // Row hit count and rate
+ Stats::Scalar readRowHits;
+ Stats::Scalar writeRowHits;
+ Stats::Formula readRowHitRate;
+ Stats::Formula writeRowHitRate;
+ Stats::Histogram bytesPerActivate;
+ // Number of bytes transferred to/from DRAM
+ Stats::Scalar bytesRead;
+ Stats::Scalar bytesWritten;
+
+ // Average bandwidth
+ Stats::Formula avgRdBW;
+ Stats::Formula avgWrBW;
+ Stats::Formula peakBW;
+ // bus utilization
+ Stats::Formula busUtil;
+ Stats::Formula busUtilRead;
+ Stats::Formula busUtilWrite;
+ Stats::Formula pageHitRate;
+ };
+
+ DRAMStats stats;
+
+ /**
+ * Vector of dram ranks
+ */
+ std::vector<Rank*> ranks;
+
+ /*
+ * @return delay between write and read commands
+ */
+ Tick writeToReadDelay() const override { return tBURST + tWTR + tCL; }
+
+ /**
+ * Find which are the earliest banks ready to issue an activate
+ * for the enqueued requests. Assumes maximum of 32 banks per rank
+ * Also checks if the bank is already prepped.
+ *
+ * @param queue Queued requests to consider
+ * @param min_col_at time of seamless burst command
+ * @return One-hot encoded mask of bank indices
+ * @return boolean indicating burst can issue seamlessly, with no gaps
+ */
+ std::pair<std::vector<uint32_t>, bool>
+ minBankPrep(const MemPacketQueue& queue, Tick min_col_at) const;
+
+ /*
+ * @return time to send a burst of data without gaps
+ */
+ Tick
+ burstDelay() const
+ {
+ return (burstInterleave ? tBURST_MAX / 2 : tBURST);
+ }
+
+ public:
+ /**
+ * Initialize the DRAM interface and verify parameters
+ */
+ void init() override;
+
+ /**
+ * Iterate through dram ranks and instantiate per rank startup routine
+ */
+ void startup() override;
+
+ /**
+ * Setup the rank based on packet received
+ *
+ * @param integer value of rank to be setup. used to index ranks vector
+ * @param are we setting up rank for read or write packet?
+ */
+ void setupRank(const uint8_t rank, const bool is_read) override;
+
+ /**
+ * Iterate through dram ranks to exit self-refresh in order to drain
+ */
+ void drainRanks();
+
+ /**
+ * Return true once refresh is complete for all ranks and there are no
+ * additional commands enqueued. (only evaluated when draining)
+ * This will ensure that all banks are closed, power state is IDLE, and
+ * power stats have been updated
+ *
+ * @return true if all ranks have refreshed, with no commands enqueued
+ *
+ */
+ bool allRanksDrained() const override;
+
+ /**
+ * Iterate through DRAM ranks and suspend them
+ */
+ void suspend();
+
+ /*
+ * @return time to offset next command
+ */
+ Tick commandOffset() const override { return (tRP + tRCD); }
+
+ /*
+ * Function to calculate unloaded, closed bank access latency
+ */
+ Tick accessLatency() const override { return (tRP + tRCD + tCL); }
+
+ /**
+ * For FR-FCFS policy, find first DRAM command that can issue
+ *
+ * @param queue Queued requests to consider
+ * @param min_col_at Minimum tick for 'seamless' issue
+ * @return an iterator to the selected packet, else queue.end()
+ * @return the tick when the packet selected will issue
+ */
+ std::pair<MemPacketQueue::iterator, Tick>
+ chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const override;
+
+ /**
+ * Actually do the burst - figure out the latency it
+ * will take to service the req based on bank state, channel state etc
+ * and then update those states to account for this request. Based
+ * on this, update the packet's "readyTime" and move it to the
+ * response q from where it will eventually go back to the outside
+ * world.
+ *
+ * @param mem_pkt The packet created from the outside world pkt
+ * @param next_burst_at Minimum bus timing requirement from controller
+ * @param queue Reference to the read or write queue with the packet
+ * @return pair, tick when current burst is issued and
+ * tick when next burst can issue
+ */
+ std::pair<Tick, Tick>
+ doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
+ const std::vector<MemPacketQueue>& queue);
+
+ /**
+ * Check if a burst operation can be issued to the DRAM
+ *
+ * @param Return true if RD/WR can issue
+ * This requires the DRAM to be in the
+ * REF IDLE state
+ */
+ bool
+ burstReady(MemPacket* pkt) const override
+ {
+ return ranks[pkt->rank]->inRefIdleState();
+ }
+
+ /**
+ * This function checks if ranks are actively refreshing and
+ * therefore busy. The function also checks if ranks are in
+ * the self-refresh state, in which case, a self-refresh exit
+ * is initiated.
+ *
+ * return boolean if all ranks are in refresh and therefore busy
+ */
+ bool isBusy();
+
+ /**
+ * Add rank to rank delay to bus timing to all DRAM banks in all ranks
+ * when access to an alternate interface is issued
+ *
+ * param cmd_at Time of current command used as starting point for
+ * addition of rank-to-rank delay
+ */
+ void addRankToRankDelay(Tick cmd_at) override;
+
+ /**
+ * Complete response process for DRAM when read burst is complete
+ * This will update the counters and check if a power down state
+ * can be entered.
+ *
+ * @param rank Specifies rank associated with read burst
+ */
+ void respondEvent(uint8_t rank);
+
+ /**
+ * Check the refresh state to determine if refresh needs
+ * to be kicked back into action after a read response
+ *
+ * @param rank Specifies rank associated with read burst
+ */
+ void checkRefreshState(uint8_t rank);
+
+ DRAMInterface(const DRAMInterfaceParams* _p);
+};
+
+/**
+ * Interface to NVM devices with media specific parameters,
+ * statistics, and functions.
+ * The NVMInterface includes a class for individual ranks
+ * and per rank functions.
+ */
+class NVMInterface : public MemInterface
+{
+ private:
+ /**
+ * NVM rank class simply includes a vector of banks.
+ */
+ class Rank : public EventManager
+ {
+ private:
+
+ /**
+ * A reference to the parent NVMInterface instance
+ */
+ NVMInterface& nvm;
+
+ public:
+
+ /**
+ * Current Rank index
+ */
+ uint8_t rank;
+
+ /**
+ * Vector of NVM banks. Each rank is made of several banks
+ * that can be accessed in parallel.
+ */
+ std::vector<Bank> banks;
+
+ Rank(const NVMInterfaceParams* _p, int _rank,
+ NVMInterface& _nvm);
+ };
+
+ /**
+ * NVM specific device and channel characteristics
+ */
+ const uint32_t maxPendingWrites;
+ const uint32_t maxPendingReads;
+ const bool twoCycleRdWr;
+
+ /**
+ * NVM specific timing requirements
+ */
+ const Tick tREAD;
+ const Tick tWRITE;
+ const Tick tSEND;
+
+ struct NVMStats : public Stats::Group
+ {
+ NVMStats(NVMInterface &nvm);
+
+ void regStats() override;
+
+ NVMInterface &nvm;
+
+ /** NVM stats */
+ Stats::Scalar readBursts;
+ Stats::Scalar writeBursts;
+
+ Stats::Vector perBankRdBursts;
+ Stats::Vector perBankWrBursts;
+
+ // Latencies summed over all requests
+ Stats::Scalar totQLat;
+ Stats::Scalar totBusLat;
+ Stats::Scalar totMemAccLat;
+
+ // Average latencies per request
+ Stats::Formula avgQLat;
+ Stats::Formula avgBusLat;
+ Stats::Formula avgMemAccLat;
+
+ Stats::Scalar bytesRead;
+ Stats::Scalar bytesWritten;
+
+ // Average bandwidth
+ Stats::Formula avgRdBW;
+ Stats::Formula avgWrBW;
+ Stats::Formula peakBW;
+ Stats::Formula busUtil;
+ Stats::Formula busUtilRead;
+ Stats::Formula busUtilWrite;
+
+ /** NVM stats */
+ Stats::Histogram pendingReads;
+ Stats::Histogram pendingWrites;
+ Stats::Histogram bytesPerBank;
+ };
+
+ NVMStats stats;
+
+ void processWriteRespondEvent();
+ EventFunctionWrapper writeRespondEvent;
+
+ void processReadReadyEvent();
+ EventFunctionWrapper readReadyEvent;
+
+ /**
+ * Vector of nvm ranks
+ */
+ std::vector<Rank*> ranks;
+
+ /**
+ * Holding queue for non-deterministic write commands, which
+ * maintains writes that have been issued but have not completed
+ * Stored separately mostly to keep the code clean and help with
+ * event scheduling.
+ * This mimics a buffer on the media controller and therefore is
+ * not added to the main write queue for sizing
+ */
+ std::list<Tick> writeRespQueue;
+
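+ /**
+ * Ready times of non-deterministic reads that have been issued to
+ * the media but whose data is not yet available to burst back to
+ * the controller; entries are drained by processReadReadyEvent().
+ */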
+ std::deque<Tick> readReadyQueue;
+
+ /**
+ * Check if the write response queue is empty
+ *
+ * @param Return true if empty
+ */
+ bool writeRespQueueEmpty() const { return writeRespQueue.empty(); }
+
+ /**
+ * Earliest tick at which the next read command may be issued.
+ */
+ Tick nextReadAt;
+
+ // keep track of reads that have issued for which data is either
+ // not yet ready or has not yet been transferred to the ctrl
+ uint16_t numPendingReads;
+ uint16_t numReadDataReady;
+
+ public:
+ // keep track of the number of reads that have yet to be issued
+ uint16_t numReadsToIssue;
+
+ // number of writes in the writeQueue for the NVM interface
+ uint32_t numWritesQueued;
+
+ /**
+ * Initialize the NVM interface and verify parameters
+ */
+ void init() override;
+
+ /**
+ * Setup the rank based on packet received
+ *
+ * @param integer value of rank to be setup. used to index ranks vector
+ * @param are we setting up rank for read or write packet?
+ */
+ void setupRank(const uint8_t rank, const bool is_read) override;
+
+ /**
+ * Check drain state of NVM interface
+ *
+ * @return true if write response queue is empty
+ *
+ */
+ bool allRanksDrained() const override { return writeRespQueueEmpty(); }
+
+ /*
+ * @return time to offset next command
+ */
+ Tick commandOffset() const override { return tBURST; }
+
+ /**
+ * Check if a burst operation can be issued to the NVM
+ *
+ * @param Return true if RD/WR can issue
+ * for reads, also verify that the ready count
+ * has been updated to a non-zero value to
+ * account for race conditions between events
+ */
+ bool burstReady(MemPacket* pkt) const override;
+
+ /**
+ * This function checks if ranks are busy.
+ * This state is true when either:
+ * 1) There is no command with read data ready to transmit or
+ * 2) The NVM interface has reached the maximum number of outstanding
+ * write commands.
+ * @param read_queue_empty There are no reads queued
+ * @param all_writes_nvm All writes in queue are for NVM interface
+ * @return true if NVM is busy
+ *
+ */
+ bool isBusy(bool read_queue_empty, bool all_writes_nvm);
+ /**
+ * For FR-FCFS policy, find first NVM command that can issue
+ * default to first command to prepped region
+ *
+ * @param queue Queued requests to consider
+ * @param min_col_at Minimum tick for 'seamless' issue
+ * @return an iterator to the selected packet, else queue.end()
+ * @return the tick when the packet selected will issue
+ */
+ std::pair<MemPacketQueue::iterator, Tick>
+ chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const override;
+
+ /**
+ * Add rank to rank delay to bus timing to all NVM banks in all ranks
+ * when access to an alternate interface is issued
+ *
+ * param cmd_at Time of current command used as starting point for
+ * addition of rank-to-rank delay
+ */
+ void addRankToRankDelay(Tick cmd_at) override;
+
+
+ /**
+ * Select read command to issue asynchronously
+ */
+ void chooseRead(MemPacketQueue& queue);
+
+ /*
+ * Function to calculate unloaded access latency
+ */
+ Tick accessLatency() const override { return (tREAD + tSEND); }
+
+ /**
+ * Check if the write response queue has reached defined threshold
+ *
+ * @param Return true if full
+ */
+ bool
+ writeRespQueueFull() const
+ {
+ return writeRespQueue.size() == maxPendingWrites;
+ }
+
+ bool
+ readsWaitingToIssue() const
+ {
+ return ((numReadsToIssue != 0) &&
+ (numPendingReads < maxPendingReads));
+ }
+
+ /**
+ * Actually do the burst and update stats.
+ *
+ * @param pkt The packet created from the outside world pkt
+ * @param next_burst_at Minimum bus timing requirement from controller
+ * @return pair, tick when current burst is issued and
+ * tick when next burst can issue
+ */
+ std::pair<Tick, Tick>
+ doBurstAccess(MemPacket* pkt, Tick next_burst_at);
+
+ NVMInterface(const NVMInterfaceParams* _p);
+};
+
+#endif //__MEM_INTERFACE_HH__
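With the controller and media interface now split, an NVM channel is set up
the same way as a DRAM channel: instantiate a MemCtrl and attach the
interface to its nvm field. A minimal sketch, assuming the NVM_2400_1x64
interface model is available in m5.objects:

system.mem_ctrl = MemCtrl()
system.mem_ctrl.nvm = NVM_2400_1x64()
system.mem_ctrl.nvm.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master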
def create_system(self):
if issubclass(self.mem_class, m5.objects.DRAMInterface):
- mem_ctrl = DRAMCtrl()
+ mem_ctrl = MemCtrl()
mem_ctrl.dram = self.mem_class()
else:
mem_ctrl = self.mem_class()
if issubclass(self.mem_class, m5.objects.DRAMInterface):
mem_ctrls = []
for r in system.mem_ranges:
- mem_ctrl = DRAMCtrl()
+ mem_ctrl = MemCtrl()
mem_ctrl.dram = self.mem_class(range = r)
mem_ctrls.append(mem_ctrl)
system.physmem = mem_ctrls