From: Wendy Elsasser
Date: Tue, 21 Jul 2020 04:09:21 +0000 (-0500)
Subject: mem: Clean up Memory Controller
X-Git-Tag: v20.1.0.0~71
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=7a28c82c6e62ba7e454cdd6e903d2abbea8d0bc2;p=gem5.git

mem: Clean up Memory Controller

Make the actual controller more generic
- Rename DRAMCtrl to MemCtrl
- Rename DRAMPacket to MemPacket
- Rename dram_ctrl.cc to mem_ctrl.cc
- Rename dram_ctrl.hh to mem_ctrl.hh
- Create MemCtrl debug flag

Move the memory interface classes/functions to separate files
- mem_interface.cc
- mem_interface.hh

Change-Id: I1acba44c855776343e205e7733a7d8bbba92a82c
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/31654
Reviewed-by: Jason Lowe-Power
Maintainer: Jason Lowe-Power
Tested-by: kokoro
---

diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py index 7aa6761b0..941b381d4 100644 --- a/configs/common/MemConfig.py +++ b/configs/common/MemConfig.py @@ -224,11 +224,11 @@ def config_mem(options, system): if opt_mem_type == "HMC_2500_1x32": # The static latency of the vault controllers is estimated # to be smaller than a full DRAM channel controller - mem_ctrl = m5.objects.DRAMCtrl(min_writes_per_switch = 8, + mem_ctrl = m5.objects.MemCtrl(min_writes_per_switch = 8, static_backend_latency = '4ns', static_frontend_latency = '4ns') else: - mem_ctrl = m5.objects.DRAMCtrl() + mem_ctrl = m5.objects.MemCtrl() # Hookup the controller to the interface and add to the list mem_ctrl.dram = dram_intf @@ -246,7 +246,7 @@ def config_mem(options, system): # Create a controller if not sharing a channel with DRAM # in which case the controller has already been created if not opt_hybrid_channel: - mem_ctrl = m5.objects.DRAMCtrl() + mem_ctrl = m5.objects.MemCtrl() mem_ctrl.nvm = nvm_intf mem_ctrls.append(mem_ctrl) diff --git a/configs/common/Options.py b/configs/common/Options.py index 0409fb809..32f8dd94b 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -109,7 +109,7 @@ def addNoISAOptions(parser): default="512MB", help="Specify the physical memory size (single memory)") parser.add_option("--enable-dram-powerdown", action="store_true", - help="Enable low-power states in DRAMCtrl") + help="Enable low-power states in DRAMInterface") parser.add_option("--mem-channels-intlv", type="int", default=0, help="Memory channels interleave") diff --git a/configs/dram/lat_mem_rd.py b/configs/dram/lat_mem_rd.py index 9b04e4bd2..4183d4aec 100644 --- a/configs/dram/lat_mem_rd.py +++ b/configs/dram/lat_mem_rd.py @@ -130,7 +130,7 @@ for ctrl in system.mem_ctrls: # the following assumes that we are using the native DRAM # controller, check to be sure - if isinstance(ctrl, m5.objects.DRAMCtrl): + if isinstance(ctrl, m5.objects.MemCtrl): # make the DRAM refresh interval sufficiently infinite to avoid # latency spikes ctrl.tREFI = '100s' diff --git a/configs/dram/low_power_sweep.py b/configs/dram/low_power_sweep.py index 0da2b935b..292b0fa73 100644 --- a/configs/dram/low_power_sweep.py +++ b/configs/dram/low_power_sweep.py @@ -110,8 +110,8 @@ args.elastic_trace_en = 0 MemConfig.config_mem(args, system) # Sanity check for memory controller class. 
-if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl): - fatal("This script assumes the controller is a DRAMCtrl subclass") +if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl): + fatal("This script assumes the controller is a MemCtrl subclass") if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface): fatal("This script assumes the memory is a DRAMInterface subclass") diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py index a771c5c9f..2f3837339 100644 --- a/configs/dram/sweep.py +++ b/configs/dram/sweep.py @@ -115,8 +115,8 @@ MemConfig.config_mem(options, system) # the following assumes that we are using the native DRAM # controller, check to be sure -if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl): - fatal("This script assumes the controller is a DRAMCtrl subclass") +if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl): + fatal("This script assumes the controller is a MemCtrl subclass") if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface): fatal("This script assumes the memory is a DRAMInterface subclass") diff --git a/configs/example/memcheck.py b/configs/example/memcheck.py index 6bccd54db..bffd5a050 100644 --- a/configs/example/memcheck.py +++ b/configs/example/memcheck.py @@ -217,7 +217,7 @@ cfg_file.close() proto_tester = TrafficGen(config_file = cfg_file_path) # Set up the system along with a DRAM controller -system = System(physmem = DRAMCtrl(dram = DDR3_1600_8x8())) +system = System(physmem = MemCtrl(dram = DDR3_1600_8x8())) system.voltage_domain = VoltageDomain(voltage = '1V') diff --git a/configs/learning_gem5/part1/simple.py b/configs/learning_gem5/part1/simple.py index cfd15bebb..22b2cf7b4 100644 --- a/configs/learning_gem5/part1/simple.py +++ b/configs/learning_gem5/part1/simple.py @@ -77,7 +77,7 @@ if m5.defines.buildEnv['TARGET_ISA'] == "x86": system.cpu.interrupts[0].int_slave = system.membus.master # Create a DDR3 memory controller and connect it to the membus -system.mem_ctrl = DRAMCtrl() +system.mem_ctrl = MemCtrl() system.mem_ctrl.dram = DDR3_1600_8x8() system.mem_ctrl.dram.range = system.mem_ranges[0] system.mem_ctrl.port = system.membus.master diff --git a/configs/learning_gem5/part1/two_level.py b/configs/learning_gem5/part1/two_level.py index 0dbcfc7ff..53e11378b 100644 --- a/configs/learning_gem5/part1/two_level.py +++ b/configs/learning_gem5/part1/two_level.py @@ -132,7 +132,7 @@ if m5.defines.buildEnv['TARGET_ISA'] == "x86": system.system_port = system.membus.slave # Create a DDR3 memory controller -system.mem_ctrl = DRAMCtrl() +system.mem_ctrl = MemCtrl() system.mem_ctrl.dram = DDR3_1600_8x8() system.mem_ctrl.dram.range = system.mem_ranges[0] system.mem_ctrl.port = system.membus.master diff --git a/configs/learning_gem5/part2/simple_cache.py b/configs/learning_gem5/part2/simple_cache.py index fbea73d97..533aa23ad 100644 --- a/configs/learning_gem5/part2/simple_cache.py +++ b/configs/learning_gem5/part2/simple_cache.py @@ -76,7 +76,7 @@ system.cpu.interrupts[0].int_master = system.membus.slave system.cpu.interrupts[0].int_slave = system.membus.master # Create a DDR3 memory controller and connect it to the membus -system.mem_ctrl = DRAMCtrl() +system.mem_ctrl = MemCtrl() system.mem_ctrl.dram = DDR3_1600_8x8() system.mem_ctrl.dram.range = system.mem_ranges[0] system.mem_ctrl.port = system.membus.master diff --git a/configs/learning_gem5/part2/simple_memobj.py b/configs/learning_gem5/part2/simple_memobj.py index e792eb9bb..b7d256189 100644 --- a/configs/learning_gem5/part2/simple_memobj.py +++ 
b/configs/learning_gem5/part2/simple_memobj.py @@ -74,7 +74,7 @@ system.cpu.interrupts[0].int_master = system.membus.slave system.cpu.interrupts[0].int_slave = system.membus.master # Create a DDR3 memory controller and connect it to the membus -system.mem_ctrl = DRAMCtrl() +system.mem_ctrl = MemCtrl() system.mem_ctrl.dram = DDR3_1600_8x8() system.mem_ctrl.dram.range = system.mem_ranges[0] system.mem_ctrl.port = system.membus.master diff --git a/configs/learning_gem5/part3/simple_ruby.py b/configs/learning_gem5/part3/simple_ruby.py index 7f70a8c7d..760a16892 100644 --- a/configs/learning_gem5/part3/simple_ruby.py +++ b/configs/learning_gem5/part3/simple_ruby.py @@ -68,7 +68,7 @@ system.mem_ranges = [AddrRange('512MB')] # Create an address range system.cpu = [TimingSimpleCPU() for i in range(2)] # Create a DDR3 memory controller and connect it to the membus -system.mem_ctrl = DRAMCtrl() +system.mem_ctrl = MemCtrl() system.mem_ctrl.dram = DDR3_1600_8x8() system.mem_ctrl.dram.range = system.mem_ranges[0] diff --git a/configs/nvm/sweep.py b/configs/nvm/sweep.py index 5bc5819cf..7e0bd9e7f 100644 --- a/configs/nvm/sweep.py +++ b/configs/nvm/sweep.py @@ -113,8 +113,8 @@ MemConfig.config_mem(options, system) # the following assumes that we are using the native memory # controller with an NVM interface, check to be sure -if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl): - fatal("This script assumes the controller is a DRAMCtrl subclass") +if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl): + fatal("This script assumes the controller is a MemCtrl subclass") if not isinstance(system.mem_ctrls[0].nvm, m5.objects.NVMInterface): fatal("This script assumes the memory is a NVMInterface class") diff --git a/configs/nvm/sweep_hybrid.py b/configs/nvm/sweep_hybrid.py index a2513df91..94edfd452 100644 --- a/configs/nvm/sweep_hybrid.py +++ b/configs/nvm/sweep_hybrid.py @@ -126,8 +126,8 @@ MemConfig.config_mem(options, system) # the following assumes that we are using the native controller # with NVM and DRAM interfaces, check to be sure -if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl): - fatal("This script assumes the controller is a DRAMCtrl subclass") +if not isinstance(system.mem_ctrls[0], m5.objects.MemCtrl): + fatal("This script assumes the controller is a MemCtrl subclass") if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface): fatal("This script assumes the first memory is a DRAMInterface subclass") if not isinstance(system.mem_ctrls[0].nvm, m5.objects.NVMInterface): diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index 4e382af05..622771aa1 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -133,7 +133,7 @@ def setup_memory_controllers(system, ruby, dir_cntrls, options): dram_intf = MemConfig.create_mem_intf(mem_type, r, index, options.num_dirs, int(math.log(options.num_dirs, 2)), intlv_size, options.xor_low_bit) - mem_ctrl = m5.objects.DRAMCtrl(dram = dram_intf) + mem_ctrl = m5.objects.MemCtrl(dram = dram_intf) if options.access_backing_store: mem_ctrl.kvm_map=False diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py deleted file mode 100644 index 4ef421bde..000000000 --- a/src/mem/DRAMCtrl.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) 2012-2020 ARM Limited -# All rights reserved. 
-# -# The license below extends only to copyright in the software and shall -# not be construed as granting a license to any other intellectual -# property including but not limited to intellectual property relating -# to a hardware implementation of the functionality of the software -# licensed hereunder. You may use the software subject to the license -# terms below provided that you ensure that this notice is replicated -# unmodified and in its entirety in all distributions of the software, -# modified or unmodified, in source code or in binary form. -# -# Copyright (c) 2013 Amin Farmahini-Farahani -# Copyright (c) 2015 University of Kaiserslautern -# Copyright (c) 2015 The University of Bologna -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from m5.params import * -from m5.proxy import * -from m5.objects.QoSMemCtrl import * - -# Enum for memory scheduling algorithms, currently First-Come -# First-Served and a First-Row Hit then First-Come First-Served -class MemSched(Enum): vals = ['fcfs', 'frfcfs'] - -# DRAMCtrl is a single-channel single-ported DRAM controller model -# that aims to model the most important system-level performance -# effects of a DRAM without getting into too much detail of the DRAM -# itself. 
-class DRAMCtrl(QoSMemCtrl): - type = 'DRAMCtrl' - cxx_header = "mem/dram_ctrl.hh" - - # single-ported on the system interface side, instantiate with a - # bus in front of the controller for multiple ports - port = SlavePort("Slave port") - - # Interface to volatile, DRAM media - dram = Param.DRAMInterface(NULL, "DRAM interface") - - # Interface to non-volatile media - nvm = Param.NVMInterface(NULL, "NVM interface") - - # read and write buffer depths are set in the interface - # the controller will read these values when instantiated - - # threshold in percent for when to forcefully trigger writes and - # start emptying the write buffer - write_high_thresh_perc = Param.Percent(85, "Threshold to force writes") - - # threshold in percentage for when to start writes if the read - # queue is empty - write_low_thresh_perc = Param.Percent(50, "Threshold to start writes") - - # minimum write bursts to schedule before switching back to reads - min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before " - "switching to reads") - - # scheduler, address map and page policy - mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy") - - # pipeline latency of the controller and PHY, split into a - # frontend part and a backend part, with reads and writes serviced - # by the queues only seeing the frontend contribution, and reads - # serviced by the memory seeing the sum of the two - static_frontend_latency = Param.Latency("10ns", "Static frontend latency") - static_backend_latency = Param.Latency("10ns", "Static backend latency") - - command_window = Param.Latency("10ns", "Static backend latency") diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py index aa415feb4..85a609223 100644 --- a/src/mem/DRAMInterface.py +++ b/src/mem/DRAMInterface.py @@ -47,7 +47,7 @@ class PageManage(Enum): vals = ['open', 'open_adaptive', 'close', class DRAMInterface(MemInterface): type = 'DRAMInterface' - cxx_header = "mem/dram_ctrl.hh" + cxx_header = "mem/mem_interface.hh" # scheduler page policy page_policy = Param.PageManage('open_adaptive', "Page management policy") diff --git a/src/mem/MemCtrl.py b/src/mem/MemCtrl.py new file mode 100644 index 000000000..e0f34241e --- /dev/null +++ b/src/mem/MemCtrl.py @@ -0,0 +1,92 @@ +# Copyright (c) 2012-2020 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2013 Amin Farmahini-Farahani +# Copyright (c) 2015 University of Kaiserslautern +# Copyright (c) 2015 The University of Bologna +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.params import * +from m5.proxy import * +from m5.objects.QoSMemCtrl import * + +# Enum for memory scheduling algorithms, currently First-Come +# First-Served and a First-Row Hit then First-Come First-Served +class MemSched(Enum): vals = ['fcfs', 'frfcfs'] + +# MemCtrl is a single-channel single-ported Memory controller model +# that aims to model the most important system-level performance +# effects of a memory controller, interfacing with media specific +# interfaces +class MemCtrl(QoSMemCtrl): + type = 'MemCtrl' + cxx_header = "mem/mem_ctrl.hh" + + # single-ported on the system interface side, instantiate with a + # bus in front of the controller for multiple ports + port = SlavePort("Slave port") + + # Interface to volatile, DRAM media + dram = Param.DRAMInterface(NULL, "DRAM interface") + + # Interface to non-volatile media + nvm = Param.NVMInterface(NULL, "NVM interface") + + # read and write buffer depths are set in the interface + # the controller will read these values when instantiated + + # threshold in percent for when to forcefully trigger writes and + # start emptying the write buffer + write_high_thresh_perc = Param.Percent(85, "Threshold to force writes") + + # threshold in percentage for when to start writes if the read + # queue is empty + write_low_thresh_perc = Param.Percent(50, "Threshold to start writes") + + # minimum write bursts to schedule before switching back to reads + min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before " + "switching to reads") + + # scheduler, address map and page policy + mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy") + + # pipeline latency of the controller and PHY, split into a + # frontend part and a backend part, with reads and writes serviced + # by the queues only seeing the frontend contribution, and reads + # serviced by the memory seeing the sum of the two + static_frontend_latency = Param.Latency("10ns", "Static frontend latency") + static_backend_latency = Param.Latency("10ns", "Static backend latency") + + 
command_window = Param.Latency("10ns", "Static backend latency") diff --git a/src/mem/MemInterface.py b/src/mem/MemInterface.py index 3a8b917c3..85fe0a058 100644 --- a/src/mem/MemInterface.py +++ b/src/mem/MemInterface.py @@ -54,7 +54,7 @@ class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh'] class MemInterface(AbstractMemory): type = 'MemInterface' abstract = True - cxx_header = "mem/dram_ctrl.hh" + cxx_header = "mem/mem_interface.hh" # Allow the interface to set required controller buffer sizes # each entry corresponds to a burst for the specific memory channel diff --git a/src/mem/NVMInterface.py b/src/mem/NVMInterface.py index f28dd81a3..3f6fbc43f 100644 --- a/src/mem/NVMInterface.py +++ b/src/mem/NVMInterface.py @@ -43,7 +43,7 @@ from m5.objects.DRAMInterface import AddrMap # are modeled without getting into too much detail of the media itself. class NVMInterface(MemInterface): type = 'NVMInterface' - cxx_header = "mem/dram_ctrl.hh" + cxx_header = "mem/mem_interface.hh" # NVM DIMM could have write buffer to offload writes # define buffer depth, which will limit the number of pending writes diff --git a/src/mem/SConscript b/src/mem/SConscript index 409116c40..cf7adc866 100644 --- a/src/mem/SConscript +++ b/src/mem/SConscript @@ -46,7 +46,7 @@ Source('comm_monitor.cc') SimObject('AbstractMemory.py') SimObject('AddrMapper.py') SimObject('Bridge.py') -SimObject('DRAMCtrl.py') +SimObject('MemCtrl.py') SimObject('MemInterface.py') SimObject('DRAMInterface.py') SimObject('NVMInterface.py') @@ -64,9 +64,10 @@ Source('addr_mapper.cc') Source('bridge.cc') Source('coherent_xbar.cc') Source('drampower.cc') -Source('dram_ctrl.cc') Source('external_master.cc') Source('external_slave.cc') +Source('mem_ctrl.cc') +Source('mem_interface.cc') Source('noncoherent_xbar.cc') Source('packet.cc') Source('port.cc') @@ -120,6 +121,7 @@ DebugFlag('NVM') DebugFlag('ExternalPort') DebugFlag('HtmMem', 'Hardware Transactional Memory (Mem side)') DebugFlag('LLSC') +DebugFlag('MemCtrl') DebugFlag('MMU') DebugFlag('MemoryAccess') DebugFlag('PacketQueue') diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc deleted file mode 100644 index 55451a2e1..000000000 --- a/src/mem/dram_ctrl.cc +++ /dev/null @@ -1,3975 +0,0 @@ -/* - * Copyright (c) 2010-2020 ARM Limited - * All rights reserved - * - * The license below extends only to copyright in the software and shall - * not be construed as granting a license to any other intellectual - * property including but not limited to intellectual property relating - * to a hardware implementation of the functionality of the software - * licensed hereunder. You may use the software subject to the license - * terms below provided that you ensure that this notice is replicated - * unmodified and in its entirety in all distributions of the software, - * modified or unmodified, in source code or in binary form. - * - * Copyright (c) 2013 Amin Farmahini-Farahani - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "mem/dram_ctrl.hh" - -#include "base/bitfield.hh" -#include "base/trace.hh" -#include "debug/DRAM.hh" -#include "debug/DRAMPower.hh" -#include "debug/DRAMState.hh" -#include "debug/Drain.hh" -#include "debug/NVM.hh" -#include "debug/QOS.hh" -#include "params/DRAMInterface.hh" -#include "params/NVMInterface.hh" -#include "sim/system.hh" - -using namespace std; -using namespace Data; - -DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : - QoS::MemCtrl(p), - port(name() + ".port", *this), isTimingMode(false), - retryRdReq(false), retryWrReq(false), - nextReqEvent([this]{ processNextReqEvent(); }, name()), - respondEvent([this]{ processRespondEvent(); }, name()), - dram(p->dram), nvm(p->nvm), - readBufferSize((dram ? dram->readBufferSize : 0) + - (nvm ? nvm->readBufferSize : 0)), - writeBufferSize((dram ? dram->writeBufferSize : 0) + - (nvm ? 
nvm->writeBufferSize : 0)), - writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0), - writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0), - minWritesPerSwitch(p->min_writes_per_switch), - writesThisTime(0), readsThisTime(0), - memSchedPolicy(p->mem_sched_policy), - frontendLatency(p->static_frontend_latency), - backendLatency(p->static_backend_latency), - commandWindow(p->command_window), - nextBurstAt(0), prevArrival(0), - nextReqTime(0), - stats(*this) -{ - DPRINTF(DRAM, "Setting up controller\n"); - readQueue.resize(p->qos_priorities); - writeQueue.resize(p->qos_priorities); - - // Hook up interfaces to the controller - if (dram) - dram->setCtrl(this, commandWindow); - if (nvm) - nvm->setCtrl(this, commandWindow); - - fatal_if(!dram && !nvm, "Memory controller must have an interface"); - - // perform a basic check of the write thresholds - if (p->write_low_thresh_perc >= p->write_high_thresh_perc) - fatal("Write buffer low threshold %d must be smaller than the " - "high threshold %d\n", p->write_low_thresh_perc, - p->write_high_thresh_perc); -} - -void -DRAMCtrl::init() -{ - if (!port.isConnected()) { - fatal("DRAMCtrl %s is unconnected!\n", name()); - } else { - port.sendRangeChange(); - } -} - -void -DRAMCtrl::startup() -{ - // remember the memory system mode of operation - isTimingMode = system()->isTimingMode(); - - if (isTimingMode) { - // shift the bus busy time sufficiently far ahead that we never - // have to worry about negative values when computing the time for - // the next request, this will add an insignificant bubble at the - // start of simulation - nextBurstAt = curTick() + (dram ? dram->commandOffset() : - nvm->commandOffset()); - } -} - -Tick -DRAMCtrl::recvAtomic(PacketPtr pkt) -{ - DPRINTF(DRAM, "recvAtomic: %s 0x%x\n", pkt->cmdString(), pkt->getAddr()); - - panic_if(pkt->cacheResponding(), "Should not see packets where cache " - "is responding"); - - Tick latency = 0; - // do the actual memory access and turn the packet into a response - if (dram && dram->getAddrRange().contains(pkt->getAddr())) { - dram->access(pkt); - - if (pkt->hasData()) { - // this value is not supposed to be accurate, just enough to - // keep things going, mimic a closed page - latency = dram->accessLatency(); - } - } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { - nvm->access(pkt); - - if (pkt->hasData()) { - // this value is not supposed to be accurate, just enough to - // keep things going, mimic a closed page - latency = nvm->accessLatency(); - } - } else { - panic("Can't handle address range for packet %s\n", - pkt->print()); - } - - return latency; -} - -bool -DRAMCtrl::readQueueFull(unsigned int neededEntries) const -{ - DPRINTF(DRAM, "Read queue limit %d, current size %d, entries needed %d\n", - readBufferSize, totalReadQueueSize + respQueue.size(), - neededEntries); - - auto rdsize_new = totalReadQueueSize + respQueue.size() + neededEntries; - return rdsize_new > readBufferSize; -} - -bool -DRAMCtrl::writeQueueFull(unsigned int neededEntries) const -{ - DPRINTF(DRAM, "Write queue limit %d, current size %d, entries needed %d\n", - writeBufferSize, totalWriteQueueSize, neededEntries); - - auto wrsize_new = (totalWriteQueueSize + neededEntries); - return wrsize_new > writeBufferSize; -} - -DRAMPacket* -MemInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr, - unsigned size, bool is_read, bool is_dram) -{ - // decode the address based on the address mapping scheme, with - // Ro, Ra, Co, Ba and Ch denoting row, rank, column, 
bank and - // channel, respectively - uint8_t rank; - uint8_t bank; - // use a 64-bit unsigned during the computations as the row is - // always the top bits, and check before creating the packet - uint64_t row; - - // Get packed address, starting at 0 - Addr addr = getCtrlAddr(pkt_addr); - - // truncate the address to a memory burst, which makes it unique to - // a specific buffer, row, bank, rank and channel - addr = addr / burstSize; - - // we have removed the lowest order address bits that denote the - // position within the column - if (addrMapping == Enums::RoRaBaChCo || addrMapping == Enums::RoRaBaCoCh) { - // the lowest order bits denote the column to ensure that - // sequential cache lines occupy the same row - addr = addr / burstsPerRowBuffer; - - // after the channel bits, get the bank bits to interleave - // over the banks - bank = addr % banksPerRank; - addr = addr / banksPerRank; - - // after the bank, we get the rank bits which thus interleaves - // over the ranks - rank = addr % ranksPerChannel; - addr = addr / ranksPerChannel; - - // lastly, get the row bits, no need to remove them from addr - row = addr % rowsPerBank; - } else if (addrMapping == Enums::RoCoRaBaCh) { - // with emerging technologies, could have small page size with - // interleaving granularity greater than row buffer - if (burstsPerStripe > burstsPerRowBuffer) { - // remove column bits which are a subset of burstsPerStripe - addr = addr / burstsPerRowBuffer; - } else { - // remove lower column bits below channel bits - addr = addr / burstsPerStripe; - } - - // start with the bank bits, as this provides the maximum - // opportunity for parallelism between requests - bank = addr % banksPerRank; - addr = addr / banksPerRank; - - // next get the rank bits - rank = addr % ranksPerChannel; - addr = addr / ranksPerChannel; - - // next, the higher-order column bites - if (burstsPerStripe < burstsPerRowBuffer) { - addr = addr / (burstsPerRowBuffer / burstsPerStripe); - } - - // lastly, get the row bits, no need to remove them from addr - row = addr % rowsPerBank; - } else - panic("Unknown address mapping policy chosen!"); - - assert(rank < ranksPerChannel); - assert(bank < banksPerRank); - assert(row < rowsPerBank); - assert(row < Bank::NO_ROW); - - DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n", - pkt_addr, rank, bank, row); - - // create the corresponding memory packet with the entry time and - // ready time set to the current tick, the latter will be updated - // later - uint16_t bank_id = banksPerRank * rank + bank; - - return new DRAMPacket(pkt, is_read, is_dram, rank, bank, row, bank_id, - pkt_addr, size); -} - -void -DRAMCtrl::addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram) -{ - // only add to the read queue here. whenever the request is - // eventually done, set the readyTime, and call schedule() - assert(!pkt->isWrite()); - - assert(pkt_count != 0); - - // if the request size is larger than burst size, the pkt is split into - // multiple packets - // Note if the pkt starting address is not aligened to burst size, the - // address of first packet is kept unaliged. Subsequent packets - // are aligned to burst size boundaries. This is to ensure we accurately - // check read packets against packets in write queue. - const Addr base_addr = pkt->getAddr(); - Addr addr = base_addr; - unsigned pktsServicedByWrQ = 0; - BurstHelper* burst_helper = NULL; - - uint32_t burst_size = is_dram ? 
dram->bytesPerBurst() : - nvm->bytesPerBurst(); - for (int cnt = 0; cnt < pkt_count; ++cnt) { - unsigned size = std::min((addr | (burst_size - 1)) + 1, - base_addr + pkt->getSize()) - addr; - stats.readPktSize[ceilLog2(size)]++; - stats.readBursts++; - stats.masterReadAccesses[pkt->masterId()]++; - - // First check write buffer to see if the data is already at - // the controller - bool foundInWrQ = false; - Addr burst_addr = burstAlign(addr, is_dram); - // if the burst address is not present then there is no need - // looking any further - if (isInWriteQueue.find(burst_addr) != isInWriteQueue.end()) { - for (const auto& vec : writeQueue) { - for (const auto& p : vec) { - // check if the read is subsumed in the write queue - // packet we are looking at - if (p->addr <= addr && - ((addr + size) <= (p->addr + p->size))) { - - foundInWrQ = true; - stats.servicedByWrQ++; - pktsServicedByWrQ++; - DPRINTF(DRAM, - "Read to addr %lld with size %d serviced by " - "write queue\n", - addr, size); - stats.bytesReadWrQ += burst_size; - break; - } - } - } - } - - // If not found in the write q, make a DRAM packet and - // push it onto the read queue - if (!foundInWrQ) { - - // Make the burst helper for split packets - if (pkt_count > 1 && burst_helper == NULL) { - DPRINTF(DRAM, "Read to addr %lld translates to %d " - "memory requests\n", pkt->getAddr(), pkt_count); - burst_helper = new BurstHelper(pkt_count); - } - - DRAMPacket* dram_pkt; - if (is_dram) { - dram_pkt = dram->decodePacket(pkt, addr, size, true, true); - // increment read entries of the rank - dram->setupRank(dram_pkt->rank, true); - } else { - dram_pkt = nvm->decodePacket(pkt, addr, size, true, false); - // Increment count to trigger issue of non-deterministic read - nvm->setupRank(dram_pkt->rank, true); - // Default readyTime to Max; will be reset once read is issued - dram_pkt->readyTime = MaxTick; - } - dram_pkt->burstHelper = burst_helper; - - assert(!readQueueFull(1)); - stats.rdQLenPdf[totalReadQueueSize + respQueue.size()]++; - - DPRINTF(DRAM, "Adding to read queue\n"); - - readQueue[dram_pkt->qosValue()].push_back(dram_pkt); - - // log packet - logRequest(MemCtrl::READ, pkt->masterId(), pkt->qosValue(), - dram_pkt->addr, 1); - - // Update stats - stats.avgRdQLen = totalReadQueueSize + respQueue.size(); - } - - // Starting address of next memory pkt (aligned to burst boundary) - addr = (addr | (burst_size - 1)) + 1; - } - - // If all packets are serviced by write queue, we send the repsonse back - if (pktsServicedByWrQ == pkt_count) { - accessAndRespond(pkt, frontendLatency); - return; - } - - // Update how many split packets are serviced by write queue - if (burst_helper != NULL) - burst_helper->burstsServiced = pktsServicedByWrQ; - - // If we are not already scheduled to get a request out of the - // queue, do so now - if (!nextReqEvent.scheduled()) { - DPRINTF(DRAM, "Request scheduled immediately\n"); - schedule(nextReqEvent, curTick()); - } -} - -void -DRAMCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram) -{ - // only add to the write queue here. whenever the request is - // eventually done, set the readyTime, and call schedule() - assert(pkt->isWrite()); - - // if the request size is larger than burst size, the pkt is split into - // multiple packets - const Addr base_addr = pkt->getAddr(); - Addr addr = base_addr; - uint32_t burst_size = is_dram ? 
dram->bytesPerBurst() : - nvm->bytesPerBurst(); - for (int cnt = 0; cnt < pkt_count; ++cnt) { - unsigned size = std::min((addr | (burst_size - 1)) + 1, - base_addr + pkt->getSize()) - addr; - stats.writePktSize[ceilLog2(size)]++; - stats.writeBursts++; - stats.masterWriteAccesses[pkt->masterId()]++; - - // see if we can merge with an existing item in the write - // queue and keep track of whether we have merged or not - bool merged = isInWriteQueue.find(burstAlign(addr, is_dram)) != - isInWriteQueue.end(); - - // if the item was not merged we need to create a new write - // and enqueue it - if (!merged) { - DRAMPacket* dram_pkt; - if (is_dram) { - dram_pkt = dram->decodePacket(pkt, addr, size, false, true); - dram->setupRank(dram_pkt->rank, false); - } else { - dram_pkt = nvm->decodePacket(pkt, addr, size, false, false); - nvm->setupRank(dram_pkt->rank, false); - } - assert(totalWriteQueueSize < writeBufferSize); - stats.wrQLenPdf[totalWriteQueueSize]++; - - DPRINTF(DRAM, "Adding to write queue\n"); - - writeQueue[dram_pkt->qosValue()].push_back(dram_pkt); - isInWriteQueue.insert(burstAlign(addr, is_dram)); - - // log packet - logRequest(MemCtrl::WRITE, pkt->masterId(), pkt->qosValue(), - dram_pkt->addr, 1); - - assert(totalWriteQueueSize == isInWriteQueue.size()); - - // Update stats - stats.avgWrQLen = totalWriteQueueSize; - - } else { - DPRINTF(DRAM, "Merging write burst with existing queue entry\n"); - - // keep track of the fact that this burst effectively - // disappeared as it was merged with an existing one - stats.mergedWrBursts++; - } - - // Starting address of next memory pkt (aligned to burst_size boundary) - addr = (addr | (burst_size - 1)) + 1; - } - - // we do not wait for the writes to be send to the actual memory, - // but instead take responsibility for the consistency here and - // snoop the write queue for any upcoming reads - // @todo, if a pkt size is larger than burst size, we might need a - // different front end latency - accessAndRespond(pkt, frontendLatency); - - // If we are not already scheduled to get a request out of the - // queue, do so now - if (!nextReqEvent.scheduled()) { - DPRINTF(DRAM, "Request scheduled immediately\n"); - schedule(nextReqEvent, curTick()); - } -} - -void -DRAMCtrl::printQs() const -{ -#if TRACING_ON - DPRINTF(DRAM, "===READ QUEUE===\n\n"); - for (const auto& queue : readQueue) { - for (const auto& packet : queue) { - DPRINTF(DRAM, "Read %lu\n", packet->addr); - } - } - - DPRINTF(DRAM, "\n===RESP QUEUE===\n\n"); - for (const auto& packet : respQueue) { - DPRINTF(DRAM, "Response %lu\n", packet->addr); - } - - DPRINTF(DRAM, "\n===WRITE QUEUE===\n\n"); - for (const auto& queue : writeQueue) { - for (const auto& packet : queue) { - DPRINTF(DRAM, "Write %lu\n", packet->addr); - } - } -#endif // TRACING_ON -} - -bool -DRAMCtrl::recvTimingReq(PacketPtr pkt) -{ - // This is where we enter from the outside world - DPRINTF(DRAM, "recvTimingReq: request %s addr %lld size %d\n", - pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - - panic_if(pkt->cacheResponding(), "Should not see packets where cache " - "is responding"); - - panic_if(!(pkt->isRead() || pkt->isWrite()), - "Should only see read and writes at memory controller\n"); - - // Calc avg gap between requests - if (prevArrival != 0) { - stats.totGap += curTick() - prevArrival; - } - prevArrival = curTick(); - - // What type of media does this packet access? 
- bool is_dram; - if (dram && dram->getAddrRange().contains(pkt->getAddr())) { - is_dram = true; - } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { - is_dram = false; - } else { - panic("Can't handle address range for packet %s\n", - pkt->print()); - } - - - // Find out how many memory packets a pkt translates to - // If the burst size is equal or larger than the pkt size, then a pkt - // translates to only one memory packet. Otherwise, a pkt translates to - // multiple memory packets - unsigned size = pkt->getSize(); - uint32_t burst_size = is_dram ? dram->bytesPerBurst() : - nvm->bytesPerBurst(); - unsigned offset = pkt->getAddr() & (burst_size - 1); - unsigned int pkt_count = divCeil(offset + size, burst_size); - - // run the QoS scheduler and assign a QoS priority value to the packet - qosSchedule( { &readQueue, &writeQueue }, burst_size, pkt); - - // check local buffers and do not accept if full - if (pkt->isWrite()) { - assert(size != 0); - if (writeQueueFull(pkt_count)) { - DPRINTF(DRAM, "Write queue full, not accepting\n"); - // remember that we have to retry this port - retryWrReq = true; - stats.numWrRetry++; - return false; - } else { - addToWriteQueue(pkt, pkt_count, is_dram); - stats.writeReqs++; - stats.bytesWrittenSys += size; - } - } else { - assert(pkt->isRead()); - assert(size != 0); - if (readQueueFull(pkt_count)) { - DPRINTF(DRAM, "Read queue full, not accepting\n"); - // remember that we have to retry this port - retryRdReq = true; - stats.numRdRetry++; - return false; - } else { - addToReadQueue(pkt, pkt_count, is_dram); - stats.readReqs++; - stats.bytesReadSys += size; - } - } - - return true; -} - -void -DRAMCtrl::processRespondEvent() -{ - DPRINTF(DRAM, - "processRespondEvent(): Some req has reached its readyTime\n"); - - DRAMPacket* dram_pkt = respQueue.front(); - - if (dram_pkt->isDram()) { - // media specific checks and functions when read response is complete - dram->respondEvent(dram_pkt->rank); - } - - if (dram_pkt->burstHelper) { - // it is a split packet - dram_pkt->burstHelper->burstsServiced++; - if (dram_pkt->burstHelper->burstsServiced == - dram_pkt->burstHelper->burstCount) { - // we have now serviced all children packets of a system packet - // so we can now respond to the requester - // @todo we probably want to have a different front end and back - // end latency for split packets - accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency); - delete dram_pkt->burstHelper; - dram_pkt->burstHelper = NULL; - } - } else { - // it is not a split packet - accessAndRespond(dram_pkt->pkt, frontendLatency + backendLatency); - } - - assert(respQueue.front() == dram_pkt); - respQueue.pop_front(); - - if (!respQueue.empty()) { - assert(respQueue.front()->readyTime >= curTick()); - assert(!respondEvent.scheduled()); - schedule(respondEvent, respQueue.front()->readyTime); - } else { - // if there is nothing left in any queue, signal a drain - if (drainState() == DrainState::Draining && - !totalWriteQueueSize && !totalReadQueueSize && - allIntfDrained()) { - - DPRINTF(Drain, "DRAM controller done draining\n"); - signalDrainDone(); - } else if (dram_pkt->isDram()) { - // check the refresh state and kick the refresh event loop - // into action again if banks already closed and just waiting - // for read to complete - dram->checkRefreshState(dram_pkt->rank); - } - } - - delete dram_pkt; - - // We have made a location in the queue available at this point, - // so if there is a read that was forced to wait, retry now - if (retryRdReq) { - 
retryRdReq = false; - port.sendRetryReq(); - } -} - -DRAMPacketQueue::iterator -DRAMCtrl::chooseNext(DRAMPacketQueue& queue, Tick extra_col_delay) -{ - // This method does the arbitration between requests. - - DRAMPacketQueue::iterator ret = queue.end(); - - if (!queue.empty()) { - if (queue.size() == 1) { - // available rank corresponds to state refresh idle - DRAMPacket* dram_pkt = *(queue.begin()); - if (packetReady(dram_pkt)) { - ret = queue.begin(); - DPRINTF(DRAM, "Single request, going to a free rank\n"); - } else { - DPRINTF(DRAM, "Single request, going to a busy rank\n"); - } - } else if (memSchedPolicy == Enums::fcfs) { - // check if there is a packet going to a free rank - for (auto i = queue.begin(); i != queue.end(); ++i) { - DRAMPacket* dram_pkt = *i; - if (packetReady(dram_pkt)) { - ret = i; - break; - } - } - } else if (memSchedPolicy == Enums::frfcfs) { - ret = chooseNextFRFCFS(queue, extra_col_delay); - } else { - panic("No scheduling policy chosen\n"); - } - } - return ret; -} - -DRAMPacketQueue::iterator -DRAMCtrl::chooseNextFRFCFS(DRAMPacketQueue& queue, Tick extra_col_delay) -{ - auto selected_pkt_it = queue.end(); - Tick col_allowed_at = MaxTick; - - // time we need to issue a column command to be seamless - const Tick min_col_at = std::max(nextBurstAt + extra_col_delay, curTick()); - - // find optimal packet for each interface - if (dram && nvm) { - // create 2nd set of parameters for NVM - auto nvm_pkt_it = queue.end(); - Tick nvm_col_at = MaxTick; - - // Select DRAM packet by default to give priority if both - // can issue at the same time or seamlessly - std::tie(selected_pkt_it, col_allowed_at) = - dram->chooseNextFRFCFS(queue, min_col_at); - if (selected_pkt_it == queue.end()) { - DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__); - } - - std::tie(nvm_pkt_it, nvm_col_at) = - nvm->chooseNextFRFCFS(queue, min_col_at); - if (nvm_pkt_it == queue.end()) { - DPRINTF(NVM, "%s no available NVM ranks found\n", __func__); - } - - // Compare DRAM and NVM and select NVM if it can issue - // earlier than the DRAM packet - if (col_allowed_at > nvm_col_at) { - selected_pkt_it = nvm_pkt_it; - } - } else if (dram) { - std::tie(selected_pkt_it, col_allowed_at) = - dram->chooseNextFRFCFS(queue, min_col_at); - - if (selected_pkt_it == queue.end()) { - DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__); - } - } else if (nvm) { - std::tie(selected_pkt_it, col_allowed_at) = - nvm->chooseNextFRFCFS(queue, min_col_at); - - if (selected_pkt_it == queue.end()) { - DPRINTF(NVM, "%s no available NVM ranks found\n", __func__); - } - } - - return selected_pkt_it; -} - -pair<DRAMPacketQueue::iterator, Tick> -DRAMInterface::chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const -{ - vector<uint32_t> earliest_banks(ranksPerChannel, 0); - - // Has minBankPrep been called to populate earliest_banks? - bool filled_earliest_banks = false; - // can the PRE/ACT sequence be done without impacting utlization? 
- bool hidden_bank_prep = false; - - // search for seamless row hits first, if no seamless row hit is - // found then determine if there are other packets that can be issued - // without incurring additional bus delay due to bank timing - // Will select closed rows first to enable more open row possibilies - // in future selections - bool found_hidden_bank = false; - - // remember if we found a row hit, not seamless, but bank prepped - // and ready - bool found_prepped_pkt = false; - - // if we have no row hit, prepped or not, and no seamless packet, - // just go for the earliest possible - bool found_earliest_pkt = false; - - Tick selected_col_at = MaxTick; - auto selected_pkt_it = queue.end(); - - for (auto i = queue.begin(); i != queue.end() ; ++i) { - DRAMPacket* pkt = *i; - - // select optimal DRAM packet in Q - if (pkt->isDram()) { - const Bank& bank = ranks[pkt->rank]->banks[pkt->bank]; - const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt : - bank.wrAllowedAt; - - DPRINTF(DRAM, "%s checking DRAM packet in bank %d, row %d\n", - __func__, pkt->bank, pkt->row); - - // check if rank is not doing a refresh and thus is available, - // if not, jump to the next packet - if (burstReady(pkt)) { - - DPRINTF(DRAM, - "%s bank %d - Rank %d available\n", __func__, - pkt->bank, pkt->rank); - - // check if it is a row hit - if (bank.openRow == pkt->row) { - // no additional rank-to-rank or same bank-group - // delays, or we switched read/write and might as well - // go for the row hit - if (col_allowed_at <= min_col_at) { - // FCFS within the hits, giving priority to - // commands that can issue seamlessly, without - // additional delay, such as same rank accesses - // and/or different bank-group accesses - DPRINTF(DRAM, "%s Seamless buffer hit\n", __func__); - selected_pkt_it = i; - selected_col_at = col_allowed_at; - // no need to look through the remaining queue entries - break; - } else if (!found_hidden_bank && !found_prepped_pkt) { - // if we did not find a packet to a closed row that can - // issue the bank commands without incurring delay, and - // did not yet find a packet to a prepped row, remember - // the current one - selected_pkt_it = i; - selected_col_at = col_allowed_at; - found_prepped_pkt = true; - DPRINTF(DRAM, "%s Prepped row buffer hit\n", __func__); - } - } else if (!found_earliest_pkt) { - // if we have not initialised the bank status, do it - // now, and only once per scheduling decisions - if (!filled_earliest_banks) { - // determine entries with earliest bank delay - std::tie(earliest_banks, hidden_bank_prep) = - minBankPrep(queue, min_col_at); - filled_earliest_banks = true; - } - - // bank is amongst first available banks - // minBankPrep will give priority to packets that can - // issue seamlessly - if (bits(earliest_banks[pkt->rank], - pkt->bank, pkt->bank)) { - found_earliest_pkt = true; - found_hidden_bank = hidden_bank_prep; - - // give priority to packets that can issue - // bank commands 'behind the scenes' - // any additional delay if any will be due to - // col-to-col command requirements - if (hidden_bank_prep || !found_prepped_pkt) { - selected_pkt_it = i; - selected_col_at = col_allowed_at; - } - } - } - } else { - DPRINTF(DRAM, "%s bank %d - Rank %d not available\n", __func__, - pkt->bank, pkt->rank); - } - } - } - - if (selected_pkt_it == queue.end()) { - DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__); - } - - return make_pair(selected_pkt_it, selected_col_at); -} - -void -DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick 
static_latency) -{ - DPRINTF(DRAM, "Responding to Address %lld.. \n",pkt->getAddr()); - - bool needsResponse = pkt->needsResponse(); - // do the actual memory access which also turns the packet into a - // response - if (dram && dram->getAddrRange().contains(pkt->getAddr())) { - dram->access(pkt); - } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { - nvm->access(pkt); - } else { - panic("Can't handle address range for packet %s\n", - pkt->print()); - } - - // turn packet around to go back to requester if response expected - if (needsResponse) { - // access already turned the packet into a response - assert(pkt->isResponse()); - // response_time consumes the static latency and is charged also - // with headerDelay that takes into account the delay provided by - // the xbar and also the payloadDelay that takes into account the - // number of data beats. - Tick response_time = curTick() + static_latency + pkt->headerDelay + - pkt->payloadDelay; - // Here we reset the timing of the packet before sending it out. - pkt->headerDelay = pkt->payloadDelay = 0; - - // queue the packet in the response queue to be sent out after - // the static latency has passed - port.schedTimingResp(pkt, response_time); - } else { - // @todo the packet is going to be deleted, and the DRAMPacket - // is still having a pointer to it - pendingDelete.reset(pkt); - } - - DPRINTF(DRAM, "Done\n"); - - return; -} - -void -DRAMCtrl::pruneBurstTick() -{ - auto it = burstTicks.begin(); - while (it != burstTicks.end()) { - auto current_it = it++; - if (curTick() > *current_it) { - DPRINTF(DRAM, "Removing burstTick for %d\n", *current_it); - burstTicks.erase(current_it); - } - } -} - -Tick -DRAMCtrl::getBurstWindow(Tick cmd_tick) -{ - // get tick aligned to burst window - Tick burst_offset = cmd_tick % commandWindow; - return (cmd_tick - burst_offset); -} - -Tick -DRAMCtrl::verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst) -{ - // start with assumption that there is no contention on command bus - Tick cmd_at = cmd_tick; - - // get tick aligned to burst window - Tick burst_tick = getBurstWindow(cmd_tick); - - // verify that we have command bandwidth to issue the command - // if not, iterate over next window(s) until slot found - while (burstTicks.count(burst_tick) >= max_cmds_per_burst) { - DPRINTF(DRAM, "Contention found on command bus at %d\n", burst_tick); - burst_tick += commandWindow; - cmd_at = burst_tick; - } - - // add command into burst window and return corresponding Tick - burstTicks.insert(burst_tick); - return cmd_at; -} - -Tick -DRAMCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, - Tick max_multi_cmd_split) -{ - // start with assumption that there is no contention on command bus - Tick cmd_at = cmd_tick; - - // get tick aligned to burst window - Tick burst_tick = getBurstWindow(cmd_tick); - - // Command timing requirements are from 2nd command - // Start with assumption that 2nd command will issue at cmd_at and - // find prior slot for 1st command to issue - // Given a maximum latency of max_multi_cmd_split between the commands, - // find the burst at the maximum latency prior to cmd_at - Tick burst_offset = 0; - Tick first_cmd_offset = cmd_tick % commandWindow; - while (max_multi_cmd_split > (first_cmd_offset + burst_offset)) { - burst_offset += commandWindow; - } - // get the earliest burst aligned address for first command - // ensure that the time does not go negative - Tick first_cmd_tick = burst_tick - std::min(burst_offset, burst_tick); - - // Can required commands issue? 
- bool first_can_issue = false; - bool second_can_issue = false; - // verify that we have command bandwidth to issue the command(s) - while (!first_can_issue || !second_can_issue) { - bool same_burst = (burst_tick == first_cmd_tick); - auto first_cmd_count = burstTicks.count(first_cmd_tick); - auto second_cmd_count = same_burst ? first_cmd_count + 1 : - burstTicks.count(burst_tick); - - first_can_issue = first_cmd_count < max_cmds_per_burst; - second_can_issue = second_cmd_count < max_cmds_per_burst; - - if (!second_can_issue) { - DPRINTF(DRAM, "Contention (cmd2) found on command bus at %d\n", - burst_tick); - burst_tick += commandWindow; - cmd_at = burst_tick; - } - - // Verify max_multi_cmd_split isn't violated when command 2 is shifted - // If commands initially were issued in same burst, they are - // now in consecutive bursts and can still issue B2B - bool gap_violated = !same_burst && - ((burst_tick - first_cmd_tick) > max_multi_cmd_split); - - if (!first_can_issue || (!second_can_issue && gap_violated)) { - DPRINTF(DRAM, "Contention (cmd1) found on command bus at %d\n", - first_cmd_tick); - first_cmd_tick += commandWindow; - } - } - - // Add command to burstTicks - burstTicks.insert(burst_tick); - burstTicks.insert(first_cmd_tick); - - return cmd_at; -} - -void -DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref, - Tick act_tick, uint32_t row) -{ - assert(rank_ref.actTicks.size() == activationLimit); - - // verify that we have command bandwidth to issue the activate - // if not, shift to next burst window - Tick act_at; - if (twoCycleActivate) - act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD); - else - act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow); - - DPRINTF(DRAM, "Activate at tick %d\n", act_at); - - // update the open row - assert(bank_ref.openRow == Bank::NO_ROW); - bank_ref.openRow = row; - - // start counting anew, this covers both the case when we - // auto-precharged, and when this access is forced to - // precharge - bank_ref.bytesAccessed = 0; - bank_ref.rowAccesses = 0; - - ++rank_ref.numBanksActive; - assert(rank_ref.numBanksActive <= banksPerRank); - - DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got " - "%d active\n", bank_ref.bank, rank_ref.rank, act_at, - ranks[rank_ref.rank]->numBanksActive); - - rank_ref.cmdList.push_back(Command(MemCommand::ACT, bank_ref.bank, - act_at)); - - DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_at, tCK) - - timeStampOffset, bank_ref.bank, rank_ref.rank); - - // The next access has to respect tRAS for this bank - bank_ref.preAllowedAt = act_at + tRAS; - - // Respect the row-to-column command delay for both read and write cmds - bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt); - bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt); - - // start by enforcing tRRD - for (int i = 0; i < banksPerRank; i++) { - // next activate to any bank in this rank must not happen - // before tRRD - if (bankGroupArch && (bank_ref.bankgr == rank_ref.banks[i].bankgr)) { - // bank group architecture requires longer delays between - // ACT commands within the same bank group. 
Use tRRD_L - // in this case - rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD_L, - rank_ref.banks[i].actAllowedAt); - } else { - // use shorter tRRD value when either - // 1) bank group architecture is not supportted - // 2) bank is in a different bank group - rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD, - rank_ref.banks[i].actAllowedAt); - } - } - - // next, we deal with tXAW, if the activation limit is disabled - // then we directly schedule an activate power event - if (!rank_ref.actTicks.empty()) { - // sanity check - if (rank_ref.actTicks.back() && - (act_at - rank_ref.actTicks.back()) < tXAW) { - panic("Got %d activates in window %d (%llu - %llu) which " - "is smaller than %llu\n", activationLimit, act_at - - rank_ref.actTicks.back(), act_at, - rank_ref.actTicks.back(), tXAW); - } - - // shift the times used for the book keeping, the last element - // (highest index) is the oldest one and hence the lowest value - rank_ref.actTicks.pop_back(); - - // record an new activation (in the future) - rank_ref.actTicks.push_front(act_at); - - // cannot activate more than X times in time window tXAW, push the - // next one (the X + 1'st activate) to be tXAW away from the - // oldest in our window of X - if (rank_ref.actTicks.back() && - (act_at - rank_ref.actTicks.back()) < tXAW) { - DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate " - "no earlier than %llu\n", activationLimit, - rank_ref.actTicks.back() + tXAW); - for (int j = 0; j < banksPerRank; j++) - // next activate must not happen before end of window - rank_ref.banks[j].actAllowedAt = - std::max(rank_ref.actTicks.back() + tXAW, - rank_ref.banks[j].actAllowedAt); - } - } - - // at the point when this activate takes place, make sure we - // transition to the active power state - if (!rank_ref.activateEvent.scheduled()) - schedule(rank_ref.activateEvent, act_at); - else if (rank_ref.activateEvent.when() > act_at) - // move it sooner in time - reschedule(rank_ref.activateEvent, act_at); -} - -void -DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick, - bool auto_or_preall, bool trace) -{ - // make sure the bank has an open row - assert(bank.openRow != Bank::NO_ROW); - - // sample the bytes per activate here since we are closing - // the page - stats.bytesPerActivate.sample(bank.bytesAccessed); - - bank.openRow = Bank::NO_ROW; - - Tick pre_at = pre_tick; - if (auto_or_preall) { - // no precharge allowed before this one - bank.preAllowedAt = pre_at; - } else { - // Issuing an explicit PRE command - // Verify that we have command bandwidth to issue the precharge - // if not, shift to next burst window - pre_at = ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow); - // enforce tPPD - for (int i = 0; i < banksPerRank; i++) { - rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD, - rank_ref.banks[i].preAllowedAt); - } - } - - Tick pre_done_at = pre_at + tRP; - - bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at); - - assert(rank_ref.numBanksActive != 0); - --rank_ref.numBanksActive; - - DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got " - "%d active\n", bank.bank, rank_ref.rank, pre_at, - rank_ref.numBanksActive); - - if (trace) { - - rank_ref.cmdList.push_back(Command(MemCommand::PRE, bank.bank, - pre_at)); - DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) - - timeStampOffset, bank.bank, rank_ref.rank); - } - - // if we look at the current number of active banks we might be - // tempted to think the DRAM is now idle, however this can be - // undone by 
an activate that is scheduled to happen before we - // would have reached the idle state, so schedule an event and - // rather check once we actually make it to the point in time when - // the (last) precharge takes place - if (!rank_ref.prechargeEvent.scheduled()) { - schedule(rank_ref.prechargeEvent, pre_done_at); - // New event, increment count - ++rank_ref.outstandingEvents; - } else if (rank_ref.prechargeEvent.when() < pre_done_at) { - reschedule(rank_ref.prechargeEvent, pre_done_at); - } -} - -pair -DRAMInterface::doBurstAccess(DRAMPacket* dram_pkt, Tick next_burst_at, - const std::vector& queue) -{ - DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n", - dram_pkt->addr, dram_pkt->rank, dram_pkt->bank, dram_pkt->row); - - // get the rank - Rank& rank_ref = *ranks[dram_pkt->rank]; - - assert(rank_ref.inRefIdleState()); - - // are we in or transitioning to a low-power state and have not scheduled - // a power-up event? - // if so, wake up from power down to issue RD/WR burst - if (rank_ref.inLowPowerState) { - assert(rank_ref.pwrState != PWR_SREF); - rank_ref.scheduleWakeUpEvent(tXP); - } - - // get the bank - Bank& bank_ref = rank_ref.banks[dram_pkt->bank]; - - // for the state we need to track if it is a row hit or not - bool row_hit = true; - - // Determine the access latency and update the bank state - if (bank_ref.openRow == dram_pkt->row) { - // nothing to do - } else { - row_hit = false; - - // If there is a page open, precharge it. - if (bank_ref.openRow != Bank::NO_ROW) { - prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt, - curTick())); - } - - // next we need to account for the delay in activating the page - Tick act_tick = std::max(bank_ref.actAllowedAt, curTick()); - - // Record the activation and deal with all the global timing - // constraints caused be a new activation (tRRD and tXAW) - activateBank(rank_ref, bank_ref, act_tick, dram_pkt->row); - } - - // respect any constraints on the command (e.g. tRCD or tCCD) - const Tick col_allowed_at = dram_pkt->isRead() ? 
- bank_ref.rdAllowedAt : bank_ref.wrAllowedAt; - - // we need to wait until the bus is available before we can issue - // the command; need to ensure minimum bus delay requirement is met - Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()}); - - // verify that we have command bandwidth to issue the burst - // if not, shift to next burst window - if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay)) - cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK); - else - cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow); - - // if we are interleaving bursts, ensure that - // 1) we don't double interleave on next burst issue - // 2) we are at an interleave boundary; if not, shift to next boundary - Tick burst_gap = tBURST_MIN; - if (burstInterleave) { - if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) { - // already interleaving, push next command to end of full burst - burst_gap = tBURST; - } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) { - // not at an interleave boundary after bandwidth check - // Shift command to tBURST boundary to avoid data contention - // Command will remain in the same burst window given that - // tBURST is less than tBURST_MAX - cmd_at = rank_ref.lastBurstTick + tBURST; - } - } - DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at); - - // update the packet ready time - dram_pkt->readyTime = cmd_at + tCL + tBURST; - - rank_ref.lastBurstTick = cmd_at; - - // update the time for the next read/write burst for each - // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here) - Tick dly_to_rd_cmd; - Tick dly_to_wr_cmd; - for (int j = 0; j < ranksPerChannel; j++) { - for (int i = 0; i < banksPerRank; i++) { - if (dram_pkt->rank == j) { - if (bankGroupArch && - (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) { - // bank group architecture requires longer delays between - // RD/WR burst commands to the same bank group. - // tCCD_L is default requirement for same BG timing - // tCCD_L_WR is required for write-to-write - // Need to also take bus turnaround delays into account - dly_to_rd_cmd = dram_pkt->isRead() ? - tCCD_L : std::max(tCCD_L, wrToRdDlySameBG); - dly_to_wr_cmd = dram_pkt->isRead() ? - std::max(tCCD_L, rdToWrDlySameBG) : - tCCD_L_WR; - } else { - // tBURST is default requirement for diff BG timing - // Need to also take bus turnaround delays into account - dly_to_rd_cmd = dram_pkt->isRead() ? burst_gap : - writeToReadDelay(); - dly_to_wr_cmd = dram_pkt->isRead() ? readToWriteDelay() : - burst_gap; - } - } else { - // different rank is by default in a different bank group and - // doesn't require longer tCCD or additional RTW, WTR delays - // Need to account for rank-to-rank switching - dly_to_wr_cmd = rankToRankDelay(); - dly_to_rd_cmd = rankToRankDelay(); - } - ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd, - ranks[j]->banks[i].rdAllowedAt); - ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd, - ranks[j]->banks[i].wrAllowedAt); - } - } - - // Save rank of current access - activeRank = dram_pkt->rank; - - // If this is a write, we also need to respect the write recovery - // time before a precharge, in the case of a read, respect the - // read to precharge constraint - bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt, - dram_pkt->isRead() ? 
cmd_at + tRTP : - dram_pkt->readyTime + tWR); - - // increment the bytes accessed and the accesses per row - bank_ref.bytesAccessed += burstSize; - ++bank_ref.rowAccesses; - - // if we reached the max, then issue with an auto-precharge - bool auto_precharge = pageMgmt == Enums::close || - bank_ref.rowAccesses == maxAccessesPerRow; - - // if we did not hit the limit, we might still want to - // auto-precharge - if (!auto_precharge && - (pageMgmt == Enums::open_adaptive || - pageMgmt == Enums::close_adaptive)) { - // a twist on the open and close page policies: - // 1) open_adaptive page policy does not blindly keep the - // page open, but close it if there are no row hits, and there - // are bank conflicts in the queue - // 2) close_adaptive page policy does not blindly close the - // page, but closes it only if there are no row hits in the queue. - // In this case, only force an auto precharge when there - // are no same page hits in the queue - bool got_more_hits = false; - bool got_bank_conflict = false; - - for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) { - auto p = queue[i].begin(); - // keep on looking until we find a hit or reach the end of the - // queue - // 1) if a hit is found, then both open and close adaptive - // policies keep the page open - // 2) if no hit is found, got_bank_conflict is set to true if a - // bank conflict request is waiting in the queue - // 3) make sure we are not considering the packet that we are - // currently dealing with - while (!got_more_hits && p != queue[i].end()) { - if (dram_pkt != (*p)) { - bool same_rank_bank = (dram_pkt->rank == (*p)->rank) && - (dram_pkt->bank == (*p)->bank); - - bool same_row = dram_pkt->row == (*p)->row; - got_more_hits |= same_rank_bank && same_row; - got_bank_conflict |= same_rank_bank && !same_row; - } - ++p; - } - - if (got_more_hits) - break; - } - - // auto pre-charge when either - // 1) open_adaptive policy, we have not got any more hits, and - // have a bank conflict - // 2) close_adaptive policy and we have not got any more hits - auto_precharge = !got_more_hits && - (got_bank_conflict || pageMgmt == Enums::close_adaptive); - } - - // DRAMPower trace command to be written - std::string mem_cmd = dram_pkt->isRead() ? "RD" : "WR"; - - // MemCommand required for DRAMPower library - MemCommand::cmds command = (mem_cmd == "RD") ? 
MemCommand::RD : - MemCommand::WR; - - rank_ref.cmdList.push_back(Command(command, dram_pkt->bank, cmd_at)); - - DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) - - timeStampOffset, mem_cmd, dram_pkt->bank, dram_pkt->rank); - - // if this access should use auto-precharge, then we are - // closing the row after the read/write burst - if (auto_precharge) { - // if auto-precharge push a PRE command at the correct tick to the - // list used by DRAMPower library to calculate power - prechargeBank(rank_ref, bank_ref, std::max(curTick(), - bank_ref.preAllowedAt), true); - - DPRINTF(DRAM, "Auto-precharged bank: %d\n", dram_pkt->bankId); - } - - // Update the stats and schedule the next request - if (dram_pkt->isRead()) { - // Every respQueue which will generate an event, increment count - ++rank_ref.outstandingEvents; - - stats.readBursts++; - if (row_hit) - stats.readRowHits++; - stats.bytesRead += burstSize; - stats.perBankRdBursts[dram_pkt->bankId]++; - - // Update latency stats - stats.totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime; - stats.totQLat += cmd_at - dram_pkt->entryTime; - stats.totBusLat += tBURST; - } else { - // Schedule write done event to decrement event count - // after the readyTime has been reached - // Only schedule latest write event to minimize events - // required; only need to ensure that final event scheduled covers - // the time that writes are outstanding and bus is active - // to holdoff power-down entry events - if (!rank_ref.writeDoneEvent.scheduled()) { - schedule(rank_ref.writeDoneEvent, dram_pkt->readyTime); - // New event, increment count - ++rank_ref.outstandingEvents; - - } else if (rank_ref.writeDoneEvent.when() < dram_pkt->readyTime) { - reschedule(rank_ref.writeDoneEvent, dram_pkt->readyTime); - } - // will remove write from queue when returned to parent function - // decrement count for DRAM rank - --rank_ref.writeEntries; - - stats.writeBursts++; - if (row_hit) - stats.writeRowHits++; - stats.bytesWritten += burstSize; - stats.perBankWrBursts[dram_pkt->bankId]++; - - } - // Update bus state to reflect when previous command was issued - return make_pair(cmd_at, cmd_at + burst_gap); -} - -void -DRAMInterface::addRankToRankDelay(Tick cmd_at) -{ - // update timing for DRAM ranks due to bursts issued - // to ranks on other media interfaces - for (auto n : ranks) { - for (int i = 0; i < banksPerRank; i++) { - // different rank by default - // Need to only account for rank-to-rank switching - n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(), - n->banks[i].rdAllowedAt); - n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(), - n->banks[i].wrAllowedAt); - } - } -} - -bool -DRAMCtrl::inReadBusState(bool next_state) const -{ - // check the bus state - if (next_state) { - // use busStateNext to get the state that will be used - // for the next burst - return (busStateNext == DRAMCtrl::READ); - } else { - return (busState == DRAMCtrl::READ); - } -} - -bool -DRAMCtrl::inWriteBusState(bool next_state) const -{ - // check the bus state - if (next_state) { - // use busStateNext to get the state that will be used - // for the next burst - return (busStateNext == DRAMCtrl::WRITE); - } else { - return (busState == DRAMCtrl::WRITE); - } -} - -void -DRAMCtrl::doBurstAccess(DRAMPacket* dram_pkt) -{ - // first clean up the burstTick set, removing old entries - // before adding new entries for next burst - pruneBurstTick(); - - // When was command issued? 
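The adaptive page policies above boil down to a small predicate once the queue scan has produced its two flags. A minimal standalone sketch of that decision; the helper name and its boolean parameters are illustrative, not identifiers from this change:

    // Decide whether the burst that just issued should close its row.
    // open_adaptive: close early only when no further row hits are queued
    // and a conflicting request to the same bank is waiting.
    // close_adaptive: close unless another hit to the same row is queued.
    bool autoPrechargeDecision(bool open_adaptive, bool close_adaptive,
                               bool more_row_hits_queued,
                               bool bank_conflict_queued)
    {
        if (open_adaptive)
            return !more_row_hits_queued && bank_conflict_queued;
        if (close_adaptive)
            return !more_row_hits_queued;
        // plain open policy: only the close policy or the
        // max-accesses-per-row limit forces an auto-precharge
        return false;
    }

Under the plain open and close policies the two flags are never consulted; the close policy and the row-access limit have already set the auto-precharge flag before this decision is reached.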
- Tick cmd_at; - - // Issue the next burst and update bus state to reflect - // when previous command was issued - if (dram_pkt->isDram()) { - std::vector& queue = selQueue(dram_pkt->isRead()); - std::tie(cmd_at, nextBurstAt) = - dram->doBurstAccess(dram_pkt, nextBurstAt, queue); - - // Update timing for NVM ranks if NVM is configured on this channel - if (nvm) - nvm->addRankToRankDelay(cmd_at); - - } else { - std::tie(cmd_at, nextBurstAt) = - nvm->doBurstAccess(dram_pkt, nextBurstAt); - - // Update timing for NVM ranks if NVM is configured on this channel - if (dram) - dram->addRankToRankDelay(cmd_at); - - } - - DPRINTF(DRAM, "Access to %lld, ready at %lld next burst at %lld.\n", - dram_pkt->addr, dram_pkt->readyTime, nextBurstAt); - - // Update the minimum timing between the requests, this is a - // conservative estimate of when we have to schedule the next - // request to not introduce any unecessary bubbles. In most cases - // we will wake up sooner than we have to. - nextReqTime = nextBurstAt - (dram ? dram->commandOffset() : - nvm->commandOffset()); - - - // Update the common bus stats - if (dram_pkt->isRead()) { - ++readsThisTime; - // Update latency stats - stats.masterReadTotalLat[dram_pkt->masterId()] += - dram_pkt->readyTime - dram_pkt->entryTime; - stats.masterReadBytes[dram_pkt->masterId()] += dram_pkt->size; - } else { - ++writesThisTime; - stats.masterWriteBytes[dram_pkt->masterId()] += dram_pkt->size; - stats.masterWriteTotalLat[dram_pkt->masterId()] += - dram_pkt->readyTime - dram_pkt->entryTime; - } -} - -void -DRAMCtrl::processNextReqEvent() -{ - // transition is handled by QoS algorithm if enabled - if (turnPolicy) { - // select bus state - only done if QoS algorithms are in use - busStateNext = selectNextBusState(); - } - - // detect bus state change - bool switched_cmd_type = (busState != busStateNext); - // record stats - recordTurnaroundStats(); - - DPRINTF(DRAM, "QoS Turnarounds selected state %s %s\n", - (busState==MemCtrl::READ)?"READ":"WRITE", - switched_cmd_type?"[turnaround triggered]":""); - - if (switched_cmd_type) { - if (busState == MemCtrl::READ) { - DPRINTF(DRAM, - "Switching to writes after %d reads with %d reads " - "waiting\n", readsThisTime, totalReadQueueSize); - stats.rdPerTurnAround.sample(readsThisTime); - readsThisTime = 0; - } else { - DPRINTF(DRAM, - "Switching to reads after %d writes with %d writes " - "waiting\n", writesThisTime, totalWriteQueueSize); - stats.wrPerTurnAround.sample(writesThisTime); - writesThisTime = 0; - } - } - - // updates current state - busState = busStateNext; - - if (nvm) { - for (auto queue = readQueue.rbegin(); - queue != readQueue.rend(); ++queue) { - // select non-deterministic NVM read to issue - // assume that we have the command bandwidth to issue this along - // with additional RD/WR burst with needed bank operations - if (nvm->readsWaitingToIssue()) { - // select non-deterministic NVM read to issue - nvm->chooseRead(*queue); - } - } - } - - // check ranks for refresh/wakeup - uses busStateNext, so done after - // turnaround decisions - // Default to busy status and update based on interface specifics - bool dram_busy = dram ? 
dram->isBusy() : true; - bool nvm_busy = true; - bool all_writes_nvm = false; - if (nvm) { - all_writes_nvm = nvm->numWritesQueued == totalWriteQueueSize; - bool read_queue_empty = totalReadQueueSize == 0; - nvm_busy = nvm->isBusy(read_queue_empty, all_writes_nvm); - } - // Default state of unused interface is 'true' - // Simply AND the busy signals to determine if system is busy - if (dram_busy && nvm_busy) { - // if all ranks are refreshing wait for them to finish - // and stall this state machine without taking any further - // action, and do not schedule a new nextReqEvent - return; - } - - // when we get here it is either a read or a write - if (busState == READ) { - - // track if we should switch or not - bool switch_to_writes = false; - - if (totalReadQueueSize == 0) { - // In the case there is no read request to go next, - // trigger writes if we have passed the low threshold (or - // if we are draining) - if (!(totalWriteQueueSize == 0) && - (drainState() == DrainState::Draining || - totalWriteQueueSize > writeLowThreshold)) { - - DPRINTF(DRAM, "Switching to writes due to read queue empty\n"); - switch_to_writes = true; - } else { - // check if we are drained - // not done draining until in PWR_IDLE state - // ensuring all banks are closed and - // have exited low power states - if (drainState() == DrainState::Draining && - respQueue.empty() && allIntfDrained()) { - - DPRINTF(Drain, "DRAM controller done draining\n"); - signalDrainDone(); - } - - // nothing to do, not even any point in scheduling an - // event for the next request - return; - } - } else { - - bool read_found = false; - DRAMPacketQueue::iterator to_read; - uint8_t prio = numPriorities(); - - for (auto queue = readQueue.rbegin(); - queue != readQueue.rend(); ++queue) { - - prio--; - - DPRINTF(QOS, - "DRAM controller checking READ queue [%d] priority [%d elements]\n", - prio, queue->size()); - - // Figure out which read request goes next - // If we are changing command type, incorporate the minimum - // bus turnaround delay which will be rank to rank delay - to_read = chooseNext((*queue), switched_cmd_type ? - minWriteToReadDataGap() : 0); - - if (to_read != queue->end()) { - // candidate read found - read_found = true; - break; - } - } - - // if no read to an available rank is found then return - // at this point. There could be writes to the available ranks - // which are above the required threshold. However, to - // avoid adding more complexity to the code, return and wait - // for a refresh event to kick things into action again. - if (!read_found) { - DPRINTF(DRAM, "No Reads Found - exiting\n"); - return; - } - - auto dram_pkt = *to_read; - - doBurstAccess(dram_pkt); - - // sanity check - assert(dram_pkt->size <= (dram_pkt->isDram() ? - dram->bytesPerBurst() : - nvm->bytesPerBurst()) ); - assert(dram_pkt->readyTime >= curTick()); - - // log the response - logResponse(MemCtrl::READ, (*to_read)->masterId(), - dram_pkt->qosValue(), dram_pkt->getAddr(), 1, - dram_pkt->readyTime - dram_pkt->entryTime); - - - // Insert into response queue. 
It will be sent back to the - // requester at its readyTime - if (respQueue.empty()) { - assert(!respondEvent.scheduled()); - schedule(respondEvent, dram_pkt->readyTime); - } else { - assert(respQueue.back()->readyTime <= dram_pkt->readyTime); - assert(respondEvent.scheduled()); - } - - respQueue.push_back(dram_pkt); - - // we have so many writes that we have to transition - // don't transition if the writeRespQueue is full and - // there are no other writes that can issue - if ((totalWriteQueueSize > writeHighThreshold) && - !(nvm && all_writes_nvm && nvm->writeRespQueueFull())) { - switch_to_writes = true; - } - - // remove the request from the queue - the iterator is no longer valid . - readQueue[dram_pkt->qosValue()].erase(to_read); - } - - // switching to writes, either because the read queue is empty - // and the writes have passed the low threshold (or we are - // draining), or because the writes hit the hight threshold - if (switch_to_writes) { - // transition to writing - busStateNext = WRITE; - } - } else { - - bool write_found = false; - DRAMPacketQueue::iterator to_write; - uint8_t prio = numPriorities(); - - for (auto queue = writeQueue.rbegin(); - queue != writeQueue.rend(); ++queue) { - - prio--; - - DPRINTF(QOS, - "DRAM controller checking WRITE queue [%d] priority [%d elements]\n", - prio, queue->size()); - - // If we are changing command type, incorporate the minimum - // bus turnaround delay - to_write = chooseNext((*queue), - switched_cmd_type ? minReadToWriteDataGap() : 0); - - if (to_write != queue->end()) { - write_found = true; - break; - } - } - - // if there are no writes to a rank that is available to service - // requests (i.e. rank is in refresh idle state) are found then - // return. There could be reads to the available ranks. However, to - // avoid adding more complexity to the code, return at this point and - // wait for a refresh event to kick things into action again. - if (!write_found) { - DPRINTF(DRAM, "No Writes Found - exiting\n"); - return; - } - - auto dram_pkt = *to_write; - - // sanity check - assert(dram_pkt->size <= (dram_pkt->isDram() ? - dram->bytesPerBurst() : - nvm->bytesPerBurst()) ); - - doBurstAccess(dram_pkt); - - isInWriteQueue.erase(burstAlign(dram_pkt->addr, dram_pkt->isDram())); - - // log the response - logResponse(MemCtrl::WRITE, dram_pkt->masterId(), - dram_pkt->qosValue(), dram_pkt->getAddr(), 1, - dram_pkt->readyTime - dram_pkt->entryTime); - - - // remove the request from the queue - the iterator is no longer valid - writeQueue[dram_pkt->qosValue()].erase(to_write); - - delete dram_pkt; - - // If we emptied the write queue, or got sufficiently below the - // threshold (using the minWritesPerSwitch as the hysteresis) and - // are not draining, or we have reads waiting and have done enough - // writes, then switch to reads. 
- // If we are interfacing to NVM and have filled the writeRespQueue, - // with only NVM writes in Q, then switch to reads - bool below_threshold = - totalWriteQueueSize + minWritesPerSwitch < writeLowThreshold; - - if (totalWriteQueueSize == 0 || - (below_threshold && drainState() != DrainState::Draining) || - (totalReadQueueSize && writesThisTime >= minWritesPerSwitch) || - (totalReadQueueSize && nvm && nvm->writeRespQueueFull() && - all_writes_nvm)) { - - // turn the bus back around for reads again - busStateNext = MemCtrl::READ; - - // note that the we switch back to reads also in the idle - // case, which eventually will check for any draining and - // also pause any further scheduling if there is really - // nothing to do - } - } - // It is possible that a refresh to another rank kicks things back into - // action before reaching this point. - if (!nextReqEvent.scheduled()) - schedule(nextReqEvent, std::max(nextReqTime, curTick())); - - // If there is space available and we have writes waiting then let - // them retry. This is done here to ensure that the retry does not - // cause a nextReqEvent to be scheduled before we do so as part of - // the next request processing - if (retryWrReq && totalWriteQueueSize < writeBufferSize) { - retryWrReq = false; - port.sendRetryReq(); - } -} - -MemInterface::MemInterface(const MemInterfaceParams* _p) - : AbstractMemory(_p), - addrMapping(_p->addr_mapping), - burstSize((_p->devices_per_rank * _p->burst_length * - _p->device_bus_width) / 8), - deviceSize(_p->device_size), - deviceRowBufferSize(_p->device_rowbuffer_size), - devicesPerRank(_p->devices_per_rank), - rowBufferSize(devicesPerRank * deviceRowBufferSize), - burstsPerRowBuffer(rowBufferSize / burstSize), - burstsPerStripe(range.interleaved() ? - range.granularity() / burstSize : 1), - ranksPerChannel(_p->ranks_per_channel), - banksPerRank(_p->banks_per_rank), rowsPerBank(0), - tCK(_p->tCK), tCS(_p->tCS), tBURST(_p->tBURST), - tRTW(_p->tRTW), - tWTR(_p->tWTR) -{} - -void -MemInterface::setCtrl(DRAMCtrl* _ctrl, unsigned int command_window) -{ - ctrl = _ctrl; - maxCommandsPerWindow = command_window / tCK; -} - -DRAMInterface::DRAMInterface(const DRAMInterfaceParams* _p) - : MemInterface(_p), - bankGroupsPerRank(_p->bank_groups_per_rank), - bankGroupArch(_p->bank_groups_per_rank > 0), - tCL(_p->tCL), - tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX), - tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD), - tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP), - tRFC(_p->tRFC), tREFI(_p->tREFI), tRRD(_p->tRRD), tRRD_L(_p->tRRD_L), - tPPD(_p->tPPD), tAAD(_p->tAAD), - tXAW(_p->tXAW), tXP(_p->tXP), tXS(_p->tXS), - clkResyncDelay(tCL + _p->tBURST_MAX), - dataClockSync(_p->data_clock_sync), - burstInterleave(tBURST != tBURST_MIN), - twoCycleActivate(_p->two_cycle_activate), - activationLimit(_p->activation_limit), - wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L), - rdToWrDlySameBG(_p->tRTW + _p->tBURST_MAX), - pageMgmt(_p->page_policy), - maxAccessesPerRow(_p->max_accesses_per_row), - timeStampOffset(0), activeRank(0), - enableDRAMPowerdown(_p->enable_dram_powerdown), - lastStatsResetTick(0), - stats(*this), - readBufferSize(_p->read_buffer_size), - writeBufferSize(_p->write_buffer_size) -{ - DPRINTF(DRAM, "Setting up DRAM Interface\n"); - - fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, " - "must be a power of two\n", burstSize); - - // sanity check the ranks since we rely on bit slicing for the - // address decoding - 
fatal_if(!isPowerOf2(ranksPerChannel), "DRAM rank count of %d is " - "not allowed, must be a power of two\n", ranksPerChannel); - - for (int i = 0; i < ranksPerChannel; i++) { - DPRINTF(DRAM, "Creating DRAM rank %d \n", i); - Rank* rank = new Rank(_p, i, *this); - ranks.push_back(rank); - } - - // determine the dram actual capacity from the DRAM config in Mbytes - uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank * - ranksPerChannel; - - uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size()); - - DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity, - AbstractMemory::size()); - - // if actual DRAM size does not match memory capacity in system warn! - if (deviceCapacity != capacity / (1024 * 1024)) - warn("DRAM device capacity (%d Mbytes) does not match the " - "address range assigned (%d Mbytes)\n", deviceCapacity, - capacity / (1024 * 1024)); - - DPRINTF(DRAM, "Row buffer size %d bytes with %d bursts per row buffer\n", - rowBufferSize, burstsPerRowBuffer); - - rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel); - - // some basic sanity checks - if (tREFI <= tRP || tREFI <= tRFC) { - fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n", - tREFI, tRP, tRFC); - } - - // basic bank group architecture checks -> - if (bankGroupArch) { - // must have at least one bank per bank group - if (bankGroupsPerRank > banksPerRank) { - fatal("banks per rank (%d) must be equal to or larger than " - "banks groups per rank (%d)\n", - banksPerRank, bankGroupsPerRank); - } - // must have same number of banks in each bank group - if ((banksPerRank % bankGroupsPerRank) != 0) { - fatal("Banks per rank (%d) must be evenly divisible by bank " - "groups per rank (%d) for equal banks per bank group\n", - banksPerRank, bankGroupsPerRank); - } - // tCCD_L should be greater than minimal, back-to-back burst delay - if (tCCD_L <= tBURST) { - fatal("tCCD_L (%d) should be larger than the minimum bus delay " - "(%d) when bank groups per rank (%d) is greater than 1\n", - tCCD_L, tBURST, bankGroupsPerRank); - } - // tCCD_L_WR should be greater than minimal, back-to-back burst delay - if (tCCD_L_WR <= tBURST) { - fatal("tCCD_L_WR (%d) should be larger than the minimum bus delay " - " (%d) when bank groups per rank (%d) is greater than 1\n", - tCCD_L_WR, tBURST, bankGroupsPerRank); - } - // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay - // some datasheets might specify it equal to tRRD - if (tRRD_L < tRRD) { - fatal("tRRD_L (%d) should be larger than tRRD (%d) when " - "bank groups per rank (%d) is greater than 1\n", - tRRD_L, tRRD, bankGroupsPerRank); - } - } -} - -void -DRAMInterface::init() -{ - AbstractMemory::init(); - - // a bit of sanity checks on the interleaving, save it for here to - // ensure that the system pointer is initialised - if (range.interleaved()) { - if (addrMapping == Enums::RoRaBaChCo) { - if (rowBufferSize != range.granularity()) { - fatal("Channel interleaving of %s doesn't match RoRaBaChCo " - "address map\n", name()); - } - } else if (addrMapping == Enums::RoRaBaCoCh || - addrMapping == Enums::RoCoRaBaCh) { - // for the interleavings with channel bits in the bottom, - // if the system uses a channel striping granularity that - // is larger than the DRAM burst size, then map the - // sequential accesses within a stripe to a number of - // columns in the DRAM, effectively placing some of the - // lower-order column bits as the least-significant bits - // of the address (above the ones denoting the burst size) - 
assert(burstsPerStripe >= 1); - - // channel striping has to be done at a granularity that - // is equal or larger to a cache line - if (system()->cacheLineSize() > range.granularity()) { - fatal("Channel interleaving of %s must be at least as large " - "as the cache line size\n", name()); - } - - // ...and equal or smaller than the row-buffer size - if (rowBufferSize < range.granularity()) { - fatal("Channel interleaving of %s must be at most as large " - "as the row-buffer size\n", name()); - } - // this is essentially the check above, so just to be sure - assert(burstsPerStripe <= burstsPerRowBuffer); - } - } -} - -void -DRAMInterface::startup() -{ - if (system()->isTimingMode()) { - // timestamp offset should be in clock cycles for DRAMPower - timeStampOffset = divCeil(curTick(), tCK); - - for (auto r : ranks) { - r->startup(curTick() + tREFI - tRP); - } - } -} - -bool -DRAMInterface::isBusy() -{ - int busy_ranks = 0; - for (auto r : ranks) { - if (!r->inRefIdleState()) { - if (r->pwrState != PWR_SREF) { - // rank is busy refreshing - DPRINTF(DRAMState, "Rank %d is not available\n", r->rank); - busy_ranks++; - - // let the rank know that if it was waiting to drain, it - // is now done and ready to proceed - r->checkDrainDone(); - } - - // check if we were in self-refresh and haven't started - // to transition out - if ((r->pwrState == PWR_SREF) && r->inLowPowerState) { - DPRINTF(DRAMState, "Rank %d is in self-refresh\n", r->rank); - // if we have commands queued to this rank and we don't have - // a minimum number of active commands enqueued, - // exit self-refresh - if (r->forceSelfRefreshExit()) { - DPRINTF(DRAMState, "rank %d was in self refresh and" - " should wake up\n", r->rank); - //wake up from self-refresh - r->scheduleWakeUpEvent(tXS); - // things are brought back into action once a refresh is - // performed after self-refresh - // continue with selection for other ranks - } - } - } - } - return (busy_ranks == ranksPerChannel); -} - -void DRAMInterface::setupRank(const uint8_t rank, const bool is_read) -{ - // increment entry count of the rank based on packet type - if (is_read) { - ++ranks[rank]->readEntries; - } else { - ++ranks[rank]->writeEntries; - } -} - -void -DRAMInterface::respondEvent(uint8_t rank) -{ - Rank& rank_ref = *ranks[rank]; - - // if a read has reached its ready-time, decrement the number of reads - // At this point the packet has been handled and there is a possibility - // to switch to low-power mode if no other packet is available - --rank_ref.readEntries; - DPRINTF(DRAM, "number of read entries for rank %d is %d\n", - rank, rank_ref.readEntries); - - // counter should at least indicate one outstanding request - // for this read - assert(rank_ref.outstandingEvents > 0); - // read response received, decrement count - --rank_ref.outstandingEvents; - - // at this moment should not have transitioned to a low-power state - assert((rank_ref.pwrState != PWR_SREF) && - (rank_ref.pwrState != PWR_PRE_PDN) && - (rank_ref.pwrState != PWR_ACT_PDN)); - - // track if this is the last packet before idling - // and that there are no outstanding commands to this rank - if (rank_ref.isQueueEmpty() && rank_ref.outstandingEvents == 0 && - rank_ref.inRefIdleState() && enableDRAMPowerdown) { - // verify that there are no events scheduled - assert(!rank_ref.activateEvent.scheduled()); - assert(!rank_ref.prechargeEvent.scheduled()); - - // if coming from active state, schedule power event to - // active power-down else go to precharge power-down - DPRINTF(DRAMState, "Rank %d 
sleep at tick %d; current power state is " - "%d\n", rank, curTick(), rank_ref.pwrState); - - // default to ACT power-down unless already in IDLE state - // could be in IDLE if PRE issued before data returned - PowerState next_pwr_state = PWR_ACT_PDN; - if (rank_ref.pwrState == PWR_IDLE) { - next_pwr_state = PWR_PRE_PDN; - } - - rank_ref.powerDownSleep(next_pwr_state, curTick()); - } -} - -void -DRAMInterface::checkRefreshState(uint8_t rank) -{ - Rank& rank_ref = *ranks[rank]; - - if ((rank_ref.refreshState == REF_PRE) && - !rank_ref.prechargeEvent.scheduled()) { - // kick the refresh event loop into action again if banks already - // closed and just waiting for read to complete - schedule(rank_ref.refreshEvent, curTick()); - } -} - -void -DRAMInterface::drainRanks() -{ - // also need to kick off events to exit self-refresh - for (auto r : ranks) { - // force self-refresh exit, which in turn will issue auto-refresh - if (r->pwrState == PWR_SREF) { - DPRINTF(DRAM,"Rank%d: Forcing self-refresh wakeup in drain\n", - r->rank); - r->scheduleWakeUpEvent(tXS); - } - } -} - -bool -DRAMInterface::allRanksDrained() const -{ - // true until proven false - bool all_ranks_drained = true; - for (auto r : ranks) { - // then verify that the power state is IDLE ensuring all banks are - // closed and rank is not in a low power state. Also verify that rank - // is idle from a refresh point of view. - all_ranks_drained = r->inPwrIdleState() && r->inRefIdleState() && - all_ranks_drained; - } - return all_ranks_drained; -} - -void -DRAMInterface::suspend() -{ - for (auto r : ranks) { - r->suspend(); - } -} - -pair, bool> -DRAMInterface::minBankPrep(const DRAMPacketQueue& queue, - Tick min_col_at) const -{ - Tick min_act_at = MaxTick; - vector bank_mask(ranksPerChannel, 0); - - // latest Tick for which ACT can occur without incurring additoinal - // delay on the data bus - const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick()); - - // Flag condition when burst can issue back-to-back with previous burst - bool found_seamless_bank = false; - - // Flag condition when bank can be opened without incurring additional - // delay on the data bus - bool hidden_bank_prep = false; - - // determine if we have queued transactions targetting the - // bank in question - vector got_waiting(ranksPerChannel * banksPerRank, false); - for (const auto& p : queue) { - if (p->isDram() && ranks[p->rank]->inRefIdleState()) - got_waiting[p->bankId] = true; - } - - // Find command with optimal bank timing - // Will prioritize commands that can issue seamlessly. - for (int i = 0; i < ranksPerChannel; i++) { - for (int j = 0; j < banksPerRank; j++) { - uint16_t bank_id = i * banksPerRank + j; - - // if we have waiting requests for the bank, and it is - // amongst the first available, update the mask - if (got_waiting[bank_id]) { - // make sure this rank is not currently refreshing. - assert(ranks[i]->inRefIdleState()); - // simplistic approximation of when the bank can issue - // an activate, ignoring any rank-to-rank switching - // cost in this calculation - Tick act_at = ranks[i]->banks[j].openRow == Bank::NO_ROW ? - std::max(ranks[i]->banks[j].actAllowedAt, curTick()) : - std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP; - - // When is the earliest the R/W burst can issue? - const Tick col_allowed_at = ctrl->inReadBusState(false) ? 
- ranks[i]->banks[j].rdAllowedAt : - ranks[i]->banks[j].wrAllowedAt; - Tick col_at = std::max(col_allowed_at, act_at + tRCD); - - // bank can issue burst back-to-back (seamlessly) with - // previous burst - bool new_seamless_bank = col_at <= min_col_at; - - // if we found a new seamless bank or we have no - // seamless banks, and got a bank with an earlier - // activate time, it should be added to the bit mask - if (new_seamless_bank || - (!found_seamless_bank && act_at <= min_act_at)) { - // if we did not have a seamless bank before, and - // we do now, reset the bank mask, also reset it - // if we have not yet found a seamless bank and - // the activate time is smaller than what we have - // seen so far - if (!found_seamless_bank && - (new_seamless_bank || act_at < min_act_at)) { - std::fill(bank_mask.begin(), bank_mask.end(), 0); - } - - found_seamless_bank |= new_seamless_bank; - - // ACT can occur 'behind the scenes' - hidden_bank_prep = act_at <= hidden_act_max; - - // set the bit corresponding to the available bank - replaceBits(bank_mask[i], j, j, 1); - min_act_at = act_at; - } - } - } - } - - return make_pair(bank_mask, hidden_bank_prep); -} - -DRAMInterface* -DRAMInterfaceParams::create() -{ - return new DRAMInterface(this); -} - -DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p, - int _rank, DRAMInterface& _dram) - : EventManager(&_dram), dram(_dram), - pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE), - pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE), - refreshState(REF_IDLE), inLowPowerState(false), rank(_rank), - readEntries(0), writeEntries(0), outstandingEvents(0), - wakeUpAllowedAt(0), power(_p, false), banks(_p->banks_per_rank), - numBanksActive(0), actTicks(_p->activation_limit, 0), lastBurstTick(0), - writeDoneEvent([this]{ processWriteDoneEvent(); }, name()), - activateEvent([this]{ processActivateEvent(); }, name()), - prechargeEvent([this]{ processPrechargeEvent(); }, name()), - refreshEvent([this]{ processRefreshEvent(); }, name()), - powerEvent([this]{ processPowerEvent(); }, name()), - wakeUpEvent([this]{ processWakeUpEvent(); }, name()), - stats(_dram, *this) -{ - for (int b = 0; b < _p->banks_per_rank; b++) { - banks[b].bank = b; - // GDDR addressing of banks to BG is linear. - // Here we assume that all DRAM generations address bank groups as - // follows: - if (_p->bank_groups_per_rank > 0) { - // Simply assign lower bits to bank group in order to - // rotate across bank groups as banks are incremented - // e.g. 
with 4 banks per bank group and 16 banks total: - // banks 0,4,8,12 are in bank group 0 - // banks 1,5,9,13 are in bank group 1 - // banks 2,6,10,14 are in bank group 2 - // banks 3,7,11,15 are in bank group 3 - banks[b].bankgr = b % _p->bank_groups_per_rank; - } else { - // No bank groups; simply assign to bank number - banks[b].bankgr = b; - } - } -} - -void -DRAMInterface::Rank::startup(Tick ref_tick) -{ - assert(ref_tick > curTick()); - - pwrStateTick = curTick(); - - // kick off the refresh, and give ourselves enough time to - // precharge - schedule(refreshEvent, ref_tick); -} - -void -DRAMInterface::Rank::suspend() -{ - deschedule(refreshEvent); - - // Update the stats - updatePowerStats(); - - // don't automatically transition back to LP state after next REF - pwrStatePostRefresh = PWR_IDLE; -} - -bool -DRAMInterface::Rank::isQueueEmpty() const -{ - // check commmands in Q based on current bus direction - bool no_queued_cmds = (dram.ctrl->inReadBusState(true) && - (readEntries == 0)) - || (dram.ctrl->inWriteBusState(true) && - (writeEntries == 0)); - return no_queued_cmds; -} - -void -DRAMInterface::Rank::checkDrainDone() -{ - // if this rank was waiting to drain it is now able to proceed to - // precharge - if (refreshState == REF_DRAIN) { - DPRINTF(DRAM, "Refresh drain done, now precharging\n"); - - refreshState = REF_PD_EXIT; - - // hand control back to the refresh event loop - schedule(refreshEvent, curTick()); - } -} - -void -DRAMInterface::Rank::flushCmdList() -{ - // at the moment sort the list of commands and update the counters - // for DRAMPower libray when doing a refresh - sort(cmdList.begin(), cmdList.end(), DRAMInterface::sortTime); - - auto next_iter = cmdList.begin(); - // push to commands to DRAMPower - for ( ; next_iter != cmdList.end() ; ++next_iter) { - Command cmd = *next_iter; - if (cmd.timeStamp <= curTick()) { - // Move all commands at or before curTick to DRAMPower - power.powerlib.doCommand(cmd.type, cmd.bank, - divCeil(cmd.timeStamp, dram.tCK) - - dram.timeStampOffset); - } else { - // done - found all commands at or before curTick() - // next_iter references the 1st command after curTick - break; - } - } - // reset cmdList to only contain commands after curTick - // if there are no commands after curTick, updated cmdList will be empty - // in this case, next_iter is cmdList.end() - cmdList.assign(next_iter, cmdList.end()); -} - -void -DRAMInterface::Rank::processActivateEvent() -{ - // we should transition to the active state as soon as any bank is active - if (pwrState != PWR_ACT) - // note that at this point numBanksActive could be back at - // zero again due to a precharge scheduled in the future - schedulePowerEvent(PWR_ACT, curTick()); -} - -void -DRAMInterface::Rank::processPrechargeEvent() -{ - // counter should at least indicate one outstanding request - // for this precharge - assert(outstandingEvents > 0); - // precharge complete, decrement count - --outstandingEvents; - - // if we reached zero, then special conditions apply as we track - // if all banks are precharged for the power models - if (numBanksActive == 0) { - // no reads to this rank in the Q and no pending - // RD/WR or refresh commands - if (isQueueEmpty() && outstandingEvents == 0 && - dram.enableDRAMPowerdown) { - // should still be in ACT state since bank still open - assert(pwrState == PWR_ACT); - - // All banks closed - switch to precharge power down state. 
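flushCmdList() above sorts the queued DRAMPower commands and hands over everything with a timestamp at or before the current tick, keeping the rest for the next flush. A minimal standalone sketch of that sort-then-split idea, with hypothetical type and function names:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Only the fields needed for the illustration.
    struct Cmd { int type; int bank; std::uint64_t timeStamp; };

    // Hand every command at or before 'now' to a consumer (the power
    // library in the real code) and keep only the later ones queued.
    template <typename Consumer>
    void flushUpTo(std::vector<Cmd> &cmdList, std::uint64_t now,
                   Consumer consume)
    {
        std::sort(cmdList.begin(), cmdList.end(),
                  [](const Cmd &a, const Cmd &b)
                  { return a.timeStamp < b.timeStamp; });

        auto split = std::find_if(cmdList.begin(), cmdList.end(),
                                  [now](const Cmd &c)
                                  { return c.timeStamp > now; });

        for (auto it = cmdList.begin(); it != split; ++it)
            consume(*it);

        // corresponds to the cmdList.assign(next_iter, cmdList.end()) step
        cmdList.erase(cmdList.begin(), split);
    }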
- DPRINTF(DRAMState, "Rank %d sleep at tick %d\n", - rank, curTick()); - powerDownSleep(PWR_PRE_PDN, curTick()); - } else { - // we should transition to the idle state when the last bank - // is precharged - schedulePowerEvent(PWR_IDLE, curTick()); - } - } -} - -void -DRAMInterface::Rank::processWriteDoneEvent() -{ - // counter should at least indicate one outstanding request - // for this write - assert(outstandingEvents > 0); - // Write transfer on bus has completed - // decrement per rank counter - --outstandingEvents; -} - -void -DRAMInterface::Rank::processRefreshEvent() -{ - // when first preparing the refresh, remember when it was due - if ((refreshState == REF_IDLE) || (refreshState == REF_SREF_EXIT)) { - // remember when the refresh is due - refreshDueAt = curTick(); - - // proceed to drain - refreshState = REF_DRAIN; - - // make nonzero while refresh is pending to ensure - // power down and self-refresh are not entered - ++outstandingEvents; - - DPRINTF(DRAM, "Refresh due\n"); - } - - // let any scheduled read or write to the same rank go ahead, - // after which it will - // hand control back to this event loop - if (refreshState == REF_DRAIN) { - // if a request is at the moment being handled and this request is - // accessing the current rank then wait for it to finish - if ((rank == dram.activeRank) - && (dram.ctrl->requestEventScheduled())) { - // hand control over to the request loop until it is - // evaluated next - DPRINTF(DRAM, "Refresh awaiting draining\n"); - - return; - } else { - refreshState = REF_PD_EXIT; - } - } - - // at this point, ensure that rank is not in a power-down state - if (refreshState == REF_PD_EXIT) { - // if rank was sleeping and we have't started exit process, - // wake-up for refresh - if (inLowPowerState) { - DPRINTF(DRAM, "Wake Up for refresh\n"); - // save state and return after refresh completes - scheduleWakeUpEvent(dram.tXP); - return; - } else { - refreshState = REF_PRE; - } - } - - // at this point, ensure that all banks are precharged - if (refreshState == REF_PRE) { - // precharge any active bank - if (numBanksActive != 0) { - // at the moment, we use a precharge all even if there is - // only a single bank open - DPRINTF(DRAM, "Precharging all\n"); - - // first determine when we can precharge - Tick pre_at = curTick(); - - for (auto &b : banks) { - // respect both causality and any existing bank - // constraints, some banks could already have a - // (auto) precharge scheduled - pre_at = std::max(b.preAllowedAt, pre_at); - } - - // make sure all banks per rank are precharged, and for those that - // already are, update their availability - Tick act_allowed_at = pre_at + dram.tRP; - - for (auto &b : banks) { - if (b.openRow != Bank::NO_ROW) { - dram.prechargeBank(*this, b, pre_at, true, false); - } else { - b.actAllowedAt = std::max(b.actAllowedAt, act_allowed_at); - b.preAllowedAt = std::max(b.preAllowedAt, pre_at); - } - } - - // precharge all banks in rank - cmdList.push_back(Command(MemCommand::PREA, 0, pre_at)); - - DPRINTF(DRAMPower, "%llu,PREA,0,%d\n", - divCeil(pre_at, dram.tCK) - - dram.timeStampOffset, rank); - } else if ((pwrState == PWR_IDLE) && (outstandingEvents == 1)) { - // Banks are closed, have transitioned to IDLE state, and - // no outstanding ACT,RD/WR,Auto-PRE sequence scheduled - DPRINTF(DRAM, "All banks already precharged, starting refresh\n"); - - // go ahead and kick the power state machine into gear since - // we are already idle - schedulePowerEvent(PWR_REF, curTick()); - } else { - // banks state is closed but 
haven't transitioned pwrState to IDLE - // or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled - // should have outstanding precharge or read response event - assert(prechargeEvent.scheduled() || - dram.ctrl->respondEventScheduled()); - // will start refresh when pwrState transitions to IDLE - } - - assert(numBanksActive == 0); - - // wait for all banks to be precharged or read to complete - // When precharge commands are done, power state machine will - // transition to the idle state, and automatically move to a - // refresh, at that point it will also call this method to get - // the refresh event loop going again - // Similarly, when read response completes, if all banks are - // precharged, will call this method to get loop re-started - return; - } - - // last but not least we perform the actual refresh - if (refreshState == REF_START) { - // should never get here with any banks active - assert(numBanksActive == 0); - assert(pwrState == PWR_REF); - - Tick ref_done_at = curTick() + dram.tRFC; - - for (auto &b : banks) { - b.actAllowedAt = ref_done_at; - } - - // at the moment this affects all ranks - cmdList.push_back(Command(MemCommand::REF, 0, curTick())); - - // Update the stats - updatePowerStats(); - - DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), dram.tCK) - - dram.timeStampOffset, rank); - - // Update for next refresh - refreshDueAt += dram.tREFI; - - // make sure we did not wait so long that we cannot make up - // for it - if (refreshDueAt < ref_done_at) { - fatal("Refresh was delayed so long we cannot catch up\n"); - } - - // Run the refresh and schedule event to transition power states - // when refresh completes - refreshState = REF_RUN; - schedule(refreshEvent, ref_done_at); - return; - } - - if (refreshState == REF_RUN) { - // should never get here with any banks active - assert(numBanksActive == 0); - assert(pwrState == PWR_REF); - - assert(!powerEvent.scheduled()); - - if ((dram.ctrl->drainState() == DrainState::Draining) || - (dram.ctrl->drainState() == DrainState::Drained)) { - // if draining, do not re-enter low-power mode. - // simply go to IDLE and wait - schedulePowerEvent(PWR_IDLE, curTick()); - } else { - // At the moment, we sleep when the refresh ends and wait to be - // woken up again if previously in a low-power state. - if (pwrStatePostRefresh != PWR_IDLE) { - // power State should be power Refresh - assert(pwrState == PWR_REF); - DPRINTF(DRAMState, "Rank %d sleeping after refresh and was in " - "power state %d before refreshing\n", rank, - pwrStatePostRefresh); - powerDownSleep(pwrState, curTick()); - - // Force PRE power-down if there are no outstanding commands - // in Q after refresh. - } else if (isQueueEmpty() && dram.enableDRAMPowerdown) { - // still have refresh event outstanding but there should - // be no other events outstanding - assert(outstandingEvents == 1); - DPRINTF(DRAMState, "Rank %d sleeping after refresh but was NOT" - " in a low power state before refreshing\n", rank); - powerDownSleep(PWR_PRE_PDN, curTick()); - - } else { - // move to the idle power state once the refresh is done, this - // will also move the refresh state machine to the refresh - // idle state - schedulePowerEvent(PWR_IDLE, curTick()); - } - } - - // At this point, we have completed the current refresh. - // In the SREF bypass case, we do not get to this state in the - // refresh STM and therefore can always schedule next event. 
- // Compensate for the delay in actually performing the refresh - // when scheduling the next one - schedule(refreshEvent, refreshDueAt - dram.tRP); - - DPRINTF(DRAMState, "Refresh done at %llu and next refresh" - " at %llu\n", curTick(), refreshDueAt); - } -} - -void -DRAMInterface::Rank::schedulePowerEvent(PowerState pwr_state, Tick tick) -{ - // respect causality - assert(tick >= curTick()); - - if (!powerEvent.scheduled()) { - DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n", - tick, pwr_state); - - // insert the new transition - pwrStateTrans = pwr_state; - - schedule(powerEvent, tick); - } else { - panic("Scheduled power event at %llu to state %d, " - "with scheduled event at %llu to %d\n", tick, pwr_state, - powerEvent.when(), pwrStateTrans); - } -} - -void -DRAMInterface::Rank::powerDownSleep(PowerState pwr_state, Tick tick) -{ - // if low power state is active low, schedule to active low power state. - // in reality tCKE is needed to enter active low power. This is neglected - // here and could be added in the future. - if (pwr_state == PWR_ACT_PDN) { - schedulePowerEvent(pwr_state, tick); - // push command to DRAMPower - cmdList.push_back(Command(MemCommand::PDN_F_ACT, 0, tick)); - DPRINTF(DRAMPower, "%llu,PDN_F_ACT,0,%d\n", divCeil(tick, - dram.tCK) - dram.timeStampOffset, rank); - } else if (pwr_state == PWR_PRE_PDN) { - // if low power state is precharge low, schedule to precharge low - // power state. In reality tCKE is needed to enter active low power. - // This is neglected here. - schedulePowerEvent(pwr_state, tick); - //push Command to DRAMPower - cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick)); - DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick, - dram.tCK) - dram.timeStampOffset, rank); - } else if (pwr_state == PWR_REF) { - // if a refresh just occurred - // transition to PRE_PDN now that all banks are closed - // precharge power down requires tCKE to enter. For simplicity - // this is not considered. - schedulePowerEvent(PWR_PRE_PDN, tick); - //push Command to DRAMPower - cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick)); - DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick, - dram.tCK) - dram.timeStampOffset, rank); - } else if (pwr_state == PWR_SREF) { - // should only enter SREF after PRE-PD wakeup to do a refresh - assert(pwrStatePostRefresh == PWR_PRE_PDN); - // self refresh requires time tCKESR to enter. For simplicity, - // this is not considered. 
- schedulePowerEvent(PWR_SREF, tick); - // push Command to DRAMPower - cmdList.push_back(Command(MemCommand::SREN, 0, tick)); - DPRINTF(DRAMPower, "%llu,SREN,0,%d\n", divCeil(tick, - dram.tCK) - dram.timeStampOffset, rank); - } - // Ensure that we don't power-down and back up in same tick - // Once we commit to PD entry, do it and wait for at least 1tCK - // This could be replaced with tCKE if/when that is added to the model - wakeUpAllowedAt = tick + dram.tCK; - - // Transitioning to a low power state, set flag - inLowPowerState = true; -} - -void -DRAMInterface::Rank::scheduleWakeUpEvent(Tick exit_delay) -{ - Tick wake_up_tick = std::max(curTick(), wakeUpAllowedAt); - - DPRINTF(DRAMState, "Scheduling wake-up for rank %d at tick %d\n", - rank, wake_up_tick); - - // if waking for refresh, hold previous state - // else reset state back to IDLE - if (refreshState == REF_PD_EXIT) { - pwrStatePostRefresh = pwrState; - } else { - // don't automatically transition back to LP state after next REF - pwrStatePostRefresh = PWR_IDLE; - } - - // schedule wake-up with event to ensure entry has completed before - // we try to wake-up - schedule(wakeUpEvent, wake_up_tick); - - for (auto &b : banks) { - // respect both causality and any existing bank - // constraints, some banks could already have a - // (auto) precharge scheduled - b.wrAllowedAt = std::max(wake_up_tick + exit_delay, b.wrAllowedAt); - b.rdAllowedAt = std::max(wake_up_tick + exit_delay, b.rdAllowedAt); - b.preAllowedAt = std::max(wake_up_tick + exit_delay, b.preAllowedAt); - b.actAllowedAt = std::max(wake_up_tick + exit_delay, b.actAllowedAt); - } - // Transitioning out of low power state, clear flag - inLowPowerState = false; - - // push to DRAMPower - // use pwrStateTrans for cases where we have a power event scheduled - // to enter low power that has not yet been processed - if (pwrStateTrans == PWR_ACT_PDN) { - cmdList.push_back(Command(MemCommand::PUP_ACT, 0, wake_up_tick)); - DPRINTF(DRAMPower, "%llu,PUP_ACT,0,%d\n", divCeil(wake_up_tick, - dram.tCK) - dram.timeStampOffset, rank); - - } else if (pwrStateTrans == PWR_PRE_PDN) { - cmdList.push_back(Command(MemCommand::PUP_PRE, 0, wake_up_tick)); - DPRINTF(DRAMPower, "%llu,PUP_PRE,0,%d\n", divCeil(wake_up_tick, - dram.tCK) - dram.timeStampOffset, rank); - } else if (pwrStateTrans == PWR_SREF) { - cmdList.push_back(Command(MemCommand::SREX, 0, wake_up_tick)); - DPRINTF(DRAMPower, "%llu,SREX,0,%d\n", divCeil(wake_up_tick, - dram.tCK) - dram.timeStampOffset, rank); - } -} - -void -DRAMInterface::Rank::processWakeUpEvent() -{ - // Should be in a power-down or self-refresh state - assert((pwrState == PWR_ACT_PDN) || (pwrState == PWR_PRE_PDN) || - (pwrState == PWR_SREF)); - - // Check current state to determine transition state - if (pwrState == PWR_ACT_PDN) { - // banks still open, transition to PWR_ACT - schedulePowerEvent(PWR_ACT, curTick()); - } else { - // transitioning from a precharge power-down or self-refresh state - // banks are closed - transition to PWR_IDLE - schedulePowerEvent(PWR_IDLE, curTick()); - } -} - -void -DRAMInterface::Rank::processPowerEvent() -{ - assert(curTick() >= pwrStateTick); - // remember where we were, and for how long - Tick duration = curTick() - pwrStateTick; - PowerState prev_state = pwrState; - - // update the accounting - stats.pwrStateTime[prev_state] += duration; - - // track to total idle time - if ((prev_state == PWR_PRE_PDN) || (prev_state == PWR_ACT_PDN) || - (prev_state == PWR_SREF)) { - stats.totalIdleTime += duration; - } - - pwrState = 
pwrStateTrans; - pwrStateTick = curTick(); - - // if rank was refreshing, make sure to start scheduling requests again - if (prev_state == PWR_REF) { - // bus IDLED prior to REF - // counter should be one for refresh command only - assert(outstandingEvents == 1); - // REF complete, decrement count and go back to IDLE - --outstandingEvents; - refreshState = REF_IDLE; - - DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration); - // if moving back to power-down after refresh - if (pwrState != PWR_IDLE) { - assert(pwrState == PWR_PRE_PDN); - DPRINTF(DRAMState, "Switching to power down state after refreshing" - " rank %d at %llu tick\n", rank, curTick()); - } - - // completed refresh event, ensure next request is scheduled - if (!dram.ctrl->requestEventScheduled()) { - DPRINTF(DRAM, "Scheduling next request after refreshing" - " rank %d\n", rank); - dram.ctrl->restartScheduler(curTick()); - } - } - - if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) { - // have exited ACT PD - assert(prev_state == PWR_ACT_PDN); - - // go back to REF event and close banks - refreshState = REF_PRE; - schedule(refreshEvent, curTick()); - } else if (pwrState == PWR_IDLE) { - DPRINTF(DRAMState, "All banks precharged\n"); - if (prev_state == PWR_SREF) { - // set refresh state to REF_SREF_EXIT, ensuring inRefIdleState - // continues to return false during tXS after SREF exit - // Schedule a refresh which kicks things back into action - // when it finishes - refreshState = REF_SREF_EXIT; - schedule(refreshEvent, curTick() + dram.tXS); - } else { - // if we have a pending refresh, and are now moving to - // the idle state, directly transition to, or schedule refresh - if ((refreshState == REF_PRE) || (refreshState == REF_PD_EXIT)) { - // ensure refresh is restarted only after final PRE command. - // do not restart refresh if controller is in an intermediate - // state, after PRE_PDN exit, when banks are IDLE but an - // ACT is scheduled. 
- if (!activateEvent.scheduled()) { - // there should be nothing waiting at this point - assert(!powerEvent.scheduled()); - if (refreshState == REF_PD_EXIT) { - // exiting PRE PD, will be in IDLE until tXP expires - // and then should transition to PWR_REF state - assert(prev_state == PWR_PRE_PDN); - schedulePowerEvent(PWR_REF, curTick() + dram.tXP); - } else if (refreshState == REF_PRE) { - // can directly move to PWR_REF state and proceed below - pwrState = PWR_REF; - } - } else { - // must have PRE scheduled to transition back to IDLE - // and re-kick off refresh - assert(prechargeEvent.scheduled()); - } - } - } - } - - // transition to the refresh state and re-start refresh process - // refresh state machine will schedule the next power state transition - if (pwrState == PWR_REF) { - // completed final PRE for refresh or exiting power-down - assert(refreshState == REF_PRE || refreshState == REF_PD_EXIT); - - // exited PRE PD for refresh, with no pending commands - // bypass auto-refresh and go straight to SREF, where memory - // will issue refresh immediately upon entry - if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() && - (dram.ctrl->drainState() != DrainState::Draining) && - (dram.ctrl->drainState() != DrainState::Drained) && - dram.enableDRAMPowerdown) { - DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning " - "to self refresh at %11u tick\n", rank, curTick()); - powerDownSleep(PWR_SREF, curTick()); - - // Since refresh was bypassed, remove event by decrementing count - assert(outstandingEvents == 1); - --outstandingEvents; - - // reset state back to IDLE temporarily until SREF is entered - pwrState = PWR_IDLE; - - // Not bypassing refresh for SREF entry - } else { - DPRINTF(DRAMState, "Refreshing\n"); - - // there should be nothing waiting at this point - assert(!powerEvent.scheduled()); - - // kick the refresh event loop into action again, and that - // in turn will schedule a transition to the idle power - // state once the refresh is done - schedule(refreshEvent, curTick()); - - // Banks transitioned to IDLE, start REF - refreshState = REF_START; - } - } - -} - -void -DRAMInterface::Rank::updatePowerStats() -{ - // All commands up to refresh have completed - // flush cmdList to DRAMPower - flushCmdList(); - - // Call the function that calculates window energy at intermediate update - // events like at refresh, stats dump as well as at simulation exit. - // Window starts at the last time the calcWindowEnergy function was called - // and is upto current time. 
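Each DRAMPower call in this interface converts gem5 ticks into DRAM clock cycles counted from timeStampOffset, the cycle number captured in startup(). A minimal sketch of that conversion; the 1 ps tick and 800 MHz clock in the example are assumptions for illustration only:

    #include <cstdint>

    // Round-up integer division, equivalent to gem5's divCeil().
    std::uint64_t div_ceil(std::uint64_t a, std::uint64_t b)
    {
        return (a + b - 1) / b;
    }

    // Clock cycle handed to the power library for an event at 'tick'.
    std::uint64_t dramPowerCycle(std::uint64_t tick, std::uint64_t tck,
                                 std::uint64_t time_stamp_offset)
    {
        return div_ceil(tick, tck) - time_stamp_offset;
    }

    // e.g. with a 1 ps tick, an 800 MHz DRAM clock gives tCK = 1250 ticks,
    // so an event at tick 2500000 with a zero offset lands in cycle 2000.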
- power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) - - dram.timeStampOffset); - - // Get the energy from DRAMPower - Data::MemoryPowerModel::Energy energy = power.powerlib.getEnergy(); - - // The energy components inside the power lib are calculated over - // the window so accumulate into the corresponding gem5 stat - stats.actEnergy += energy.act_energy * dram.devicesPerRank; - stats.preEnergy += energy.pre_energy * dram.devicesPerRank; - stats.readEnergy += energy.read_energy * dram.devicesPerRank; - stats.writeEnergy += energy.write_energy * dram.devicesPerRank; - stats.refreshEnergy += energy.ref_energy * dram.devicesPerRank; - stats.actBackEnergy += energy.act_stdby_energy * dram.devicesPerRank; - stats.preBackEnergy += energy.pre_stdby_energy * dram.devicesPerRank; - stats.actPowerDownEnergy += energy.f_act_pd_energy * dram.devicesPerRank; - stats.prePowerDownEnergy += energy.f_pre_pd_energy * dram.devicesPerRank; - stats.selfRefreshEnergy += energy.sref_energy * dram.devicesPerRank; - - // Accumulate window energy into the total energy. - stats.totalEnergy += energy.window_energy * dram.devicesPerRank; - // Average power must not be accumulated but calculated over the time - // since last stats reset. SimClock::Frequency is tick period not tick - // frequency. - // energy (pJ) 1e-9 - // power (mW) = ----------- * ---------- - // time (tick) tick_frequency - stats.averagePower = (stats.totalEnergy.value() / - (curTick() - dram.lastStatsResetTick)) * - (SimClock::Frequency / 1000000000.0); -} - -void -DRAMInterface::Rank::computeStats() -{ - DPRINTF(DRAM,"Computing stats due to a dump callback\n"); - - // Update the stats - updatePowerStats(); - - // final update of power state times - stats.pwrStateTime[pwrState] += (curTick() - pwrStateTick); - pwrStateTick = curTick(); -} - -void -DRAMInterface::Rank::resetStats() { - // The only way to clear the counters in DRAMPower is to call - // calcWindowEnergy function as that then calls clearCounters. The - // clearCounters method itself is private. 
- power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) - - dram.timeStampOffset); - -} - -bool -DRAMInterface::Rank::forceSelfRefreshExit() const { - return (readEntries != 0) || - (dram.ctrl->inWriteBusState(true) && (writeEntries != 0)); -} - -DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl) - : Stats::Group(&_ctrl), - ctrl(_ctrl), - - ADD_STAT(readReqs, "Number of read requests accepted"), - ADD_STAT(writeReqs, "Number of write requests accepted"), - - ADD_STAT(readBursts, - "Number of controller read bursts, " - "including those serviced by the write queue"), - ADD_STAT(writeBursts, - "Number of controller write bursts, " - "including those merged in the write queue"), - ADD_STAT(servicedByWrQ, - "Number of controller read bursts serviced by the write queue"), - ADD_STAT(mergedWrBursts, - "Number of controller write bursts merged with an existing one"), - - ADD_STAT(neitherReadNorWriteReqs, - "Number of requests that are neither read nor write"), - - ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"), - ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"), - - ADD_STAT(numRdRetry, "Number of times read queue was full causing retry"), - ADD_STAT(numWrRetry, "Number of times write queue was full causing retry"), - - ADD_STAT(readPktSize, "Read request sizes (log2)"), - ADD_STAT(writePktSize, "Write request sizes (log2)"), - - ADD_STAT(rdQLenPdf, "What read queue length does an incoming req see"), - ADD_STAT(wrQLenPdf, "What write queue length does an incoming req see"), - - ADD_STAT(rdPerTurnAround, - "Reads before turning the bus around for writes"), - ADD_STAT(wrPerTurnAround, - "Writes before turning the bus around for reads"), - - ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"), - ADD_STAT(bytesReadSys, "Total read bytes from the system interface side"), - ADD_STAT(bytesWrittenSys, - "Total written bytes from the system interface side"), - - ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"), - ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"), - - ADD_STAT(totGap, "Total gap between requests"), - ADD_STAT(avgGap, "Average gap between requests"), - - ADD_STAT(masterReadBytes, "Per-master bytes read from memory"), - ADD_STAT(masterWriteBytes, "Per-master bytes write to memory"), - ADD_STAT(masterReadRate, - "Per-master bytes read from memory rate (Bytes/sec)"), - ADD_STAT(masterWriteRate, - "Per-master bytes write to memory rate (Bytes/sec)"), - ADD_STAT(masterReadAccesses, - "Per-master read serviced memory accesses"), - ADD_STAT(masterWriteAccesses, - "Per-master write serviced memory accesses"), - ADD_STAT(masterReadTotalLat, - "Per-master read total memory access latency"), - ADD_STAT(masterWriteTotalLat, - "Per-master write total memory access latency"), - ADD_STAT(masterReadAvgLat, - "Per-master read average memory access latency"), - ADD_STAT(masterWriteAvgLat, - "Per-master write average memory access latency") - -{ -} - -void -DRAMCtrl::CtrlStats::regStats() -{ - using namespace Stats; - - assert(ctrl.system()); - const auto max_masters = ctrl.system()->maxMasters(); - - avgRdQLen.precision(2); - avgWrQLen.precision(2); - - readPktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); - writePktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); - - rdQLenPdf.init(ctrl.readBufferSize); - wrQLenPdf.init(ctrl.writeBufferSize); - - rdPerTurnAround - .init(ctrl.readBufferSize) - .flags(nozero); - wrPerTurnAround - .init(ctrl.writeBufferSize) - .flags(nozero); - - avgRdBWSys.precision(2); 
- avgWrBWSys.precision(2); - avgGap.precision(2); - - // per-master bytes read and written to memory - masterReadBytes - .init(max_masters) - .flags(nozero | nonan); - - masterWriteBytes - .init(max_masters) - .flags(nozero | nonan); - - // per-master bytes read and written to memory rate - masterReadRate - .flags(nozero | nonan) - .precision(12); - - masterReadAccesses - .init(max_masters) - .flags(nozero); - - masterWriteAccesses - .init(max_masters) - .flags(nozero); - - masterReadTotalLat - .init(max_masters) - .flags(nozero | nonan); - - masterReadAvgLat - .flags(nonan) - .precision(2); - - masterWriteRate - .flags(nozero | nonan) - .precision(12); - - masterWriteTotalLat - .init(max_masters) - .flags(nozero | nonan); - - masterWriteAvgLat - .flags(nonan) - .precision(2); - - for (int i = 0; i < max_masters; i++) { - const std::string master = ctrl.system()->getMasterName(i); - masterReadBytes.subname(i, master); - masterReadRate.subname(i, master); - masterWriteBytes.subname(i, master); - masterWriteRate.subname(i, master); - masterReadAccesses.subname(i, master); - masterWriteAccesses.subname(i, master); - masterReadTotalLat.subname(i, master); - masterReadAvgLat.subname(i, master); - masterWriteTotalLat.subname(i, master); - masterWriteAvgLat.subname(i, master); - } - - // Formula stats - avgRdBWSys = (bytesReadSys / 1000000) / simSeconds; - avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds; - - avgGap = totGap / (readReqs + writeReqs); - - masterReadRate = masterReadBytes / simSeconds; - masterWriteRate = masterWriteBytes / simSeconds; - masterReadAvgLat = masterReadTotalLat / masterReadAccesses; - masterWriteAvgLat = masterWriteTotalLat / masterWriteAccesses; -} - -void -DRAMInterface::DRAMStats::resetStats() -{ - dram.lastStatsResetTick = curTick(); -} - -DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram) - : Stats::Group(&_dram), - dram(_dram), - - ADD_STAT(readBursts, "Number of DRAM read bursts"), - ADD_STAT(writeBursts, "Number of DRAM write bursts"), - - ADD_STAT(perBankRdBursts, "Per bank write bursts"), - ADD_STAT(perBankWrBursts, "Per bank write bursts"), - - ADD_STAT(totQLat, "Total ticks spent queuing"), - ADD_STAT(totBusLat, "Total ticks spent in databus transfers"), - ADD_STAT(totMemAccLat, - "Total ticks spent from burst creation until serviced " - "by the DRAM"), - - ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"), - ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"), - ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"), - - ADD_STAT(readRowHits, "Number of row buffer hits during reads"), - ADD_STAT(writeRowHits, "Number of row buffer hits during writes"), - ADD_STAT(readRowHitRate, "Row buffer hit rate for reads"), - ADD_STAT(writeRowHitRate, "Row buffer hit rate for writes"), - - ADD_STAT(bytesPerActivate, "Bytes accessed per row activation"), - ADD_STAT(bytesRead, "Total number of bytes read from DRAM"), - ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"), - ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"), - ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"), - ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"), - - ADD_STAT(busUtil, "Data bus utilization in percentage"), - ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"), - ADD_STAT(busUtilWrite, "Data bus utilization in percentage for writes"), - - ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined") - -{ -} - -void -DRAMInterface::DRAMStats::regStats() -{ - using namespace 
Stats; - - avgQLat.precision(2); - avgBusLat.precision(2); - avgMemAccLat.precision(2); - - readRowHitRate.precision(2); - writeRowHitRate.precision(2); - - perBankRdBursts.init(dram.banksPerRank * dram.ranksPerChannel); - perBankWrBursts.init(dram.banksPerRank * dram.ranksPerChannel); - - bytesPerActivate - .init(dram.maxAccessesPerRow ? - dram.maxAccessesPerRow : dram.rowBufferSize) - .flags(nozero); - - peakBW.precision(2); - busUtil.precision(2); - busUtilWrite.precision(2); - busUtilRead.precision(2); - - pageHitRate.precision(2); - - // Formula stats - avgQLat = totQLat / readBursts; - avgBusLat = totBusLat / readBursts; - avgMemAccLat = totMemAccLat / readBursts; - - readRowHitRate = (readRowHits / readBursts) * 100; - writeRowHitRate = (writeRowHits / writeBursts) * 100; - - avgRdBW = (bytesRead / 1000000) / simSeconds; - avgWrBW = (bytesWritten / 1000000) / simSeconds; - peakBW = (SimClock::Frequency / dram.burstDelay()) * - dram.bytesPerBurst() / 1000000; - - busUtil = (avgRdBW + avgWrBW) / peakBW * 100; - busUtilRead = avgRdBW / peakBW * 100; - busUtilWrite = avgWrBW / peakBW * 100; - - pageHitRate = (writeRowHits + readRowHits) / - (writeBursts + readBursts) * 100; -} - -DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank) - : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()), - rank(_rank), - - ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"), - ADD_STAT(preEnergy, "Energy for precharge commands per rank (pJ)"), - ADD_STAT(readEnergy, "Energy for read commands per rank (pJ)"), - ADD_STAT(writeEnergy, "Energy for write commands per rank (pJ)"), - ADD_STAT(refreshEnergy, "Energy for refresh commands per rank (pJ)"), - ADD_STAT(actBackEnergy, "Energy for active background per rank (pJ)"), - ADD_STAT(preBackEnergy, "Energy for precharge background per rank (pJ)"), - ADD_STAT(actPowerDownEnergy, - "Energy for active power-down per rank (pJ)"), - ADD_STAT(prePowerDownEnergy, - "Energy for precharge power-down per rank (pJ)"), - ADD_STAT(selfRefreshEnergy, "Energy for self refresh per rank (pJ)"), - - ADD_STAT(totalEnergy, "Total energy per rank (pJ)"), - ADD_STAT(averagePower, "Core power per rank (mW)"), - - ADD_STAT(totalIdleTime, "Total Idle time Per DRAM Rank"), - ADD_STAT(pwrStateTime, "Time in different power states") -{ -} - -void -DRAMInterface::RankStats::regStats() -{ - Stats::Group::regStats(); - - pwrStateTime - .init(6) - .subname(0, "IDLE") - .subname(1, "REF") - .subname(2, "SREF") - .subname(3, "PRE_PDN") - .subname(4, "ACT") - .subname(5, "ACT_PDN"); -} - -void -DRAMInterface::RankStats::resetStats() -{ - Stats::Group::resetStats(); - - rank.resetStats(); -} - -void -DRAMInterface::RankStats::preDumpStats() -{ - Stats::Group::preDumpStats(); - - rank.computeStats(); -} - -void -DRAMCtrl::recvFunctional(PacketPtr pkt) -{ - if (dram && dram->getAddrRange().contains(pkt->getAddr())) { - // rely on the abstract memory - dram->functionalAccess(pkt); - } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { - // rely on the abstract memory - nvm->functionalAccess(pkt); - } else { - panic("Can't handle address range for packet %s\n", - pkt->print()); - } -} - -Port & -DRAMCtrl::getPort(const string &if_name, PortID idx) -{ - if (if_name != "port") { - return QoS::MemCtrl::getPort(if_name, idx); - } else { - return port; - } -} - -bool -DRAMCtrl::allIntfDrained() const -{ - // ensure dram is in power down and refresh IDLE states - bool dram_drained = !dram || dram->allRanksDrained(); - // No outstanding NVM writes - // 
All other queues verified as needed with calling logic - bool nvm_drained = !nvm || nvm->allRanksDrained(); - return (dram_drained && nvm_drained); -} - -DrainState -DRAMCtrl::drain() -{ - // if there is anything in any of our internal queues, keep track - // of that as well - if (!(!totalWriteQueueSize && !totalReadQueueSize && respQueue.empty() && - allIntfDrained())) { - - DPRINTF(Drain, "DRAM controller not drained, write: %d, read: %d," - " resp: %d\n", totalWriteQueueSize, totalReadQueueSize, - respQueue.size()); - - // the only queue that is not drained automatically over time - // is the write queue, thus kick things into action if needed - if (!totalWriteQueueSize && !nextReqEvent.scheduled()) { - schedule(nextReqEvent, curTick()); - } - - if (dram) - dram->drainRanks(); - - return DrainState::Draining; - } else { - return DrainState::Drained; - } -} - -void -DRAMCtrl::drainResume() -{ - if (!isTimingMode && system()->isTimingMode()) { - // if we switched to timing mode, kick things into action, - // and behave as if we restored from a checkpoint - startup(); - dram->startup(); - } else if (isTimingMode && !system()->isTimingMode()) { - // if we switch from timing mode, stop the refresh events to - // not cause issues with KVM - if (dram) - dram->suspend(); - } - - // update the mode - isTimingMode = system()->isTimingMode(); -} - -DRAMCtrl::MemoryPort::MemoryPort(const std::string& name, DRAMCtrl& _ctrl) - : QueuedSlavePort(name, &_ctrl, queue), queue(_ctrl, *this, true), - ctrl(_ctrl) -{ } - -AddrRangeList -DRAMCtrl::MemoryPort::getAddrRanges() const -{ - AddrRangeList ranges; - if (ctrl.dram) { - DPRINTF(DRAM, "Pushing DRAM ranges to port\n"); - ranges.push_back(ctrl.dram->getAddrRange()); - } - if (ctrl.nvm) { - DPRINTF(DRAM, "Pushing NVM ranges to port\n"); - ranges.push_back(ctrl.nvm->getAddrRange()); - } - return ranges; -} - -void -DRAMCtrl::MemoryPort::recvFunctional(PacketPtr pkt) -{ - pkt->pushLabel(ctrl.name()); - - if (!queue.trySatisfyFunctional(pkt)) { - // Default implementation of SimpleTimingPort::recvFunctional() - // calls recvAtomic() and throws away the latency; we can save a - // little here by just not calculating the latency. 
- ctrl.recvFunctional(pkt); - } - - pkt->popLabel(); -} - -Tick -DRAMCtrl::MemoryPort::recvAtomic(PacketPtr pkt) -{ - return ctrl.recvAtomic(pkt); -} - -bool -DRAMCtrl::MemoryPort::recvTimingReq(PacketPtr pkt) -{ - // pass it to the memory controller - return ctrl.recvTimingReq(pkt); -} - -DRAMCtrl* -DRAMCtrlParams::create() -{ - return new DRAMCtrl(this); -} - -NVMInterface::NVMInterface(const NVMInterfaceParams* _p) - : MemInterface(_p), - maxPendingWrites(_p->max_pending_writes), - maxPendingReads(_p->max_pending_reads), - twoCycleRdWr(_p->two_cycle_rdwr), - tREAD(_p->tREAD), tWRITE(_p->tWRITE), tSEND(_p->tSEND), - stats(*this), - writeRespondEvent([this]{ processWriteRespondEvent(); }, name()), - readReadyEvent([this]{ processReadReadyEvent(); }, name()), - nextReadAt(0), numPendingReads(0), numReadDataReady(0), - numReadsToIssue(0), numWritesQueued(0), - readBufferSize(_p->read_buffer_size), - writeBufferSize(_p->write_buffer_size) -{ - DPRINTF(NVM, "Setting up NVM Interface\n"); - - fatal_if(!isPowerOf2(burstSize), "NVM burst size %d is not allowed, " - "must be a power of two\n", burstSize); - - // sanity check the ranks since we rely on bit slicing for the - // address decoding - fatal_if(!isPowerOf2(ranksPerChannel), "NVM rank count of %d is " - "not allowed, must be a power of two\n", ranksPerChannel); - - for (int i = 0; i < ranksPerChannel; i++) { - // Add NVM ranks to the system - DPRINTF(NVM, "Creating NVM rank %d \n", i); - Rank* rank = new Rank(_p, i, *this); - ranks.push_back(rank); - } - - uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size()); - - DPRINTF(NVM, "NVM capacity %lld (%lld) bytes\n", capacity, - AbstractMemory::size()); - - rowsPerBank = capacity / (rowBufferSize * - banksPerRank * ranksPerChannel); - -} - -NVMInterface* -NVMInterfaceParams::create() -{ - return new NVMInterface(this); -} - -NVMInterface::Rank::Rank(const NVMInterfaceParams* _p, - int _rank, NVMInterface& _nvm) - : EventManager(&_nvm), nvm(_nvm), rank(_rank), - banks(_p->banks_per_rank) -{ - for (int b = 0; b < _p->banks_per_rank; b++) { - banks[b].bank = b; - // No bank groups; simply assign to bank number - banks[b].bankgr = b; - } -} - -void -NVMInterface::init() -{ - AbstractMemory::init(); } - -void NVMInterface::setupRank(const uint8_t rank, const bool is_read) -{ - if (is_read) { - // increment count to trigger read and track number of reads in Q - numReadsToIssue++; - } else { - // increment count to track number of writes in Q - numWritesQueued++; - } -} - -pair<DRAMPacketQueue::iterator, Tick> -NVMInterface::chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const -{ - // remember if we found a hit, but one that cannot issue seamlessly - bool found_prepped_pkt = false; - - auto selected_pkt_it = queue.end(); - Tick selected_col_at = MaxTick; - - for (auto i = queue.begin(); i != queue.end() ; ++i) { - DRAMPacket* pkt = *i; - - // select optimal NVM packet in Q - if (!pkt->isDram()) { - const Bank& bank = ranks[pkt->rank]->banks[pkt->bank]; - const Tick col_allowed_at = pkt->isRead() ?
bank.rdAllowedAt : - bank.wrAllowedAt; - - // check if rank is not doing a refresh and thus is available, - // if not, jump to the next packet - if (burstReady(pkt)) { - DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__, - pkt->bank, pkt->rank); - - // no additional rank-to-rank or media delays - if (col_allowed_at <= min_col_at) { - // FCFS within entries that can issue without - // additional delay, such as same rank accesses - // or media delay requirements - selected_pkt_it = i; - selected_col_at = col_allowed_at; - // no need to look through the remaining queue entries - DPRINTF(NVM, "%s Seamless buffer hit\n", __func__); - break; - } else if (!found_prepped_pkt) { - // packet is to prepped region but cannot issue - // seamlessly; remember this one and continue - selected_pkt_it = i; - selected_col_at = col_allowed_at; - DPRINTF(NVM, "%s Prepped packet found \n", __func__); - found_prepped_pkt = true; - } - } else { - DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__, - pkt->bank, pkt->rank); - } - } - } - - if (selected_pkt_it == queue.end()) { - DPRINTF(NVM, "%s no available NVM ranks found\n", __func__); - } - - return make_pair(selected_pkt_it, selected_col_at); -} - -void -NVMInterface::chooseRead(DRAMPacketQueue& queue) -{ - Tick cmd_at = std::max(curTick(), nextReadAt); - - // This method does the arbitration between non-deterministic read - // requests to NVM. The chosen packet is not removed from the queue - // at this time. Removal from the queue will occur when the data is - // ready and a separate SEND command is issued to retrieve it via the - // chooseNext function in the top-level controller. - assert(!queue.empty()); - - assert(numReadsToIssue > 0); - numReadsToIssue--; - // For simplicity, issue non-deterministic reads in order (fcfs) - for (auto i = queue.begin(); i != queue.end() ; ++i) { - DRAMPacket* pkt = *i; - - // Find 1st NVM read packet that hasn't issued read command - if (pkt->readyTime == MaxTick && !pkt->isDram() && pkt->isRead()) { - // get the bank - Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank]; - - // issuing a read, inc counter and verify we haven't overrun - numPendingReads++; - assert(numPendingReads <= maxPendingReads); - - // increment the bytes accessed and the accesses per row - bank_ref.bytesAccessed += burstSize; - - // Verify command bandwidth to issue - // Host can issue read immediately with buffering closer - // to the NVM.
The actual execution at the NVM may be delayed - // due to busy resources - if (twoCycleRdWr) { - cmd_at = ctrl->verifyMultiCmd(cmd_at, - maxCommandsPerWindow, tCK); - } else { - cmd_at = ctrl->verifySingleCmd(cmd_at, - maxCommandsPerWindow); - } - - // Update delay to next read - // Ensures single read command issued per cycle - nextReadAt = cmd_at + tCK; - - // If accessing a new location in this bank, update timing - // and stats - if (bank_ref.openRow != pkt->row) { - // update the open bank, re-using row field - bank_ref.openRow = pkt->row; - - // sample the bytes accessed to a buffer in this bank - // here when we are re-buffering the data - stats.bytesPerBank.sample(bank_ref.bytesAccessed); - // start counting anew - bank_ref.bytesAccessed = 0; - - // holdoff next command to this bank until the read completes - // and the data has been successfully buffered - // can pipeline accesses to the same bank, sending them - // across the interface B2B, but will incur full access - // delay between data ready responses to different buffers - // in a bank - bank_ref.actAllowedAt = std::max(cmd_at, - bank_ref.actAllowedAt) + tREAD; - } - // update per packet readyTime to holdoff burst read operation - // overloading readyTime, which will be updated again when the - // burst is issued - pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt); - - DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. " - "Data ready at %d\n", - bank_ref.bank, cmd_at, pkt->readyTime); - - // Insert into read ready queue. It will be handled after - // the media delay has been met - if (readReadyQueue.empty()) { - assert(!readReadyEvent.scheduled()); - schedule(readReadyEvent, pkt->readyTime); - } else if (readReadyEvent.when() > pkt->readyTime) { - // move it sooner in time, to the first read with data - reschedule(readReadyEvent, pkt->readyTime); - } else { - assert(readReadyEvent.scheduled()); - } - readReadyQueue.push_back(pkt->readyTime); - - // found an NVM read to issue - break out - break; - } - } -} - -void -NVMInterface::processReadReadyEvent() -{ - // signal that there is read data ready to be transmitted - numReadDataReady++; - - DPRINTF(NVM, - "processReadReadyEvent(): Data for an NVM read is ready. 
" - "numReadDataReady is %d\t numPendingReads is %d\n", - numReadDataReady, numPendingReads); - - // Find lowest ready time and verify it is equal to curTick - // also find the next lowest to schedule next event - // Done with this response, erase entry - auto ready_it = readReadyQueue.begin(); - Tick next_ready_at = MaxTick; - for (auto i = readReadyQueue.begin(); i != readReadyQueue.end() ; ++i) { - if (*ready_it > *i) { - next_ready_at = *ready_it; - ready_it = i; - } else if ((next_ready_at > *i) && (i != ready_it)) { - next_ready_at = *i; - } - } - - // Verify we found the time of this event and remove it - assert(*ready_it == curTick()); - readReadyQueue.erase(ready_it); - - if (!readReadyQueue.empty()) { - assert(readReadyQueue.front() >= curTick()); - assert(!readReadyEvent.scheduled()); - schedule(readReadyEvent, next_ready_at); - } - - // It is possible that a new command kicks things back into - // action before reaching this point but need to ensure that we - // continue to process new commands as read data becomes ready - // This will also trigger a drain if needed - if (!ctrl->requestEventScheduled()) { - DPRINTF(NVM, "Restart controller scheduler immediately\n"); - ctrl->restartScheduler(curTick()); - } -} - - -bool -NVMInterface::burstReady(DRAMPacket* pkt) const { - bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true)) && - (pkt->readyTime <= curTick()) && (numReadDataReady > 0); - bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true) && - !writeRespQueueFull(); - return (read_rdy || write_rdy); -} - -pair -NVMInterface::doBurstAccess(DRAMPacket* pkt, Tick next_burst_at) -{ - DPRINTF(NVM, "NVM Timing access to addr %lld, rank/bank/row %d %d %d\n", - pkt->addr, pkt->rank, pkt->bank, pkt->row); - - // get the bank - Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank]; - - // respect any constraints on the command - const Tick bst_allowed_at = pkt->isRead() ? - bank_ref.rdAllowedAt : bank_ref.wrAllowedAt; - - // we need to wait until the bus is available before we can issue - // the command; need minimum of tBURST between commands - Tick cmd_at = std::max(bst_allowed_at, curTick()); - - // we need to wait until the bus is available before we can issue - // the command; need minimum of tBURST between commands - cmd_at = std::max(cmd_at, next_burst_at); - - // Verify there is command bandwidth to issue - // Read burst (send command) is a simple data access and only requires - // one command cycle - // Write command may require multiple cycles to enable larger address space - if (pkt->isRead() || !twoCycleRdWr) { - cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow); - } else { - cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK); - } - // update the packet ready time to reflect when data will be transferred - // Use the same bus delays defined for NVM - pkt->readyTime = cmd_at + tSEND + tBURST; - - Tick dly_to_rd_cmd; - Tick dly_to_wr_cmd; - for (auto n : ranks) { - for (int i = 0; i < banksPerRank; i++) { - // base delay is a function of tBURST and bus turnaround - dly_to_rd_cmd = pkt->isRead() ? tBURST : writeToReadDelay(); - dly_to_wr_cmd = pkt->isRead() ? 
readToWriteDelay() : tBURST; - - if (pkt->rank != n->rank) { - // adjust timing for different ranks - // Need to account for rank-to-rank switching with tCS - dly_to_wr_cmd = rankToRankDelay(); - dly_to_rd_cmd = rankToRankDelay(); - } - n->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd, - n->banks[i].rdAllowedAt); - - n->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd, - n->banks[i].wrAllowedAt); - } - } - - DPRINTF(NVM, "NVM Access to %lld, ready at %lld.\n", - pkt->addr, pkt->readyTime); - - if (pkt->isRead()) { - // completed the read, decrement counters - assert(numPendingReads != 0); - assert(numReadDataReady != 0); - - numPendingReads--; - numReadDataReady--; - } else { - // Adjust number of NVM writes in Q - assert(numWritesQueued > 0); - numWritesQueued--; - - // increment the bytes accessed and the accesses per row - // only increment for writes as the reads are handled when - // the non-deterministic read is issued, before the data transfer - bank_ref.bytesAccessed += burstSize; - - // Commands will be issued serially when accessing the same bank - // Commands can issue in parallel to different banks - if ((bank_ref.bank == pkt->bank) && - (bank_ref.openRow != pkt->row)) { - // update the open buffer, re-using row field - bank_ref.openRow = pkt->row; - - // sample the bytes accessed to a buffer in this bank - // here when we are re-buffering the data - stats.bytesPerBank.sample(bank_ref.bytesAccessed); - // start counting anew - bank_ref.bytesAccessed = 0; - } - - // Determine when write will actually complete, assuming it is - // scheduled to push to NVM immediately - // update actAllowedAt to serialize next command completion that - // accesses this bank; must wait until this write completes - // Data accesses to the same buffer in this bank - // can issue immediately after actAllowedAt expires, without - // waiting additional delay of tWRITE. Can revisit this - // assumption/simplification in the future. - bank_ref.actAllowedAt = std::max(pkt->readyTime, - bank_ref.actAllowedAt) + tWRITE; - - // Need to track number of outstanding writes to - // ensure 'buffer' on media controller does not overflow - assert(!writeRespQueueFull()); - - // Insert into write done queue. It will be handled after - // the media delay has been met - if (writeRespQueueEmpty()) { - assert(!writeRespondEvent.scheduled()); - schedule(writeRespondEvent, bank_ref.actAllowedAt); - } else { - assert(writeRespondEvent.scheduled()); - } - writeRespQueue.push_back(bank_ref.actAllowedAt); - writeRespQueue.sort(); - if (writeRespondEvent.when() > bank_ref.actAllowedAt) { - DPRINTF(NVM, "Rescheduled respond event from %lld to %11d\n", - writeRespondEvent.when(), bank_ref.actAllowedAt); - DPRINTF(NVM, "Front of response queue is %11d\n", - writeRespQueue.front()); - reschedule(writeRespondEvent, bank_ref.actAllowedAt); - } - - } - - // Update the stats - if (pkt->isRead()) { - stats.readBursts++; - stats.bytesRead += burstSize; - stats.perBankRdBursts[pkt->bankId]++; - stats.pendingReads.sample(numPendingReads); - - // Update latency stats - stats.totMemAccLat += pkt->readyTime - pkt->entryTime; - stats.totBusLat += tBURST; - stats.totQLat += cmd_at - pkt->entryTime; - } else { - stats.writeBursts++; - stats.bytesWritten += burstSize; - stats.perBankWrBursts[pkt->bankId]++; - } - - return make_pair(cmd_at, cmd_at + tBURST); -} - -void -NVMInterface::processWriteRespondEvent() -{ - DPRINTF(NVM, - "processWriteRespondEvent(): A NVM write reached its readyTime. 
" - "%d remaining pending NVM writes\n", writeRespQueue.size()); - - // Update stat to track histogram of pending writes - stats.pendingWrites.sample(writeRespQueue.size()); - - // Done with this response, pop entry - writeRespQueue.pop_front(); - - if (!writeRespQueue.empty()) { - assert(writeRespQueue.front() >= curTick()); - assert(!writeRespondEvent.scheduled()); - schedule(writeRespondEvent, writeRespQueue.front()); - } - - // It is possible that a new command kicks things back into - // action before reaching this point but need to ensure that we - // continue to process new commands as writes complete at the media and - // credits become available. This will also trigger a drain if needed - if (!ctrl->requestEventScheduled()) { - DPRINTF(NVM, "Restart controller scheduler immediately\n"); - ctrl->restartScheduler(curTick()); - } -} - -void -NVMInterface::addRankToRankDelay(Tick cmd_at) -{ - // update timing for NVM ranks due to bursts issued - // to ranks for other media interfaces - for (auto n : ranks) { - for (int i = 0; i < banksPerRank; i++) { - // different rank by default - // Need to only account for rank-to-rank switching - n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(), - n->banks[i].rdAllowedAt); - n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(), - n->banks[i].wrAllowedAt); - } - } -} - -bool -NVMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm) -{ - DPRINTF(NVM,"isBusy: numReadDataReady = %d\n", numReadDataReady); - // Determine NVM is busy and cannot issue a burst - // A read burst cannot issue when data is not ready from the NVM - // Also check that we have reads queued to ensure we can change - // bus direction to service potential write commands. - // A write cannot issue once we've reached MAX pending writes - // Only assert busy for the write case when there are also - // no reads in Q and the write queue only contains NVM commands - // This allows the bus state to switch and service reads - return (ctrl->inReadBusState(true) ? 
- (numReadDataReady == 0) && !read_queue_empty : - writeRespQueueFull() && read_queue_empty && - all_writes_nvm); -} - - -NVMInterface::NVMStats::NVMStats(NVMInterface &_nvm) - : Stats::Group(&_nvm), - nvm(_nvm), - - ADD_STAT(readBursts, "Number of NVM read bursts"), - ADD_STAT(writeBursts, "Number of NVM write bursts"), - - ADD_STAT(perBankRdBursts, "Per bank read bursts"), - ADD_STAT(perBankWrBursts, "Per bank write bursts"), - - ADD_STAT(totQLat, "Total ticks spent queuing"), - ADD_STAT(totBusLat, "Total ticks spent in databus transfers"), - ADD_STAT(totMemAccLat, - "Total ticks spent from burst creation until serviced " - "by the NVM"), - ADD_STAT(avgQLat, "Average queueing delay per NVM burst"), - ADD_STAT(avgBusLat, "Average bus latency per NVM burst"), - ADD_STAT(avgMemAccLat, "Average memory access latency per NVM burst"), - - ADD_STAT(bytesRead, "Total number of bytes read from NVM"), - ADD_STAT(bytesWritten, "Total number of bytes written to NVM"), - ADD_STAT(avgRdBW, "Average NVM read bandwidth in MiBytes/s"), - ADD_STAT(avgWrBW, "Average NVM write bandwidth in MiBytes/s"), - ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"), - ADD_STAT(busUtil, "NVM Data bus utilization in percentage"), - ADD_STAT(busUtilRead, "NVM Data bus read utilization in percentage"), - ADD_STAT(busUtilWrite, "NVM Data bus write utilization in percentage"), - - ADD_STAT(pendingReads, "Reads issued to NVM for which data has not been " - "transferred"), - ADD_STAT(pendingWrites, "Number of outstanding writes to NVM"), - ADD_STAT(bytesPerBank, "Bytes read within a bank before loading " - "new bank") -{ -} - -void -NVMInterface::NVMStats::regStats() -{ - using namespace Stats; - - perBankRdBursts.init(nvm.ranksPerChannel == 0 ? 1 : - nvm.banksPerRank * nvm.ranksPerChannel); - - perBankWrBursts.init(nvm.ranksPerChannel == 0 ? 1 : - nvm.banksPerRank * nvm.ranksPerChannel); - - avgQLat.precision(2); - avgBusLat.precision(2); - avgMemAccLat.precision(2); - - avgRdBW.precision(2); - avgWrBW.precision(2); - peakBW.precision(2); - - busUtil.precision(2); - busUtilRead.precision(2); - busUtilWrite.precision(2); - - pendingReads - .init(nvm.maxPendingReads) - .flags(nozero); - - pendingWrites - .init(nvm.maxPendingWrites) - .flags(nozero); - - bytesPerBank - .init(nvm.rowBufferSize) - .flags(nozero); - - avgQLat = totQLat / readBursts; - avgBusLat = totBusLat / readBursts; - avgMemAccLat = totMemAccLat / readBursts; - - avgRdBW = (bytesRead / 1000000) / simSeconds; - avgWrBW = (bytesWritten / 1000000) / simSeconds; - peakBW = (SimClock::Frequency / nvm.tBURST) * - nvm.burstSize / 1000000; - - busUtil = (avgRdBW + avgWrBW) / peakBW * 100; - busUtilRead = avgRdBW / peakBW * 100; - busUtilWrite = avgWrBW / peakBW * 100; -} diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh deleted file mode 100644 index 7fc499f0d..000000000 --- a/src/mem/dram_ctrl.hh +++ /dev/null @@ -1,1955 +0,0 @@ -/* - * Copyright (c) 2012-2020 ARM Limited - * All rights reserved - * - * The license below extends only to copyright in the software and shall - * not be construed as granting a license to any other intellectual - * property including but not limited to intellectual property relating - * to a hardware implementation of the functionality of the software - * licensed hereunder.
You may use the software subject to the license - * terms below provided that you ensure that this notice is replicated - * unmodified and in its entirety in all distributions of the software, - * modified or unmodified, in source code or in binary form. - * - * Copyright (c) 2013 Amin Farmahini-Farahani - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * @file - * DRAMCtrl declaration - */ - -#ifndef __MEM_DRAM_CTRL_HH__ -#define __MEM_DRAM_CTRL_HH__ - -#include -#include -#include -#include -#include - -#include "base/statistics.hh" -#include "enums/AddrMap.hh" -#include "enums/MemSched.hh" -#include "enums/PageManage.hh" -#include "mem/abstract_mem.hh" -#include "mem/drampower.hh" -#include "mem/qos/mem_ctrl.hh" -#include "mem/qport.hh" -#include "params/DRAMCtrl.hh" -#include "params/DRAMInterface.hh" -#include "params/MemInterface.hh" -#include "params/NVMInterface.hh" -#include "sim/eventq.hh" - -class DRAMInterfaceParams; -class NVMInterfaceParams; - -/** - * A burst helper helps organize and manage a packet that is larger than - * the DRAM burst size. A system packet that is larger than the burst size - * is split into multiple DRAM packets and all those DRAM packets point to - * a single burst helper such that we know when the whole packet is served. - */ -class BurstHelper -{ - public: - - /** Number of DRAM bursts requred for a system packet **/ - const unsigned int burstCount; - - /** Number of DRAM bursts serviced so far for a system packet **/ - unsigned int burstsServiced; - - BurstHelper(unsigned int _burstCount) - : burstCount(_burstCount), burstsServiced(0) - { } -}; - -/** - * A DRAM packet stores packets along with the timestamp of when - * the packet entered the queue, and also the decoded address. 
- */ -class DRAMPacket -{ - public: - - /** When did request enter the controller */ - const Tick entryTime; - - /** When will request leave the controller */ - Tick readyTime; - - /** This comes from the outside world */ - const PacketPtr pkt; - - /** MasterID associated with the packet */ - const MasterID _masterId; - - const bool read; - - /** Does this packet access DRAM?*/ - const bool dram; - - /** Will be populated by address decoder */ - const uint8_t rank; - const uint8_t bank; - const uint32_t row; - - /** - * Bank id is calculated considering banks in all the ranks - * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and - * bankId = 8 --> rank1, bank0 - */ - const uint16_t bankId; - - /** - * The starting address of the DRAM packet. - * This address could be unaligned to burst size boundaries. The - * reason is to keep the address offset so we can accurately check - * incoming read packets with packets in the write queue. - */ - Addr addr; - - /** - * The size of this dram packet in bytes - * It is always equal or smaller than DRAM burst size - */ - unsigned int size; - - /** - * A pointer to the BurstHelper if this DRAMPacket is a split packet - * If not a split packet (common case), this is set to NULL - */ - BurstHelper* burstHelper; - - /** - * QoS value of the encapsulated packet read at queuing time - */ - uint8_t _qosValue; - - /** - * Set the packet QoS value - * (interface compatibility with Packet) - */ - inline void qosValue(const uint8_t qv) { _qosValue = qv; } - - /** - * Get the packet QoS value - * (interface compatibility with Packet) - */ - inline uint8_t qosValue() const { return _qosValue; } - - /** - * Get the packet MasterID - * (interface compatibility with Packet) - */ - inline MasterID masterId() const { return _masterId; } - - /** - * Get the packet size - * (interface compatibility with Packet) - */ - inline unsigned int getSize() const { return size; } - - /** - * Get the packet address - * (interface compatibility with Packet) - */ - inline Addr getAddr() const { return addr; } - - /** - * Return true if its a read packet - * (interface compatibility with Packet) - */ - inline bool isRead() const { return read; } - - /** - * Return true if its a write packet - * (interface compatibility with Packet) - */ - inline bool isWrite() const { return !read; } - - /** - * Return true if its a DRAM access - */ - inline bool isDram() const { return dram; } - - DRAMPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _rank, - uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr, - unsigned int _size) - : entryTime(curTick()), readyTime(curTick()), pkt(_pkt), - _masterId(pkt->masterId()), - read(is_read), dram(is_dram), rank(_rank), bank(_bank), row(_row), - bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), - _qosValue(_pkt->qosValue()) - { } - -}; - -// The DRAM packets are store in a multiple dequeue structure, -// based on their QoS priority -typedef std::deque DRAMPacketQueue; - - -/** - * General interface to memory device - * Includes functions and parameters shared across media types - */ -class MemInterface : public AbstractMemory -{ - protected: - /** - * A basic class to track the bank state, i.e. what row is - * currently open (if any), when is the bank free to accept a new - * column (read/write) command, when can it be precharged, and - * when can it be activated. - * - * The bank also keeps track of how many bytes have been accessed - * in the open row since it was opened. 
- */ - class Bank - { - - public: - - static const uint32_t NO_ROW = -1; - - uint32_t openRow; - uint8_t bank; - uint8_t bankgr; - - Tick rdAllowedAt; - Tick wrAllowedAt; - Tick preAllowedAt; - Tick actAllowedAt; - - uint32_t rowAccesses; - uint32_t bytesAccessed; - - Bank() : - openRow(NO_ROW), bank(0), bankgr(0), - rdAllowedAt(0), wrAllowedAt(0), preAllowedAt(0), actAllowedAt(0), - rowAccesses(0), bytesAccessed(0) - { } - }; - - /** - * A pointer to the parent DRAMCtrl instance - */ - DRAMCtrl* ctrl; - - /** - * Number of commands that can issue in the defined controller - * command window, used to verify command bandwidth - */ - unsigned int maxCommandsPerWindow; - - /** - * Memory controller configuration initialized based on parameter - * values. - */ - Enums::AddrMap addrMapping; - - /** - * General device and channel characteristics - * The rowsPerBank is determined based on the capacity, number of - * ranks and banks, the burst size, and the row buffer size. - */ - const uint32_t burstSize; - const uint32_t deviceSize; - const uint32_t deviceRowBufferSize; - const uint32_t devicesPerRank; - const uint32_t rowBufferSize; - const uint32_t burstsPerRowBuffer; - const uint32_t burstsPerStripe; - const uint32_t ranksPerChannel; - const uint32_t banksPerRank; - uint32_t rowsPerBank; - - /** - * General timing requirements - */ - const Tick M5_CLASS_VAR_USED tCK; - const Tick tCS; - const Tick tBURST; - const Tick tRTW; - const Tick tWTR; - - /* - * @return delay between write and read commands - */ - virtual Tick writeToReadDelay() const { return tBURST + tWTR; } - - /* - * @return delay between write and read commands - */ - Tick readToWriteDelay() const { return tBURST + tRTW; } - - /* - * @return delay between accesses to different ranks - */ - Tick rankToRankDelay() const { return tBURST + tCS; } - - - public: - /** Set a pointer to the controller and initialize - * interface based on controller parameters - * @param _ctrl pointer to the parent controller - * @param command_window size of command window used to - * check command bandwidth - */ - void setCtrl(DRAMCtrl* _ctrl, unsigned int command_window); - - /** - * Get an address in a dense range which starts from 0. The input - * address is the physical address of the request in an address - * space that contains other SimObjects apart from this - * controller. - * - * @param addr The intput address which should be in the addrRange - * @return An address in the continues range [0, max) - */ - Addr getCtrlAddr(Addr addr) { return range.getOffset(addr); } - - /** - * Setup the rank based on packet received - * - * @param integer value of rank to be setup. used to index ranks vector - * @param are we setting up rank for read or write packet? - */ - virtual void setupRank(const uint8_t rank, const bool is_read) = 0; - - /** - * Check drain state of interface - * - * @return true if all ranks are drained and idle - * - */ - virtual bool allRanksDrained() const = 0; - - /** - * For FR-FCFS policy, find first command that can issue - * Function will be overriden by interface to select based - * on media characteristics, used to determine when read - * or write can issue. 
- * - * @param queue Queued requests to consider - * @param min_col_at Minimum tick for 'seamless' issue - * @return an iterator to the selected packet, else queue.end() - * @return the tick when the packet selected will issue - */ - virtual std::pair - chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const = 0; - - /* - * Function to calulate unloaded latency - */ - virtual Tick accessLatency() const = 0; - - /** - * @return number of bytes in a burst for this interface - */ - uint32_t bytesPerBurst() const { return burstSize; } - - /* - * @return time to offset next command - */ - virtual Tick commandOffset() const = 0; - - /** - * Check if a burst operation can be issued to the interface - * - * @param Return true if RD/WR can issue - */ - virtual bool burstReady(DRAMPacket* pkt) const = 0; - - /** - * Determine the required delay for an access to a different rank - * - * @return required rank to rank delay - */ - Tick rankDelay() const { return tCS; } - - /** - * - * @return minimum additional bus turnaround required for read-to-write - */ - Tick minReadToWriteDataGap() const { return std::min(tRTW, tCS); } - - /** - * - * @return minimum additional bus turnaround required for write-to-read - */ - Tick minWriteToReadDataGap() const { return std::min(tWTR, tCS); } - - /** - * Address decoder to figure out physical mapping onto ranks, - * banks, and rows. This function is called multiple times on the same - * system packet if the pakcet is larger than burst of the memory. The - * pkt_addr is used for the offset within the packet. - * - * @param pkt The packet from the outside world - * @param pkt_addr The starting address of the DRAM packet - * @param size The size of the DRAM packet in bytes - * @param is_read Is the request for a read or a write to memory - * @param is_dram Is the request to a DRAM interface - * @return A DRAMPacket pointer with the decoded information - */ - DRAMPacket* decodePacket(const PacketPtr pkt, Addr pkt_addr, - unsigned int size, bool is_read, bool is_dram); - - /** - * Add rank to rank delay to bus timing to all banks in all ranks - * when access to an alternate interface is issued - * - * param cmd_at Time of current command used as starting point for - * addition of rank-to-rank delay - */ - virtual void addRankToRankDelay(Tick cmd_at) = 0; - - typedef MemInterfaceParams Params; - MemInterface(const Params* _p); -}; - -/** - * Interface to DRAM devices with media specific parameters, - * statistics, and functions. - * The DRAMInterface includes a class for individual ranks - * and per rank functions. - */ -class DRAMInterface : public MemInterface -{ - private: - /** - * Simple structure to hold the values needed to keep track of - * commands for DRAMPower - */ - struct Command - { - Data::MemCommand::cmds type; - uint8_t bank; - Tick timeStamp; - - constexpr Command(Data::MemCommand::cmds _type, uint8_t _bank, - Tick time_stamp) - : type(_type), bank(_bank), timeStamp(time_stamp) - { } - }; - - /** - * The power state captures the different operational states of - * the DRAM and interacts with the bus read/write state machine, - * and the refresh state machine. - * - * PWR_IDLE : The idle state in which all banks are closed - * From here can transition to: PWR_REF, PWR_ACT, - * PWR_PRE_PDN - * - * PWR_REF : Auto-refresh state. Will transition when refresh is - * complete based on power state prior to PWR_REF - * From here can transition to: PWR_IDLE, PWR_PRE_PDN, - * PWR_SREF - * - * PWR_SREF : Self-refresh state. 
Entered after refresh if - * previous state was PWR_PRE_PDN - * From here can transition to: PWR_IDLE - * - * PWR_PRE_PDN : Precharge power down state - * From here can transition to: PWR_REF, PWR_IDLE - * - * PWR_ACT : Activate state in which one or more banks are open - * From here can transition to: PWR_IDLE, PWR_ACT_PDN - * - * PWR_ACT_PDN : Activate power down state - * From here can transition to: PWR_ACT - */ - enum PowerState - { - PWR_IDLE = 0, - PWR_REF, - PWR_SREF, - PWR_PRE_PDN, - PWR_ACT, - PWR_ACT_PDN - }; - - /** - * The refresh state is used to control the progress of the - * refresh scheduling. When normal operation is in progress the - * refresh state is idle. Once tREFI has elasped, a refresh event - * is triggered to start the following STM transitions which are - * used to issue a refresh and return back to normal operation - * - * REF_IDLE : IDLE state used during normal operation - * From here can transition to: REF_DRAIN - * - * REF_SREF_EXIT : Exiting a self-refresh; refresh event scheduled - * after self-refresh exit completes - * From here can transition to: REF_DRAIN - * - * REF_DRAIN : Drain state in which on going accesses complete. - * From here can transition to: REF_PD_EXIT - * - * REF_PD_EXIT : Evaluate pwrState and issue wakeup if needed - * Next state dependent on whether banks are open - * From here can transition to: REF_PRE, REF_START - * - * REF_PRE : Close (precharge) all open banks - * From here can transition to: REF_START - * - * REF_START : Issue refresh command and update DRAMPower stats - * From here can transition to: REF_RUN - * - * REF_RUN : Refresh running, waiting for tRFC to expire - * From here can transition to: REF_IDLE, REF_SREF_EXIT - */ - enum RefreshState - { - REF_IDLE = 0, - REF_DRAIN, - REF_PD_EXIT, - REF_SREF_EXIT, - REF_PRE, - REF_START, - REF_RUN - }; - - class Rank; - struct RankStats : public Stats::Group - { - RankStats(DRAMInterface &dram, Rank &rank); - - void regStats() override; - void resetStats() override; - void preDumpStats() override; - - Rank &rank; - - /* - * Command energies - */ - Stats::Scalar actEnergy; - Stats::Scalar preEnergy; - Stats::Scalar readEnergy; - Stats::Scalar writeEnergy; - Stats::Scalar refreshEnergy; - - /* - * Active Background Energy - */ - Stats::Scalar actBackEnergy; - - /* - * Precharge Background Energy - */ - Stats::Scalar preBackEnergy; - - /* - * Active Power-Down Energy - */ - Stats::Scalar actPowerDownEnergy; - - /* - * Precharge Power-Down Energy - */ - Stats::Scalar prePowerDownEnergy; - - /* - * self Refresh Energy - */ - Stats::Scalar selfRefreshEnergy; - - Stats::Scalar totalEnergy; - Stats::Scalar averagePower; - - /** - * Stat to track total DRAM idle time - * - */ - Stats::Scalar totalIdleTime; - - /** - * Track time spent in each power state. - */ - Stats::Vector pwrStateTime; - }; - - /** - * Rank class includes a vector of banks. Refresh and Power state - * machines are defined per rank. Events required to change the - * state of the refresh and power state machine are scheduled per - * rank. This class allows the implementation of rank-wise refresh - * and rank-wise power-down. - */ - class Rank : public EventManager - { - private: - - /** - * A reference to the parent DRAMInterface instance - */ - DRAMInterface& dram; - - /** - * Since we are taking decisions out of order, we need to keep - * track of what power transition is happening at what time - */ - PowerState pwrStateTrans; - - /** - * Previous low-power state, which will be re-entered after refresh. 
- */ - PowerState pwrStatePostRefresh; - - /** - * Track when we transitioned to the current power state - */ - Tick pwrStateTick; - - /** - * Keep track of when a refresh is due. - */ - Tick refreshDueAt; - - /** - * Function to update Power Stats - */ - void updatePowerStats(); - - /** - * Schedule a power state transition in the future, and - * potentially override an already scheduled transition. - * - * @param pwr_state Power state to transition to - * @param tick Tick when transition should take place - */ - void schedulePowerEvent(PowerState pwr_state, Tick tick); - - public: - - /** - * Current power state. - */ - PowerState pwrState; - - /** - * current refresh state - */ - RefreshState refreshState; - - /** - * rank is in or transitioning to power-down or self-refresh - */ - bool inLowPowerState; - - /** - * Current Rank index - */ - uint8_t rank; - - /** - * Track number of packets in read queue going to this rank - */ - uint32_t readEntries; - - /** - * Track number of packets in write queue going to this rank - */ - uint32_t writeEntries; - - /** - * Number of ACT, RD, and WR events currently scheduled - * Incremented when a refresh event is started as well - * Used to determine when a low-power state can be entered - */ - uint8_t outstandingEvents; - - /** - * delay low-power exit until this requirement is met - */ - Tick wakeUpAllowedAt; - - /** - * One DRAMPower instance per rank - */ - DRAMPower power; - - /** - * List of commands issued, to be sent to DRAMPpower at refresh - * and stats dump. Keep commands here since commands to different - * banks are added out of order. Will only pass commands up to - * curTick() to DRAMPower after sorting. - */ - std::vector cmdList; - - /** - * Vector of Banks. Each rank is made of several devices which in - * term are made from several banks. - */ - std::vector banks; - - /** - * To track number of banks which are currently active for - * this rank. - */ - unsigned int numBanksActive; - - /** List to keep track of activate ticks */ - std::deque actTicks; - - /** - * Track when we issued the last read/write burst - */ - Tick lastBurstTick; - - Rank(const DRAMInterfaceParams* _p, int _rank, - DRAMInterface& _dram); - - const std::string name() const { return csprintf("%d", rank); } - - /** - * Kick off accounting for power and refresh states and - * schedule initial refresh. - * - * @param ref_tick Tick for first refresh - */ - void startup(Tick ref_tick); - - /** - * Stop the refresh events. - */ - void suspend(); - - /** - * Check if there is no refresh and no preparation of refresh ongoing - * i.e. the refresh state machine is in idle - * - * @param Return true if the rank is idle from a refresh point of view - */ - bool inRefIdleState() const { return refreshState == REF_IDLE; } - - /** - * Check if the current rank has all banks closed and is not - * in a low power state - * - * @param Return true if the rank is idle from a bank - * and power point of view - */ - bool inPwrIdleState() const { return pwrState == PWR_IDLE; } - - /** - * Trigger a self-refresh exit if there are entries enqueued - * Exit if there are any read entries regardless of the bus state. - * If we are currently issuing write commands, exit if we have any - * write commands enqueued as well. - * Could expand this in the future to analyze state of entire queue - * if needed. 
- * - * @return boolean indicating self-refresh exit should be scheduled - */ - bool forceSelfRefreshExit() const; - - /** - * Check if the command queue of current rank is idle - * - * @param Return true if the there are no commands in Q. - * Bus direction determines queue checked. - */ - bool isQueueEmpty() const; - - /** - * Let the rank check if it was waiting for requests to drain - * to allow it to transition states. - */ - void checkDrainDone(); - - /** - * Push command out of cmdList queue that are scheduled at - * or before curTick() to DRAMPower library - * All commands before curTick are guaranteed to be complete - * and can safely be flushed. - */ - void flushCmdList(); - - /** - * Computes stats just prior to dump event - */ - void computeStats(); - - /** - * Reset stats on a stats event - */ - void resetStats(); - - /** - * Schedule a transition to power-down (sleep) - * - * @param pwr_state Power state to transition to - * @param tick Absolute tick when transition should take place - */ - void powerDownSleep(PowerState pwr_state, Tick tick); - - /** - * schedule and event to wake-up from power-down or self-refresh - * and update bank timing parameters - * - * @param exit_delay Relative tick defining the delay required between - * low-power exit and the next command - */ - void scheduleWakeUpEvent(Tick exit_delay); - - void processWriteDoneEvent(); - EventFunctionWrapper writeDoneEvent; - - void processActivateEvent(); - EventFunctionWrapper activateEvent; - - void processPrechargeEvent(); - EventFunctionWrapper prechargeEvent; - - void processRefreshEvent(); - EventFunctionWrapper refreshEvent; - - void processPowerEvent(); - EventFunctionWrapper powerEvent; - - void processWakeUpEvent(); - EventFunctionWrapper wakeUpEvent; - - protected: - RankStats stats; - }; - - /** - * Function for sorting Command structures based on timeStamp - * - * @param a Memory Command - * @param next Memory Command - * @return true if timeStamp of Command 1 < timeStamp of Command 2 - */ - static bool - sortTime(const Command& cmd, const Command& cmd_next) - { - return cmd.timeStamp < cmd_next.timeStamp; - } - - /** - * DRAM specific device characteristics - */ - const uint32_t bankGroupsPerRank; - const bool bankGroupArch; - - /** - * DRAM specific timing requirements - */ - const Tick tCL; - const Tick tBURST_MIN; - const Tick tBURST_MAX; - const Tick tCCD_L_WR; - const Tick tCCD_L; - const Tick tRCD; - const Tick tRP; - const Tick tRAS; - const Tick tWR; - const Tick tRTP; - const Tick tRFC; - const Tick tREFI; - const Tick tRRD; - const Tick tRRD_L; - const Tick tPPD; - const Tick tAAD; - const Tick tXAW; - const Tick tXP; - const Tick tXS; - const Tick clkResyncDelay; - const bool dataClockSync; - const bool burstInterleave; - const uint8_t twoCycleActivate; - const uint32_t activationLimit; - const Tick wrToRdDlySameBG; - const Tick rdToWrDlySameBG; - - Enums::PageManage pageMgmt; - /** - * Max column accesses (read and write) per row, before forefully - * closing it. - */ - const uint32_t maxAccessesPerRow; - - // timestamp offset - uint64_t timeStampOffset; - - // Holds the value of the DRAM rank of burst issued - uint8_t activeRank; - - /** Enable or disable DRAM powerdown states. */ - bool enableDRAMPowerdown; - - /** The time when stats were last reset used to calculate average power */ - Tick lastStatsResetTick; - - /** - * Keep track of when row activations happen, in order to enforce - * the maximum number of activations in the activation window. 
The
- * method updates the time that the banks become available based
- * on the current limits.
- *
- * @param rank_ref Reference to the rank
- * @param bank_ref Reference to the bank
- * @param act_tick Time when the activation takes place
- * @param row Index of the row
- */
- void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick,
- uint32_t row);
-
- /**
- * Precharge a given bank and also update when the precharge is
- * done. This will also deal with any stats related to the
- * accesses to the open page.
- *
- * @param rank_ref The rank to precharge
- * @param bank_ref The bank to precharge
- * @param pre_tick Time when the precharge takes place
- * @param auto_or_preall Is this an auto-precharge or precharge all command
- * @param trace If this is an auto precharge, do not add it to the trace
- */
- void prechargeBank(Rank& rank_ref, Bank& bank_ref,
- Tick pre_tick, bool auto_or_preall = false,
- bool trace = true);
-
- struct DRAMStats : public Stats::Group
- {
- DRAMStats(DRAMInterface &dram);
-
- void regStats() override;
- void resetStats() override;
-
- DRAMInterface &dram;
-
- /** total number of DRAM bursts serviced */
- Stats::Scalar readBursts;
- Stats::Scalar writeBursts;
-
- /** DRAM per bank stats */
- Stats::Vector perBankRdBursts;
- Stats::Vector perBankWrBursts;
-
- // Latencies summed over all requests
- Stats::Scalar totQLat;
- Stats::Scalar totBusLat;
- Stats::Scalar totMemAccLat;
-
- // Average latencies per request
- Stats::Formula avgQLat;
- Stats::Formula avgBusLat;
- Stats::Formula avgMemAccLat;
-
- // Row hit count and rate
- Stats::Scalar readRowHits;
- Stats::Scalar writeRowHits;
- Stats::Formula readRowHitRate;
- Stats::Formula writeRowHitRate;
- Stats::Histogram bytesPerActivate;
- // Number of bytes transferred to/from DRAM
- Stats::Scalar bytesRead;
- Stats::Scalar bytesWritten;
-
- // Average bandwidth
- Stats::Formula avgRdBW;
- Stats::Formula avgWrBW;
- Stats::Formula peakBW;
- // bus utilization
- Stats::Formula busUtil;
- Stats::Formula busUtilRead;
- Stats::Formula busUtilWrite;
- Stats::Formula pageHitRate;
- };
-
- DRAMStats stats;
-
- /**
- * Vector of dram ranks
- */
- std::vector<Rank*> ranks;
-
- public:
-
- /**
- * Buffer sizes for read and write queues in the controller
- * These are passed to the controller on instantiation
- * Defining them here allows for buffers to be resized based
- * on memory type / configuration.
- */
- const uint32_t readBufferSize;
- const uint32_t writeBufferSize;
-
- /*
- * @return delay between write and read commands
- */
- Tick writeToReadDelay() const override { return tBURST + tWTR + tCL; }
-
- /**
- * Find which are the earliest banks ready to issue an activate
- * for the enqueued requests. Assumes maximum of 32 banks per rank
- * Also checks if the bank is already prepped.
- *
- * @param queue Queued requests to consider
- * @param min_col_at time of seamless burst command
- * @return One-hot encoded mask of bank indices
- * @return boolean indicating burst can issue seamlessly, with no gaps
- */
- std::pair<std::vector<uint32_t>, bool>
- minBankPrep(const DRAMPacketQueue& queue, Tick min_col_at) const;
-
- /*
- * @return time to send a burst of data without gaps
- */
- Tick
- burstDelay() const
- {
- return (burstInterleave ? tBURST_MAX / 2 : tBURST);
- }
-
- public:
- /**
- * Initialize the DRAM interface and verify parameters
- */
- void init() override;
-
- /**
- * Iterate through dram ranks and instantiate per rank startup routine
- */
- void startup() override;
-
- /**
- * Setup the rank based on packet received
- *
- * @param integer value of rank to be setup. used to index ranks vector
- * @param are we setting up rank for read or write packet?
- */
- void setupRank(const uint8_t rank, const bool is_read) override;
-
- /**
- * Iterate through dram ranks to exit self-refresh in order to drain
- */
- void drainRanks();
-
- /**
- * Return true once refresh is complete for all ranks and there are no
- * additional commands enqueued. (only evaluated when draining)
- * This will ensure that all banks are closed, power state is IDLE, and
- * power stats have been updated
- *
- * @return true if all ranks have refreshed, with no commands enqueued
- *
- */
- bool allRanksDrained() const override;
-
- /**
- * Iterate through DRAM ranks and suspend them
- */
- void suspend();
-
- /*
- * @return time to offset next command
- */
- Tick commandOffset() const override { return (tRP + tRCD); }
-
- /*
- * Function to calculate unloaded, closed bank access latency
- */
- Tick accessLatency() const override { return (tRP + tRCD + tCL); }
-
- /**
- * For FR-FCFS policy, find first DRAM command that can issue
- *
- * @param queue Queued requests to consider
- * @param min_col_at Minimum tick for 'seamless' issue
- * @return an iterator to the selected packet, else queue.end()
- * @return the tick when the packet selected will issue
- */
- std::pair<DRAMPacketQueue::iterator, Tick>
- chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const override;
-
- /**
- * Actually do the burst - figure out the latency it
- * will take to service the req based on bank state, channel state etc
- * and then update those states to account for this request. Based
- * on this, update the packet's "readyTime" and move it to the
- * response q from where it will eventually go back to the outside
- * world.
- *
- * @param dram_pkt The DRAM packet created from the outside world pkt
- * @param next_burst_at Minimum bus timing requirement from controller
- * @param queue Reference to the read or write queue with the packet
- * @return pair, tick when current burst is issued and
- * tick when next burst can issue
- */
- std::pair<Tick, Tick>
- doBurstAccess(DRAMPacket* dram_pkt, Tick next_burst_at,
- const std::vector<DRAMPacketQueue>& queue);
-
- /**
- * Check if a burst operation can be issued to the DRAM
- *
- * @param Return true if RD/WR can issue
- * This requires the DRAM to be in the
- * REF IDLE state
- */
- bool
- burstReady(DRAMPacket* pkt) const override
- {
- return ranks[pkt->rank]->inRefIdleState();
- }
-
- /**
- * This function checks if ranks are actively refreshing and
- * therefore busy. The function also checks if ranks are in
- * the self-refresh state, in which case, a self-refresh exit
- * is initiated.
- *
- * return boolean if all ranks are in refresh and therefore busy
- */
- bool isBusy();
-
- /**
- * Add rank to rank delay to bus timing to all DRAM banks in all ranks
- * when access to an alternate interface is issued
- *
- * @param cmd_at Time of current command used as starting point for
- * addition of rank-to-rank delay
- */
- void addRankToRankDelay(Tick cmd_at) override;
-
- /**
- * Complete response process for DRAM when read burst is complete
- * This will update the counters and check if a power down state
- * can be entered.
- * - * @param rank Specifies rank associated with read burst - */ - void respondEvent(uint8_t rank); - - /** - * Check the refresh state to determine if refresh needs - * to be kicked back into action after a read response - * - * @param rank Specifies rank associated with read burst - */ - void checkRefreshState(uint8_t rank); - - DRAMInterface(const DRAMInterfaceParams* _p); -}; - -/** - * Interface to NVM devices with media specific parameters, - * statistics, and functions. - * The NVMInterface includes a class for individual ranks - * and per rank functions. - */ -class NVMInterface : public MemInterface -{ - private: - /** - * NVM rank class simply includes a vector of banks. - */ - class Rank : public EventManager - { - private: - - /** - * A reference to the parent NVMInterface instance - */ - NVMInterface& nvm; - - public: - - /** - * Current Rank index - */ - uint8_t rank; - - /** - * Vector of NVM banks. Each rank is made of several banks - * that can be accessed in parallel. - */ - std::vector banks; - - Rank(const NVMInterfaceParams* _p, int _rank, - NVMInterface& _nvm); - }; - - /** - * NVM specific device and channel characteristics - */ - const uint32_t maxPendingWrites; - const uint32_t maxPendingReads; - const bool twoCycleRdWr; - - /** - * NVM specific timing requirements - */ - const Tick tREAD; - const Tick tWRITE; - const Tick tSEND; - - struct NVMStats : public Stats::Group - { - NVMStats(NVMInterface &nvm); - - void regStats() override; - - NVMInterface &nvm; - - /** NVM stats */ - Stats::Scalar readBursts; - Stats::Scalar writeBursts; - - Stats::Vector perBankRdBursts; - Stats::Vector perBankWrBursts; - - // Latencies summed over all requests - Stats::Scalar totQLat; - Stats::Scalar totBusLat; - Stats::Scalar totMemAccLat; - - // Average latencies per request - Stats::Formula avgQLat; - Stats::Formula avgBusLat; - Stats::Formula avgMemAccLat; - - Stats::Scalar bytesRead; - Stats::Scalar bytesWritten; - - // Average bandwidth - Stats::Formula avgRdBW; - Stats::Formula avgWrBW; - Stats::Formula peakBW; - Stats::Formula busUtil; - Stats::Formula busUtilRead; - Stats::Formula busUtilWrite; - - /** NVM stats */ - Stats::Histogram pendingReads; - Stats::Histogram pendingWrites; - Stats::Histogram bytesPerBank; - }; - - NVMStats stats; - - void processWriteRespondEvent(); - EventFunctionWrapper writeRespondEvent; - - void processReadReadyEvent(); - EventFunctionWrapper readReadyEvent; - - /** - * Vector of nvm ranks - */ - std::vector ranks; - - /** - * Holding queue for non-deterministic write commands, which - * maintains writes that have been issued but have not completed - * Stored seperately mostly to keep the code clean and help with - * events scheduling. - * This mimics a buffer on the media controller and therefore is - * not added to the main write queue for sizing - */ - std::list writeRespQueue; - - std::deque readReadyQueue; - - /** - * Check if the write response queue is empty - * - * @param Return true if empty - */ - bool writeRespQueueEmpty() const { return writeRespQueue.empty(); } - - /** - * Till when must we wait before issuing next read command? 
- */ - Tick nextReadAt; - - // keep track of reads that have issued for which data is either - // not yet ready or has not yet been transferred to the ctrl - uint16_t numPendingReads; - uint16_t numReadDataReady; - - public: - // keep track of the number of reads that have yet to be issued - uint16_t numReadsToIssue; - - // number of writes in the writeQueue for the NVM interface - uint32_t numWritesQueued; - - /** - * Buffer sizes for read and write queues in the controller - * These are passed to the controller on instantiation - * Defining them here allows for buffers to be resized based - * on memory type / configuration. - */ - const uint32_t readBufferSize; - const uint32_t writeBufferSize; - - /** - * Initialize the NVM interface and verify parameters - */ - void init() override; - - /** - * Setup the rank based on packet received - * - * @param integer value of rank to be setup. used to index ranks vector - * @param are we setting up rank for read or write packet? - */ - void setupRank(const uint8_t rank, const bool is_read) override; - - /** - * Check drain state of NVM interface - * - * @return true if write response queue is empty - * - */ - bool allRanksDrained() const override { return writeRespQueueEmpty(); } - - /* - * @return time to offset next command - */ - Tick commandOffset() const override { return tBURST; } - - /** - * Check if a burst operation can be issued to the NVM - * - * @param Return true if RD/WR can issue - * for reads, also verfy that ready count - * has been updated to a non-zero value to - * account for race conditions between events - */ - bool burstReady(DRAMPacket* pkt) const override; - - /** - * This function checks if ranks are busy. - * This state is true when either: - * 1) There is no command with read data ready to transmit or - * 2) The NVM inteface has reached the maximum number of outstanding - * writes commands. - * @param read_queue_empty There are no read queued - * @param all_writes_nvm All writes in queue are for NVM interface - * @return true of NVM is busy - * - */ - bool isBusy(bool read_queue_empty, bool all_writes_nvm); - /** - * For FR-FCFS policy, find first NVM command that can issue - * default to first command to prepped region - * - * @param queue Queued requests to consider - * @param min_col_at Minimum tick for 'seamless' issue - * @return an iterator to the selected packet, else queue.end() - * @return the tick when the packet selected will issue - */ - std::pair - chooseNextFRFCFS(DRAMPacketQueue& queue, Tick min_col_at) const override; - - /** - * Add rank to rank delay to bus timing to all NVM banks in alli ranks - * when access to an alternate interface is issued - * - * param cmd_at Time of current command used as starting point for - * addition of rank-to-rank delay - */ - void addRankToRankDelay(Tick cmd_at) override; - - - /** - * Select read command to issue asynchronously - */ - void chooseRead(DRAMPacketQueue& queue); - - /* - * Function to calulate unloaded access latency - */ - Tick accessLatency() const override { return (tREAD + tSEND); } - - /** - * Check if the write response queue has reached defined threshold - * - * @param Return true if full - */ - bool - writeRespQueueFull() const - { - return writeRespQueue.size() == maxPendingWrites; - } - - bool - readsWaitingToIssue() const - { - return ((numReadsToIssue != 0) && - (numPendingReads < maxPendingReads)); - } - - /** - * Actually do the burst and update stats. 
- * - * @param pkt The packet created from the outside world pkt - * @param next_burst_at Minimum bus timing requirement from controller - * @return pair, tick when current burst is issued and - * tick when next burst can issue - */ - std::pair - doBurstAccess(DRAMPacket* pkt, Tick next_burst_at); - - NVMInterface(const NVMInterfaceParams* _p); -}; - -/** - * The DRAM controller is a single-channel memory controller capturing - * the most important timing constraints associated with a - * contemporary DRAM. For multi-channel memory systems, the controller - * is combined with a crossbar model, with the channel address - * interleaving taking part in the crossbar. - * - * As a basic design principle, this controller - * model is not cycle callable, but instead uses events to: 1) decide - * when new decisions can be made, 2) when resources become available, - * 3) when things are to be considered done, and 4) when to send - * things back. Through these simple principles, the model delivers - * high performance, and lots of flexibility, allowing users to - * evaluate the system impact of a wide range of memory technologies, - * such as DDR3/4, LPDDR2/3/4, WideIO1/2, HBM and HMC. - * - * For more details, please see Hansson et al, "Simulating DRAM - * controllers for future system architecture exploration", - * Proc. ISPASS, 2014. If you use this model as part of your research - * please cite the paper. - * - * The low-power functionality implements a staggered powerdown - * similar to that described in "Optimized Active and Power-Down Mode - * Refresh Control in 3D-DRAMs" by Jung et al, VLSI-SoC, 2014. - */ -class DRAMCtrl : public QoS::MemCtrl -{ - private: - - // For now, make use of a queued slave port to avoid dealing with - // flow control for the responses being sent back - class MemoryPort : public QueuedSlavePort - { - - RespPacketQueue queue; - DRAMCtrl& ctrl; - - public: - - MemoryPort(const std::string& name, DRAMCtrl& _ctrl); - - protected: - - Tick recvAtomic(PacketPtr pkt); - - void recvFunctional(PacketPtr pkt); - - bool recvTimingReq(PacketPtr); - - virtual AddrRangeList getAddrRanges() const; - - }; - - /** - * Our incoming port, for a multi-ported controller add a crossbar - * in front of it - */ - MemoryPort port; - - /** - * Remember if the memory system is in timing mode - */ - bool isTimingMode; - - /** - * Remember if we have to retry a request when available. 
- */ - bool retryRdReq; - bool retryWrReq; - - /** - * Bunch of things requires to setup "events" in gem5 - * When event "respondEvent" occurs for example, the method - * processRespondEvent is called; no parameters are allowed - * in these methods - */ - void processNextReqEvent(); - EventFunctionWrapper nextReqEvent; - - void processRespondEvent(); - EventFunctionWrapper respondEvent; - - /** - * Check if the read queue has room for more entries - * - * @param pkt_count The number of entries needed in the read queue - * @return true if read queue is full, false otherwise - */ - bool readQueueFull(unsigned int pkt_count) const; - - /** - * Check if the write queue has room for more entries - * - * @param pkt_count The number of entries needed in the write queue - * @return true if write queue is full, false otherwise - */ - bool writeQueueFull(unsigned int pkt_count) const; - - /** - * When a new read comes in, first check if the write q has a - * pending request to the same address.\ If not, decode the - * address to populate rank/bank/row, create one or mutliple - * "dram_pkt", and push them to the back of the read queue.\ - * If this is the only - * read request in the system, schedule an event to start - * servicing it. - * - * @param pkt The request packet from the outside world - * @param pkt_count The number of DRAM bursts the pkt - * @param is_dram Does this packet access DRAM? - * translate to. If pkt size is larger then one full burst, - * then pkt_count is greater than one. - */ - void addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); - - /** - * Decode the incoming pkt, create a dram_pkt and push to the - * back of the write queue. \If the write q length is more than - * the threshold specified by the user, ie the queue is beginning - * to get full, stop reads, and start draining writes. - * - * @param pkt The request packet from the outside world - * @param pkt_count The number of DRAM bursts the pkt - * @param is_dram Does this packet access DRAM? - * translate to. If pkt size is larger then one full burst, - * then pkt_count is greater than one. - */ - void addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); - - /** - * Actually do the burst based on media specific access function. - * Update bus statistics when complete. - * - * @param pkt The DRAM packet created from the outside world pkt - */ - void doBurstAccess(DRAMPacket* dram_pkt); - - /** - * When a packet reaches its "readyTime" in the response Q, - * use the "access()" method in AbstractMemory to actually - * create the response packet, and send it back to the outside - * world requestor. - * - * @param pkt The packet from the outside world - * @param static_latency Static latency to add before sending the packet - */ - void accessAndRespond(PacketPtr pkt, Tick static_latency); - - /** - * Determine if there is a packet that can issue. - * - * @param pkt The packet to evaluate - */ - bool - packetReady(DRAMPacket* pkt) - { - return (pkt->isDram() ? - dram->burstReady(pkt) : nvm->burstReady(pkt)); - } - - /** - * Calculate the minimum delay used when scheduling a read-to-write - * transision. - * @param return minimum delay - */ - Tick - minReadToWriteDataGap() - { - Tick dram_min = dram ? dram->minReadToWriteDataGap() : MaxTick; - Tick nvm_min = nvm ? nvm->minReadToWriteDataGap() : MaxTick; - return std::min(dram_min, nvm_min); - } - - /** - * Calculate the minimum delay used when scheduling a write-to-read - * transision. 
- * @param return minimum delay
- */
- Tick
- minWriteToReadDataGap()
- {
- Tick dram_min = dram ? dram->minWriteToReadDataGap() : MaxTick;
- Tick nvm_min = nvm ? nvm->minWriteToReadDataGap() : MaxTick;
- return std::min(dram_min, nvm_min);
- }
-
- /**
- * The memory scheduler/arbiter - picks which request needs to
- * go next, based on the specified policy such as FCFS or FR-FCFS
- * and moves it to the head of the queue.
- * Prioritizes accesses to the same rank as previous burst unless
- * controller is switching command type.
- *
- * @param queue Queued requests to consider
- * @param extra_col_delay Any extra delay due to a read/write switch
- * @return an iterator to the selected packet, else queue.end()
- */
- DRAMPacketQueue::iterator chooseNext(DRAMPacketQueue& queue,
- Tick extra_col_delay);
-
- /**
- * For FR-FCFS policy reorder the read/write queue depending on row buffer
- * hits and earliest bursts available in DRAM
- *
- * @param queue Queued requests to consider
- * @param extra_col_delay Any extra delay due to a read/write switch
- * @return an iterator to the selected packet, else queue.end()
- */
- DRAMPacketQueue::iterator chooseNextFRFCFS(DRAMPacketQueue& queue,
- Tick extra_col_delay);
-
- /**
- * Calculate burst window aligned tick
- *
- * @param cmd_tick Initial tick of command
- * @return burst window aligned tick
- */
- Tick getBurstWindow(Tick cmd_tick);
-
- /**
- * Used for debugging to observe the contents of the queues.
- */
- void printQs() const;
-
- /**
- * Burst-align an address.
- *
- * @param addr The potentially unaligned address
- * @param is_dram Does this packet access DRAM?
- *
- * @return An address aligned to a memory burst
- */
- Addr
- burstAlign(Addr addr, bool is_dram) const
- {
- if (is_dram)
- return (addr & ~(Addr(dram->bytesPerBurst() - 1)));
- else
- return (addr & ~(Addr(nvm->bytesPerBurst() - 1)));
- }
-
- /**
- * The controller's main read and write queues, with support for QoS reordering
- */
- std::vector<DRAMPacketQueue> readQueue;
- std::vector<DRAMPacketQueue> writeQueue;
-
- /**
- * To avoid iterating over the write queue to check for
- * overlapping transactions, maintain a set of burst addresses
- * that are currently queued. Since we merge writes to the same
- * location we never have more than one address to the same burst
- * address.
- */
- std::unordered_set<Addr> isInWriteQueue;
-
- /**
- * Response queue where read packets wait after we're done working
- * with them, but it's not time to send the response yet. The
- * responses are stored separately mostly to keep the code clean
- * and help with events scheduling. For all logical purposes such
- * as sizing the read queue, this and the main read queue need to
- * be added together.
- */
- std::deque<DRAMPacket*> respQueue;
-
- /**
- * Holds count of commands issued in burst window starting at
- * defined Tick. This is used to ensure that the command bandwidth
- * does not exceed the allowable media constraints.
- */
- std::unordered_multiset<Tick> burstTicks;
-
- /**
- * Create pointer to interface of the actual dram media when connected
- */
- DRAMInterface* const dram;
-
- /**
- * Create pointer to interface of the actual nvm media when connected
- */
- NVMInterface* const nvm;
-
- /**
- * The following are basic design parameters of the memory
- * controller, and are initialized based on parameter values.
- * The rowsPerBank is determined based on the capacity, number of
- * ranks and banks, the burst size, and the row buffer size.
- */
- const uint32_t readBufferSize;
- const uint32_t writeBufferSize;
- const uint32_t writeHighThreshold;
- const uint32_t writeLowThreshold;
- const uint32_t minWritesPerSwitch;
- uint32_t writesThisTime;
- uint32_t readsThisTime;
-
- /**
- * Memory controller configuration initialized based on parameter
- * values.
- */
- Enums::MemSched memSchedPolicy;
-
- /**
- * Pipeline latency of the controller frontend. The frontend
- * contribution is added to writes (that complete when they are in
- * the write buffer) and reads that are serviced by the write buffer.
- */
- const Tick frontendLatency;
-
- /**
- * Pipeline latency of the backend and PHY. Along with the
- * frontend contribution, this latency is added to reads serviced
- * by the DRAM.
- */
- const Tick backendLatency;
-
- /**
- * Length of a command window, used to check
- * command bandwidth
- */
- const Tick commandWindow;
-
- /**
- * Till when must we wait before issuing next RD/WR burst?
- */
- Tick nextBurstAt;
-
- Tick prevArrival;
-
- /**
- * The soonest you have to start thinking about the next request
- * is the longest access time that can occur before
- * nextBurstAt. Assuming you need to precharge, open a new row,
- * and access, it is tRP + tRCD + tCL.
- */
- Tick nextReqTime;
-
- struct CtrlStats : public Stats::Group
- {
- CtrlStats(DRAMCtrl &ctrl);
-
- void regStats() override;
-
- DRAMCtrl &ctrl;
-
- // All statistics that the model needs to capture
- Stats::Scalar readReqs;
- Stats::Scalar writeReqs;
- Stats::Scalar readBursts;
- Stats::Scalar writeBursts;
- Stats::Scalar servicedByWrQ;
- Stats::Scalar mergedWrBursts;
- Stats::Scalar neitherReadNorWriteReqs;
- // Average queue lengths
- Stats::Average avgRdQLen;
- Stats::Average avgWrQLen;
-
- Stats::Scalar numRdRetry;
- Stats::Scalar numWrRetry;
- Stats::Vector readPktSize;
- Stats::Vector writePktSize;
- Stats::Vector rdQLenPdf;
- Stats::Vector wrQLenPdf;
- Stats::Histogram rdPerTurnAround;
- Stats::Histogram wrPerTurnAround;
-
- Stats::Scalar bytesReadWrQ;
- Stats::Scalar bytesReadSys;
- Stats::Scalar bytesWrittenSys;
- // Average bandwidth
- Stats::Formula avgRdBWSys;
- Stats::Formula avgWrBWSys;
-
- Stats::Scalar totGap;
- Stats::Formula avgGap;
-
- // per-master bytes read and written to memory
- Stats::Vector masterReadBytes;
- Stats::Vector masterWriteBytes;
-
- // per-master bytes read and written to memory rate
- Stats::Formula masterReadRate;
- Stats::Formula masterWriteRate;
-
- // per-master read and write serviced memory accesses
- Stats::Vector masterReadAccesses;
- Stats::Vector masterWriteAccesses;
-
- // per-master read and write total memory access latency
- Stats::Vector masterReadTotalLat;
- Stats::Vector masterWriteTotalLat;
-
- // per-master read and write average memory access latency
- Stats::Formula masterReadAvgLat;
- Stats::Formula masterWriteAvgLat;
- };
-
- CtrlStats stats;
-
- /**
- * Upstream caches need this packet until true is returned, so
- * hold it for deletion until a subsequent call
- */
- std::unique_ptr<Packet> pendingDelete;
-
- /**
- * Select either the read or write queue
- *
- * @param is_read The current burst is a read, select read queue
- * @return a reference to the appropriate queue
- */
- std::vector<DRAMPacketQueue>&
- selQueue(bool is_read)
- {
- return (is_read ?
readQueue : writeQueue); - }; - - /** - * Remove commands that have already issued from burstTicks - */ - void pruneBurstTick(); - - public: - - DRAMCtrl(const DRAMCtrlParams* p); - - /** - * Ensure that all interfaced have drained commands - * - * @return bool flag, set once drain complete - */ - bool allIntfDrained() const; - - DrainState drain() override; - - /** - * Check for command bus contention for single cycle command. - * If there is contention, shift command to next burst. - * Check verifies that the commands issued per burst is less - * than a defined max number, maxCommandsPerWindow. - * Therefore, contention per cycle is not verified and instead - * is done based on a burst window. - * - * @param cmd_tick Initial tick of command, to be verified - * @param max_cmds_per_burst Number of commands that can issue - * in a burst window - * @return tick for command issue without contention - */ - Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst); - - /** - * Check for command bus contention for multi-cycle (2 currently) - * command. If there is contention, shift command(s) to next burst. - * Check verifies that the commands issued per burst is less - * than a defined max number, maxCommandsPerWindow. - * Therefore, contention per cycle is not verified and instead - * is done based on a burst window. - * - * @param cmd_tick Initial tick of command, to be verified - * @param max_multi_cmd_split Maximum delay between commands - * @param max_cmds_per_burst Number of commands that can issue - * in a burst window - * @return tick for command issue without contention - */ - Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, - Tick max_multi_cmd_split = 0); - - /** - * Is there a respondEvent scheduled? - * - * @return true if event is scheduled - */ - bool respondEventScheduled() const { return respondEvent.scheduled(); } - - /** - * Is there a read/write burst Event scheduled? 
- * - * @return true if event is scheduled - */ - bool requestEventScheduled() const { return nextReqEvent.scheduled(); } - - /** - * restart the controller - * This can be used by interfaces to restart the - * scheduler after maintainence commands complete - * - * @param Tick to schedule next event - */ - void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); } - - /** - * Check the current direction of the memory channel - * - * @param next_state Check either the current or next bus state - * @return True when bus is currently in a read state - */ - bool inReadBusState(bool next_state) const; - - /** - * Check the current direction of the memory channel - * - * @param next_state Check either the current or next bus state - * @return True when bus is currently in a write state - */ - bool inWriteBusState(bool next_state) const; - - Port &getPort(const std::string &if_name, - PortID idx=InvalidPortID) override; - - virtual void init() override; - virtual void startup() override; - virtual void drainResume() override; - - protected: - - Tick recvAtomic(PacketPtr pkt); - void recvFunctional(PacketPtr pkt); - bool recvTimingReq(PacketPtr pkt); - -}; - -#endif //__MEM_DRAM_CTRL_HH__ diff --git a/src/mem/mem_ctrl.cc b/src/mem/mem_ctrl.cc new file mode 100644 index 000000000..66d3c2ab9 --- /dev/null +++ b/src/mem/mem_ctrl.cc @@ -0,0 +1,1475 @@ +/* + * Copyright (c) 2010-2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mem/mem_ctrl.hh" + +#include "base/trace.hh" +#include "debug/DRAM.hh" +#include "debug/Drain.hh" +#include "debug/MemCtrl.hh" +#include "debug/NVM.hh" +#include "debug/QOS.hh" +#include "mem/mem_interface.hh" +#include "sim/system.hh" + +using namespace std; + +MemCtrl::MemCtrl(const MemCtrlParams* p) : + QoS::MemCtrl(p), + port(name() + ".port", *this), isTimingMode(false), + retryRdReq(false), retryWrReq(false), + nextReqEvent([this]{ processNextReqEvent(); }, name()), + respondEvent([this]{ processRespondEvent(); }, name()), + dram(p->dram), nvm(p->nvm), + readBufferSize((dram ? dram->readBufferSize : 0) + + (nvm ? nvm->readBufferSize : 0)), + writeBufferSize((dram ? dram->writeBufferSize : 0) + + (nvm ? nvm->writeBufferSize : 0)), + writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0), + writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0), + minWritesPerSwitch(p->min_writes_per_switch), + writesThisTime(0), readsThisTime(0), + memSchedPolicy(p->mem_sched_policy), + frontendLatency(p->static_frontend_latency), + backendLatency(p->static_backend_latency), + commandWindow(p->command_window), + nextBurstAt(0), prevArrival(0), + nextReqTime(0), + stats(*this) +{ + DPRINTF(MemCtrl, "Setting up controller\n"); + readQueue.resize(p->qos_priorities); + writeQueue.resize(p->qos_priorities); + + // Hook up interfaces to the controller + if (dram) + dram->setCtrl(this, commandWindow); + if (nvm) + nvm->setCtrl(this, commandWindow); + + fatal_if(!dram && !nvm, "Memory controller must have an interface"); + + // perform a basic check of the write thresholds + if (p->write_low_thresh_perc >= p->write_high_thresh_perc) + fatal("Write buffer low threshold %d must be smaller than the " + "high threshold %d\n", p->write_low_thresh_perc, + p->write_high_thresh_perc); +} + +void +MemCtrl::init() +{ + if (!port.isConnected()) { + fatal("MemCtrl %s is unconnected!\n", name()); + } else { + port.sendRangeChange(); + } +} + +void +MemCtrl::startup() +{ + // remember the memory system mode of operation + isTimingMode = system()->isTimingMode(); + + if (isTimingMode) { + // shift the bus busy time sufficiently far ahead that we never + // have to worry about negative values when computing the time for + // the next request, this will add an insignificant bubble at the + // start of simulation + nextBurstAt = curTick() + (dram ? 
dram->commandOffset() : + nvm->commandOffset()); + } +} + +Tick +MemCtrl::recvAtomic(PacketPtr pkt) +{ + DPRINTF(MemCtrl, "recvAtomic: %s 0x%x\n", + pkt->cmdString(), pkt->getAddr()); + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + Tick latency = 0; + // do the actual memory access and turn the packet into a response + if (dram && dram->getAddrRange().contains(pkt->getAddr())) { + dram->access(pkt); + + if (pkt->hasData()) { + // this value is not supposed to be accurate, just enough to + // keep things going, mimic a closed page + latency = dram->accessLatency(); + } + } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { + nvm->access(pkt); + + if (pkt->hasData()) { + // this value is not supposed to be accurate, just enough to + // keep things going, mimic a closed page + latency = nvm->accessLatency(); + } + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } + + return latency; +} + +bool +MemCtrl::readQueueFull(unsigned int neededEntries) const +{ + DPRINTF(MemCtrl, + "Read queue limit %d, current size %d, entries needed %d\n", + readBufferSize, totalReadQueueSize + respQueue.size(), + neededEntries); + + auto rdsize_new = totalReadQueueSize + respQueue.size() + neededEntries; + return rdsize_new > readBufferSize; +} + +bool +MemCtrl::writeQueueFull(unsigned int neededEntries) const +{ + DPRINTF(MemCtrl, + "Write queue limit %d, current size %d, entries needed %d\n", + writeBufferSize, totalWriteQueueSize, neededEntries); + + auto wrsize_new = (totalWriteQueueSize + neededEntries); + return wrsize_new > writeBufferSize; +} + +void +MemCtrl::addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram) +{ + // only add to the read queue here. whenever the request is + // eventually done, set the readyTime, and call schedule() + assert(!pkt->isWrite()); + + assert(pkt_count != 0); + + // if the request size is larger than burst size, the pkt is split into + // multiple packets + // Note if the pkt starting address is not aligened to burst size, the + // address of first packet is kept unaliged. Subsequent packets + // are aligned to burst size boundaries. This is to ensure we accurately + // check read packets against packets in write queue. + const Addr base_addr = pkt->getAddr(); + Addr addr = base_addr; + unsigned pktsServicedByWrQ = 0; + BurstHelper* burst_helper = NULL; + + uint32_t burst_size = is_dram ? 
dram->bytesPerBurst() : + nvm->bytesPerBurst(); + for (int cnt = 0; cnt < pkt_count; ++cnt) { + unsigned size = std::min((addr | (burst_size - 1)) + 1, + base_addr + pkt->getSize()) - addr; + stats.readPktSize[ceilLog2(size)]++; + stats.readBursts++; + stats.masterReadAccesses[pkt->masterId()]++; + + // First check write buffer to see if the data is already at + // the controller + bool foundInWrQ = false; + Addr burst_addr = burstAlign(addr, is_dram); + // if the burst address is not present then there is no need + // looking any further + if (isInWriteQueue.find(burst_addr) != isInWriteQueue.end()) { + for (const auto& vec : writeQueue) { + for (const auto& p : vec) { + // check if the read is subsumed in the write queue + // packet we are looking at + if (p->addr <= addr && + ((addr + size) <= (p->addr + p->size))) { + + foundInWrQ = true; + stats.servicedByWrQ++; + pktsServicedByWrQ++; + DPRINTF(MemCtrl, + "Read to addr %lld with size %d serviced by " + "write queue\n", + addr, size); + stats.bytesReadWrQ += burst_size; + break; + } + } + } + } + + // If not found in the write q, make a memory packet and + // push it onto the read queue + if (!foundInWrQ) { + + // Make the burst helper for split packets + if (pkt_count > 1 && burst_helper == NULL) { + DPRINTF(MemCtrl, "Read to addr %lld translates to %d " + "memory requests\n", pkt->getAddr(), pkt_count); + burst_helper = new BurstHelper(pkt_count); + } + + MemPacket* mem_pkt; + if (is_dram) { + mem_pkt = dram->decodePacket(pkt, addr, size, true, true); + // increment read entries of the rank + dram->setupRank(mem_pkt->rank, true); + } else { + mem_pkt = nvm->decodePacket(pkt, addr, size, true, false); + // Increment count to trigger issue of non-deterministic read + nvm->setupRank(mem_pkt->rank, true); + // Default readyTime to Max; will be reset once read is issued + mem_pkt->readyTime = MaxTick; + } + mem_pkt->burstHelper = burst_helper; + + assert(!readQueueFull(1)); + stats.rdQLenPdf[totalReadQueueSize + respQueue.size()]++; + + DPRINTF(MemCtrl, "Adding to read queue\n"); + + readQueue[mem_pkt->qosValue()].push_back(mem_pkt); + + // log packet + logRequest(MemCtrl::READ, pkt->masterId(), pkt->qosValue(), + mem_pkt->addr, 1); + + // Update stats + stats.avgRdQLen = totalReadQueueSize + respQueue.size(); + } + + // Starting address of next memory pkt (aligned to burst boundary) + addr = (addr | (burst_size - 1)) + 1; + } + + // If all packets are serviced by write queue, we send the repsonse back + if (pktsServicedByWrQ == pkt_count) { + accessAndRespond(pkt, frontendLatency); + return; + } + + // Update how many split packets are serviced by write queue + if (burst_helper != NULL) + burst_helper->burstsServiced = pktsServicedByWrQ; + + // If we are not already scheduled to get a request out of the + // queue, do so now + if (!nextReqEvent.scheduled()) { + DPRINTF(MemCtrl, "Request scheduled immediately\n"); + schedule(nextReqEvent, curTick()); + } +} + +void +MemCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram) +{ + // only add to the write queue here. whenever the request is + // eventually done, set the readyTime, and call schedule() + assert(pkt->isWrite()); + + // if the request size is larger than burst size, the pkt is split into + // multiple packets + const Addr base_addr = pkt->getAddr(); + Addr addr = base_addr; + uint32_t burst_size = is_dram ? 
dram->bytesPerBurst() : + nvm->bytesPerBurst(); + for (int cnt = 0; cnt < pkt_count; ++cnt) { + unsigned size = std::min((addr | (burst_size - 1)) + 1, + base_addr + pkt->getSize()) - addr; + stats.writePktSize[ceilLog2(size)]++; + stats.writeBursts++; + stats.masterWriteAccesses[pkt->masterId()]++; + + // see if we can merge with an existing item in the write + // queue and keep track of whether we have merged or not + bool merged = isInWriteQueue.find(burstAlign(addr, is_dram)) != + isInWriteQueue.end(); + + // if the item was not merged we need to create a new write + // and enqueue it + if (!merged) { + MemPacket* mem_pkt; + if (is_dram) { + mem_pkt = dram->decodePacket(pkt, addr, size, false, true); + dram->setupRank(mem_pkt->rank, false); + } else { + mem_pkt = nvm->decodePacket(pkt, addr, size, false, false); + nvm->setupRank(mem_pkt->rank, false); + } + assert(totalWriteQueueSize < writeBufferSize); + stats.wrQLenPdf[totalWriteQueueSize]++; + + DPRINTF(MemCtrl, "Adding to write queue\n"); + + writeQueue[mem_pkt->qosValue()].push_back(mem_pkt); + isInWriteQueue.insert(burstAlign(addr, is_dram)); + + // log packet + logRequest(MemCtrl::WRITE, pkt->masterId(), pkt->qosValue(), + mem_pkt->addr, 1); + + assert(totalWriteQueueSize == isInWriteQueue.size()); + + // Update stats + stats.avgWrQLen = totalWriteQueueSize; + + } else { + DPRINTF(MemCtrl, + "Merging write burst with existing queue entry\n"); + + // keep track of the fact that this burst effectively + // disappeared as it was merged with an existing one + stats.mergedWrBursts++; + } + + // Starting address of next memory pkt (aligned to burst_size boundary) + addr = (addr | (burst_size - 1)) + 1; + } + + // we do not wait for the writes to be send to the actual memory, + // but instead take responsibility for the consistency here and + // snoop the write queue for any upcoming reads + // @todo, if a pkt size is larger than burst size, we might need a + // different front end latency + accessAndRespond(pkt, frontendLatency); + + // If we are not already scheduled to get a request out of the + // queue, do so now + if (!nextReqEvent.scheduled()) { + DPRINTF(MemCtrl, "Request scheduled immediately\n"); + schedule(nextReqEvent, curTick()); + } +} + +void +MemCtrl::printQs() const +{ +#if TRACING_ON + DPRINTF(MemCtrl, "===READ QUEUE===\n\n"); + for (const auto& queue : readQueue) { + for (const auto& packet : queue) { + DPRINTF(MemCtrl, "Read %lu\n", packet->addr); + } + } + + DPRINTF(MemCtrl, "\n===RESP QUEUE===\n\n"); + for (const auto& packet : respQueue) { + DPRINTF(MemCtrl, "Response %lu\n", packet->addr); + } + + DPRINTF(MemCtrl, "\n===WRITE QUEUE===\n\n"); + for (const auto& queue : writeQueue) { + for (const auto& packet : queue) { + DPRINTF(MemCtrl, "Write %lu\n", packet->addr); + } + } +#endif // TRACING_ON +} + +bool +MemCtrl::recvTimingReq(PacketPtr pkt) +{ + // This is where we enter from the outside world + DPRINTF(MemCtrl, "recvTimingReq: request %s addr %lld size %d\n", + pkt->cmdString(), pkt->getAddr(), pkt->getSize()); + + panic_if(pkt->cacheResponding(), "Should not see packets where cache " + "is responding"); + + panic_if(!(pkt->isRead() || pkt->isWrite()), + "Should only see read and writes at memory controller\n"); + + // Calc avg gap between requests + if (prevArrival != 0) { + stats.totGap += curTick() - prevArrival; + } + prevArrival = curTick(); + + // What type of media does this packet access? 
+ bool is_dram; + if (dram && dram->getAddrRange().contains(pkt->getAddr())) { + is_dram = true; + } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { + is_dram = false; + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } + + + // Find out how many memory packets a pkt translates to + // If the burst size is equal or larger than the pkt size, then a pkt + // translates to only one memory packet. Otherwise, a pkt translates to + // multiple memory packets + unsigned size = pkt->getSize(); + uint32_t burst_size = is_dram ? dram->bytesPerBurst() : + nvm->bytesPerBurst(); + unsigned offset = pkt->getAddr() & (burst_size - 1); + unsigned int pkt_count = divCeil(offset + size, burst_size); + + // run the QoS scheduler and assign a QoS priority value to the packet + qosSchedule( { &readQueue, &writeQueue }, burst_size, pkt); + + // check local buffers and do not accept if full + if (pkt->isWrite()) { + assert(size != 0); + if (writeQueueFull(pkt_count)) { + DPRINTF(MemCtrl, "Write queue full, not accepting\n"); + // remember that we have to retry this port + retryWrReq = true; + stats.numWrRetry++; + return false; + } else { + addToWriteQueue(pkt, pkt_count, is_dram); + stats.writeReqs++; + stats.bytesWrittenSys += size; + } + } else { + assert(pkt->isRead()); + assert(size != 0); + if (readQueueFull(pkt_count)) { + DPRINTF(MemCtrl, "Read queue full, not accepting\n"); + // remember that we have to retry this port + retryRdReq = true; + stats.numRdRetry++; + return false; + } else { + addToReadQueue(pkt, pkt_count, is_dram); + stats.readReqs++; + stats.bytesReadSys += size; + } + } + + return true; +} + +void +MemCtrl::processRespondEvent() +{ + DPRINTF(MemCtrl, + "processRespondEvent(): Some req has reached its readyTime\n"); + + MemPacket* mem_pkt = respQueue.front(); + + if (mem_pkt->isDram()) { + // media specific checks and functions when read response is complete + dram->respondEvent(mem_pkt->rank); + } + + if (mem_pkt->burstHelper) { + // it is a split packet + mem_pkt->burstHelper->burstsServiced++; + if (mem_pkt->burstHelper->burstsServiced == + mem_pkt->burstHelper->burstCount) { + // we have now serviced all children packets of a system packet + // so we can now respond to the requester + // @todo we probably want to have a different front end and back + // end latency for split packets + accessAndRespond(mem_pkt->pkt, frontendLatency + backendLatency); + delete mem_pkt->burstHelper; + mem_pkt->burstHelper = NULL; + } + } else { + // it is not a split packet + accessAndRespond(mem_pkt->pkt, frontendLatency + backendLatency); + } + + delete respQueue.front(); + respQueue.pop_front(); + + if (!respQueue.empty()) { + assert(respQueue.front()->readyTime >= curTick()); + assert(!respondEvent.scheduled()); + schedule(respondEvent, respQueue.front()->readyTime); + } else { + // if there is nothing left in any queue, signal a drain + if (drainState() == DrainState::Draining && + !totalWriteQueueSize && !totalReadQueueSize && + allIntfDrained()) { + + DPRINTF(Drain, "Controller done draining\n"); + signalDrainDone(); + } else if (mem_pkt->isDram()) { + // check the refresh state and kick the refresh event loop + // into action again if banks already closed and just waiting + // for read to complete + dram->checkRefreshState(mem_pkt->rank); + } + } + + // We have made a location in the queue available at this point, + // so if there is a read that was forced to wait, retry now + if (retryRdReq) { + retryRdReq = false; + port.sendRetryReq(); + } +} 
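As an aside on the request-splitting logic above: recvTimingReq() sizes an
incoming packet as divCeil(offset + size, burst_size), where offset is the
request address modulo the interface burst size, and addToReadQueue() then
creates one MemPacket per burst, tracked by a BurstHelper until every child
burst has been serviced in processRespondEvent(). The standalone sketch below
is not part of this patch; the 64-byte burst size and the address are made-up
values, and divCeil() is re-implemented locally purely for illustration.

    #include <cstdint>
    #include <iostream>

    // Local stand-in for gem5's divCeil() helper, for illustration only.
    static uint64_t divCeil(uint64_t a, uint64_t b) { return (a + b - 1) / b; }

    int main()
    {
        const uint64_t burst_size = 64;  // hypothetical interface burst size
        const uint64_t addr = 0x70;      // hypothetical unaligned request address
        const uint64_t size = 128;       // request size in bytes

        // Offset within the first burst plus the request size, rounded up to
        // whole bursts, mirroring the calculation in recvTimingReq().
        const uint64_t offset = addr & (burst_size - 1);
        const uint64_t pkt_count = divCeil(offset + size, burst_size);

        std::cout << "request splits into " << pkt_count << " bursts\n"; // 3
        return 0;
    }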
+ +MemPacketQueue::iterator +MemCtrl::chooseNext(MemPacketQueue& queue, Tick extra_col_delay) +{ + // This method does the arbitration between requests. + + MemPacketQueue::iterator ret = queue.end(); + + if (!queue.empty()) { + if (queue.size() == 1) { + // available rank corresponds to state refresh idle + MemPacket* mem_pkt = *(queue.begin()); + if (packetReady(mem_pkt)) { + ret = queue.begin(); + DPRINTF(MemCtrl, "Single request, going to a free rank\n"); + } else { + DPRINTF(MemCtrl, "Single request, going to a busy rank\n"); + } + } else if (memSchedPolicy == Enums::fcfs) { + // check if there is a packet going to a free rank + for (auto i = queue.begin(); i != queue.end(); ++i) { + MemPacket* mem_pkt = *i; + if (packetReady(mem_pkt)) { + ret = i; + break; + } + } + } else if (memSchedPolicy == Enums::frfcfs) { + ret = chooseNextFRFCFS(queue, extra_col_delay); + } else { + panic("No scheduling policy chosen\n"); + } + } + return ret; +} + +MemPacketQueue::iterator +MemCtrl::chooseNextFRFCFS(MemPacketQueue& queue, Tick extra_col_delay) +{ + auto selected_pkt_it = queue.end(); + Tick col_allowed_at = MaxTick; + + // time we need to issue a column command to be seamless + const Tick min_col_at = std::max(nextBurstAt + extra_col_delay, curTick()); + + // find optimal packet for each interface + if (dram && nvm) { + // create 2nd set of parameters for NVM + auto nvm_pkt_it = queue.end(); + Tick nvm_col_at = MaxTick; + + // Select packet by default to give priority if both + // can issue at the same time or seamlessly + std::tie(selected_pkt_it, col_allowed_at) = + dram->chooseNextFRFCFS(queue, min_col_at); + std::tie(nvm_pkt_it, nvm_col_at) = + nvm->chooseNextFRFCFS(queue, min_col_at); + + // Compare DRAM and NVM and select NVM if it can issue + // earlier than the DRAM packet + if (col_allowed_at > nvm_col_at) { + selected_pkt_it = nvm_pkt_it; + } + } else if (dram) { + std::tie(selected_pkt_it, col_allowed_at) = + dram->chooseNextFRFCFS(queue, min_col_at); + } else if (nvm) { + std::tie(selected_pkt_it, col_allowed_at) = + nvm->chooseNextFRFCFS(queue, min_col_at); + } + + if (selected_pkt_it == queue.end()) { + DPRINTF(MemCtrl, "%s no available packets found\n", __func__); + } + + return selected_pkt_it; +} + +void +MemCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency) +{ + DPRINTF(MemCtrl, "Responding to Address %lld.. \n",pkt->getAddr()); + + bool needsResponse = pkt->needsResponse(); + // do the actual memory access which also turns the packet into a + // response + if (dram && dram->getAddrRange().contains(pkt->getAddr())) { + dram->access(pkt); + } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { + nvm->access(pkt); + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } + + // turn packet around to go back to requester if response expected + if (needsResponse) { + // access already turned the packet into a response + assert(pkt->isResponse()); + // response_time consumes the static latency and is charged also + // with headerDelay that takes into account the delay provided by + // the xbar and also the payloadDelay that takes into account the + // number of data beats. + Tick response_time = curTick() + static_latency + pkt->headerDelay + + pkt->payloadDelay; + // Here we reset the timing of the packet before sending it out. 
+ pkt->headerDelay = pkt->payloadDelay = 0; + + // queue the packet in the response queue to be sent out after + // the static latency has passed + port.schedTimingResp(pkt, response_time); + } else { + // @todo the packet is going to be deleted, and the MemPacket + // is still having a pointer to it + pendingDelete.reset(pkt); + } + + DPRINTF(MemCtrl, "Done\n"); + + return; +} + +void +MemCtrl::pruneBurstTick() +{ + auto it = burstTicks.begin(); + while (it != burstTicks.end()) { + auto current_it = it++; + if (curTick() > *current_it) { + DPRINTF(MemCtrl, "Removing burstTick for %d\n", *current_it); + burstTicks.erase(current_it); + } + } +} + +Tick +MemCtrl::getBurstWindow(Tick cmd_tick) +{ + // get tick aligned to burst window + Tick burst_offset = cmd_tick % commandWindow; + return (cmd_tick - burst_offset); +} + +Tick +MemCtrl::verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst) +{ + // start with assumption that there is no contention on command bus + Tick cmd_at = cmd_tick; + + // get tick aligned to burst window + Tick burst_tick = getBurstWindow(cmd_tick); + + // verify that we have command bandwidth to issue the command + // if not, iterate over next window(s) until slot found + while (burstTicks.count(burst_tick) >= max_cmds_per_burst) { + DPRINTF(MemCtrl, "Contention found on command bus at %d\n", + burst_tick); + burst_tick += commandWindow; + cmd_at = burst_tick; + } + + // add command into burst window and return corresponding Tick + burstTicks.insert(burst_tick); + return cmd_at; +} + +Tick +MemCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst, + Tick max_multi_cmd_split) +{ + // start with assumption that there is no contention on command bus + Tick cmd_at = cmd_tick; + + // get tick aligned to burst window + Tick burst_tick = getBurstWindow(cmd_tick); + + // Command timing requirements are from 2nd command + // Start with assumption that 2nd command will issue at cmd_at and + // find prior slot for 1st command to issue + // Given a maximum latency of max_multi_cmd_split between the commands, + // find the burst at the maximum latency prior to cmd_at + Tick burst_offset = 0; + Tick first_cmd_offset = cmd_tick % commandWindow; + while (max_multi_cmd_split > (first_cmd_offset + burst_offset)) { + burst_offset += commandWindow; + } + // get the earliest burst aligned address for first command + // ensure that the time does not go negative + Tick first_cmd_tick = burst_tick - std::min(burst_offset, burst_tick); + + // Can required commands issue? + bool first_can_issue = false; + bool second_can_issue = false; + // verify that we have command bandwidth to issue the command(s) + while (!first_can_issue || !second_can_issue) { + bool same_burst = (burst_tick == first_cmd_tick); + auto first_cmd_count = burstTicks.count(first_cmd_tick); + auto second_cmd_count = same_burst ? 
first_cmd_count + 1 : + burstTicks.count(burst_tick); + + first_can_issue = first_cmd_count < max_cmds_per_burst; + second_can_issue = second_cmd_count < max_cmds_per_burst; + + if (!second_can_issue) { + DPRINTF(MemCtrl, "Contention (cmd2) found on command bus at %d\n", + burst_tick); + burst_tick += commandWindow; + cmd_at = burst_tick; + } + + // Verify max_multi_cmd_split isn't violated when command 2 is shifted + // If commands initially were issued in same burst, they are + // now in consecutive bursts and can still issue B2B + bool gap_violated = !same_burst && + ((burst_tick - first_cmd_tick) > max_multi_cmd_split); + + if (!first_can_issue || (!second_can_issue && gap_violated)) { + DPRINTF(MemCtrl, "Contention (cmd1) found on command bus at %d\n", + first_cmd_tick); + first_cmd_tick += commandWindow; + } + } + + // Add command to burstTicks + burstTicks.insert(burst_tick); + burstTicks.insert(first_cmd_tick); + + return cmd_at; +} + +bool +MemCtrl::inReadBusState(bool next_state) const +{ + // check the bus state + if (next_state) { + // use busStateNext to get the state that will be used + // for the next burst + return (busStateNext == MemCtrl::READ); + } else { + return (busState == MemCtrl::READ); + } +} + +bool +MemCtrl::inWriteBusState(bool next_state) const +{ + // check the bus state + if (next_state) { + // use busStateNext to get the state that will be used + // for the next burst + return (busStateNext == MemCtrl::WRITE); + } else { + return (busState == MemCtrl::WRITE); + } +} + +void +MemCtrl::doBurstAccess(MemPacket* mem_pkt) +{ + // first clean up the burstTick set, removing old entries + // before adding new entries for next burst + pruneBurstTick(); + + // When was command issued? + Tick cmd_at; + + // Issue the next burst and update bus state to reflect + // when previous command was issued + if (mem_pkt->isDram()) { + std::vector& queue = selQueue(mem_pkt->isRead()); + std::tie(cmd_at, nextBurstAt) = + dram->doBurstAccess(mem_pkt, nextBurstAt, queue); + + // Update timing for NVM ranks if NVM is configured on this channel + if (nvm) + nvm->addRankToRankDelay(cmd_at); + + } else { + std::tie(cmd_at, nextBurstAt) = + nvm->doBurstAccess(mem_pkt, nextBurstAt); + + // Update timing for NVM ranks if NVM is configured on this channel + if (dram) + dram->addRankToRankDelay(cmd_at); + + } + + DPRINTF(MemCtrl, "Access to %lld, ready at %lld next burst at %lld.\n", + mem_pkt->addr, mem_pkt->readyTime, nextBurstAt); + + // Update the minimum timing between the requests, this is a + // conservative estimate of when we have to schedule the next + // request to not introduce any unecessary bubbles. In most cases + // we will wake up sooner than we have to. + nextReqTime = nextBurstAt - (dram ? 
dram->commandOffset() : + nvm->commandOffset()); + + + // Update the common bus stats + if (mem_pkt->isRead()) { + ++readsThisTime; + // Update latency stats + stats.masterReadTotalLat[mem_pkt->masterId()] += + mem_pkt->readyTime - mem_pkt->entryTime; + stats.masterReadBytes[mem_pkt->masterId()] += mem_pkt->size; + } else { + ++writesThisTime; + stats.masterWriteBytes[mem_pkt->masterId()] += mem_pkt->size; + stats.masterWriteTotalLat[mem_pkt->masterId()] += + mem_pkt->readyTime - mem_pkt->entryTime; + } +} + +void +MemCtrl::processNextReqEvent() +{ + // transition is handled by QoS algorithm if enabled + if (turnPolicy) { + // select bus state - only done if QoS algorithms are in use + busStateNext = selectNextBusState(); + } + + // detect bus state change + bool switched_cmd_type = (busState != busStateNext); + // record stats + recordTurnaroundStats(); + + DPRINTF(MemCtrl, "QoS Turnarounds selected state %s %s\n", + (busState==MemCtrl::READ)?"READ":"WRITE", + switched_cmd_type?"[turnaround triggered]":""); + + if (switched_cmd_type) { + if (busState == MemCtrl::READ) { + DPRINTF(MemCtrl, + "Switching to writes after %d reads with %d reads " + "waiting\n", readsThisTime, totalReadQueueSize); + stats.rdPerTurnAround.sample(readsThisTime); + readsThisTime = 0; + } else { + DPRINTF(MemCtrl, + "Switching to reads after %d writes with %d writes " + "waiting\n", writesThisTime, totalWriteQueueSize); + stats.wrPerTurnAround.sample(writesThisTime); + writesThisTime = 0; + } + } + + // updates current state + busState = busStateNext; + + if (nvm) { + for (auto queue = readQueue.rbegin(); + queue != readQueue.rend(); ++queue) { + // select non-deterministic NVM read to issue + // assume that we have the command bandwidth to issue this along + // with additional RD/WR burst with needed bank operations + if (nvm->readsWaitingToIssue()) { + // select non-deterministic NVM read to issue + nvm->chooseRead(*queue); + } + } + } + + // check ranks for refresh/wakeup - uses busStateNext, so done after + // turnaround decisions + // Default to busy status and update based on interface specifics + bool dram_busy = dram ? 
dram->isBusy() : true; + bool nvm_busy = true; + bool all_writes_nvm = false; + if (nvm) { + all_writes_nvm = nvm->numWritesQueued == totalWriteQueueSize; + bool read_queue_empty = totalReadQueueSize == 0; + nvm_busy = nvm->isBusy(read_queue_empty, all_writes_nvm); + } + // Default state of unused interface is 'true' + // Simply AND the busy signals to determine if system is busy + if (dram_busy && nvm_busy) { + // if all ranks are refreshing wait for them to finish + // and stall this state machine without taking any further + // action, and do not schedule a new nextReqEvent + return; + } + + // when we get here it is either a read or a write + if (busState == READ) { + + // track if we should switch or not + bool switch_to_writes = false; + + if (totalReadQueueSize == 0) { + // In the case there is no read request to go next, + // trigger writes if we have passed the low threshold (or + // if we are draining) + if (!(totalWriteQueueSize == 0) && + (drainState() == DrainState::Draining || + totalWriteQueueSize > writeLowThreshold)) { + + DPRINTF(MemCtrl, + "Switching to writes due to read queue empty\n"); + switch_to_writes = true; + } else { + // check if we are drained + // not done draining until in PWR_IDLE state + // ensuring all banks are closed and + // have exited low power states + if (drainState() == DrainState::Draining && + respQueue.empty() && allIntfDrained()) { + + DPRINTF(Drain, "MemCtrl controller done draining\n"); + signalDrainDone(); + } + + // nothing to do, not even any point in scheduling an + // event for the next request + return; + } + } else { + + bool read_found = false; + MemPacketQueue::iterator to_read; + uint8_t prio = numPriorities(); + + for (auto queue = readQueue.rbegin(); + queue != readQueue.rend(); ++queue) { + + prio--; + + DPRINTF(QOS, + "Checking READ queue [%d] priority [%d elements]\n", + prio, queue->size()); + + // Figure out which read request goes next + // If we are changing command type, incorporate the minimum + // bus turnaround delay which will be rank to rank delay + to_read = chooseNext((*queue), switched_cmd_type ? + minWriteToReadDataGap() : 0); + + if (to_read != queue->end()) { + // candidate read found + read_found = true; + break; + } + } + + // if no read to an available rank is found then return + // at this point. There could be writes to the available ranks + // which are above the required threshold. However, to + // avoid adding more complexity to the code, return and wait + // for a refresh event to kick things into action again. + if (!read_found) { + DPRINTF(MemCtrl, "No Reads Found - exiting\n"); + return; + } + + auto mem_pkt = *to_read; + + doBurstAccess(mem_pkt); + + // sanity check + assert(mem_pkt->size <= (mem_pkt->isDram() ? + dram->bytesPerBurst() : + nvm->bytesPerBurst()) ); + assert(mem_pkt->readyTime >= curTick()); + + // log the response + logResponse(MemCtrl::READ, (*to_read)->masterId(), + mem_pkt->qosValue(), mem_pkt->getAddr(), 1, + mem_pkt->readyTime - mem_pkt->entryTime); + + + // Insert into response queue. 
It will be sent back to the + // requester at its readyTime + if (respQueue.empty()) { + assert(!respondEvent.scheduled()); + schedule(respondEvent, mem_pkt->readyTime); + } else { + assert(respQueue.back()->readyTime <= mem_pkt->readyTime); + assert(respondEvent.scheduled()); + } + + respQueue.push_back(mem_pkt); + + // we have so many writes that we have to transition + // don't transition if the writeRespQueue is full and + // there are no other writes that can issue + if ((totalWriteQueueSize > writeHighThreshold) && + !(nvm && all_writes_nvm && nvm->writeRespQueueFull())) { + switch_to_writes = true; + } + + // remove the request from the queue + // the iterator is no longer valid . + readQueue[mem_pkt->qosValue()].erase(to_read); + } + + // switching to writes, either because the read queue is empty + // and the writes have passed the low threshold (or we are + // draining), or because the writes hit the hight threshold + if (switch_to_writes) { + // transition to writing + busStateNext = WRITE; + } + } else { + + bool write_found = false; + MemPacketQueue::iterator to_write; + uint8_t prio = numPriorities(); + + for (auto queue = writeQueue.rbegin(); + queue != writeQueue.rend(); ++queue) { + + prio--; + + DPRINTF(QOS, + "Checking WRITE queue [%d] priority [%d elements]\n", + prio, queue->size()); + + // If we are changing command type, incorporate the minimum + // bus turnaround delay + to_write = chooseNext((*queue), + switched_cmd_type ? minReadToWriteDataGap() : 0); + + if (to_write != queue->end()) { + write_found = true; + break; + } + } + + // if there are no writes to a rank that is available to service + // requests (i.e. rank is in refresh idle state) are found then + // return. There could be reads to the available ranks. However, to + // avoid adding more complexity to the code, return at this point and + // wait for a refresh event to kick things into action again. + if (!write_found) { + DPRINTF(MemCtrl, "No Writes Found - exiting\n"); + return; + } + + auto mem_pkt = *to_write; + + // sanity check + assert(mem_pkt->size <= (mem_pkt->isDram() ? + dram->bytesPerBurst() : + nvm->bytesPerBurst()) ); + + doBurstAccess(mem_pkt); + + isInWriteQueue.erase(burstAlign(mem_pkt->addr, mem_pkt->isDram())); + + // log the response + logResponse(MemCtrl::WRITE, mem_pkt->masterId(), + mem_pkt->qosValue(), mem_pkt->getAddr(), 1, + mem_pkt->readyTime - mem_pkt->entryTime); + + + // remove the request from the queue - the iterator is no longer valid + writeQueue[mem_pkt->qosValue()].erase(to_write); + + delete mem_pkt; + + // If we emptied the write queue, or got sufficiently below the + // threshold (using the minWritesPerSwitch as the hysteresis) and + // are not draining, or we have reads waiting and have done enough + // writes, then switch to reads. 
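+            // (As an illustrative example with assumed values: with
+            // writeLowThreshold == 16 and minWritesPerSwitch == 4,
+            // "sufficiently below the threshold" means fewer than 12
+            // writes remaining in the queue.)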
+ // If we are interfacing to NVM and have filled the writeRespQueue, + // with only NVM writes in Q, then switch to reads + bool below_threshold = + totalWriteQueueSize + minWritesPerSwitch < writeLowThreshold; + + if (totalWriteQueueSize == 0 || + (below_threshold && drainState() != DrainState::Draining) || + (totalReadQueueSize && writesThisTime >= minWritesPerSwitch) || + (totalReadQueueSize && nvm && nvm->writeRespQueueFull() && + all_writes_nvm)) { + + // turn the bus back around for reads again + busStateNext = MemCtrl::READ; + + // note that the we switch back to reads also in the idle + // case, which eventually will check for any draining and + // also pause any further scheduling if there is really + // nothing to do + } + } + // It is possible that a refresh to another rank kicks things back into + // action before reaching this point. + if (!nextReqEvent.scheduled()) + schedule(nextReqEvent, std::max(nextReqTime, curTick())); + + // If there is space available and we have writes waiting then let + // them retry. This is done here to ensure that the retry does not + // cause a nextReqEvent to be scheduled before we do so as part of + // the next request processing + if (retryWrReq && totalWriteQueueSize < writeBufferSize) { + retryWrReq = false; + port.sendRetryReq(); + } +} + +bool +MemCtrl::packetReady(MemPacket* pkt) +{ + return (pkt->isDram() ? + dram->burstReady(pkt) : nvm->burstReady(pkt)); +} + +Tick +MemCtrl::minReadToWriteDataGap() +{ + Tick dram_min = dram ? dram->minReadToWriteDataGap() : MaxTick; + Tick nvm_min = nvm ? nvm->minReadToWriteDataGap() : MaxTick; + return std::min(dram_min, nvm_min); +} + +Tick +MemCtrl::minWriteToReadDataGap() +{ + Tick dram_min = dram ? dram->minWriteToReadDataGap() : MaxTick; + Tick nvm_min = nvm ? 
nvm->minWriteToReadDataGap() : MaxTick; + return std::min(dram_min, nvm_min); +} + +Addr +MemCtrl::burstAlign(Addr addr, bool is_dram) const +{ + if (is_dram) + return (addr & ~(Addr(dram->bytesPerBurst() - 1))); + else + return (addr & ~(Addr(nvm->bytesPerBurst() - 1))); +} + +MemCtrl::CtrlStats::CtrlStats(MemCtrl &_ctrl) + : Stats::Group(&_ctrl), + ctrl(_ctrl), + + ADD_STAT(readReqs, "Number of read requests accepted"), + ADD_STAT(writeReqs, "Number of write requests accepted"), + + ADD_STAT(readBursts, + "Number of controller read bursts, " + "including those serviced by the write queue"), + ADD_STAT(writeBursts, + "Number of controller write bursts, " + "including those merged in the write queue"), + ADD_STAT(servicedByWrQ, + "Number of controller read bursts serviced by the write queue"), + ADD_STAT(mergedWrBursts, + "Number of controller write bursts merged with an existing one"), + + ADD_STAT(neitherReadNorWriteReqs, + "Number of requests that are neither read nor write"), + + ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"), + ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"), + + ADD_STAT(numRdRetry, "Number of times read queue was full causing retry"), + ADD_STAT(numWrRetry, "Number of times write queue was full causing retry"), + + ADD_STAT(readPktSize, "Read request sizes (log2)"), + ADD_STAT(writePktSize, "Write request sizes (log2)"), + + ADD_STAT(rdQLenPdf, "What read queue length does an incoming req see"), + ADD_STAT(wrQLenPdf, "What write queue length does an incoming req see"), + + ADD_STAT(rdPerTurnAround, + "Reads before turning the bus around for writes"), + ADD_STAT(wrPerTurnAround, + "Writes before turning the bus around for reads"), + + ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"), + ADD_STAT(bytesReadSys, "Total read bytes from the system interface side"), + ADD_STAT(bytesWrittenSys, + "Total written bytes from the system interface side"), + + ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"), + ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"), + + ADD_STAT(totGap, "Total gap between requests"), + ADD_STAT(avgGap, "Average gap between requests"), + + ADD_STAT(masterReadBytes, "Per-master bytes read from memory"), + ADD_STAT(masterWriteBytes, "Per-master bytes write to memory"), + ADD_STAT(masterReadRate, + "Per-master bytes read from memory rate (Bytes/sec)"), + ADD_STAT(masterWriteRate, + "Per-master bytes write to memory rate (Bytes/sec)"), + ADD_STAT(masterReadAccesses, + "Per-master read serviced memory accesses"), + ADD_STAT(masterWriteAccesses, + "Per-master write serviced memory accesses"), + ADD_STAT(masterReadTotalLat, + "Per-master read total memory access latency"), + ADD_STAT(masterWriteTotalLat, + "Per-master write total memory access latency"), + ADD_STAT(masterReadAvgLat, + "Per-master read average memory access latency"), + ADD_STAT(masterWriteAvgLat, + "Per-master write average memory access latency") + +{ +} + +void +MemCtrl::CtrlStats::regStats() +{ + using namespace Stats; + + assert(ctrl.system()); + const auto max_masters = ctrl.system()->maxMasters(); + + avgRdQLen.precision(2); + avgWrQLen.precision(2); + + readPktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); + writePktSize.init(ceilLog2(ctrl.system()->cacheLineSize()) + 1); + + rdQLenPdf.init(ctrl.readBufferSize); + wrQLenPdf.init(ctrl.writeBufferSize); + + rdPerTurnAround + .init(ctrl.readBufferSize) + .flags(nozero); + wrPerTurnAround + .init(ctrl.writeBufferSize) + .flags(nozero); + + 
avgRdBWSys.precision(2); + avgWrBWSys.precision(2); + avgGap.precision(2); + + // per-master bytes read and written to memory + masterReadBytes + .init(max_masters) + .flags(nozero | nonan); + + masterWriteBytes + .init(max_masters) + .flags(nozero | nonan); + + // per-master bytes read and written to memory rate + masterReadRate + .flags(nozero | nonan) + .precision(12); + + masterReadAccesses + .init(max_masters) + .flags(nozero); + + masterWriteAccesses + .init(max_masters) + .flags(nozero); + + masterReadTotalLat + .init(max_masters) + .flags(nozero | nonan); + + masterReadAvgLat + .flags(nonan) + .precision(2); + + masterWriteRate + .flags(nozero | nonan) + .precision(12); + + masterWriteTotalLat + .init(max_masters) + .flags(nozero | nonan); + + masterWriteAvgLat + .flags(nonan) + .precision(2); + + for (int i = 0; i < max_masters; i++) { + const std::string master = ctrl.system()->getMasterName(i); + masterReadBytes.subname(i, master); + masterReadRate.subname(i, master); + masterWriteBytes.subname(i, master); + masterWriteRate.subname(i, master); + masterReadAccesses.subname(i, master); + masterWriteAccesses.subname(i, master); + masterReadTotalLat.subname(i, master); + masterReadAvgLat.subname(i, master); + masterWriteTotalLat.subname(i, master); + masterWriteAvgLat.subname(i, master); + } + + // Formula stats + avgRdBWSys = (bytesReadSys / 1000000) / simSeconds; + avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds; + + avgGap = totGap / (readReqs + writeReqs); + + masterReadRate = masterReadBytes / simSeconds; + masterWriteRate = masterWriteBytes / simSeconds; + masterReadAvgLat = masterReadTotalLat / masterReadAccesses; + masterWriteAvgLat = masterWriteTotalLat / masterWriteAccesses; +} + +void +MemCtrl::recvFunctional(PacketPtr pkt) +{ + if (dram && dram->getAddrRange().contains(pkt->getAddr())) { + // rely on the abstract memory + dram->functionalAccess(pkt); + } else if (nvm && nvm->getAddrRange().contains(pkt->getAddr())) { + // rely on the abstract memory + nvm->functionalAccess(pkt); + } else { + panic("Can't handle address range for packet %s\n", + pkt->print()); + } +} + +Port & +MemCtrl::getPort(const string &if_name, PortID idx) +{ + if (if_name != "port") { + return QoS::MemCtrl::getPort(if_name, idx); + } else { + return port; + } +} + +bool +MemCtrl::allIntfDrained() const +{ + // ensure dram is in power down and refresh IDLE states + bool dram_drained = !dram || dram->allRanksDrained(); + // No outstanding NVM writes + // All other queues verified as needed with calling logic + bool nvm_drained = !nvm || nvm->allRanksDrained(); + return (dram_drained && nvm_drained); +} + +DrainState +MemCtrl::drain() +{ + // if there is anything in any of our internal queues, keep track + // of that as well + if (!(!totalWriteQueueSize && !totalReadQueueSize && respQueue.empty() && + allIntfDrained())) { + + DPRINTF(Drain, "Memory controller not drained, write: %d, read: %d," + " resp: %d\n", totalWriteQueueSize, totalReadQueueSize, + respQueue.size()); + + // the only queue that is not drained automatically over time + // is the write queue, thus kick things into action if needed + if (!totalWriteQueueSize && !nextReqEvent.scheduled()) { + schedule(nextReqEvent, curTick()); + } + + if (dram) + dram->drainRanks(); + + return DrainState::Draining; + } else { + return DrainState::Drained; + } +} + +void +MemCtrl::drainResume() +{ + if (!isTimingMode && system()->isTimingMode()) { + // if we switched to timing mode, kick things into action, + // and behave as if we restored from a 
checkpoint + startup(); + dram->startup(); + } else if (isTimingMode && !system()->isTimingMode()) { + // if we switch from timing mode, stop the refresh events to + // not cause issues with KVM + if (dram) + dram->suspend(); + } + + // update the mode + isTimingMode = system()->isTimingMode(); +} + +MemCtrl::MemoryPort::MemoryPort(const std::string& name, MemCtrl& _ctrl) + : QueuedSlavePort(name, &_ctrl, queue), queue(_ctrl, *this, true), + ctrl(_ctrl) +{ } + +AddrRangeList +MemCtrl::MemoryPort::getAddrRanges() const +{ + AddrRangeList ranges; + if (ctrl.dram) { + DPRINTF(DRAM, "Pushing DRAM ranges to port\n"); + ranges.push_back(ctrl.dram->getAddrRange()); + } + if (ctrl.nvm) { + DPRINTF(NVM, "Pushing NVM ranges to port\n"); + ranges.push_back(ctrl.nvm->getAddrRange()); + } + return ranges; +} + +void +MemCtrl::MemoryPort::recvFunctional(PacketPtr pkt) +{ + pkt->pushLabel(ctrl.name()); + + if (!queue.trySatisfyFunctional(pkt)) { + // Default implementation of SimpleTimingPort::recvFunctional() + // calls recvAtomic() and throws away the latency; we can save a + // little here by just not calculating the latency. + ctrl.recvFunctional(pkt); + } + + pkt->popLabel(); +} + +Tick +MemCtrl::MemoryPort::recvAtomic(PacketPtr pkt) +{ + return ctrl.recvAtomic(pkt); +} + +bool +MemCtrl::MemoryPort::recvTimingReq(PacketPtr pkt) +{ + // pass it to the memory controller + return ctrl.recvTimingReq(pkt); +} + +MemCtrl* +MemCtrlParams::create() +{ + return new MemCtrl(this); +} diff --git a/src/mem/mem_ctrl.hh b/src/mem/mem_ctrl.hh new file mode 100644 index 000000000..834cb5c5e --- /dev/null +++ b/src/mem/mem_ctrl.hh @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2012-2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * MemCtrl declaration
+ */
+
+#ifndef __MEM_CTRL_HH__
+#define __MEM_CTRL_HH__
+
+#include <deque>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "base/callback.hh"
+#include "base/statistics.hh"
+#include "enums/MemSched.hh"
+#include "mem/qos/mem_ctrl.hh"
+#include "mem/qport.hh"
+#include "params/MemCtrl.hh"
+#include "sim/eventq.hh"
+
+class DRAMInterface;
+class NVMInterface;
+
+/**
+ * A burst helper helps organize and manage a packet that is larger than
+ * the memory burst size. A system packet that is larger than the burst size
+ * is split into multiple packets and all those packets point to
+ * a single burst helper such that we know when the whole packet is served.
+ */
+class BurstHelper
+{
+  public:
+
+    /** Number of bursts required for a system packet **/
+    const unsigned int burstCount;
+
+    /** Number of bursts serviced so far for a system packet **/
+    unsigned int burstsServiced;
+
+    BurstHelper(unsigned int _burstCount)
+        : burstCount(_burstCount), burstsServiced(0)
+    { }
+};
+
+/**
+ * A memory packet stores a packet along with the timestamp of when
+ * the packet entered the queue, and also the decoded address.
+ */
+class MemPacket
+{
+  public:
+
+    /** When did request enter the controller */
+    const Tick entryTime;
+
+    /** When will request leave the controller */
+    Tick readyTime;
+
+    /** This comes from the outside world */
+    const PacketPtr pkt;
+
+    /** MasterID associated with the packet */
+    const MasterID _masterId;
+
+    const bool read;
+
+    /** Does this packet access DRAM? */
+    const bool dram;
+
+    /** Will be populated by address decoder */
+    const uint8_t rank;
+    const uint8_t bank;
+    const uint32_t row;
+
+    /**
+     * Bank id is calculated considering banks in all the ranks
+     * eg: 2 ranks each with 8 banks, then bankId = 0 --> rank0, bank0 and
+     * bankId = 8 --> rank1, bank0
+     */
+    const uint16_t bankId;
+
+    /**
+     * The starting address of the packet.
+     * This address could be unaligned to burst size boundaries. The
+     * reason is to keep the address offset so we can accurately check
+     * incoming read packets with packets in the write queue.
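+     * As an illustrative example with assumed values (not taken from any
+     * particular configuration): with a 64-byte burst, a read of address
+     * 0x1008 keeps addr == 0x1008 here, while burstAlign() would yield
+     * 0x1000 when matching it against queued writes.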
+     */
+    Addr addr;
+
+    /**
+     * The size of this dram packet in bytes
+     * It is always equal to or smaller than the burst size
+     */
+    unsigned int size;
+
+    /**
+     * A pointer to the BurstHelper if this MemPacket is a split packet
+     * If not a split packet (common case), this is set to NULL
+     */
+    BurstHelper* burstHelper;
+
+    /**
+     * QoS value of the encapsulated packet read at queuing time
+     */
+    uint8_t _qosValue;
+
+    /**
+     * Set the packet QoS value
+     * (interface compatibility with Packet)
+     */
+    inline void qosValue(const uint8_t qv) { _qosValue = qv; }
+
+    /**
+     * Get the packet QoS value
+     * (interface compatibility with Packet)
+     */
+    inline uint8_t qosValue() const { return _qosValue; }
+
+    /**
+     * Get the packet MasterID
+     * (interface compatibility with Packet)
+     */
+    inline MasterID masterId() const { return _masterId; }
+
+    /**
+     * Get the packet size
+     * (interface compatibility with Packet)
+     */
+    inline unsigned int getSize() const { return size; }
+
+    /**
+     * Get the packet address
+     * (interface compatibility with Packet)
+     */
+    inline Addr getAddr() const { return addr; }
+
+    /**
+     * Return true if it is a read packet
+     * (interface compatibility with Packet)
+     */
+    inline bool isRead() const { return read; }
+
+    /**
+     * Return true if it is a write packet
+     * (interface compatibility with Packet)
+     */
+    inline bool isWrite() const { return !read; }
+
+    /**
+     * Return true if it is a DRAM access
+     */
+    inline bool isDram() const { return dram; }
+
+    MemPacket(PacketPtr _pkt, bool is_read, bool is_dram, uint8_t _rank,
+               uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr,
+               unsigned int _size)
+        : entryTime(curTick()), readyTime(curTick()), pkt(_pkt),
+          _masterId(pkt->masterId()),
+          read(is_read), dram(is_dram), rank(_rank), bank(_bank), row(_row),
+          bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL),
+          _qosValue(_pkt->qosValue())
+    { }
+
+};
+
+// The memory packets are stored in multiple deque structures,
+// one per QoS priority
+typedef std::deque<MemPacket*> MemPacketQueue;
+
+
+/**
+ * The memory controller is a single-channel memory controller capturing
+ * the most important timing constraints associated with a
+ * contemporary controller. For multi-channel memory systems, the controller
+ * is combined with a crossbar model, with the channel address
+ * interleaving taking part in the crossbar.
+ *
+ * As a basic design principle, this controller
+ * model is not cycle callable, but instead uses events to: 1) decide
+ * when new decisions can be made, 2) when resources become available,
+ * 3) when things are to be considered done, and 4) when to send
+ * things back. The controller interfaces to media specific interfaces
+ * to enable flexible topologies.
+ * Through these simple principles, the model delivers
+ * high performance, and lots of flexibility, allowing users to
+ * evaluate the system impact of a wide range of memory technologies.
+ *
+ * For more details, please see Hansson et al, "Simulating DRAM
+ * controllers for future system architecture exploration",
+ * Proc. ISPASS, 2014. If you use this model as part of your research
+ * please cite the paper.
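+ * In this model the two central events are nextReqEvent, which selects
+ * and issues the next read/write burst, and respondEvent, which hands a
+ * completed read back to the requestor once its readyTime is reached
+ * (both are declared below).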
+ * + */ +class MemCtrl : public QoS::MemCtrl +{ + private: + + // For now, make use of a queued slave port to avoid dealing with + // flow control for the responses being sent back + class MemoryPort : public QueuedSlavePort + { + + RespPacketQueue queue; + MemCtrl& ctrl; + + public: + + MemoryPort(const std::string& name, MemCtrl& _ctrl); + + protected: + + Tick recvAtomic(PacketPtr pkt); + + void recvFunctional(PacketPtr pkt); + + bool recvTimingReq(PacketPtr); + + virtual AddrRangeList getAddrRanges() const; + + }; + + /** + * Our incoming port, for a multi-ported controller add a crossbar + * in front of it + */ + MemoryPort port; + + /** + * Remember if the memory system is in timing mode + */ + bool isTimingMode; + + /** + * Remember if we have to retry a request when available. + */ + bool retryRdReq; + bool retryWrReq; + + /** + * Bunch of things requires to setup "events" in gem5 + * When event "respondEvent" occurs for example, the method + * processRespondEvent is called; no parameters are allowed + * in these methods + */ + void processNextReqEvent(); + EventFunctionWrapper nextReqEvent; + + void processRespondEvent(); + EventFunctionWrapper respondEvent; + + /** + * Check if the read queue has room for more entries + * + * @param pkt_count The number of entries needed in the read queue + * @return true if read queue is full, false otherwise + */ + bool readQueueFull(unsigned int pkt_count) const; + + /** + * Check if the write queue has room for more entries + * + * @param pkt_count The number of entries needed in the write queue + * @return true if write queue is full, false otherwise + */ + bool writeQueueFull(unsigned int pkt_count) const; + + /** + * When a new read comes in, first check if the write q has a + * pending request to the same address.\ If not, decode the + * address to populate rank/bank/row, create one or mutliple + * "mem_pkt", and push them to the back of the read queue.\ + * If this is the only + * read request in the system, schedule an event to start + * servicing it. + * + * @param pkt The request packet from the outside world + * @param pkt_count The number of memory bursts the pkt + * @param is_dram Does this packet access DRAM? + * translate to. If pkt size is larger then one full burst, + * then pkt_count is greater than one. + */ + void addToReadQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); + + /** + * Decode the incoming pkt, create a mem_pkt and push to the + * back of the write queue. \If the write q length is more than + * the threshold specified by the user, ie the queue is beginning + * to get full, stop reads, and start draining writes. + * + * @param pkt The request packet from the outside world + * @param pkt_count The number of memory bursts the pkt + * @param is_dram Does this packet access DRAM? + * translate to. If pkt size is larger then one full burst, + * then pkt_count is greater than one. + */ + void addToWriteQueue(PacketPtr pkt, unsigned int pkt_count, bool is_dram); + + /** + * Actually do the burst based on media specific access function. + * Update bus statistics when complete. + * + * @param mem_pkt The memory packet created from the outside world pkt + */ + void doBurstAccess(MemPacket* mem_pkt); + + /** + * When a packet reaches its "readyTime" in the response Q, + * use the "access()" method in AbstractMemory to actually + * create the response packet, and send it back to the outside + * world requestor. 
+     *
+     * @param pkt The packet from the outside world
+     * @param static_latency Static latency to add before sending the packet
+     */
+    void accessAndRespond(PacketPtr pkt, Tick static_latency);
+
+    /**
+     * Determine if there is a packet that can issue.
+     *
+     * @param pkt The packet to evaluate
+     */
+    bool packetReady(MemPacket* pkt);
+
+    /**
+     * Calculate the minimum delay used when scheduling a read-to-write
+     * transition.
+     * @return minimum delay
+     */
+    Tick minReadToWriteDataGap();
+
+    /**
+     * Calculate the minimum delay used when scheduling a write-to-read
+     * transition.
+     * @return minimum delay
+     */
+    Tick minWriteToReadDataGap();
+
+    /**
+     * The memory scheduler/arbiter - picks which request needs to
+     * go next, based on the specified policy such as FCFS or FR-FCFS
+     * and moves it to the head of the queue.
+     * Prioritizes accesses to the same rank as previous burst unless
+     * controller is switching command type.
+     *
+     * @param queue Queued requests to consider
+     * @param extra_col_delay Any extra delay due to a read/write switch
+     * @return an iterator to the selected packet, else queue.end()
+     */
+    MemPacketQueue::iterator chooseNext(MemPacketQueue& queue,
+        Tick extra_col_delay);
+
+    /**
+     * For FR-FCFS policy reorder the read/write queue depending on row buffer
+     * hits and earliest bursts available in memory
+     *
+     * @param queue Queued requests to consider
+     * @param extra_col_delay Any extra delay due to a read/write switch
+     * @return an iterator to the selected packet, else queue.end()
+     */
+    MemPacketQueue::iterator chooseNextFRFCFS(MemPacketQueue& queue,
+            Tick extra_col_delay);
+
+    /**
+     * Calculate burst window aligned tick
+     *
+     * @param cmd_tick Initial tick of command
+     * @return burst window aligned tick
+     */
+    Tick getBurstWindow(Tick cmd_tick);
+
+    /**
+     * Used for debugging to observe the contents of the queues.
+     */
+    void printQs() const;
+
+    /**
+     * Burst-align an address.
+     *
+     * @param addr The potentially unaligned address
+     * @param is_dram Does this packet access DRAM?
+     *
+     * @return An address aligned to a memory burst
+     */
+    Addr burstAlign(Addr addr, bool is_dram) const;
+
+    /**
+     * The controller's main read and write queues,
+     * with support for QoS reordering
+     */
+    std::vector<MemPacketQueue> readQueue;
+    std::vector<MemPacketQueue> writeQueue;
+
+    /**
+     * To avoid iterating over the write queue to check for
+     * overlapping transactions, maintain a set of burst addresses
+     * that are currently queued. Since we merge writes to the same
+     * location we never have more than one entry for the same burst
+     * address.
+     */
+    std::unordered_set<Addr> isInWriteQueue;
+
+    /**
+     * Response queue where read packets wait after we're done working
+     * with them, but it's not time to send the response yet. The
+     * responses are stored separately mostly to keep the code clean
+     * and help with event scheduling. For all logical purposes such
+     * as sizing the read queue, this and the main read queue need to
+     * be added together.
+     */
+    std::deque<MemPacket*> respQueue;
+
+    /**
+     * Holds count of commands issued in burst window starting at
+     * defined Tick. This is used to ensure that the command bandwidth
+     * does not exceed the allowable media constraints.
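+     * As an illustrative example with assumed values: with tCK == 1ns and
+     * a 6ns command window, maxCommandsPerWindow is 6, and
+     * verifySingleCmd()/verifyMultiCmd() push any further commands in a
+     * full window out to the next one.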
+     */
+    std::unordered_multiset<Tick> burstTicks;
+
+    /**
+     * Pointer to the interface of the actual dram media when connected
+     */
+    DRAMInterface* const dram;
+
+    /**
+     * Pointer to the interface of the actual nvm media when connected
+     */
+    NVMInterface* const nvm;
+
+    /**
+     * The following are basic design parameters of the memory
+     * controller, and are initialized based on parameter values.
+     * The rowsPerBank is determined based on the capacity, number of
+     * ranks and banks, the burst size, and the row buffer size.
+     */
+    const uint32_t readBufferSize;
+    const uint32_t writeBufferSize;
+    const uint32_t writeHighThreshold;
+    const uint32_t writeLowThreshold;
+    const uint32_t minWritesPerSwitch;
+    uint32_t writesThisTime;
+    uint32_t readsThisTime;
+
+    /**
+     * Memory controller configuration initialized based on parameter
+     * values.
+     */
+    Enums::MemSched memSchedPolicy;
+
+    /**
+     * Pipeline latency of the controller frontend. The frontend
+     * contribution is added to writes (that complete when they are in
+     * the write buffer) and reads that are serviced by the write buffer.
+     */
+    const Tick frontendLatency;
+
+    /**
+     * Pipeline latency of the backend and PHY. Along with the
+     * frontend contribution, this latency is added to reads serviced
+     * by the memory.
+     */
+    const Tick backendLatency;
+
+    /**
+     * Length of a command window, used to check
+     * command bandwidth
+     */
+    const Tick commandWindow;
+
+    /**
+     * Till when must we wait before issuing next RD/WR burst?
+     */
+    Tick nextBurstAt;
+
+    Tick prevArrival;
+
+    /**
+     * The soonest you have to start thinking about the next request
+     * is the longest access time that can occur before
+     * nextBurstAt. Assuming you need to precharge, open a new row,
+     * and access, it is tRP + tRCD + tCL.
+     */
+    Tick nextReqTime;
+
+    struct CtrlStats : public Stats::Group
+    {
+        CtrlStats(MemCtrl &ctrl);
+
+        void regStats() override;
+
+        MemCtrl &ctrl;
+
+        // All statistics that the model needs to capture
+        Stats::Scalar readReqs;
+        Stats::Scalar writeReqs;
+        Stats::Scalar readBursts;
+        Stats::Scalar writeBursts;
+        Stats::Scalar servicedByWrQ;
+        Stats::Scalar mergedWrBursts;
+        Stats::Scalar neitherReadNorWriteReqs;
+        // Average queue lengths
+        Stats::Average avgRdQLen;
+        Stats::Average avgWrQLen;
+
+        Stats::Scalar numRdRetry;
+        Stats::Scalar numWrRetry;
+        Stats::Vector readPktSize;
+        Stats::Vector writePktSize;
+        Stats::Vector rdQLenPdf;
+        Stats::Vector wrQLenPdf;
+        Stats::Histogram rdPerTurnAround;
+        Stats::Histogram wrPerTurnAround;
+
+        Stats::Scalar bytesReadWrQ;
+        Stats::Scalar bytesReadSys;
+        Stats::Scalar bytesWrittenSys;
+        // Average bandwidth
+        Stats::Formula avgRdBWSys;
+        Stats::Formula avgWrBWSys;
+
+        Stats::Scalar totGap;
+        Stats::Formula avgGap;
+
+        // per-master bytes read and written to memory
+        Stats::Vector masterReadBytes;
+        Stats::Vector masterWriteBytes;
+
+        // per-master bytes read and written to memory rate
+        Stats::Formula masterReadRate;
+        Stats::Formula masterWriteRate;
+
+        // per-master read and write serviced memory accesses
+        Stats::Vector masterReadAccesses;
+        Stats::Vector masterWriteAccesses;
+
+        // per-master read and write total memory access latency
+        Stats::Vector masterReadTotalLat;
+        Stats::Vector masterWriteTotalLat;
+
+        // per-master read and write average memory access latency
+        Stats::Formula masterReadAvgLat;
+        Stats::Formula masterWriteAvgLat;
+    };
+
+    CtrlStats stats;
+
+    /**
+     * Upstream caches need this packet until true is returned, so
+     * hold it for deletion until a subsequent call
+     */
+    std::unique_ptr<Packet>
pendingDelete;
+
+    /**
+     * Select either the read or write queue
+     *
+     * @param is_read The current burst is a read, select read queue
+     * @return a reference to the appropriate queue
+     */
+    std::vector<MemPacketQueue>& selQueue(bool is_read)
+    {
+        return (is_read ? readQueue : writeQueue);
+    };
+
+    /**
+     * Remove commands that have already issued from burstTicks
+     */
+    void pruneBurstTick();
+
+  public:
+
+    MemCtrl(const MemCtrlParams* p);
+
+    /**
+     * Ensure that all interfaces have drained commands
+     *
+     * @return bool flag, set once drain complete
+     */
+    bool allIntfDrained() const;
+
+    DrainState drain() override;
+
+    /**
+     * Check for command bus contention for a single-cycle command.
+     * If there is contention, shift the command to the next burst window.
+     * The check verifies that the number of commands issued per burst
+     * window is less than a defined maximum, maxCommandsPerWindow.
+     * Therefore, contention per cycle is not verified and instead
+     * is done based on a burst window.
+     *
+     * @param cmd_tick Initial tick of command, to be verified
+     * @param max_cmds_per_burst Number of commands that can issue
+     *                           in a burst window
+     * @return tick for command issue without contention
+     */
+    Tick verifySingleCmd(Tick cmd_tick, Tick max_cmds_per_burst);
+
+    /**
+     * Check for command bus contention for a multi-cycle (2 currently)
+     * command. If there is contention, shift command(s) to the next burst
+     * window. The check verifies that the number of commands issued per
+     * burst window is less than a defined maximum, maxCommandsPerWindow.
+     * Therefore, contention per cycle is not verified and instead
+     * is done based on a burst window.
+     *
+     * @param cmd_tick Initial tick of command, to be verified
+     * @param max_cmds_per_burst Number of commands that can issue
+     *                           in a burst window
+     * @param max_multi_cmd_split Maximum delay between commands
+     * @return tick for command issue without contention
+     */
+    Tick verifyMultiCmd(Tick cmd_tick, Tick max_cmds_per_burst,
+                        Tick max_multi_cmd_split = 0);
+
+    /**
+     * Is there a respondEvent scheduled?
+     *
+     * @return true if event is scheduled
+     */
+    bool respondEventScheduled() const { return respondEvent.scheduled(); }
+
+    /**
+     * Is there a read/write burst Event scheduled?
+ * + * @return true if event is scheduled + */ + bool requestEventScheduled() const { return nextReqEvent.scheduled(); } + + /** + * restart the controller + * This can be used by interfaces to restart the + * scheduler after maintainence commands complete + * + * @param Tick to schedule next event + */ + void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); } + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a read state + */ + bool inReadBusState(bool next_state) const; + + /** + * Check the current direction of the memory channel + * + * @param next_state Check either the current or next bus state + * @return True when bus is currently in a write state + */ + bool inWriteBusState(bool next_state) const; + + Port &getPort(const std::string &if_name, + PortID idx=InvalidPortID) override; + + virtual void init() override; + virtual void startup() override; + virtual void drainResume() override; + + protected: + + Tick recvAtomic(PacketPtr pkt); + void recvFunctional(PacketPtr pkt); + bool recvTimingReq(PacketPtr pkt); + +}; + +#endif //__MEM_CTRL_HH__ diff --git a/src/mem/mem_interface.cc b/src/mem/mem_interface.cc new file mode 100644 index 000000000..7817c4ac3 --- /dev/null +++ b/src/mem/mem_interface.cc @@ -0,0 +1,2572 @@ +/* + * Copyright (c) 2010-2020 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mem/mem_interface.hh" + +#include "base/bitfield.hh" +#include "base/trace.hh" +#include "debug/DRAM.hh" +#include "debug/DRAMPower.hh" +#include "debug/DRAMState.hh" +#include "debug/NVM.hh" +#include "sim/system.hh" + +using namespace std; +using namespace Data; + +MemInterface::MemInterface(const MemInterfaceParams* _p) + : AbstractMemory(_p), + addrMapping(_p->addr_mapping), + burstSize((_p->devices_per_rank * _p->burst_length * + _p->device_bus_width) / 8), + deviceSize(_p->device_size), + deviceRowBufferSize(_p->device_rowbuffer_size), + devicesPerRank(_p->devices_per_rank), + rowBufferSize(devicesPerRank * deviceRowBufferSize), + burstsPerRowBuffer(rowBufferSize / burstSize), + burstsPerStripe(range.interleaved() ? + range.granularity() / burstSize : 1), + ranksPerChannel(_p->ranks_per_channel), + banksPerRank(_p->banks_per_rank), rowsPerBank(0), + tCK(_p->tCK), tCS(_p->tCS), tBURST(_p->tBURST), + tRTW(_p->tRTW), + tWTR(_p->tWTR), + readBufferSize(_p->read_buffer_size), + writeBufferSize(_p->write_buffer_size) +{} + +void +MemInterface::setCtrl(MemCtrl* _ctrl, unsigned int command_window) +{ + ctrl = _ctrl; + maxCommandsPerWindow = command_window / tCK; +} + +MemPacket* +MemInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr, + unsigned size, bool is_read, bool is_dram) +{ + // decode the address based on the address mapping scheme, with + // Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and + // channel, respectively + uint8_t rank; + uint8_t bank; + // use a 64-bit unsigned during the computations as the row is + // always the top bits, and check before creating the packet + uint64_t row; + + // Get packed address, starting at 0 + Addr addr = getCtrlAddr(pkt_addr); + + // truncate the address to a memory burst, which makes it unique to + // a specific buffer, row, bank, rank and channel + addr = addr / burstSize; + + // we have removed the lowest order address bits that denote the + // position within the column + if (addrMapping == Enums::RoRaBaChCo || addrMapping == Enums::RoRaBaCoCh) { + // the lowest order bits denote the column to ensure that + // sequential cache lines occupy the same row + addr = addr / burstsPerRowBuffer; + + // after the channel bits, get the bank bits to interleave + // over the banks + bank = addr % banksPerRank; + addr = addr / banksPerRank; + + // after the bank, we get the rank bits which thus interleaves + // over the ranks + rank = addr % ranksPerChannel; + addr = addr / ranksPerChannel; + + // lastly, get the row bits, no need to remove them from addr + row = addr % rowsPerBank; + } else if (addrMapping == Enums::RoCoRaBaCh) { + // with emerging technologies, could have small page size with + // interleaving granularity greater than row buffer + if (burstsPerStripe > burstsPerRowBuffer) { + // remove column bits which are a subset of burstsPerStripe + addr = addr / burstsPerRowBuffer; + } else { + // remove lower column bits below channel bits + addr = addr / burstsPerStripe; + } + 
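+        // The remaining bits are decoded below: bank first, then rank,
+        // then any higher-order column bits, then row. As an illustrative
+        // example with assumed values: for a non-interleaved range
+        // (burstsPerStripe == 1) with 8 banks per rank and 2 ranks,
+        // consecutive burst-aligned addresses map to banks 0..7 of rank 0,
+        // then banks 0..7 of rank 1, before the column and row bits
+        // advance.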
+        // start with the bank bits, as this provides the maximum
+        // opportunity for parallelism between requests
+        bank = addr % banksPerRank;
+        addr = addr / banksPerRank;
+
+        // next get the rank bits
+        rank = addr % ranksPerChannel;
+        addr = addr / ranksPerChannel;
+
+        // next, the higher-order column bits
+        if (burstsPerStripe < burstsPerRowBuffer) {
+            addr = addr / (burstsPerRowBuffer / burstsPerStripe);
+        }
+
+        // lastly, get the row bits, no need to remove them from addr
+        row = addr % rowsPerBank;
+    } else
+        panic("Unknown address mapping policy chosen!");
+
+    assert(rank < ranksPerChannel);
+    assert(bank < banksPerRank);
+    assert(row < rowsPerBank);
+    assert(row < Bank::NO_ROW);
+
+    DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
+            pkt_addr, rank, bank, row);
+
+    // create the corresponding memory packet with the entry time and
+    // ready time set to the current tick, the latter will be updated
+    // later
+    uint16_t bank_id = banksPerRank * rank + bank;
+
+    return new MemPacket(pkt, is_read, is_dram, rank, bank, row, bank_id,
+                   pkt_addr, size);
+}
+
+pair<MemPacketQueue::iterator, Tick>
+DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
+{
+    vector<uint32_t> earliest_banks(ranksPerChannel, 0);
+
+    // Has minBankPrep been called to populate earliest_banks?
+    bool filled_earliest_banks = false;
+    // can the PRE/ACT sequence be done without impacting utilization?
+    bool hidden_bank_prep = false;
+
+    // search for seamless row hits first, if no seamless row hit is
+    // found then determine if there are other packets that can be issued
+    // without incurring additional bus delay due to bank timing
+    // Will select closed rows first to enable more open row possibilities
+    // in future selections
+    bool found_hidden_bank = false;
+
+    // remember if we found a row hit, not seamless, but bank prepped
+    // and ready
+    bool found_prepped_pkt = false;
+
+    // if we have no row hit, prepped or not, and no seamless packet,
+    // just go for the earliest possible
+    bool found_earliest_pkt = false;
+
+    Tick selected_col_at = MaxTick;
+    auto selected_pkt_it = queue.end();
+
+    for (auto i = queue.begin(); i != queue.end() ; ++i) {
+        MemPacket* pkt = *i;
+
+        // select optimal DRAM packet in Q
+        if (pkt->isDram()) {
+            const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
+            const Tick col_allowed_at = pkt->isRead() ?
bank.rdAllowedAt : + bank.wrAllowedAt; + + DPRINTF(DRAM, "%s checking DRAM packet in bank %d, row %d\n", + __func__, pkt->bank, pkt->row); + + // check if rank is not doing a refresh and thus is available, + // if not, jump to the next packet + if (burstReady(pkt)) { + + DPRINTF(DRAM, + "%s bank %d - Rank %d available\n", __func__, + pkt->bank, pkt->rank); + + // check if it is a row hit + if (bank.openRow == pkt->row) { + // no additional rank-to-rank or same bank-group + // delays, or we switched read/write and might as well + // go for the row hit + if (col_allowed_at <= min_col_at) { + // FCFS within the hits, giving priority to + // commands that can issue seamlessly, without + // additional delay, such as same rank accesses + // and/or different bank-group accesses + DPRINTF(DRAM, "%s Seamless buffer hit\n", __func__); + selected_pkt_it = i; + selected_col_at = col_allowed_at; + // no need to look through the remaining queue entries + break; + } else if (!found_hidden_bank && !found_prepped_pkt) { + // if we did not find a packet to a closed row that can + // issue the bank commands without incurring delay, and + // did not yet find a packet to a prepped row, remember + // the current one + selected_pkt_it = i; + selected_col_at = col_allowed_at; + found_prepped_pkt = true; + DPRINTF(DRAM, "%s Prepped row buffer hit\n", __func__); + } + } else if (!found_earliest_pkt) { + // if we have not initialised the bank status, do it + // now, and only once per scheduling decisions + if (!filled_earliest_banks) { + // determine entries with earliest bank delay + std::tie(earliest_banks, hidden_bank_prep) = + minBankPrep(queue, min_col_at); + filled_earliest_banks = true; + } + + // bank is amongst first available banks + // minBankPrep will give priority to packets that can + // issue seamlessly + if (bits(earliest_banks[pkt->rank], + pkt->bank, pkt->bank)) { + found_earliest_pkt = true; + found_hidden_bank = hidden_bank_prep; + + // give priority to packets that can issue + // bank commands 'behind the scenes' + // any additional delay if any will be due to + // col-to-col command requirements + if (hidden_bank_prep || !found_prepped_pkt) { + selected_pkt_it = i; + selected_col_at = col_allowed_at; + } + } + } + } else { + DPRINTF(DRAM, "%s bank %d - Rank %d not available\n", __func__, + pkt->bank, pkt->rank); + } + } + } + + if (selected_pkt_it == queue.end()) { + DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__); + } + + return make_pair(selected_pkt_it, selected_col_at); +} + +void +DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref, + Tick act_tick, uint32_t row) +{ + assert(rank_ref.actTicks.size() == activationLimit); + + // verify that we have command bandwidth to issue the activate + // if not, shift to next burst window + Tick act_at; + if (twoCycleActivate) + act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD); + else + act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow); + + DPRINTF(DRAM, "Activate at tick %d\n", act_at); + + // update the open row + assert(bank_ref.openRow == Bank::NO_ROW); + bank_ref.openRow = row; + + // start counting anew, this covers both the case when we + // auto-precharged, and when this access is forced to + // precharge + bank_ref.bytesAccessed = 0; + bank_ref.rowAccesses = 0; + + ++rank_ref.numBanksActive; + assert(rank_ref.numBanksActive <= banksPerRank); + + DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got " + "%d active\n", bank_ref.bank, rank_ref.rank, act_at, + 
ranks[rank_ref.rank]->numBanksActive); + + rank_ref.cmdList.push_back(Command(MemCommand::ACT, bank_ref.bank, + act_at)); + + DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_at, tCK) - + timeStampOffset, bank_ref.bank, rank_ref.rank); + + // The next access has to respect tRAS for this bank + bank_ref.preAllowedAt = act_at + tRAS; + + // Respect the row-to-column command delay for both read and write cmds + bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt); + bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt); + + // start by enforcing tRRD + for (int i = 0; i < banksPerRank; i++) { + // next activate to any bank in this rank must not happen + // before tRRD + if (bankGroupArch && (bank_ref.bankgr == rank_ref.banks[i].bankgr)) { + // bank group architecture requires longer delays between + // ACT commands within the same bank group. Use tRRD_L + // in this case + rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD_L, + rank_ref.banks[i].actAllowedAt); + } else { + // use shorter tRRD value when either + // 1) bank group architecture is not supportted + // 2) bank is in a different bank group + rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD, + rank_ref.banks[i].actAllowedAt); + } + } + + // next, we deal with tXAW, if the activation limit is disabled + // then we directly schedule an activate power event + if (!rank_ref.actTicks.empty()) { + // sanity check + if (rank_ref.actTicks.back() && + (act_at - rank_ref.actTicks.back()) < tXAW) { + panic("Got %d activates in window %d (%llu - %llu) which " + "is smaller than %llu\n", activationLimit, act_at - + rank_ref.actTicks.back(), act_at, + rank_ref.actTicks.back(), tXAW); + } + + // shift the times used for the book keeping, the last element + // (highest index) is the oldest one and hence the lowest value + rank_ref.actTicks.pop_back(); + + // record an new activation (in the future) + rank_ref.actTicks.push_front(act_at); + + // cannot activate more than X times in time window tXAW, push the + // next one (the X + 1'st activate) to be tXAW away from the + // oldest in our window of X + if (rank_ref.actTicks.back() && + (act_at - rank_ref.actTicks.back()) < tXAW) { + DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate " + "no earlier than %llu\n", activationLimit, + rank_ref.actTicks.back() + tXAW); + for (int j = 0; j < banksPerRank; j++) + // next activate must not happen before end of window + rank_ref.banks[j].actAllowedAt = + std::max(rank_ref.actTicks.back() + tXAW, + rank_ref.banks[j].actAllowedAt); + } + } + + // at the point when this activate takes place, make sure we + // transition to the active power state + if (!rank_ref.activateEvent.scheduled()) + schedule(rank_ref.activateEvent, act_at); + else if (rank_ref.activateEvent.when() > act_at) + // move it sooner in time + reschedule(rank_ref.activateEvent, act_at); +} + +void +DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick, + bool auto_or_preall, bool trace) +{ + // make sure the bank has an open row + assert(bank.openRow != Bank::NO_ROW); + + // sample the bytes per activate here since we are closing + // the page + stats.bytesPerActivate.sample(bank.bytesAccessed); + + bank.openRow = Bank::NO_ROW; + + Tick pre_at = pre_tick; + if (auto_or_preall) { + // no precharge allowed before this one + bank.preAllowedAt = pre_at; + } else { + // Issuing an explicit PRE command + // Verify that we have command bandwidth to issue the precharge + // if not, shift to next burst window + pre_at = 
ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow); + // enforce tPPD + for (int i = 0; i < banksPerRank; i++) { + rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD, + rank_ref.banks[i].preAllowedAt); + } + } + + Tick pre_done_at = pre_at + tRP; + + bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at); + + assert(rank_ref.numBanksActive != 0); + --rank_ref.numBanksActive; + + DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got " + "%d active\n", bank.bank, rank_ref.rank, pre_at, + rank_ref.numBanksActive); + + if (trace) { + + rank_ref.cmdList.push_back(Command(MemCommand::PRE, bank.bank, + pre_at)); + DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) - + timeStampOffset, bank.bank, rank_ref.rank); + } + + // if we look at the current number of active banks we might be + // tempted to think the DRAM is now idle, however this can be + // undone by an activate that is scheduled to happen before we + // would have reached the idle state, so schedule an event and + // rather check once we actually make it to the point in time when + // the (last) precharge takes place + if (!rank_ref.prechargeEvent.scheduled()) { + schedule(rank_ref.prechargeEvent, pre_done_at); + // New event, increment count + ++rank_ref.outstandingEvents; + } else if (rank_ref.prechargeEvent.when() < pre_done_at) { + reschedule(rank_ref.prechargeEvent, pre_done_at); + } +} + +pair +DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, + const std::vector& queue) +{ + DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n", + mem_pkt->addr, mem_pkt->rank, mem_pkt->bank, mem_pkt->row); + + // get the rank + Rank& rank_ref = *ranks[mem_pkt->rank]; + + assert(rank_ref.inRefIdleState()); + + // are we in or transitioning to a low-power state and have not scheduled + // a power-up event? + // if so, wake up from power down to issue RD/WR burst + if (rank_ref.inLowPowerState) { + assert(rank_ref.pwrState != PWR_SREF); + rank_ref.scheduleWakeUpEvent(tXP); + } + + // get the bank + Bank& bank_ref = rank_ref.banks[mem_pkt->bank]; + + // for the state we need to track if it is a row hit or not + bool row_hit = true; + + // Determine the access latency and update the bank state + if (bank_ref.openRow == mem_pkt->row) { + // nothing to do + } else { + row_hit = false; + + // If there is a page open, precharge it. + if (bank_ref.openRow != Bank::NO_ROW) { + prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt, + curTick())); + } + + // next we need to account for the delay in activating the page + Tick act_tick = std::max(bank_ref.actAllowedAt, curTick()); + + // Record the activation and deal with all the global timing + // constraints caused be a new activation (tRRD and tXAW) + activateBank(rank_ref, bank_ref, act_tick, mem_pkt->row); + } + + // respect any constraints on the command (e.g. tRCD or tCCD) + const Tick col_allowed_at = mem_pkt->isRead() ? 
+ bank_ref.rdAllowedAt : bank_ref.wrAllowedAt; + + // we need to wait until the bus is available before we can issue + // the command; need to ensure minimum bus delay requirement is met + Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()}); + + // verify that we have command bandwidth to issue the burst + // if not, shift to next burst window + if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay)) + cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK); + else + cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow); + + // if we are interleaving bursts, ensure that + // 1) we don't double interleave on next burst issue + // 2) we are at an interleave boundary; if not, shift to next boundary + Tick burst_gap = tBURST_MIN; + if (burstInterleave) { + if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) { + // already interleaving, push next command to end of full burst + burst_gap = tBURST; + } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) { + // not at an interleave boundary after bandwidth check + // Shift command to tBURST boundary to avoid data contention + // Command will remain in the same burst window given that + // tBURST is less than tBURST_MAX + cmd_at = rank_ref.lastBurstTick + tBURST; + } + } + DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at); + + // update the packet ready time + mem_pkt->readyTime = cmd_at + tCL + tBURST; + + rank_ref.lastBurstTick = cmd_at; + + // update the time for the next read/write burst for each + // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here) + Tick dly_to_rd_cmd; + Tick dly_to_wr_cmd; + for (int j = 0; j < ranksPerChannel; j++) { + for (int i = 0; i < banksPerRank; i++) { + if (mem_pkt->rank == j) { + if (bankGroupArch && + (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) { + // bank group architecture requires longer delays between + // RD/WR burst commands to the same bank group. + // tCCD_L is default requirement for same BG timing + // tCCD_L_WR is required for write-to-write + // Need to also take bus turnaround delays into account + dly_to_rd_cmd = mem_pkt->isRead() ? + tCCD_L : std::max(tCCD_L, wrToRdDlySameBG); + dly_to_wr_cmd = mem_pkt->isRead() ? + std::max(tCCD_L, rdToWrDlySameBG) : + tCCD_L_WR; + } else { + // tBURST is default requirement for diff BG timing + // Need to also take bus turnaround delays into account + dly_to_rd_cmd = mem_pkt->isRead() ? burst_gap : + writeToReadDelay(); + dly_to_wr_cmd = mem_pkt->isRead() ? readToWriteDelay() : + burst_gap; + } + } else { + // different rank is by default in a different bank group and + // doesn't require longer tCCD or additional RTW, WTR delays + // Need to account for rank-to-rank switching + dly_to_wr_cmd = rankToRankDelay(); + dly_to_rd_cmd = rankToRankDelay(); + } + ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd, + ranks[j]->banks[i].rdAllowedAt); + ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd, + ranks[j]->banks[i].wrAllowedAt); + } + } + + // Save rank of current access + activeRank = mem_pkt->rank; + + // If this is a write, we also need to respect the write recovery + // time before a precharge, in the case of a read, respect the + // read to precharge constraint + bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt, + mem_pkt->isRead() ? 
cmd_at + tRTP : + mem_pkt->readyTime + tWR); + + // increment the bytes accessed and the accesses per row + bank_ref.bytesAccessed += burstSize; + ++bank_ref.rowAccesses; + + // if we reached the max, then issue with an auto-precharge + bool auto_precharge = pageMgmt == Enums::close || + bank_ref.rowAccesses == maxAccessesPerRow; + + // if we did not hit the limit, we might still want to + // auto-precharge + if (!auto_precharge && + (pageMgmt == Enums::open_adaptive || + pageMgmt == Enums::close_adaptive)) { + // a twist on the open and close page policies: + // 1) open_adaptive page policy does not blindly keep the + // page open, but close it if there are no row hits, and there + // are bank conflicts in the queue + // 2) close_adaptive page policy does not blindly close the + // page, but closes it only if there are no row hits in the queue. + // In this case, only force an auto precharge when there + // are no same page hits in the queue + bool got_more_hits = false; + bool got_bank_conflict = false; + + for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) { + auto p = queue[i].begin(); + // keep on looking until we find a hit or reach the end of the + // queue + // 1) if a hit is found, then both open and close adaptive + // policies keep the page open + // 2) if no hit is found, got_bank_conflict is set to true if a + // bank conflict request is waiting in the queue + // 3) make sure we are not considering the packet that we are + // currently dealing with + while (!got_more_hits && p != queue[i].end()) { + if (mem_pkt != (*p)) { + bool same_rank_bank = (mem_pkt->rank == (*p)->rank) && + (mem_pkt->bank == (*p)->bank); + + bool same_row = mem_pkt->row == (*p)->row; + got_more_hits |= same_rank_bank && same_row; + got_bank_conflict |= same_rank_bank && !same_row; + } + ++p; + } + + if (got_more_hits) + break; + } + + // auto pre-charge when either + // 1) open_adaptive policy, we have not got any more hits, and + // have a bank conflict + // 2) close_adaptive policy and we have not got any more hits + auto_precharge = !got_more_hits && + (got_bank_conflict || pageMgmt == Enums::close_adaptive); + } + + // DRAMPower trace command to be written + std::string mem_cmd = mem_pkt->isRead() ? "RD" : "WR"; + + // MemCommand required for DRAMPower library + MemCommand::cmds command = (mem_cmd == "RD") ? 
MemCommand::RD : + MemCommand::WR; + + rank_ref.cmdList.push_back(Command(command, mem_pkt->bank, cmd_at)); + + DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) - + timeStampOffset, mem_cmd, mem_pkt->bank, mem_pkt->rank); + + // if this access should use auto-precharge, then we are + // closing the row after the read/write burst + if (auto_precharge) { + // if auto-precharge push a PRE command at the correct tick to the + // list used by DRAMPower library to calculate power + prechargeBank(rank_ref, bank_ref, std::max(curTick(), + bank_ref.preAllowedAt), true); + + DPRINTF(DRAM, "Auto-precharged bank: %d\n", mem_pkt->bankId); + } + + // Update the stats and schedule the next request + if (mem_pkt->isRead()) { + // Every respQueue which will generate an event, increment count + ++rank_ref.outstandingEvents; + + stats.readBursts++; + if (row_hit) + stats.readRowHits++; + stats.bytesRead += burstSize; + stats.perBankRdBursts[mem_pkt->bankId]++; + + // Update latency stats + stats.totMemAccLat += mem_pkt->readyTime - mem_pkt->entryTime; + stats.totQLat += cmd_at - mem_pkt->entryTime; + stats.totBusLat += tBURST; + } else { + // Schedule write done event to decrement event count + // after the readyTime has been reached + // Only schedule latest write event to minimize events + // required; only need to ensure that final event scheduled covers + // the time that writes are outstanding and bus is active + // to holdoff power-down entry events + if (!rank_ref.writeDoneEvent.scheduled()) { + schedule(rank_ref.writeDoneEvent, mem_pkt->readyTime); + // New event, increment count + ++rank_ref.outstandingEvents; + + } else if (rank_ref.writeDoneEvent.when() < mem_pkt->readyTime) { + reschedule(rank_ref.writeDoneEvent, mem_pkt->readyTime); + } + // will remove write from queue when returned to parent function + // decrement count for DRAM rank + --rank_ref.writeEntries; + + stats.writeBursts++; + if (row_hit) + stats.writeRowHits++; + stats.bytesWritten += burstSize; + stats.perBankWrBursts[mem_pkt->bankId]++; + + } + // Update bus state to reflect when previous command was issued + return make_pair(cmd_at, cmd_at + burst_gap); +} + +void +DRAMInterface::addRankToRankDelay(Tick cmd_at) +{ + // update timing for DRAM ranks due to bursts issued + // to ranks on other media interfaces + for (auto n : ranks) { + for (int i = 0; i < banksPerRank; i++) { + // different rank by default + // Need to only account for rank-to-rank switching + n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(), + n->banks[i].rdAllowedAt); + n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(), + n->banks[i].wrAllowedAt); + } + } +} + +DRAMInterface::DRAMInterface(const DRAMInterfaceParams* _p) + : MemInterface(_p), + bankGroupsPerRank(_p->bank_groups_per_rank), + bankGroupArch(_p->bank_groups_per_rank > 0), + tCL(_p->tCL), + tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX), + tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD), + tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP), + tRFC(_p->tRFC), tREFI(_p->tREFI), tRRD(_p->tRRD), tRRD_L(_p->tRRD_L), + tPPD(_p->tPPD), tAAD(_p->tAAD), + tXAW(_p->tXAW), tXP(_p->tXP), tXS(_p->tXS), + clkResyncDelay(tCL + _p->tBURST_MAX), + dataClockSync(_p->data_clock_sync), + burstInterleave(tBURST != tBURST_MIN), + twoCycleActivate(_p->two_cycle_activate), + activationLimit(_p->activation_limit), + wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L), + rdToWrDlySameBG(_p->tRTW + _p->tBURST_MAX), + pageMgmt(_p->page_policy), + 
maxAccessesPerRow(_p->max_accesses_per_row), + timeStampOffset(0), activeRank(0), + enableDRAMPowerdown(_p->enable_dram_powerdown), + lastStatsResetTick(0), + stats(*this) +{ + DPRINTF(DRAM, "Setting up DRAM Interface\n"); + + fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, " + "must be a power of two\n", burstSize); + + // sanity check the ranks since we rely on bit slicing for the + // address decoding + fatal_if(!isPowerOf2(ranksPerChannel), "DRAM rank count of %d is " + "not allowed, must be a power of two\n", ranksPerChannel); + + for (int i = 0; i < ranksPerChannel; i++) { + DPRINTF(DRAM, "Creating DRAM rank %d \n", i); + Rank* rank = new Rank(_p, i, *this); + ranks.push_back(rank); + } + + // determine the dram actual capacity from the DRAM config in Mbytes + uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank * + ranksPerChannel; + + uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size()); + + DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity, + AbstractMemory::size()); + + // if actual DRAM size does not match memory capacity in system warn! + if (deviceCapacity != capacity / (1024 * 1024)) + warn("DRAM device capacity (%d Mbytes) does not match the " + "address range assigned (%d Mbytes)\n", deviceCapacity, + capacity / (1024 * 1024)); + + DPRINTF(DRAM, "Row buffer size %d bytes with %d bursts per row buffer\n", + rowBufferSize, burstsPerRowBuffer); + + rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel); + + // some basic sanity checks + if (tREFI <= tRP || tREFI <= tRFC) { + fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n", + tREFI, tRP, tRFC); + } + + // basic bank group architecture checks -> + if (bankGroupArch) { + // must have at least one bank per bank group + if (bankGroupsPerRank > banksPerRank) { + fatal("banks per rank (%d) must be equal to or larger than " + "banks groups per rank (%d)\n", + banksPerRank, bankGroupsPerRank); + } + // must have same number of banks in each bank group + if ((banksPerRank % bankGroupsPerRank) != 0) { + fatal("Banks per rank (%d) must be evenly divisible by bank " + "groups per rank (%d) for equal banks per bank group\n", + banksPerRank, bankGroupsPerRank); + } + // tCCD_L should be greater than minimal, back-to-back burst delay + if (tCCD_L <= tBURST) { + fatal("tCCD_L (%d) should be larger than the minimum bus delay " + "(%d) when bank groups per rank (%d) is greater than 1\n", + tCCD_L, tBURST, bankGroupsPerRank); + } + // tCCD_L_WR should be greater than minimal, back-to-back burst delay + if (tCCD_L_WR <= tBURST) { + fatal("tCCD_L_WR (%d) should be larger than the minimum bus delay " + " (%d) when bank groups per rank (%d) is greater than 1\n", + tCCD_L_WR, tBURST, bankGroupsPerRank); + } + // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay + // some datasheets might specify it equal to tRRD + if (tRRD_L < tRRD) { + fatal("tRRD_L (%d) should be larger than tRRD (%d) when " + "bank groups per rank (%d) is greater than 1\n", + tRRD_L, tRRD, bankGroupsPerRank); + } + } +} + +void +DRAMInterface::init() +{ + AbstractMemory::init(); + + // a bit of sanity checks on the interleaving, save it for here to + // ensure that the system pointer is initialised + if (range.interleaved()) { + if (addrMapping == Enums::RoRaBaChCo) { + if (rowBufferSize != range.granularity()) { + fatal("Channel interleaving of %s doesn't match RoRaBaChCo " + "address map\n", name()); + } + } else if (addrMapping == Enums::RoRaBaCoCh || + addrMapping == 
Enums::RoCoRaBaCh) { + // for the interleavings with channel bits in the bottom, + // if the system uses a channel striping granularity that + // is larger than the DRAM burst size, then map the + // sequential accesses within a stripe to a number of + // columns in the DRAM, effectively placing some of the + // lower-order column bits as the least-significant bits + // of the address (above the ones denoting the burst size) + assert(burstsPerStripe >= 1); + + // channel striping has to be done at a granularity that + // is equal or larger to a cache line + if (system()->cacheLineSize() > range.granularity()) { + fatal("Channel interleaving of %s must be at least as large " + "as the cache line size\n", name()); + } + + // ...and equal or smaller than the row-buffer size + if (rowBufferSize < range.granularity()) { + fatal("Channel interleaving of %s must be at most as large " + "as the row-buffer size\n", name()); + } + // this is essentially the check above, so just to be sure + assert(burstsPerStripe <= burstsPerRowBuffer); + } + } +} + +void +DRAMInterface::startup() +{ + if (system()->isTimingMode()) { + // timestamp offset should be in clock cycles for DRAMPower + timeStampOffset = divCeil(curTick(), tCK); + + for (auto r : ranks) { + r->startup(curTick() + tREFI - tRP); + } + } +} + +bool +DRAMInterface::isBusy() +{ + int busy_ranks = 0; + for (auto r : ranks) { + if (!r->inRefIdleState()) { + if (r->pwrState != PWR_SREF) { + // rank is busy refreshing + DPRINTF(DRAMState, "Rank %d is not available\n", r->rank); + busy_ranks++; + + // let the rank know that if it was waiting to drain, it + // is now done and ready to proceed + r->checkDrainDone(); + } + + // check if we were in self-refresh and haven't started + // to transition out + if ((r->pwrState == PWR_SREF) && r->inLowPowerState) { + DPRINTF(DRAMState, "Rank %d is in self-refresh\n", r->rank); + // if we have commands queued to this rank and we don't have + // a minimum number of active commands enqueued, + // exit self-refresh + if (r->forceSelfRefreshExit()) { + DPRINTF(DRAMState, "rank %d was in self refresh and" + " should wake up\n", r->rank); + //wake up from self-refresh + r->scheduleWakeUpEvent(tXS); + // things are brought back into action once a refresh is + // performed after self-refresh + // continue with selection for other ranks + } + } + } + } + return (busy_ranks == ranksPerChannel); +} + +void DRAMInterface::setupRank(const uint8_t rank, const bool is_read) +{ + // increment entry count of the rank based on packet type + if (is_read) { + ++ranks[rank]->readEntries; + } else { + ++ranks[rank]->writeEntries; + } +} + +void +DRAMInterface::respondEvent(uint8_t rank) +{ + Rank& rank_ref = *ranks[rank]; + + // if a read has reached its ready-time, decrement the number of reads + // At this point the packet has been handled and there is a possibility + // to switch to low-power mode if no other packet is available + --rank_ref.readEntries; + DPRINTF(DRAM, "number of read entries for rank %d is %d\n", + rank, rank_ref.readEntries); + + // counter should at least indicate one outstanding request + // for this read + assert(rank_ref.outstandingEvents > 0); + // read response received, decrement count + --rank_ref.outstandingEvents; + + // at this moment should not have transitioned to a low-power state + assert((rank_ref.pwrState != PWR_SREF) && + (rank_ref.pwrState != PWR_PRE_PDN) && + (rank_ref.pwrState != PWR_ACT_PDN)); + + // track if this is the last packet before idling + // and that there are no outstanding 
commands to this rank
+    if (rank_ref.isQueueEmpty() && rank_ref.outstandingEvents == 0 &&
+        rank_ref.inRefIdleState() && enableDRAMPowerdown) {
+        // verify that there are no events scheduled
+        assert(!rank_ref.activateEvent.scheduled());
+        assert(!rank_ref.prechargeEvent.scheduled());
+
+        // if coming from active state, schedule power event to
+        // active power-down else go to precharge power-down
+        DPRINTF(DRAMState, "Rank %d sleep at tick %d; current power state is "
+                "%d\n", rank, curTick(), rank_ref.pwrState);
+
+        // default to ACT power-down unless already in IDLE state
+        // could be in IDLE if PRE issued before data returned
+        PowerState next_pwr_state = PWR_ACT_PDN;
+        if (rank_ref.pwrState == PWR_IDLE) {
+            next_pwr_state = PWR_PRE_PDN;
+        }
+
+        rank_ref.powerDownSleep(next_pwr_state, curTick());
+    }
+}
+
+void
+DRAMInterface::checkRefreshState(uint8_t rank)
+{
+    Rank& rank_ref = *ranks[rank];
+
+    if ((rank_ref.refreshState == REF_PRE) &&
+        !rank_ref.prechargeEvent.scheduled()) {
+        // kick the refresh event loop into action again if banks already
+        // closed and just waiting for read to complete
+        schedule(rank_ref.refreshEvent, curTick());
+    }
+}
+
+void
+DRAMInterface::drainRanks()
+{
+    // also need to kick off events to exit self-refresh
+    for (auto r : ranks) {
+        // force self-refresh exit, which in turn will issue auto-refresh
+        if (r->pwrState == PWR_SREF) {
+            DPRINTF(DRAM,"Rank%d: Forcing self-refresh wakeup in drain\n",
+                    r->rank);
+            r->scheduleWakeUpEvent(tXS);
+        }
+    }
+}
+
+bool
+DRAMInterface::allRanksDrained() const
+{
+    // true until proven false
+    bool all_ranks_drained = true;
+    for (auto r : ranks) {
+        // then verify that the power state is IDLE ensuring all banks are
+        // closed and rank is not in a low power state. Also verify that rank
+        // is idle from a refresh point of view.
+        all_ranks_drained = r->inPwrIdleState() && r->inRefIdleState() &&
+                            all_ranks_drained;
+    }
+    return all_ranks_drained;
+}
+
+void
+DRAMInterface::suspend()
+{
+    for (auto r : ranks) {
+        r->suspend();
+    }
+}
+
+pair<vector<uint32_t>, bool>
+DRAMInterface::minBankPrep(const MemPacketQueue& queue,
+                           Tick min_col_at) const
+{
+    Tick min_act_at = MaxTick;
+    vector<uint32_t> bank_mask(ranksPerChannel, 0);
+
+    // latest Tick for which ACT can occur without incurring additional
+    // delay on the data bus
+    const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick());
+
+    // Flag condition when burst can issue back-to-back with previous burst
+    bool found_seamless_bank = false;
+
+    // Flag condition when bank can be opened without incurring additional
+    // delay on the data bus
+    bool hidden_bank_prep = false;
+
+    // determine if we have queued transactions targeting the
+    // bank in question
+    vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
+    for (const auto& p : queue) {
+        if (p->isDram() && ranks[p->rank]->inRefIdleState())
+            got_waiting[p->bankId] = true;
+    }
+
+    // Find command with optimal bank timing
+    // Will prioritize commands that can issue seamlessly.
+    for (int i = 0; i < ranksPerChannel; i++) {
+        for (int j = 0; j < banksPerRank; j++) {
+            uint16_t bank_id = i * banksPerRank + j;
+
+            // if we have waiting requests for the bank, and it is
+            // amongst the first available, update the mask
+            if (got_waiting[bank_id]) {
+                // make sure this rank is not currently refreshing.
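For illustration, the per-bank search in the loop that follows boils down to the earliest-activate estimate sketched here. This is a minimal standalone sketch with hypothetical plain inputs and helper names (earliestAct, earliestCol, isSeamless are not part of this patch):

#include <algorithm>
#include <cstdint>

using Tick = uint64_t;
static const uint32_t NO_ROW = UINT32_MAX;  // stand-in for Bank::NO_ROW

// Earliest tick at which a bank could issue its ACT: if a row is still
// open it must first be precharged, which costs an extra tRP.
Tick earliestAct(uint32_t open_row, Tick act_allowed_at,
                 Tick pre_allowed_at, Tick now, Tick tRP)
{
    return open_row == NO_ROW ? std::max(act_allowed_at, now)
                              : std::max(pre_allowed_at, now) + tRP;
}

// Earliest tick for the RD/WR burst itself: bounded by the bank's own
// column constraint and by the ACT-to-RD/WR spacing (tRCD).
Tick earliestCol(Tick col_allowed_at, Tick act_at, Tick tRCD)
{
    return std::max(col_allowed_at, act_at + tRCD);
}

// A bank is "seamless" when its burst would issue no later than the
// earliest burst time already available on the data bus (min_col_at).
bool isSeamless(Tick col_at, Tick min_col_at)
{
    return col_at <= min_col_at;
}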
+ assert(ranks[i]->inRefIdleState()); + // simplistic approximation of when the bank can issue + // an activate, ignoring any rank-to-rank switching + // cost in this calculation + Tick act_at = ranks[i]->banks[j].openRow == Bank::NO_ROW ? + std::max(ranks[i]->banks[j].actAllowedAt, curTick()) : + std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP; + + // When is the earliest the R/W burst can issue? + const Tick col_allowed_at = ctrl->inReadBusState(false) ? + ranks[i]->banks[j].rdAllowedAt : + ranks[i]->banks[j].wrAllowedAt; + Tick col_at = std::max(col_allowed_at, act_at + tRCD); + + // bank can issue burst back-to-back (seamlessly) with + // previous burst + bool new_seamless_bank = col_at <= min_col_at; + + // if we found a new seamless bank or we have no + // seamless banks, and got a bank with an earlier + // activate time, it should be added to the bit mask + if (new_seamless_bank || + (!found_seamless_bank && act_at <= min_act_at)) { + // if we did not have a seamless bank before, and + // we do now, reset the bank mask, also reset it + // if we have not yet found a seamless bank and + // the activate time is smaller than what we have + // seen so far + if (!found_seamless_bank && + (new_seamless_bank || act_at < min_act_at)) { + std::fill(bank_mask.begin(), bank_mask.end(), 0); + } + + found_seamless_bank |= new_seamless_bank; + + // ACT can occur 'behind the scenes' + hidden_bank_prep = act_at <= hidden_act_max; + + // set the bit corresponding to the available bank + replaceBits(bank_mask[i], j, j, 1); + min_act_at = act_at; + } + } + } + } + + return make_pair(bank_mask, hidden_bank_prep); +} + +DRAMInterface* +DRAMInterfaceParams::create() +{ + return new DRAMInterface(this); +} + +DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p, + int _rank, DRAMInterface& _dram) + : EventManager(&_dram), dram(_dram), + pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE), + pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE), + refreshState(REF_IDLE), inLowPowerState(false), rank(_rank), + readEntries(0), writeEntries(0), outstandingEvents(0), + wakeUpAllowedAt(0), power(_p, false), banks(_p->banks_per_rank), + numBanksActive(0), actTicks(_p->activation_limit, 0), lastBurstTick(0), + writeDoneEvent([this]{ processWriteDoneEvent(); }, name()), + activateEvent([this]{ processActivateEvent(); }, name()), + prechargeEvent([this]{ processPrechargeEvent(); }, name()), + refreshEvent([this]{ processRefreshEvent(); }, name()), + powerEvent([this]{ processPowerEvent(); }, name()), + wakeUpEvent([this]{ processWakeUpEvent(); }, name()), + stats(_dram, *this) +{ + for (int b = 0; b < _p->banks_per_rank; b++) { + banks[b].bank = b; + // GDDR addressing of banks to BG is linear. + // Here we assume that all DRAM generations address bank groups as + // follows: + if (_p->bank_groups_per_rank > 0) { + // Simply assign lower bits to bank group in order to + // rotate across bank groups as banks are incremented + // e.g. 
with 4 banks per bank group and 16 banks total: + // banks 0,4,8,12 are in bank group 0 + // banks 1,5,9,13 are in bank group 1 + // banks 2,6,10,14 are in bank group 2 + // banks 3,7,11,15 are in bank group 3 + banks[b].bankgr = b % _p->bank_groups_per_rank; + } else { + // No bank groups; simply assign to bank number + banks[b].bankgr = b; + } + } +} + +void +DRAMInterface::Rank::startup(Tick ref_tick) +{ + assert(ref_tick > curTick()); + + pwrStateTick = curTick(); + + // kick off the refresh, and give ourselves enough time to + // precharge + schedule(refreshEvent, ref_tick); +} + +void +DRAMInterface::Rank::suspend() +{ + deschedule(refreshEvent); + + // Update the stats + updatePowerStats(); + + // don't automatically transition back to LP state after next REF + pwrStatePostRefresh = PWR_IDLE; +} + +bool +DRAMInterface::Rank::isQueueEmpty() const +{ + // check commmands in Q based on current bus direction + bool no_queued_cmds = (dram.ctrl->inReadBusState(true) && + (readEntries == 0)) + || (dram.ctrl->inWriteBusState(true) && + (writeEntries == 0)); + return no_queued_cmds; +} + +void +DRAMInterface::Rank::checkDrainDone() +{ + // if this rank was waiting to drain it is now able to proceed to + // precharge + if (refreshState == REF_DRAIN) { + DPRINTF(DRAM, "Refresh drain done, now precharging\n"); + + refreshState = REF_PD_EXIT; + + // hand control back to the refresh event loop + schedule(refreshEvent, curTick()); + } +} + +void +DRAMInterface::Rank::flushCmdList() +{ + // at the moment sort the list of commands and update the counters + // for DRAMPower libray when doing a refresh + sort(cmdList.begin(), cmdList.end(), DRAMInterface::sortTime); + + auto next_iter = cmdList.begin(); + // push to commands to DRAMPower + for ( ; next_iter != cmdList.end() ; ++next_iter) { + Command cmd = *next_iter; + if (cmd.timeStamp <= curTick()) { + // Move all commands at or before curTick to DRAMPower + power.powerlib.doCommand(cmd.type, cmd.bank, + divCeil(cmd.timeStamp, dram.tCK) - + dram.timeStampOffset); + } else { + // done - found all commands at or before curTick() + // next_iter references the 1st command after curTick + break; + } + } + // reset cmdList to only contain commands after curTick + // if there are no commands after curTick, updated cmdList will be empty + // in this case, next_iter is cmdList.end() + cmdList.assign(next_iter, cmdList.end()); +} + +void +DRAMInterface::Rank::processActivateEvent() +{ + // we should transition to the active state as soon as any bank is active + if (pwrState != PWR_ACT) + // note that at this point numBanksActive could be back at + // zero again due to a precharge scheduled in the future + schedulePowerEvent(PWR_ACT, curTick()); +} + +void +DRAMInterface::Rank::processPrechargeEvent() +{ + // counter should at least indicate one outstanding request + // for this precharge + assert(outstandingEvents > 0); + // precharge complete, decrement count + --outstandingEvents; + + // if we reached zero, then special conditions apply as we track + // if all banks are precharged for the power models + if (numBanksActive == 0) { + // no reads to this rank in the Q and no pending + // RD/WR or refresh commands + if (isQueueEmpty() && outstandingEvents == 0 && + dram.enableDRAMPowerdown) { + // should still be in ACT state since bank still open + assert(pwrState == PWR_ACT); + + // All banks closed - switch to precharge power down state. 
+ DPRINTF(DRAMState, "Rank %d sleep at tick %d\n", + rank, curTick()); + powerDownSleep(PWR_PRE_PDN, curTick()); + } else { + // we should transition to the idle state when the last bank + // is precharged + schedulePowerEvent(PWR_IDLE, curTick()); + } + } +} + +void +DRAMInterface::Rank::processWriteDoneEvent() +{ + // counter should at least indicate one outstanding request + // for this write + assert(outstandingEvents > 0); + // Write transfer on bus has completed + // decrement per rank counter + --outstandingEvents; +} + +void +DRAMInterface::Rank::processRefreshEvent() +{ + // when first preparing the refresh, remember when it was due + if ((refreshState == REF_IDLE) || (refreshState == REF_SREF_EXIT)) { + // remember when the refresh is due + refreshDueAt = curTick(); + + // proceed to drain + refreshState = REF_DRAIN; + + // make nonzero while refresh is pending to ensure + // power down and self-refresh are not entered + ++outstandingEvents; + + DPRINTF(DRAM, "Refresh due\n"); + } + + // let any scheduled read or write to the same rank go ahead, + // after which it will + // hand control back to this event loop + if (refreshState == REF_DRAIN) { + // if a request is at the moment being handled and this request is + // accessing the current rank then wait for it to finish + if ((rank == dram.activeRank) + && (dram.ctrl->requestEventScheduled())) { + // hand control over to the request loop until it is + // evaluated next + DPRINTF(DRAM, "Refresh awaiting draining\n"); + + return; + } else { + refreshState = REF_PD_EXIT; + } + } + + // at this point, ensure that rank is not in a power-down state + if (refreshState == REF_PD_EXIT) { + // if rank was sleeping and we have't started exit process, + // wake-up for refresh + if (inLowPowerState) { + DPRINTF(DRAM, "Wake Up for refresh\n"); + // save state and return after refresh completes + scheduleWakeUpEvent(dram.tXP); + return; + } else { + refreshState = REF_PRE; + } + } + + // at this point, ensure that all banks are precharged + if (refreshState == REF_PRE) { + // precharge any active bank + if (numBanksActive != 0) { + // at the moment, we use a precharge all even if there is + // only a single bank open + DPRINTF(DRAM, "Precharging all\n"); + + // first determine when we can precharge + Tick pre_at = curTick(); + + for (auto &b : banks) { + // respect both causality and any existing bank + // constraints, some banks could already have a + // (auto) precharge scheduled + pre_at = std::max(b.preAllowedAt, pre_at); + } + + // make sure all banks per rank are precharged, and for those that + // already are, update their availability + Tick act_allowed_at = pre_at + dram.tRP; + + for (auto &b : banks) { + if (b.openRow != Bank::NO_ROW) { + dram.prechargeBank(*this, b, pre_at, true, false); + } else { + b.actAllowedAt = std::max(b.actAllowedAt, act_allowed_at); + b.preAllowedAt = std::max(b.preAllowedAt, pre_at); + } + } + + // precharge all banks in rank + cmdList.push_back(Command(MemCommand::PREA, 0, pre_at)); + + DPRINTF(DRAMPower, "%llu,PREA,0,%d\n", + divCeil(pre_at, dram.tCK) - + dram.timeStampOffset, rank); + } else if ((pwrState == PWR_IDLE) && (outstandingEvents == 1)) { + // Banks are closed, have transitioned to IDLE state, and + // no outstanding ACT,RD/WR,Auto-PRE sequence scheduled + DPRINTF(DRAM, "All banks already precharged, starting refresh\n"); + + // go ahead and kick the power state machine into gear since + // we are already idle + schedulePowerEvent(PWR_REF, curTick()); + } else { + // banks state is closed but 
haven't transitioned pwrState to IDLE + // or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled + // should have outstanding precharge or read response event + assert(prechargeEvent.scheduled() || + dram.ctrl->respondEventScheduled()); + // will start refresh when pwrState transitions to IDLE + } + + assert(numBanksActive == 0); + + // wait for all banks to be precharged or read to complete + // When precharge commands are done, power state machine will + // transition to the idle state, and automatically move to a + // refresh, at that point it will also call this method to get + // the refresh event loop going again + // Similarly, when read response completes, if all banks are + // precharged, will call this method to get loop re-started + return; + } + + // last but not least we perform the actual refresh + if (refreshState == REF_START) { + // should never get here with any banks active + assert(numBanksActive == 0); + assert(pwrState == PWR_REF); + + Tick ref_done_at = curTick() + dram.tRFC; + + for (auto &b : banks) { + b.actAllowedAt = ref_done_at; + } + + // at the moment this affects all ranks + cmdList.push_back(Command(MemCommand::REF, 0, curTick())); + + // Update the stats + updatePowerStats(); + + DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), dram.tCK) - + dram.timeStampOffset, rank); + + // Update for next refresh + refreshDueAt += dram.tREFI; + + // make sure we did not wait so long that we cannot make up + // for it + if (refreshDueAt < ref_done_at) { + fatal("Refresh was delayed so long we cannot catch up\n"); + } + + // Run the refresh and schedule event to transition power states + // when refresh completes + refreshState = REF_RUN; + schedule(refreshEvent, ref_done_at); + return; + } + + if (refreshState == REF_RUN) { + // should never get here with any banks active + assert(numBanksActive == 0); + assert(pwrState == PWR_REF); + + assert(!powerEvent.scheduled()); + + if ((dram.ctrl->drainState() == DrainState::Draining) || + (dram.ctrl->drainState() == DrainState::Drained)) { + // if draining, do not re-enter low-power mode. + // simply go to IDLE and wait + schedulePowerEvent(PWR_IDLE, curTick()); + } else { + // At the moment, we sleep when the refresh ends and wait to be + // woken up again if previously in a low-power state. + if (pwrStatePostRefresh != PWR_IDLE) { + // power State should be power Refresh + assert(pwrState == PWR_REF); + DPRINTF(DRAMState, "Rank %d sleeping after refresh and was in " + "power state %d before refreshing\n", rank, + pwrStatePostRefresh); + powerDownSleep(pwrState, curTick()); + + // Force PRE power-down if there are no outstanding commands + // in Q after refresh. + } else if (isQueueEmpty() && dram.enableDRAMPowerdown) { + // still have refresh event outstanding but there should + // be no other events outstanding + assert(outstandingEvents == 1); + DPRINTF(DRAMState, "Rank %d sleeping after refresh but was NOT" + " in a low power state before refreshing\n", rank); + powerDownSleep(PWR_PRE_PDN, curTick()); + + } else { + // move to the idle power state once the refresh is done, this + // will also move the refresh state machine to the refresh + // idle state + schedulePowerEvent(PWR_IDLE, curTick()); + } + } + + // At this point, we have completed the current refresh. + // In the SREF bypass case, we do not get to this state in the + // refresh STM and therefore can always schedule next event. 
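As a reading aid, the refresh handling above effectively steps through the states sketched below. This is a simplified standalone summary of the common path, ignoring the power-down and self-refresh detours handled in the surrounding code, and is not code from the patch:

// Refresh states as used by processRefreshEvent, simplified common path:
enum RefreshState { REF_IDLE, REF_DRAIN, REF_PD_EXIT, REF_SREF_EXIT,
                    REF_PRE, REF_START, REF_RUN };

RefreshState nextRefreshState(RefreshState s)
{
    switch (s) {
      case REF_IDLE:
      case REF_SREF_EXIT: return REF_DRAIN;   // refresh became due
      case REF_DRAIN:     return REF_PD_EXIT; // in-flight access drained
      case REF_PD_EXIT:   return REF_PRE;     // rank woken up if needed
      case REF_PRE:       return REF_START;   // all banks precharged
      case REF_START:     return REF_RUN;     // REF issued, wait for tRFC
      case REF_RUN:       return REF_IDLE;    // refresh complete
    }
    return REF_IDLE;
}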
+ // Compensate for the delay in actually performing the refresh + // when scheduling the next one + schedule(refreshEvent, refreshDueAt - dram.tRP); + + DPRINTF(DRAMState, "Refresh done at %llu and next refresh" + " at %llu\n", curTick(), refreshDueAt); + } +} + +void +DRAMInterface::Rank::schedulePowerEvent(PowerState pwr_state, Tick tick) +{ + // respect causality + assert(tick >= curTick()); + + if (!powerEvent.scheduled()) { + DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n", + tick, pwr_state); + + // insert the new transition + pwrStateTrans = pwr_state; + + schedule(powerEvent, tick); + } else { + panic("Scheduled power event at %llu to state %d, " + "with scheduled event at %llu to %d\n", tick, pwr_state, + powerEvent.when(), pwrStateTrans); + } +} + +void +DRAMInterface::Rank::powerDownSleep(PowerState pwr_state, Tick tick) +{ + // if low power state is active low, schedule to active low power state. + // in reality tCKE is needed to enter active low power. This is neglected + // here and could be added in the future. + if (pwr_state == PWR_ACT_PDN) { + schedulePowerEvent(pwr_state, tick); + // push command to DRAMPower + cmdList.push_back(Command(MemCommand::PDN_F_ACT, 0, tick)); + DPRINTF(DRAMPower, "%llu,PDN_F_ACT,0,%d\n", divCeil(tick, + dram.tCK) - dram.timeStampOffset, rank); + } else if (pwr_state == PWR_PRE_PDN) { + // if low power state is precharge low, schedule to precharge low + // power state. In reality tCKE is needed to enter active low power. + // This is neglected here. + schedulePowerEvent(pwr_state, tick); + //push Command to DRAMPower + cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick)); + DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick, + dram.tCK) - dram.timeStampOffset, rank); + } else if (pwr_state == PWR_REF) { + // if a refresh just occurred + // transition to PRE_PDN now that all banks are closed + // precharge power down requires tCKE to enter. For simplicity + // this is not considered. + schedulePowerEvent(PWR_PRE_PDN, tick); + //push Command to DRAMPower + cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick)); + DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick, + dram.tCK) - dram.timeStampOffset, rank); + } else if (pwr_state == PWR_SREF) { + // should only enter SREF after PRE-PD wakeup to do a refresh + assert(pwrStatePostRefresh == PWR_PRE_PDN); + // self refresh requires time tCKESR to enter. For simplicity, + // this is not considered. 
+ schedulePowerEvent(PWR_SREF, tick); + // push Command to DRAMPower + cmdList.push_back(Command(MemCommand::SREN, 0, tick)); + DPRINTF(DRAMPower, "%llu,SREN,0,%d\n", divCeil(tick, + dram.tCK) - dram.timeStampOffset, rank); + } + // Ensure that we don't power-down and back up in same tick + // Once we commit to PD entry, do it and wait for at least 1tCK + // This could be replaced with tCKE if/when that is added to the model + wakeUpAllowedAt = tick + dram.tCK; + + // Transitioning to a low power state, set flag + inLowPowerState = true; +} + +void +DRAMInterface::Rank::scheduleWakeUpEvent(Tick exit_delay) +{ + Tick wake_up_tick = std::max(curTick(), wakeUpAllowedAt); + + DPRINTF(DRAMState, "Scheduling wake-up for rank %d at tick %d\n", + rank, wake_up_tick); + + // if waking for refresh, hold previous state + // else reset state back to IDLE + if (refreshState == REF_PD_EXIT) { + pwrStatePostRefresh = pwrState; + } else { + // don't automatically transition back to LP state after next REF + pwrStatePostRefresh = PWR_IDLE; + } + + // schedule wake-up with event to ensure entry has completed before + // we try to wake-up + schedule(wakeUpEvent, wake_up_tick); + + for (auto &b : banks) { + // respect both causality and any existing bank + // constraints, some banks could already have a + // (auto) precharge scheduled + b.wrAllowedAt = std::max(wake_up_tick + exit_delay, b.wrAllowedAt); + b.rdAllowedAt = std::max(wake_up_tick + exit_delay, b.rdAllowedAt); + b.preAllowedAt = std::max(wake_up_tick + exit_delay, b.preAllowedAt); + b.actAllowedAt = std::max(wake_up_tick + exit_delay, b.actAllowedAt); + } + // Transitioning out of low power state, clear flag + inLowPowerState = false; + + // push to DRAMPower + // use pwrStateTrans for cases where we have a power event scheduled + // to enter low power that has not yet been processed + if (pwrStateTrans == PWR_ACT_PDN) { + cmdList.push_back(Command(MemCommand::PUP_ACT, 0, wake_up_tick)); + DPRINTF(DRAMPower, "%llu,PUP_ACT,0,%d\n", divCeil(wake_up_tick, + dram.tCK) - dram.timeStampOffset, rank); + + } else if (pwrStateTrans == PWR_PRE_PDN) { + cmdList.push_back(Command(MemCommand::PUP_PRE, 0, wake_up_tick)); + DPRINTF(DRAMPower, "%llu,PUP_PRE,0,%d\n", divCeil(wake_up_tick, + dram.tCK) - dram.timeStampOffset, rank); + } else if (pwrStateTrans == PWR_SREF) { + cmdList.push_back(Command(MemCommand::SREX, 0, wake_up_tick)); + DPRINTF(DRAMPower, "%llu,SREX,0,%d\n", divCeil(wake_up_tick, + dram.tCK) - dram.timeStampOffset, rank); + } +} + +void +DRAMInterface::Rank::processWakeUpEvent() +{ + // Should be in a power-down or self-refresh state + assert((pwrState == PWR_ACT_PDN) || (pwrState == PWR_PRE_PDN) || + (pwrState == PWR_SREF)); + + // Check current state to determine transition state + if (pwrState == PWR_ACT_PDN) { + // banks still open, transition to PWR_ACT + schedulePowerEvent(PWR_ACT, curTick()); + } else { + // transitioning from a precharge power-down or self-refresh state + // banks are closed - transition to PWR_IDLE + schedulePowerEvent(PWR_IDLE, curTick()); + } +} + +void +DRAMInterface::Rank::processPowerEvent() +{ + assert(curTick() >= pwrStateTick); + // remember where we were, and for how long + Tick duration = curTick() - pwrStateTick; + PowerState prev_state = pwrState; + + // update the accounting + stats.pwrStateTime[prev_state] += duration; + + // track to total idle time + if ((prev_state == PWR_PRE_PDN) || (prev_state == PWR_ACT_PDN) || + (prev_state == PWR_SREF)) { + stats.totalIdleTime += duration; + } + + pwrState = 
pwrStateTrans; + pwrStateTick = curTick(); + + // if rank was refreshing, make sure to start scheduling requests again + if (prev_state == PWR_REF) { + // bus IDLED prior to REF + // counter should be one for refresh command only + assert(outstandingEvents == 1); + // REF complete, decrement count and go back to IDLE + --outstandingEvents; + refreshState = REF_IDLE; + + DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration); + // if moving back to power-down after refresh + if (pwrState != PWR_IDLE) { + assert(pwrState == PWR_PRE_PDN); + DPRINTF(DRAMState, "Switching to power down state after refreshing" + " rank %d at %llu tick\n", rank, curTick()); + } + + // completed refresh event, ensure next request is scheduled + if (!dram.ctrl->requestEventScheduled()) { + DPRINTF(DRAM, "Scheduling next request after refreshing" + " rank %d\n", rank); + dram.ctrl->restartScheduler(curTick()); + } + } + + if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) { + // have exited ACT PD + assert(prev_state == PWR_ACT_PDN); + + // go back to REF event and close banks + refreshState = REF_PRE; + schedule(refreshEvent, curTick()); + } else if (pwrState == PWR_IDLE) { + DPRINTF(DRAMState, "All banks precharged\n"); + if (prev_state == PWR_SREF) { + // set refresh state to REF_SREF_EXIT, ensuring inRefIdleState + // continues to return false during tXS after SREF exit + // Schedule a refresh which kicks things back into action + // when it finishes + refreshState = REF_SREF_EXIT; + schedule(refreshEvent, curTick() + dram.tXS); + } else { + // if we have a pending refresh, and are now moving to + // the idle state, directly transition to, or schedule refresh + if ((refreshState == REF_PRE) || (refreshState == REF_PD_EXIT)) { + // ensure refresh is restarted only after final PRE command. + // do not restart refresh if controller is in an intermediate + // state, after PRE_PDN exit, when banks are IDLE but an + // ACT is scheduled. 
+ if (!activateEvent.scheduled()) { + // there should be nothing waiting at this point + assert(!powerEvent.scheduled()); + if (refreshState == REF_PD_EXIT) { + // exiting PRE PD, will be in IDLE until tXP expires + // and then should transition to PWR_REF state + assert(prev_state == PWR_PRE_PDN); + schedulePowerEvent(PWR_REF, curTick() + dram.tXP); + } else if (refreshState == REF_PRE) { + // can directly move to PWR_REF state and proceed below + pwrState = PWR_REF; + } + } else { + // must have PRE scheduled to transition back to IDLE + // and re-kick off refresh + assert(prechargeEvent.scheduled()); + } + } + } + } + + // transition to the refresh state and re-start refresh process + // refresh state machine will schedule the next power state transition + if (pwrState == PWR_REF) { + // completed final PRE for refresh or exiting power-down + assert(refreshState == REF_PRE || refreshState == REF_PD_EXIT); + + // exited PRE PD for refresh, with no pending commands + // bypass auto-refresh and go straight to SREF, where memory + // will issue refresh immediately upon entry + if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() && + (dram.ctrl->drainState() != DrainState::Draining) && + (dram.ctrl->drainState() != DrainState::Drained) && + dram.enableDRAMPowerdown) { + DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning " + "to self refresh at %11u tick\n", rank, curTick()); + powerDownSleep(PWR_SREF, curTick()); + + // Since refresh was bypassed, remove event by decrementing count + assert(outstandingEvents == 1); + --outstandingEvents; + + // reset state back to IDLE temporarily until SREF is entered + pwrState = PWR_IDLE; + + // Not bypassing refresh for SREF entry + } else { + DPRINTF(DRAMState, "Refreshing\n"); + + // there should be nothing waiting at this point + assert(!powerEvent.scheduled()); + + // kick the refresh event loop into action again, and that + // in turn will schedule a transition to the idle power + // state once the refresh is done + schedule(refreshEvent, curTick()); + + // Banks transitioned to IDLE, start REF + refreshState = REF_START; + } + } + +} + +void +DRAMInterface::Rank::updatePowerStats() +{ + // All commands up to refresh have completed + // flush cmdList to DRAMPower + flushCmdList(); + + // Call the function that calculates window energy at intermediate update + // events like at refresh, stats dump as well as at simulation exit. + // Window starts at the last time the calcWindowEnergy function was called + // and is upto current time. 
+ power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) - + dram.timeStampOffset); + + // Get the energy from DRAMPower + Data::MemoryPowerModel::Energy energy = power.powerlib.getEnergy(); + + // The energy components inside the power lib are calculated over + // the window so accumulate into the corresponding gem5 stat + stats.actEnergy += energy.act_energy * dram.devicesPerRank; + stats.preEnergy += energy.pre_energy * dram.devicesPerRank; + stats.readEnergy += energy.read_energy * dram.devicesPerRank; + stats.writeEnergy += energy.write_energy * dram.devicesPerRank; + stats.refreshEnergy += energy.ref_energy * dram.devicesPerRank; + stats.actBackEnergy += energy.act_stdby_energy * dram.devicesPerRank; + stats.preBackEnergy += energy.pre_stdby_energy * dram.devicesPerRank; + stats.actPowerDownEnergy += energy.f_act_pd_energy * dram.devicesPerRank; + stats.prePowerDownEnergy += energy.f_pre_pd_energy * dram.devicesPerRank; + stats.selfRefreshEnergy += energy.sref_energy * dram.devicesPerRank; + + // Accumulate window energy into the total energy. + stats.totalEnergy += energy.window_energy * dram.devicesPerRank; + // Average power must not be accumulated but calculated over the time + // since last stats reset. SimClock::Frequency is tick period not tick + // frequency. + // energy (pJ) 1e-9 + // power (mW) = ----------- * ---------- + // time (tick) tick_frequency + stats.averagePower = (stats.totalEnergy.value() / + (curTick() - dram.lastStatsResetTick)) * + (SimClock::Frequency / 1000000000.0); +} + +void +DRAMInterface::Rank::computeStats() +{ + DPRINTF(DRAM,"Computing stats due to a dump callback\n"); + + // Update the stats + updatePowerStats(); + + // final update of power state times + stats.pwrStateTime[pwrState] += (curTick() - pwrStateTick); + pwrStateTick = curTick(); +} + +void +DRAMInterface::Rank::resetStats() { + // The only way to clear the counters in DRAMPower is to call + // calcWindowEnergy function as that then calls clearCounters. The + // clearCounters method itself is private. 
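To make the unit conversion in updatePowerStats() concrete, here is a small hypothetical check of the pJ-per-tick to mW arithmetic. It assumes gem5's default 1 ps tick, i.e. SimClock::Frequency of 1e12 ticks per second, and the numbers are made up purely for illustration:

#include <cassert>

int main()
{
    const double ticks_per_second = 1e12;   // assumed default 1 ps tick
    const double total_energy_pJ  = 5.0e9;  // hypothetical window energy
    const double elapsed_ticks    = 1.0e9;  // hypothetical elapsed ticks

    // pJ/tick * ticks/s = pJ/s, and 1 pJ/s = 1e-9 mW, hence the 1e-9 factor.
    double avg_power_mW = (total_energy_pJ / elapsed_ticks) *
                          (ticks_per_second / 1000000000.0);

    // 5 pJ per 1 ps tick sustained is 5 W, i.e. 5000 mW.
    assert(avg_power_mW == 5000.0);
    return 0;
}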
+    power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
+                                    dram.timeStampOffset);
+
+}
+
+bool
+DRAMInterface::Rank::forceSelfRefreshExit() const {
+    return (readEntries != 0) ||
+           (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
+}
+
+void
+DRAMInterface::DRAMStats::resetStats()
+{
+    dram.lastStatsResetTick = curTick();
+}
+
+DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
+    : Stats::Group(&_dram),
+    dram(_dram),
+
+    ADD_STAT(readBursts, "Number of DRAM read bursts"),
+    ADD_STAT(writeBursts, "Number of DRAM write bursts"),
+
+    ADD_STAT(perBankRdBursts, "Per bank read bursts"),
+    ADD_STAT(perBankWrBursts, "Per bank write bursts"),
+
+    ADD_STAT(totQLat, "Total ticks spent queuing"),
+    ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
+    ADD_STAT(totMemAccLat,
+             "Total ticks spent from burst creation until serviced "
+             "by the DRAM"),
+
+    ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"),
+    ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
+    ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"),
+
+    ADD_STAT(readRowHits, "Number of row buffer hits during reads"),
+    ADD_STAT(writeRowHits, "Number of row buffer hits during writes"),
+    ADD_STAT(readRowHitRate, "Row buffer hit rate for reads"),
+    ADD_STAT(writeRowHitRate, "Row buffer hit rate for writes"),
+
+    ADD_STAT(bytesPerActivate, "Bytes accessed per row activation"),
+    ADD_STAT(bytesRead, "Total number of bytes read from DRAM"),
+    ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
+    ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"),
+    ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"),
+    ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
+
+    ADD_STAT(busUtil, "Data bus utilization in percentage"),
+    ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
+    ADD_STAT(busUtilWrite, "Data bus utilization in percentage for writes"),
+
+    ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined")
+
+{
+}
+
+void
+DRAMInterface::DRAMStats::regStats()
+{
+    using namespace Stats;
+
+    avgQLat.precision(2);
+    avgBusLat.precision(2);
+    avgMemAccLat.precision(2);
+
+    readRowHitRate.precision(2);
+    writeRowHitRate.precision(2);
+
+    perBankRdBursts.init(dram.banksPerRank * dram.ranksPerChannel);
+    perBankWrBursts.init(dram.banksPerRank * dram.ranksPerChannel);
+
+    bytesPerActivate
+        .init(dram.maxAccessesPerRow ?
+ dram.maxAccessesPerRow : dram.rowBufferSize) + .flags(nozero); + + peakBW.precision(2); + busUtil.precision(2); + busUtilWrite.precision(2); + busUtilRead.precision(2); + + pageHitRate.precision(2); + + // Formula stats + avgQLat = totQLat / readBursts; + avgBusLat = totBusLat / readBursts; + avgMemAccLat = totMemAccLat / readBursts; + + readRowHitRate = (readRowHits / readBursts) * 100; + writeRowHitRate = (writeRowHits / writeBursts) * 100; + + avgRdBW = (bytesRead / 1000000) / simSeconds; + avgWrBW = (bytesWritten / 1000000) / simSeconds; + peakBW = (SimClock::Frequency / dram.burstDelay()) * + dram.bytesPerBurst() / 1000000; + + busUtil = (avgRdBW + avgWrBW) / peakBW * 100; + busUtilRead = avgRdBW / peakBW * 100; + busUtilWrite = avgWrBW / peakBW * 100; + + pageHitRate = (writeRowHits + readRowHits) / + (writeBursts + readBursts) * 100; +} + +DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank) + : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()), + rank(_rank), + + ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"), + ADD_STAT(preEnergy, "Energy for precharge commands per rank (pJ)"), + ADD_STAT(readEnergy, "Energy for read commands per rank (pJ)"), + ADD_STAT(writeEnergy, "Energy for write commands per rank (pJ)"), + ADD_STAT(refreshEnergy, "Energy for refresh commands per rank (pJ)"), + ADD_STAT(actBackEnergy, "Energy for active background per rank (pJ)"), + ADD_STAT(preBackEnergy, "Energy for precharge background per rank (pJ)"), + ADD_STAT(actPowerDownEnergy, + "Energy for active power-down per rank (pJ)"), + ADD_STAT(prePowerDownEnergy, + "Energy for precharge power-down per rank (pJ)"), + ADD_STAT(selfRefreshEnergy, "Energy for self refresh per rank (pJ)"), + + ADD_STAT(totalEnergy, "Total energy per rank (pJ)"), + ADD_STAT(averagePower, "Core power per rank (mW)"), + + ADD_STAT(totalIdleTime, "Total Idle time Per DRAM Rank"), + ADD_STAT(pwrStateTime, "Time in different power states") +{ +} + +void +DRAMInterface::RankStats::regStats() +{ + Stats::Group::regStats(); + + pwrStateTime + .init(6) + .subname(0, "IDLE") + .subname(1, "REF") + .subname(2, "SREF") + .subname(3, "PRE_PDN") + .subname(4, "ACT") + .subname(5, "ACT_PDN"); +} + +void +DRAMInterface::RankStats::resetStats() +{ + Stats::Group::resetStats(); + + rank.resetStats(); +} + +void +DRAMInterface::RankStats::preDumpStats() +{ + Stats::Group::preDumpStats(); + + rank.computeStats(); +} + +NVMInterface::NVMInterface(const NVMInterfaceParams* _p) + : MemInterface(_p), + maxPendingWrites(_p->max_pending_writes), + maxPendingReads(_p->max_pending_reads), + twoCycleRdWr(_p->two_cycle_rdwr), + tREAD(_p->tREAD), tWRITE(_p->tWRITE), tSEND(_p->tSEND), + stats(*this), + writeRespondEvent([this]{ processWriteRespondEvent(); }, name()), + readReadyEvent([this]{ processReadReadyEvent(); }, name()), + nextReadAt(0), numPendingReads(0), numReadDataReady(0), + numReadsToIssue(0), numWritesQueued(0) +{ + DPRINTF(NVM, "Setting up NVM Interface\n"); + + fatal_if(!isPowerOf2(burstSize), "NVM burst size %d is not allowed, " + "must be a power of two\n", burstSize); + + // sanity check the ranks since we rely on bit slicing for the + // address decoding + fatal_if(!isPowerOf2(ranksPerChannel), "NVM rank count of %d is " + "not allowed, must be a power of two\n", ranksPerChannel); + + for (int i =0; i < ranksPerChannel; i++) { + // Add NVM ranks to the system + DPRINTF(NVM, "Creating NVM rank %d \n", i); + Rank* rank = new Rank(_p, i, *this); + ranks.push_back(rank); + } + + uint64_t capacity = 
ULL(1) << ceilLog2(AbstractMemory::size());
+
+    DPRINTF(NVM, "NVM capacity %lld (%lld) bytes\n", capacity,
+            AbstractMemory::size());
+
+    rowsPerBank = capacity / (rowBufferSize *
+                  banksPerRank * ranksPerChannel);
+
+}
+
+NVMInterface*
+NVMInterfaceParams::create()
+{
+    return new NVMInterface(this);
+}
+
+NVMInterface::Rank::Rank(const NVMInterfaceParams* _p,
+                         int _rank, NVMInterface& _nvm)
+    : EventManager(&_nvm), nvm(_nvm), rank(_rank),
+      banks(_p->banks_per_rank)
+{
+    for (int b = 0; b < _p->banks_per_rank; b++) {
+        banks[b].bank = b;
+        // No bank groups; simply assign to bank number
+        banks[b].bankgr = b;
+    }
+}
+
+void
+NVMInterface::init()
+{
+    AbstractMemory::init();
+}
+
+void NVMInterface::setupRank(const uint8_t rank, const bool is_read)
+{
+    if (is_read) {
+        // increment count to trigger read and track number of reads in Q
+        numReadsToIssue++;
+    } else {
+        // increment count to track number of writes in Q
+        numWritesQueued++;
+    }
+}
+
+pair<MemPacketQueue::iterator, Tick>
+NVMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
+{
+    // remember if we found a hit, but one that cannot issue seamlessly
+    bool found_prepped_pkt = false;
+
+    auto selected_pkt_it = queue.end();
+    Tick selected_col_at = MaxTick;
+
+    for (auto i = queue.begin(); i != queue.end() ; ++i) {
+        MemPacket* pkt = *i;
+
+        // select optimal NVM packet in Q
+        if (!pkt->isDram()) {
+            const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
+            const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
+                                                        bank.wrAllowedAt;
+
+            // check if rank is not doing a refresh and thus is available,
+            // if not, jump to the next packet
+            if (burstReady(pkt)) {
+                DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__,
+                        pkt->bank, pkt->rank);
+
+                // no additional rank-to-rank or media delays
+                if (col_allowed_at <= min_col_at) {
+                    // FCFS within entries that can issue without
+                    // additional delay, such as same rank accesses
+                    // or media delay requirements
+                    selected_pkt_it = i;
+                    selected_col_at = col_allowed_at;
+                    // no need to look through the remaining queue entries
+                    DPRINTF(NVM, "%s Seamless buffer hit\n", __func__);
+                    break;
+                } else if (!found_prepped_pkt) {
+                    // packet is to prepped region but cannot issue
+                    // seamlessly; remember this one and continue
+                    selected_pkt_it = i;
+                    selected_col_at = col_allowed_at;
+                    DPRINTF(NVM, "%s Prepped packet found \n", __func__);
+                    found_prepped_pkt = true;
+                }
+            } else {
+                DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__,
+                        pkt->bank, pkt->rank);
+            }
+        }
+    }
+
+    if (selected_pkt_it == queue.end()) {
+        DPRINTF(NVM, "%s no available NVM ranks found\n", __func__);
+    }
+
+    return make_pair(selected_pkt_it, selected_col_at);
+}
+
+void
+NVMInterface::chooseRead(MemPacketQueue& queue)
+{
+    Tick cmd_at = std::max(curTick(), nextReadAt);
+
+    // This method does the arbitration between non-deterministic read
+    // requests to NVM. The chosen packet is not removed from the queue
+    // at this time. Removal from the queue will occur when the data is
+    // ready and a separate SEND command is issued to retrieve it via the
+    // chooseNext function in the top-level controller.
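For illustration, the two-step handshake described in the comment above (issue the non-deterministic read, wait for the media, then send the data) reduces to the following standalone sketch; the types and names here are hypothetical and not part of this patch:

#include <cstdint>
#include <vector>

using Tick = uint64_t;

struct PendingRead { Tick data_ready_at; };

// Step 1: issue the read command; the data is only buffered at the media
// tREAD later, so record when it will be available.
Tick issueRead(std::vector<PendingRead>& pending, Tick cmd_at, Tick tREAD)
{
    pending.push_back({cmd_at + tREAD});
    return pending.back().data_ready_at;
}

// Step 2: the SEND burst that moves the data across the bus may only be
// scheduled once the buffered data is ready.
bool canSend(const PendingRead& rd, Tick now)
{
    return rd.data_ready_at <= now;
}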
+ assert(!queue.empty()); + + assert(numReadsToIssue > 0); + numReadsToIssue--; + // For simplicity, issue non-deterministic reads in order (fcfs) + for (auto i = queue.begin(); i != queue.end() ; ++i) { + MemPacket* pkt = *i; + + // Find 1st NVM read packet that hasn't issued read command + if (pkt->readyTime == MaxTick && !pkt->isDram() && pkt->isRead()) { + // get the bank + Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank]; + + // issueing a read, inc counter and verify we haven't overrun + numPendingReads++; + assert(numPendingReads <= maxPendingReads); + + // increment the bytes accessed and the accesses per row + bank_ref.bytesAccessed += burstSize; + + // Verify command bandiwth to issue + // Host can issue read immediately uith buffering closer + // to the NVM. The actual execution at the NVM may be delayed + // due to busy resources + if (twoCycleRdWr) { + cmd_at = ctrl->verifyMultiCmd(cmd_at, + maxCommandsPerWindow, tCK); + } else { + cmd_at = ctrl->verifySingleCmd(cmd_at, + maxCommandsPerWindow); + } + + // Update delay to next read + // Ensures single read command issued per cycle + nextReadAt = cmd_at + tCK; + + // If accessing a new location in this bank, update timing + // and stats + if (bank_ref.openRow != pkt->row) { + // update the open bank, re-using row field + bank_ref.openRow = pkt->row; + + // sample the bytes accessed to a buffer in this bank + // here when we are re-buffering the data + stats.bytesPerBank.sample(bank_ref.bytesAccessed); + // start counting anew + bank_ref.bytesAccessed = 0; + + // holdoff next command to this bank until the read completes + // and the data has been successfully buffered + // can pipeline accesses to the same bank, sending them + // across the interface B2B, but will incur full access + // delay between data ready responses to different buffers + // in a bank + bank_ref.actAllowedAt = std::max(cmd_at, + bank_ref.actAllowedAt) + tREAD; + } + // update per packet readyTime to holdoff burst read operation + // overloading readyTime, which will be updated again when the + // burst is issued + pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt); + + DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. " + "Data ready at %d\n", + bank_ref.bank, cmd_at, pkt->readyTime); + + // Insert into read ready queue. It will be handled after + // the media delay has been met + if (readReadyQueue.empty()) { + assert(!readReadyEvent.scheduled()); + schedule(readReadyEvent, pkt->readyTime); + } else if (readReadyEvent.when() > pkt->readyTime) { + // move it sooner in time, to the first read with data + reschedule(readReadyEvent, pkt->readyTime); + } else { + assert(readReadyEvent.scheduled()); + } + readReadyQueue.push_back(pkt->readyTime); + + // found an NVM read to issue - break out + break; + } + } +} + +void +NVMInterface::processReadReadyEvent() +{ + // signal that there is read data ready to be transmitted + numReadDataReady++; + + DPRINTF(NVM, + "processReadReadyEvent(): Data for an NVM read is ready. 
" + "numReadDataReady is %d\t numPendingReads is %d\n", + numReadDataReady, numPendingReads); + + // Find lowest ready time and verify it is equal to curTick + // also find the next lowest to schedule next event + // Done with this response, erase entry + auto ready_it = readReadyQueue.begin(); + Tick next_ready_at = MaxTick; + for (auto i = readReadyQueue.begin(); i != readReadyQueue.end() ; ++i) { + if (*ready_it > *i) { + next_ready_at = *ready_it; + ready_it = i; + } else if ((next_ready_at > *i) && (i != ready_it)) { + next_ready_at = *i; + } + } + + // Verify we found the time of this event and remove it + assert(*ready_it == curTick()); + readReadyQueue.erase(ready_it); + + if (!readReadyQueue.empty()) { + assert(readReadyQueue.front() >= curTick()); + assert(!readReadyEvent.scheduled()); + schedule(readReadyEvent, next_ready_at); + } + + // It is possible that a new command kicks things back into + // action before reaching this point but need to ensure that we + // continue to process new commands as read data becomes ready + // This will also trigger a drain if needed + if (!ctrl->requestEventScheduled()) { + DPRINTF(NVM, "Restart controller scheduler immediately\n"); + ctrl->restartScheduler(curTick()); + } +} + + +bool +NVMInterface::burstReady(MemPacket* pkt) const { + bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true)) && + (pkt->readyTime <= curTick()) && (numReadDataReady > 0); + bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true) && + !writeRespQueueFull(); + return (read_rdy || write_rdy); +} + +pair +NVMInterface::doBurstAccess(MemPacket* pkt, Tick next_burst_at) +{ + DPRINTF(NVM, "NVM Timing access to addr %lld, rank/bank/row %d %d %d\n", + pkt->addr, pkt->rank, pkt->bank, pkt->row); + + // get the bank + Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank]; + + // respect any constraints on the command + const Tick bst_allowed_at = pkt->isRead() ? + bank_ref.rdAllowedAt : bank_ref.wrAllowedAt; + + // we need to wait until the bus is available before we can issue + // the command; need minimum of tBURST between commands + Tick cmd_at = std::max(bst_allowed_at, curTick()); + + // we need to wait until the bus is available before we can issue + // the command; need minimum of tBURST between commands + cmd_at = std::max(cmd_at, next_burst_at); + + // Verify there is command bandwidth to issue + // Read burst (send command) is a simple data access and only requires + // one command cycle + // Write command may require multiple cycles to enable larger address space + if (pkt->isRead() || !twoCycleRdWr) { + cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow); + } else { + cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK); + } + // update the packet ready time to reflect when data will be transferred + // Use the same bus delays defined for NVM + pkt->readyTime = cmd_at + tSEND + tBURST; + + Tick dly_to_rd_cmd; + Tick dly_to_wr_cmd; + for (auto n : ranks) { + for (int i = 0; i < banksPerRank; i++) { + // base delay is a function of tBURST and bus turnaround + dly_to_rd_cmd = pkt->isRead() ? tBURST : writeToReadDelay(); + dly_to_wr_cmd = pkt->isRead() ? 
                                     readToWriteDelay() : tBURST;
+
+            if (pkt->rank != n->rank) {
+                // adjust timing for different ranks
+                // Need to account for rank-to-rank switching with tCS
+                dly_to_wr_cmd = rankToRankDelay();
+                dly_to_rd_cmd = rankToRankDelay();
+            }
+            n->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
+                                      n->banks[i].rdAllowedAt);
+
+            n->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
+                                      n->banks[i].wrAllowedAt);
+        }
+    }
+
+    DPRINTF(NVM, "NVM Access to %lld, ready at %lld.\n",
+            pkt->addr, pkt->readyTime);
+
+    if (pkt->isRead()) {
+        // completed the read, decrement counters
+        assert(numPendingReads != 0);
+        assert(numReadDataReady != 0);
+
+        numPendingReads--;
+        numReadDataReady--;
+    } else {
+        // Adjust number of NVM writes in Q
+        assert(numWritesQueued > 0);
+        numWritesQueued--;
+
+        // increment the bytes accessed and the accesses per row
+        // only increment for writes as the reads are handled when
+        // the non-deterministic read is issued, before the data transfer
+        bank_ref.bytesAccessed += burstSize;
+
+        // Commands will be issued serially when accessing the same bank
+        // Commands can issue in parallel to different banks
+        if ((bank_ref.bank == pkt->bank) &&
+            (bank_ref.openRow != pkt->row)) {
+            // update the open buffer, re-using row field
+            bank_ref.openRow = pkt->row;
+
+            // sample the bytes accessed to a buffer in this bank
+            // here when we are re-buffering the data
+            stats.bytesPerBank.sample(bank_ref.bytesAccessed);
+            // start counting anew
+            bank_ref.bytesAccessed = 0;
+        }
+
+        // Determine when write will actually complete, assuming it is
+        // scheduled to push to NVM immediately
+        // update actAllowedAt to serialize next command completion that
+        // accesses this bank; must wait until this write completes
+        // Data accesses to the same buffer in this bank
+        // can issue immediately after actAllowedAt expires, without
+        // waiting additional delay of tWRITE. Can revisit this
+        // assumption/simplification in the future.
+        bank_ref.actAllowedAt = std::max(pkt->readyTime,
+                                bank_ref.actAllowedAt) + tWRITE;
+
+        // Need to track number of outstanding writes to
+        // ensure 'buffer' on media controller does not overflow
+        assert(!writeRespQueueFull());
+
+        // Insert into write done queue. It will be handled after
+        // the media delay has been met
+        if (writeRespQueueEmpty()) {
+            assert(!writeRespondEvent.scheduled());
+            schedule(writeRespondEvent, bank_ref.actAllowedAt);
+        } else {
+            assert(writeRespondEvent.scheduled());
+        }
+        writeRespQueue.push_back(bank_ref.actAllowedAt);
+        writeRespQueue.sort();
+        if (writeRespondEvent.when() > bank_ref.actAllowedAt) {
+            DPRINTF(NVM, "Rescheduled respond event from %lld to %lld\n",
+                    writeRespondEvent.when(), bank_ref.actAllowedAt);
+            DPRINTF(NVM, "Front of response queue is %lld\n",
+                    writeRespQueue.front());
+            reschedule(writeRespondEvent, bank_ref.actAllowedAt);
+        }
+
+    }
+
+    // Update the stats
+    if (pkt->isRead()) {
+        stats.readBursts++;
+        stats.bytesRead += burstSize;
+        stats.perBankRdBursts[pkt->bankId]++;
+        stats.pendingReads.sample(numPendingReads);
+
+        // Update latency stats
+        stats.totMemAccLat += pkt->readyTime - pkt->entryTime;
+        stats.totBusLat += tBURST;
+        stats.totQLat += cmd_at - pkt->entryTime;
+    } else {
+        stats.writeBursts++;
+        stats.bytesWritten += burstSize;
+        stats.perBankWrBursts[pkt->bankId]++;
+    }
+
+    return make_pair(cmd_at, cmd_at + tBURST);
+}
+
+void
+NVMInterface::processWriteRespondEvent()
+{
+    DPRINTF(NVM,
+            "processWriteRespondEvent(): An NVM write reached its readyTime. 
" + "%d remaining pending NVM writes\n", writeRespQueue.size()); + + // Update stat to track histogram of pending writes + stats.pendingWrites.sample(writeRespQueue.size()); + + // Done with this response, pop entry + writeRespQueue.pop_front(); + + if (!writeRespQueue.empty()) { + assert(writeRespQueue.front() >= curTick()); + assert(!writeRespondEvent.scheduled()); + schedule(writeRespondEvent, writeRespQueue.front()); + } + + // It is possible that a new command kicks things back into + // action before reaching this point but need to ensure that we + // continue to process new commands as writes complete at the media and + // credits become available. This will also trigger a drain if needed + if (!ctrl->requestEventScheduled()) { + DPRINTF(NVM, "Restart controller scheduler immediately\n"); + ctrl->restartScheduler(curTick()); + } +} + +void +NVMInterface::addRankToRankDelay(Tick cmd_at) +{ + // update timing for NVM ranks due to bursts issued + // to ranks for other media interfaces + for (auto n : ranks) { + for (int i = 0; i < banksPerRank; i++) { + // different rank by default + // Need to only account for rank-to-rank switching + n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(), + n->banks[i].rdAllowedAt); + n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(), + n->banks[i].wrAllowedAt); + } + } +} + +bool +NVMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm) +{ + DPRINTF(NVM,"isBusy: numReadDataReady = %d\n", numReadDataReady); + // Determine NVM is busy and cannot issue a burst + // A read burst cannot issue when data is not ready from the NVM + // Also check that we have reads queued to ensure we can change + // bus direction to service potential write commands. + // A write cannot issue once we've reached MAX pending writes + // Only assert busy for the write case when there are also + // no reads in Q and the write queue only contains NVM commands + // This allows the bus state to switch and service reads + return (ctrl->inReadBusState(true) ? 
+                (numReadDataReady == 0) && !read_queue_empty :
+                writeRespQueueFull() && read_queue_empty &&
+                                        all_writes_nvm);
+}
+
+
+NVMInterface::NVMStats::NVMStats(NVMInterface &_nvm)
+    : Stats::Group(&_nvm),
+    nvm(_nvm),
+
+    ADD_STAT(readBursts, "Number of NVM read bursts"),
+    ADD_STAT(writeBursts, "Number of NVM write bursts"),
+
+    ADD_STAT(perBankRdBursts, "Per bank read bursts"),
+    ADD_STAT(perBankWrBursts, "Per bank write bursts"),
+
+    ADD_STAT(totQLat, "Total ticks spent queuing"),
+    ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
+    ADD_STAT(totMemAccLat,
+             "Total ticks spent from burst creation until serviced "
+             "by the NVM"),
+    ADD_STAT(avgQLat, "Average queueing delay per NVM burst"),
+    ADD_STAT(avgBusLat, "Average bus latency per NVM burst"),
+    ADD_STAT(avgMemAccLat, "Average memory access latency per NVM burst"),
+
+    ADD_STAT(bytesRead, "Total number of bytes read from NVM"),
+    ADD_STAT(bytesWritten, "Total number of bytes written to NVM"),
+    ADD_STAT(avgRdBW, "Average NVM read bandwidth in MiBytes/s"),
+    ADD_STAT(avgWrBW, "Average NVM write bandwidth in MiBytes/s"),
+    ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
+    ADD_STAT(busUtil, "NVM Data bus utilization in percentage"),
+    ADD_STAT(busUtilRead, "NVM Data bus read utilization in percentage"),
+    ADD_STAT(busUtilWrite, "NVM Data bus write utilization in percentage"),
+
+    ADD_STAT(pendingReads, "Reads issued to NVM for which data has not been "
+             "transferred"),
+    ADD_STAT(pendingWrites, "Number of outstanding writes to NVM"),
+    ADD_STAT(bytesPerBank, "Bytes read within a bank before loading "
+             "new bank")
+{
+}
+
+void
+NVMInterface::NVMStats::regStats()
+{
+    using namespace Stats;
+
+    perBankRdBursts.init(nvm.ranksPerChannel == 0 ? 1 :
+                         nvm.banksPerRank * nvm.ranksPerChannel);
+
+    perBankWrBursts.init(nvm.ranksPerChannel == 0 ? 1 :
+                         nvm.banksPerRank * nvm.ranksPerChannel);
+
+    avgQLat.precision(2);
+    avgBusLat.precision(2);
+    avgMemAccLat.precision(2);
+
+    avgRdBW.precision(2);
+    avgWrBW.precision(2);
+    peakBW.precision(2);
+
+    busUtil.precision(2);
+    busUtilRead.precision(2);
+    busUtilWrite.precision(2);
+
+    pendingReads
+        .init(nvm.maxPendingReads)
+        .flags(nozero);
+
+    pendingWrites
+        .init(nvm.maxPendingWrites)
+        .flags(nozero);
+
+    bytesPerBank
+        .init(nvm.rowBufferSize)
+        .flags(nozero);
+
+    avgQLat = totQLat / readBursts;
+    avgBusLat = totBusLat / readBursts;
+    avgMemAccLat = totMemAccLat / readBursts;
+
+    avgRdBW = (bytesRead / 1000000) / simSeconds;
+    avgWrBW = (bytesWritten / 1000000) / simSeconds;
+    peakBW = (SimClock::Frequency / nvm.tBURST) *
+              nvm.burstSize / 1000000;
+
+    busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
+    busUtilRead = avgRdBW / peakBW * 100;
+    busUtilWrite = avgWrBW / peakBW * 100;
+}
diff --git a/src/mem/mem_interface.hh b/src/mem/mem_interface.hh
new file mode 100644
index 000000000..98440020a
--- /dev/null
+++ b/src/mem/mem_interface.hh
@@ -0,0 +1,1270 @@
+/*
+ * Copyright (c) 2012-2020 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * + * Copyright (c) 2013 Amin Farmahini-Farahani + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * MemInterface declaration + */ + +#ifndef __MEM_INTERFACE_HH__ +#define __MEM_INTERFACE_HH__ + +#include +#include +#include +#include +#include + +#include "base/statistics.hh" +#include "enums/AddrMap.hh" +#include "enums/PageManage.hh" +#include "mem/abstract_mem.hh" +#include "mem/drampower.hh" +#include "mem/mem_ctrl.hh" +#include "params/DRAMInterface.hh" +#include "params/MemInterface.hh" +#include "params/NVMInterface.hh" +#include "sim/eventq.hh" + +/** + * General interface to memory device + * Includes functions and parameters shared across media types + */ +class MemInterface : public AbstractMemory +{ + protected: + /** + * A basic class to track the bank state, i.e. what row is + * currently open (if any), when is the bank free to accept a new + * column (read/write) command, when can it be precharged, and + * when can it be activated. + * + * The bank also keeps track of how many bytes have been accessed + * in the open row since it was opened. 
+ */ + class Bank + { + + public: + + static const uint32_t NO_ROW = -1; + + uint32_t openRow; + uint8_t bank; + uint8_t bankgr; + + Tick rdAllowedAt; + Tick wrAllowedAt; + Tick preAllowedAt; + Tick actAllowedAt; + + uint32_t rowAccesses; + uint32_t bytesAccessed; + + Bank() : + openRow(NO_ROW), bank(0), bankgr(0), + rdAllowedAt(0), wrAllowedAt(0), preAllowedAt(0), actAllowedAt(0), + rowAccesses(0), bytesAccessed(0) + { } + }; + + /** + * A pointer to the parent MemCtrl instance + */ + MemCtrl* ctrl; + + /** + * Number of commands that can issue in the defined controller + * command window, used to verify command bandwidth + */ + unsigned int maxCommandsPerWindow; + + /** + * Memory controller configuration initialized based on parameter + * values. + */ + Enums::AddrMap addrMapping; + + /** + * General device and channel characteristics + * The rowsPerBank is determined based on the capacity, number of + * ranks and banks, the burst size, and the row buffer size. + */ + const uint32_t burstSize; + const uint32_t deviceSize; + const uint32_t deviceRowBufferSize; + const uint32_t devicesPerRank; + const uint32_t rowBufferSize; + const uint32_t burstsPerRowBuffer; + const uint32_t burstsPerStripe; + const uint32_t ranksPerChannel; + const uint32_t banksPerRank; + uint32_t rowsPerBank; + + /** + * General timing requirements + */ + const Tick M5_CLASS_VAR_USED tCK; + const Tick tCS; + const Tick tBURST; + const Tick tRTW; + const Tick tWTR; + + /* + * @return delay between write and read commands + */ + virtual Tick writeToReadDelay() const { return tBURST + tWTR; } + + /* + * @return delay between write and read commands + */ + Tick readToWriteDelay() const { return tBURST + tRTW; } + + /* + * @return delay between accesses to different ranks + */ + Tick rankToRankDelay() const { return tBURST + tCS; } + + + public: + + /** + * Buffer sizes for read and write queues in the controller + * These are passed to the controller on instantiation + * Defining them here allows for buffers to be resized based + * on memory type / configuration. + */ + const uint32_t readBufferSize; + const uint32_t writeBufferSize; + + /** Set a pointer to the controller and initialize + * interface based on controller parameters + * @param _ctrl pointer to the parent controller + * @param command_window size of command window used to + * check command bandwidth + */ + void setCtrl(MemCtrl* _ctrl, unsigned int command_window); + + /** + * Get an address in a dense range which starts from 0. The input + * address is the physical address of the request in an address + * space that contains other SimObjects apart from this + * controller. + * + * @param addr The intput address which should be in the addrRange + * @return An address in the continues range [0, max) + */ + Addr getCtrlAddr(Addr addr) { return range.getOffset(addr); } + + /** + * Setup the rank based on packet received + * + * @param integer value of rank to be setup. used to index ranks vector + * @param are we setting up rank for read or write packet? + */ + virtual void setupRank(const uint8_t rank, const bool is_read) = 0; + + /** + * Check drain state of interface + * + * @return true if all ranks are drained and idle + * + */ + virtual bool allRanksDrained() const = 0; + + /** + * For FR-FCFS policy, find first command that can issue + * Function will be overriden by interface to select based + * on media characteristics, used to determine when read + * or write can issue. 
+ * + * @param queue Queued requests to consider + * @param min_col_at Minimum tick for 'seamless' issue + * @return an iterator to the selected packet, else queue.end() + * @return the tick when the packet selected will issue + */ + virtual std::pair + chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const = 0; + + /* + * Function to calulate unloaded latency + */ + virtual Tick accessLatency() const = 0; + + /** + * @return number of bytes in a burst for this interface + */ + uint32_t bytesPerBurst() const { return burstSize; } + + /* + * @return time to offset next command + */ + virtual Tick commandOffset() const = 0; + + /** + * Check if a burst operation can be issued to the interface + * + * @param Return true if RD/WR can issue + */ + virtual bool burstReady(MemPacket* pkt) const = 0; + + /** + * Determine the required delay for an access to a different rank + * + * @return required rank to rank delay + */ + Tick rankDelay() const { return tCS; } + + /** + * + * @return minimum additional bus turnaround required for read-to-write + */ + Tick minReadToWriteDataGap() const { return std::min(tRTW, tCS); } + + /** + * + * @return minimum additional bus turnaround required for write-to-read + */ + Tick minWriteToReadDataGap() const { return std::min(tWTR, tCS); } + + /** + * Address decoder to figure out physical mapping onto ranks, + * banks, and rows. This function is called multiple times on the same + * system packet if the pakcet is larger than burst of the memory. The + * pkt_addr is used for the offset within the packet. + * + * @param pkt The packet from the outside world + * @param pkt_addr The starting address of the packet + * @param size The size of the packet in bytes + * @param is_read Is the request for a read or a write to memory + * @param is_dram Is the request to a DRAM interface + * @return A MemPacket pointer with the decoded information + */ + MemPacket* decodePacket(const PacketPtr pkt, Addr pkt_addr, + unsigned int size, bool is_read, bool is_dram); + + /** + * Add rank to rank delay to bus timing to all banks in all ranks + * when access to an alternate interface is issued + * + * param cmd_at Time of current command used as starting point for + * addition of rank-to-rank delay + */ + virtual void addRankToRankDelay(Tick cmd_at) = 0; + + typedef MemInterfaceParams Params; + MemInterface(const Params* _p); +}; + +/** + * Interface to DRAM devices with media specific parameters, + * statistics, and functions. + * The DRAMInterface includes a class for individual ranks + * and per rank functions. + */ +class DRAMInterface : public MemInterface +{ + private: + /** + * Simple structure to hold the values needed to keep track of + * commands for DRAMPower + */ + struct Command + { + Data::MemCommand::cmds type; + uint8_t bank; + Tick timeStamp; + + constexpr Command(Data::MemCommand::cmds _type, uint8_t _bank, + Tick time_stamp) + : type(_type), bank(_bank), timeStamp(time_stamp) + { } + }; + + /** + * The power state captures the different operational states of + * the DRAM and interacts with the bus read/write state machine, + * and the refresh state machine. + * + * PWR_IDLE : The idle state in which all banks are closed + * From here can transition to: PWR_REF, PWR_ACT, + * PWR_PRE_PDN + * + * PWR_REF : Auto-refresh state. Will transition when refresh is + * complete based on power state prior to PWR_REF + * From here can transition to: PWR_IDLE, PWR_PRE_PDN, + * PWR_SREF + * + * PWR_SREF : Self-refresh state. 
Entered after refresh if + * previous state was PWR_PRE_PDN + * From here can transition to: PWR_IDLE + * + * PWR_PRE_PDN : Precharge power down state + * From here can transition to: PWR_REF, PWR_IDLE + * + * PWR_ACT : Activate state in which one or more banks are open + * From here can transition to: PWR_IDLE, PWR_ACT_PDN + * + * PWR_ACT_PDN : Activate power down state + * From here can transition to: PWR_ACT + */ + enum PowerState + { + PWR_IDLE = 0, + PWR_REF, + PWR_SREF, + PWR_PRE_PDN, + PWR_ACT, + PWR_ACT_PDN + }; + + /** + * The refresh state is used to control the progress of the + * refresh scheduling. When normal operation is in progress the + * refresh state is idle. Once tREFI has elasped, a refresh event + * is triggered to start the following STM transitions which are + * used to issue a refresh and return back to normal operation + * + * REF_IDLE : IDLE state used during normal operation + * From here can transition to: REF_DRAIN + * + * REF_SREF_EXIT : Exiting a self-refresh; refresh event scheduled + * after self-refresh exit completes + * From here can transition to: REF_DRAIN + * + * REF_DRAIN : Drain state in which on going accesses complete. + * From here can transition to: REF_PD_EXIT + * + * REF_PD_EXIT : Evaluate pwrState and issue wakeup if needed + * Next state dependent on whether banks are open + * From here can transition to: REF_PRE, REF_START + * + * REF_PRE : Close (precharge) all open banks + * From here can transition to: REF_START + * + * REF_START : Issue refresh command and update DRAMPower stats + * From here can transition to: REF_RUN + * + * REF_RUN : Refresh running, waiting for tRFC to expire + * From here can transition to: REF_IDLE, REF_SREF_EXIT + */ + enum RefreshState + { + REF_IDLE = 0, + REF_DRAIN, + REF_PD_EXIT, + REF_SREF_EXIT, + REF_PRE, + REF_START, + REF_RUN + }; + + class Rank; + struct RankStats : public Stats::Group + { + RankStats(DRAMInterface &dram, Rank &rank); + + void regStats() override; + void resetStats() override; + void preDumpStats() override; + + Rank &rank; + + /* + * Command energies + */ + Stats::Scalar actEnergy; + Stats::Scalar preEnergy; + Stats::Scalar readEnergy; + Stats::Scalar writeEnergy; + Stats::Scalar refreshEnergy; + + /* + * Active Background Energy + */ + Stats::Scalar actBackEnergy; + + /* + * Precharge Background Energy + */ + Stats::Scalar preBackEnergy; + + /* + * Active Power-Down Energy + */ + Stats::Scalar actPowerDownEnergy; + + /* + * Precharge Power-Down Energy + */ + Stats::Scalar prePowerDownEnergy; + + /* + * self Refresh Energy + */ + Stats::Scalar selfRefreshEnergy; + + Stats::Scalar totalEnergy; + Stats::Scalar averagePower; + + /** + * Stat to track total DRAM idle time + * + */ + Stats::Scalar totalIdleTime; + + /** + * Track time spent in each power state. + */ + Stats::Vector pwrStateTime; + }; + + /** + * Rank class includes a vector of banks. Refresh and Power state + * machines are defined per rank. Events required to change the + * state of the refresh and power state machine are scheduled per + * rank. This class allows the implementation of rank-wise refresh + * and rank-wise power-down. + */ + class Rank : public EventManager + { + private: + + /** + * A reference to the parent DRAMInterface instance + */ + DRAMInterface& dram; + + /** + * Since we are taking decisions out of order, we need to keep + * track of what power transition is happening at what time + */ + PowerState pwrStateTrans; + + /** + * Previous low-power state, which will be re-entered after refresh. 
+ */ + PowerState pwrStatePostRefresh; + + /** + * Track when we transitioned to the current power state + */ + Tick pwrStateTick; + + /** + * Keep track of when a refresh is due. + */ + Tick refreshDueAt; + + /** + * Function to update Power Stats + */ + void updatePowerStats(); + + /** + * Schedule a power state transition in the future, and + * potentially override an already scheduled transition. + * + * @param pwr_state Power state to transition to + * @param tick Tick when transition should take place + */ + void schedulePowerEvent(PowerState pwr_state, Tick tick); + + public: + + /** + * Current power state. + */ + PowerState pwrState; + + /** + * current refresh state + */ + RefreshState refreshState; + + /** + * rank is in or transitioning to power-down or self-refresh + */ + bool inLowPowerState; + + /** + * Current Rank index + */ + uint8_t rank; + + /** + * Track number of packets in read queue going to this rank + */ + uint32_t readEntries; + + /** + * Track number of packets in write queue going to this rank + */ + uint32_t writeEntries; + + /** + * Number of ACT, RD, and WR events currently scheduled + * Incremented when a refresh event is started as well + * Used to determine when a low-power state can be entered + */ + uint8_t outstandingEvents; + + /** + * delay low-power exit until this requirement is met + */ + Tick wakeUpAllowedAt; + + /** + * One DRAMPower instance per rank + */ + DRAMPower power; + + /** + * List of commands issued, to be sent to DRAMPpower at refresh + * and stats dump. Keep commands here since commands to different + * banks are added out of order. Will only pass commands up to + * curTick() to DRAMPower after sorting. + */ + std::vector cmdList; + + /** + * Vector of Banks. Each rank is made of several devices which in + * term are made from several banks. + */ + std::vector banks; + + /** + * To track number of banks which are currently active for + * this rank. + */ + unsigned int numBanksActive; + + /** List to keep track of activate ticks */ + std::deque actTicks; + + /** + * Track when we issued the last read/write burst + */ + Tick lastBurstTick; + + Rank(const DRAMInterfaceParams* _p, int _rank, + DRAMInterface& _dram); + + const std::string name() const { return csprintf("%d", rank); } + + /** + * Kick off accounting for power and refresh states and + * schedule initial refresh. + * + * @param ref_tick Tick for first refresh + */ + void startup(Tick ref_tick); + + /** + * Stop the refresh events. + */ + void suspend(); + + /** + * Check if there is no refresh and no preparation of refresh ongoing + * i.e. the refresh state machine is in idle + * + * @param Return true if the rank is idle from a refresh point of view + */ + bool inRefIdleState() const { return refreshState == REF_IDLE; } + + /** + * Check if the current rank has all banks closed and is not + * in a low power state + * + * @param Return true if the rank is idle from a bank + * and power point of view + */ + bool inPwrIdleState() const { return pwrState == PWR_IDLE; } + + /** + * Trigger a self-refresh exit if there are entries enqueued + * Exit if there are any read entries regardless of the bus state. + * If we are currently issuing write commands, exit if we have any + * write commands enqueued as well. + * Could expand this in the future to analyze state of entire queue + * if needed. 
+ * + * @return boolean indicating self-refresh exit should be scheduled + */ + bool forceSelfRefreshExit() const; + + /** + * Check if the command queue of current rank is idle + * + * @param Return true if the there are no commands in Q. + * Bus direction determines queue checked. + */ + bool isQueueEmpty() const; + + /** + * Let the rank check if it was waiting for requests to drain + * to allow it to transition states. + */ + void checkDrainDone(); + + /** + * Push command out of cmdList queue that are scheduled at + * or before curTick() to DRAMPower library + * All commands before curTick are guaranteed to be complete + * and can safely be flushed. + */ + void flushCmdList(); + + /** + * Computes stats just prior to dump event + */ + void computeStats(); + + /** + * Reset stats on a stats event + */ + void resetStats(); + + /** + * Schedule a transition to power-down (sleep) + * + * @param pwr_state Power state to transition to + * @param tick Absolute tick when transition should take place + */ + void powerDownSleep(PowerState pwr_state, Tick tick); + + /** + * schedule and event to wake-up from power-down or self-refresh + * and update bank timing parameters + * + * @param exit_delay Relative tick defining the delay required between + * low-power exit and the next command + */ + void scheduleWakeUpEvent(Tick exit_delay); + + void processWriteDoneEvent(); + EventFunctionWrapper writeDoneEvent; + + void processActivateEvent(); + EventFunctionWrapper activateEvent; + + void processPrechargeEvent(); + EventFunctionWrapper prechargeEvent; + + void processRefreshEvent(); + EventFunctionWrapper refreshEvent; + + void processPowerEvent(); + EventFunctionWrapper powerEvent; + + void processWakeUpEvent(); + EventFunctionWrapper wakeUpEvent; + + protected: + RankStats stats; + }; + + /** + * Function for sorting Command structures based on timeStamp + * + * @param a Memory Command + * @param next Memory Command + * @return true if timeStamp of Command 1 < timeStamp of Command 2 + */ + static bool + sortTime(const Command& cmd, const Command& cmd_next) + { + return cmd.timeStamp < cmd_next.timeStamp; + } + + /** + * DRAM specific device characteristics + */ + const uint32_t bankGroupsPerRank; + const bool bankGroupArch; + + /** + * DRAM specific timing requirements + */ + const Tick tCL; + const Tick tBURST_MIN; + const Tick tBURST_MAX; + const Tick tCCD_L_WR; + const Tick tCCD_L; + const Tick tRCD; + const Tick tRP; + const Tick tRAS; + const Tick tWR; + const Tick tRTP; + const Tick tRFC; + const Tick tREFI; + const Tick tRRD; + const Tick tRRD_L; + const Tick tPPD; + const Tick tAAD; + const Tick tXAW; + const Tick tXP; + const Tick tXS; + const Tick clkResyncDelay; + const bool dataClockSync; + const bool burstInterleave; + const uint8_t twoCycleActivate; + const uint32_t activationLimit; + const Tick wrToRdDlySameBG; + const Tick rdToWrDlySameBG; + + Enums::PageManage pageMgmt; + /** + * Max column accesses (read and write) per row, before forefully + * closing it. + */ + const uint32_t maxAccessesPerRow; + + // timestamp offset + uint64_t timeStampOffset; + + // Holds the value of the DRAM rank of burst issued + uint8_t activeRank; + + /** Enable or disable DRAM powerdown states. */ + bool enableDRAMPowerdown; + + /** The time when stats were last reset used to calculate average power */ + Tick lastStatsResetTick; + + /** + * Keep track of when row activations happen, in order to enforce + * the maximum number of activations in the activation window. 
The + * method updates the time that the banks become available based + * on the current limits. + * + * @param rank_ref Reference to the rank + * @param bank_ref Reference to the bank + * @param act_tick Time when the activation takes place + * @param row Index of the row + */ + void activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick, + uint32_t row); + + /** + * Precharge a given bank and also update when the precharge is + * done. This will also deal with any stats related to the + * accesses to the open page. + * + * @param rank_ref The rank to precharge + * @param bank_ref The bank to precharge + * @param pre_tick Time when the precharge takes place + * @param auto_or_preall Is this an auto-precharge or precharge all command + * @param trace Is this an auto precharge then do not add to trace + */ + void prechargeBank(Rank& rank_ref, Bank& bank_ref, + Tick pre_tick, bool auto_or_preall = false, + bool trace = true); + + struct DRAMStats : public Stats::Group + { + DRAMStats(DRAMInterface &dram); + + void regStats() override; + void resetStats() override; + + DRAMInterface &dram; + + /** total number of DRAM bursts serviced */ + Stats::Scalar readBursts; + Stats::Scalar writeBursts; + + /** DRAM per bank stats */ + Stats::Vector perBankRdBursts; + Stats::Vector perBankWrBursts; + + // Latencies summed over all requests + Stats::Scalar totQLat; + Stats::Scalar totBusLat; + Stats::Scalar totMemAccLat; + + // Average latencies per request + Stats::Formula avgQLat; + Stats::Formula avgBusLat; + Stats::Formula avgMemAccLat; + + // Row hit count and rate + Stats::Scalar readRowHits; + Stats::Scalar writeRowHits; + Stats::Formula readRowHitRate; + Stats::Formula writeRowHitRate; + Stats::Histogram bytesPerActivate; + // Number of bytes transferred to/from DRAM + Stats::Scalar bytesRead; + Stats::Scalar bytesWritten; + + // Average bandwidth + Stats::Formula avgRdBW; + Stats::Formula avgWrBW; + Stats::Formula peakBW; + // bus utilization + Stats::Formula busUtil; + Stats::Formula busUtilRead; + Stats::Formula busUtilWrite; + Stats::Formula pageHitRate; + }; + + DRAMStats stats; + + /** + * Vector of dram ranks + */ + std::vector ranks; + + /* + * @return delay between write and read commands + */ + Tick writeToReadDelay() const override { return tBURST + tWTR + tCL; } + + /** + * Find which are the earliest banks ready to issue an activate + * for the enqueued requests. Assumes maximum of 32 banks per rank + * Also checks if the bank is already prepped. + * + * @param queue Queued requests to consider + * @param min_col_at time of seamless burst command + * @return One-hot encoded mask of bank indices + * @return boolean indicating burst can issue seamlessly, with no gaps + */ + std::pair, bool> + minBankPrep(const MemPacketQueue& queue, Tick min_col_at) const; + + /* + * @return time to send a burst of data without gaps + */ + Tick + burstDelay() const + { + return (burstInterleave ? tBURST_MAX / 2 : tBURST); + } + + public: + /** + * Initialize the DRAM interface and verify parameters + */ + void init() override; + + /** + * Iterate through dram ranks and instantiate per rank startup routine + */ + void startup() override; + + /** + * Setup the rank based on packet received + * + * @param integer value of rank to be setup. used to index ranks vector + * @param are we setting up rank for read or write packet? 
+ */ + void setupRank(const uint8_t rank, const bool is_read) override; + + /** + * Iterate through dram ranks to exit self-refresh in order to drain + */ + void drainRanks(); + + /** + * Return true once refresh is complete for all ranks and there are no + * additional commands enqueued. (only evaluated when draining) + * This will ensure that all banks are closed, power state is IDLE, and + * power stats have been updated + * + * @return true if all ranks have refreshed, with no commands enqueued + * + */ + bool allRanksDrained() const override; + + /** + * Iterate through DRAM ranks and suspend them + */ + void suspend(); + + /* + * @return time to offset next command + */ + Tick commandOffset() const override { return (tRP + tRCD); } + + /* + * Function to calulate unloaded, closed bank access latency + */ + Tick accessLatency() const override { return (tRP + tRCD + tCL); } + + /** + * For FR-FCFS policy, find first DRAM command that can issue + * + * @param queue Queued requests to consider + * @param min_col_at Minimum tick for 'seamless' issue + * @return an iterator to the selected packet, else queue.end() + * @return the tick when the packet selected will issue + */ + std::pair + chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const override; + + /** + * Actually do the burst - figure out the latency it + * will take to service the req based on bank state, channel state etc + * and then update those states to account for this request. Based + * on this, update the packet's "readyTime" and move it to the + * response q from where it will eventually go back to the outside + * world. + * + * @param mem_pkt The packet created from the outside world pkt + * @param next_burst_at Minimum bus timing requirement from controller + * @param queue Reference to the read or write queue with the packet + * @return pair, tick when current burst is issued and + * tick when next burst can issue + */ + std::pair + doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at, + const std::vector& queue); + + /** + * Check if a burst operation can be issued to the DRAM + * + * @param Return true if RD/WR can issue + * This requires the DRAM to be in the + * REF IDLE state + */ + bool + burstReady(MemPacket* pkt) const override + { + return ranks[pkt->rank]->inRefIdleState(); + } + + /** + * This function checks if ranks are actively refreshing and + * therefore busy. The function also checks if ranks are in + * the self-refresh state, in which case, a self-refresh exit + * is initiated. + * + * return boolean if all ranks are in refresh and therefore busy + */ + bool isBusy(); + + /** + * Add rank to rank delay to bus timing to all DRAM banks in alli ranks + * when access to an alternate interface is issued + * + * param cmd_at Time of current command used as starting point for + * addition of rank-to-rank delay + */ + void addRankToRankDelay(Tick cmd_at) override; + + /** + * Complete response process for DRAM when read burst is complete + * This will update the counters and check if a power down state + * can be entered. + * + * @param rank Specifies rank associated with read burst + */ + void respondEvent(uint8_t rank); + + /** + * Check the refresh state to determine if refresh needs + * to be kicked back into action after a read response + * + * @param rank Specifies rank associated with read burst + */ + void checkRefreshState(uint8_t rank); + + DRAMInterface(const DRAMInterfaceParams* _p); +}; + +/** + * Interface to NVM devices with media specific parameters, + * statistics, and functions. 
+ * The NVMInterface includes a class for individual ranks + * and per rank functions. + */ +class NVMInterface : public MemInterface +{ + private: + /** + * NVM rank class simply includes a vector of banks. + */ + class Rank : public EventManager + { + private: + + /** + * A reference to the parent NVMInterface instance + */ + NVMInterface& nvm; + + public: + + /** + * Current Rank index + */ + uint8_t rank; + + /** + * Vector of NVM banks. Each rank is made of several banks + * that can be accessed in parallel. + */ + std::vector banks; + + Rank(const NVMInterfaceParams* _p, int _rank, + NVMInterface& _nvm); + }; + + /** + * NVM specific device and channel characteristics + */ + const uint32_t maxPendingWrites; + const uint32_t maxPendingReads; + const bool twoCycleRdWr; + + /** + * NVM specific timing requirements + */ + const Tick tREAD; + const Tick tWRITE; + const Tick tSEND; + + struct NVMStats : public Stats::Group + { + NVMStats(NVMInterface &nvm); + + void regStats() override; + + NVMInterface &nvm; + + /** NVM stats */ + Stats::Scalar readBursts; + Stats::Scalar writeBursts; + + Stats::Vector perBankRdBursts; + Stats::Vector perBankWrBursts; + + // Latencies summed over all requests + Stats::Scalar totQLat; + Stats::Scalar totBusLat; + Stats::Scalar totMemAccLat; + + // Average latencies per request + Stats::Formula avgQLat; + Stats::Formula avgBusLat; + Stats::Formula avgMemAccLat; + + Stats::Scalar bytesRead; + Stats::Scalar bytesWritten; + + // Average bandwidth + Stats::Formula avgRdBW; + Stats::Formula avgWrBW; + Stats::Formula peakBW; + Stats::Formula busUtil; + Stats::Formula busUtilRead; + Stats::Formula busUtilWrite; + + /** NVM stats */ + Stats::Histogram pendingReads; + Stats::Histogram pendingWrites; + Stats::Histogram bytesPerBank; + }; + + NVMStats stats; + + void processWriteRespondEvent(); + EventFunctionWrapper writeRespondEvent; + + void processReadReadyEvent(); + EventFunctionWrapper readReadyEvent; + + /** + * Vector of nvm ranks + */ + std::vector ranks; + + /** + * Holding queue for non-deterministic write commands, which + * maintains writes that have been issued but have not completed + * Stored seperately mostly to keep the code clean and help with + * events scheduling. + * This mimics a buffer on the media controller and therefore is + * not added to the main write queue for sizing + */ + std::list writeRespQueue; + + std::deque readReadyQueue; + + /** + * Check if the write response queue is empty + * + * @param Return true if empty + */ + bool writeRespQueueEmpty() const { return writeRespQueue.empty(); } + + /** + * Till when must we wait before issuing next read command? + */ + Tick nextReadAt; + + // keep track of reads that have issued for which data is either + // not yet ready or has not yet been transferred to the ctrl + uint16_t numPendingReads; + uint16_t numReadDataReady; + + public: + // keep track of the number of reads that have yet to be issued + uint16_t numReadsToIssue; + + // number of writes in the writeQueue for the NVM interface + uint32_t numWritesQueued; + + /** + * Initialize the NVM interface and verify parameters + */ + void init() override; + + /** + * Setup the rank based on packet received + * + * @param integer value of rank to be setup. used to index ranks vector + * @param are we setting up rank for read or write packet? 
+     */
+    void setupRank(const uint8_t rank, const bool is_read) override;
+
+    /**
+     * Check drain state of NVM interface
+     *
+     * @return true if write response queue is empty
+     *
+     */
+    bool allRanksDrained() const override { return writeRespQueueEmpty(); }
+
+    /*
+     * @return time to offset next command
+     */
+    Tick commandOffset() const override { return tBURST; }
+
+    /**
+     * Check if a burst operation can be issued to the NVM
+     *
+     * @param Return true if RD/WR can issue
+     *                    for reads, also verify that ready count
+     *                    has been updated to a non-zero value to
+     *                    account for race conditions between events
+     */
+    bool burstReady(MemPacket* pkt) const override;
+
+    /**
+     * This function checks if ranks are busy.
+     * This state is true when either:
+     * 1) There is no command with read data ready to transmit or
+     * 2) The NVM interface has reached the maximum number of outstanding
+     *    write commands.
+     * @param read_queue_empty There are no reads queued
+     * @param all_writes_nvm All writes in queue are for NVM interface
+     * @return true if NVM is busy
+     *
+     */
+    bool isBusy(bool read_queue_empty, bool all_writes_nvm);
+    /**
+     * For FR-FCFS policy, find first NVM command that can issue
+     * default to first command to prepped region
+     *
+     * @param queue Queued requests to consider
+     * @param min_col_at Minimum tick for 'seamless' issue
+     * @return an iterator to the selected packet, else queue.end()
+     * @return the tick when the packet selected will issue
+     */
+    std::pair<MemPacketQueue::iterator, Tick>
+    chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const override;
+
+    /**
+     * Add rank to rank delay to bus timing to all NVM banks in all ranks
+     * when access to an alternate interface is issued
+     *
+     * param cmd_at Time of current command used as starting point for
+     *              addition of rank-to-rank delay
+     */
+    void addRankToRankDelay(Tick cmd_at) override;
+
+
+    /**
+     * Select read command to issue asynchronously
+     */
+    void chooseRead(MemPacketQueue& queue);
+
+    /*
+     * Function to calculate unloaded access latency
+     */
+    Tick accessLatency() const override { return (tREAD + tSEND); }
+
+    /**
+     * Check if the write response queue has reached defined threshold
+     *
+     * @param Return true if full
+     */
+    bool
+    writeRespQueueFull() const
+    {
+        return writeRespQueue.size() == maxPendingWrites;
+    }
+
+    bool
+    readsWaitingToIssue() const
+    {
+        return ((numReadsToIssue != 0) &&
+                (numPendingReads < maxPendingReads));
+    }
+
+    /**
+     * Actually do the burst and update stats. 
+ * + * @param pkt The packet created from the outside world pkt + * @param next_burst_at Minimum bus timing requirement from controller + * @return pair, tick when current burst is issued and + * tick when next burst can issue + */ + std::pair + doBurstAccess(MemPacket* pkt, Tick next_burst_at); + + NVMInterface(const NVMInterfaceParams* _p); +}; + +#endif //__MEM_INTERFACE_HH__ diff --git a/tests/gem5/configs/base_config.py b/tests/gem5/configs/base_config.py index cbea76874..fbedbaf68 100644 --- a/tests/gem5/configs/base_config.py +++ b/tests/gem5/configs/base_config.py @@ -221,7 +221,7 @@ class BaseSESystem(BaseSystem): def create_system(self): if issubclass(self.mem_class, m5.objects.DRAMInterface): - mem_ctrl = DRAMCtrl() + mem_ctrl = MemCtrl() mem_ctrl.dram = self.mem_class() else: mem_ctrl = self.mem_class() @@ -280,7 +280,7 @@ class BaseFSSystem(BaseSystem): if issubclass(self.mem_class, m5.objects.DRAMInterface): mem_ctrls = [] for r in system.mem_ranges: - mem_ctrl = DRAMCtrl() + mem_ctrl = MemCtrl() mem_ctrl.dram = self.mem_class(range = r) mem_ctrls.append(mem_ctrl) system.physmem = mem_ctrls
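
For reference, configuration scripts use the renamed controller/interface split as in the script hunks above: instantiate the generic MemCtrl and attach a DRAMInterface (or NVMInterface) to it. Below is a minimal sketch of that pattern, modelled on the learning_gem5 simple.py change in this patch; the surrounding System/clock/membus setup is illustrative boilerplate and not taken verbatim from this commit.

    import m5
    from m5.objects import *

    system = System()
    system.clk_domain = SrcClockDomain(clock='1GHz',
                                       voltage_domain=VoltageDomain())
    system.mem_mode = 'timing'
    system.mem_ranges = [AddrRange('512MB')]
    system.membus = SystemXBar()

    # The controller is now the generic MemCtrl; the DRAM timing model is
    # attached as a DRAMInterface subclass via the 'dram' parameter
    # (an NVMInterface would be attached via 'nvm' instead).
    system.mem_ctrl = MemCtrl()
    system.mem_ctrl.dram = DDR3_1600_8x8()
    system.mem_ctrl.dram.range = system.mem_ranges[0]
    system.mem_ctrl.port = system.membus.master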