mem: Add Units to mem stats
[gem5.git] / src / mem / mem_interface.cc
1 /*
2 * Copyright (c) 2010-2020 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2013 Amin Farmahini-Farahani
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 #include "mem/mem_interface.hh"
42
43 #include "base/bitfield.hh"
44 #include "base/cprintf.hh"
45 #include "base/trace.hh"
46 #include "debug/DRAM.hh"
47 #include "debug/DRAMPower.hh"
48 #include "debug/DRAMState.hh"
49 #include "debug/NVM.hh"
50 #include "sim/system.hh"
51
52 using namespace Data;
53
54 MemInterface::MemInterface(const MemInterfaceParams &_p)
55 : AbstractMemory(_p),
56 addrMapping(_p.addr_mapping),
57 burstSize((_p.devices_per_rank * _p.burst_length *
58 _p.device_bus_width) / 8),
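      // Illustrative example (hypothetical configuration, not the
      // defaults): 8 devices per rank, a burst length of 8 and an
      // 8-bit device interface give (8 * 8 * 8) / 8 = 64 bytes per
      // burst, i.e. one typical cache line per burst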
59 deviceSize(_p.device_size),
60 deviceRowBufferSize(_p.device_rowbuffer_size),
61 devicesPerRank(_p.devices_per_rank),
62 rowBufferSize(devicesPerRank * deviceRowBufferSize),
63 burstsPerRowBuffer(rowBufferSize / burstSize),
64 burstsPerStripe(range.interleaved() ?
65 range.granularity() / burstSize : 1),
66 ranksPerChannel(_p.ranks_per_channel),
67 banksPerRank(_p.banks_per_rank), rowsPerBank(0),
68 tCK(_p.tCK), tCS(_p.tCS), tBURST(_p.tBURST),
69 tRTW(_p.tRTW),
70 tWTR(_p.tWTR),
71 readBufferSize(_p.read_buffer_size),
72 writeBufferSize(_p.write_buffer_size)
73 {}
74
75 void
76 MemInterface::setCtrl(MemCtrl* _ctrl, unsigned int command_window)
77 {
78 ctrl = _ctrl;
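    // the command window is given in ticks; dividing by the clock period
    // yields the number of command-bus slots per window, which is later
    // used by verifySingleCmd/verifyMultiCmd to model command bandwidth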
79 maxCommandsPerWindow = command_window / tCK;
80 }
81
82 MemPacket*
83 MemInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr,
84 unsigned size, bool is_read, bool is_dram)
85 {
86 // decode the address based on the address mapping scheme, with
87 // Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and
88 // channel, respectively
89 uint8_t rank;
90 uint8_t bank;
91 // use a 64-bit unsigned during the computations as the row is
92 // always the top bits, and check before creating the packet
93 uint64_t row;
94
95 // Get packed address, starting at 0
96 Addr addr = getCtrlAddr(pkt_addr);
97
98 // truncate the address to a memory burst, which makes it unique to
99 // a specific buffer, row, bank, rank and channel
100 addr = addr / burstSize;
101
102 // we have removed the lowest order address bits that denote the
103 // position within the column
104 if (addrMapping == Enums::RoRaBaChCo || addrMapping == Enums::RoRaBaCoCh) {
105 // the lowest order bits denote the column to ensure that
106 // sequential cache lines occupy the same row
107 addr = addr / burstsPerRowBuffer;
108
109 // after the channel bits, get the bank bits to interleave
110 // over the banks
111 bank = addr % banksPerRank;
112 addr = addr / banksPerRank;
113
114 // after the bank, we get the rank bits which thus interleaves
115 // over the ranks
116 rank = addr % ranksPerChannel;
117 addr = addr / ranksPerChannel;
118
119 // lastly, get the row bits, no need to remove them from addr
120 row = addr % rowsPerBank;
121 } else if (addrMapping == Enums::RoCoRaBaCh) {
122 // with emerging technologies, could have small page size with
123 // interleaving granularity greater than row buffer
124 if (burstsPerStripe > burstsPerRowBuffer) {
125 // remove column bits which are a subset of burstsPerStripe
126 addr = addr / burstsPerRowBuffer;
127 } else {
128 // remove lower column bits below channel bits
129 addr = addr / burstsPerStripe;
130 }
131
132 // start with the bank bits, as this provides the maximum
133 // opportunity for parallelism between requests
134 bank = addr % banksPerRank;
135 addr = addr / banksPerRank;
136
137 // next get the rank bits
138 rank = addr % ranksPerChannel;
139 addr = addr / ranksPerChannel;
140
141             // next, the higher-order column bits
142 if (burstsPerStripe < burstsPerRowBuffer) {
143 addr = addr / (burstsPerRowBuffer / burstsPerStripe);
144 }
145
146 // lastly, get the row bits, no need to remove them from addr
147 row = addr % rowsPerBank;
148 } else
149 panic("Unknown address mapping policy chosen!");
150
151 assert(rank < ranksPerChannel);
152 assert(bank < banksPerRank);
153 assert(row < rowsPerBank);
154 assert(row < Bank::NO_ROW);
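    // Illustrative example (hypothetical geometry, not the configured
    // defaults): assume a single non-interleaved channel with a 64-byte
    // burst, a 1 KiB row buffer (16 bursts per row), 8 banks per rank
    // and 2 ranks per channel. Under RoRaBaChCo, controller address
    // 0x40080 decodes as:
    //   0x40080 / 64 = 0x1002 (burst number)
    //   0x1002 / 16  = 0x100  (column bits dropped)
    //   0x100 % 8    = 0 -> bank 0,  0x100 / 8 = 0x20
    //   0x20 % 2     = 0 -> rank 0,  0x20 / 2  = 0x10
    //   0x10 % rowsPerBank -> row 16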
155
156 DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
157 pkt_addr, rank, bank, row);
158
159 // create the corresponding memory packet with the entry time and
160 // ready time set to the current tick, the latter will be updated
161 // later
162 uint16_t bank_id = banksPerRank * rank + bank;
163
164 return new MemPacket(pkt, is_read, is_dram, rank, bank, row, bank_id,
165 pkt_addr, size);
166 }
167
168 std::pair<MemPacketQueue::iterator, Tick>
169 DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
170 {
171 std::vector<uint32_t> earliest_banks(ranksPerChannel, 0);
172
173 // Has minBankPrep been called to populate earliest_banks?
174 bool filled_earliest_banks = false;
175     // can the PRE/ACT sequence be done without impacting utilization?
176 bool hidden_bank_prep = false;
177
178 // search for seamless row hits first, if no seamless row hit is
179 // found then determine if there are other packets that can be issued
180 // without incurring additional bus delay due to bank timing
181     // Will select closed rows first to enable more open row possibilities
182 // in future selections
183 bool found_hidden_bank = false;
184
185 // remember if we found a row hit, not seamless, but bank prepped
186 // and ready
187 bool found_prepped_pkt = false;
188
189 // if we have no row hit, prepped or not, and no seamless packet,
190 // just go for the earliest possible
191 bool found_earliest_pkt = false;
192
193 Tick selected_col_at = MaxTick;
194 auto selected_pkt_it = queue.end();
195
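    // In summary, the loop below selects, in decreasing priority:
    //   1) a seamless row hit (column command can issue by min_col_at),
    //   2) a packet whose PRE/ACT preparation can be hidden behind the
    //      current data transfer,
    //   3) a row hit to an already prepped bank,
    //   4) any packet to one of the earliest-available banks.
    // If nothing can issue (e.g. all ranks are refreshing), queue.end()
    // is returned.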
196 for (auto i = queue.begin(); i != queue.end() ; ++i) {
197 MemPacket* pkt = *i;
198
199 // select optimal DRAM packet in Q
200 if (pkt->isDram()) {
201 const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
202 const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
203 bank.wrAllowedAt;
204
205 DPRINTF(DRAM, "%s checking DRAM packet in bank %d, row %d\n",
206 __func__, pkt->bank, pkt->row);
207
208 // check if rank is not doing a refresh and thus is available,
209 // if not, jump to the next packet
210 if (burstReady(pkt)) {
211
212 DPRINTF(DRAM,
213 "%s bank %d - Rank %d available\n", __func__,
214 pkt->bank, pkt->rank);
215
216 // check if it is a row hit
217 if (bank.openRow == pkt->row) {
218 // no additional rank-to-rank or same bank-group
219 // delays, or we switched read/write and might as well
220 // go for the row hit
221 if (col_allowed_at <= min_col_at) {
222 // FCFS within the hits, giving priority to
223 // commands that can issue seamlessly, without
224 // additional delay, such as same rank accesses
225 // and/or different bank-group accesses
226 DPRINTF(DRAM, "%s Seamless buffer hit\n", __func__);
227 selected_pkt_it = i;
228 selected_col_at = col_allowed_at;
229 // no need to look through the remaining queue entries
230 break;
231 } else if (!found_hidden_bank && !found_prepped_pkt) {
232 // if we did not find a packet to a closed row that can
233 // issue the bank commands without incurring delay, and
234 // did not yet find a packet to a prepped row, remember
235 // the current one
236 selected_pkt_it = i;
237 selected_col_at = col_allowed_at;
238 found_prepped_pkt = true;
239 DPRINTF(DRAM, "%s Prepped row buffer hit\n", __func__);
240 }
241 } else if (!found_earliest_pkt) {
242 // if we have not initialised the bank status, do it
243                 // now, and only once per scheduling decision
244 if (!filled_earliest_banks) {
245 // determine entries with earliest bank delay
246 std::tie(earliest_banks, hidden_bank_prep) =
247 minBankPrep(queue, min_col_at);
248 filled_earliest_banks = true;
249 }
250
251 // bank is amongst first available banks
252 // minBankPrep will give priority to packets that can
253 // issue seamlessly
254 if (bits(earliest_banks[pkt->rank],
255 pkt->bank, pkt->bank)) {
256 found_earliest_pkt = true;
257 found_hidden_bank = hidden_bank_prep;
258
259 // give priority to packets that can issue
260 // bank commands 'behind the scenes'
261 // any additional delay if any will be due to
262 // col-to-col command requirements
263 if (hidden_bank_prep || !found_prepped_pkt) {
264 selected_pkt_it = i;
265 selected_col_at = col_allowed_at;
266 }
267 }
268 }
269 } else {
270 DPRINTF(DRAM, "%s bank %d - Rank %d not available\n", __func__,
271 pkt->bank, pkt->rank);
272 }
273 }
274 }
275
276 if (selected_pkt_it == queue.end()) {
277 DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__);
278 }
279
280 return std::make_pair(selected_pkt_it, selected_col_at);
281 }
282
283 void
284 DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
285 Tick act_tick, uint32_t row)
286 {
287 assert(rank_ref.actTicks.size() == activationLimit);
288
289 // verify that we have command bandwidth to issue the activate
290 // if not, shift to next burst window
291 Tick act_at;
292 if (twoCycleActivate)
293 act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD);
294 else
295 act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow);
296
297 DPRINTF(DRAM, "Activate at tick %d\n", act_at);
298
299 // update the open row
300 assert(bank_ref.openRow == Bank::NO_ROW);
301 bank_ref.openRow = row;
302
303 // start counting anew, this covers both the case when we
304 // auto-precharged, and when this access is forced to
305 // precharge
306 bank_ref.bytesAccessed = 0;
307 bank_ref.rowAccesses = 0;
308
309 ++rank_ref.numBanksActive;
310 assert(rank_ref.numBanksActive <= banksPerRank);
311
312 DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got "
313 "%d active\n", bank_ref.bank, rank_ref.rank, act_at,
314 ranks[rank_ref.rank]->numBanksActive);
315
316 rank_ref.cmdList.push_back(Command(MemCommand::ACT, bank_ref.bank,
317 act_at));
318
319 DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_at, tCK) -
320 timeStampOffset, bank_ref.bank, rank_ref.rank);
321
322 // The next access has to respect tRAS for this bank
323 bank_ref.preAllowedAt = act_at + tRAS;
324
325 // Respect the row-to-column command delay for both read and write cmds
326 bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt);
327 bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt);
328
329 // start by enforcing tRRD
330 for (int i = 0; i < banksPerRank; i++) {
331 // next activate to any bank in this rank must not happen
332 // before tRRD
333 if (bankGroupArch && (bank_ref.bankgr == rank_ref.banks[i].bankgr)) {
334 // bank group architecture requires longer delays between
335 // ACT commands within the same bank group. Use tRRD_L
336 // in this case
337 rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD_L,
338 rank_ref.banks[i].actAllowedAt);
339 } else {
340 // use shorter tRRD value when either
341                 // 1) bank group architecture is not supported
342 // 2) bank is in a different bank group
343 rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD,
344 rank_ref.banks[i].actAllowedAt);
345 }
346 }
347
348 // next, we deal with tXAW, if the activation limit is disabled
349 // then we directly schedule an activate power event
350 if (!rank_ref.actTicks.empty()) {
351 // sanity check
352 if (rank_ref.actTicks.back() &&
353 (act_at - rank_ref.actTicks.back()) < tXAW) {
354 panic("Got %d activates in window %d (%llu - %llu) which "
355 "is smaller than %llu\n", activationLimit, act_at -
356 rank_ref.actTicks.back(), act_at,
357 rank_ref.actTicks.back(), tXAW);
358 }
359
360         // shift the times used for the bookkeeping, the last element
361 // (highest index) is the oldest one and hence the lowest value
362 rank_ref.actTicks.pop_back();
363
364         // record a new activation (in the future)
365 rank_ref.actTicks.push_front(act_at);
366
367 // cannot activate more than X times in time window tXAW, push the
368 // next one (the X + 1'st activate) to be tXAW away from the
369 // oldest in our window of X
370 if (rank_ref.actTicks.back() &&
371 (act_at - rank_ref.actTicks.back()) < tXAW) {
372 DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate "
373 "no earlier than %llu\n", activationLimit,
374 rank_ref.actTicks.back() + tXAW);
375 for (int j = 0; j < banksPerRank; j++)
376 // next activate must not happen before end of window
377 rank_ref.banks[j].actAllowedAt =
378 std::max(rank_ref.actTicks.back() + tXAW,
379 rank_ref.banks[j].actAllowedAt);
380 }
381 }
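    // Illustrative example (hypothetical values): with activationLimit
    // == 4 and tXAW == 40ns, actTicks holds the four most recent ACT
    // times; if the oldest of them is at tick t, a fifth ACT cannot be
    // issued before t + 40ns, which the code above enforces by pushing
    // actAllowedAt for every bank in the rank out to that point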
382
383 // at the point when this activate takes place, make sure we
384 // transition to the active power state
385 if (!rank_ref.activateEvent.scheduled())
386 schedule(rank_ref.activateEvent, act_at);
387 else if (rank_ref.activateEvent.when() > act_at)
388 // move it sooner in time
389 reschedule(rank_ref.activateEvent, act_at);
390 }
391
392 void
393 DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick,
394 bool auto_or_preall, bool trace)
395 {
396 // make sure the bank has an open row
397 assert(bank.openRow != Bank::NO_ROW);
398
399 // sample the bytes per activate here since we are closing
400 // the page
401 stats.bytesPerActivate.sample(bank.bytesAccessed);
402
403 bank.openRow = Bank::NO_ROW;
404
405 Tick pre_at = pre_tick;
406 if (auto_or_preall) {
407 // no precharge allowed before this one
408 bank.preAllowedAt = pre_at;
409 } else {
410 // Issuing an explicit PRE command
411 // Verify that we have command bandwidth to issue the precharge
412 // if not, shift to next burst window
413 pre_at = ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow);
414 // enforce tPPD
415 for (int i = 0; i < banksPerRank; i++) {
416 rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
417 rank_ref.banks[i].preAllowedAt);
418 }
419 }
420
421 Tick pre_done_at = pre_at + tRP;
422
423 bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at);
424
425 assert(rank_ref.numBanksActive != 0);
426 --rank_ref.numBanksActive;
427
428 DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got "
429 "%d active\n", bank.bank, rank_ref.rank, pre_at,
430 rank_ref.numBanksActive);
431
432 if (trace) {
433
434 rank_ref.cmdList.push_back(Command(MemCommand::PRE, bank.bank,
435 pre_at));
436 DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) -
437 timeStampOffset, bank.bank, rank_ref.rank);
438 }
439
440 // if we look at the current number of active banks we might be
441 // tempted to think the DRAM is now idle, however this can be
442 // undone by an activate that is scheduled to happen before we
443 // would have reached the idle state, so schedule an event and
444 // rather check once we actually make it to the point in time when
445 // the (last) precharge takes place
446 if (!rank_ref.prechargeEvent.scheduled()) {
447 schedule(rank_ref.prechargeEvent, pre_done_at);
448 // New event, increment count
449 ++rank_ref.outstandingEvents;
450 } else if (rank_ref.prechargeEvent.when() < pre_done_at) {
451 reschedule(rank_ref.prechargeEvent, pre_done_at);
452 }
453 }
454
455 std::pair<Tick, Tick>
456 DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
457 const std::vector<MemPacketQueue>& queue)
458 {
459 DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
460 mem_pkt->addr, mem_pkt->rank, mem_pkt->bank, mem_pkt->row);
461
462 // get the rank
463 Rank& rank_ref = *ranks[mem_pkt->rank];
464
465 assert(rank_ref.inRefIdleState());
466
467 // are we in or transitioning to a low-power state and have not scheduled
468 // a power-up event?
469 // if so, wake up from power down to issue RD/WR burst
470 if (rank_ref.inLowPowerState) {
471 assert(rank_ref.pwrState != PWR_SREF);
472 rank_ref.scheduleWakeUpEvent(tXP);
473 }
474
475 // get the bank
476 Bank& bank_ref = rank_ref.banks[mem_pkt->bank];
477
478 // for the state we need to track if it is a row hit or not
479 bool row_hit = true;
480
481 // Determine the access latency and update the bank state
482 if (bank_ref.openRow == mem_pkt->row) {
483 // nothing to do
484 } else {
485 row_hit = false;
486
487 // If there is a page open, precharge it.
488 if (bank_ref.openRow != Bank::NO_ROW) {
489 prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt,
490 curTick()));
491 }
492
493 // next we need to account for the delay in activating the page
494 Tick act_tick = std::max(bank_ref.actAllowedAt, curTick());
495
496 // Record the activation and deal with all the global timing
497         // constraints caused by a new activation (tRRD and tXAW)
498 activateBank(rank_ref, bank_ref, act_tick, mem_pkt->row);
499 }
500
501 // respect any constraints on the command (e.g. tRCD or tCCD)
502 const Tick col_allowed_at = mem_pkt->isRead() ?
503 bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;
504
505 // we need to wait until the bus is available before we can issue
506 // the command; need to ensure minimum bus delay requirement is met
507 Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()});
508
509 // verify that we have command bandwidth to issue the burst
510 // if not, shift to next burst window
511 if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay))
512 cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
513 else
514 cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
515
516 // if we are interleaving bursts, ensure that
517 // 1) we don't double interleave on next burst issue
518 // 2) we are at an interleave boundary; if not, shift to next boundary
519 Tick burst_gap = tBURST_MIN;
520 if (burstInterleave) {
521 if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) {
522 // already interleaving, push next command to end of full burst
523 burst_gap = tBURST;
524 } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) {
525 // not at an interleave boundary after bandwidth check
526 // Shift command to tBURST boundary to avoid data contention
527 // Command will remain in the same burst window given that
528 // tBURST is less than tBURST_MAX
529 cmd_at = rank_ref.lastBurstTick + tBURST;
530 }
531 }
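    // Illustrative example (hypothetical timings): with tBURST_MIN equal
    // to half of tBURST, two bursts can be interleaved; a command landing
    // exactly tBURST_MIN after the previous burst starts the interleave,
    // and the command after that is pushed out by a full tBURST so the
    // two data transfers do not overlap on the bus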
532 DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at);
533
534 // update the packet ready time
535 mem_pkt->readyTime = cmd_at + tCL + tBURST;
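    // the packet is ready tCL + tBURST after the column command issues;
    // for reads, the latency stats further down split this into queueing
    // delay (cmd_at - entryTime), bus time (tBURST) and total access
    // latency (readyTime - entryTime)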
536
537 rank_ref.lastBurstTick = cmd_at;
538
539 // update the time for the next read/write burst for each
540 // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here)
541 Tick dly_to_rd_cmd;
542 Tick dly_to_wr_cmd;
543 for (int j = 0; j < ranksPerChannel; j++) {
544 for (int i = 0; i < banksPerRank; i++) {
545 if (mem_pkt->rank == j) {
546 if (bankGroupArch &&
547 (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) {
548 // bank group architecture requires longer delays between
549 // RD/WR burst commands to the same bank group.
550 // tCCD_L is default requirement for same BG timing
551 // tCCD_L_WR is required for write-to-write
552 // Need to also take bus turnaround delays into account
553 dly_to_rd_cmd = mem_pkt->isRead() ?
554 tCCD_L : std::max(tCCD_L, wrToRdDlySameBG);
555 dly_to_wr_cmd = mem_pkt->isRead() ?
556 std::max(tCCD_L, rdToWrDlySameBG) :
557 tCCD_L_WR;
558 } else {
559 // tBURST is default requirement for diff BG timing
560 // Need to also take bus turnaround delays into account
561 dly_to_rd_cmd = mem_pkt->isRead() ? burst_gap :
562 writeToReadDelay();
563 dly_to_wr_cmd = mem_pkt->isRead() ? readToWriteDelay() :
564 burst_gap;
565 }
566 } else {
567 // different rank is by default in a different bank group and
568 // doesn't require longer tCCD or additional RTW, WTR delays
569 // Need to account for rank-to-rank switching
570 dly_to_wr_cmd = rankToRankDelay();
571 dly_to_rd_cmd = rankToRankDelay();
572 }
573 ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
574 ranks[j]->banks[i].rdAllowedAt);
575 ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
576 ranks[j]->banks[i].wrAllowedAt);
577 }
578 }
579
580 // Save rank of current access
581 activeRank = mem_pkt->rank;
582
583 // If this is a write, we also need to respect the write recovery
584 // time before a precharge, in the case of a read, respect the
585 // read to precharge constraint
586 bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt,
587 mem_pkt->isRead() ? cmd_at + tRTP :
588 mem_pkt->readyTime + tWR);
589
590 // increment the bytes accessed and the accesses per row
591 bank_ref.bytesAccessed += burstSize;
592 ++bank_ref.rowAccesses;
593
594 // if we reached the max, then issue with an auto-precharge
595 bool auto_precharge = pageMgmt == Enums::close ||
596 bank_ref.rowAccesses == maxAccessesPerRow;
597
598 // if we did not hit the limit, we might still want to
599 // auto-precharge
600 if (!auto_precharge &&
601 (pageMgmt == Enums::open_adaptive ||
602 pageMgmt == Enums::close_adaptive)) {
603 // a twist on the open and close page policies:
604 // 1) open_adaptive page policy does not blindly keep the
605 // page open, but close it if there are no row hits, and there
606 // are bank conflicts in the queue
607 // 2) close_adaptive page policy does not blindly close the
608 // page, but closes it only if there are no row hits in the queue.
609 // In this case, only force an auto precharge when there
610 // are no same page hits in the queue
611 bool got_more_hits = false;
612 bool got_bank_conflict = false;
613
614 for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) {
615 auto p = queue[i].begin();
616 // keep on looking until we find a hit or reach the end of the
617 // queue
618 // 1) if a hit is found, then both open and close adaptive
619 // policies keep the page open
620 // 2) if no hit is found, got_bank_conflict is set to true if a
621 // bank conflict request is waiting in the queue
622 // 3) make sure we are not considering the packet that we are
623 // currently dealing with
624 while (!got_more_hits && p != queue[i].end()) {
625 if (mem_pkt != (*p)) {
626 bool same_rank_bank = (mem_pkt->rank == (*p)->rank) &&
627 (mem_pkt->bank == (*p)->bank);
628
629 bool same_row = mem_pkt->row == (*p)->row;
630 got_more_hits |= same_rank_bank && same_row;
631 got_bank_conflict |= same_rank_bank && !same_row;
632 }
633 ++p;
634 }
635
636 if (got_more_hits)
637 break;
638 }
639
640 // auto pre-charge when either
641 // 1) open_adaptive policy, we have not got any more hits, and
642 // have a bank conflict
643 // 2) close_adaptive policy and we have not got any more hits
644 auto_precharge = !got_more_hits &&
645 (got_bank_conflict || pageMgmt == Enums::close_adaptive);
646 }
647
648 // DRAMPower trace command to be written
649 std::string mem_cmd = mem_pkt->isRead() ? "RD" : "WR";
650
651 // MemCommand required for DRAMPower library
652 MemCommand::cmds command = (mem_cmd == "RD") ? MemCommand::RD :
653 MemCommand::WR;
654
655 rank_ref.cmdList.push_back(Command(command, mem_pkt->bank, cmd_at));
656
657 DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) -
658 timeStampOffset, mem_cmd, mem_pkt->bank, mem_pkt->rank);
659
660 // if this access should use auto-precharge, then we are
661 // closing the row after the read/write burst
662 if (auto_precharge) {
663 // if auto-precharge push a PRE command at the correct tick to the
664 // list used by DRAMPower library to calculate power
665 prechargeBank(rank_ref, bank_ref, std::max(curTick(),
666 bank_ref.preAllowedAt), true);
667
668 DPRINTF(DRAM, "Auto-precharged bank: %d\n", mem_pkt->bankId);
669 }
670
671 // Update the stats and schedule the next request
672 if (mem_pkt->isRead()) {
673 // Every respQueue which will generate an event, increment count
674 ++rank_ref.outstandingEvents;
675
676 stats.readBursts++;
677 if (row_hit)
678 stats.readRowHits++;
679 stats.bytesRead += burstSize;
680 stats.perBankRdBursts[mem_pkt->bankId]++;
681
682 // Update latency stats
683 stats.totMemAccLat += mem_pkt->readyTime - mem_pkt->entryTime;
684 stats.totQLat += cmd_at - mem_pkt->entryTime;
685 stats.totBusLat += tBURST;
686 } else {
687 // Schedule write done event to decrement event count
688 // after the readyTime has been reached
689 // Only schedule latest write event to minimize events
690 // required; only need to ensure that final event scheduled covers
691 // the time that writes are outstanding and bus is active
692             // to hold off power-down entry events
693 if (!rank_ref.writeDoneEvent.scheduled()) {
694 schedule(rank_ref.writeDoneEvent, mem_pkt->readyTime);
695 // New event, increment count
696 ++rank_ref.outstandingEvents;
697
698 } else if (rank_ref.writeDoneEvent.when() < mem_pkt->readyTime) {
699 reschedule(rank_ref.writeDoneEvent, mem_pkt->readyTime);
700 }
701 // will remove write from queue when returned to parent function
702 // decrement count for DRAM rank
703 --rank_ref.writeEntries;
704
705 stats.writeBursts++;
706 if (row_hit)
707 stats.writeRowHits++;
708 stats.bytesWritten += burstSize;
709 stats.perBankWrBursts[mem_pkt->bankId]++;
710
711 }
712 // Update bus state to reflect when previous command was issued
713 return std::make_pair(cmd_at, cmd_at + burst_gap);
714 }
715
716 void
717 DRAMInterface::addRankToRankDelay(Tick cmd_at)
718 {
719 // update timing for DRAM ranks due to bursts issued
720 // to ranks on other media interfaces
721 for (auto n : ranks) {
722 for (int i = 0; i < banksPerRank; i++) {
723 // different rank by default
724 // Need to only account for rank-to-rank switching
725 n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
726 n->banks[i].rdAllowedAt);
727 n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
728 n->banks[i].wrAllowedAt);
729 }
730 }
731 }
732
733 DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p)
734 : MemInterface(_p),
735 bankGroupsPerRank(_p.bank_groups_per_rank),
736 bankGroupArch(_p.bank_groups_per_rank > 0),
737 tCL(_p.tCL),
738 tBURST_MIN(_p.tBURST_MIN), tBURST_MAX(_p.tBURST_MAX),
739 tCCD_L_WR(_p.tCCD_L_WR), tCCD_L(_p.tCCD_L), tRCD(_p.tRCD),
740 tRP(_p.tRP), tRAS(_p.tRAS), tWR(_p.tWR), tRTP(_p.tRTP),
741 tRFC(_p.tRFC), tREFI(_p.tREFI), tRRD(_p.tRRD), tRRD_L(_p.tRRD_L),
742 tPPD(_p.tPPD), tAAD(_p.tAAD),
743 tXAW(_p.tXAW), tXP(_p.tXP), tXS(_p.tXS),
744 clkResyncDelay(tCL + _p.tBURST_MAX),
745 dataClockSync(_p.data_clock_sync),
746 burstInterleave(tBURST != tBURST_MIN),
747 twoCycleActivate(_p.two_cycle_activate),
748 activationLimit(_p.activation_limit),
749 wrToRdDlySameBG(tCL + _p.tBURST_MAX + _p.tWTR_L),
750 rdToWrDlySameBG(_p.tRTW + _p.tBURST_MAX),
751 pageMgmt(_p.page_policy),
752 maxAccessesPerRow(_p.max_accesses_per_row),
753 timeStampOffset(0), activeRank(0),
754 enableDRAMPowerdown(_p.enable_dram_powerdown),
755 lastStatsResetTick(0),
756 stats(*this)
757 {
758 DPRINTF(DRAM, "Setting up DRAM Interface\n");
759
760 fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
761 "must be a power of two\n", burstSize);
762
763 // sanity check the ranks since we rely on bit slicing for the
764 // address decoding
765 fatal_if(!isPowerOf2(ranksPerChannel), "DRAM rank count of %d is "
766 "not allowed, must be a power of two\n", ranksPerChannel);
767
768 for (int i = 0; i < ranksPerChannel; i++) {
769 DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
770 Rank* rank = new Rank(_p, i, *this);
771 ranks.push_back(rank);
772 }
773
774 // determine the dram actual capacity from the DRAM config in Mbytes
775 uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
776 ranksPerChannel;
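    // Illustrative example (hypothetical configuration): 1 GiB devices,
    // 8 devices per rank and 2 ranks per channel give
    // 1024 MB * 8 * 2 = 16384 Mbytes of device capacity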
777
778 uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
779
780 DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
781 AbstractMemory::size());
782
783 // if actual DRAM size does not match memory capacity in system warn!
784 if (deviceCapacity != capacity / (1024 * 1024))
785 warn("DRAM device capacity (%d Mbytes) does not match the "
786 "address range assigned (%d Mbytes)\n", deviceCapacity,
787 capacity / (1024 * 1024));
788
789 DPRINTF(DRAM, "Row buffer size %d bytes with %d bursts per row buffer\n",
790 rowBufferSize, burstsPerRowBuffer);
791
792 rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
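    // Illustrative example (hypothetical configuration): a 1 GiB channel
    // with 1 KiB row buffers, 8 banks per rank and 2 ranks per channel
    // gives 2^30 / (1024 * 8 * 2) = 65536 rows per bank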
793
794 // some basic sanity checks
795 if (tREFI <= tRP || tREFI <= tRFC) {
796 fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
797 tREFI, tRP, tRFC);
798 }
799
800 // basic bank group architecture checks ->
801 if (bankGroupArch) {
802 // must have at least one bank per bank group
803 if (bankGroupsPerRank > banksPerRank) {
804 fatal("banks per rank (%d) must be equal to or larger than "
805                   "bank groups per rank (%d)\n",
806 banksPerRank, bankGroupsPerRank);
807 }
808 // must have same number of banks in each bank group
809 if ((banksPerRank % bankGroupsPerRank) != 0) {
810 fatal("Banks per rank (%d) must be evenly divisible by bank "
811 "groups per rank (%d) for equal banks per bank group\n",
812 banksPerRank, bankGroupsPerRank);
813 }
814 // tCCD_L should be greater than minimal, back-to-back burst delay
815 if (tCCD_L <= tBURST) {
816 fatal("tCCD_L (%d) should be larger than the minimum bus delay "
817 "(%d) when bank groups per rank (%d) is greater than 1\n",
818 tCCD_L, tBURST, bankGroupsPerRank);
819 }
820 // tCCD_L_WR should be greater than minimal, back-to-back burst delay
821 if (tCCD_L_WR <= tBURST) {
822 fatal("tCCD_L_WR (%d) should be larger than the minimum bus delay "
823 " (%d) when bank groups per rank (%d) is greater than 1\n",
824 tCCD_L_WR, tBURST, bankGroupsPerRank);
825 }
826 // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay
827 // some datasheets might specify it equal to tRRD
828 if (tRRD_L < tRRD) {
829 fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
830 "bank groups per rank (%d) is greater than 1\n",
831 tRRD_L, tRRD, bankGroupsPerRank);
832 }
833 }
834 }
835
836 void
837 DRAMInterface::init()
838 {
839 AbstractMemory::init();
840
841     // a few sanity checks on the interleaving, deferred to here to
842 // ensure that the system pointer is initialised
843 if (range.interleaved()) {
844 if (addrMapping == Enums::RoRaBaChCo) {
845 if (rowBufferSize != range.granularity()) {
846 fatal("Channel interleaving of %s doesn't match RoRaBaChCo "
847 "address map\n", name());
848 }
849 } else if (addrMapping == Enums::RoRaBaCoCh ||
850 addrMapping == Enums::RoCoRaBaCh) {
851 // for the interleavings with channel bits in the bottom,
852 // if the system uses a channel striping granularity that
853 // is larger than the DRAM burst size, then map the
854 // sequential accesses within a stripe to a number of
855 // columns in the DRAM, effectively placing some of the
856 // lower-order column bits as the least-significant bits
857 // of the address (above the ones denoting the burst size)
858 assert(burstsPerStripe >= 1);
859
860 // channel striping has to be done at a granularity that
861 // is equal or larger to a cache line
862 if (system()->cacheLineSize() > range.granularity()) {
863 fatal("Channel interleaving of %s must be at least as large "
864 "as the cache line size\n", name());
865 }
866
867 // ...and equal or smaller than the row-buffer size
868 if (rowBufferSize < range.granularity()) {
869 fatal("Channel interleaving of %s must be at most as large "
870 "as the row-buffer size\n", name());
871 }
872 // this is essentially the check above, so just to be sure
873 assert(burstsPerStripe <= burstsPerRowBuffer);
874 }
875 }
876 }
877
878 void
879 DRAMInterface::startup()
880 {
881 if (system()->isTimingMode()) {
882 // timestamp offset should be in clock cycles for DRAMPower
883 timeStampOffset = divCeil(curTick(), tCK);
884
885 for (auto r : ranks) {
886 r->startup(curTick() + tREFI - tRP);
887 }
888 }
889 }
890
891 bool
892 DRAMInterface::isBusy()
893 {
894 int busy_ranks = 0;
895 for (auto r : ranks) {
896 if (!r->inRefIdleState()) {
897 if (r->pwrState != PWR_SREF) {
898 // rank is busy refreshing
899 DPRINTF(DRAMState, "Rank %d is not available\n", r->rank);
900 busy_ranks++;
901
902 // let the rank know that if it was waiting to drain, it
903 // is now done and ready to proceed
904 r->checkDrainDone();
905 }
906
907 // check if we were in self-refresh and haven't started
908 // to transition out
909 if ((r->pwrState == PWR_SREF) && r->inLowPowerState) {
910 DPRINTF(DRAMState, "Rank %d is in self-refresh\n", r->rank);
911 // if we have commands queued to this rank and we don't have
912 // a minimum number of active commands enqueued,
913 // exit self-refresh
914 if (r->forceSelfRefreshExit()) {
915 DPRINTF(DRAMState, "rank %d was in self refresh and"
916 " should wake up\n", r->rank);
917 //wake up from self-refresh
918 r->scheduleWakeUpEvent(tXS);
919 // things are brought back into action once a refresh is
920 // performed after self-refresh
921 // continue with selection for other ranks
922 }
923 }
924 }
925 }
926 return (busy_ranks == ranksPerChannel);
927 }
928
929 void DRAMInterface::setupRank(const uint8_t rank, const bool is_read)
930 {
931 // increment entry count of the rank based on packet type
932 if (is_read) {
933 ++ranks[rank]->readEntries;
934 } else {
935 ++ranks[rank]->writeEntries;
936 }
937 }
938
939 void
940 DRAMInterface::respondEvent(uint8_t rank)
941 {
942 Rank& rank_ref = *ranks[rank];
943
944 // if a read has reached its ready-time, decrement the number of reads
945 // At this point the packet has been handled and there is a possibility
946 // to switch to low-power mode if no other packet is available
947 --rank_ref.readEntries;
948 DPRINTF(DRAM, "number of read entries for rank %d is %d\n",
949 rank, rank_ref.readEntries);
950
951 // counter should at least indicate one outstanding request
952 // for this read
953 assert(rank_ref.outstandingEvents > 0);
954 // read response received, decrement count
955 --rank_ref.outstandingEvents;
956
957 // at this moment should not have transitioned to a low-power state
958 assert((rank_ref.pwrState != PWR_SREF) &&
959 (rank_ref.pwrState != PWR_PRE_PDN) &&
960 (rank_ref.pwrState != PWR_ACT_PDN));
961
962 // track if this is the last packet before idling
963 // and that there are no outstanding commands to this rank
964 if (rank_ref.isQueueEmpty() && rank_ref.outstandingEvents == 0 &&
965 rank_ref.inRefIdleState() && enableDRAMPowerdown) {
966 // verify that there are no events scheduled
967 assert(!rank_ref.activateEvent.scheduled());
968 assert(!rank_ref.prechargeEvent.scheduled());
969
970 // if coming from active state, schedule power event to
971 // active power-down else go to precharge power-down
972 DPRINTF(DRAMState, "Rank %d sleep at tick %d; current power state is "
973 "%d\n", rank, curTick(), rank_ref.pwrState);
974
975 // default to ACT power-down unless already in IDLE state
976 // could be in IDLE if PRE issued before data returned
977 PowerState next_pwr_state = PWR_ACT_PDN;
978 if (rank_ref.pwrState == PWR_IDLE) {
979 next_pwr_state = PWR_PRE_PDN;
980 }
981
982 rank_ref.powerDownSleep(next_pwr_state, curTick());
983 }
984 }
985
986 void
987 DRAMInterface::checkRefreshState(uint8_t rank)
988 {
989 Rank& rank_ref = *ranks[rank];
990
991 if ((rank_ref.refreshState == REF_PRE) &&
992 !rank_ref.prechargeEvent.scheduled()) {
993 // kick the refresh event loop into action again if banks already
994 // closed and just waiting for read to complete
995 schedule(rank_ref.refreshEvent, curTick());
996 }
997 }
998
999 void
1000 DRAMInterface::drainRanks()
1001 {
1002 // also need to kick off events to exit self-refresh
1003 for (auto r : ranks) {
1004 // force self-refresh exit, which in turn will issue auto-refresh
1005 if (r->pwrState == PWR_SREF) {
1006 DPRINTF(DRAM,"Rank%d: Forcing self-refresh wakeup in drain\n",
1007 r->rank);
1008 r->scheduleWakeUpEvent(tXS);
1009 }
1010 }
1011 }
1012
1013 bool
1014 DRAMInterface::allRanksDrained() const
1015 {
1016 // true until proven false
1017 bool all_ranks_drained = true;
1018 for (auto r : ranks) {
1019 // then verify that the power state is IDLE ensuring all banks are
1020 // closed and rank is not in a low power state. Also verify that rank
1021 // is idle from a refresh point of view.
1022 all_ranks_drained = r->inPwrIdleState() && r->inRefIdleState() &&
1023 all_ranks_drained;
1024 }
1025 return all_ranks_drained;
1026 }
1027
1028 void
1029 DRAMInterface::suspend()
1030 {
1031 for (auto r : ranks) {
1032 r->suspend();
1033 }
1034 }
1035
1036 std::pair<std::vector<uint32_t>, bool>
1037 DRAMInterface::minBankPrep(const MemPacketQueue& queue,
1038 Tick min_col_at) const
1039 {
1040 Tick min_act_at = MaxTick;
1041 std::vector<uint32_t> bank_mask(ranksPerChannel, 0);
1042
1043     // latest Tick for which ACT can occur without incurring additional
1044 // delay on the data bus
1045 const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick());
1046
1047 // Flag condition when burst can issue back-to-back with previous burst
1048 bool found_seamless_bank = false;
1049
1050 // Flag condition when bank can be opened without incurring additional
1051 // delay on the data bus
1052 bool hidden_bank_prep = false;
1053
1054     // determine if we have queued transactions targeting the
1055 // bank in question
1056 std::vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
1057 for (const auto& p : queue) {
1058 if (p->isDram() && ranks[p->rank]->inRefIdleState())
1059 got_waiting[p->bankId] = true;
1060 }
1061
1062 // Find command with optimal bank timing
1063 // Will prioritize commands that can issue seamlessly.
1064 for (int i = 0; i < ranksPerChannel; i++) {
1065 for (int j = 0; j < banksPerRank; j++) {
1066 uint16_t bank_id = i * banksPerRank + j;
1067
1068 // if we have waiting requests for the bank, and it is
1069 // amongst the first available, update the mask
1070 if (got_waiting[bank_id]) {
1071 // make sure this rank is not currently refreshing.
1072 assert(ranks[i]->inRefIdleState());
1073 // simplistic approximation of when the bank can issue
1074 // an activate, ignoring any rank-to-rank switching
1075 // cost in this calculation
1076 Tick act_at = ranks[i]->banks[j].openRow == Bank::NO_ROW ?
1077 std::max(ranks[i]->banks[j].actAllowedAt, curTick()) :
1078 std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP;
1079
1080 // When is the earliest the R/W burst can issue?
1081 const Tick col_allowed_at = ctrl->inReadBusState(false) ?
1082 ranks[i]->banks[j].rdAllowedAt :
1083 ranks[i]->banks[j].wrAllowedAt;
1084 Tick col_at = std::max(col_allowed_at, act_at + tRCD);
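                // a closed bank can activate as soon as actAllowedAt
                // permits; a bank with a row open is assumed to first
                // precharge, hence the additional tRP, and the earliest
                // column command then follows tRCD after the activate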
1085
1086 // bank can issue burst back-to-back (seamlessly) with
1087 // previous burst
1088 bool new_seamless_bank = col_at <= min_col_at;
1089
1090 // if we found a new seamless bank or we have no
1091 // seamless banks, and got a bank with an earlier
1092 // activate time, it should be added to the bit mask
1093 if (new_seamless_bank ||
1094 (!found_seamless_bank && act_at <= min_act_at)) {
1095 // if we did not have a seamless bank before, and
1096 // we do now, reset the bank mask, also reset it
1097 // if we have not yet found a seamless bank and
1098 // the activate time is smaller than what we have
1099 // seen so far
1100 if (!found_seamless_bank &&
1101 (new_seamless_bank || act_at < min_act_at)) {
1102 std::fill(bank_mask.begin(), bank_mask.end(), 0);
1103 }
1104
1105 found_seamless_bank |= new_seamless_bank;
1106
1107 // ACT can occur 'behind the scenes'
1108 hidden_bank_prep = act_at <= hidden_act_max;
1109
1110 // set the bit corresponding to the available bank
1111 replaceBits(bank_mask[i], j, j, 1);
1112 min_act_at = act_at;
1113 }
1114 }
1115 }
1116 }
1117
1118 return std::make_pair(bank_mask, hidden_bank_prep);
1119 }
1120
1121 DRAMInterface::Rank::Rank(const DRAMInterfaceParams &_p,
1122 int _rank, DRAMInterface& _dram)
1123 : EventManager(&_dram), dram(_dram),
1124 pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE),
1125 pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE),
1126 refreshState(REF_IDLE), inLowPowerState(false), rank(_rank),
1127 readEntries(0), writeEntries(0), outstandingEvents(0),
1128 wakeUpAllowedAt(0), power(_p, false), banks(_p.banks_per_rank),
1129 numBanksActive(0), actTicks(_p.activation_limit, 0), lastBurstTick(0),
1130 writeDoneEvent([this]{ processWriteDoneEvent(); }, name()),
1131 activateEvent([this]{ processActivateEvent(); }, name()),
1132 prechargeEvent([this]{ processPrechargeEvent(); }, name()),
1133 refreshEvent([this]{ processRefreshEvent(); }, name()),
1134 powerEvent([this]{ processPowerEvent(); }, name()),
1135 wakeUpEvent([this]{ processWakeUpEvent(); }, name()),
1136 stats(_dram, *this)
1137 {
1138 for (int b = 0; b < _p.banks_per_rank; b++) {
1139 banks[b].bank = b;
1140 // GDDR addressing of banks to BG is linear.
1141 // Here we assume that all DRAM generations address bank groups as
1142 // follows:
1143 if (_p.bank_groups_per_rank > 0) {
1144 // Simply assign lower bits to bank group in order to
1145 // rotate across bank groups as banks are incremented
1146 // e.g. with 4 banks per bank group and 16 banks total:
1147 // banks 0,4,8,12 are in bank group 0
1148 // banks 1,5,9,13 are in bank group 1
1149 // banks 2,6,10,14 are in bank group 2
1150 // banks 3,7,11,15 are in bank group 3
1151 banks[b].bankgr = b % _p.bank_groups_per_rank;
1152 } else {
1153 // No bank groups; simply assign to bank number
1154 banks[b].bankgr = b;
1155 }
1156 }
1157 }
1158
1159 void
1160 DRAMInterface::Rank::startup(Tick ref_tick)
1161 {
1162 assert(ref_tick > curTick());
1163
1164 pwrStateTick = curTick();
1165
1166 // kick off the refresh, and give ourselves enough time to
1167 // precharge
1168 schedule(refreshEvent, ref_tick);
1169 }
1170
1171 void
1172 DRAMInterface::Rank::suspend()
1173 {
1174 deschedule(refreshEvent);
1175
1176 // Update the stats
1177 updatePowerStats();
1178
1179 // don't automatically transition back to LP state after next REF
1180 pwrStatePostRefresh = PWR_IDLE;
1181 }
1182
1183 bool
1184 DRAMInterface::Rank::isQueueEmpty() const
1185 {
1186     // check commands in Q based on current bus direction
1187 bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
1188 (readEntries == 0))
1189 || (dram.ctrl->inWriteBusState(true) &&
1190 (writeEntries == 0));
1191 return no_queued_cmds;
1192 }
1193
1194 void
1195 DRAMInterface::Rank::checkDrainDone()
1196 {
1197 // if this rank was waiting to drain it is now able to proceed to
1198 // precharge
1199 if (refreshState == REF_DRAIN) {
1200 DPRINTF(DRAM, "Refresh drain done, now precharging\n");
1201
1202 refreshState = REF_PD_EXIT;
1203
1204 // hand control back to the refresh event loop
1205 schedule(refreshEvent, curTick());
1206 }
1207 }
1208
1209 void
1210 DRAMInterface::Rank::flushCmdList()
1211 {
1212 // at the moment sort the list of commands and update the counters
1213     // for the DRAMPower library when doing a refresh
1214 sort(cmdList.begin(), cmdList.end(), DRAMInterface::sortTime);
1215
1216 auto next_iter = cmdList.begin();
1217     // push commands to DRAMPower
1218 for ( ; next_iter != cmdList.end() ; ++next_iter) {
1219 Command cmd = *next_iter;
1220 if (cmd.timeStamp <= curTick()) {
1221 // Move all commands at or before curTick to DRAMPower
1222 power.powerlib.doCommand(cmd.type, cmd.bank,
1223 divCeil(cmd.timeStamp, dram.tCK) -
1224 dram.timeStampOffset);
1225 } else {
1226 // done - found all commands at or before curTick()
1227 // next_iter references the 1st command after curTick
1228 break;
1229 }
1230 }
1231 // reset cmdList to only contain commands after curTick
1232 // if there are no commands after curTick, updated cmdList will be empty
1233 // in this case, next_iter is cmdList.end()
1234 cmdList.assign(next_iter, cmdList.end());
1235 }
1236
1237 void
1238 DRAMInterface::Rank::processActivateEvent()
1239 {
1240 // we should transition to the active state as soon as any bank is active
1241 if (pwrState != PWR_ACT)
1242 // note that at this point numBanksActive could be back at
1243 // zero again due to a precharge scheduled in the future
1244 schedulePowerEvent(PWR_ACT, curTick());
1245 }
1246
1247 void
1248 DRAMInterface::Rank::processPrechargeEvent()
1249 {
1250 // counter should at least indicate one outstanding request
1251 // for this precharge
1252 assert(outstandingEvents > 0);
1253 // precharge complete, decrement count
1254 --outstandingEvents;
1255
1256 // if we reached zero, then special conditions apply as we track
1257 // if all banks are precharged for the power models
1258 if (numBanksActive == 0) {
1259 // no reads to this rank in the Q and no pending
1260 // RD/WR or refresh commands
1261 if (isQueueEmpty() && outstandingEvents == 0 &&
1262 dram.enableDRAMPowerdown) {
1263 // should still be in ACT state since bank still open
1264 assert(pwrState == PWR_ACT);
1265
1266 // All banks closed - switch to precharge power down state.
1267 DPRINTF(DRAMState, "Rank %d sleep at tick %d\n",
1268 rank, curTick());
1269 powerDownSleep(PWR_PRE_PDN, curTick());
1270 } else {
1271 // we should transition to the idle state when the last bank
1272 // is precharged
1273 schedulePowerEvent(PWR_IDLE, curTick());
1274 }
1275 }
1276 }
1277
1278 void
1279 DRAMInterface::Rank::processWriteDoneEvent()
1280 {
1281 // counter should at least indicate one outstanding request
1282 // for this write
1283 assert(outstandingEvents > 0);
1284 // Write transfer on bus has completed
1285 // decrement per rank counter
1286 --outstandingEvents;
1287 }
1288
1289 void
1290 DRAMInterface::Rank::processRefreshEvent()
1291 {
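    // In outline, the refresh state machine below advances as:
    // REF_IDLE / REF_SREF_EXIT -> REF_DRAIN (let an in-flight access to
    // this rank finish) -> REF_PD_EXIT (wake from any power-down) ->
    // REF_PRE (precharge all banks) -> REF_START (issue REF once the
    // power state machine reaches PWR_REF) -> REF_RUN (wait tRFC), after
    // which the next refresh is scheduled relative to when this one was
    // due (tREFI later, less tRP of precharge headroom)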
1292 // when first preparing the refresh, remember when it was due
1293 if ((refreshState == REF_IDLE) || (refreshState == REF_SREF_EXIT)) {
1294 // remember when the refresh is due
1295 refreshDueAt = curTick();
1296
1297 // proceed to drain
1298 refreshState = REF_DRAIN;
1299
1300 // make nonzero while refresh is pending to ensure
1301 // power down and self-refresh are not entered
1302 ++outstandingEvents;
1303
1304 DPRINTF(DRAM, "Refresh due\n");
1305 }
1306
1307 // let any scheduled read or write to the same rank go ahead,
1308 // after which it will
1309 // hand control back to this event loop
1310 if (refreshState == REF_DRAIN) {
1311 // if a request is at the moment being handled and this request is
1312 // accessing the current rank then wait for it to finish
1313 if ((rank == dram.activeRank)
1314 && (dram.ctrl->requestEventScheduled())) {
1315 // hand control over to the request loop until it is
1316 // evaluated next
1317 DPRINTF(DRAM, "Refresh awaiting draining\n");
1318
1319 return;
1320 } else {
1321 refreshState = REF_PD_EXIT;
1322 }
1323 }
1324
1325 // at this point, ensure that rank is not in a power-down state
1326 if (refreshState == REF_PD_EXIT) {
1327         // if rank was sleeping and we haven't started the exit process,
1328 // wake-up for refresh
1329 if (inLowPowerState) {
1330 DPRINTF(DRAM, "Wake Up for refresh\n");
1331 // save state and return after refresh completes
1332 scheduleWakeUpEvent(dram.tXP);
1333 return;
1334 } else {
1335 refreshState = REF_PRE;
1336 }
1337 }
1338
1339 // at this point, ensure that all banks are precharged
1340 if (refreshState == REF_PRE) {
1341 // precharge any active bank
1342 if (numBanksActive != 0) {
1343 // at the moment, we use a precharge all even if there is
1344 // only a single bank open
1345 DPRINTF(DRAM, "Precharging all\n");
1346
1347 // first determine when we can precharge
1348 Tick pre_at = curTick();
1349
1350 for (auto &b : banks) {
1351 // respect both causality and any existing bank
1352 // constraints, some banks could already have a
1353 // (auto) precharge scheduled
1354 pre_at = std::max(b.preAllowedAt, pre_at);
1355 }
1356
1357 // make sure all banks per rank are precharged, and for those that
1358 // already are, update their availability
1359 Tick act_allowed_at = pre_at + dram.tRP;
1360
1361 for (auto &b : banks) {
1362 if (b.openRow != Bank::NO_ROW) {
1363 dram.prechargeBank(*this, b, pre_at, true, false);
1364 } else {
1365 b.actAllowedAt = std::max(b.actAllowedAt, act_allowed_at);
1366 b.preAllowedAt = std::max(b.preAllowedAt, pre_at);
1367 }
1368 }
1369
1370 // precharge all banks in rank
1371 cmdList.push_back(Command(MemCommand::PREA, 0, pre_at));
1372
1373 DPRINTF(DRAMPower, "%llu,PREA,0,%d\n",
1374 divCeil(pre_at, dram.tCK) -
1375 dram.timeStampOffset, rank);
1376 } else if ((pwrState == PWR_IDLE) && (outstandingEvents == 1)) {
1377 // Banks are closed, have transitioned to IDLE state, and
1378 // no outstanding ACT,RD/WR,Auto-PRE sequence scheduled
1379 DPRINTF(DRAM, "All banks already precharged, starting refresh\n");
1380
1381 // go ahead and kick the power state machine into gear since
1382 // we are already idle
1383 schedulePowerEvent(PWR_REF, curTick());
1384 } else {
1385         // banks are closed but we haven't transitioned pwrState to IDLE
1386 // or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
1387 // should have outstanding precharge or read response event
1388 assert(prechargeEvent.scheduled() ||
1389 dram.ctrl->respondEventScheduled());
1390 // will start refresh when pwrState transitions to IDLE
1391 }
1392
1393 assert(numBanksActive == 0);
1394
1395 // wait for all banks to be precharged or read to complete
1396 // When precharge commands are done, power state machine will
1397 // transition to the idle state, and automatically move to a
1398 // refresh, at that point it will also call this method to get
1399 // the refresh event loop going again
1400 // Similarly, when read response completes, if all banks are
1401 // precharged, will call this method to get loop re-started
1402 return;
1403 }
1404
1405 // last but not least we perform the actual refresh
1406 if (refreshState == REF_START) {
1407 // should never get here with any banks active
1408 assert(numBanksActive == 0);
1409 assert(pwrState == PWR_REF);
1410
1411 Tick ref_done_at = curTick() + dram.tRFC;
1412
1413 for (auto &b : banks) {
1414 b.actAllowedAt = ref_done_at;
1415 }
1416
1417 // at the moment this affects all ranks
1418 cmdList.push_back(Command(MemCommand::REF, 0, curTick()));
1419
1420 // Update the stats
1421 updatePowerStats();
1422
1423 DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), dram.tCK) -
1424 dram.timeStampOffset, rank);
1425
1426 // Update for next refresh
1427 refreshDueAt += dram.tREFI;
1428
1429 // make sure we did not wait so long that we cannot make up
1430 // for it
1431 if (refreshDueAt < ref_done_at) {
1432 fatal("Refresh was delayed so long we cannot catch up\n");
1433 }
1434
1435 // Run the refresh and schedule event to transition power states
1436 // when refresh completes
1437 refreshState = REF_RUN;
1438 schedule(refreshEvent, ref_done_at);
1439 return;
1440 }
1441
1442 if (refreshState == REF_RUN) {
1443 // should never get here with any banks active
1444 assert(numBanksActive == 0);
1445 assert(pwrState == PWR_REF);
1446
1447 assert(!powerEvent.scheduled());
1448
1449 if ((dram.ctrl->drainState() == DrainState::Draining) ||
1450 (dram.ctrl->drainState() == DrainState::Drained)) {
1451 // if draining, do not re-enter low-power mode.
1452 // simply go to IDLE and wait
1453 schedulePowerEvent(PWR_IDLE, curTick());
1454 } else {
1455 // At the moment, we sleep when the refresh ends and wait to be
1456 // woken up again if previously in a low-power state.
1457 if (pwrStatePostRefresh != PWR_IDLE) {
1458 // power State should be power Refresh
1459                 // power state should be PWR_REF
1460 DPRINTF(DRAMState, "Rank %d sleeping after refresh and was in "
1461 "power state %d before refreshing\n", rank,
1462 pwrStatePostRefresh);
1463 powerDownSleep(pwrState, curTick());
1464
1465 // Force PRE power-down if there are no outstanding commands
1466 // in Q after refresh.
1467 } else if (isQueueEmpty() && dram.enableDRAMPowerdown) {
1468 // still have refresh event outstanding but there should
1469 // be no other events outstanding
1470 assert(outstandingEvents == 1);
1471 DPRINTF(DRAMState, "Rank %d sleeping after refresh but was NOT"
1472 " in a low power state before refreshing\n", rank);
1473 powerDownSleep(PWR_PRE_PDN, curTick());
1474
1475 } else {
1476 // move to the idle power state once the refresh is done, this
1477 // will also move the refresh state machine to the refresh
1478 // idle state
1479 schedulePowerEvent(PWR_IDLE, curTick());
1480 }
1481 }
1482
1483 // At this point, we have completed the current refresh.
1484 // In the SREF bypass case, we do not get to this state in the
1485 // refresh STM and therefore can always schedule next event.
1486 // Compensate for the delay in actually performing the refresh
1487 // when scheduling the next one
1488 schedule(refreshEvent, refreshDueAt - dram.tRP);
1489
1490 DPRINTF(DRAMState, "Refresh done at %llu and next refresh"
1491 " at %llu\n", curTick(), refreshDueAt);
1492 }
1493 }
1494
1495 void
1496 DRAMInterface::Rank::schedulePowerEvent(PowerState pwr_state, Tick tick)
1497 {
1498 // respect causality
1499 assert(tick >= curTick());
1500
1501 if (!powerEvent.scheduled()) {
1502 DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n",
1503 tick, pwr_state);
1504
1505 // insert the new transition
1506 pwrStateTrans = pwr_state;
1507
1508 schedule(powerEvent, tick);
1509 } else {
1510 panic("Scheduled power event at %llu to state %d, "
1511 "with scheduled event at %llu to %d\n", tick, pwr_state,
1512 powerEvent.when(), pwrStateTrans);
1513 }
1514 }
1515
1516 void
1517 DRAMInterface::Rank::powerDownSleep(PowerState pwr_state, Tick tick)
1518 {
1519     // if the target low power state is active power-down, schedule the
1520     // transition to that state. In reality tCKE is needed to enter active
1521     // power-down; this is neglected here and could be added in the future.
1522 if (pwr_state == PWR_ACT_PDN) {
1523 schedulePowerEvent(pwr_state, tick);
1524 // push command to DRAMPower
1525 cmdList.push_back(Command(MemCommand::PDN_F_ACT, 0, tick));
1526 DPRINTF(DRAMPower, "%llu,PDN_F_ACT,0,%d\n", divCeil(tick,
1527 dram.tCK) - dram.timeStampOffset, rank);
1528 } else if (pwr_state == PWR_PRE_PDN) {
1529         // if the target low power state is precharge power-down, schedule
1530         // the transition to that state. In reality tCKE is needed to enter
1531         // precharge power-down; this is neglected here.
1532 schedulePowerEvent(pwr_state, tick);
1533 //push Command to DRAMPower
1534 cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
1535 DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
1536 dram.tCK) - dram.timeStampOffset, rank);
1537 } else if (pwr_state == PWR_REF) {
1538 // if a refresh just occurred
1539 // transition to PRE_PDN now that all banks are closed
1540 // precharge power down requires tCKE to enter. For simplicity
1541 // this is not considered.
1542 schedulePowerEvent(PWR_PRE_PDN, tick);
1543 //push Command to DRAMPower
1544 cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
1545 DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
1546 dram.tCK) - dram.timeStampOffset, rank);
1547 } else if (pwr_state == PWR_SREF) {
1548 // should only enter SREF after PRE-PD wakeup to do a refresh
1549 assert(pwrStatePostRefresh == PWR_PRE_PDN);
1550 // self refresh requires time tCKESR to enter. For simplicity,
1551 // this is not considered.
1552 schedulePowerEvent(PWR_SREF, tick);
1553 // push Command to DRAMPower
1554 cmdList.push_back(Command(MemCommand::SREN, 0, tick));
1555 DPRINTF(DRAMPower, "%llu,SREN,0,%d\n", divCeil(tick,
1556 dram.tCK) - dram.timeStampOffset, rank);
1557 }
1558 // Ensure that we don't power-down and back up in same tick
1559 // Once we commit to PD entry, do it and wait for at least 1tCK
1560 // This could be replaced with tCKE if/when that is added to the model
1561 wakeUpAllowedAt = tick + dram.tCK;
1562
1563 // Transitioning to a low power state, set flag
1564 inLowPowerState = true;
1565 }
1566
1567 void
1568 DRAMInterface::Rank::scheduleWakeUpEvent(Tick exit_delay)
1569 {
1570 Tick wake_up_tick = std::max(curTick(), wakeUpAllowedAt);
1571
1572 DPRINTF(DRAMState, "Scheduling wake-up for rank %d at tick %d\n",
1573 rank, wake_up_tick);
1574
1575 // if waking for refresh, hold previous state
1576 // else reset state back to IDLE
1577 if (refreshState == REF_PD_EXIT) {
1578 pwrStatePostRefresh = pwrState;
1579 } else {
1580 // don't automatically transition back to LP state after next REF
1581 pwrStatePostRefresh = PWR_IDLE;
1582 }
1583
1584 // schedule wake-up with event to ensure entry has completed before
1585 // we try to wake-up
1586 schedule(wakeUpEvent, wake_up_tick);
1587
1588 for (auto &b : banks) {
1589 // respect both causality and any existing bank
1590 // constraints, some banks could already have a
1591 // (auto) precharge scheduled
1592 b.wrAllowedAt = std::max(wake_up_tick + exit_delay, b.wrAllowedAt);
1593 b.rdAllowedAt = std::max(wake_up_tick + exit_delay, b.rdAllowedAt);
1594 b.preAllowedAt = std::max(wake_up_tick + exit_delay, b.preAllowedAt);
1595 b.actAllowedAt = std::max(wake_up_tick + exit_delay, b.actAllowedAt);
1596 }
1597 // Transitioning out of low power state, clear flag
1598 inLowPowerState = false;
1599
1600 // push to DRAMPower
1601 // use pwrStateTrans for cases where we have a power event scheduled
1602 // to enter low power that has not yet been processed
1603 if (pwrStateTrans == PWR_ACT_PDN) {
1604 cmdList.push_back(Command(MemCommand::PUP_ACT, 0, wake_up_tick));
1605 DPRINTF(DRAMPower, "%llu,PUP_ACT,0,%d\n", divCeil(wake_up_tick,
1606 dram.tCK) - dram.timeStampOffset, rank);
1607
1608 } else if (pwrStateTrans == PWR_PRE_PDN) {
1609 cmdList.push_back(Command(MemCommand::PUP_PRE, 0, wake_up_tick));
1610 DPRINTF(DRAMPower, "%llu,PUP_PRE,0,%d\n", divCeil(wake_up_tick,
1611 dram.tCK) - dram.timeStampOffset, rank);
1612 } else if (pwrStateTrans == PWR_SREF) {
1613 cmdList.push_back(Command(MemCommand::SREX, 0, wake_up_tick));
1614 DPRINTF(DRAMPower, "%llu,SREX,0,%d\n", divCeil(wake_up_tick,
1615 dram.tCK) - dram.timeStampOffset, rank);
1616 }
1617 }
1618
1619 void
1620 DRAMInterface::Rank::processWakeUpEvent()
1621 {
1622 // Should be in a power-down or self-refresh state
1623 assert((pwrState == PWR_ACT_PDN) || (pwrState == PWR_PRE_PDN) ||
1624 (pwrState == PWR_SREF));
1625
1626 // Check current state to determine transition state
1627 if (pwrState == PWR_ACT_PDN) {
1628 // banks still open, transition to PWR_ACT
1629 schedulePowerEvent(PWR_ACT, curTick());
1630 } else {
1631 // transitioning from a precharge power-down or self-refresh state
1632 // banks are closed - transition to PWR_IDLE
1633 schedulePowerEvent(PWR_IDLE, curTick());
1634 }
1635 }
1636
1637 void
1638 DRAMInterface::Rank::processPowerEvent()
1639 {
1640 assert(curTick() >= pwrStateTick);
1641 // remember where we were, and for how long
1642 Tick duration = curTick() - pwrStateTick;
1643 PowerState prev_state = pwrState;
1644
1645 // update the accounting
1646 stats.pwrStateTime[prev_state] += duration;
1647
1648     // add to the total idle time
1649 if ((prev_state == PWR_PRE_PDN) || (prev_state == PWR_ACT_PDN) ||
1650 (prev_state == PWR_SREF)) {
1651 stats.totalIdleTime += duration;
1652 }
1653
1654 pwrState = pwrStateTrans;
1655 pwrStateTick = curTick();
1656
1657 // if rank was refreshing, make sure to start scheduling requests again
1658 if (prev_state == PWR_REF) {
1659 // bus IDLED prior to REF
1660 // counter should be one for refresh command only
1661 assert(outstandingEvents == 1);
1662 // REF complete, decrement count and go back to IDLE
1663 --outstandingEvents;
1664 refreshState = REF_IDLE;
1665
1666 DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration);
1667 // if moving back to power-down after refresh
1668 if (pwrState != PWR_IDLE) {
1669 assert(pwrState == PWR_PRE_PDN);
1670 DPRINTF(DRAMState, "Switching to power down state after refreshing"
1671 " rank %d at %llu tick\n", rank, curTick());
1672 }
1673
1674 // completed refresh event, ensure next request is scheduled
1675 if (!dram.ctrl->requestEventScheduled()) {
1676 DPRINTF(DRAM, "Scheduling next request after refreshing"
1677 " rank %d\n", rank);
1678 dram.ctrl->restartScheduler(curTick());
1679 }
1680 }
1681
1682 if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) {
1683 // have exited ACT PD
1684 assert(prev_state == PWR_ACT_PDN);
1685
1686 // go back to REF event and close banks
1687 refreshState = REF_PRE;
1688 schedule(refreshEvent, curTick());
1689 } else if (pwrState == PWR_IDLE) {
1690 DPRINTF(DRAMState, "All banks precharged\n");
1691 if (prev_state == PWR_SREF) {
1692 // set refresh state to REF_SREF_EXIT, ensuring inRefIdleState
1693 // continues to return false during tXS after SREF exit
1694 // Schedule a refresh which kicks things back into action
1695 // when it finishes
1696 refreshState = REF_SREF_EXIT;
1697 schedule(refreshEvent, curTick() + dram.tXS);
1698 } else {
1699 // if we have a pending refresh, and are now moving to
1700 // the idle state, directly transition to, or schedule refresh
1701 if ((refreshState == REF_PRE) || (refreshState == REF_PD_EXIT)) {
1702 // ensure refresh is restarted only after final PRE command.
1703 // do not restart refresh if controller is in an intermediate
1704 // state, after PRE_PDN exit, when banks are IDLE but an
1705 // ACT is scheduled.
1706 if (!activateEvent.scheduled()) {
1707 // there should be nothing waiting at this point
1708 assert(!powerEvent.scheduled());
1709 if (refreshState == REF_PD_EXIT) {
1710 // exiting PRE PD, will be in IDLE until tXP expires
1711 // and then should transition to PWR_REF state
1712 assert(prev_state == PWR_PRE_PDN);
1713 schedulePowerEvent(PWR_REF, curTick() + dram.tXP);
1714 } else if (refreshState == REF_PRE) {
1715 // can directly move to PWR_REF state and proceed below
1716 pwrState = PWR_REF;
1717 }
1718 } else {
1719 // must have PRE scheduled to transition back to IDLE
1720 // and re-kick off refresh
1721 assert(prechargeEvent.scheduled());
1722 }
1723 }
1724 }
1725 }
1726
1727 // transition to the refresh state and re-start refresh process
1728 // refresh state machine will schedule the next power state transition
1729 if (pwrState == PWR_REF) {
1730 // completed final PRE for refresh or exiting power-down
1731 assert(refreshState == REF_PRE || refreshState == REF_PD_EXIT);
1732
1733 // exited PRE PD for refresh, with no pending commands
1734 // bypass auto-refresh and go straight to SREF, where memory
1735 // will issue refresh immediately upon entry
1736 if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
1737 (dram.ctrl->drainState() != DrainState::Draining) &&
1738 (dram.ctrl->drainState() != DrainState::Drained) &&
1739 dram.enableDRAMPowerdown) {
1740 DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning "
1741                 "to self refresh at %llu tick\n", rank, curTick());
1742 powerDownSleep(PWR_SREF, curTick());
1743
1744 // Since refresh was bypassed, remove event by decrementing count
1745 assert(outstandingEvents == 1);
1746 --outstandingEvents;
1747
1748 // reset state back to IDLE temporarily until SREF is entered
1749 pwrState = PWR_IDLE;
1750
1751 // Not bypassing refresh for SREF entry
1752 } else {
1753 DPRINTF(DRAMState, "Refreshing\n");
1754
1755 // there should be nothing waiting at this point
1756 assert(!powerEvent.scheduled());
1757
1758 // kick the refresh event loop into action again, and that
1759 // in turn will schedule a transition to the idle power
1760 // state once the refresh is done
1761 schedule(refreshEvent, curTick());
1762
1763 // Banks transitioned to IDLE, start REF
1764 refreshState = REF_START;
1765 }
1766 }
1767
1768 }
1769
1770 void
1771 DRAMInterface::Rank::updatePowerStats()
1772 {
1773 // All commands up to refresh have completed
1774 // flush cmdList to DRAMPower
1775 flushCmdList();
1776
1777 // Call the function that calculates window energy at intermediate update
1778 // events like at refresh, stats dump as well as at simulation exit.
1779     // The window starts at the last time calcWindowEnergy was called and
1780     // extends up to the current time.
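    // DRAMPower is clocked in memory-clock cycles, so the current tick is
    // converted to a cycle count relative to timeStampOffset before being
    // passed to the library.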
1781 power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
1782 dram.timeStampOffset);
1783
1784 // Get the energy from DRAMPower
1785 Data::MemoryPowerModel::Energy energy = power.powerlib.getEnergy();
1786
1787 // The energy components inside the power lib are calculated over
1788 // the window so accumulate into the corresponding gem5 stat
1789 stats.actEnergy += energy.act_energy * dram.devicesPerRank;
1790 stats.preEnergy += energy.pre_energy * dram.devicesPerRank;
1791 stats.readEnergy += energy.read_energy * dram.devicesPerRank;
1792 stats.writeEnergy += energy.write_energy * dram.devicesPerRank;
1793 stats.refreshEnergy += energy.ref_energy * dram.devicesPerRank;
1794 stats.actBackEnergy += energy.act_stdby_energy * dram.devicesPerRank;
1795 stats.preBackEnergy += energy.pre_stdby_energy * dram.devicesPerRank;
1796 stats.actPowerDownEnergy += energy.f_act_pd_energy * dram.devicesPerRank;
1797 stats.prePowerDownEnergy += energy.f_pre_pd_energy * dram.devicesPerRank;
1798 stats.selfRefreshEnergy += energy.sref_energy * dram.devicesPerRank;
1799
1800 // Accumulate window energy into the total energy.
1801 stats.totalEnergy += energy.window_energy * dram.devicesPerRank;
1802     // Average power must not be accumulated but calculated over the time
1803     // since the last stats reset. SimClock::Frequency is the tick
1804     // frequency (ticks per second), hence:
1805     //              energy (pJ)   tick_frequency
1806     // power (mW) = ----------- * --------------
1807     //              time (tick)        1e9
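    // For example, assuming the default 1 ps tick (SimClock::Frequency ==
    // 1e12) and a hypothetical window of 1000 pJ consumed over 2000 ticks:
    // (1000 / 2000) * (1e12 / 1e9) = 500 mW.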
1808 stats.averagePower = (stats.totalEnergy.value() /
1809 (curTick() - dram.lastStatsResetTick)) *
1810 (SimClock::Frequency / 1000000000.0);
1811 }
1812
1813 void
1814 DRAMInterface::Rank::computeStats()
1815 {
1816     DPRINTF(DRAM, "Computing stats due to a dump callback\n");
1817
1818 // Update the stats
1819 updatePowerStats();
1820
1821 // final update of power state times
1822 stats.pwrStateTime[pwrState] += (curTick() - pwrStateTick);
1823 pwrStateTick = curTick();
1824 }
1825
1826 void
1827 DRAMInterface::Rank::resetStats() {
1828     // The only way to clear the counters in DRAMPower is to call the
1829     // calcWindowEnergy function, as that then calls clearCounters. The
1830 // clearCounters method itself is private.
1831 power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
1832 dram.timeStampOffset);
1833
1834 }
1835
1836 bool
1837 DRAMInterface::Rank::forceSelfRefreshExit() const {
1838 return (readEntries != 0) ||
1839 (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
1840 }
1841
1842 void
1843 DRAMInterface::DRAMStats::resetStats()
1844 {
1845 dram.lastStatsResetTick = curTick();
1846 }
1847
1848 DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
1849 : Stats::Group(&_dram),
1850 dram(_dram),
1851
1852 ADD_STAT(readBursts, UNIT_COUNT, "Number of DRAM read bursts"),
1853 ADD_STAT(writeBursts, UNIT_COUNT, "Number of DRAM write bursts"),
1854
1855       ADD_STAT(perBankRdBursts, UNIT_COUNT, "Per bank read bursts"),
1856 ADD_STAT(perBankWrBursts, UNIT_COUNT, "Per bank write bursts"),
1857
1858 ADD_STAT(totQLat, UNIT_TICK, "Total ticks spent queuing"),
1859 ADD_STAT(totBusLat, UNIT_TICK, "Total ticks spent in databus transfers"),
1860 ADD_STAT(totMemAccLat, UNIT_TICK,
1861 "Total ticks spent from burst creation until serviced "
1862 "by the DRAM"),
1863
1864 ADD_STAT(avgQLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
1865 "Average queueing delay per DRAM burst"),
1866 ADD_STAT(avgBusLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
1867 "Average bus latency per DRAM burst"),
1868 ADD_STAT(avgMemAccLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
1869 "Average memory access latency per DRAM burst"),
1870
1871 ADD_STAT(readRowHits, UNIT_COUNT,
1872 "Number of row buffer hits during reads"),
1873 ADD_STAT(writeRowHits, UNIT_COUNT,
1874 "Number of row buffer hits during writes"),
1875 ADD_STAT(readRowHitRate, UNIT_RATIO, "Row buffer hit rate for reads"),
1876 ADD_STAT(writeRowHitRate, UNIT_RATIO, "Row buffer hit rate for writes"),
1877
1878 ADD_STAT(bytesPerActivate, UNIT_BYTE, "Bytes accessed per row activation"),
1879 ADD_STAT(bytesRead, UNIT_BYTE, "Total number of bytes read from DRAM"),
1880 ADD_STAT(bytesWritten, UNIT_BYTE, "Total number of bytes written to DRAM"),
1881 ADD_STAT(avgRdBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
1882 "Average DRAM read bandwidth in MiBytes/s"),
1883 ADD_STAT(avgWrBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
1884 "Average DRAM write bandwidth in MiBytes/s"),
1885 ADD_STAT(peakBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
1886 "Theoretical peak bandwidth in MiByte/s"),
1887
1888 ADD_STAT(busUtil, UNIT_RATIO, "Data bus utilization in percentage"),
1889 ADD_STAT(busUtilRead, UNIT_RATIO,
1890 "Data bus utilization in percentage for reads"),
1891 ADD_STAT(busUtilWrite, UNIT_RATIO,
1892 "Data bus utilization in percentage for writes"),
1893
1894 ADD_STAT(pageHitRate, UNIT_RATIO,
1895 "Row buffer hit rate, read and write combined")
1896
1897 {
1898 }
1899
1900 void
1901 DRAMInterface::DRAMStats::regStats()
1902 {
1903 using namespace Stats;
1904
1905 avgQLat.precision(2);
1906 avgBusLat.precision(2);
1907 avgMemAccLat.precision(2);
1908
1909 readRowHitRate.precision(2);
1910 writeRowHitRate.precision(2);
1911
1912 perBankRdBursts.init(dram.banksPerRank * dram.ranksPerChannel);
1913 perBankWrBursts.init(dram.banksPerRank * dram.ranksPerChannel);
1914
1915 bytesPerActivate
1916 .init(dram.maxAccessesPerRow ?
1917 dram.maxAccessesPerRow : dram.rowBufferSize)
1918 .flags(nozero);
1919
1920 peakBW.precision(2);
1921 busUtil.precision(2);
1922 busUtilWrite.precision(2);
1923 busUtilRead.precision(2);
1924
1925 pageHitRate.precision(2);
1926
1927 // Formula stats
1928 avgQLat = totQLat / readBursts;
1929 avgBusLat = totBusLat / readBursts;
1930 avgMemAccLat = totMemAccLat / readBursts;
1931
1932 readRowHitRate = (readRowHits / readBursts) * 100;
1933 writeRowHitRate = (writeRowHits / writeBursts) * 100;
1934
1935 avgRdBW = (bytesRead / 1000000) / simSeconds;
1936 avgWrBW = (bytesWritten / 1000000) / simSeconds;
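    // Peak bandwidth is one burst of bytesPerBurst() bytes every burstDelay()
    // ticks. As a rough sanity check with hypothetical values, a 64-byte
    // burst every 3332 ticks (3.332 ns, e.g. BL8 at 2400 MT/s) and a 1 ps
    // tick gives (1e12 / 3332) * 64 / 1e6 ~= 19208, i.e. roughly 19.2 GB/s.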
1937 peakBW = (SimClock::Frequency / dram.burstDelay()) *
1938 dram.bytesPerBurst() / 1000000;
1939
1940 busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
1941 busUtilRead = avgRdBW / peakBW * 100;
1942 busUtilWrite = avgWrBW / peakBW * 100;
1943
1944 pageHitRate = (writeRowHits + readRowHits) /
1945 (writeBursts + readBursts) * 100;
1946 }
1947
1948 DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
1949 : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
1950 rank(_rank),
1951
1952 ADD_STAT(actEnergy, UNIT_JOULE,
1953 "Energy for activate commands per rank (pJ)"),
1954 ADD_STAT(preEnergy, UNIT_JOULE,
1955 "Energy for precharge commands per rank (pJ)"),
1956 ADD_STAT(readEnergy, UNIT_JOULE,
1957 "Energy for read commands per rank (pJ)"),
1958 ADD_STAT(writeEnergy, UNIT_JOULE,
1959 "Energy for write commands per rank (pJ)"),
1960 ADD_STAT(refreshEnergy, UNIT_JOULE,
1961 "Energy for refresh commands per rank (pJ)"),
1962 ADD_STAT(actBackEnergy, UNIT_JOULE,
1963 "Energy for active background per rank (pJ)"),
1964 ADD_STAT(preBackEnergy, UNIT_JOULE,
1965 "Energy for precharge background per rank (pJ)"),
1966 ADD_STAT(actPowerDownEnergy, UNIT_JOULE,
1967 "Energy for active power-down per rank (pJ)"),
1968 ADD_STAT(prePowerDownEnergy, UNIT_JOULE,
1969 "Energy for precharge power-down per rank (pJ)"),
1970 ADD_STAT(selfRefreshEnergy, UNIT_JOULE,
1971 "Energy for self refresh per rank (pJ)"),
1972
1973 ADD_STAT(totalEnergy, UNIT_JOULE, "Total energy per rank (pJ)"),
1974 ADD_STAT(averagePower, UNIT_WATT, "Core power per rank (mW)"),
1975
1976       ADD_STAT(totalIdleTime, UNIT_TICK, "Total idle time per DRAM rank"),
1977 ADD_STAT(pwrStateTime, UNIT_TICK, "Time in different power states")
1978 {
1979 }
1980
1981 void
1982 DRAMInterface::RankStats::regStats()
1983 {
1984 Stats::Group::regStats();
1985
1986 pwrStateTime
1987 .init(6)
1988 .subname(0, "IDLE")
1989 .subname(1, "REF")
1990 .subname(2, "SREF")
1991 .subname(3, "PRE_PDN")
1992 .subname(4, "ACT")
1993 .subname(5, "ACT_PDN");
1994 }
1995
1996 void
1997 DRAMInterface::RankStats::resetStats()
1998 {
1999 Stats::Group::resetStats();
2000
2001 rank.resetStats();
2002 }
2003
2004 void
2005 DRAMInterface::RankStats::preDumpStats()
2006 {
2007 Stats::Group::preDumpStats();
2008
2009 rank.computeStats();
2010 }
2011
2012 NVMInterface::NVMInterface(const NVMInterfaceParams &_p)
2013 : MemInterface(_p),
2014 maxPendingWrites(_p.max_pending_writes),
2015 maxPendingReads(_p.max_pending_reads),
2016 twoCycleRdWr(_p.two_cycle_rdwr),
2017 tREAD(_p.tREAD), tWRITE(_p.tWRITE), tSEND(_p.tSEND),
2018 stats(*this),
2019 writeRespondEvent([this]{ processWriteRespondEvent(); }, name()),
2020 readReadyEvent([this]{ processReadReadyEvent(); }, name()),
2021 nextReadAt(0), numPendingReads(0), numReadDataReady(0),
2022 numReadsToIssue(0), numWritesQueued(0)
2023 {
2024 DPRINTF(NVM, "Setting up NVM Interface\n");
2025
2026 fatal_if(!isPowerOf2(burstSize), "NVM burst size %d is not allowed, "
2027 "must be a power of two\n", burstSize);
2028
2029 // sanity check the ranks since we rely on bit slicing for the
2030 // address decoding
2031 fatal_if(!isPowerOf2(ranksPerChannel), "NVM rank count of %d is "
2032 "not allowed, must be a power of two\n", ranksPerChannel);
2033
2034     for (int i = 0; i < ranksPerChannel; i++) {
2035 // Add NVM ranks to the system
2036 DPRINTF(NVM, "Creating NVM rank %d \n", i);
2037 Rank* rank = new Rank(_p, i, *this);
2038 ranks.push_back(rank);
2039 }
2040
2041 uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
2042
2043 DPRINTF(NVM, "NVM capacity %lld (%lld) bytes\n", capacity,
2044 AbstractMemory::size());
2045
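    // For example, a hypothetical 16 GiB interface with 8 KiB row buffers,
    // 16 banks per rank and a single rank yields
    // 17179869184 / (8192 * 16 * 1) = 131072 rows per bank.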
2046 rowsPerBank = capacity / (rowBufferSize *
2047 banksPerRank * ranksPerChannel);
2048
2049 }
2050
2051 NVMInterface::Rank::Rank(const NVMInterfaceParams &_p,
2052 int _rank, NVMInterface& _nvm)
2053 : EventManager(&_nvm), rank(_rank), banks(_p.banks_per_rank)
2054 {
2055 for (int b = 0; b < _p.banks_per_rank; b++) {
2056 banks[b].bank = b;
2057 // No bank groups; simply assign to bank number
2058 banks[b].bankgr = b;
2059 }
2060 }
2061
2062 void
2063 NVMInterface::init()
2064 {
2065 AbstractMemory::init();
2066 }
2067
2068 void NVMInterface::setupRank(const uint8_t rank, const bool is_read)
2069 {
2070 if (is_read) {
2071 // increment count to trigger read and track number of reads in Q
2072 numReadsToIssue++;
2073 } else {
2074 // increment count to track number of writes in Q
2075 numWritesQueued++;
2076 }
2077 }
2078
2079 std::pair<MemPacketQueue::iterator, Tick>
2080 NVMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
2081 {
2082     // remember if we found a hit, but one that cannot issue seamlessly
2083 bool found_prepped_pkt = false;
2084
2085 auto selected_pkt_it = queue.end();
2086 Tick selected_col_at = MaxTick;
2087
2088 for (auto i = queue.begin(); i != queue.end() ; ++i) {
2089 MemPacket* pkt = *i;
2090
2091 // select optimal NVM packet in Q
2092 if (!pkt->isDram()) {
2093 const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
2094 const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
2095 bank.wrAllowedAt;
2096
2097 // check if rank is not doing a refresh and thus is available,
2098 // if not, jump to the next packet
2099 if (burstReady(pkt)) {
2100 DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__,
2101 pkt->bank, pkt->rank);
2102
2103 // no additional rank-to-rank or media delays
2104 if (col_allowed_at <= min_col_at) {
2105 // FCFS within entries that can issue without
2106 // additional delay, such as same rank accesses
2107 // or media delay requirements
2108 selected_pkt_it = i;
2109 selected_col_at = col_allowed_at;
2110 // no need to look through the remaining queue entries
2111 DPRINTF(NVM, "%s Seamless buffer hit\n", __func__);
2112 break;
2113 } else if (!found_prepped_pkt) {
2114                     // packet is to prepped region but cannot issue
2115 // seamlessly; remember this one and continue
2116 selected_pkt_it = i;
2117 selected_col_at = col_allowed_at;
2118 DPRINTF(NVM, "%s Prepped packet found \n", __func__);
2119 found_prepped_pkt = true;
2120 }
2121 } else {
2122 DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__,
2123 pkt->bank, pkt->rank);
2124 }
2125 }
2126 }
2127
2128 if (selected_pkt_it == queue.end()) {
2129 DPRINTF(NVM, "%s no available NVM ranks found\n", __func__);
2130 }
2131
2132 return std::make_pair(selected_pkt_it, selected_col_at);
2133 }
2134
2135 void
2136 NVMInterface::chooseRead(MemPacketQueue& queue)
2137 {
2138 Tick cmd_at = std::max(curTick(), nextReadAt);
2139
2140 // This method does the arbitration between non-deterministic read
2141 // requests to NVM. The chosen packet is not removed from the queue
2142 // at this time. Removal from the queue will occur when the data is
2143 // ready and a separate SEND command is issued to retrieve it via the
2144 // chooseNext function in the top-level controller.
2145 assert(!queue.empty());
2146
2147 assert(numReadsToIssue > 0);
2148 numReadsToIssue--;
2149 // For simplicity, issue non-deterministic reads in order (fcfs)
2150 for (auto i = queue.begin(); i != queue.end() ; ++i) {
2151 MemPacket* pkt = *i;
2152
2153 // Find 1st NVM read packet that hasn't issued read command
2154 if (pkt->readyTime == MaxTick && !pkt->isDram() && pkt->isRead()) {
2155 // get the bank
2156 Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];
2157
2158             // issuing a read, inc counter and verify we haven't overrun
2159 numPendingReads++;
2160 assert(numPendingReads <= maxPendingReads);
2161
2162 // increment the bytes accessed and the accesses per row
2163 bank_ref.bytesAccessed += burstSize;
2164
2165             // Verify there is command bandwidth to issue
2166             // Host can issue read immediately with buffering closer
2167 // to the NVM. The actual execution at the NVM may be delayed
2168 // due to busy resources
2169 if (twoCycleRdWr) {
2170 cmd_at = ctrl->verifyMultiCmd(cmd_at,
2171 maxCommandsPerWindow, tCK);
2172 } else {
2173 cmd_at = ctrl->verifySingleCmd(cmd_at,
2174 maxCommandsPerWindow);
2175 }
2176
2177 // Update delay to next read
2178 // Ensures single read command issued per cycle
2179 nextReadAt = cmd_at + tCK;
2180
2181 // If accessing a new location in this bank, update timing
2182 // and stats
2183 if (bank_ref.openRow != pkt->row) {
2184 // update the open bank, re-using row field
2185 bank_ref.openRow = pkt->row;
2186
2187 // sample the bytes accessed to a buffer in this bank
2188 // here when we are re-buffering the data
2189 stats.bytesPerBank.sample(bank_ref.bytesAccessed);
2190 // start counting anew
2191 bank_ref.bytesAccessed = 0;
2192
2193             // hold off the next command to this bank until the read completes
2194 // and the data has been successfully buffered
2195 // can pipeline accesses to the same bank, sending them
2196 // across the interface B2B, but will incur full access
2197 // delay between data ready responses to different buffers
2198 // in a bank
2199 bank_ref.actAllowedAt = std::max(cmd_at,
2200 bank_ref.actAllowedAt) + tREAD;
2201 }
2202             // update the per-packet readyTime to hold off the burst read,
2203 // overloading readyTime, which will be updated again when the
2204 // burst is issued
2205 pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt);
2206
2207 DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. "
2208 "Data ready at %d\n",
2209 bank_ref.bank, cmd_at, pkt->readyTime);
2210
2211 // Insert into read ready queue. It will be handled after
2212 // the media delay has been met
2213 if (readReadyQueue.empty()) {
2214 assert(!readReadyEvent.scheduled());
2215 schedule(readReadyEvent, pkt->readyTime);
2216 } else if (readReadyEvent.when() > pkt->readyTime) {
2217 // move it sooner in time, to the first read with data
2218 reschedule(readReadyEvent, pkt->readyTime);
2219 } else {
2220 assert(readReadyEvent.scheduled());
2221 }
2222 readReadyQueue.push_back(pkt->readyTime);
2223
2224 // found an NVM read to issue - break out
2225 break;
2226 }
2227 }
2228 }
2229
2230 void
2231 NVMInterface::processReadReadyEvent()
2232 {
2233 // signal that there is read data ready to be transmitted
2234 numReadDataReady++;
2235
2236 DPRINTF(NVM,
2237 "processReadReadyEvent(): Data for an NVM read is ready. "
2238 "numReadDataReady is %d\t numPendingReads is %d\n",
2239 numReadDataReady, numPendingReads);
2240
2241 // Find lowest ready time and verify it is equal to curTick
2242 // also find the next lowest to schedule next event
2243 // Done with this response, erase entry
2244 auto ready_it = readReadyQueue.begin();
2245 Tick next_ready_at = MaxTick;
2246 for (auto i = readReadyQueue.begin(); i != readReadyQueue.end() ; ++i) {
2247 if (*ready_it > *i) {
2248 next_ready_at = *ready_it;
2249 ready_it = i;
2250 } else if ((next_ready_at > *i) && (i != ready_it)) {
2251 next_ready_at = *i;
2252 }
2253 }
2254
2255 // Verify we found the time of this event and remove it
2256 assert(*ready_it == curTick());
2257 readReadyQueue.erase(ready_it);
2258
2259 if (!readReadyQueue.empty()) {
2260 assert(readReadyQueue.front() >= curTick());
2261 assert(!readReadyEvent.scheduled());
2262 schedule(readReadyEvent, next_ready_at);
2263 }
2264
2265 // It is possible that a new command kicks things back into
2266 // action before reaching this point but need to ensure that we
2267 // continue to process new commands as read data becomes ready
2268 // This will also trigger a drain if needed
2269 if (!ctrl->requestEventScheduled()) {
2270 DPRINTF(NVM, "Restart controller scheduler immediately\n");
2271 ctrl->restartScheduler(curTick());
2272 }
2273 }
2274
2275 bool
2276 NVMInterface::burstReady(MemPacket* pkt) const {
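    // A read burst can issue once its non-deterministic read has completed
    // (data is buffered at the media) and the bus is in the read state; a
    // write burst can issue while the media write buffer has room and the
    // bus is in the write state.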
2277 bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true)) &&
2278 (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
2279 bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true) &&
2280 !writeRespQueueFull();
2281 return (read_rdy || write_rdy);
2282 }
2283
2284 std::pair<Tick, Tick>
2285 NVMInterface::doBurstAccess(MemPacket* pkt, Tick next_burst_at)
2286 {
2287 DPRINTF(NVM, "NVM Timing access to addr %lld, rank/bank/row %d %d %d\n",
2288 pkt->addr, pkt->rank, pkt->bank, pkt->row);
2289
2290 // get the bank
2291 Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];
2292
2293 // respect any constraints on the command
2294 const Tick bst_allowed_at = pkt->isRead() ?
2295 bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;
2296
2297 // we need to wait until the bus is available before we can issue
2298 // the command; need minimum of tBURST between commands
2299 Tick cmd_at = std::max(bst_allowed_at, curTick());
2300
2301 // we need to wait until the bus is available before we can issue
2302 // the command; need minimum of tBURST between commands
2303 cmd_at = std::max(cmd_at, next_burst_at);
2304
2305 // Verify there is command bandwidth to issue
2306 // Read burst (send command) is a simple data access and only requires
2307 // one command cycle
2308 // Write command may require multiple cycles to enable larger address space
2309 if (pkt->isRead() || !twoCycleRdWr) {
2310 cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
2311 } else {
2312 cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
2313 }
2314 // update the packet ready time to reflect when data will be transferred
2315 // Use the same bus delays defined for NVM
2316 pkt->readyTime = cmd_at + tSEND + tBURST;
2317
2318 Tick dly_to_rd_cmd;
2319 Tick dly_to_wr_cmd;
2320 for (auto n : ranks) {
2321 for (int i = 0; i < banksPerRank; i++) {
2322 // base delay is a function of tBURST and bus turnaround
2323 dly_to_rd_cmd = pkt->isRead() ? tBURST : writeToReadDelay();
2324 dly_to_wr_cmd = pkt->isRead() ? readToWriteDelay() : tBURST;
2325
2326 if (pkt->rank != n->rank) {
2327 // adjust timing for different ranks
2328 // Need to account for rank-to-rank switching with tCS
2329 dly_to_wr_cmd = rankToRankDelay();
2330 dly_to_rd_cmd = rankToRankDelay();
2331 }
2332 n->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
2333 n->banks[i].rdAllowedAt);
2334
2335 n->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
2336 n->banks[i].wrAllowedAt);
2337 }
2338 }
2339
2340 DPRINTF(NVM, "NVM Access to %lld, ready at %lld.\n",
2341 pkt->addr, pkt->readyTime);
2342
2343 if (pkt->isRead()) {
2344 // completed the read, decrement counters
2345 assert(numPendingReads != 0);
2346 assert(numReadDataReady != 0);
2347
2348 numPendingReads--;
2349 numReadDataReady--;
2350 } else {
2351 // Adjust number of NVM writes in Q
2352 assert(numWritesQueued > 0);
2353 numWritesQueued--;
2354
2355 // increment the bytes accessed and the accesses per row
2356 // only increment for writes as the reads are handled when
2357 // the non-deterministic read is issued, before the data transfer
2358 bank_ref.bytesAccessed += burstSize;
2359
2360 // Commands will be issued serially when accessing the same bank
2361 // Commands can issue in parallel to different banks
2362 if ((bank_ref.bank == pkt->bank) &&
2363 (bank_ref.openRow != pkt->row)) {
2364 // update the open buffer, re-using row field
2365 bank_ref.openRow = pkt->row;
2366
2367 // sample the bytes accessed to a buffer in this bank
2368 // here when we are re-buffering the data
2369 stats.bytesPerBank.sample(bank_ref.bytesAccessed);
2370 // start counting anew
2371 bank_ref.bytesAccessed = 0;
2372 }
2373
2374 // Determine when write will actually complete, assuming it is
2375 // scheduled to push to NVM immediately
2376 // update actAllowedAt to serialize next command completion that
2377 // accesses this bank; must wait until this write completes
2378 // Data accesses to the same buffer in this bank
2379 // can issue immediately after actAllowedAt expires, without
2380 // waiting additional delay of tWRITE. Can revisit this
2381 // assumption/simplification in the future.
2382 bank_ref.actAllowedAt = std::max(pkt->readyTime,
2383 bank_ref.actAllowedAt) + tWRITE;
2384
2385 // Need to track number of outstanding writes to
2386 // ensure 'buffer' on media controller does not overflow
2387 assert(!writeRespQueueFull());
2388
2389 // Insert into write done queue. It will be handled after
2390 // the media delay has been met
2391 if (writeRespQueueEmpty()) {
2392 assert(!writeRespondEvent.scheduled());
2393 schedule(writeRespondEvent, bank_ref.actAllowedAt);
2394 } else {
2395 assert(writeRespondEvent.scheduled());
2396 }
2397 writeRespQueue.push_back(bank_ref.actAllowedAt);
2398 writeRespQueue.sort();
2399 if (writeRespondEvent.when() > bank_ref.actAllowedAt) {
2400             DPRINTF(NVM, "Rescheduled respond event from %lld to %lld\n",
2401                 writeRespondEvent.when(), bank_ref.actAllowedAt);
2402             DPRINTF(NVM, "Front of response queue is %lld\n",
2403 writeRespQueue.front());
2404 reschedule(writeRespondEvent, bank_ref.actAllowedAt);
2405 }
2406
2407 }
2408
2409 // Update the stats
2410 if (pkt->isRead()) {
2411 stats.readBursts++;
2412 stats.bytesRead += burstSize;
2413 stats.perBankRdBursts[pkt->bankId]++;
2414 stats.pendingReads.sample(numPendingReads);
2415
2416 // Update latency stats
2417 stats.totMemAccLat += pkt->readyTime - pkt->entryTime;
2418 stats.totBusLat += tBURST;
2419 stats.totQLat += cmd_at - pkt->entryTime;
2420 } else {
2421 stats.writeBursts++;
2422 stats.bytesWritten += burstSize;
2423 stats.perBankWrBursts[pkt->bankId]++;
2424 }
2425
2426 return std::make_pair(cmd_at, cmd_at + tBURST);
2427 }
2428
2429 void
2430 NVMInterface::processWriteRespondEvent()
2431 {
2432 DPRINTF(NVM,
2433         "processWriteRespondEvent(): An NVM write reached its readyTime. "
2434 "%d remaining pending NVM writes\n", writeRespQueue.size());
2435
2436 // Update stat to track histogram of pending writes
2437 stats.pendingWrites.sample(writeRespQueue.size());
2438
2439 // Done with this response, pop entry
2440 writeRespQueue.pop_front();
2441
2442 if (!writeRespQueue.empty()) {
2443 assert(writeRespQueue.front() >= curTick());
2444 assert(!writeRespondEvent.scheduled());
2445 schedule(writeRespondEvent, writeRespQueue.front());
2446 }
2447
2448 // It is possible that a new command kicks things back into
2449 // action before reaching this point but need to ensure that we
2450 // continue to process new commands as writes complete at the media and
2451 // credits become available. This will also trigger a drain if needed
2452 if (!ctrl->requestEventScheduled()) {
2453 DPRINTF(NVM, "Restart controller scheduler immediately\n");
2454 ctrl->restartScheduler(curTick());
2455 }
2456 }
2457
2458 void
2459 NVMInterface::addRankToRankDelay(Tick cmd_at)
2460 {
2461 // update timing for NVM ranks due to bursts issued
2462 // to ranks for other media interfaces
2463 for (auto n : ranks) {
2464 for (int i = 0; i < banksPerRank; i++) {
2465 // different rank by default
2466 // Need to only account for rank-to-rank switching
2467 n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
2468 n->banks[i].rdAllowedAt);
2469 n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
2470 n->banks[i].wrAllowedAt);
2471 }
2472 }
2473 }
2474
2475 bool
2476 NVMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm)
2477 {
2478     DPRINTF(NVM, "isBusy: numReadDataReady = %d\n", numReadDataReady);
2479     // Determine whether the NVM is busy and cannot issue a burst
2480 // A read burst cannot issue when data is not ready from the NVM
2481 // Also check that we have reads queued to ensure we can change
2482 // bus direction to service potential write commands.
2483 // A write cannot issue once we've reached MAX pending writes
2484 // Only assert busy for the write case when there are also
2485 // no reads in Q and the write queue only contains NVM commands
2486 // This allows the bus state to switch and service reads
2487 return (ctrl->inReadBusState(true) ?
2488 (numReadDataReady == 0) && !read_queue_empty :
2489 writeRespQueueFull() && read_queue_empty &&
2490 all_writes_nvm);
2491 }
2492
2493
2494 NVMInterface::NVMStats::NVMStats(NVMInterface &_nvm)
2495 : Stats::Group(&_nvm),
2496 nvm(_nvm),
2497
2498 ADD_STAT(readBursts, UNIT_COUNT, "Number of NVM read bursts"),
2499 ADD_STAT(writeBursts, UNIT_COUNT, "Number of NVM write bursts"),
2500
2501       ADD_STAT(perBankRdBursts, UNIT_COUNT, "Per bank read bursts"),
2502 ADD_STAT(perBankWrBursts, UNIT_COUNT, "Per bank write bursts"),
2503
2504 ADD_STAT(totQLat, UNIT_TICK, "Total ticks spent queuing"),
2505 ADD_STAT(totBusLat, UNIT_TICK, "Total ticks spent in databus transfers"),
2506 ADD_STAT(totMemAccLat, UNIT_TICK,
2507 "Total ticks spent from burst creation until serviced "
2508 "by the NVM"),
2509 ADD_STAT(avgQLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
2510 "Average queueing delay per NVM burst"),
2511 ADD_STAT(avgBusLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
2512 "Average bus latency per NVM burst"),
2513 ADD_STAT(avgMemAccLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
2514 "Average memory access latency per NVM burst"),
2515
2516       ADD_STAT(bytesRead, UNIT_BYTE, "Total number of bytes read from NVM"),
2517       ADD_STAT(bytesWritten, UNIT_BYTE, "Total number of bytes written to NVM"),
2518       ADD_STAT(avgRdBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
2519                "Average NVM read bandwidth in MiBytes/s"),
2520       ADD_STAT(avgWrBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
2521                "Average NVM write bandwidth in MiBytes/s"),
2522 ADD_STAT(peakBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
2523 "Theoretical peak bandwidth in MiByte/s"),
2524 ADD_STAT(busUtil, UNIT_RATIO, "NVM Data bus utilization in percentage"),
2525 ADD_STAT(busUtilRead, UNIT_RATIO,
2526 "NVM Data bus read utilization in percentage"),
2527 ADD_STAT(busUtilWrite, UNIT_RATIO,
2528 "NVM Data bus write utilization in percentage"),
2529
2530 ADD_STAT(pendingReads, UNIT_COUNT,
2531 "Reads issued to NVM for which data has not been transferred"),
2532 ADD_STAT(bytesPerBank, UNIT_BYTE,
2533 "Bytes read within a bank before loading new bank")
2534
2535 {
2536 }
2537
2538 void
2539 NVMInterface::NVMStats::regStats()
2540 {
2541 using namespace Stats;
2542
2543 perBankRdBursts.init(nvm.ranksPerChannel == 0 ? 1 :
2544 nvm.banksPerRank * nvm.ranksPerChannel);
2545
2546 perBankWrBursts.init(nvm.ranksPerChannel == 0 ? 1 :
2547 nvm.banksPerRank * nvm.ranksPerChannel);
2548
2549 avgQLat.precision(2);
2550 avgBusLat.precision(2);
2551 avgMemAccLat.precision(2);
2552
2553 avgRdBW.precision(2);
2554 avgWrBW.precision(2);
2555 peakBW.precision(2);
2556
2557 busUtil.precision(2);
2558 busUtilRead.precision(2);
2559 busUtilWrite.precision(2);
2560
2561 pendingReads
2562 .init(nvm.maxPendingReads)
2563 .flags(nozero);
2564
2565 pendingWrites
2566 .init(nvm.maxPendingWrites)
2567 .flags(nozero);
2568
2569 bytesPerBank
2570 .init(nvm.rowBufferSize)
2571 .flags(nozero);
2572
2573 avgQLat = totQLat / readBursts;
2574 avgBusLat = totBusLat / readBursts;
2575 avgMemAccLat = totMemAccLat / readBursts;
2576
2577 avgRdBW = (bytesRead / 1000000) / simSeconds;
2578 avgWrBW = (bytesWritten / 1000000) / simSeconds;
2579 peakBW = (SimClock::Frequency / nvm.tBURST) *
2580 nvm.burstSize / 1000000;
2581
2582 busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
2583 busUtilRead = avgRdBW / peakBW * 100;
2584 busUtilWrite = avgWrBW / peakBW * 100;
2585 }