mem: Add Units to mem stats
[gem5.git] / src / mem / mem_interface.cc
1 /*
2 * Copyright (c) 2010-2020 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2013 Amin Farmahini-Farahani
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 #include "mem/mem_interface.hh"
42
43 #include "base/bitfield.hh"
44 #include "base/cprintf.hh"
45 #include "base/trace.hh"
46 #include "debug/DRAM.hh"
47 #include "debug/DRAMPower.hh"
48 #include "debug/DRAMState.hh"
49 #include "debug/NVM.hh"
50 #include "sim/system.hh"
51
52 using namespace Data;
53
54 MemInterface::MemInterface(const MemInterfaceParams &_p)
55 : AbstractMemory(_p),
56 addrMapping(_p.addr_mapping),
57 burstSize((_p.devices_per_rank * _p.burst_length *
58 _p.device_bus_width) / 8),
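      // Illustrative example (hypothetical configuration, not the
      // defaults): 8 devices per rank, a burst length of 8 and an
      // 8-bit device interface give (8 * 8 * 8) / 8 = 64 bytes per
      // burst, i.e. one typical cache line per burst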
59 deviceSize(_p.device_size),
60 deviceRowBufferSize(_p.device_rowbuffer_size),
61 devicesPerRank(_p.devices_per_rank),
62 rowBufferSize(devicesPerRank * deviceRowBufferSize),
63 burstsPerRowBuffer(rowBufferSize / burstSize),
64 burstsPerStripe(range.interleaved() ?
65 range.granularity() / burstSize : 1),
66 ranksPerChannel(_p.ranks_per_channel),
67 banksPerRank(_p.banks_per_rank), rowsPerBank(0),
68 tCK(_p.tCK), tCS(_p.tCS), tBURST(_p.tBURST),
69 tRTW(_p.tRTW),
70 tWTR(_p.tWTR),
71 readBufferSize(_p.read_buffer_size),
72 writeBufferSize(_p.write_buffer_size)
73 {}
74
75 void
76 MemInterface::setCtrl(MemCtrl* _ctrl, unsigned int command_window)
77 {
78 ctrl = _ctrl;
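    // the command window is given in ticks; dividing by the clock period
    // yields the number of command-bus slots per window, which is later
    // used by verifySingleCmd/verifyMultiCmd to model command bandwidth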
79 maxCommandsPerWindow = command_window / tCK;
80 }
81
82 MemPacket*
83 MemInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr,
84 unsigned size, bool is_read, bool is_dram)
85 {
86 // decode the address based on the address mapping scheme, with
87 // Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and
88 // channel, respectively
89 uint8_t rank;
90 uint8_t bank;
91 // use a 64-bit unsigned during the computations as the row is
92 // always the top bits, and check before creating the packet
93 uint64_t row;
94
95 // Get packed address, starting at 0
96 Addr addr = getCtrlAddr(pkt_addr);
97
98 // truncate the address to a memory burst, which makes it unique to
99 // a specific buffer, row, bank, rank and channel
100 addr = addr / burstSize;
101
102 // we have removed the lowest order address bits that denote the
103 // position within the column
104 if (addrMapping == Enums::RoRaBaChCo || addrMapping == Enums::RoRaBaCoCh) {
105 // the lowest order bits denote the column to ensure that
106 // sequential cache lines occupy the same row
107 addr = addr / burstsPerRowBuffer;
108
109 // after the channel bits, get the bank bits to interleave
110 // over the banks
111 bank = addr % banksPerRank;
112 addr = addr / banksPerRank;
113
114 // after the bank, we get the rank bits which thus interleaves
115 // over the ranks
116 rank = addr % ranksPerChannel;
117 addr = addr / ranksPerChannel;
118
119 // lastly, get the row bits, no need to remove them from addr
120 row = addr % rowsPerBank;
121 } else if (addrMapping == Enums::RoCoRaBaCh) {
122 // with emerging technologies, could have small page size with
123 // interleaving granularity greater than row buffer
124 if (burstsPerStripe > burstsPerRowBuffer) {
125 // remove column bits which are a subset of burstsPerStripe
126 addr = addr / burstsPerRowBuffer;
127 } else {
128 // remove lower column bits below channel bits
129 addr = addr / burstsPerStripe;
130 }
131
132 // start with the bank bits, as this provides the maximum
133 // opportunity for parallelism between requests
134 bank = addr % banksPerRank;
135 addr = addr / banksPerRank;
136
137 // next get the rank bits
138 rank = addr % ranksPerChannel;
139 addr = addr / ranksPerChannel;
140
141             // next, the higher-order column bits
142 if (burstsPerStripe < burstsPerRowBuffer) {
143 addr = addr / (burstsPerRowBuffer / burstsPerStripe);
144 }
145
146 // lastly, get the row bits, no need to remove them from addr
147 row = addr % rowsPerBank;
148 } else
149 panic("Unknown address mapping policy chosen!");
150
151 assert(rank < ranksPerChannel);
152 assert(bank < banksPerRank);
153 assert(row < rowsPerBank);
154 assert(row < Bank::NO_ROW);
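    // Illustrative example (hypothetical geometry, not the configured
    // defaults): assume a single non-interleaved channel with a 64-byte
    // burst, a 1 KiB row buffer (16 bursts per row), 8 banks per rank
    // and 2 ranks per channel. Under RoRaBaChCo, controller address
    // 0x40080 decodes as:
    //   0x40080 / 64 = 0x1002 (burst number)
    //   0x1002 / 16  = 0x100  (column bits dropped)
    //   0x100 % 8    = 0 -> bank 0,  0x100 / 8 = 0x20
    //   0x20 % 2     = 0 -> rank 0,  0x20 / 2  = 0x10
    //   0x10 % rowsPerBank -> row 16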
155
156 DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
157 pkt_addr, rank, bank, row);
158
159 // create the corresponding memory packet with the entry time and
160 // ready time set to the current tick, the latter will be updated
161 // later
162 uint16_t bank_id = banksPerRank * rank + bank;
163
164 return new MemPacket(pkt, is_read, is_dram, rank, bank, row, bank_id,
165 pkt_addr, size);
166 }
167
168 std::pair<MemPacketQueue::iterator, Tick>
169 DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
170 {
171 std::vector<uint32_t> earliest_banks(ranksPerChannel, 0);
172
173 // Has minBankPrep been called to populate earliest_banks?
174 bool filled_earliest_banks = false;
175     // can the PRE/ACT sequence be done without impacting utilization?
176 bool hidden_bank_prep = false;
177
178 // search for seamless row hits first, if no seamless row hit is
179 // found then determine if there are other packets that can be issued
180 // without incurring additional bus delay due to bank timing
181     // Will select closed rows first to enable more open row possibilities
182 // in future selections
183 bool found_hidden_bank = false;
184
185 // remember if we found a row hit, not seamless, but bank prepped
186 // and ready
187 bool found_prepped_pkt = false;
188
189 // if we have no row hit, prepped or not, and no seamless packet,
190 // just go for the earliest possible
191 bool found_earliest_pkt = false;
192
193 Tick selected_col_at = MaxTick;
194 auto selected_pkt_it = queue.end();
195
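    // In summary, the loop below selects, in decreasing priority:
    //   1) a seamless row hit (column command can issue by min_col_at),
    //   2) a packet whose PRE/ACT preparation can be hidden behind the
    //      current data transfer,
    //   3) a row hit to an already prepped bank,
    //   4) any packet to one of the earliest-available banks.
    // If nothing can issue (e.g. all ranks are refreshing), queue.end()
    // is returned.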
196 for (auto i = queue.begin(); i != queue.end() ; ++i) {
197 MemPacket* pkt = *i;
198
199 // select optimal DRAM packet in Q
200 if (pkt->isDram()) {
201 const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
202 const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
203 bank.wrAllowedAt;
204
205 DPRINTF(DRAM, "%s checking DRAM packet in bank %d, row %d\n",
206 __func__, pkt->bank, pkt->row);
207
208 // check if rank is not doing a refresh and thus is available,
209 // if not, jump to the next packet
210 if (burstReady(pkt)) {
211
212 DPRINTF(DRAM,
213 "%s bank %d - Rank %d available\n", __func__,
214 pkt->bank, pkt->rank);
215
216 // check if it is a row hit
217 if (bank.openRow == pkt->row) {
218 // no additional rank-to-rank or same bank-group
219 // delays, or we switched read/write and might as well
220 // go for the row hit
221 if (col_allowed_at <= min_col_at) {
222 // FCFS within the hits, giving priority to
223 // commands that can issue seamlessly, without
224 // additional delay, such as same rank accesses
225 // and/or different bank-group accesses
226 DPRINTF(DRAM, "%s Seamless buffer hit\n", __func__);
227 selected_pkt_it = i;
228 selected_col_at = col_allowed_at;
229 // no need to look through the remaining queue entries
230 break;
231 } else if (!found_hidden_bank && !found_prepped_pkt) {
232 // if we did not find a packet to a closed row that can
233 // issue the bank commands without incurring delay, and
234 // did not yet find a packet to a prepped row, remember
235 // the current one
236 selected_pkt_it = i;
237 selected_col_at = col_allowed_at;
238 found_prepped_pkt = true;
239 DPRINTF(DRAM, "%s Prepped row buffer hit\n", __func__);
240 }
241 } else if (!found_earliest_pkt) {
242 // if we have not initialised the bank status, do it
243                 // now, and only once per scheduling decision
244 if (!filled_earliest_banks) {
245 // determine entries with earliest bank delay
246 std::tie(earliest_banks, hidden_bank_prep) =
247 minBankPrep(queue, min_col_at);
248 filled_earliest_banks = true;
249 }
250
251 // bank is amongst first available banks
252 // minBankPrep will give priority to packets that can
253 // issue seamlessly
254 if (bits(earliest_banks[pkt->rank],
255 pkt->bank, pkt->bank)) {
256 found_earliest_pkt = true;
257 found_hidden_bank = hidden_bank_prep;
258
259 // give priority to packets that can issue
260 // bank commands 'behind the scenes'
261 // any additional delay if any will be due to
262 // col-to-col command requirements
263 if (hidden_bank_prep || !found_prepped_pkt) {
264 selected_pkt_it = i;
265 selected_col_at = col_allowed_at;
266 }
267 }
268 }
269 } else {
270 DPRINTF(DRAM, "%s bank %d - Rank %d not available\n", __func__,
271 pkt->bank, pkt->rank);
272 }
273 }
274 }
275
276 if (selected_pkt_it == queue.end()) {
277 DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__);
278 }
279
280 return std::make_pair(selected_pkt_it, selected_col_at);
281 }
282
283 void
284 DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
285 Tick act_tick, uint32_t row)
286 {
287 assert(rank_ref.actTicks.size() == activationLimit);
288
289 // verify that we have command bandwidth to issue the activate
290 // if not, shift to next burst window
291 Tick act_at;
292 if (twoCycleActivate)
293 act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD);
294 else
295 act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow);
296
297 DPRINTF(DRAM, "Activate at tick %d\n", act_at);
298
299 // update the open row
300 assert(bank_ref.openRow == Bank::NO_ROW);
301 bank_ref.openRow = row;
302
303 // start counting anew, this covers both the case when we
304 // auto-precharged, and when this access is forced to
305 // precharge
306 bank_ref.bytesAccessed = 0;
307 bank_ref.rowAccesses = 0;
308
309 ++rank_ref.numBanksActive;
310 assert(rank_ref.numBanksActive <= banksPerRank);
311
312 DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got "
313 "%d active\n", bank_ref.bank, rank_ref.rank, act_at,
314 ranks[rank_ref.rank]->numBanksActive);
315
316 rank_ref.cmdList.push_back(Command(MemCommand::ACT, bank_ref.bank,
317 act_at));
318
319 DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_at, tCK) -
320 timeStampOffset, bank_ref.bank, rank_ref.rank);
321
322 // The next access has to respect tRAS for this bank
323 bank_ref.preAllowedAt = act_at + tRAS;
324
325 // Respect the row-to-column command delay for both read and write cmds
326 bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt);
327 bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt);
328
329 // start by enforcing tRRD
330 for (int i = 0; i < banksPerRank; i++) {
331 // next activate to any bank in this rank must not happen
332 // before tRRD
333 if (bankGroupArch && (bank_ref.bankgr == rank_ref.banks[i].bankgr)) {
334 // bank group architecture requires longer delays between
335 // ACT commands within the same bank group. Use tRRD_L
336 // in this case
337 rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD_L,
338 rank_ref.banks[i].actAllowedAt);
339 } else {
340 // use shorter tRRD value when either
341                 // 1) bank group architecture is not supported
342 // 2) bank is in a different bank group
343 rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD,
344 rank_ref.banks[i].actAllowedAt);
345 }
346 }
347
348 // next, we deal with tXAW, if the activation limit is disabled
349 // then we directly schedule an activate power event
350 if (!rank_ref.actTicks.empty()) {
351 // sanity check
352 if (rank_ref.actTicks.back() &&
353 (act_at - rank_ref.actTicks.back()) < tXAW) {
354 panic("Got %d activates in window %d (%llu - %llu) which "
355 "is smaller than %llu\n", activationLimit, act_at -
356 rank_ref.actTicks.back(), act_at,
357 rank_ref.actTicks.back(), tXAW);
358 }
359
360         // shift the times used for the bookkeeping, the last element
361 // (highest index) is the oldest one and hence the lowest value
362 rank_ref.actTicks.pop_back();
363
364         // record a new activation (in the future)
365 rank_ref.actTicks.push_front(act_at);
366
367 // cannot activate more than X times in time window tXAW, push the
368 // next one (the X + 1'st activate) to be tXAW away from the
369 // oldest in our window of X
370 if (rank_ref.actTicks.back() &&
371 (act_at - rank_ref.actTicks.back()) < tXAW) {
372 DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate "
373 "no earlier than %llu\n", activationLimit,
374 rank_ref.actTicks.back() + tXAW);
375 for (int j = 0; j < banksPerRank; j++)
376 // next activate must not happen before end of window
377 rank_ref.banks[j].actAllowedAt =
378 std::max(rank_ref.actTicks.back() + tXAW,
379 rank_ref.banks[j].actAllowedAt);
380 }
381 }
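    // Illustrative example (hypothetical values): with activationLimit
    // == 4 and tXAW == 40ns, actTicks holds the four most recent ACT
    // times; if the oldest of them is at tick t, a fifth ACT cannot be
    // issued before t + 40ns, which the code above enforces by pushing
    // actAllowedAt for every bank in the rank out to that point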
382
383 // at the point when this activate takes place, make sure we
384 // transition to the active power state
385 if (!rank_ref.activateEvent.scheduled())
386 schedule(rank_ref.activateEvent, act_at);
387 else if (rank_ref.activateEvent.when() > act_at)
388 // move it sooner in time
389 reschedule(rank_ref.activateEvent, act_at);
390 }
391
392 void
393 DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick,
394 bool auto_or_preall, bool trace)
395 {
396 // make sure the bank has an open row
397 assert(bank.openRow != Bank::NO_ROW);
398
399 // sample the bytes per activate here since we are closing
400 // the page
401 stats.bytesPerActivate.sample(bank.bytesAccessed);
402
403 bank.openRow = Bank::NO_ROW;
404
405 Tick pre_at = pre_tick;
406 if (auto_or_preall) {
407 // no precharge allowed before this one
408 bank.preAllowedAt = pre_at;
409 } else {
410 // Issuing an explicit PRE command
411 // Verify that we have command bandwidth to issue the precharge
412 // if not, shift to next burst window
413 pre_at = ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow);
414 // enforce tPPD
415 for (int i = 0; i < banksPerRank; i++) {
416 rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
417 rank_ref.banks[i].preAllowedAt);
418 }
419 }
420
421 Tick pre_done_at = pre_at + tRP;
422
423 bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at);
424
425 assert(rank_ref.numBanksActive != 0);
426 --rank_ref.numBanksActive;
427
428 DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got "
429 "%d active\n", bank.bank, rank_ref.rank, pre_at,
430 rank_ref.numBanksActive);
431
432 if (trace) {
433
434 rank_ref.cmdList.push_back(Command(MemCommand::PRE, bank.bank,
435 pre_at));
436 DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) -
437 timeStampOffset, bank.bank, rank_ref.rank);
438 }
439
440 // if we look at the current number of active banks we might be
441 // tempted to think the DRAM is now idle, however this can be
442 // undone by an activate that is scheduled to happen before we
443 // would have reached the idle state, so schedule an event and
444 // rather check once we actually make it to the point in time when
445 // the (last) precharge takes place
446 if (!rank_ref.prechargeEvent.scheduled()) {
447 schedule(rank_ref.prechargeEvent, pre_done_at);
448 // New event, increment count
449 ++rank_ref.outstandingEvents;
450 } else if (rank_ref.prechargeEvent.when() < pre_done_at) {
451 reschedule(rank_ref.prechargeEvent, pre_done_at);
452 }
453 }
454
455 std::pair<Tick, Tick>
456 DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
457 const std::vector<MemPacketQueue>& queue)
458 {
459 DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
460 mem_pkt->addr, mem_pkt->rank, mem_pkt->bank, mem_pkt->row);
461
462 // get the rank
463 Rank& rank_ref = *ranks[mem_pkt->rank];
464
465 assert(rank_ref.inRefIdleState());
466
467 // are we in or transitioning to a low-power state and have not scheduled
468 // a power-up event?
469 // if so, wake up from power down to issue RD/WR burst
470 if (rank_ref.inLowPowerState) {
471 assert(rank_ref.pwrState != PWR_SREF);
472 rank_ref.scheduleWakeUpEvent(tXP);
473 }
474
475 // get the bank
476 Bank& bank_ref = rank_ref.banks[mem_pkt->bank];
477
478 // for the state we need to track if it is a row hit or not
479 bool row_hit = true;
480
481 // Determine the access latency and update the bank state
482 if (bank_ref.openRow == mem_pkt->row) {
483 // nothing to do
484 } else {
485 row_hit = false;
486
487 // If there is a page open, precharge it.
488 if (bank_ref.openRow != Bank::NO_ROW) {
489 prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt,
490 curTick()));
491 }
492
493 // next we need to account for the delay in activating the page
494 Tick act_tick = std::max(bank_ref.actAllowedAt, curTick());
495
496 // Record the activation and deal with all the global timing
497         // constraints caused by a new activation (tRRD and tXAW)
498 activateBank(rank_ref, bank_ref, act_tick, mem_pkt->row);
499 }
500
501 // respect any constraints on the command (e.g. tRCD or tCCD)
502 const Tick col_allowed_at = mem_pkt->isRead() ?
503 bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;
504
505 // we need to wait until the bus is available before we can issue
506 // the command; need to ensure minimum bus delay requirement is met
507 Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()});
508
509 // verify that we have command bandwidth to issue the burst
510 // if not, shift to next burst window
511 if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay))
512 cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
513 else
514 cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
515
516 // if we are interleaving bursts, ensure that
517 // 1) we don't double interleave on next burst issue
518 // 2) we are at an interleave boundary; if not, shift to next boundary
519 Tick burst_gap = tBURST_MIN;
520 if (burstInterleave) {
521 if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) {
522 // already interleaving, push next command to end of full burst
523 burst_gap = tBURST;
524 } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) {
525 // not at an interleave boundary after bandwidth check
526 // Shift command to tBURST boundary to avoid data contention
527 // Command will remain in the same burst window given that
528 // tBURST is less than tBURST_MAX
529 cmd_at = rank_ref.lastBurstTick + tBURST;
530 }
531 }
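    // Illustrative example (hypothetical timings): with tBURST_MIN equal
    // to half of tBURST, two bursts can be interleaved; a command landing
    // exactly tBURST_MIN after the previous burst starts the interleave,
    // and the command after that is pushed out by a full tBURST so the
    // two data transfers do not overlap on the bus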
532 DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at);
533
534 // update the packet ready time
535 mem_pkt->readyTime = cmd_at + tCL + tBURST;
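    // the packet is ready tCL + tBURST after the column command issues;
    // for reads, the latency stats further down split this into queueing
    // delay (cmd_at - entryTime), bus time (tBURST) and total access
    // latency (readyTime - entryTime)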
536
537 rank_ref.lastBurstTick = cmd_at;
538
539 // update the time for the next read/write burst for each
540 // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here)
541 Tick dly_to_rd_cmd;
542 Tick dly_to_wr_cmd;
543 for (int j = 0; j < ranksPerChannel; j++) {
544 for (int i = 0; i < banksPerRank; i++) {
545 if (mem_pkt->rank == j) {
546 if (bankGroupArch &&
547 (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) {
548 // bank group architecture requires longer delays between
549 // RD/WR burst commands to the same bank group.
550 // tCCD_L is default requirement for same BG timing
551 // tCCD_L_WR is required for write-to-write
552 // Need to also take bus turnaround delays into account
553 dly_to_rd_cmd = mem_pkt->isRead() ?
554 tCCD_L : std::max(tCCD_L, wrToRdDlySameBG);
555 dly_to_wr_cmd = mem_pkt->isRead() ?
556 std::max(tCCD_L, rdToWrDlySameBG) :
557 tCCD_L_WR;
558 } else {
559 // tBURST is default requirement for diff BG timing
560 // Need to also take bus turnaround delays into account
561 dly_to_rd_cmd = mem_pkt->isRead() ? burst_gap :
562 writeToReadDelay();
563 dly_to_wr_cmd = mem_pkt->isRead() ? readToWriteDelay() :
564 burst_gap;
565 }
566 } else {
567 // different rank is by default in a different bank group and
568 // doesn't require longer tCCD or additional RTW, WTR delays
569 // Need to account for rank-to-rank switching
570 dly_to_wr_cmd = rankToRankDelay();
571 dly_to_rd_cmd = rankToRankDelay();
572 }
573 ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
574 ranks[j]->banks[i].rdAllowedAt);
575 ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
576 ranks[j]->banks[i].wrAllowedAt);
577 }
578 }
579
580 // Save rank of current access
581 activeRank = mem_pkt->rank;
582
583 // If this is a write, we also need to respect the write recovery
584 // time before a precharge, in the case of a read, respect the
585 // read to precharge constraint
586 bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt,
587 mem_pkt->isRead() ? cmd_at + tRTP :
588 mem_pkt->readyTime + tWR);
589
590 // increment the bytes accessed and the accesses per row
591 bank_ref.bytesAccessed += burstSize;
592 ++bank_ref.rowAccesses;
593
594 // if we reached the max, then issue with an auto-precharge
595 bool auto_precharge = pageMgmt == Enums::close ||
596 bank_ref.rowAccesses == maxAccessesPerRow;
597
598 // if we did not hit the limit, we might still want to
599 // auto-precharge
600 if (!auto_precharge &&
601 (pageMgmt == Enums::open_adaptive ||
602 pageMgmt == Enums::close_adaptive)) {
603 // a twist on the open and close page policies:
604 // 1) open_adaptive page policy does not blindly keep the
605 // page open, but close it if there are no row hits, and there
606 // are bank conflicts in the queue
607 // 2) close_adaptive page policy does not blindly close the
608 // page, but closes it only if there are no row hits in the queue.
609 // In this case, only force an auto precharge when there
610 // are no same page hits in the queue
611 bool got_more_hits = false;
612 bool got_bank_conflict = false;
613
614 for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) {
615 auto p = queue[i].begin();
616 // keep on looking until we find a hit or reach the end of the
617 // queue
618 // 1) if a hit is found, then both open and close adaptive
619 // policies keep the page open
620 // 2) if no hit is found, got_bank_conflict is set to true if a
621 // bank conflict request is waiting in the queue
622 // 3) make sure we are not considering the packet that we are
623 // currently dealing with
624 while (!got_more_hits && p != queue[i].end()) {
625 if (mem_pkt != (*p)) {
626 bool same_rank_bank = (mem_pkt->rank == (*p)->rank) &&
627 (mem_pkt->bank == (*p)->bank);
628
629 bool same_row = mem_pkt->row == (*p)->row;
630 got_more_hits |= same_rank_bank && same_row;
631 got_bank_conflict |= same_rank_bank && !same_row;
632 }
633 ++p;
634 }
635
636 if (got_more_hits)
637 break;
638 }
639
640 // auto pre-charge when either
641 // 1) open_adaptive policy, we have not got any more hits, and
642 // have a bank conflict
643 // 2) close_adaptive policy and we have not got any more hits
644 auto_precharge = !got_more_hits &&
645 (got_bank_conflict || pageMgmt == Enums::close_adaptive);
646 }
647
648 // DRAMPower trace command to be written
649 std::string mem_cmd = mem_pkt->isRead() ? "RD" : "WR";
650
651 // MemCommand required for DRAMPower library
652 MemCommand::cmds command = (mem_cmd == "RD") ? MemCommand::RD :
653 MemCommand::WR;
654
655 rank_ref.cmdList.push_back(Command(command, mem_pkt->bank, cmd_at));
656
657 DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) -
658 timeStampOffset, mem_cmd, mem_pkt->bank, mem_pkt->rank);
659
660 // if this access should use auto-precharge, then we are
661 // closing the row after the read/write burst
662 if (auto_precharge) {
663 // if auto-precharge push a PRE command at the correct tick to the
664 // list used by DRAMPower library to calculate power
665 prechargeBank(rank_ref, bank_ref, std::max(curTick(),
666 bank_ref.preAllowedAt), true);
667
668 DPRINTF(DRAM, "Auto-precharged bank: %d\n", mem_pkt->bankId);
669 }
670
671 // Update the stats and schedule the next request
672 if (mem_pkt->isRead()) {
673 // Every respQueue which will generate an event, increment count
674 ++rank_ref.outstandingEvents;
675
676 stats.readBursts++;
677 if (row_hit)
678 stats.readRowHits++;
679 stats.bytesRead += burstSize;
680 stats.perBankRdBursts[mem_pkt->bankId]++;
681
682 // Update latency stats
683 stats.totMemAccLat += mem_pkt->readyTime - mem_pkt->entryTime;
684 stats.totQLat += cmd_at - mem_pkt->entryTime;
685 stats.totBusLat += tBURST;
686 } else {
687 // Schedule write done event to decrement event count
688 // after the readyTime has been reached
689 // Only schedule latest write event to minimize events
690 // required; only need to ensure that final event scheduled covers
691 // the time that writes are outstanding and bus is active
692             // to hold off power-down entry events
693 if (!rank_ref.writeDoneEvent.scheduled()) {
694 schedule(rank_ref.writeDoneEvent, mem_pkt->readyTime);
695 // New event, increment count
696 ++rank_ref.outstandingEvents;
697
698 } else if (rank_ref.writeDoneEvent.when() < mem_pkt->readyTime) {
699 reschedule(rank_ref.writeDoneEvent, mem_pkt->readyTime);
700 }
701 // will remove write from queue when returned to parent function
702 // decrement count for DRAM rank
703 --rank_ref.writeEntries;
704
705 stats.writeBursts++;
706 if (row_hit)
707 stats.writeRowHits++;
708 stats.bytesWritten += burstSize;
709 stats.perBankWrBursts[mem_pkt->bankId]++;
710
711 }
712 // Update bus state to reflect when previous command was issued
713 return std::make_pair(cmd_at, cmd_at + burst_gap);
714 }
715
716 void
717 DRAMInterface::addRankToRankDelay(Tick cmd_at)
718 {
719 // update timing for DRAM ranks due to bursts issued
720 // to ranks on other media interfaces
721 for (auto n : ranks) {
722 for (int i = 0; i < banksPerRank; i++) {
723 // different rank by default
724 // Need to only account for rank-to-rank switching
725 n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
726 n->banks[i].rdAllowedAt);
727 n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
728 n->banks[i].wrAllowedAt);
729 }
730 }
731 }
732
733 DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p)
734 : MemInterface(_p),
735 bankGroupsPerRank(_p.bank_groups_per_rank),
736 bankGroupArch(_p.bank_groups_per_rank > 0),
737 tCL(_p.tCL),
738 tBURST_MIN(_p.tBURST_MIN), tBURST_MAX(_p.tBURST_MAX),
739 tCCD_L_WR(_p.tCCD_L_WR), tCCD_L(_p.tCCD_L), tRCD(_p.tRCD),
740 tRP(_p.tRP), tRAS(_p.tRAS), tWR(_p.tWR), tRTP(_p.tRTP),
741 tRFC(_p.tRFC), tREFI(_p.tREFI), tRRD(_p.tRRD), tRRD_L(_p.tRRD_L),
742 tPPD(_p.tPPD), tAAD(_p.tAAD),
743 tXAW(_p.tXAW), tXP(_p.tXP), tXS(_p.tXS),
744 clkResyncDelay(tCL + _p.tBURST_MAX),
745 dataClockSync(_p.data_clock_sync),
746 burstInterleave(tBURST != tBURST_MIN),
747 twoCycleActivate(_p.two_cycle_activate),
748 activationLimit(_p.activation_limit),
749 wrToRdDlySameBG(tCL + _p.tBURST_MAX + _p.tWTR_L),
750 rdToWrDlySameBG(_p.tRTW + _p.tBURST_MAX),
751 pageMgmt(_p.page_policy),
752 maxAccessesPerRow(_p.max_accesses_per_row),
753 timeStampOffset(0), activeRank(0),
754 enableDRAMPowerdown(_p.enable_dram_powerdown),
755 lastStatsResetTick(0),
756 stats(*this)
757 {
758 DPRINTF(DRAM, "Setting up DRAM Interface\n");
759
760 fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
761 "must be a power of two\n", burstSize);
762
763 // sanity check the ranks since we rely on bit slicing for the
764 // address decoding
765 fatal_if(!isPowerOf2(ranksPerChannel), "DRAM rank count of %d is "
766 "not allowed, must be a power of two\n", ranksPerChannel);
767
768 for (int i = 0; i < ranksPerChannel; i++) {
769 DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
770 Rank* rank = new Rank(_p, i, *this);
771 ranks.push_back(rank);
772 }
773
774 // determine the dram actual capacity from the DRAM config in Mbytes
775 uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
776 ranksPerChannel;
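    // Illustrative example (hypothetical configuration): 1 GiB devices,
    // 8 devices per rank and 2 ranks per channel give
    // 1024 MB * 8 * 2 = 16384 Mbytes of device capacity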
777
778 uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
779
780 DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
781 AbstractMemory::size());
782
783 // if actual DRAM size does not match memory capacity in system warn!
784 if (deviceCapacity != capacity / (1024 * 1024))
785 warn("DRAM device capacity (%d Mbytes) does not match the "
786 "address range assigned (%d Mbytes)\n", deviceCapacity,
787 capacity / (1024 * 1024));
788
789 DPRINTF(DRAM, "Row buffer size %d bytes with %d bursts per row buffer\n",
790 rowBufferSize, burstsPerRowBuffer);
791
792 rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);
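    // Illustrative example (hypothetical configuration): a 1 GiB channel
    // with 1 KiB row buffers, 8 banks per rank and 2 ranks per channel
    // gives 2^30 / (1024 * 8 * 2) = 65536 rows per bank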
793
794 // some basic sanity checks
795 if (tREFI <= tRP || tREFI <= tRFC) {
796 fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
797 tREFI, tRP, tRFC);
798 }
799
800 // basic bank group architecture checks ->
801 if (bankGroupArch) {
802 // must have at least one bank per bank group
803 if (bankGroupsPerRank > banksPerRank) {
804 fatal("banks per rank (%d) must be equal to or larger than "
805                   "bank groups per rank (%d)\n",
806 banksPerRank, bankGroupsPerRank);
807 }
808 // must have same number of banks in each bank group
809 if ((banksPerRank % bankGroupsPerRank) != 0) {
810 fatal("Banks per rank (%d) must be evenly divisible by bank "
811 "groups per rank (%d) for equal banks per bank group\n",
812 banksPerRank, bankGroupsPerRank);
813 }
814 // tCCD_L should be greater than minimal, back-to-back burst delay
815 if (tCCD_L <= tBURST) {
816 fatal("tCCD_L (%d) should be larger than the minimum bus delay "
817 "(%d) when bank groups per rank (%d) is greater than 1\n",
818 tCCD_L, tBURST, bankGroupsPerRank);
819 }
820 // tCCD_L_WR should be greater than minimal, back-to-back burst delay
821 if (tCCD_L_WR <= tBURST) {
822 fatal("tCCD_L_WR (%d) should be larger than the minimum bus delay "
823 " (%d) when bank groups per rank (%d) is greater than 1\n",
824 tCCD_L_WR, tBURST, bankGroupsPerRank);
825 }
826 // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay
827 // some datasheets might specify it equal to tRRD
828 if (tRRD_L < tRRD) {
829 fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
830 "bank groups per rank (%d) is greater than 1\n",
831 tRRD_L, tRRD, bankGroupsPerRank);
832 }
833 }
834 }
835
836 void
837 DRAMInterface::init()
838 {
839 AbstractMemory::init();
840
841     // a few sanity checks on the interleaving, deferred to here to
842 // ensure that the system pointer is initialised
843 if (range.interleaved()) {
844 if (addrMapping == Enums::RoRaBaChCo) {
845 if (rowBufferSize != range.granularity()) {
846 fatal("Channel interleaving of %s doesn't match RoRaBaChCo "
847 "address map\n", name());
848 }
849 } else if (addrMapping == Enums::RoRaBaCoCh ||
850 addrMapping == Enums::RoCoRaBaCh) {
851 // for the interleavings with channel bits in the bottom,
852 // if the system uses a channel striping granularity that
853 // is larger than the DRAM burst size, then map the
854 // sequential accesses within a stripe to a number of
855 // columns in the DRAM, effectively placing some of the
856 // lower-order column bits as the least-significant bits
857 // of the address (above the ones denoting the burst size)
858 assert(burstsPerStripe >= 1);
859
860 // channel striping has to be done at a granularity that
861 // is equal or larger to a cache line
862 if (system()->cacheLineSize() > range.granularity()) {
863 fatal("Channel interleaving of %s must be at least as large "
864 "as the cache line size\n", name());
865 }
866
867 // ...and equal or smaller than the row-buffer size
868 if (rowBufferSize < range.granularity()) {
869 fatal("Channel interleaving of %s must be at most as large "
870 "as the row-buffer size\n", name());
871 }
872 // this is essentially the check above, so just to be sure
873 assert(burstsPerStripe <= burstsPerRowBuffer);
874 }
875 }
876 }
877
878 void
879 DRAMInterface::startup()
880 {
881 if (system()->isTimingMode()) {
882 // timestamp offset should be in clock cycles for DRAMPower
883 timeStampOffset = divCeil(curTick(), tCK);
884
885 for (auto r : ranks) {
886 r->startup(curTick() + tREFI - tRP);
887 }
888 }
889 }
890
891 bool
892 DRAMInterface::isBusy()
893 {
894 int busy_ranks = 0;
895 for (auto r : ranks) {
896 if (!r->inRefIdleState()) {
897 if (r->pwrState != PWR_SREF) {
898 // rank is busy refreshing
899 DPRINTF(DRAMState, "Rank %d is not available\n", r->rank);
900 busy_ranks++;
901
902 // let the rank know that if it was waiting to drain, it
903 // is now done and ready to proceed
904 r->checkDrainDone();
905 }
906
907 // check if we were in self-refresh and haven't started
908 // to transition out
909 if ((r->pwrState == PWR_SREF) && r->inLowPowerState) {
910 DPRINTF(DRAMState, "Rank %d is in self-refresh\n", r->rank);
911 // if we have commands queued to this rank and we don't have
912 // a minimum number of active commands enqueued,
913 // exit self-refresh
914 if (r->forceSelfRefreshExit()) {
915 DPRINTF(DRAMState, "rank %d was in self refresh and"
916 " should wake up\n", r->rank);
917 //wake up from self-refresh
918 r->scheduleWakeUpEvent(tXS);
919 // things are brought back into action once a refresh is
920 // performed after self-refresh
921 // continue with selection for other ranks
922 }
923 }
924 }
925 }
926 return (busy_ranks == ranksPerChannel);
927 }
928
929 void DRAMInterface::setupRank(const uint8_t rank, const bool is_read)
930 {
931 // increment entry count of the rank based on packet type
932 if (is_read) {
933 ++ranks[rank]->readEntries;
934 } else {
935 ++ranks[rank]->writeEntries;
936 }
937 }
938
939 void
940 DRAMInterface::respondEvent(uint8_t rank)
941 {
942 Rank& rank_ref = *ranks[rank];
943
944 // if a read has reached its ready-time, decrement the number of reads
945 // At this point the packet has been handled and there is a possibility
946 // to switch to low-power mode if no other packet is available
947 --rank_ref.readEntries;
948 DPRINTF(DRAM, "number of read entries for rank %d is %d\n",
949 rank, rank_ref.readEntries);
950
951 // counter should at least indicate one outstanding request
952 // for this read
953 assert(rank_ref.outstandingEvents > 0);
954 // read response received, decrement count
955 --rank_ref.outstandingEvents;
956
957 // at this moment should not have transitioned to a low-power state
958 assert((rank_ref.pwrState != PWR_SREF) &&
959 (rank_ref.pwrState != PWR_PRE_PDN) &&
960 (rank_ref.pwrState != PWR_ACT_PDN));
961
962 // track if this is the last packet before idling
963 // and that there are no outstanding commands to this rank
964 if (rank_ref.isQueueEmpty() && rank_ref.outstandingEvents == 0 &&
965 rank_ref.inRefIdleState() && enableDRAMPowerdown) {
966 // verify that there are no events scheduled
967 assert(!rank_ref.activateEvent.scheduled());
968 assert(!rank_ref.prechargeEvent.scheduled());
969
970 // if coming from active state, schedule power event to
971 // active power-down else go to precharge power-down
972 DPRINTF(DRAMState, "Rank %d sleep at tick %d; current power state is "
973 "%d\n", rank, curTick(), rank_ref.pwrState);
974
975 // default to ACT power-down unless already in IDLE state
976 // could be in IDLE if PRE issued before data returned
977 PowerState next_pwr_state = PWR_ACT_PDN;
978 if (rank_ref.pwrState == PWR_IDLE) {
979 next_pwr_state = PWR_PRE_PDN;
980 }
981
982 rank_ref.powerDownSleep(next_pwr_state, curTick());
983 }
984 }
985
986 void
987 DRAMInterface::checkRefreshState(uint8_t rank)
988 {
989 Rank& rank_ref = *ranks[rank];
990
991 if ((rank_ref.refreshState == REF_PRE) &&
992 !rank_ref.prechargeEvent.scheduled()) {
993 // kick the refresh event loop into action again if banks already
994 // closed and just waiting for read to complete
995 schedule(rank_ref.refreshEvent, curTick());
996 }
997 }
998
999 void
1000 DRAMInterface::drainRanks()
1001 {
1002 // also need to kick off events to exit self-refresh
1003 for (auto r : ranks) {
1004 // force self-refresh exit, which in turn will issue auto-refresh
1005 if (r->pwrState == PWR_SREF) {
1006 DPRINTF(DRAM,"Rank%d: Forcing self-refresh wakeup in drain\n",
1007 r->rank);
1008 r->scheduleWakeUpEvent(tXS);
1009 }
1010 }
1011 }
1012
1013 bool
1014 DRAMInterface::allRanksDrained() const
1015 {
1016 // true until proven false
1017 bool all_ranks_drained = true;
1018 for (auto r : ranks) {
1019 // then verify that the power state is IDLE ensuring all banks are
1020 // closed and rank is not in a low power state. Also verify that rank
1021 // is idle from a refresh point of view.
1022 all_ranks_drained = r->inPwrIdleState() && r->inRefIdleState() &&
1023 all_ranks_drained;
1024 }
1025 return all_ranks_drained;
1026 }
1027
1028 void
1029 DRAMInterface::suspend()
1030 {
1031 for (auto r : ranks) {
1032 r->suspend();
1033 }
1034 }
1035
1036 std::pair<std::vector<uint32_t>, bool>
1037 DRAMInterface::minBankPrep(const MemPacketQueue& queue,
1038 Tick min_col_at) const
1039 {
1040 Tick min_act_at = MaxTick;
1041 std::vector<uint32_t> bank_mask(ranksPerChannel, 0);
1042
1043     // latest Tick for which ACT can occur without incurring additional
1044 // delay on the data bus
1045 const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick());
1046
1047 // Flag condition when burst can issue back-to-back with previous burst
1048 bool found_seamless_bank = false;
1049
1050 // Flag condition when bank can be opened without incurring additional
1051 // delay on the data bus
1052 bool hidden_bank_prep = false;
1053
1054     // determine if we have queued transactions targeting the
1055 // bank in question
1056 std::vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
1057 for (const auto& p : queue) {
1058 if (p->isDram() && ranks[p->rank]->inRefIdleState())
1059 got_waiting[p->bankId] = true;
1060 }
1061
1062 // Find command with optimal bank timing
1063 // Will prioritize commands that can issue seamlessly.
1064 for (int i = 0; i < ranksPerChannel; i++) {
1065 for (int j = 0; j < banksPerRank; j++) {
1066 uint16_t bank_id = i * banksPerRank + j;
1067
1068 // if we have waiting requests for the bank, and it is
1069 // amongst the first available, update the mask
1070 if (got_waiting[bank_id]) {
1071 // make sure this rank is not currently refreshing.
1072 assert(ranks[i]->inRefIdleState());
1073 // simplistic approximation of when the bank can issue
1074 // an activate, ignoring any rank-to-rank switching
1075 // cost in this calculation
1076 Tick act_at = ranks[i]->banks[j].openRow == Bank::NO_ROW ?
1077 std::max(ranks[i]->banks[j].actAllowedAt, curTick()) :
1078 std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP;
1079
1080 // When is the earliest the R/W burst can issue?
1081 const Tick col_allowed_at = ctrl->inReadBusState(false) ?
1082 ranks[i]->banks[j].rdAllowedAt :
1083 ranks[i]->banks[j].wrAllowedAt;
1084 Tick col_at = std::max(col_allowed_at, act_at + tRCD);
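                // a closed bank can activate as soon as actAllowedAt
                // permits; a bank with a row open is assumed to first
                // precharge, hence the additional tRP, and the earliest
                // column command then follows tRCD after the activate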
1085
1086 // bank can issue burst back-to-back (seamlessly) with
1087 // previous burst
1088 bool new_seamless_bank = col_at <= min_col_at;
1089
1090 // if we found a new seamless bank or we have no
1091 // seamless banks, and got a bank with an earlier
1092 // activate time, it should be added to the bit mask
1093 if (new_seamless_bank ||
1094 (!found_seamless_bank && act_at <= min_act_at)) {
1095 // if we did not have a seamless bank before, and
1096 // we do now, reset the bank mask, also reset it
1097 // if we have not yet found a seamless bank and
1098 // the activate time is smaller than what we have
1099 // seen so far
1100 if (!found_seamless_bank &&
1101 (new_seamless_bank || act_at < min_act_at)) {
1102 std::fill(bank_mask.begin(), bank_mask.end(), 0);
1103 }
1104
1105 found_seamless_bank |= new_seamless_bank;
1106
1107 // ACT can occur 'behind the scenes'
1108 hidden_bank_prep = act_at <= hidden_act_max;
1109
1110 // set the bit corresponding to the available bank
1111 replaceBits(bank_mask[i], j, j, 1);
1112 min_act_at = act_at;
1113 }
1114 }
1115 }
1116 }
1117
1118 return std::make_pair(bank_mask, hidden_bank_prep);
1119 }
1120
1121 DRAMInterface::Rank::Rank(const DRAMInterfaceParams &_p,
1122 int _rank, DRAMInterface& _dram)
1123 : EventManager(&_dram), dram(_dram),
1124 pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE),
1125 pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE),
1126 refreshState(REF_IDLE), inLowPowerState(false), rank(_rank),
1127 readEntries(0), writeEntries(0), outstandingEvents(0),
1128 wakeUpAllowedAt(0), power(_p, false), banks(_p.banks_per_rank),
1129 numBanksActive(0), actTicks(_p.activation_limit, 0), lastBurstTick(0),
1130 writeDoneEvent([this]{ processWriteDoneEvent(); }, name()),
1131 activateEvent([this]{ processActivateEvent(); }, name()),
1132 prechargeEvent([this]{ processPrechargeEvent(); }, name()),
1133 refreshEvent([this]{ processRefreshEvent(); }, name()),
1134 powerEvent([this]{ processPowerEvent(); }, name()),
1135 wakeUpEvent([this]{ processWakeUpEvent(); }, name()),
1136 stats(_dram, *this)
1137 {
1138 for (int b = 0; b < _p.banks_per_rank; b++) {
1139 banks[b].bank = b;
1140 // GDDR addressing of banks to BG is linear.
1141 // Here we assume that all DRAM generations address bank groups as
1142 // follows:
1143 if (_p.bank_groups_per_rank > 0) {
1144 // Simply assign lower bits to bank group in order to
1145 // rotate across bank groups as banks are incremented
1146 // e.g. with 4 banks per bank group and 16 banks total:
1147 // banks 0,4,8,12 are in bank group 0
1148 // banks 1,5,9,13 are in bank group 1
1149 // banks 2,6,10,14 are in bank group 2
1150 // banks 3,7,11,15 are in bank group 3
1151 banks[b].bankgr = b % _p.bank_groups_per_rank;
1152 } else {
1153 // No bank groups; simply assign to bank number
1154 banks[b].bankgr = b;
1155 }
1156 }
1157 }
1158
1159 void
1160 DRAMInterface::Rank::startup(Tick ref_tick)
1161 {
1162 assert(ref_tick > curTick());
1163
1164 pwrStateTick = curTick();
1165
1166 // kick off the refresh, and give ourselves enough time to
1167 // precharge
1168 schedule(refreshEvent, ref_tick);
1169 }
1170
1171 void
1172 DRAMInterface::Rank::suspend()
1173 {
1174 deschedule(refreshEvent);
1175
1176 // Update the stats
1177 updatePowerStats();
1178
1179 // don't automatically transition back to LP state after next REF
1180 pwrStatePostRefresh = PWR_IDLE;
1181 }
1182
1183 bool
1184 DRAMInterface::Rank::isQueueEmpty() const
1185 {
1186     // check commands in Q based on current bus direction
1187 bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
1188 (readEntries == 0))
1189 || (dram.ctrl->inWriteBusState(true) &&
1190 (writeEntries == 0));
1191 return no_queued_cmds;
1192 }
1193
1194 void
1195 DRAMInterface::Rank::checkDrainDone()
1196 {
1197 // if this rank was waiting to drain it is now able to proceed to
1198 // precharge
1199 if (refreshState == REF_DRAIN) {
1200 DPRINTF(DRAM, "Refresh drain done, now precharging\n");
1201
1202 refreshState = REF_PD_EXIT;
1203
1204 // hand control back to the refresh event loop
1205 schedule(refreshEvent, curTick());
1206 }
1207 }
1208
1209 void
1210 DRAMInterface::Rank::flushCmdList()
1211 {
1212 // at the moment sort the list of commands and update the counters
1213     // for the DRAMPower library when doing a refresh
1214 sort(cmdList.begin(), cmdList.end(), DRAMInterface::sortTime);
1215
1216 auto next_iter = cmdList.begin();
1217     // push commands to DRAMPower
1218 for ( ; next_iter != cmdList.end() ; ++next_iter) {
1219 Command cmd = *next_iter;
1220 if (cmd.timeStamp <= curTick()) {
1221 // Move all commands at or before curTick to DRAMPower
1222 power.powerlib.doCommand(cmd.type, cmd.bank,
1223 divCeil(cmd.timeStamp, dram.tCK) -
1224 dram.timeStampOffset);
1225 } else {
1226 // done - found all commands at or before curTick()
1227 // next_iter references the 1st command after curTick
1228 break;
1229 }
1230 }
1231 // reset cmdList to only contain commands after curTick
1232 // if there are no commands after curTick, updated cmdList will be empty
1233 // in this case, next_iter is cmdList.end()
1234 cmdList.assign(next_iter, cmdList.end());
1235 }
1236
1237 void
1238 DRAMInterface::Rank::processActivateEvent()
1239 {
1240 // we should transition to the active state as soon as any bank is active
1241 if (pwrState != PWR_ACT)
1242 // note that at this point numBanksActive could be back at
1243 // zero again due to a precharge scheduled in the future
1244 schedulePowerEvent(PWR_ACT, curTick());
1245 }
1246
1247 void
1248 DRAMInterface::Rank::processPrechargeEvent()
1249 {
1250 // counter should at least indicate one outstanding request
1251 // for this precharge
1252 assert(outstandingEvents > 0);
1253 // precharge complete, decrement count
1254 --outstandingEvents;
1255
1256 // if we reached zero, then special conditions apply as we track
1257 // if all banks are precharged for the power models
1258 if (numBanksActive == 0) {
1259 // no reads to this rank in the Q and no pending
1260 // RD/WR or refresh commands
1261 if (isQueueEmpty() && outstandingEvents == 0 &&
1262 dram.enableDRAMPowerdown) {
1263 // should still be in ACT state since bank still open
1264 assert(pwrState == PWR_ACT);
1265
1266 // All banks closed - switch to precharge power down state.
1267 DPRINTF(DRAMState, "Rank %d sleep at tick %d\n",
1268 rank, curTick());
1269 powerDownSleep(PWR_PRE_PDN, curTick());
1270 } else {
1271 // we should transition to the idle state when the last bank
1272 // is precharged
1273 schedulePowerEvent(PWR_IDLE, curTick());
1274 }
1275 }
1276 }
1277
1278 void
1279 DRAMInterface::Rank::processWriteDoneEvent()
1280 {
1281 // counter should at least indicate one outstanding request
1282 // for this write
1283 assert(outstandingEvents > 0);
1284 // Write transfer on bus has completed
1285 // decrement per rank counter
1286 --outstandingEvents;
1287 }
1288
1289 void
1290 DRAMInterface::Rank::processRefreshEvent()
1291 {
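    // In outline, the refresh state machine below advances as:
    // REF_IDLE / REF_SREF_EXIT -> REF_DRAIN (let an in-flight access to
    // this rank finish) -> REF_PD_EXIT (wake from any power-down) ->
    // REF_PRE (precharge all banks) -> REF_START (issue REF once the
    // power state machine reaches PWR_REF) -> REF_RUN (wait tRFC), after
    // which the next refresh is scheduled relative to when this one was
    // due (tREFI later, less tRP of precharge headroom)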
1292 // when first preparing the refresh, remember when it was due
1293 if ((refreshState == REF_IDLE) || (refreshState == REF_SREF_EXIT)) {
1294 // remember when the refresh is due
1295 refreshDueAt = curTick();
1296
1297 // proceed to drain
1298 refreshState = REF_DRAIN;
1299
1300 // make nonzero while refresh is pending to ensure
1301 // power down and self-refresh are not entered
1302 ++outstandingEvents;
1303
1304 DPRINTF(DRAM, "Refresh due\n");
1305 }
1306
1307 // let any scheduled read or write to the same rank go ahead,
1308 // after which it will
1309 // hand control back to this event loop
1310 if (refreshState == REF_DRAIN) {
1311 // if a request is at the moment being handled and this request is
1312 // accessing the current rank then wait for it to finish
1313 if ((rank == dram.activeRank)
1314 && (dram.ctrl->requestEventScheduled())) {
1315 // hand control over to the request loop until it is
1316 // evaluated next
1317 DPRINTF(DRAM, "Refresh awaiting draining\n");
1318
1319 return;
1320 } else {
1321 refreshState = REF_PD_EXIT;
1322 }
1323 }
1324
1325 // at this point, ensure that rank is not in a power-down state
1326 if (refreshState == REF_PD_EXIT) {
1327         // if rank was sleeping and we haven't started the exit process,
1328 // wake-up for refresh
1329 if (inLowPowerState) {
1330 DPRINTF(DRAM, "Wake Up for refresh\n");
1331 // save state and return after refresh completes
1332 scheduleWakeUpEvent(dram.tXP);
1333 return;
1334 } else {
1335 refreshState = REF_PRE;
1336 }
1337 }
1338
1339 // at this point, ensure that all banks are precharged
1340 if (refreshState == REF_PRE) {
1341 // precharge any active bank
1342 if (numBanksActive != 0) {
1343 // at the moment, we use a precharge all even if there is
1344 // only a single bank open
1345 DPRINTF(DRAM, "Precharging all\n");
1346
1347 // first determine when we can precharge
1348 Tick pre_at = curTick();
1349
1350 for (auto &b : banks) {
1351 // respect both causality and any existing bank
1352 // constraints, some banks could already have a
1353 // (auto) precharge scheduled
1354 pre_at = std::max(b.preAllowedAt, pre_at);
1355 }
1356
1357 // make sure all banks per rank are precharged, and for those that
1358 // already are, update their availability
1359 Tick act_allowed_at = pre_at + dram.tRP;
1360
1361 for (auto &b : banks) {
1362 if (b.openRow != Bank::NO_ROW) {
1363 dram.prechargeBank(*this, b, pre_at, true, false);
1364 } else {
1365 b.actAllowedAt = std::max(b.actAllowedAt, act_allowed_at);
1366 b.preAllowedAt = std::max(b.preAllowedAt, pre_at);
1367 }
1368 }
1369
1370 // precharge all banks in rank
1371 cmdList.push_back(Command(MemCommand::PREA, 0, pre_at));
1372
1373 DPRINTF(DRAMPower, "%llu,PREA,0,%d\n",
1374 divCeil(pre_at, dram.tCK) -
1375 dram.timeStampOffset, rank);
1376 } else if ((pwrState == PWR_IDLE) && (outstandingEvents == 1)) {
1377 // Banks are closed, have transitioned to IDLE state, and
1378 // no outstanding ACT,RD/WR,Auto-PRE sequence scheduled
1379 DPRINTF(DRAM, "All banks already precharged, starting refresh\n");
1380
1381 // go ahead and kick the power state machine into gear since
1382 // we are already idle
1383 schedulePowerEvent(PWR_REF, curTick());
1384 } else {
1385         // banks are closed but we haven't transitioned pwrState to IDLE
1386 // or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
1387 // should have outstanding precharge or read response event
1388 assert(prechargeEvent.scheduled() ||
1389 dram.ctrl->respondEventScheduled());
1390 // will start refresh when pwrState transitions to IDLE
1391 }
1392
1393 assert(numBanksActive == 0);
1394
1395 // wait for all banks to be precharged or read to complete
1396 // When precharge commands are done, power state machine will
1397 // transition to the idle state, and automatically move to a
1398 // refresh, at that point it will also call this method to get
1399 // the refresh event loop going again
1400 // Similarly, when read response completes, if all banks are
1401 // precharged, will call this method to get loop re-started
1402 return;
1403 }
1404
1405 // last but not least we perform the actual refresh
1406 if (refreshState == REF_START) {
1407 // should never get here with any banks active
1408 assert(numBanksActive == 0);
1409 assert(pwrState == PWR_REF);
1410
1411 Tick ref_done_at = curTick() + dram.tRFC;
1412
1413 for (auto &b : banks) {
1414 b.actAllowedAt = ref_done_at;
1415 }
1416
1417 // at the moment this affects all ranks
1418 cmdList.push_back(Command(MemCommand::REF, 0, curTick()));
1419
1420 // Update the stats
1421 updatePowerStats();
1422
1423 DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), dram.tCK) -
1424 dram.timeStampOffset, rank);
1425
1426 // Update for next refresh
1427 refreshDueAt += dram.tREFI;
1428
1429 // make sure we did not wait so long that we cannot make up
1430 // for it
1431 if (refreshDueAt < ref_done_at) {
1432 fatal("Refresh was delayed so long we cannot catch up\n");
1433 }
1434
1435 // Run the refresh and schedule event to transition power states
1436 // when refresh completes
1437 refreshState = REF_RUN;
1438 schedule(refreshEvent, ref_done_at);
1439 return;
1440 }
1441
1442 if (refreshState == REF_RUN) {
1443 // should never get here with any banks active
1444 assert(numBanksActive == 0);
1445 assert(pwrState == PWR_REF);
1446
1447 assert(!powerEvent.scheduled());
1448
1449 if ((dram.ctrl->drainState() == DrainState::Draining) ||
1450 (dram.ctrl->drainState() == DrainState::Drained)) {
1451 // if draining, do not re-enter low-power mode.
1452 // simply go to IDLE and wait
1453 schedulePowerEvent(PWR_IDLE, curTick());
1454 } else {
1455 // At the moment, we sleep when the refresh ends and wait to be
1456 // woken up again if previously in a low-power state.
1457 if (pwrStatePostRefresh != PWR_IDLE) {
1458 // power State should be power Refresh
1459                 // power state should be PWR_REF
1460 DPRINTF(DRAMState, "Rank %d sleeping after refresh and was in "
1461 "power state %d before refreshing\n", rank,
1462 pwrStatePostRefresh);
1463 powerDownSleep(pwrState, curTick());
1464
1465 // Force PRE power-down if there are no outstanding commands
1466 // in Q after refresh.
1467 } else if (isQueueEmpty() && dram.enableDRAMPowerdown) {
1468 // still have refresh event outstanding but there should
1469 // be no other events outstanding
1470 assert(outstandingEvents == 1);
1471 DPRINTF(DRAMState, "Rank %d sleeping after refresh but was NOT"
1472 " in a low power state before refreshing\n", rank);
1473 powerDownSleep(PWR_PRE_PDN, curTick());
1474
1475 } else {
1476 // move to the idle power state once the refresh is done, this
1477 // will also move the refresh state machine to the refresh
1478 // idle state
1479 schedulePowerEvent(PWR_IDLE, curTick());
1480 }
1481 }
1482
1483 // At this point, we have completed the current refresh.
1484 // In the SREF bypass case, we do not get to this state in the
1485 // refresh STM and therefore can always schedule next event.
1486 // Compensate for the delay in actually performing the refresh
1487 // when scheduling the next one
1488 schedule(refreshEvent, refreshDueAt - dram.tRP);
1489
1490 DPRINTF(DRAMState, "Refresh done at %llu and next refresh"
1491 " at %llu\n", curTick(), refreshDueAt);
1492 }
1493 }
1494
1495 void
1496 DRAMInterface::Rank::schedulePowerEvent(PowerState pwr_state, Tick tick)
1497 {
1498 // respect causality
1499 assert(tick >= curTick());
1500
1501 if (!powerEvent.scheduled()) {
1502 DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n",
1503 tick, pwr_state);
1504
1505 // insert the new transition
1506 pwrStateTrans = pwr_state;
1507
1508 schedule(powerEvent, tick);
1509 } else {
1510 panic("Scheduled power event at %llu to state %d, "
1511 "with scheduled event at %llu to %d\n", tick, pwr_state,
1512 powerEvent.when(), pwrStateTrans);
1513 }
1514 }
1515
1516 void
1517 DRAMInterface::Rank::powerDownSleep(PowerState pwr_state, Tick tick)
1518 {
1519     // if the target low power state is active power-down, schedule the
1520     // transition to that state. In reality tCKE is needed to enter active
1521     // power-down; this is neglected here and could be added in the future.
1522 if (pwr_state == PWR_ACT_PDN) {
1523 schedulePowerEvent(pwr_state, tick);
1524 // push command to DRAMPower
1525 cmdList.push_back(Command(MemCommand::PDN_F_ACT, 0, tick));
1526 DPRINTF(DRAMPower, "%llu,PDN_F_ACT,0,%d\n", divCeil(tick,
1527 dram.tCK) - dram.timeStampOffset, rank);
1528 } else if (pwr_state == PWR_PRE_PDN) {
1529         // if the target low power state is precharge power-down, schedule
1530         // the transition to that state. In reality tCKE is needed to enter
1531         // precharge power-down; this is neglected here.
1532 schedulePowerEvent(pwr_state, tick);
1533 //push Command to DRAMPower
1534 cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
1535 DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
1536 dram.tCK) - dram.timeStampOffset, rank);
1537 } else if (pwr_state == PWR_REF) {
1538 // if a refresh just occurred
1539 // transition to PRE_PDN now that all banks are closed
1540 // precharge power down requires tCKE to enter. For simplicity
1541 // this is not considered.
1542 schedulePowerEvent(PWR_PRE_PDN, tick);
1543 //push Command to DRAMPower
1544 cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
1545 DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
1546 dram.tCK) - dram.timeStampOffset, rank);
1547 } else if (pwr_state == PWR_SREF) {
1548 // should only enter SREF after PRE-PD wakeup to do a refresh
1549 assert(pwrStatePostRefresh == PWR_PRE_PDN);
1550 // self refresh requires time tCKESR to enter. For simplicity,
1551 // this is not considered.
1552 schedulePowerEvent(PWR_SREF, tick);
1553 // push Command to DRAMPower
1554 cmdList.push_back(Command(MemCommand::SREN, 0, tick));
1555 DPRINTF(DRAMPower, "%llu,SREN,0,%d\n", divCeil(tick,
1556 dram.tCK) - dram.timeStampOffset, rank);
1557 }
1558 // Ensure that we don't power-down and back up in same tick
1559 // Once we commit to PD entry, do it and wait for at least 1tCK
1560 // This could be replaced with tCKE if/when that is added to the model
1561 wakeUpAllowedAt = tick + dram.tCK;
1562
1563 // Transitioning to a low power state, set flag
1564 inLowPowerState = true;
1565 }
1566
1567 void
1568 DRAMInterface::Rank::scheduleWakeUpEvent(Tick exit_delay)
1569 {
1570 Tick wake_up_tick = std::max(curTick(), wakeUpAllowedAt);
1571
1572 DPRINTF(DRAMState, "Scheduling wake-up for rank %d at tick %d\n",
1573 rank, wake_up_tick);
1574
1575 // if waking for refresh, hold previous state
1576 // else reset state back to IDLE
1577 if (refreshState == REF_PD_EXIT) {
1578 pwrStatePostRefresh = pwrState;
1579 } else {
1580 // don't automatically transition back to LP state after next REF
1581 pwrStatePostRefresh = PWR_IDLE;
1582 }
1583
1584 // schedule wake-up with event to ensure entry has completed before
1585 // we try to wake-up
1586 schedule(wakeUpEvent, wake_up_tick);
1587
1588 for (auto &b : banks) {
1589 // respect both causality and any existing bank
1590 // constraints, some banks could already have a
1591 // (auto) precharge scheduled
1592 b.wrAllowedAt = std::max(wake_up_tick + exit_delay, b.wrAllowedAt);
1593 b.rdAllowedAt = std::max(wake_up_tick + exit_delay, b.rdAllowedAt);
1594 b.preAllowedAt = std::max(wake_up_tick + exit_delay, b.preAllowedAt);
1595 b.actAllowedAt = std::max(wake_up_tick + exit_delay, b.actAllowedAt);
1596 }
1597 // Transitioning out of low power state, clear flag
1598 inLowPowerState = false;
1599
1600 // push to DRAMPower
1601 // use pwrStateTrans for cases where we have a power event scheduled
1602 // to enter low power that has not yet been processed
1603 if (pwrStateTrans == PWR_ACT_PDN) {
1604 cmdList.push_back(Command(MemCommand::PUP_ACT, 0, wake_up_tick));
1605 DPRINTF(DRAMPower, "%llu,PUP_ACT,0,%d\n", divCeil(wake_up_tick,
1606 dram.tCK) - dram.timeStampOffset, rank);
1607
1608 } else if (pwrStateTrans == PWR_PRE_PDN) {
1609 cmdList.push_back(Command(MemCommand::PUP_PRE, 0, wake_up_tick));
1610 DPRINTF(DRAMPower, "%llu,PUP_PRE,0,%d\n", divCeil(wake_up_tick,
1611 dram.tCK) - dram.timeStampOffset, rank);
1612 } else if (pwrStateTrans == PWR_SREF) {
1613 cmdList.push_back(Command(MemCommand::SREX, 0, wake_up_tick));
1614 DPRINTF(DRAMPower, "%llu,SREX,0,%d\n", divCeil(wake_up_tick,
1615 dram.tCK) - dram.timeStampOffset, rank);
1616 }
1617 }
1618
1619 void
1620 DRAMInterface::Rank::processWakeUpEvent()
1621 {
1622 // Should be in a power-down or self-refresh state
1623 assert((pwrState == PWR_ACT_PDN) || (pwrState == PWR_PRE_PDN) ||
1624 (pwrState == PWR_SREF));
1625
1626 // Check current state to determine transition state
1627 if (pwrState == PWR_ACT_PDN) {
1628 // banks still open, transition to PWR_ACT
1629 schedulePowerEvent(PWR_ACT, curTick());
1630 } else {
1631 // transitioning from a precharge power-down or self-refresh state
1632 // banks are closed - transition to PWR_IDLE
1633 schedulePowerEvent(PWR_IDLE, curTick());
1634 }
1635 }
1636
1637 void
1638 DRAMInterface::Rank::processPowerEvent()
1639 {
1640 assert(curTick() >= pwrStateTick);
1641 // remember where we were, and for how long
1642 Tick duration = curTick() - pwrStateTick;
1643 PowerState prev_state = pwrState;
1644
1645 // update the accounting
1646 stats.pwrStateTime[prev_state] += duration;
1647
1648     // add to the total idle time
1649 if ((prev_state == PWR_PRE_PDN) || (prev_state == PWR_ACT_PDN) ||
1650 (prev_state == PWR_SREF)) {
1651 stats.totalIdleTime += duration;
1652 }
1653
1654 pwrState = pwrStateTrans;
1655 pwrStateTick = curTick();
1656
1657 // if rank was refreshing, make sure to start scheduling requests again
1658 if (prev_state == PWR_REF) {
1659 // bus IDLED prior to REF
1660 // counter should be one for refresh command only
1661 assert(outstandingEvents == 1);
1662 // REF complete, decrement count and go back to IDLE
1663 --outstandingEvents;
1664 refreshState = REF_IDLE;
1665
1666 DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration);
1667 // if moving back to power-down after refresh
1668 if (pwrState != PWR_IDLE) {
1669 assert(pwrState == PWR_PRE_PDN);
1670 DPRINTF(DRAMState, "Switching to power down state after refreshing"
1671 " rank %d at %llu tick\n", rank, curTick());
1672 }
1673
1674 // completed refresh event, ensure next request is scheduled
1675 if (!dram.ctrl->requestEventScheduled()) {
1676 DPRINTF(DRAM, "Scheduling next request after refreshing"
1677 " rank %d\n", rank);
1678 dram.ctrl->restartScheduler(curTick());
1679 }
1680 }
1681
1682 if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) {
1683 // have exited ACT PD
1684 assert(prev_state == PWR_ACT_PDN);
1685
1686 // go back to REF event and close banks
1687 refreshState = REF_PRE;
1688 schedule(refreshEvent, curTick());
1689 } else if (pwrState == PWR_IDLE) {
1690 DPRINTF(DRAMState, "All banks precharged\n");
1691 if (prev_state == PWR_SREF) {
1692 // set refresh state to REF_SREF_EXIT, ensuring inRefIdleState
1693 // continues to return false during tXS after SREF exit
1694 // Schedule a refresh which kicks things back into action
1695 // when it finishes
1696 refreshState = REF_SREF_EXIT;
1697 schedule(refreshEvent, curTick() + dram.tXS);
1698 } else {
1699 // if we have a pending refresh, and are now moving to
1700 // the idle state, directly transition to, or schedule refresh
1701 if ((refreshState == REF_PRE) || (refreshState == REF_PD_EXIT)) {
1702 // ensure refresh is restarted only after final PRE command.
1703 // do not restart refresh if controller is in an intermediate
1704 // state, after PRE_PDN exit, when banks are IDLE but an
1705 // ACT is scheduled.
1706 if (!activateEvent.scheduled()) {
1707 // there should be nothing waiting at this point
1708 assert(!powerEvent.scheduled());
1709 if (refreshState == REF_PD_EXIT) {
1710 // exiting PRE PD, will be in IDLE until tXP expires
1711 // and then should transition to PWR_REF state
1712 assert(prev_state == PWR_PRE_PDN);
1713 schedulePowerEvent(PWR_REF, curTick() + dram.tXP);
1714 } else if (refreshState == REF_PRE) {
1715 // can directly move to PWR_REF state and proceed below
1716 pwrState = PWR_REF;
1717 }
1718 } else {
1719 // must have PRE scheduled to transition back to IDLE
1720 // and re-kick off refresh
1721 assert(prechargeEvent.scheduled());
1722 }
1723 }
1724 }
1725 }
1726
1727 // transition to the refresh state and re-start refresh process
1728 // refresh state machine will schedule the next power state transition
1729 if (pwrState == PWR_REF) {
1730 // completed final PRE for refresh or exiting power-down
1731 assert(refreshState == REF_PRE || refreshState == REF_PD_EXIT);
1732
1733 // exited PRE PD for refresh, with no pending commands
1734 // bypass auto-refresh and go straight to SREF, where memory
1735 // will issue refresh immediately upon entry
1736 if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
1737 (dram.ctrl->drainState() != DrainState::Draining) &&
1738 (dram.ctrl->drainState() != DrainState::Drained) &&
1739 dram.enableDRAMPowerdown) {
1740 DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning "
1741                 "to self refresh at %llu tick\n", rank, curTick());
1742 powerDownSleep(PWR_SREF, curTick());
1743
1744 // Since refresh was bypassed, remove event by decrementing count
1745 assert(outstandingEvents == 1);
1746 --outstandingEvents;
1747
1748 // reset state back to IDLE temporarily until SREF is entered
1749 pwrState = PWR_IDLE;
1750
1751 // Not bypassing refresh for SREF entry
1752 } else {
1753 DPRINTF(DRAMState, "Refreshing\n");
1754
1755 // there should be nothing waiting at this point
1756 assert(!powerEvent.scheduled());
1757
1758 // kick the refresh event loop into action again, and that
1759 // in turn will schedule a transition to the idle power
1760 // state once the refresh is done
1761 schedule(refreshEvent, curTick());
1762
1763 // Banks transitioned to IDLE, start REF
1764 refreshState = REF_START;
1765 }
1766 }
1767
1768 }
1769
1770 void
1771 DRAMInterface::Rank::updatePowerStats()
1772 {
1773 // All commands up to refresh have completed
1774 // flush cmdList to DRAMPower
1775 flushCmdList();
1776
1777 // Call the function that calculates window energy at intermediate update
1778 // events like at refresh, stats dump as well as at simulation exit.
1779     // The window starts at the last time calcWindowEnergy was called and
1780     // extends up to the current time.
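    // DRAMPower is clocked in memory-clock cycles, so the current tick is
    // converted to a cycle count relative to timeStampOffset before being
    // passed to the library.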
1781 power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
1782 dram.timeStampOffset);
1783
1784 // Get the energy from DRAMPower
1785 Data::MemoryPowerModel::Energy energy = power.powerlib.getEnergy();
1786
1787 // The energy components inside the power lib are calculated over
1788 // the window so accumulate into the corresponding gem5 stat
1789 stats.actEnergy += energy.act_energy * dram.devicesPerRank;
1790 stats.preEnergy += energy.pre_energy * dram.devicesPerRank;
1791 stats.readEnergy += energy.read_energy * dram.devicesPerRank;
1792 stats.writeEnergy += energy.write_energy * dram.devicesPerRank;
1793 stats.refreshEnergy += energy.ref_energy * dram.devicesPerRank;
1794 stats.actBackEnergy += energy.act_stdby_energy * dram.devicesPerRank;
1795 stats.preBackEnergy += energy.pre_stdby_energy * dram.devicesPerRank;
1796 stats.actPowerDownEnergy += energy.f_act_pd_energy * dram.devicesPerRank;
1797 stats.prePowerDownEnergy += energy.f_pre_pd_energy * dram.devicesPerRank;
1798 stats.selfRefreshEnergy += energy.sref_energy * dram.devicesPerRank;
1799
1800 // Accumulate window energy into the total energy.
1801 stats.totalEnergy += energy.window_energy * dram.devicesPerRank;
1802     // Average power must not be accumulated but calculated over the time
1803     // since the last stats reset. SimClock::Frequency is the tick
1804     // frequency (ticks per second), hence:
1805     //              energy (pJ)   tick_frequency
1806     // power (mW) = ----------- * --------------
1807     //              time (tick)        1e9
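    // For example, assuming the default 1 ps tick (SimClock::Frequency ==
    // 1e12) and a hypothetical window of 1000 pJ consumed over 2000 ticks:
    // (1000 / 2000) * (1e12 / 1e9) = 500 mW.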
1808 stats.averagePower = (stats.totalEnergy.value() /
1809 (curTick() - dram.lastStatsResetTick)) *
1810 (SimClock::Frequency / 1000000000.0);
1811 }
1812
1813 void
1814 DRAMInterface::Rank::computeStats()
1815 {
1816     DPRINTF(DRAM, "Computing stats due to a dump callback\n");
1817
1818 // Update the stats
1819 updatePowerStats();
1820
1821 // final update of power state times
1822 stats.pwrStateTime[pwrState] += (curTick() - pwrStateTick);
1823 pwrStateTick = curTick();
1824 }
1825
1826 void
1827 DRAMInterface::Rank::resetStats() {
1828     // The only way to clear the counters in DRAMPower is to call the
1829     // calcWindowEnergy function, as that then calls clearCounters. The
1830 // clearCounters method itself is private.
1831 power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
1832 dram.timeStampOffset);
1833
1834 }
1835
1836 bool
1837 DRAMInterface::Rank::forceSelfRefreshExit() const {
1838 return (readEntries != 0) ||
1839 (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
1840 }
1841
1842 void
1843 DRAMInterface::DRAMStats::resetStats()
1844 {
1845 dram.lastStatsResetTick = curTick();
1846 }
1847
1848 DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
1849 : Stats::Group(&_dram),
1850 dram(_dram),
1851
1852 ADD_STAT(readBursts, UNIT_COUNT, "Number of DRAM read bursts"),
1853 ADD_STAT(writeBursts, UNIT_COUNT, "Number of DRAM write bursts"),
1854
1855       ADD_STAT(perBankRdBursts, UNIT_COUNT, "Per bank read bursts"),
1856 ADD_STAT(perBankWrBursts, UNIT_COUNT, "Per bank write bursts"),
1857
1858 ADD_STAT(totQLat, UNIT_TICK, "Total ticks spent queuing"),
1859 ADD_STAT(totBusLat, UNIT_TICK, "Total ticks spent in databus transfers"),
1860 ADD_STAT(totMemAccLat, UNIT_TICK,
1861 "Total ticks spent from burst creation until serviced "
1862 "by the DRAM"),
1863
1864 ADD_STAT(avgQLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
1865 "Average queueing delay per DRAM burst"),
1866 ADD_STAT(avgBusLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
1867 "Average bus latency per DRAM burst"),
1868 ADD_STAT(avgMemAccLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
1869 "Average memory access latency per DRAM burst"),
1870
1871 ADD_STAT(readRowHits, UNIT_COUNT,
1872 "Number of row buffer hits during reads"),
1873 ADD_STAT(writeRowHits, UNIT_COUNT,
1874 "Number of row buffer hits during writes"),
1875 ADD_STAT(readRowHitRate, UNIT_RATIO, "Row buffer hit rate for reads"),
1876 ADD_STAT(writeRowHitRate, UNIT_RATIO, "Row buffer hit rate for writes"),
1877
1878 ADD_STAT(bytesPerActivate, UNIT_BYTE, "Bytes accessed per row activation"),
1879 ADD_STAT(bytesRead, UNIT_BYTE, "Total number of bytes read from DRAM"),
1880 ADD_STAT(bytesWritten, UNIT_BYTE, "Total number of bytes written to DRAM"),
1881 ADD_STAT(avgRdBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
1882 "Average DRAM read bandwidth in MiBytes/s"),
1883 ADD_STAT(avgWrBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
1884 "Average DRAM write bandwidth in MiBytes/s"),
1885 ADD_STAT(peakBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
1886 "Theoretical peak bandwidth in MiByte/s"),
1887
1888 ADD_STAT(busUtil, UNIT_RATIO, "Data bus utilization in percentage"),
1889 ADD_STAT(busUtilRead, UNIT_RATIO,
1890 "Data bus utilization in percentage for reads"),
1891 ADD_STAT(busUtilWrite, UNIT_RATIO,
1892 "Data bus utilization in percentage for writes"),
1893
1894 ADD_STAT(pageHitRate, UNIT_RATIO,
1895 "Row buffer hit rate, read and write combined")
1896
1897 {
1898 }
1899
1900 void
1901 DRAMInterface::DRAMStats::regStats()
1902 {
1903 using namespace Stats;
1904
1905 avgQLat.precision(2);
1906 avgBusLat.precision(2);
1907 avgMemAccLat.precision(2);
1908
1909 readRowHitRate.precision(2);
1910 writeRowHitRate.precision(2);
1911
1912 perBankRdBursts.init(dram.banksPerRank * dram.ranksPerChannel);
1913 perBankWrBursts.init(dram.banksPerRank * dram.ranksPerChannel);
1914
1915 bytesPerActivate
1916 .init(dram.maxAccessesPerRow ?
1917 dram.maxAccessesPerRow : dram.rowBufferSize)
1918 .flags(nozero);
1919
1920 peakBW.precision(2);
1921 busUtil.precision(2);
1922 busUtilWrite.precision(2);
1923 busUtilRead.precision(2);
1924
1925 pageHitRate.precision(2);
1926
1927 // Formula stats
1928 avgQLat = totQLat / readBursts;
1929 avgBusLat = totBusLat / readBursts;
1930 avgMemAccLat = totMemAccLat / readBursts;
1931
1932 readRowHitRate = (readRowHits / readBursts) * 100;
1933 writeRowHitRate = (writeRowHits / writeBursts) * 100;
1934
1935 avgRdBW = (bytesRead / 1000000) / simSeconds;
1936 avgWrBW = (bytesWritten / 1000000) / simSeconds;
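    // Peak bandwidth is one burst of bytesPerBurst() bytes every burstDelay()
    // ticks. As a rough sanity check with hypothetical values, a 64-byte
    // burst every 3332 ticks (3.332 ns, e.g. BL8 at 2400 MT/s) and a 1 ps
    // tick gives (1e12 / 3332) * 64 / 1e6 ~= 19208, i.e. roughly 19.2 GB/s.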
1937 peakBW = (SimClock::Frequency / dram.burstDelay()) *
1938 dram.bytesPerBurst() / 1000000;
1939
1940 busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
1941 busUtilRead = avgRdBW / peakBW * 100;
1942 busUtilWrite = avgWrBW / peakBW * 100;
1943
1944 pageHitRate = (writeRowHits + readRowHits) /
1945 (writeBursts + readBursts) * 100;
1946 }
1947
1948 DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
1949 : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
1950 rank(_rank),
1951
1952 ADD_STAT(actEnergy, UNIT_JOULE,
1953 "Energy for activate commands per rank (pJ)"),
1954 ADD_STAT(preEnergy, UNIT_JOULE,
1955 "Energy for precharge commands per rank (pJ)"),
1956 ADD_STAT(readEnergy, UNIT_JOULE,
1957 "Energy for read commands per rank (pJ)"),
1958 ADD_STAT(writeEnergy, UNIT_JOULE,
1959 "Energy for write commands per rank (pJ)"),
1960 ADD_STAT(refreshEnergy, UNIT_JOULE,
1961 "Energy for refresh commands per rank (pJ)"),
1962 ADD_STAT(actBackEnergy, UNIT_JOULE,
1963 "Energy for active background per rank (pJ)"),
1964 ADD_STAT(preBackEnergy, UNIT_JOULE,
1965 "Energy for precharge background per rank (pJ)"),
1966 ADD_STAT(actPowerDownEnergy, UNIT_JOULE,
1967 "Energy for active power-down per rank (pJ)"),
1968 ADD_STAT(prePowerDownEnergy, UNIT_JOULE,
1969 "Energy for precharge power-down per rank (pJ)"),
1970 ADD_STAT(selfRefreshEnergy, UNIT_JOULE,
1971 "Energy for self refresh per rank (pJ)"),
1972
1973 ADD_STAT(totalEnergy, UNIT_JOULE, "Total energy per rank (pJ)"),
1974 ADD_STAT(averagePower, UNIT_WATT, "Core power per rank (mW)"),
1975
1976       ADD_STAT(totalIdleTime, UNIT_TICK, "Total idle time per DRAM rank"),
1977 ADD_STAT(pwrStateTime, UNIT_TICK, "Time in different power states")
1978 {
1979 }
1980
1981 void
1982 DRAMInterface::RankStats::regStats()
1983 {
1984 Stats::Group::regStats();
1985
1986 pwrStateTime
1987 .init(6)
1988 .subname(0, "IDLE")
1989 .subname(1, "REF")
1990 .subname(2, "SREF")
1991 .subname(3, "PRE_PDN")
1992 .subname(4, "ACT")
1993 .subname(5, "ACT_PDN");
1994 }
1995
1996 void
1997 DRAMInterface::RankStats::resetStats()
1998 {
1999 Stats::Group::resetStats();
2000
2001 rank.resetStats();
2002 }
2003
2004 void
2005 DRAMInterface::RankStats::preDumpStats()
2006 {
2007 Stats::Group::preDumpStats();
2008
2009 rank.computeStats();
2010 }
2011
2012 NVMInterface::NVMInterface(const NVMInterfaceParams &_p)
2013 : MemInterface(_p),
2014 maxPendingWrites(_p.max_pending_writes),
2015 maxPendingReads(_p.max_pending_reads),
2016 twoCycleRdWr(_p.two_cycle_rdwr),
2017 tREAD(_p.tREAD), tWRITE(_p.tWRITE), tSEND(_p.tSEND),
2018 stats(*this),
2019 writeRespondEvent([this]{ processWriteRespondEvent(); }, name()),
2020 readReadyEvent([this]{ processReadReadyEvent(); }, name()),
2021 nextReadAt(0), numPendingReads(0), numReadDataReady(0),
2022 numReadsToIssue(0), numWritesQueued(0)
2023 {
2024 DPRINTF(NVM, "Setting up NVM Interface\n");
2025
2026 fatal_if(!isPowerOf2(burstSize), "NVM burst size %d is not allowed, "
2027 "must be a power of two\n", burstSize);
2028
2029 // sanity check the ranks since we rely on bit slicing for the
2030 // address decoding
2031 fatal_if(!isPowerOf2(ranksPerChannel), "NVM rank count of %d is "
2032 "not allowed, must be a power of two\n", ranksPerChannel);
2033
2034     for (int i = 0; i < ranksPerChannel; i++) {
2035 // Add NVM ranks to the system
2036 DPRINTF(NVM, "Creating NVM rank %d \n", i);
2037 Rank* rank = new Rank(_p, i, *this);
2038 ranks.push_back(rank);
2039 }
2040
2041 uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
2042
2043 DPRINTF(NVM, "NVM capacity %lld (%lld) bytes\n", capacity,
2044 AbstractMemory::size());
2045
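    // For example, a hypothetical 16 GiB interface with 8 KiB row buffers,
    // 16 banks per rank and a single rank yields
    // 17179869184 / (8192 * 16 * 1) = 131072 rows per bank.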
2046 rowsPerBank = capacity / (rowBufferSize *
2047 banksPerRank * ranksPerChannel);
2048
2049 }
2050
2051 NVMInterface::Rank::Rank(const NVMInterfaceParams &_p,
2052 int _rank, NVMInterface& _nvm)
2053 : EventManager(&_nvm), rank(_rank), banks(_p.banks_per_rank)
2054 {
2055 for (int b = 0; b < _p.banks_per_rank; b++) {
2056 banks[b].bank = b;
2057 // No bank groups; simply assign to bank number
2058 banks[b].bankgr = b;
2059 }
2060 }
2061
2062 void
2063 NVMInterface::init()
2064 {
2065 AbstractMemory::init();
2066 }
2067
2068 void NVMInterface::setupRank(const uint8_t rank, const bool is_read)
2069 {
2070 if (is_read) {
2071 // increment count to trigger read and track number of reads in Q
2072 numReadsToIssue++;
2073 } else {
2074 // increment count to track number of writes in Q
2075 numWritesQueued++;
2076 }
2077 }
2078
2079 std::pair<MemPacketQueue::iterator, Tick>
2080 NVMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
2081 {
2082     // remember if we found a hit, but one that cannot issue seamlessly
2083 bool found_prepped_pkt = false;
2084
2085 auto selected_pkt_it = queue.end();
2086 Tick selected_col_at = MaxTick;
2087
2088 for (auto i = queue.begin(); i != queue.end() ; ++i) {
2089 MemPacket* pkt = *i;
2090
2091 // select optimal NVM packet in Q
2092 if (!pkt->isDram()) {
2093 const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
2094 const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
2095 bank.wrAllowedAt;
2096
2097 // check if rank is not doing a refresh and thus is available,
2098 // if not, jump to the next packet
2099 if (burstReady(pkt)) {
2100 DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__,
2101 pkt->bank, pkt->rank);
2102
2103 // no additional rank-to-rank or media delays
2104 if (col_allowed_at <= min_col_at) {
2105 // FCFS within entries that can issue without
2106 // additional delay, such as same rank accesses
2107 // or media delay requirements
2108 selected_pkt_it = i;
2109 selected_col_at = col_allowed_at;
2110 // no need to look through the remaining queue entries
2111 DPRINTF(NVM, "%s Seamless buffer hit\n", __func__);
2112 break;
2113 } else if (!found_prepped_pkt) {
2114                     // packet is to prepped region but cannot issue
2115 // seamlessly; remember this one and continue
2116 selected_pkt_it = i;
2117 selected_col_at = col_allowed_at;
2118 DPRINTF(NVM, "%s Prepped packet found \n", __func__);
2119 found_prepped_pkt = true;
2120 }
2121 } else {
2122 DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__,
2123 pkt->bank, pkt->rank);
2124 }
2125 }
2126 }
2127
2128 if (selected_pkt_it == queue.end()) {
2129 DPRINTF(NVM, "%s no available NVM ranks found\n", __func__);
2130 }
2131
2132 return std::make_pair(selected_pkt_it, selected_col_at);
2133 }
2134
2135 void
2136 NVMInterface::chooseRead(MemPacketQueue& queue)
2137 {
2138 Tick cmd_at = std::max(curTick(), nextReadAt);
2139
2140 // This method does the arbitration between non-deterministic read
2141 // requests to NVM. The chosen packet is not removed from the queue
2142 // at this time. Removal from the queue will occur when the data is
2143 // ready and a separate SEND command is issued to retrieve it via the
2144 // chooseNext function in the top-level controller.
2145 assert(!queue.empty());
2146
2147 assert(numReadsToIssue > 0);
2148 numReadsToIssue--;
2149 // For simplicity, issue non-deterministic reads in order (fcfs)
2150 for (auto i = queue.begin(); i != queue.end() ; ++i) {
2151 MemPacket* pkt = *i;
2152
2153 // Find 1st NVM read packet that hasn't issued read command
2154 if (pkt->readyTime == MaxTick && !pkt->isDram() && pkt->isRead()) {
2155 // get the bank
2156 Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];
2157
2158             // issuing a read, inc counter and verify we haven't overrun
2159 numPendingReads++;
2160 assert(numPendingReads <= maxPendingReads);
2161
2162 // increment the bytes accessed and the accesses per row
2163 bank_ref.bytesAccessed += burstSize;
2164
2165             // Verify there is command bandwidth to issue
2166             // Host can issue read immediately with buffering closer
2167 // to the NVM. The actual execution at the NVM may be delayed
2168 // due to busy resources
2169 if (twoCycleRdWr) {
2170 cmd_at = ctrl->verifyMultiCmd(cmd_at,
2171 maxCommandsPerWindow, tCK);
2172 } else {
2173 cmd_at = ctrl->verifySingleCmd(cmd_at,
2174 maxCommandsPerWindow);
2175 }
2176
2177 // Update delay to next read
2178 // Ensures single read command issued per cycle
2179 nextReadAt = cmd_at + tCK;
2180
2181 // If accessing a new location in this bank, update timing
2182 // and stats
2183 if (bank_ref.openRow != pkt->row) {
2184 // update the open bank, re-using row field
2185 bank_ref.openRow = pkt->row;
2186
2187 // sample the bytes accessed to a buffer in this bank
2188 // here when we are re-buffering the data
2189 stats.bytesPerBank.sample(bank_ref.bytesAccessed);
2190 // start counting anew
2191 bank_ref.bytesAccessed = 0;
2192
2193             // hold off the next command to this bank until the read completes
2194 // and the data has been successfully buffered
2195 // can pipeline accesses to the same bank, sending them
2196 // across the interface B2B, but will incur full access
2197 // delay between data ready responses to different buffers
2198 // in a bank
2199 bank_ref.actAllowedAt = std::max(cmd_at,
2200 bank_ref.actAllowedAt) + tREAD;
2201 }
2202             // update the per-packet readyTime to hold off the burst read,
2203 // overloading readyTime, which will be updated again when the
2204 // burst is issued
2205 pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt);
2206
2207 DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. "
2208 "Data ready at %d\n",
2209 bank_ref.bank, cmd_at, pkt->readyTime);
2210
2211 // Insert into read ready queue. It will be handled after
2212 // the media delay has been met
2213 if (readReadyQueue.empty()) {
2214 assert(!readReadyEvent.scheduled());
2215 schedule(readReadyEvent, pkt->readyTime);
2216 } else if (readReadyEvent.when() > pkt->readyTime) {
2217 // move it sooner in time, to the first read with data
2218 reschedule(readReadyEvent, pkt->readyTime);
2219 } else {
2220 assert(readReadyEvent.scheduled());
2221 }
2222 readReadyQueue.push_back(pkt->readyTime);
2223
2224 // found an NVM read to issue - break out
2225 break;
2226 }
2227 }
2228 }
2229
2230 void
2231 NVMInterface::processReadReadyEvent()
2232 {
2233 // signal that there is read data ready to be transmitted
2234 numReadDataReady++;
2235
2236 DPRINTF(NVM,
2237 "processReadReadyEvent(): Data for an NVM read is ready. "
2238 "numReadDataReady is %d\t numPendingReads is %d\n",
2239 numReadDataReady, numPendingReads);
2240
2241 // Find lowest ready time and verify it is equal to curTick
2242 // also find the next lowest to schedule next event
2243 // Done with this response, erase entry
2244 auto ready_it = readReadyQueue.begin();
2245 Tick next_ready_at = MaxTick;
2246 for (auto i = readReadyQueue.begin(); i != readReadyQueue.end() ; ++i) {
2247 if (*ready_it > *i) {
2248 next_ready_at = *ready_it;
2249 ready_it = i;
2250 } else if ((next_ready_at > *i) && (i != ready_it)) {
2251 next_ready_at = *i;
2252 }
2253 }
2254
2255 // Verify we found the time of this event and remove it
2256 assert(*ready_it == curTick());
2257 readReadyQueue.erase(ready_it);
2258
2259 if (!readReadyQueue.empty()) {
2260 assert(readReadyQueue.front() >= curTick());
2261 assert(!readReadyEvent.scheduled());
2262 schedule(readReadyEvent, next_ready_at);
2263 }
2264
2265 // It is possible that a new command kicks things back into
2266 // action before reaching this point but need to ensure that we
2267 // continue to process new commands as read data becomes ready
2268 // This will also trigger a drain if needed
2269 if (!ctrl->requestEventScheduled()) {
2270 DPRINTF(NVM, "Restart controller scheduler immediately\n");
2271 ctrl->restartScheduler(curTick());
2272 }
2273 }
2274
2275 bool
2276 NVMInterface::burstReady(MemPacket* pkt) const {
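    // A read burst can issue once its non-deterministic read has completed
    // (data is buffered at the media) and the bus is in the read state; a
    // write burst can issue while the media write buffer has room and the
    // bus is in the write state.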
2277 bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true)) &&
2278 (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
2279 bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true) &&
2280 !writeRespQueueFull();
2281 return (read_rdy || write_rdy);
2282 }
2283
2284 std::pair<Tick, Tick>
2285 NVMInterface::doBurstAccess(MemPacket* pkt, Tick next_burst_at)
2286 {
2287 DPRINTF(NVM, "NVM Timing access to addr %lld, rank/bank/row %d %d %d\n",
2288 pkt->addr, pkt->rank, pkt->bank, pkt->row);
2289
2290 // get the bank
2291 Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];
2292
2293 // respect any constraints on the command
2294 const Tick bst_allowed_at = pkt->isRead() ?
2295 bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;
2296
2297 // we need to wait until the bus is available before we can issue
2298 // the command; need minimum of tBURST between commands
2299 Tick cmd_at = std::max(bst_allowed_at, curTick());
2300
2301 // we need to wait until the bus is available before we can issue
2302 // the command; need minimum of tBURST between commands
2303 cmd_at = std::max(cmd_at, next_burst_at);
2304
2305 // Verify there is command bandwidth to issue
2306 // Read burst (send command) is a simple data access and only requires
2307 // one command cycle
2308 // Write command may require multiple cycles to enable larger address space
2309 if (pkt->isRead() || !twoCycleRdWr) {
2310 cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
2311 } else {
2312 cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
2313 }
2314 // update the packet ready time to reflect when data will be transferred
2315 // Use the same bus delays defined for NVM
2316 pkt->readyTime = cmd_at + tSEND + tBURST;
2317
2318 Tick dly_to_rd_cmd;
2319 Tick dly_to_wr_cmd;
2320 for (auto n : ranks) {
2321 for (int i = 0; i < banksPerRank; i++) {
2322 // base delay is a function of tBURST and bus turnaround
2323 dly_to_rd_cmd = pkt->isRead() ? tBURST : writeToReadDelay();
2324 dly_to_wr_cmd = pkt->isRead() ? readToWriteDelay() : tBURST;
2325
2326 if (pkt->rank != n->rank) {
2327 // adjust timing for different ranks
2328 // Need to account for rank-to-rank switching with tCS
2329 dly_to_wr_cmd = rankToRankDelay();
2330 dly_to_rd_cmd = rankToRankDelay();
2331 }
2332 n->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
2333 n->banks[i].rdAllowedAt);
2334
2335 n->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
2336 n->banks[i].wrAllowedAt);
2337 }
2338 }
2339
2340 DPRINTF(NVM, "NVM Access to %lld, ready at %lld.\n",
2341 pkt->addr, pkt->readyTime);
2342
2343 if (pkt->isRead()) {
2344 // completed the read, decrement counters
2345 assert(numPendingReads != 0);
2346 assert(numReadDataReady != 0);
2347
2348 numPendingReads--;
2349 numReadDataReady--;
2350 } else {
2351 // Adjust number of NVM writes in Q
2352 assert(numWritesQueued > 0);
2353 numWritesQueued--;
2354
2355 // increment the bytes accessed and the accesses per row
2356 // only increment for writes as the reads are handled when
2357 // the non-deterministic read is issued, before the data transfer
2358 bank_ref.bytesAccessed += burstSize;
2359
2360 // Commands will be issued serially when accessing the same bank
2361 // Commands can issue in parallel to different banks
2362 if ((bank_ref.bank == pkt->bank) &&
2363 (bank_ref.openRow != pkt->row)) {
2364 // update the open buffer, re-using row field
2365 bank_ref.openRow = pkt->row;
2366
2367 // sample the bytes accessed to a buffer in this bank
2368 // here when we are re-buffering the data
2369 stats.bytesPerBank.sample(bank_ref.bytesAccessed);
2370 // start counting anew
2371 bank_ref.bytesAccessed = 0;
2372 }
2373
2374 // Determine when write will actually complete, assuming it is
2375 // scheduled to push to NVM immediately
2376 // update actAllowedAt to serialize next command completion that
2377 // accesses this bank; must wait until this write completes
2378 // Data accesses to the same buffer in this bank
2379 // can issue immediately after actAllowedAt expires, without
2380 // waiting additional delay of tWRITE. Can revisit this
2381 // assumption/simplification in the future.
2382 bank_ref.actAllowedAt = std::max(pkt->readyTime,
2383 bank_ref.actAllowedAt) + tWRITE;
2384
2385 // Need to track number of outstanding writes to
2386 // ensure 'buffer' on media controller does not overflow
2387 assert(!writeRespQueueFull());
2388
2389 // Insert into write done queue. It will be handled after
2390 // the media delay has been met
2391 if (writeRespQueueEmpty()) {
2392 assert(!writeRespondEvent.scheduled());
2393 schedule(writeRespondEvent, bank_ref.actAllowedAt);
2394 } else {
2395 assert(writeRespondEvent.scheduled());
2396 }
2397 writeRespQueue.push_back(bank_ref.actAllowedAt);
2398 writeRespQueue.sort();
2399 if (writeRespondEvent.when() > bank_ref.actAllowedAt) {
2400             DPRINTF(NVM, "Rescheduled respond event from %lld to %lld\n",
2401                 writeRespondEvent.when(), bank_ref.actAllowedAt);
2402             DPRINTF(NVM, "Front of response queue is %lld\n",
2403 writeRespQueue.front());
2404 reschedule(writeRespondEvent, bank_ref.actAllowedAt);
2405 }
2406
2407 }
2408
2409 // Update the stats
2410 if (pkt->isRead()) {
2411 stats.readBursts++;
2412 stats.bytesRead += burstSize;
2413 stats.perBankRdBursts[pkt->bankId]++;
2414 stats.pendingReads.sample(numPendingReads);
2415
2416 // Update latency stats
2417 stats.totMemAccLat += pkt->readyTime - pkt->entryTime;
2418 stats.totBusLat += tBURST;
2419 stats.totQLat += cmd_at - pkt->entryTime;
2420 } else {
2421 stats.writeBursts++;
2422 stats.bytesWritten += burstSize;
2423 stats.perBankWrBursts[pkt->bankId]++;
2424 }
2425
2426 return std::make_pair(cmd_at, cmd_at + tBURST);
2427 }
2428
2429 void
2430 NVMInterface::processWriteRespondEvent()
2431 {
2432 DPRINTF(NVM,
2433         "processWriteRespondEvent(): An NVM write reached its readyTime. "
2434 "%d remaining pending NVM writes\n", writeRespQueue.size());
2435
2436 // Update stat to track histogram of pending writes
2437 stats.pendingWrites.sample(writeRespQueue.size());
2438
2439 // Done with this response, pop entry
2440 writeRespQueue.pop_front();
2441
2442 if (!writeRespQueue.empty()) {
2443 assert(writeRespQueue.front() >= curTick());
2444 assert(!writeRespondEvent.scheduled());
2445 schedule(writeRespondEvent, writeRespQueue.front());
2446 }
2447
2448 // It is possible that a new command kicks things back into
2449 // action before reaching this point but need to ensure that we
2450 // continue to process new commands as writes complete at the media and
2451 // credits become available. This will also trigger a drain if needed
2452 if (!ctrl->requestEventScheduled()) {
2453 DPRINTF(NVM, "Restart controller scheduler immediately\n");
2454 ctrl->restartScheduler(curTick());
2455 }
2456 }
2457
2458 void
2459 NVMInterface::addRankToRankDelay(Tick cmd_at)
2460 {
2461 // update timing for NVM ranks due to bursts issued
2462 // to ranks for other media interfaces
2463 for (auto n : ranks) {
2464 for (int i = 0; i < banksPerRank; i++) {
2465 // different rank by default
2466 // Need to only account for rank-to-rank switching
2467 n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
2468 n->banks[i].rdAllowedAt);
2469 n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
2470 n->banks[i].wrAllowedAt);
2471 }
2472 }
2473 }
2474
2475 bool
2476 NVMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm)
2477 {
2478     DPRINTF(NVM, "isBusy: numReadDataReady = %d\n", numReadDataReady);
2479     // Determine whether the NVM is busy and cannot issue a burst
2480 // A read burst cannot issue when data is not ready from the NVM
2481 // Also check that we have reads queued to ensure we can change
2482 // bus direction to service potential write commands.
2483 // A write cannot issue once we've reached MAX pending writes
2484 // Only assert busy for the write case when there are also
2485 // no reads in Q and the write queue only contains NVM commands
2486 // This allows the bus state to switch and service reads
2487 return (ctrl->inReadBusState(true) ?
2488 (numReadDataReady == 0) && !read_queue_empty :
2489 writeRespQueueFull() && read_queue_empty &&
2490 all_writes_nvm);
2491 }
2492
2493
2494 NVMInterface::NVMStats::NVMStats(NVMInterface &_nvm)
2495 : Stats::Group(&_nvm),
2496 nvm(_nvm),
2497
2498 ADD_STAT(readBursts, UNIT_COUNT, "Number of NVM read bursts"),
2499 ADD_STAT(writeBursts, UNIT_COUNT, "Number of NVM write bursts"),
2500
2501       ADD_STAT(perBankRdBursts, UNIT_COUNT, "Per bank read bursts"),
2502 ADD_STAT(perBankWrBursts, UNIT_COUNT, "Per bank write bursts"),
2503
2504 ADD_STAT(totQLat, UNIT_TICK, "Total ticks spent queuing"),
2505 ADD_STAT(totBusLat, UNIT_TICK, "Total ticks spent in databus transfers"),
2506 ADD_STAT(totMemAccLat, UNIT_TICK,
2507 "Total ticks spent from burst creation until serviced "
2508 "by the NVM"),
2509 ADD_STAT(avgQLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
2510 "Average queueing delay per NVM burst"),
2511 ADD_STAT(avgBusLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
2512 "Average bus latency per NVM burst"),
2513 ADD_STAT(avgMemAccLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
2514 "Average memory access latency per NVM burst"),
2515
2516       ADD_STAT(bytesRead, UNIT_BYTE, "Total number of bytes read from NVM"),
2517       ADD_STAT(bytesWritten, UNIT_BYTE, "Total number of bytes written to NVM"),
2518       ADD_STAT(avgRdBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
2519                "Average NVM read bandwidth in MiBytes/s"),
2520       ADD_STAT(avgWrBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
2521                "Average NVM write bandwidth in MiBytes/s"),
2522 ADD_STAT(peakBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
2523 "Theoretical peak bandwidth in MiByte/s"),
2524 ADD_STAT(busUtil, UNIT_RATIO, "NVM Data bus utilization in percentage"),
2525 ADD_STAT(busUtilRead, UNIT_RATIO,
2526 "NVM Data bus read utilization in percentage"),
2527 ADD_STAT(busUtilWrite, UNIT_RATIO,
2528 "NVM Data bus write utilization in percentage"),
2529
2530 ADD_STAT(pendingReads, UNIT_COUNT,
2531 "Reads issued to NVM for which data has not been transferred"),
2532 ADD_STAT(bytesPerBank, UNIT_BYTE,
2533 "Bytes read within a bank before loading new bank")
2534
2535 {
2536 }
2537
2538 void
2539 NVMInterface::NVMStats::regStats()
2540 {
2541 using namespace Stats;
2542
2543 perBankRdBursts.init(nvm.ranksPerChannel == 0 ? 1 :
2544 nvm.banksPerRank * nvm.ranksPerChannel);
2545
2546 perBankWrBursts.init(nvm.ranksPerChannel == 0 ? 1 :
2547 nvm.banksPerRank * nvm.ranksPerChannel);
2548
2549 avgQLat.precision(2);
2550 avgBusLat.precision(2);
2551 avgMemAccLat.precision(2);
2552
2553 avgRdBW.precision(2);
2554 avgWrBW.precision(2);
2555 peakBW.precision(2);
2556
2557 busUtil.precision(2);
2558 busUtilRead.precision(2);
2559 busUtilWrite.precision(2);
2560
2561 pendingReads
2562 .init(nvm.maxPendingReads)
2563 .flags(nozero);
2564
2565 pendingWrites
2566 .init(nvm.maxPendingWrites)
2567 .flags(nozero);
2568
2569 bytesPerBank
2570 .init(nvm.rowBufferSize)
2571 .flags(nozero);
2572
2573 avgQLat = totQLat / readBursts;
2574 avgBusLat = totBusLat / readBursts;
2575 avgMemAccLat = totMemAccLat / readBursts;
2576
2577 avgRdBW = (bytesRead / 1000000) / simSeconds;
2578 avgWrBW = (bytesWritten / 1000000) / simSeconds;
2579 peakBW = (SimClock::Frequency / nvm.tBURST) *
2580 nvm.burstSize / 1000000;
2581
2582 busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
2583 busUtilRead = avgRdBW / peakBW * 100;
2584 busUtilWrite = avgWrBW / peakBW * 100;
2585 }