/*
 * Copyright (c) 2010-2020 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2013 Amin Farmahini-Farahani
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "mem/mem_interface.hh"

#include "base/bitfield.hh"
#include "base/cprintf.hh"
#include "base/trace.hh"
#include "debug/DRAM.hh"
#include "debug/DRAMPower.hh"
#include "debug/DRAMState.hh"
#include "debug/NVM.hh"
#include "sim/system.hh"

MemInterface::MemInterface(const MemInterfaceParams &_p)
    : AbstractMemory(_p),
      addrMapping(_p.addr_mapping),
      burstSize((_p.devices_per_rank * _p.burst_length *
                 _p.device_bus_width) / 8),
      deviceSize(_p.device_size),
      deviceRowBufferSize(_p.device_rowbuffer_size),
      devicesPerRank(_p.devices_per_rank),
      rowBufferSize(devicesPerRank * deviceRowBufferSize),
      burstsPerRowBuffer(rowBufferSize / burstSize),
      burstsPerStripe(range.interleaved() ?
                      range.granularity() / burstSize : 1),
      ranksPerChannel(_p.ranks_per_channel),
      banksPerRank(_p.banks_per_rank), rowsPerBank(0),
      tCK(_p.tCK), tCS(_p.tCS), tBURST(_p.tBURST),
      readBufferSize(_p.read_buffer_size),
      writeBufferSize(_p.write_buffer_size)
{}
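
// Worked example of the burstSize expression above (hypothetical
// DDR4-style configuration, not taken from any bundled config): with
// devices_per_rank = 8, burst_length = 8 beats and device_bus_width =
// 8 bits, one burst moves (8 * 8 * 8) / 8 = 64 bytes, i.e. exactly one
// cache line on a system with 64-byte lines.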

void
MemInterface::setCtrl(MemCtrl* _ctrl, unsigned int command_window)
{
    ctrl = _ctrl;
    maxCommandsPerWindow = command_window / tCK;
}
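
// Illustration of the window arithmetic above (made-up numbers): with
// a command window of 5 ns worth of ticks and tCK = 1.25 ns, at most
// 5 / 1.25 = 4 commands can be issued on the command bus per window.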

MemPacket*
MemInterface::decodePacket(const PacketPtr pkt, Addr pkt_addr,
                           unsigned size, bool is_read, bool is_dram)
{
    // decode the address based on the address mapping scheme, with
    // Ro, Ra, Co, Ba and Ch denoting row, rank, column, bank and
    // channel, respectively
    uint8_t rank;
    uint8_t bank;
    // use a 64-bit unsigned during the computations as the row is
    // always the top bits, and check before creating the packet
    uint64_t row;

    // Get packed address, starting at 0
    Addr addr = getCtrlAddr(pkt_addr);

    // truncate the address to a memory burst, which makes it unique to
    // a specific buffer, row, bank, rank and channel
    addr = addr / burstSize;

    // we have removed the lowest order address bits that denote the
    // position within the column
    if (addrMapping == Enums::RoRaBaChCo || addrMapping == Enums::RoRaBaCoCh) {
        // the lowest order bits denote the column to ensure that
        // sequential cache lines occupy the same row
        addr = addr / burstsPerRowBuffer;

        // after the channel bits, get the bank bits to interleave
        // over the banks
        bank = addr % banksPerRank;
        addr = addr / banksPerRank;

        // after the bank, we get the rank bits which thus interleaves
        // over the ranks
        rank = addr % ranksPerChannel;
        addr = addr / ranksPerChannel;

        // lastly, get the row bits, no need to remove them from addr
        row = addr % rowsPerBank;
    } else if (addrMapping == Enums::RoCoRaBaCh) {
        // with emerging technologies, could have small page size with
        // interleaving granularity greater than row buffer
        if (burstsPerStripe > burstsPerRowBuffer) {
            // remove column bits which are a subset of burstsPerStripe
            addr = addr / burstsPerRowBuffer;
        } else {
            // remove lower column bits below channel bits
            addr = addr / burstsPerStripe;
        }

        // start with the bank bits, as this provides the maximum
        // opportunity for parallelism between requests
        bank = addr % banksPerRank;
        addr = addr / banksPerRank;

        // next get the rank bits
        rank = addr % ranksPerChannel;
        addr = addr / ranksPerChannel;

        // next, the higher-order column bits
        if (burstsPerStripe < burstsPerRowBuffer) {
            addr = addr / (burstsPerRowBuffer / burstsPerStripe);
        }

        // lastly, get the row bits, no need to remove them from addr
        row = addr % rowsPerBank;
    } else {
        panic("Unknown address mapping policy chosen!");
    }

    assert(rank < ranksPerChannel);
    assert(bank < banksPerRank);
    assert(row < rowsPerBank);
    assert(row < Bank::NO_ROW);

    DPRINTF(DRAM, "Address: %lld Rank %d Bank %d Row %d\n",
            pkt_addr, rank, bank, row);

    // create the corresponding memory packet with the entry time and
    // ready time set to the current tick, the latter will be updated
    // later
    uint16_t bank_id = banksPerRank * rank + bank;

    return new MemPacket(pkt, is_read, is_dram, rank, bank, row, bank_id,
                         pkt_addr, size);
}
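
// Worked example of the RoRaBaChCo/RoRaBaCoCh path above, with a
// hypothetical configuration (not from any bundled config): burstSize
// = 64 B, burstsPerRowBuffer = 128, banksPerRank = 16 and
// ranksPerChannel = 2. A controller-local byte address A then decodes
// as:
//   burst = A / 64
//   bank  = (burst / 128) % 16
//   rank  = (burst / (128 * 16)) % 2
//   row   = burst / (128 * 16 * 2)
// so consecutive bursts walk through a full row before touching the
// next bank, matching the comment about sequential cache lines above.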

std::pair<MemPacketQueue::iterator, Tick>
DRAMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
{
    std::vector<uint32_t> earliest_banks(ranksPerChannel, 0);

    // Has minBankPrep been called to populate earliest_banks?
    bool filled_earliest_banks = false;
    // can the PRE/ACT sequence be done without impacting utilization?
    bool hidden_bank_prep = false;

    // search for seamless row hits first, if no seamless row hit is
    // found then determine if there are other packets that can be issued
    // without incurring additional bus delay due to bank timing
    // Will select closed rows first to enable more open row possibilities
    // in future selections
    bool found_hidden_bank = false;

    // remember if we found a row hit, not seamless, but bank prepped
    bool found_prepped_pkt = false;

    // if we have no row hit, prepped or not, and no seamless packet,
    // just go for the earliest possible
    bool found_earliest_pkt = false;

    Tick selected_col_at = MaxTick;
    auto selected_pkt_it = queue.end();

    for (auto i = queue.begin(); i != queue.end() ; ++i) {
        MemPacket* pkt = *i;

        // select optimal DRAM packet in Q
        if (pkt->isDram()) {
            const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
            const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
                                                        bank.wrAllowedAt;

            DPRINTF(DRAM, "%s checking DRAM packet in bank %d, row %d\n",
                    __func__, pkt->bank, pkt->row);

            // check if rank is not doing a refresh and thus is available,
            // if not, jump to the next packet
            if (burstReady(pkt)) {

                DPRINTF(DRAM,
                        "%s bank %d - Rank %d available\n", __func__,
                        pkt->bank, pkt->rank);

                // check if it is a row hit
                if (bank.openRow == pkt->row) {
                    // no additional rank-to-rank or same bank-group
                    // delays, or we switched read/write and might as well
                    // go for the row hit
                    if (col_allowed_at <= min_col_at) {
                        // FCFS within the hits, giving priority to
                        // commands that can issue seamlessly, without
                        // additional delay, such as same rank accesses
                        // and/or different bank-group accesses
                        DPRINTF(DRAM, "%s Seamless buffer hit\n", __func__);
                        selected_pkt_it = i;
                        selected_col_at = col_allowed_at;
                        // no need to look through the remaining queue entries
                        break;
                    } else if (!found_hidden_bank && !found_prepped_pkt) {
                        // if we did not find a packet to a closed row that can
                        // issue the bank commands without incurring delay, and
                        // did not yet find a packet to a prepped row, remember
                        // the current one
                        selected_pkt_it = i;
                        selected_col_at = col_allowed_at;
                        found_prepped_pkt = true;
                        DPRINTF(DRAM, "%s Prepped row buffer hit\n", __func__);
                    }
                } else if (!found_earliest_pkt) {
                    // if we have not initialised the bank status, do it
                    // now, and only once per scheduling decisions
                    if (!filled_earliest_banks) {
                        // determine entries with earliest bank delay
                        std::tie(earliest_banks, hidden_bank_prep) =
                            minBankPrep(queue, min_col_at);
                        filled_earliest_banks = true;
                    }

                    // bank is amongst first available banks
                    // minBankPrep will give priority to packets that can
                    // issue seamlessly
                    if (bits(earliest_banks[pkt->rank],
                             pkt->bank, pkt->bank)) {
                        found_earliest_pkt = true;
                        found_hidden_bank = hidden_bank_prep;

                        // give priority to packets that can issue
                        // bank commands 'behind the scenes'
                        // any additional delay if any will be due to
                        // col-to-col command requirements
                        if (hidden_bank_prep || !found_prepped_pkt) {
                            selected_pkt_it = i;
                            selected_col_at = col_allowed_at;
                        }
                    }
                }
            } else {
                DPRINTF(DRAM, "%s bank %d - Rank %d not available\n", __func__,
                        pkt->bank, pkt->rank);
            }
        }
    }

    if (selected_pkt_it == queue.end()) {
        DPRINTF(DRAM, "%s no available DRAM ranks found\n", __func__);
    }

    return std::make_pair(selected_pkt_it, selected_col_at);
}
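
// To summarise the selection priority implemented above: 1) a seamless
// row hit (column command possible at or before min_col_at) wins
// outright and stops the scan, 2) otherwise the first prepped open-row
// hit is remembered, 3) failing that, a packet to one of the
// earliest-available banks is chosen, preferring banks whose PRE/ACT
// sequence can be hidden behind ongoing bursts.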

void
DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref,
                            Tick act_tick, uint32_t row)
{
    assert(rank_ref.actTicks.size() == activationLimit);

    // verify that we have command bandwidth to issue the activate
    // if not, shift to next burst window
    Tick act_at;
    if (twoCycleActivate)
        act_at = ctrl->verifyMultiCmd(act_tick, maxCommandsPerWindow, tAAD);
    else
        act_at = ctrl->verifySingleCmd(act_tick, maxCommandsPerWindow);

    DPRINTF(DRAM, "Activate at tick %d\n", act_at);

    // update the open row
    assert(bank_ref.openRow == Bank::NO_ROW);
    bank_ref.openRow = row;

    // start counting anew, this covers both the case when we
    // auto-precharged, and when this access is forced to
    // precharge
    bank_ref.bytesAccessed = 0;
    bank_ref.rowAccesses = 0;

    ++rank_ref.numBanksActive;
    assert(rank_ref.numBanksActive <= banksPerRank);

    DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got "
            "%d active\n", bank_ref.bank, rank_ref.rank, act_at,
            ranks[rank_ref.rank]->numBanksActive);

    rank_ref.cmdList.push_back(Command(MemCommand::ACT, bank_ref.bank,
                               act_at));

    DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_at, tCK) -
            timeStampOffset, bank_ref.bank, rank_ref.rank);

    // The next access has to respect tRAS for this bank
    bank_ref.preAllowedAt = act_at + tRAS;

    // Respect the row-to-column command delay for both read and write cmds
    bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt);
    bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt);

    // start by enforcing tRRD
    for (int i = 0; i < banksPerRank; i++) {
        // next activate to any bank in this rank must not happen
        // before tRRD
        if (bankGroupArch && (bank_ref.bankgr == rank_ref.banks[i].bankgr)) {
            // bank group architecture requires longer delays between
            // ACT commands within the same bank group. Use tRRD_L
            // in this case
            rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD_L,
                                             rank_ref.banks[i].actAllowedAt);
        } else {
            // use shorter tRRD value when either
            // 1) bank group architecture is not supported
            // 2) bank is in a different bank group
            rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD,
                                             rank_ref.banks[i].actAllowedAt);
        }
    }

    // next, we deal with tXAW, if the activation limit is disabled
    // then we directly schedule an activate power event
    if (!rank_ref.actTicks.empty()) {
        // sanity check
        if (rank_ref.actTicks.back() &&
            (act_at - rank_ref.actTicks.back()) < tXAW) {
            panic("Got %d activates in window %d (%llu - %llu) which "
                  "is smaller than %llu\n", activationLimit, act_at -
                  rank_ref.actTicks.back(), act_at,
                  rank_ref.actTicks.back(), tXAW);
        }

        // shift the times used for the bookkeeping, the last element
        // (highest index) is the oldest one and hence the lowest value
        rank_ref.actTicks.pop_back();

        // record a new activation (in the future)
        rank_ref.actTicks.push_front(act_at);

        // cannot activate more than X times in time window tXAW, push the
        // next one (the X + 1'st activate) to be tXAW away from the
        // oldest in our window of X
        if (rank_ref.actTicks.back() &&
            (act_at - rank_ref.actTicks.back()) < tXAW) {
            DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate "
                    "no earlier than %llu\n", activationLimit,
                    rank_ref.actTicks.back() + tXAW);
            for (int j = 0; j < banksPerRank; j++)
                // next activate must not happen before end of window
                rank_ref.banks[j].actAllowedAt =
                    std::max(rank_ref.actTicks.back() + tXAW,
                             rank_ref.banks[j].actAllowedAt);
        }
    }

    // at the point when this activate takes place, make sure we
    // transition to the active power state
    if (!rank_ref.activateEvent.scheduled())
        schedule(rank_ref.activateEvent, act_at);
    else if (rank_ref.activateEvent.when() > act_at)
        // move it sooner in time
        reschedule(rank_ref.activateEvent, act_at);
}
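
// Illustration of the tXAW bookkeeping above (made-up numbers): with
// activationLimit = 4 and ACTs recorded at ticks 0, 10, 20 and 30, a
// fifth ACT earlier than 0 + tXAW would trip the panic; once the fifth
// ACT is recorded, every bank's actAllowedAt is pushed out to at least
// actTicks.back() + tXAW, i.e. tXAW after the oldest ACT still in the
// rolling window.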

void
DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick,
                             bool auto_or_preall, bool trace)
{
    // make sure the bank has an open row
    assert(bank.openRow != Bank::NO_ROW);

    // sample the bytes per activate here since we are closing
    // the page
    stats.bytesPerActivate.sample(bank.bytesAccessed);

    bank.openRow = Bank::NO_ROW;

    Tick pre_at = pre_tick;
    if (auto_or_preall) {
        // no precharge allowed before this one
        bank.preAllowedAt = pre_at;
    } else {
        // Issuing an explicit PRE command
        // Verify that we have command bandwidth to issue the precharge
        // if not, shift to next burst window
        pre_at = ctrl->verifySingleCmd(pre_tick, maxCommandsPerWindow);
        // enforce tPPD
        for (int i = 0; i < banksPerRank; i++) {
            rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
                                             rank_ref.banks[i].preAllowedAt);
        }
    }

    Tick pre_done_at = pre_at + tRP;

    bank.actAllowedAt = std::max(bank.actAllowedAt, pre_done_at);

    assert(rank_ref.numBanksActive != 0);
    --rank_ref.numBanksActive;

    DPRINTF(DRAM, "Precharging bank %d, rank %d at tick %lld, now got "
            "%d active\n", bank.bank, rank_ref.rank, pre_at,
            rank_ref.numBanksActive);

    if (trace) {
        rank_ref.cmdList.push_back(Command(MemCommand::PRE, bank.bank,
                                   pre_at));
        DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) -
                timeStampOffset, bank.bank, rank_ref.rank);
    }

    // if we look at the current number of active banks we might be
    // tempted to think the DRAM is now idle, however this can be
    // undone by an activate that is scheduled to happen before we
    // would have reached the idle state, so schedule an event and
    // rather check once we actually make it to the point in time when
    // the (last) precharge takes place
    if (!rank_ref.prechargeEvent.scheduled()) {
        schedule(rank_ref.prechargeEvent, pre_done_at);
        // New event, increment count
        ++rank_ref.outstandingEvents;
    } else if (rank_ref.prechargeEvent.when() < pre_done_at) {
        reschedule(rank_ref.prechargeEvent, pre_done_at);
    }
}

std::pair<Tick, Tick>
DRAMInterface::doBurstAccess(MemPacket* mem_pkt, Tick next_burst_at,
                             const std::vector<MemPacketQueue>& queue)
{
    DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
            mem_pkt->addr, mem_pkt->rank, mem_pkt->bank, mem_pkt->row);

    // get the rank
    Rank& rank_ref = *ranks[mem_pkt->rank];

    assert(rank_ref.inRefIdleState());

    // are we in or transitioning to a low-power state and have not scheduled
    // a power-up event?
    // if so, wake up from power down to issue RD/WR burst
    if (rank_ref.inLowPowerState) {
        assert(rank_ref.pwrState != PWR_SREF);
        rank_ref.scheduleWakeUpEvent(tXP);
    }

    // get the bank
    Bank& bank_ref = rank_ref.banks[mem_pkt->bank];

    // for the state we need to track if it is a row hit or not
    bool row_hit = true;

    // Determine the access latency and update the bank state
    if (bank_ref.openRow == mem_pkt->row) {
        // nothing to do
    } else {
        row_hit = false;

        // If there is a page open, precharge it.
        if (bank_ref.openRow != Bank::NO_ROW) {
            prechargeBank(rank_ref, bank_ref, std::max(bank_ref.preAllowedAt,
                                                       curTick()));
        }

        // next we need to account for the delay in activating the page
        Tick act_tick = std::max(bank_ref.actAllowedAt, curTick());

        // Record the activation and deal with all the global timing
        // constraints caused by a new activation (tRRD and tXAW)
        activateBank(rank_ref, bank_ref, act_tick, mem_pkt->row);
    }

    // respect any constraints on the command (e.g. tRCD or tCCD)
    const Tick col_allowed_at = mem_pkt->isRead() ?
                                bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;

    // we need to wait until the bus is available before we can issue
    // the command; need to ensure minimum bus delay requirement is met
    Tick cmd_at = std::max({col_allowed_at, next_burst_at, curTick()});

    // verify that we have command bandwidth to issue the burst
    // if not, shift to next burst window
    if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay))
        cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
    else
        cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);

    // if we are interleaving bursts, ensure that
    // 1) we don't double interleave on next burst issue
    // 2) we are at an interleave boundary; if not, shift to next boundary
    Tick burst_gap = tBURST_MIN;
    if (burstInterleave) {
        if (cmd_at == (rank_ref.lastBurstTick + tBURST_MIN)) {
            // already interleaving, push next command to end of full burst
            // window
            burst_gap = tBURST;
        } else if (cmd_at < (rank_ref.lastBurstTick + tBURST)) {
            // not at an interleave boundary after bandwidth check
            // Shift command to tBURST boundary to avoid data contention
            // Command will remain in the same burst window given that
            // tBURST is less than tBURST_MAX
            cmd_at = rank_ref.lastBurstTick + tBURST;
        }
    }
    DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at);

    // update the packet ready time
    mem_pkt->readyTime = cmd_at + tCL + tBURST;

    rank_ref.lastBurstTick = cmd_at;

    // update the time for the next read/write burst for each
    // bank (add a max with tCCD/tCCD_L/tCCD_L_WR here)
    Tick dly_to_rd_cmd;
    Tick dly_to_wr_cmd;
    for (int j = 0; j < ranksPerChannel; j++) {
        for (int i = 0; i < banksPerRank; i++) {
            if (mem_pkt->rank == j) {
                if (bankGroupArch &&
                   (bank_ref.bankgr == ranks[j]->banks[i].bankgr)) {
                    // bank group architecture requires longer delays between
                    // RD/WR burst commands to the same bank group.
                    // tCCD_L is default requirement for same BG timing
                    // tCCD_L_WR is required for write-to-write
                    // Need to also take bus turnaround delays into account
                    dly_to_rd_cmd = mem_pkt->isRead() ?
                                    tCCD_L : std::max(tCCD_L, wrToRdDlySameBG);
                    dly_to_wr_cmd = mem_pkt->isRead() ?
                                    std::max(tCCD_L, rdToWrDlySameBG) :
                                    tCCD_L_WR;
                } else {
                    // tBURST is default requirement for diff BG timing
                    // Need to also take bus turnaround delays into account
                    dly_to_rd_cmd = mem_pkt->isRead() ? burst_gap :
                                                        writeToReadDelay();
                    dly_to_wr_cmd = mem_pkt->isRead() ? readToWriteDelay() :
                                                        burst_gap;
                }
            } else {
                // different rank is by default in a different bank group and
                // doesn't require longer tCCD or additional RTW, WTR delays
                // Need to account for rank-to-rank switching
                dly_to_wr_cmd = rankToRankDelay();
                dly_to_rd_cmd = rankToRankDelay();
            }
            ranks[j]->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
                                             ranks[j]->banks[i].rdAllowedAt);
            ranks[j]->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
                                             ranks[j]->banks[i].wrAllowedAt);
        }
    }

    // Save rank of current access
    activeRank = mem_pkt->rank;

    // If this is a write, we also need to respect the write recovery
    // time before a precharge, in the case of a read, respect the
    // read to precharge constraint
    bank_ref.preAllowedAt = std::max(bank_ref.preAllowedAt,
                                     mem_pkt->isRead() ? cmd_at + tRTP :
                                     mem_pkt->readyTime + tWR);

    // increment the bytes accessed and the accesses per row
    bank_ref.bytesAccessed += burstSize;
    ++bank_ref.rowAccesses;

    // if we reached the max, then issue with an auto-precharge
    bool auto_precharge = pageMgmt == Enums::close ||
                          bank_ref.rowAccesses == maxAccessesPerRow;

    // if we did not hit the limit, we might still want to
    // auto-precharge
    if (!auto_precharge &&
        (pageMgmt == Enums::open_adaptive ||
         pageMgmt == Enums::close_adaptive)) {
        // a twist on the open and close page policies:
        // 1) open_adaptive page policy does not blindly keep the
        // page open, but close it if there are no row hits, and there
        // are bank conflicts in the queue
        // 2) close_adaptive page policy does not blindly close the
        // page, but closes it only if there are no row hits in the queue.
        // In this case, only force an auto precharge when there
        // are no same page hits in the queue
        bool got_more_hits = false;
        bool got_bank_conflict = false;

        for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) {
            auto p = queue[i].begin();
            // keep on looking until we find a hit or reach the end of the
            // queue
            // 1) if a hit is found, then both open and close adaptive
            //    policies keep the page open
            // 2) if no hit is found, got_bank_conflict is set to true if a
            //    bank conflict request is waiting in the queue
            // 3) make sure we are not considering the packet that we are
            //    currently dealing with
            while (!got_more_hits && p != queue[i].end()) {
                if (mem_pkt != (*p)) {
                    bool same_rank_bank = (mem_pkt->rank == (*p)->rank) &&
                                          (mem_pkt->bank == (*p)->bank);

                    bool same_row = mem_pkt->row == (*p)->row;
                    got_more_hits |= same_rank_bank && same_row;
                    got_bank_conflict |= same_rank_bank && !same_row;
                }
                ++p;
            }
        }

        // auto pre-charge when either
        // 1) open_adaptive policy, we have not got any more hits, and
        //    have a bank conflict
        // 2) close_adaptive policy and we have not got any more hits
        auto_precharge = !got_more_hits &&
            (got_bank_conflict || pageMgmt == Enums::close_adaptive);
    }

    // DRAMPower trace command to be written
    std::string mem_cmd = mem_pkt->isRead() ? "RD" : "WR";

    // MemCommand required for DRAMPower library
    MemCommand::cmds command = (mem_cmd == "RD") ? MemCommand::RD :
                                                   MemCommand::WR;

    rank_ref.cmdList.push_back(Command(command, mem_pkt->bank, cmd_at));

    DPRINTF(DRAMPower, "%llu,%s,%d,%d\n", divCeil(cmd_at, tCK) -
            timeStampOffset, mem_cmd, mem_pkt->bank, mem_pkt->rank);

    // if this access should use auto-precharge, then we are
    // closing the row after the read/write burst
    if (auto_precharge) {
        // if auto-precharge push a PRE command at the correct tick to the
        // list used by DRAMPower library to calculate power
        prechargeBank(rank_ref, bank_ref, std::max(curTick(),
                      bank_ref.preAllowedAt), true);

        DPRINTF(DRAM, "Auto-precharged bank: %d\n", mem_pkt->bankId);
    }

    // Update the stats and schedule the next request
    if (mem_pkt->isRead()) {
        // Every respQueue which will generate an event, increment count
        ++rank_ref.outstandingEvents;

        stats.readBursts++;
        if (row_hit)
            stats.readRowHits++;
        stats.bytesRead += burstSize;
        stats.perBankRdBursts[mem_pkt->bankId]++;

        // Update latency stats
        stats.totMemAccLat += mem_pkt->readyTime - mem_pkt->entryTime;
        stats.totQLat += cmd_at - mem_pkt->entryTime;
        stats.totBusLat += tBURST;
    } else {
        // Schedule write done event to decrement event count
        // after the readyTime has been reached
        // Only schedule latest write event to minimize events
        // required; only need to ensure that final event scheduled covers
        // the time that writes are outstanding and bus is active
        // to holdoff power-down entry events
        if (!rank_ref.writeDoneEvent.scheduled()) {
            schedule(rank_ref.writeDoneEvent, mem_pkt->readyTime);
            // New event, increment count
            ++rank_ref.outstandingEvents;
        } else if (rank_ref.writeDoneEvent.when() < mem_pkt->readyTime) {
            reschedule(rank_ref.writeDoneEvent, mem_pkt->readyTime);
        }
        // will remove write from queue when returned to parent function
        // decrement count for DRAM rank
        --rank_ref.writeEntries;

        stats.writeBursts++;
        if (row_hit)
            stats.writeRowHits++;
        stats.bytesWritten += burstSize;
        stats.perBankWrBursts[mem_pkt->bankId]++;
    }
    // Update bus state to reflect when previous command was issued
    return std::make_pair(cmd_at, cmd_at + burst_gap);
}
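
// Note on the return value above: the pair is (cmd_at, cmd_at +
// burst_gap), i.e. the tick at which this burst issues and the
// earliest tick at which the controller may place the next burst on
// the bus; burst_gap is tBURST_MIN unless burst interleaving pushed it
// out to the full tBURST.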

void
DRAMInterface::addRankToRankDelay(Tick cmd_at)
{
    // update timing for DRAM ranks due to bursts issued
    // to ranks on other media interfaces
    for (auto n : ranks) {
        for (int i = 0; i < banksPerRank; i++) {
            // different rank by default
            // Need to only account for rank-to-rank switching
            n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
                                               n->banks[i].rdAllowedAt);
            n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
                                               n->banks[i].wrAllowedAt);
        }
    }
}

DRAMInterface::DRAMInterface(const DRAMInterfaceParams &_p)
    : MemInterface(_p),
      bankGroupsPerRank(_p.bank_groups_per_rank),
      bankGroupArch(_p.bank_groups_per_rank > 0),
      tCL(_p.tCL),
      tBURST_MIN(_p.tBURST_MIN), tBURST_MAX(_p.tBURST_MAX),
      tCCD_L_WR(_p.tCCD_L_WR), tCCD_L(_p.tCCD_L), tRCD(_p.tRCD),
      tRP(_p.tRP), tRAS(_p.tRAS), tWR(_p.tWR), tRTP(_p.tRTP),
      tRFC(_p.tRFC), tREFI(_p.tREFI), tRRD(_p.tRRD), tRRD_L(_p.tRRD_L),
      tPPD(_p.tPPD), tAAD(_p.tAAD),
      tXAW(_p.tXAW), tXP(_p.tXP), tXS(_p.tXS),
      clkResyncDelay(tCL + _p.tBURST_MAX),
      dataClockSync(_p.data_clock_sync),
      burstInterleave(tBURST != tBURST_MIN),
      twoCycleActivate(_p.two_cycle_activate),
      activationLimit(_p.activation_limit),
      wrToRdDlySameBG(tCL + _p.tBURST_MAX + _p.tWTR_L),
      rdToWrDlySameBG(_p.tRTW + _p.tBURST_MAX),
      pageMgmt(_p.page_policy),
      maxAccessesPerRow(_p.max_accesses_per_row),
      timeStampOffset(0), activeRank(0),
      enableDRAMPowerdown(_p.enable_dram_powerdown),
      lastStatsResetTick(0),
      stats(*this)
{
    DPRINTF(DRAM, "Setting up DRAM Interface\n");

    fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
             "must be a power of two\n", burstSize);

    // sanity check the ranks since we rely on bit slicing for the
    // address decoding
    fatal_if(!isPowerOf2(ranksPerChannel), "DRAM rank count of %d is "
             "not allowed, must be a power of two\n", ranksPerChannel);

    for (int i = 0; i < ranksPerChannel; i++) {
        DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
        Rank* rank = new Rank(_p, i, *this);
        ranks.push_back(rank);
    }

    // determine the dram actual capacity from the DRAM config in Mbytes
    uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
                              ranksPerChannel;

    uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());

    DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
            AbstractMemory::size());

    // if actual DRAM size does not match memory capacity in system warn!
    if (deviceCapacity != capacity / (1024 * 1024))
        warn("DRAM device capacity (%d Mbytes) does not match the "
             "address range assigned (%d Mbytes)\n", deviceCapacity,
             capacity / (1024 * 1024));

    DPRINTF(DRAM, "Row buffer size %d bytes with %d bursts per row buffer\n",
            rowBufferSize, burstsPerRowBuffer);

    rowsPerBank = capacity / (rowBufferSize * banksPerRank * ranksPerChannel);

    // some basic sanity checks
    if (tREFI <= tRP || tREFI <= tRFC) {
        fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
              tREFI, tRP, tRFC);
    }

    // basic bank group architecture checks ->
    if (bankGroupArch) {
        // must have at least one bank per bank group
        if (bankGroupsPerRank > banksPerRank) {
            fatal("banks per rank (%d) must be equal to or larger than "
                  "bank groups per rank (%d)\n",
                  banksPerRank, bankGroupsPerRank);
        }
        // must have same number of banks in each bank group
        if ((banksPerRank % bankGroupsPerRank) != 0) {
            fatal("Banks per rank (%d) must be evenly divisible by bank "
                  "groups per rank (%d) for equal banks per bank group\n",
                  banksPerRank, bankGroupsPerRank);
        }
        // tCCD_L should be greater than minimal, back-to-back burst delay
        if (tCCD_L <= tBURST) {
            fatal("tCCD_L (%d) should be larger than the minimum bus delay "
                  "(%d) when bank groups per rank (%d) is greater than 1\n",
                  tCCD_L, tBURST, bankGroupsPerRank);
        }
        // tCCD_L_WR should be greater than minimal, back-to-back burst delay
        if (tCCD_L_WR <= tBURST) {
            fatal("tCCD_L_WR (%d) should be larger than the minimum bus delay "
                  "(%d) when bank groups per rank (%d) is greater than 1\n",
                  tCCD_L_WR, tBURST, bankGroupsPerRank);
        }
        // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay
        // some datasheets might specify it equal to tRRD
        if (tRRD_L < tRRD) {
            fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
                  "bank groups per rank (%d) is greater than 1\n",
                  tRRD_L, tRRD, bankGroupsPerRank);
        }
    }
}
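
// Example of the capacity cross-check above (hypothetical part, not a
// bundled config): eight 1-Gbit (128-MiB) x8 devices per rank and two
// ranks give deviceCapacity = 128 * 8 * 2 = 2048 Mbytes, which should
// match the size of the address range assigned to this interface.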

void
DRAMInterface::init()
{
    AbstractMemory::init();

    // a bit of sanity checks on the interleaving, save it for here to
    // ensure that the system pointer is initialised
    if (range.interleaved()) {
        if (addrMapping == Enums::RoRaBaChCo) {
            if (rowBufferSize != range.granularity()) {
                fatal("Channel interleaving of %s doesn't match RoRaBaChCo "
                      "address map\n", name());
            }
        } else if (addrMapping == Enums::RoRaBaCoCh ||
                   addrMapping == Enums::RoCoRaBaCh) {
            // for the interleavings with channel bits in the bottom,
            // if the system uses a channel striping granularity that
            // is larger than the DRAM burst size, then map the
            // sequential accesses within a stripe to a number of
            // columns in the DRAM, effectively placing some of the
            // lower-order column bits as the least-significant bits
            // of the address (above the ones denoting the burst size)
            assert(burstsPerStripe >= 1);

            // channel striping has to be done at a granularity that
            // is equal or larger to a cache line
            if (system()->cacheLineSize() > range.granularity()) {
                fatal("Channel interleaving of %s must be at least as large "
                      "as the cache line size\n", name());
            }

            // ...and equal or smaller than the row-buffer size
            if (rowBufferSize < range.granularity()) {
                fatal("Channel interleaving of %s must be at most as large "
                      "as the row-buffer size\n", name());
            }
            // this is essentially the check above, so just to be sure
            assert(burstsPerStripe <= burstsPerRowBuffer);
        }
    }
}

void
DRAMInterface::startup()
{
    if (system()->isTimingMode()) {
        // timestamp offset should be in clock cycles for DRAMPower
        timeStampOffset = divCeil(curTick(), tCK);

        for (auto r : ranks) {
            r->startup(curTick() + tREFI - tRP);
        }
    }
}

bool
DRAMInterface::isBusy()
{
    int busy_ranks = 0;
    for (auto r : ranks) {
        if (!r->inRefIdleState()) {
            if (r->pwrState != PWR_SREF) {
                // rank is busy refreshing
                DPRINTF(DRAMState, "Rank %d is not available\n", r->rank);
                busy_ranks++;

                // let the rank know that if it was waiting to drain, it
                // is now done and ready to proceed
                r->checkDrainDone();
            }

            // check if we were in self-refresh and haven't started
            // to exit it
            if ((r->pwrState == PWR_SREF) && r->inLowPowerState) {
                DPRINTF(DRAMState, "Rank %d is in self-refresh\n", r->rank);
                // if we have commands queued to this rank and we don't have
                // a minimum number of active commands enqueued,
                // exit self-refresh
                if (r->forceSelfRefreshExit()) {
                    DPRINTF(DRAMState, "rank %d was in self refresh and"
                            " should wake up\n", r->rank);
                    // wake up from self-refresh
                    r->scheduleWakeUpEvent(tXS);
                    // things are brought back into action once a refresh is
                    // performed after self-refresh
                    // continue with selection for other ranks
                }
            }
        }
    }
    return (busy_ranks == ranksPerChannel);
}

void DRAMInterface::setupRank(const uint8_t rank, const bool is_read)
{
    // increment entry count of the rank based on packet type
    if (is_read) {
        ++ranks[rank]->readEntries;
    } else {
        ++ranks[rank]->writeEntries;
    }
}

void
DRAMInterface::respondEvent(uint8_t rank)
{
    Rank& rank_ref = *ranks[rank];

    // if a read has reached its ready-time, decrement the number of reads
    // At this point the packet has been handled and there is a possibility
    // to switch to low-power mode if no other packet is available
    --rank_ref.readEntries;
    DPRINTF(DRAM, "number of read entries for rank %d is %d\n",
            rank, rank_ref.readEntries);

    // counter should at least indicate one outstanding request
    // for this read
    assert(rank_ref.outstandingEvents > 0);
    // read response received, decrement count
    --rank_ref.outstandingEvents;

    // at this moment should not have transitioned to a low-power state
    assert((rank_ref.pwrState != PWR_SREF) &&
           (rank_ref.pwrState != PWR_PRE_PDN) &&
           (rank_ref.pwrState != PWR_ACT_PDN));

    // track if this is the last packet before idling
    // and that there are no outstanding commands to this rank
    if (rank_ref.isQueueEmpty() && rank_ref.outstandingEvents == 0 &&
        rank_ref.inRefIdleState() && enableDRAMPowerdown) {
        // verify that there are no events scheduled
        assert(!rank_ref.activateEvent.scheduled());
        assert(!rank_ref.prechargeEvent.scheduled());

        // if coming from active state, schedule power event to
        // active power-down else go to precharge power-down
        DPRINTF(DRAMState, "Rank %d sleep at tick %d; current power state is "
                "%d\n", rank, curTick(), rank_ref.pwrState);

        // default to ACT power-down unless already in IDLE state
        // could be in IDLE if PRE issued before data returned
        PowerState next_pwr_state = PWR_ACT_PDN;
        if (rank_ref.pwrState == PWR_IDLE) {
            next_pwr_state = PWR_PRE_PDN;
        }

        rank_ref.powerDownSleep(next_pwr_state, curTick());
    }
}

void
DRAMInterface::checkRefreshState(uint8_t rank)
{
    Rank& rank_ref = *ranks[rank];

    if ((rank_ref.refreshState == REF_PRE) &&
        !rank_ref.prechargeEvent.scheduled()) {
        // kick the refresh event loop into action again if banks already
        // closed and just waiting for read to complete
        schedule(rank_ref.refreshEvent, curTick());
    }
}

void
DRAMInterface::drainRanks()
{
    // also need to kick off events to exit self-refresh
    for (auto r : ranks) {
        // force self-refresh exit, which in turn will issue auto-refresh
        if (r->pwrState == PWR_SREF) {
            DPRINTF(DRAM, "Rank%d: Forcing self-refresh wakeup in drain\n",
                    r->rank);
            r->scheduleWakeUpEvent(tXS);
        }
    }
}

bool
DRAMInterface::allRanksDrained() const
{
    // true until proven false
    bool all_ranks_drained = true;
    for (auto r : ranks) {
        // then verify that the power state is IDLE ensuring all banks are
        // closed and rank is not in a low power state. Also verify that rank
        // is idle from a refresh point of view.
        all_ranks_drained = r->inPwrIdleState() && r->inRefIdleState() &&
                            all_ranks_drained;
    }
    return all_ranks_drained;
}

void
DRAMInterface::suspend()
{
    for (auto r : ranks) {
        r->suspend();
    }
}

std::pair<std::vector<uint32_t>, bool>
DRAMInterface::minBankPrep(const MemPacketQueue& queue,
                           Tick min_col_at) const
{
    Tick min_act_at = MaxTick;
    std::vector<uint32_t> bank_mask(ranksPerChannel, 0);

    // latest Tick for which ACT can occur without incurring additional
    // delay on the data bus
    const Tick hidden_act_max = std::max(min_col_at - tRCD, curTick());

    // Flag condition when burst can issue back-to-back with previous burst
    bool found_seamless_bank = false;

    // Flag condition when bank can be opened without incurring additional
    // delay on the data bus
    bool hidden_bank_prep = false;

    // determine if we have queued transactions targeting the
    // bank in question
    std::vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
    for (const auto& p : queue) {
        if (p->isDram() && ranks[p->rank]->inRefIdleState())
            got_waiting[p->bankId] = true;
    }

    // Find command with optimal bank timing
    // Will prioritize commands that can issue seamlessly.
    for (int i = 0; i < ranksPerChannel; i++) {
        for (int j = 0; j < banksPerRank; j++) {
            uint16_t bank_id = i * banksPerRank + j;

            // if we have waiting requests for the bank, and it is
            // amongst the first available, update the mask
            if (got_waiting[bank_id]) {
                // make sure this rank is not currently refreshing.
                assert(ranks[i]->inRefIdleState());
                // simplistic approximation of when the bank can issue
                // an activate, ignoring any rank-to-rank switching
                // cost in this calculation
                Tick act_at = ranks[i]->banks[j].openRow == Bank::NO_ROW ?
                    std::max(ranks[i]->banks[j].actAllowedAt, curTick()) :
                    std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP;

                // When is the earliest the R/W burst can issue?
                const Tick col_allowed_at = ctrl->inReadBusState(false) ?
                                            ranks[i]->banks[j].rdAllowedAt :
                                            ranks[i]->banks[j].wrAllowedAt;
                Tick col_at = std::max(col_allowed_at, act_at + tRCD);

                // bank can issue burst back-to-back (seamlessly) with
                // previous burst
                bool new_seamless_bank = col_at <= min_col_at;

                // if we found a new seamless bank or we have no
                // seamless banks, and got a bank with an earlier
                // activate time, it should be added to the bit mask
                if (new_seamless_bank ||
                    (!found_seamless_bank && act_at <= min_act_at)) {
                    // if we did not have a seamless bank before, and
                    // we do now, reset the bank mask, also reset it
                    // if we have not yet found a seamless bank and
                    // the activate time is smaller than what we have
                    // seen so far
                    if (!found_seamless_bank &&
                        (new_seamless_bank || act_at < min_act_at)) {
                        std::fill(bank_mask.begin(), bank_mask.end(), 0);
                    }

                    found_seamless_bank |= new_seamless_bank;

                    // ACT can occur 'behind the scenes'
                    hidden_bank_prep = act_at <= hidden_act_max;

                    // set the bit corresponding to the available bank
                    replaceBits(bank_mask[i], j, j, 1);
                    min_act_at = act_at;
                }
            }
        }
    }

    return std::make_pair(bank_mask, hidden_bank_prep);
}
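
// In short: the returned vector holds one bit mask per rank marking
// the bank(s) whose ACT can issue earliest, and the bool says whether
// that ACT can be hidden entirely behind the current bursts, i.e. it
// can issue at least tRCD before the earliest column command.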

DRAMInterface::Rank::Rank(const DRAMInterfaceParams &_p,
                          int _rank, DRAMInterface& _dram)
    : EventManager(&_dram), dram(_dram),
      pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE),
      pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE),
      refreshState(REF_IDLE), inLowPowerState(false), rank(_rank),
      readEntries(0), writeEntries(0), outstandingEvents(0),
      wakeUpAllowedAt(0), power(_p, false), banks(_p.banks_per_rank),
      numBanksActive(0), actTicks(_p.activation_limit, 0), lastBurstTick(0),
      writeDoneEvent([this]{ processWriteDoneEvent(); }, name()),
      activateEvent([this]{ processActivateEvent(); }, name()),
      prechargeEvent([this]{ processPrechargeEvent(); }, name()),
      refreshEvent([this]{ processRefreshEvent(); }, name()),
      powerEvent([this]{ processPowerEvent(); }, name()),
      wakeUpEvent([this]{ processWakeUpEvent(); }, name()),
      stats(_dram)
{
    for (int b = 0; b < _p.banks_per_rank; b++) {
        banks[b].bank = b;
        // GDDR addressing of banks to BG is linear.
        // Here we assume that all DRAM generations address bank groups as
        // follows:
        if (_p.bank_groups_per_rank > 0) {
            // Simply assign lower bits to bank group in order to
            // rotate across bank groups as banks are incremented
            // e.g. with 4 banks per bank group and 16 banks total:
            //    banks 0,4,8,12  are in bank group 0
            //    banks 1,5,9,13  are in bank group 1
            //    banks 2,6,10,14 are in bank group 2
            //    banks 3,7,11,15 are in bank group 3
            banks[b].bankgr = b % _p.bank_groups_per_rank;
        } else {
            // No bank groups; simply assign to bank number
            banks[b].bankgr = b;
        }
    }
}

void
DRAMInterface::Rank::startup(Tick ref_tick)
{
    assert(ref_tick > curTick());

    pwrStateTick = curTick();

    // kick off the refresh, and give ourselves enough time to
    // precharge
    schedule(refreshEvent, ref_tick);
}

void
DRAMInterface::Rank::suspend()
{
    deschedule(refreshEvent);

    // Update the stats
    updatePowerStats();

    // don't automatically transition back to LP state after next REF
    pwrStatePostRefresh = PWR_IDLE;
}

bool
DRAMInterface::Rank::isQueueEmpty() const
{
    // check commands in Q based on current bus direction
    bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
                           (readEntries == 0))
                       || (dram.ctrl->inWriteBusState(true) &&
                           (writeEntries == 0));
    return no_queued_cmds;
}

void
DRAMInterface::Rank::checkDrainDone()
{
    // if this rank was waiting to drain it is now able to proceed to
    // precharge
    if (refreshState == REF_DRAIN) {
        DPRINTF(DRAM, "Refresh drain done, now precharging\n");

        refreshState = REF_PD_EXIT;

        // hand control back to the refresh event loop
        schedule(refreshEvent, curTick());
    }
}

void
DRAMInterface::Rank::flushCmdList()
{
    // at the moment sort the list of commands and update the counters
    // for the DRAMPower library when doing a refresh
    sort(cmdList.begin(), cmdList.end(), DRAMInterface::sortTime);

    auto next_iter = cmdList.begin();
    // push the commands to DRAMPower
    for ( ; next_iter != cmdList.end() ; ++next_iter) {
        Command cmd = *next_iter;
        if (cmd.timeStamp <= curTick()) {
            // Move all commands at or before curTick to DRAMPower
            power.powerlib.doCommand(cmd.type, cmd.bank,
                                     divCeil(cmd.timeStamp, dram.tCK) -
                                     dram.timeStampOffset);
        } else {
            // done - found all commands at or before curTick()
            // next_iter references the 1st command after curTick
            break;
        }
    }
    // reset cmdList to only contain commands after curTick
    // if there are no commands after curTick, updated cmdList will be empty
    // in this case, next_iter is cmdList.end()
    cmdList.assign(next_iter, cmdList.end());
}

void
DRAMInterface::Rank::processActivateEvent()
{
    // we should transition to the active state as soon as any bank is active
    if (pwrState != PWR_ACT)
        // note that at this point numBanksActive could be back at
        // zero again due to a precharge scheduled in the future
        schedulePowerEvent(PWR_ACT, curTick());
}

void
DRAMInterface::Rank::processPrechargeEvent()
{
    // counter should at least indicate one outstanding request
    // for this precharge
    assert(outstandingEvents > 0);
    // precharge complete, decrement count
    --outstandingEvents;

    // if we reached zero, then special conditions apply as we track
    // if all banks are precharged for the power models
    if (numBanksActive == 0) {
        // no reads to this rank in the Q and no pending
        // RD/WR or refresh commands
        if (isQueueEmpty() && outstandingEvents == 0 &&
            dram.enableDRAMPowerdown) {
            // should still be in ACT state since bank still open
            assert(pwrState == PWR_ACT);

            // All banks closed - switch to precharge power down state.
            DPRINTF(DRAMState, "Rank %d sleep at tick %d\n",
                    rank, curTick());
            powerDownSleep(PWR_PRE_PDN, curTick());
        } else {
            // we should transition to the idle state when the last bank
            // is precharged
            schedulePowerEvent(PWR_IDLE, curTick());
        }
    }
}

void
DRAMInterface::Rank::processWriteDoneEvent()
{
    // counter should at least indicate one outstanding request
    // for this write
    assert(outstandingEvents > 0);
    // Write transfer on bus has completed
    // decrement per rank counter
    --outstandingEvents;
}

void
DRAMInterface::Rank::processRefreshEvent()
{
    // when first preparing the refresh, remember when it was due
    if ((refreshState == REF_IDLE) || (refreshState == REF_SREF_EXIT)) {
        // remember when the refresh is due
        refreshDueAt = curTick();

        // proceed to drain
        refreshState = REF_DRAIN;

        // make nonzero while refresh is pending to ensure
        // power down and self-refresh are not entered
        ++outstandingEvents;

        DPRINTF(DRAM, "Refresh due\n");
    }

    // let any scheduled read or write to the same rank go ahead,
    // after which it will
    // hand control back to this event loop
    if (refreshState == REF_DRAIN) {
        // if a request is at the moment being handled and this request is
        // accessing the current rank then wait for it to finish
        if ((rank == dram.activeRank)
            && (dram.ctrl->requestEventScheduled())) {
            // hand control over to the request loop until it is
            // evaluated next
            DPRINTF(DRAM, "Refresh awaiting draining\n");

            return;
        } else {
            refreshState = REF_PD_EXIT;
        }
    }

    // at this point, ensure that rank is not in a power-down state
    if (refreshState == REF_PD_EXIT) {
        // if rank was sleeping and we haven't started the exit process,
        // wake-up for refresh
        if (inLowPowerState) {
            DPRINTF(DRAM, "Wake Up for refresh\n");
            // save state and return after refresh completes
            scheduleWakeUpEvent(dram.tXP);
            return;
        } else {
            refreshState = REF_PRE;
        }
    }

    // at this point, ensure that all banks are precharged
    if (refreshState == REF_PRE) {
        // precharge any active bank
        if (numBanksActive != 0) {
            // at the moment, we use a precharge all even if there is
            // only a single bank open
            DPRINTF(DRAM, "Precharging all\n");

            // first determine when we can precharge
            Tick pre_at = curTick();

            for (auto &b : banks) {
                // respect both causality and any existing bank
                // constraints, some banks could already have a
                // (auto) precharge scheduled
                pre_at = std::max(b.preAllowedAt, pre_at);
            }

            // make sure all banks per rank are precharged, and for those that
            // already are, update their availability
            Tick act_allowed_at = pre_at + dram.tRP;

            for (auto &b : banks) {
                if (b.openRow != Bank::NO_ROW) {
                    dram.prechargeBank(*this, b, pre_at, true, false);
                } else {
                    b.actAllowedAt = std::max(b.actAllowedAt, act_allowed_at);
                    b.preAllowedAt = std::max(b.preAllowedAt, pre_at);
                }
            }

            // precharge all banks in rank
            cmdList.push_back(Command(MemCommand::PREA, 0, pre_at));

            DPRINTF(DRAMPower, "%llu,PREA,0,%d\n",
                    divCeil(pre_at, dram.tCK) -
                    dram.timeStampOffset, rank);
        } else if ((pwrState == PWR_IDLE) && (outstandingEvents == 1)) {
            // Banks are closed, have transitioned to IDLE state, and
            // no outstanding ACT,RD/WR,Auto-PRE sequence scheduled
            DPRINTF(DRAM, "All banks already precharged, starting refresh\n");

            // go ahead and kick the power state machine into gear since
            // we are already idle
            schedulePowerEvent(PWR_REF, curTick());
        } else {
            // banks state is closed but haven't transitioned pwrState to IDLE
            // or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
            // should have outstanding precharge or read response event
            assert(prechargeEvent.scheduled() ||
                   dram.ctrl->respondEventScheduled());
            // will start refresh when pwrState transitions to IDLE
        }

        assert(numBanksActive == 0);

        // wait for all banks to be precharged or read to complete
        // When precharge commands are done, power state machine will
        // transition to the idle state, and automatically move to a
        // refresh, at that point it will also call this method to get
        // the refresh event loop going again
        // Similarly, when read response completes, if all banks are
        // precharged, will call this method to get loop re-started
        return;
    }

    // last but not least we perform the actual refresh
    if (refreshState == REF_START) {
        // should never get here with any banks active
        assert(numBanksActive == 0);
        assert(pwrState == PWR_REF);

        Tick ref_done_at = curTick() + dram.tRFC;

        for (auto &b : banks) {
            b.actAllowedAt = ref_done_at;
        }

        // at the moment this affects all ranks
        cmdList.push_back(Command(MemCommand::REF, 0, curTick()));

        // Update the stats
        updatePowerStats();

        DPRINTF(DRAMPower, "%llu,REF,0,%d\n", divCeil(curTick(), dram.tCK) -
                dram.timeStampOffset, rank);

        // Update for next refresh
        refreshDueAt += dram.tREFI;

        // make sure we did not wait so long that we cannot make up
        // for it
        if (refreshDueAt < ref_done_at) {
            fatal("Refresh was delayed so long we cannot catch up\n");
        }

        // Run the refresh and schedule event to transition power states
        // when refresh completes
        refreshState = REF_RUN;
        schedule(refreshEvent, ref_done_at);
        return;
    }

    if (refreshState == REF_RUN) {
        // should never get here with any banks active
        assert(numBanksActive == 0);
        assert(pwrState == PWR_REF);

        assert(!powerEvent.scheduled());

        if ((dram.ctrl->drainState() == DrainState::Draining) ||
            (dram.ctrl->drainState() == DrainState::Drained)) {
            // if draining, do not re-enter low-power mode.
            // simply go to IDLE and wait
            schedulePowerEvent(PWR_IDLE, curTick());
        } else {
            // At the moment, we sleep when the refresh ends and wait to be
            // woken up again if previously in a low-power state.
            if (pwrStatePostRefresh != PWR_IDLE) {
                // power State should be power Refresh
                assert(pwrState == PWR_REF);
                DPRINTF(DRAMState, "Rank %d sleeping after refresh and was in "
                        "power state %d before refreshing\n", rank,
                        pwrStatePostRefresh);
                powerDownSleep(pwrState, curTick());

            // Force PRE power-down if there are no outstanding commands
            // in Q after refresh.
            } else if (isQueueEmpty() && dram.enableDRAMPowerdown) {
                // still have refresh event outstanding but there should
                // be no other events outstanding
                assert(outstandingEvents == 1);
                DPRINTF(DRAMState, "Rank %d sleeping after refresh but was NOT"
                        " in a low power state before refreshing\n", rank);
                powerDownSleep(PWR_PRE_PDN, curTick());

            } else {
                // move to the idle power state once the refresh is done, this
                // will also move the refresh state machine to the refresh
                // idle state
                schedulePowerEvent(PWR_IDLE, curTick());
            }
        }

        // At this point, we have completed the current refresh.
        // In the SREF bypass case, we do not get to this state in the
        // refresh STM and therefore can always schedule next event.
        // Compensate for the delay in actually performing the refresh
        // when scheduling the next one
        schedule(refreshEvent, refreshDueAt - dram.tRP);

        DPRINTF(DRAMState, "Refresh done at %llu and next refresh"
                " at %llu\n", curTick(), refreshDueAt);
    }
}
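
// A sketch of the refresh state machine driven above, as read from the
// states used in this file: REF_IDLE/REF_SREF_EXIT -> REF_DRAIN (let an
// in-flight access to this rank finish) -> REF_PD_EXIT (wake from any
// power-down) -> REF_PRE (precharge all banks) -> REF_START (issue REF
// and hold banks for tRFC) -> REF_RUN -> back to REF_IDLE once the
// power state machine returns to idle.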

void
DRAMInterface::Rank::schedulePowerEvent(PowerState pwr_state, Tick tick)
{
    // respect causality
    assert(tick >= curTick());

    if (!powerEvent.scheduled()) {
        DPRINTF(DRAMState, "Scheduling power event at %llu to state %d\n",
                tick, pwr_state);

        // insert the new transition
        pwrStateTrans = pwr_state;

        schedule(powerEvent, tick);
    } else {
        panic("Scheduled power event at %llu to state %d, "
              "with scheduled event at %llu to %d\n", tick, pwr_state,
              powerEvent.when(), pwrStateTrans);
    }
}

void
DRAMInterface::Rank::powerDownSleep(PowerState pwr_state, Tick tick)
{
    // if low power state is active low, schedule to active low power state.
    // in reality tCKE is needed to enter active low power. This is neglected
    // here and could be added in the future.
    if (pwr_state == PWR_ACT_PDN) {
        schedulePowerEvent(pwr_state, tick);
        // push command to DRAMPower
        cmdList.push_back(Command(MemCommand::PDN_F_ACT, 0, tick));
        DPRINTF(DRAMPower, "%llu,PDN_F_ACT,0,%d\n", divCeil(tick,
                dram.tCK) - dram.timeStampOffset, rank);
    } else if (pwr_state == PWR_PRE_PDN) {
        // if low power state is precharge low, schedule to precharge low
        // power state. In reality tCKE is needed to enter active low power.
        // This is neglected here.
        schedulePowerEvent(pwr_state, tick);
        // push Command to DRAMPower
        cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
        DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
                dram.tCK) - dram.timeStampOffset, rank);
    } else if (pwr_state == PWR_REF) {
        // if a refresh just occurred
        // transition to PRE_PDN now that all banks are closed
        // precharge power down requires tCKE to enter. For simplicity
        // this is not considered.
        schedulePowerEvent(PWR_PRE_PDN, tick);
        // push Command to DRAMPower
        cmdList.push_back(Command(MemCommand::PDN_F_PRE, 0, tick));
        DPRINTF(DRAMPower, "%llu,PDN_F_PRE,0,%d\n", divCeil(tick,
                dram.tCK) - dram.timeStampOffset, rank);
    } else if (pwr_state == PWR_SREF) {
        // should only enter SREF after PRE-PD wakeup to do a refresh
        assert(pwrStatePostRefresh == PWR_PRE_PDN);
        // self refresh requires time tCKESR to enter. For simplicity,
        // this is not considered.
        schedulePowerEvent(PWR_SREF, tick);
        // push Command to DRAMPower
        cmdList.push_back(Command(MemCommand::SREN, 0, tick));
        DPRINTF(DRAMPower, "%llu,SREN,0,%d\n", divCeil(tick,
                dram.tCK) - dram.timeStampOffset, rank);
    }
    // Ensure that we don't power-down and back up in same tick
    // Once we commit to PD entry, do it and wait for at least 1tCK
    // This could be replaced with tCKE if/when that is added to the model
    wakeUpAllowedAt = tick + dram.tCK;

    // Transitioning to a low power state, set flag
    inLowPowerState = true;
}
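
// Mapping of power-down entries to DRAMPower trace commands above:
// PWR_ACT_PDN logs PDN_F_ACT, PWR_PRE_PDN and the post-refresh PWR_REF
// case both log PDN_F_PRE, and PWR_SREF logs SREN; the tCKE/tCKESR
// entry latencies are deliberately not modelled, as noted in the
// comments.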

void
DRAMInterface::Rank::scheduleWakeUpEvent(Tick exit_delay)
{
    Tick wake_up_tick = std::max(curTick(), wakeUpAllowedAt);

    DPRINTF(DRAMState, "Scheduling wake-up for rank %d at tick %d\n",
            rank, wake_up_tick);

    // if waking for refresh, hold previous state
    // else reset state back to IDLE
    if (refreshState == REF_PD_EXIT) {
        pwrStatePostRefresh = pwrState;
    } else {
        // don't automatically transition back to LP state after next REF
        pwrStatePostRefresh = PWR_IDLE;
    }

    // schedule wake-up with event to ensure entry has completed before
    // we try to wake-up
    schedule(wakeUpEvent, wake_up_tick);

    for (auto &b : banks) {
        // respect both causality and any existing bank
        // constraints, some banks could already have a
        // (auto) precharge scheduled
        b.wrAllowedAt = std::max(wake_up_tick + exit_delay, b.wrAllowedAt);
        b.rdAllowedAt = std::max(wake_up_tick + exit_delay, b.rdAllowedAt);
        b.preAllowedAt = std::max(wake_up_tick + exit_delay, b.preAllowedAt);
        b.actAllowedAt = std::max(wake_up_tick + exit_delay, b.actAllowedAt);
    }
    // Transitioning out of low power state, clear flag
    inLowPowerState = false;

    // push to DRAMPower
    // use pwrStateTrans for cases where we have a power event scheduled
    // to enter low power that has not yet been processed
    if (pwrStateTrans == PWR_ACT_PDN) {
        cmdList.push_back(Command(MemCommand::PUP_ACT, 0, wake_up_tick));
        DPRINTF(DRAMPower, "%llu,PUP_ACT,0,%d\n", divCeil(wake_up_tick,
                dram.tCK) - dram.timeStampOffset, rank);
    } else if (pwrStateTrans == PWR_PRE_PDN) {
        cmdList.push_back(Command(MemCommand::PUP_PRE, 0, wake_up_tick));
        DPRINTF(DRAMPower, "%llu,PUP_PRE,0,%d\n", divCeil(wake_up_tick,
                dram.tCK) - dram.timeStampOffset, rank);
    } else if (pwrStateTrans == PWR_SREF) {
        cmdList.push_back(Command(MemCommand::SREX, 0, wake_up_tick));
        DPRINTF(DRAMPower, "%llu,SREX,0,%d\n", divCeil(wake_up_tick,
                dram.tCK) - dram.timeStampOffset, rank);
    }
}

void
DRAMInterface::Rank::processWakeUpEvent()
{
    // Should be in a power-down or self-refresh state
    assert((pwrState == PWR_ACT_PDN) || (pwrState == PWR_PRE_PDN) ||
           (pwrState == PWR_SREF));

    // Check current state to determine transition state
    if (pwrState == PWR_ACT_PDN) {
        // banks still open, transition to PWR_ACT
        schedulePowerEvent(PWR_ACT, curTick());
    } else {
        // transitioning from a precharge power-down or self-refresh state
        // banks are closed - transition to PWR_IDLE
        schedulePowerEvent(PWR_IDLE, curTick());
    }
}
void
DRAMInterface::Rank::processPowerEvent()
{
    assert(curTick() >= pwrStateTick);
    // remember where we were, and for how long
    Tick duration = curTick() - pwrStateTick;
    PowerState prev_state = pwrState;

    // update the accounting
    stats.pwrStateTime[prev_state] += duration;

    // track the total idle time
    if ((prev_state == PWR_PRE_PDN) || (prev_state == PWR_ACT_PDN) ||
        (prev_state == PWR_SREF)) {
        stats.totalIdleTime += duration;
    }

    pwrState = pwrStateTrans;
    pwrStateTick = curTick();

    // if rank was refreshing, make sure to start scheduling requests again
    if (prev_state == PWR_REF) {
        // bus IDLED prior to REF
        // counter should be one for refresh command only
        assert(outstandingEvents == 1);
        // REF complete, decrement count and go back to IDLE
        --outstandingEvents;
        refreshState = REF_IDLE;

        DPRINTF(DRAMState, "Was refreshing for %llu ticks\n", duration);
        // if moving back to power-down after refresh
        if (pwrState != PWR_IDLE) {
            assert(pwrState == PWR_PRE_PDN);
            DPRINTF(DRAMState, "Switching to power down state after "
                    "refreshing rank %d at %llu tick\n", rank, curTick());
        }

        // completed refresh event, ensure next request is scheduled
        if (!dram.ctrl->requestEventScheduled()) {
            DPRINTF(DRAM, "Scheduling next request after refreshing"
                    " rank %d\n", rank);
            dram.ctrl->restartScheduler(curTick());
        }
    }

    if ((pwrState == PWR_ACT) && (refreshState == REF_PD_EXIT)) {
        // have exited ACT PD
        assert(prev_state == PWR_ACT_PDN);

        // go back to REF event and close banks
        refreshState = REF_PRE;
        schedule(refreshEvent, curTick());
    } else if (pwrState == PWR_IDLE) {
        DPRINTF(DRAMState, "All banks precharged\n");
        if (prev_state == PWR_SREF) {
            // set refresh state to REF_SREF_EXIT, ensuring inRefIdleState
            // continues to return false during tXS after SREF exit
            // Schedule a refresh which kicks things back into action
            refreshState = REF_SREF_EXIT;
            schedule(refreshEvent, curTick() + dram.tXS);
        } else {
            // if we have a pending refresh, and are now moving to
            // the idle state, directly transition to, or schedule refresh
            if ((refreshState == REF_PRE) || (refreshState == REF_PD_EXIT)) {
                // ensure refresh is restarted only after final PRE command.
                // do not restart refresh if controller is in an intermediate
                // state, after PRE_PDN exit, when banks are IDLE but an
                // ACT is scheduled.
                if (!activateEvent.scheduled()) {
                    // there should be nothing waiting at this point
                    assert(!powerEvent.scheduled());
                    if (refreshState == REF_PD_EXIT) {
                        // exiting PRE PD, will be in IDLE until tXP expires
                        // and then should transition to PWR_REF state
                        assert(prev_state == PWR_PRE_PDN);
                        schedulePowerEvent(PWR_REF, curTick() + dram.tXP);
                    } else if (refreshState == REF_PRE) {
                        // can directly move to PWR_REF state and proceed below
                        pwrState = PWR_REF;
                    }
                } else {
                    // must have PRE scheduled to transition back to IDLE
                    // and re-kick off refresh
                    assert(prechargeEvent.scheduled());
                }
            }
        }
    }

    // transition to the refresh state and re-start refresh process
    // refresh state machine will schedule the next power state transition
    if (pwrState == PWR_REF) {
        // completed final PRE for refresh or exiting power-down
        assert(refreshState == REF_PRE || refreshState == REF_PD_EXIT);

        // exited PRE PD for refresh, with no pending commands
        // bypass auto-refresh and go straight to SREF, where memory
        // will issue refresh immediately upon entry
        if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
            (dram.ctrl->drainState() != DrainState::Draining) &&
            (dram.ctrl->drainState() != DrainState::Drained) &&
            dram.enableDRAMPowerdown) {
            DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning "
                    "to self refresh at %llu tick\n", rank, curTick());
            powerDownSleep(PWR_SREF, curTick());

            // Since refresh was bypassed, remove event by decrementing count
            assert(outstandingEvents == 1);
            --outstandingEvents;

            // reset state back to IDLE temporarily until SREF is entered
            pwrState = PWR_IDLE;
        } else {
            // Not bypassing refresh for SREF entry
            DPRINTF(DRAMState, "Refreshing\n");

            // there should be nothing waiting at this point
            assert(!powerEvent.scheduled());

            // kick the refresh event loop into action again, and that
            // in turn will schedule a transition to the idle power
            // state once the refresh is done
            schedule(refreshEvent, curTick());

            // Banks transitioned to IDLE, start REF
            refreshState = REF_START;
        }
    }
}
void
DRAMInterface::Rank::updatePowerStats()
{
    // All commands up to refresh have completed
    // flush cmdList to DRAMPower
    flushCmdList();

    // Call the function that calculates window energy at intermediate update
    // events like at refresh, stats dump as well as at simulation exit.
    // Window starts at the last time the calcWindowEnergy function was
    // called and extends up to the current time.
    power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
                                    dram.timeStampOffset);

    // Get the energy from DRAMPower
    Data::MemoryPowerModel::Energy energy = power.powerlib.getEnergy();

    // The energy components inside the power lib are calculated over
    // the window so accumulate into the corresponding gem5 stat
    stats.actEnergy += energy.act_energy * dram.devicesPerRank;
    stats.preEnergy += energy.pre_energy * dram.devicesPerRank;
    stats.readEnergy += energy.read_energy * dram.devicesPerRank;
    stats.writeEnergy += energy.write_energy * dram.devicesPerRank;
    stats.refreshEnergy += energy.ref_energy * dram.devicesPerRank;
    stats.actBackEnergy += energy.act_stdby_energy * dram.devicesPerRank;
    stats.preBackEnergy += energy.pre_stdby_energy * dram.devicesPerRank;
    stats.actPowerDownEnergy += energy.f_act_pd_energy * dram.devicesPerRank;
    stats.prePowerDownEnergy += energy.f_pre_pd_energy * dram.devicesPerRank;
    stats.selfRefreshEnergy += energy.sref_energy * dram.devicesPerRank;

    // Accumulate window energy into the total energy.
    stats.totalEnergy += energy.window_energy * dram.devicesPerRank;
    // Average power must not be accumulated but calculated over the time
    // since last stats reset. SimClock::Frequency is tick period not tick
    // frequency.
    //              energy (pJ)     1e-9
    // power (mW) = ----------- * ----------
    //              time (tick)   tick_frequency
    stats.averagePower = (stats.totalEnergy.value() /
                          (curTick() - dram.lastStatsResetTick)) *
                         (SimClock::Frequency / 1000000000.0);
}
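
// Illustrative example of the conversion above (numbers are made up, not
// from any particular config): at the default 1 THz tick rate
// (SimClock::Frequency == 1e12), 2e9 pJ accumulated over 1e9 ticks (1 ms)
// gives (2e9 / 1e9) * (1e12 / 1e9) = 2 * 1000 = 2000 mW, i.e. 2 W.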
void
DRAMInterface::Rank::computeStats()
{
    DPRINTF(DRAM, "Computing stats due to a dump callback\n");

    // Update the stats
    updatePowerStats();

    // final update of power state times
    stats.pwrStateTime[pwrState] += (curTick() - pwrStateTick);
    pwrStateTick = curTick();
}
void
DRAMInterface::Rank::resetStats() {
    // The only way to clear the counters in DRAMPower is to call the
    // calcWindowEnergy function as that then calls clearCounters. The
    // clearCounters method itself is private.
    power.powerlib.calcWindowEnergy(divCeil(curTick(), dram.tCK) -
                                    dram.timeStampOffset);
}
bool
DRAMInterface::Rank::forceSelfRefreshExit() const {
    return (readEntries != 0) ||
           (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
}
void
DRAMInterface::DRAMStats::resetStats()
{
    dram.lastStatsResetTick = curTick();
}
DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
    : Stats::Group(&_dram),
    dram(_dram),

    ADD_STAT(readBursts, UNIT_COUNT, "Number of DRAM read bursts"),
    ADD_STAT(writeBursts, UNIT_COUNT, "Number of DRAM write bursts"),

    ADD_STAT(perBankRdBursts, UNIT_COUNT, "Per bank read bursts"),
    ADD_STAT(perBankWrBursts, UNIT_COUNT, "Per bank write bursts"),

    ADD_STAT(totQLat, UNIT_TICK, "Total ticks spent queuing"),
    ADD_STAT(totBusLat, UNIT_TICK, "Total ticks spent in databus transfers"),
    ADD_STAT(totMemAccLat, UNIT_TICK,
             "Total ticks spent from burst creation until serviced "
             "by the memory"),

    ADD_STAT(avgQLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
             "Average queueing delay per DRAM burst"),
    ADD_STAT(avgBusLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
             "Average bus latency per DRAM burst"),
    ADD_STAT(avgMemAccLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
             "Average memory access latency per DRAM burst"),

    ADD_STAT(readRowHits, UNIT_COUNT,
             "Number of row buffer hits during reads"),
    ADD_STAT(writeRowHits, UNIT_COUNT,
             "Number of row buffer hits during writes"),
    ADD_STAT(readRowHitRate, UNIT_RATIO, "Row buffer hit rate for reads"),
    ADD_STAT(writeRowHitRate, UNIT_RATIO, "Row buffer hit rate for writes"),

    ADD_STAT(bytesPerActivate, UNIT_BYTE,
             "Bytes accessed per row activation"),
    ADD_STAT(bytesRead, UNIT_BYTE, "Total number of bytes read from DRAM"),
    ADD_STAT(bytesWritten, UNIT_BYTE,
             "Total number of bytes written to DRAM"),
    ADD_STAT(avgRdBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
             "Average DRAM read bandwidth in MiBytes/s"),
    ADD_STAT(avgWrBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
             "Average DRAM write bandwidth in MiBytes/s"),
    ADD_STAT(peakBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
             "Theoretical peak bandwidth in MiByte/s"),

    ADD_STAT(busUtil, UNIT_RATIO, "Data bus utilization in percentage"),
    ADD_STAT(busUtilRead, UNIT_RATIO,
             "Data bus utilization in percentage for reads"),
    ADD_STAT(busUtilWrite, UNIT_RATIO,
             "Data bus utilization in percentage for writes"),

    ADD_STAT(pageHitRate, UNIT_RATIO,
             "Row buffer hit rate, read and write combined")
{
}
void
DRAMInterface::DRAMStats::regStats()
{
    using namespace Stats;

    avgQLat.precision(2);
    avgBusLat.precision(2);
    avgMemAccLat.precision(2);

    readRowHitRate.precision(2);
    writeRowHitRate.precision(2);

    perBankRdBursts.init(dram.banksPerRank * dram.ranksPerChannel);
    perBankWrBursts.init(dram.banksPerRank * dram.ranksPerChannel);

    bytesPerActivate
        .init(dram.maxAccessesPerRow ?
              dram.maxAccessesPerRow : dram.rowBufferSize)
        .flags(nozero);

    peakBW.precision(2);
    busUtil.precision(2);
    busUtilWrite.precision(2);
    busUtilRead.precision(2);

    pageHitRate.precision(2);

    avgQLat = totQLat / readBursts;
    avgBusLat = totBusLat / readBursts;
    avgMemAccLat = totMemAccLat / readBursts;

    readRowHitRate = (readRowHits / readBursts) * 100;
    writeRowHitRate = (writeRowHits / writeBursts) * 100;

    avgRdBW = (bytesRead / 1000000) / simSeconds;
    avgWrBW = (bytesWritten / 1000000) / simSeconds;
    peakBW = (SimClock::Frequency / dram.burstDelay()) *
              dram.bytesPerBurst() / 1000000;

    busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
    busUtilRead = avgRdBW / peakBW * 100;
    busUtilWrite = avgWrBW / peakBW * 100;

    pageHitRate = (writeRowHits + readRowHits) /
                  (writeBursts + readBursts) * 100;
}
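
// Worked example for the peakBW formula (illustrative values only): with a
// 1 THz tick rate, a burst delay of 3750 ticks and 64 bytes per burst,
// peakBW = (1e12 / 3750) * 64 / 1e6 = 17066 MiByte/s, i.e. one 64-byte
// burst every 3.75 ns.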
DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
    : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
    rank(_rank),

    ADD_STAT(actEnergy, UNIT_JOULE,
             "Energy for activate commands per rank (pJ)"),
    ADD_STAT(preEnergy, UNIT_JOULE,
             "Energy for precharge commands per rank (pJ)"),
    ADD_STAT(readEnergy, UNIT_JOULE,
             "Energy for read commands per rank (pJ)"),
    ADD_STAT(writeEnergy, UNIT_JOULE,
             "Energy for write commands per rank (pJ)"),
    ADD_STAT(refreshEnergy, UNIT_JOULE,
             "Energy for refresh commands per rank (pJ)"),
    ADD_STAT(actBackEnergy, UNIT_JOULE,
             "Energy for active background per rank (pJ)"),
    ADD_STAT(preBackEnergy, UNIT_JOULE,
             "Energy for precharge background per rank (pJ)"),
    ADD_STAT(actPowerDownEnergy, UNIT_JOULE,
             "Energy for active power-down per rank (pJ)"),
    ADD_STAT(prePowerDownEnergy, UNIT_JOULE,
             "Energy for precharge power-down per rank (pJ)"),
    ADD_STAT(selfRefreshEnergy, UNIT_JOULE,
             "Energy for self refresh per rank (pJ)"),

    ADD_STAT(totalEnergy, UNIT_JOULE, "Total energy per rank (pJ)"),
    ADD_STAT(averagePower, UNIT_WATT, "Core power per rank (mW)"),

    ADD_STAT(totalIdleTime, UNIT_TICK, "Total idle time per DRAM rank"),
    ADD_STAT(pwrStateTime, UNIT_TICK, "Time in different power states")
{
}
void
DRAMInterface::RankStats::regStats()
{
    Stats::Group::regStats();

    pwrStateTime
        .init(6)
        .subname(0, "IDLE")
        .subname(1, "REF")
        .subname(2, "SREF")
        .subname(3, "PRE_PDN")
        .subname(4, "ACT")
        .subname(5, "ACT_PDN");
}

void
DRAMInterface::RankStats::resetStats()
{
    Stats::Group::resetStats();

    rank.resetStats();
}

void
DRAMInterface::RankStats::preDumpStats()
{
    Stats::Group::preDumpStats();

    rank.computeStats();
}
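
// NVM interface. In contrast to the DRAM interface above, reads are
// non-deterministic: a read command is issued first (chooseRead) and the
// data is only transferred later, by a separate burst, once the media has
// buffered it and signalled readiness via readReadyEvent.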
NVMInterface::NVMInterface(const NVMInterfaceParams &_p)
    : MemInterface(_p),
      maxPendingWrites(_p.max_pending_writes),
      maxPendingReads(_p.max_pending_reads),
      twoCycleRdWr(_p.two_cycle_rdwr),
      tREAD(_p.tREAD), tWRITE(_p.tWRITE), tSEND(_p.tSEND),
      stats(*this),
      writeRespondEvent([this]{ processWriteRespondEvent(); }, name()),
      readReadyEvent([this]{ processReadReadyEvent(); }, name()),
      nextReadAt(0), numPendingReads(0), numReadDataReady(0),
      numReadsToIssue(0), numWritesQueued(0)
{
    DPRINTF(NVM, "Setting up NVM Interface\n");

    fatal_if(!isPowerOf2(burstSize), "NVM burst size %d is not allowed, "
             "must be a power of two\n", burstSize);

    // sanity check the ranks since we rely on bit slicing for the
    // address decoding
    fatal_if(!isPowerOf2(ranksPerChannel), "NVM rank count of %d is "
             "not allowed, must be a power of two\n", ranksPerChannel);

    for (int i = 0; i < ranksPerChannel; i++) {
        // Add NVM ranks to the system
        DPRINTF(NVM, "Creating NVM rank %d\n", i);
        Rank* rank = new Rank(_p, i, *this);
        ranks.push_back(rank);
    }

    uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());

    DPRINTF(NVM, "NVM capacity %lld (%lld) bytes\n", capacity,
            AbstractMemory::size());

    rowsPerBank = capacity / (rowBufferSize *
                              banksPerRank * ranksPerChannel);
}
NVMInterface::Rank::Rank(const NVMInterfaceParams &_p,
                         int _rank, NVMInterface& _nvm)
    : EventManager(&_nvm), rank(_rank), banks(_p.banks_per_rank)
{
    for (int b = 0; b < _p.banks_per_rank; b++) {
        banks[b].bank = b;
        // No bank groups; simply assign to bank number
        banks[b].bankgr = b;
    }
}
void
NVMInterface::init()
{
    AbstractMemory::init();
}

void NVMInterface::setupRank(const uint8_t rank, const bool is_read)
{
    if (is_read) {
        // increment count to trigger read and track number of reads in Q
        numReadsToIssue++;
    } else {
        // increment count to track number of writes in Q
        numWritesQueued++;
    }
}
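
// FR-FCFS arbitration over the NVM packets in the queue: prefer the oldest
// packet that can issue seamlessly (no extra rank-to-rank or media delay);
// failing that, fall back to the oldest packet whose target buffer is
// already prepped.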
std::pair<MemPacketQueue::iterator, Tick>
NVMInterface::chooseNextFRFCFS(MemPacketQueue& queue, Tick min_col_at) const
{
    // remember if we found a hit, but one that cannot issue seamlessly
    bool found_prepped_pkt = false;

    auto selected_pkt_it = queue.end();
    Tick selected_col_at = MaxTick;

    for (auto i = queue.begin(); i != queue.end() ; ++i) {
        MemPacket* pkt = *i;

        // select optimal NVM packet in Q
        if (!pkt->isDram()) {
            const Bank& bank = ranks[pkt->rank]->banks[pkt->bank];
            const Tick col_allowed_at = pkt->isRead() ? bank.rdAllowedAt :
                                                        bank.wrAllowedAt;

            // check if rank is not doing a refresh and thus is available,
            // if not, jump to the next packet
            if (burstReady(pkt)) {
                DPRINTF(NVM, "%s bank %d - Rank %d available\n", __func__,
                        pkt->bank, pkt->rank);

                // no additional rank-to-rank or media delays
                if (col_allowed_at <= min_col_at) {
                    // FCFS within entries that can issue without
                    // additional delay, such as same rank accesses
                    // or media delay requirements
                    selected_pkt_it = i;
                    selected_col_at = col_allowed_at;
                    // no need to look through the remaining queue entries
                    DPRINTF(NVM, "%s Seamless buffer hit\n", __func__);
                    break;
                } else if (!found_prepped_pkt) {
                    // packet is to prepped region but cannot issue
                    // seamlessly; remember this one and continue
                    selected_pkt_it = i;
                    selected_col_at = col_allowed_at;
                    DPRINTF(NVM, "%s Prepped packet found \n", __func__);
                    found_prepped_pkt = true;
                }
            } else {
                DPRINTF(NVM, "%s bank %d - Rank %d not available\n", __func__,
                        pkt->bank, pkt->rank);
            }
        }
    }

    if (selected_pkt_it == queue.end()) {
        DPRINTF(NVM, "%s no available NVM ranks found\n", __func__);
    }

    return std::make_pair(selected_pkt_it, selected_col_at);
}
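
// Non-deterministic read issue. numReadsToIssue acts as a credit counter,
// incremented in setupRank() when a read enters the queue; each call here
// consumes one credit and issues the read command, while the data transfer
// itself happens later via doBurstAccess().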
void
NVMInterface::chooseRead(MemPacketQueue& queue)
{
    Tick cmd_at = std::max(curTick(), nextReadAt);

    // This method does the arbitration between non-deterministic read
    // requests to NVM. The chosen packet is not removed from the queue
    // at this time. Removal from the queue will occur when the data is
    // ready and a separate SEND command is issued to retrieve it via the
    // chooseNext function in the top-level controller.
    assert(!queue.empty());

    assert(numReadsToIssue > 0);
    numReadsToIssue--;

    // For simplicity, issue non-deterministic reads in order (fcfs)
    for (auto i = queue.begin(); i != queue.end() ; ++i) {
        MemPacket* pkt = *i;

        // Find 1st NVM read packet that hasn't issued read command
        if (pkt->readyTime == MaxTick && !pkt->isDram() && pkt->isRead()) {

            Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];

            // issuing a read, inc counter and verify we haven't overrun
            numPendingReads++;
            assert(numPendingReads <= maxPendingReads);

            // increment the bytes accessed and the accesses per row
            bank_ref.bytesAccessed += burstSize;

            // Verify command bandwidth to issue
            // Host can issue read immediately with buffering closer
            // to the NVM. The actual execution at the NVM may be delayed
            // due to busy resources
            if (twoCycleRdWr) {
                cmd_at = ctrl->verifyMultiCmd(cmd_at,
                                              maxCommandsPerWindow, tCK);
            } else {
                cmd_at = ctrl->verifySingleCmd(cmd_at,
                                               maxCommandsPerWindow);
            }

            // Update delay to next read
            // Ensures single read command issued per cycle
            nextReadAt = cmd_at + tCK;

            // If accessing a new location in this bank, update timing
            // and stats
            if (bank_ref.openRow != pkt->row) {
                // update the open bank, re-using row field
                bank_ref.openRow = pkt->row;

                // sample the bytes accessed to a buffer in this bank
                // here when we are re-buffering the data
                stats.bytesPerBank.sample(bank_ref.bytesAccessed);
                // start counting anew
                bank_ref.bytesAccessed = 0;
            }

            // hold off next command to this bank until the read completes
            // and the data has been successfully buffered
            // can pipeline accesses to the same bank, sending them
            // across the interface B2B, but will incur full access
            // delay between data ready responses to different buffers
            // in a bank
            bank_ref.actAllowedAt = std::max(cmd_at,
                                    bank_ref.actAllowedAt) + tREAD;

            // update per packet readyTime to hold off burst read operation
            // overloading readyTime, which will be updated again when the
            // burst is issued
            pkt->readyTime = std::max(cmd_at, bank_ref.actAllowedAt);

            DPRINTF(NVM, "Issuing NVM Read to bank %d at tick %d. "
                    "Data ready at %d\n",
                    bank_ref.bank, cmd_at, pkt->readyTime);

            // Insert into read ready queue. It will be handled after
            // the media delay has been met
            if (readReadyQueue.empty()) {
                assert(!readReadyEvent.scheduled());
                schedule(readReadyEvent, pkt->readyTime);
            } else if (readReadyEvent.when() > pkt->readyTime) {
                // move it sooner in time, to the first read with data
                reschedule(readReadyEvent, pkt->readyTime);
            } else {
                assert(readReadyEvent.scheduled());
            }
            readReadyQueue.push_back(pkt->readyTime);

            // found an NVM read to issue - break out
            break;
        }
    }
}
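
// readReadyQueue is intentionally left unsorted; the scan below finds both
// the entry matching curTick() (the response being serviced) and the next
// smallest ready time, so the event can be erased and rescheduled in a
// single pass.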
void
NVMInterface::processReadReadyEvent()
{
    // signal that there is read data ready to be transmitted
    numReadDataReady++;

    DPRINTF(NVM,
            "processReadReadyEvent(): Data for an NVM read is ready. "
            "numReadDataReady is %d\t numPendingReads is %d\n",
            numReadDataReady, numPendingReads);

    // Find lowest ready time and verify it is equal to curTick
    // also find the next lowest to schedule next event
    // Done with this response, erase entry
    auto ready_it = readReadyQueue.begin();
    Tick next_ready_at = MaxTick;
    for (auto i = readReadyQueue.begin(); i != readReadyQueue.end() ; ++i) {
        if (*ready_it > *i) {
            next_ready_at = *ready_it;
            ready_it = i;
        } else if ((next_ready_at > *i) && (i != ready_it)) {
            next_ready_at = *i;
        }
    }

    // Verify we found the time of this event and remove it
    assert(*ready_it == curTick());
    readReadyQueue.erase(ready_it);

    if (!readReadyQueue.empty()) {
        assert(readReadyQueue.front() >= curTick());
        assert(!readReadyEvent.scheduled());
        schedule(readReadyEvent, next_ready_at);
    }

    // It is possible that a new command kicks things back into
    // action before reaching this point but need to ensure that we
    // continue to process new commands as read data becomes ready
    // This will also trigger a drain if needed
    if (!ctrl->requestEventScheduled()) {
        DPRINTF(NVM, "Restart controller scheduler immediately\n");
        ctrl->restartScheduler(curTick());
    }
}
bool
NVMInterface::burstReady(MemPacket* pkt) const {
    bool read_rdy = pkt->isRead() && (ctrl->inReadBusState(true)) &&
                    (pkt->readyTime <= curTick()) && (numReadDataReady > 0);
    bool write_rdy = !pkt->isRead() && !ctrl->inReadBusState(true) &&
                     !writeRespQueueFull();
    return (read_rdy || write_rdy);
}
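
// doBurstAccess() models the data transfer on the bus. For reads this is
// the burst that drains an already-buffered result (the media delay was
// paid in chooseRead()); for writes it both transfers the data and books
// the tWRITE media delay through the write response queue.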
std::pair<Tick, Tick>
NVMInterface::doBurstAccess(MemPacket* pkt, Tick next_burst_at)
{
    DPRINTF(NVM, "NVM Timing access to addr %lld, rank/bank/row %d %d %d\n",
            pkt->addr, pkt->rank, pkt->bank, pkt->row);

    Bank& bank_ref = ranks[pkt->rank]->banks[pkt->bank];

    // respect any constraints on the command
    const Tick bst_allowed_at = pkt->isRead() ?
                bank_ref.rdAllowedAt : bank_ref.wrAllowedAt;

    // we need to wait until the bus is available before we can issue
    // the command; need minimum of tBURST between commands
    Tick cmd_at = std::max(bst_allowed_at, curTick());

    // we need to wait until the bus is available before we can issue
    // the command; need minimum of tBURST between commands
    cmd_at = std::max(cmd_at, next_burst_at);

    // Verify there is command bandwidth to issue
    // Read burst (send command) is a simple data access and only requires
    // one command cycle
    // Write command may require multiple cycles to enable larger address
    // space
    if (pkt->isRead() || !twoCycleRdWr) {
        cmd_at = ctrl->verifySingleCmd(cmd_at, maxCommandsPerWindow);
    } else {
        cmd_at = ctrl->verifyMultiCmd(cmd_at, maxCommandsPerWindow, tCK);
    }

    // update the packet ready time to reflect when data will be transferred
    // Use the same bus delays defined for NVM
    pkt->readyTime = cmd_at + tSEND + tBURST;

    Tick dly_to_rd_cmd;
    Tick dly_to_wr_cmd;
    for (auto n : ranks) {
        for (int i = 0; i < banksPerRank; i++) {
            // base delay is a function of tBURST and bus turnaround
            dly_to_rd_cmd = pkt->isRead() ? tBURST : writeToReadDelay();
            dly_to_wr_cmd = pkt->isRead() ? readToWriteDelay() : tBURST;

            if (pkt->rank != n->rank) {
                // adjust timing for different ranks
                // Need to account for rank-to-rank switching with tCS
                dly_to_wr_cmd = rankToRankDelay();
                dly_to_rd_cmd = rankToRankDelay();
            }

            n->banks[i].rdAllowedAt = std::max(cmd_at + dly_to_rd_cmd,
                                      n->banks[i].rdAllowedAt);

            n->banks[i].wrAllowedAt = std::max(cmd_at + dly_to_wr_cmd,
                                      n->banks[i].wrAllowedAt);
        }
    }

    DPRINTF(NVM, "NVM Access to %lld, ready at %lld.\n",
            pkt->addr, pkt->readyTime);

    if (pkt->isRead()) {
        // completed the read, decrement counters
        assert(numPendingReads != 0);
        assert(numReadDataReady != 0);

        numPendingReads--;
        numReadDataReady--;
    } else {
        // Adjust number of NVM writes in Q
        assert(numWritesQueued > 0);
        numWritesQueued--;

        // increment the bytes accessed and the accesses per row
        // only increment for writes as the reads are handled when
        // the non-deterministic read is issued, before the data transfer
        bank_ref.bytesAccessed += burstSize;

        // Commands will be issued serially when accessing the same bank
        // Commands can issue in parallel to different banks
        if ((bank_ref.bank == pkt->bank) &&
            (bank_ref.openRow != pkt->row)) {
            // update the open buffer, re-using row field
            bank_ref.openRow = pkt->row;

            // sample the bytes accessed to a buffer in this bank
            // here when we are re-buffering the data
            stats.bytesPerBank.sample(bank_ref.bytesAccessed);
            // start counting anew
            bank_ref.bytesAccessed = 0;
        }

        // Determine when write will actually complete, assuming it is
        // scheduled to push to NVM immediately
        // update actAllowedAt to serialize next command completion that
        // accesses this bank; must wait until this write completes
        // Data accesses to the same buffer in this bank
        // can issue immediately after actAllowedAt expires, without
        // waiting additional delay of tWRITE. Can revisit this
        // assumption/simplification in the future.
        bank_ref.actAllowedAt = std::max(pkt->readyTime,
                                bank_ref.actAllowedAt) + tWRITE;

        // Need to track number of outstanding writes to
        // ensure 'buffer' on media controller does not overflow
        assert(!writeRespQueueFull());

        // Insert into write done queue. It will be handled after
        // the media delay has been met
        if (writeRespQueueEmpty()) {
            assert(!writeRespondEvent.scheduled());
            schedule(writeRespondEvent, bank_ref.actAllowedAt);
        } else {
            assert(writeRespondEvent.scheduled());
        }

        writeRespQueue.push_back(bank_ref.actAllowedAt);
        writeRespQueue.sort();
        if (writeRespondEvent.when() > bank_ref.actAllowedAt) {
            DPRINTF(NVM, "Rescheduled respond event from %lld to %lld\n",
                    writeRespondEvent.when(), bank_ref.actAllowedAt);
            DPRINTF(NVM, "Front of response queue is %lld\n",
                    writeRespQueue.front());
            reschedule(writeRespondEvent, bank_ref.actAllowedAt);
        }
    }

    if (pkt->isRead()) {
        stats.readBursts++;
        stats.bytesRead += burstSize;
        stats.perBankRdBursts[pkt->bankId]++;
        stats.pendingReads.sample(numPendingReads);

        // Update latency stats
        stats.totMemAccLat += pkt->readyTime - pkt->entryTime;
        stats.totBusLat += tBURST;
        stats.totQLat += cmd_at - pkt->entryTime;
    } else {
        stats.writeBursts++;
        stats.bytesWritten += burstSize;
        stats.perBankWrBursts[pkt->bankId]++;
    }

    return std::make_pair(cmd_at, cmd_at + tBURST);
}
void
NVMInterface::processWriteRespondEvent()
{
    DPRINTF(NVM,
            "processWriteRespondEvent(): An NVM write reached its readyTime. "
            "%d remaining pending NVM writes\n", writeRespQueue.size());

    // Update stat to track histogram of pending writes
    stats.pendingWrites.sample(writeRespQueue.size());

    // Done with this response, pop entry
    writeRespQueue.pop_front();

    if (!writeRespQueue.empty()) {
        assert(writeRespQueue.front() >= curTick());
        assert(!writeRespondEvent.scheduled());
        schedule(writeRespondEvent, writeRespQueue.front());
    }

    // It is possible that a new command kicks things back into
    // action before reaching this point but need to ensure that we
    // continue to process new commands as writes complete at the media and
    // credits become available. This will also trigger a drain if needed
    if (!ctrl->requestEventScheduled()) {
        DPRINTF(NVM, "Restart controller scheduler immediately\n");
        ctrl->restartScheduler(curTick());
    }
}
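
// Called when a burst is issued to a rank on another interface sharing this
// channel, so that the NVM ranks also respect the rank-to-rank switching
// delay.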
void
NVMInterface::addRankToRankDelay(Tick cmd_at)
{
    // update timing for NVM ranks due to bursts issued
    // to ranks for other media interfaces
    for (auto n : ranks) {
        for (int i = 0; i < banksPerRank; i++) {
            // different rank by default
            // Need to only account for rank-to-rank switching
            n->banks[i].rdAllowedAt = std::max(cmd_at + rankToRankDelay(),
                                      n->banks[i].rdAllowedAt);
            n->banks[i].wrAllowedAt = std::max(cmd_at + rankToRankDelay(),
                                      n->banks[i].wrAllowedAt);
        }
    }
}
bool
NVMInterface::isBusy(bool read_queue_empty, bool all_writes_nvm)
{
    DPRINTF(NVM, "isBusy: numReadDataReady = %d\n", numReadDataReady);
    // Determine NVM is busy and cannot issue a burst
    // A read burst cannot issue when data is not ready from the NVM
    // Also check that we have reads queued to ensure we can change
    // bus direction to service potential write commands.
    // A write cannot issue once we've reached MAX pending writes
    // Only assert busy for the write case when there are also
    // no reads in Q and the write queue only contains NVM commands
    // This allows the bus state to switch and service reads
    return (ctrl->inReadBusState(true) ?
                (numReadDataReady == 0) && !read_queue_empty :
                writeRespQueueFull() && read_queue_empty &&
                                        all_writes_nvm);
}
NVMInterface::NVMStats::NVMStats(NVMInterface &_nvm)
    : Stats::Group(&_nvm),
    nvm(_nvm),

    ADD_STAT(readBursts, UNIT_COUNT, "Number of NVM read bursts"),
    ADD_STAT(writeBursts, UNIT_COUNT, "Number of NVM write bursts"),

    ADD_STAT(perBankRdBursts, UNIT_COUNT, "Per bank read bursts"),
    ADD_STAT(perBankWrBursts, UNIT_COUNT, "Per bank write bursts"),

    ADD_STAT(totQLat, UNIT_TICK, "Total ticks spent queuing"),
    ADD_STAT(totBusLat, UNIT_TICK, "Total ticks spent in databus transfers"),
    ADD_STAT(totMemAccLat, UNIT_TICK,
             "Total ticks spent from burst creation until serviced "
             "by the memory"),

    ADD_STAT(avgQLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
             "Average queueing delay per NVM burst"),
    ADD_STAT(avgBusLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
             "Average bus latency per NVM burst"),
    ADD_STAT(avgMemAccLat, UNIT_RATE(Stats::Units::Tick, Stats::Units::Count),
             "Average memory access latency per NVM burst"),

    ADD_STAT(bytesRead, UNIT_BYTE, "Total number of bytes read from NVM"),
    ADD_STAT(bytesWritten, UNIT_BYTE, "Total number of bytes written to NVM"),
    ADD_STAT(avgRdBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
             "Average NVM read bandwidth in MiBytes/s"),
    ADD_STAT(avgWrBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
             "Average NVM write bandwidth in MiBytes/s"),
    ADD_STAT(peakBW, UNIT_RATE(Stats::Units::Byte, Stats::Units::Second),
             "Theoretical peak bandwidth in MiByte/s"),
    ADD_STAT(busUtil, UNIT_RATIO, "NVM Data bus utilization in percentage"),
    ADD_STAT(busUtilRead, UNIT_RATIO,
             "NVM Data bus read utilization in percentage"),
    ADD_STAT(busUtilWrite, UNIT_RATIO,
             "NVM Data bus write utilization in percentage"),

    ADD_STAT(pendingReads, UNIT_COUNT,
             "Reads issued to NVM for which data has not been transferred"),
    ADD_STAT(bytesPerBank, UNIT_BYTE,
             "Bytes read within a bank before loading new bank")
{
}
void
NVMInterface::NVMStats::regStats()
{
    using namespace Stats;

    perBankRdBursts.init(nvm.ranksPerChannel == 0 ? 1 :
                         nvm.banksPerRank * nvm.ranksPerChannel);

    perBankWrBursts.init(nvm.ranksPerChannel == 0 ? 1 :
                         nvm.banksPerRank * nvm.ranksPerChannel);

    avgQLat.precision(2);
    avgBusLat.precision(2);
    avgMemAccLat.precision(2);

    avgRdBW.precision(2);
    avgWrBW.precision(2);
    peakBW.precision(2);

    busUtil.precision(2);
    busUtilRead.precision(2);
    busUtilWrite.precision(2);

    pendingReads
        .init(nvm.maxPendingReads)
        .flags(nozero);

    pendingWrites
        .init(nvm.maxPendingWrites)
        .flags(nozero);

    bytesPerBank
        .init(nvm.rowBufferSize)
        .flags(nozero);

    avgQLat = totQLat / readBursts;
    avgBusLat = totBusLat / readBursts;
    avgMemAccLat = totMemAccLat / readBursts;

    avgRdBW = (bytesRead / 1000000) / simSeconds;
    avgWrBW = (bytesWritten / 1000000) / simSeconds;
    peakBW = (SimClock::Frequency / nvm.tBURST) *
              nvm.burstSize / 1000000;

    busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
    busUtilRead = avgRdBW / peakBW * 100;
    busUtilWrite = avgWrBW / peakBW * 100;
}