From b6ecfe918364ce4b7df0f95590b483100bbfcba9 Mon Sep 17 00:00:00 2001
From: Wendy Elsasser
Date: Sat, 20 Sep 2014 17:17:57 -0400
Subject: [PATCH] mem: Add memory rank-to-rank delay

Add the following delay to the DRAM controller:
- tCS : Different rank bus turnaround delay

This will be applied for 1) read-to-read, 2) write-to-write,
3) write-to-read, and 4) read-to-write command sequences, where the
new command accesses a different rank than the previous burst.

The delay defaults to 2*tCK for each defined memory class. Note that
this does not correspond to one particular timing constraint, but is a
way of modelling all the associated constraints.

The DRAM controller has some minor changes to prioritize commands to
the same rank. This prioritization only occurs when the command stream
is not switching from a read to a write or vice versa (when switching,
there is a gap on the bus in any case). To prioritize commands to the
same rank, the model determines whether any commands of the same type
are queued to the same rank as the previous command. This check
ensures that the 'same rank' command can execute without adding
bubbles to the command flow, e.g. any required ACT can be issued under
the hood, allowing the burst to issue seamlessly.
---
 src/mem/DRAMCtrl.py  |  44 ++++++++---
 src/mem/dram_ctrl.cc | 180 ++++++++++++++++++++++++++++++-------------
 src/mem/dram_ctrl.hh |  25 +++++-
 3 files changed, 180 insertions(+), 69 deletions(-)

diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py
index 8c573ca3a..b06b8e7eb 100644
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -156,11 +156,17 @@ class DRAMCtrl(AbstractMemory):
     # to be sent. It is 7.8 us for a 64ms refresh requirement
     tREFI = Param.Latency("Refresh command interval")
 
-    # write-to-read turn around penalty
-    tWTR = Param.Latency("Write to read switching time")
+    # write-to-read, same rank turnaround penalty
+    tWTR = Param.Latency("Write to read, same rank switching time")
 
-    # read-to-write turn around penalty, bus turnaround delay
-    tRTW = Param.Latency("Read to write switching time")
+    # read-to-write, same rank turnaround penalty
+    tRTW = Param.Latency("Read to write, same rank switching time")
+
+    # rank-to-rank bus delay penalty
+    # this does not correlate to a memory timing parameter and encompasses:
+    # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
+    # different rank bus delay
+    tCS = Param.Latency("Rank to rank switching time")
 
     # minimum row activate to row activate delay time
     tRRD = Param.Latency("ACT to ACT delay")
@@ -221,9 +227,12 @@ class DDR3_1600_x64(DRAMCtrl):
     # Greater of 4 CK or 7.5 ns
     tRTP = '7.5ns'
 
-    # Default read-to-write bus around to 2 CK, @800 MHz = 2.5 ns
+    # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
     tRTW = '2.5ns'
 
+    # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+    tCS = '2.5ns'
+
     # <=85C, half for >85C
     tREFI = '7.8us'
 
@@ -296,9 +305,12 @@ class DDR4_2400_x64(DRAMCtrl):
     # Greater of 4 CK or 7.5 ns
     tRTP = '7.5ns'
 
-    # Default read-to-write bus around to 2 CK, @1200 MHz = 1.666 ns
+    # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666 ns
     tRTW = '1.666ns'
 
+    # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
+    tCS = '1.666ns'
+
     # <=85C, half for >85C
     tREFI = '7.8us'
 
@@ -353,9 +365,12 @@ class DDR3_1333_x64_DRAMSim2(DRAMCtrl):
     # Greater of 4 CK or 7.5 ns, 4 CK @ 666.66 MHz = 6 ns
     tWTR = '7.5ns'
 
-    # Default read-to-write bus around to 2 CK, @666.66 MHz = 3 ns
+    # Default same rank rd-to-wr bus turnaround to 2 CK, @666.66 MHz = 3 ns
     tRTW = '3ns'
 
+    # Default different rank bus delay to 2 CK, @666.66 MHz = 3 ns
+    tCS = '3ns'
+
     tRRD = '6.0ns'
 
     tXAW = '30ns'
@@ -416,9 +431,12 @@ class LPDDR2_S4_1066_x32(DRAMCtrl):
     # Irrespective of speed grade, tWTR is 7.5 ns
     tWTR = '7.5ns'
 
-    # Default read-to-write bus around to 2 CK, @533 MHz = 3.75 ns
+    # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
     tRTW = '3.75ns'
 
+    # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
+    tCS = '3.75ns'
+
     # Activate to activate irrespective of density and speed grade
     tRRD = '10.0ns'
 
@@ -473,9 +491,12 @@ class WideIO_200_x128(DRAMCtrl):
     # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
     tWTR = '15ns'
 
-    # Default read-to-write bus around to 2 CK, @200 MHz = 10 ns
+    # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
     tRTW = '10ns'
 
+    # Default different rank bus delay to 2 CK, @200 MHz = 10 ns
+    tCS = '10ns'
+
     # Activate to activate irrespective of density and speed grade
     tRRD = '10.0ns'
 
@@ -536,9 +557,12 @@ class LPDDR3_1600_x32(DRAMCtrl):
     # Irrespective of speed grade, tWTR is 7.5 ns
     tWTR = '7.5ns'
 
-    # Default read-to-write bus around to 2 CK, @800 MHz = 2.5 ns
+    # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
     tRTW = '2.5ns'
 
+    # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+    tCS = '2.5ns'
+
     # Activate to activate irrespective of density and speed grade
     tRRD = '10.0ns'
 
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc
index 1d96e274c..ca562f4f7 100644
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -76,7 +76,7 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
     writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
     minWritesPerSwitch(p->min_writes_per_switch),
     writesThisTime(0), readsThisTime(0),
-    tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tBURST(p->tBURST),
+    tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST),
     tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), tWR(p->tWR),
     tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
     tXAW(p->tXAW), activationLimit(p->activation_limit),
@@ -87,7 +87,8 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
     backendLatency(p->static_backend_latency),
     busBusyUntil(0), refreshDueAt(0), refreshState(REF_IDLE),
     pwrStateTrans(PWR_IDLE), pwrState(PWR_IDLE), prevArrival(0),
-    nextReqTime(0), pwrStateTick(0), numBanksActive(0)
+    nextReqTime(0), pwrStateTick(0), numBanksActive(0),
+    activeRank(0)
 {
     // create the bank states based on the dimensions of the ranks and
     // banks
@@ -683,7 +684,7 @@ DRAMCtrl::processRespondEvent()
 }
 
 void
-DRAMCtrl::chooseNext(std::deque<DRAMPacket*>& queue)
+DRAMCtrl::chooseNext(std::deque<DRAMPacket*>& queue, bool switched_cmd_type)
 {
     // This method does the arbitration between requests. The chosen
     // packet is simply moved to the head of the queue. The other
@@ -699,13 +700,13 @@ DRAMCtrl::chooseNext(std::deque<DRAMPacket*>& queue)
     if (memSchedPolicy == Enums::fcfs) {
         // Do nothing, since the correct request is already head
     } else if (memSchedPolicy == Enums::frfcfs) {
-        reorderQueue(queue);
+        reorderQueue(queue, switched_cmd_type);
     } else
         panic("No scheduling policy chosen\n");
 }
 
 void
-DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue)
+DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue, bool switched_cmd_type)
 {
     // Only determine this when needed
     uint64_t earliest_banks = 0;
@@ -713,6 +714,7 @@ DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue)
     // Search for row hits first, if no row hit is found then schedule the
     // packet to one of the earliest banks available
     bool found_earliest_pkt = false;
+    bool found_prepped_diff_rank_pkt = false;
     auto selected_pkt_it = queue.begin();
 
     for (auto i = queue.begin(); i != queue.end() ; ++i) {
@@ -720,25 +722,30 @@ DRAMCtrl::reorderQueue(std::deque<DRAMPacket*>& queue)
         const Bank& bank = dram_pkt->bankRef;
         // Check if it is a row hit
         if (bank.openRow == dram_pkt->row) {
-            // FCFS within the hits
-            DPRINTF(DRAM, "Row buffer hit\n");
-            selected_pkt_it = i;
-            break;
-        } else if (!found_earliest_pkt) {
-            // No row hit, go for first ready
+            if (dram_pkt->rank == activeRank || switched_cmd_type) {
+                // FCFS within the hits, giving priority to commands
+                // that access the same rank as the previous burst
+                // to minimize bus turnaround delays
+                // Only give rank priority when command type is not changing
+                DPRINTF(DRAM, "Row buffer hit\n");
+                selected_pkt_it = i;
+                break;
+            } else if (!found_prepped_diff_rank_pkt) {
+                // found row hit for command on different rank than prev burst
+                selected_pkt_it = i;
+                found_prepped_diff_rank_pkt = true;
+            }
+        } else if (!found_earliest_pkt && !found_prepped_diff_rank_pkt) {
+            // No row hit and
+            // haven't found an entry with a row hit to a new rank
             if (earliest_banks == 0)
-                earliest_banks = minBankActAt(queue);
-
-            // simplistic approximation of when the bank can issue an
-            // activate, this is calculated in minBankActAt and could
-            // be cached
-            Tick act_at = bank.openRow == Bank::NO_ROW ?
-                bank.actAllowedAt :
-                std::max(bank.preAllowedAt, curTick()) + tRP;
-
-            // Bank is ready or is the first available bank
-            if (act_at <= curTick() ||
-                bits(earliest_banks, dram_pkt->bankId, dram_pkt->bankId)) {
+                // Determine entries with earliest bank prep delay
+                // Function will give priority to commands that access the
+                // same rank as previous burst and can prep the bank seamlessly
+                earliest_banks = minBankPrep(queue, switched_cmd_type);
+
+            // FCFS - Bank is first available bank
+            if (bits(earliest_banks, dram_pkt->bankId, dram_pkt->bankId)) {
                 // Remember the packet to be scheduled to one of the earliest
                 // banks available, FCFS amongst the earliest banks
                 selected_pkt_it = i;
@@ -983,6 +990,9 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
     // read/write (add a max with tCCD here)
     bank.colAllowedAt = cmd_at + tBURST;
 
+    // Save rank of current access
+    activeRank = dram_pkt->rank;
+
     // If this is a write, we also need to respect the write recovery
     // time before a precharge, in the case of a read, respect the
     // read to precharge constraint
@@ -1095,6 +1105,9 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
 void
 DRAMCtrl::processNextReqEvent()
 {
+    // pre-emptively set to false. Overwrite if in READ_TO_WRITE
+    // or WRITE_TO_READ state
+    bool switched_cmd_type = false;
     if (busState == READ_TO_WRITE) {
         DPRINTF(DRAM, "Switching to writes after %d reads with %d reads "
                 "waiting\n", readsThisTime, readQueue.size());
@@ -1106,6 +1119,7 @@ DRAMCtrl::processNextReqEvent()
 
         // now proceed to do the actual writes
         busState = WRITE;
+        switched_cmd_type = true;
     } else if (busState == WRITE_TO_READ) {
         DPRINTF(DRAM, "Switching to reads after %d writes with %d writes "
                 "waiting\n", writesThisTime, writeQueue.size());
@@ -1114,6 +1128,7 @@ DRAMCtrl::processNextReqEvent()
         writesThisTime = 0;
 
         busState = READ;
+        switched_cmd_type = true;
     }
 
     if (refreshState != REF_IDLE) {
@@ -1160,10 +1175,26 @@ DRAMCtrl::processNextReqEvent()
         } else {
             // Figure out which read request goes next, and move it to the
            // front of the read queue
-            chooseNext(readQueue);
+            chooseNext(readQueue, switched_cmd_type);
 
             DRAMPacket* dram_pkt = readQueue.front();
 
+            // here we get a bit creative and shift the bus busy time not
+            // just the tWTR, but also a CAS latency to capture the fact
+            // that we are allowed to prepare a new bank, but not issue a
+            // read command until after tWTR, in essence we capture a
+            // bubble on the data bus that is tWTR + tCL
+            if (switched_cmd_type) {
+                // add a bubble to the data bus for write-to-read turn around
+                // or tCS (different rank bus delay).
+                busBusyUntil += (dram_pkt->rank == activeRank) ? tWTR + tCL :
+                                                                 tCS;
+            } else if (dram_pkt->rank != activeRank) {
+                // add a bubble to the data bus, as defined by the
+                // tCS parameter for rank-to-rank delay
+                busBusyUntil += tCS;
+            }
+
             doDRAMAccess(dram_pkt);
 
             // At this point we're done dealing with the request
@@ -1197,21 +1228,23 @@ DRAMCtrl::processNextReqEvent()
             if (switch_to_writes) {
                 // transition to writing
                 busState = READ_TO_WRITE;
-
-                // add a bubble to the data bus, as defined by the
-                // tRTW parameter
-                busBusyUntil += tRTW;
-
-                // update the minimum timing between the requests,
-                // this shifts us back in time far enough to do any
-                // bank preparation
-                nextReqTime = busBusyUntil - (tRP + tRCD + tCL);
             }
         }
    } else {
-        chooseNext(writeQueue);
+        chooseNext(writeQueue, switched_cmd_type);
         DRAMPacket* dram_pkt = writeQueue.front();
         // sanity check
         assert(dram_pkt->size <= burstSize);
+
+        if (switched_cmd_type) {
+            // add a bubble to the data bus, as defined by the
+            // tRTW or tCS parameter, depending on whether changing ranks
+            busBusyUntil += (dram_pkt->rank == activeRank) ? tRTW : tCS;
+        } else if (dram_pkt->rank != activeRank) {
+            // add a bubble to the data bus, as defined by the
+            // tCS parameter for rank-to-rank delay
+            busBusyUntil += tCS;
+        }
+
         doDRAMAccess(dram_pkt);
         writeQueue.pop_front();
@@ -1232,17 +1265,6 @@ DRAMCtrl::processNextReqEvent()
             // case, which eventually will check for any draining and
             // also pause any further scheduling if there is really
             // nothing to do
-
-        // here we get a bit creative and shift the bus busy time not
-        // just the tWTR, but also a CAS latency to capture the fact
-        // that we are allowed to prepare a new bank, but not issue a
-        // read command until after tWTR, in essence we capture a
-        // bubble on the data bus that is tWTR + tCL
-        busBusyUntil += tWTR + tCL;
-
-        // update the minimum timing between the requests, this shifts
-        // us back in time far enough to do any bank preparation
-        nextReqTime = busBusyUntil - (tRP + tRCD + tCL);
         }
     }
 
@@ -1259,12 +1281,19 @@ DRAMCtrl::processNextReqEvent()
 }
 
 uint64_t
-DRAMCtrl::minBankActAt(const deque<DRAMPacket*>& queue) const
+DRAMCtrl::minBankPrep(const deque<DRAMPacket*>& queue,
+                      bool switched_cmd_type) const
 {
     uint64_t bank_mask = 0;
     Tick min_act_at = MaxTick;
 
-    // deterimne if we have queued transactions targetting a
+    uint64_t bank_mask_same_rank = 0;
+    Tick min_act_at_same_rank = MaxTick;
+
+    // Give precedence to commands that access same rank as previous command
+    bool same_rank_match = false;
+
+    // determine if we have queued transactions targeting the
     // bank in question
     vector<bool> got_waiting(ranksPerChannel * banksPerRank, false);
     for (auto p = queue.begin(); p != queue.end(); ++p) {
@@ -1280,23 +1309,64 @@ DRAMCtrl::minBankActAt(const deque<DRAMPacket*>& queue) const
 
             if (got_waiting[bank_id]) {
                 // simplistic approximation of when the bank can issue
                 // an activate, ignoring any rank-to-rank switching
-                // cost
+                // cost in this calculation
                 Tick act_at = banks[i][j].openRow == Bank::NO_ROW ?
                     banks[i][j].actAllowedAt :
                     std::max(banks[i][j].preAllowedAt, curTick()) + tRP;
 
-                if (act_at <= min_act_at) {
-                    // reset bank mask if new minimum is found
-                    if (act_at < min_act_at)
-                        bank_mask = 0;
-                    // set the bit corresponding to the available bank
-                    replaceBits(bank_mask, bank_id, bank_id, 1);
-                    min_act_at = act_at;
+                // prioritize commands that access the
+                // same rank as previous burst
+                // Calculate bank mask separately for the case and
+                // evaluate after loop iterations complete
+                if (i == activeRank && ranksPerChannel > 1) {
+                    if (act_at <= min_act_at_same_rank) {
+                        // reset same rank bank mask if new minimum is found
+                        // and previous minimum could not immediately send ACT
+                        if (act_at < min_act_at_same_rank &&
+                            min_act_at_same_rank > curTick())
+                            bank_mask_same_rank = 0;
+
+                        // Set flag indicating that a same rank
+                        // opportunity was found
+                        same_rank_match = true;
+
+                        // set the bit corresponding to the available bank
+                        replaceBits(bank_mask_same_rank, bank_id, bank_id, 1);
+                        min_act_at_same_rank = act_at;
+                    }
+                } else {
+                    if (act_at <= min_act_at) {
+                        // reset bank mask if new minimum is found
+                        // and previous minimum could not immediately send ACT
+                        if (act_at < min_act_at && min_act_at > curTick())
+                            bank_mask = 0;
+                        // set the bit corresponding to the available bank
+                        replaceBits(bank_mask, bank_id, bank_id, 1);
+                        min_act_at = act_at;
+                    }
+                    }
                 }
             }
         }
     }
 
+    // Determine the earliest time when the next burst can issue based
+    // on the current busBusyUntil delay.
+    // Offset by tRCD to correlate with ACT timing variables
+    Tick min_cmd_at = busBusyUntil - tCL - tRCD;
+
+    // Prioritize same rank accesses that can issue B2B
+    // Only optimize for same ranks when the command type
+    // does not change; do not want to unnecessarily incur tWTR
+    //
+    // Resulting FCFS prioritization order is:
+    // 1) Commands that access the same rank as previous burst
+    //    and can prep the bank seamlessly.
+    // 2) Commands (any rank) with earliest bank prep
+    if (!switched_cmd_type && same_rank_match &&
+        min_act_at_same_rank <= min_cmd_at) {
+        bank_mask = bank_mask_same_rank;
+    }
+
     return bank_mask;
 }
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh
index ef8b47a2e..8dcbe817b 100644
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -368,23 +368,36 @@ class DRAMCtrl : public AbstractMemory
      * The memory schduler/arbiter - picks which request needs to
      * go next, based on the specified policy such as FCFS or FR-FCFS
      * and moves it to the head of the queue.
+     * Prioritizes accesses to the same rank as previous burst unless
+     * controller is switching command type.
+     *
+     * @param queue Queued requests to consider
+     * @param switched_cmd_type Command type is changing
      */
-    void chooseNext(std::deque<DRAMPacket*>& queue);
+    void chooseNext(std::deque<DRAMPacket*>& queue, bool switched_cmd_type);
 
     /**
      * For FR-FCFS policy reorder the read/write queue depending on row buffer
      * hits and earliest banks available in DRAM
+     * Prioritizes accesses to the same rank as previous burst unless
+     * controller is switching command type.
+     *
+     * @param queue Queued requests to consider
+     * @param switched_cmd_type Command type is changing
      */
-    void reorderQueue(std::deque<DRAMPacket*>& queue);
+    void reorderQueue(std::deque<DRAMPacket*>& queue, bool switched_cmd_type);
 
     /**
      * Find which are the earliest banks ready to issue an activate
      * for the enqueued requests. Assumes maximum of 64 banks per DIMM
+     * Also checks if the bank is already prepped.
      *
-     * @param Queued requests to consider
+     * @param queue Queued requests to consider
+     * @param switched_cmd_type Command type is changing
      * @return One-hot encoded mask of bank indices
      */
-    uint64_t minBankActAt(const std::deque<DRAMPacket*>& queue) const;
+    uint64_t minBankPrep(const std::deque<DRAMPacket*>& queue,
+                         bool switched_cmd_type) const;
 
     /**
      * Keep track of when row activations happen, in order to enforce
@@ -475,6 +488,7 @@ class DRAMCtrl : public AbstractMemory
     const Tick M5_CLASS_VAR_USED tCK;
     const Tick tWTR;
     const Tick tRTW;
+    const Tick tCS;
     const Tick tBURST;
     const Tick tRCD;
     const Tick tCL;
@@ -664,6 +678,9 @@ class DRAMCtrl : public AbstractMemory
     // To track number of banks which are currently active
     unsigned int numBanksActive;
 
+    // Holds the value of the rank of burst issued
+    uint8_t activeRank;
+
     /** @todo this is a temporary workaround until the 4-phase code is
      * committed. upstream caches needs this packet until true is returned, so
      * hold onto it for deletion until a subsequent call
-- 
2.30.2
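
The rank-aware bus-bubble rule that the patch applies in processNextReqEvent() can also be read in isolation. The standalone C++ sketch below is not gem5 code: the Timing struct and the readBubble()/writeBubble() helpers are invented for the example, and the numbers in main() are assumed values (tRTW = tCS = 2.5 ns, i.e. 2 CK at 800 MHz as in the DDR3-1600 defaults above; the tWTR and tCL figures are merely plausible). It illustrates why same-rank row hits are worth prioritizing when the command type does not change: they avoid the tCS bubble entirely, whereas a read-to-write or write-to-read switch pays a turnaround penalty in any case.

// Standalone illustration of the bubble added to busBusyUntil before the
// next burst. Names and values are assumptions for this sketch only.
#include <cstdint>
#include <iostream>

using Tick = std::uint64_t; // gem5 ticks, treated as picoseconds here

struct Timing {
    Tick tWTR; // write-to-read turnaround, same rank
    Tick tRTW; // read-to-write turnaround, same rank
    Tick tCS;  // any-to-any turnaround, different rank
    Tick tCL;  // CAS latency
};

// Bubble added before the next read burst.
Tick readBubble(const Timing& t, bool switched_cmd_type, bool same_rank)
{
    if (switched_cmd_type) {
        // write-to-read: the same-rank case pays tWTR plus a CAS latency
        // (the bank can be prepared early, but the read command must wait);
        // the different-rank case pays only tCS
        return same_rank ? t.tWTR + t.tCL : t.tCS;
    }
    // read-to-read: a bubble is needed only when the rank changes
    return same_rank ? 0 : t.tCS;
}

// Bubble added before the next write burst.
Tick writeBubble(const Timing& t, bool switched_cmd_type, bool same_rank)
{
    if (switched_cmd_type) {
        // read-to-write: tRTW on the same rank, tCS across ranks
        return same_rank ? t.tRTW : t.tCS;
    }
    // write-to-write: a bubble is needed only when the rank changes
    return same_rank ? 0 : t.tCS;
}

int main()
{
    // Assumed example values, in ticks (ps): tWTR, tRTW, tCS, tCL
    const Timing t{7500, 2500, 2500, 13750};

    std::cout << "WR->RD, same rank: " << readBubble(t, true, true) << " ps\n"
              << "WR->RD, diff rank: " << readBubble(t, true, false) << " ps\n"
              << "RD->RD, diff rank: " << readBubble(t, false, false) << " ps\n"
              << "RD->WR, same rank: " << writeBubble(t, true, true) << " ps\n"
              << "WR->WR, diff rank: " << writeBubble(t, false, false) << " ps\n";
    return 0;
}

Built with any C++11 compiler, the program prints the bubble each sequence would insert, e.g. 21250 ps (tWTR + tCL) for a same-rank write-to-read turnaround versus 2500 ps (tCS) when the next read targets another rank, and no bubble at all for a same-rank, same-type burst, which is exactly the case the reorderQueue()/minBankPrep() changes try to schedule back-to-back.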