From: Wendy Elsasser Date: Sat, 20 Sep 2014 21:18:21 +0000 (-0400) Subject: mem: Add DDR4 bank group timing X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=bf238470726b4cc5c0b34fcb349d767726fe53bc;p=gem5.git mem: Add DDR4 bank group timing Added the following parameter to the DRAMCtrl class: - bank_groups_per_rank This defaults to 0, which indicates that bank group architecture is not used. For the DDR4 case, the default is overridden to indicate bank group architecture, with multiple bank groups per rank. Added the following delays to the DRAMCtrl class: - tCCD_L : CAS-to-CAS, same bank group delay - tRRD_L : RAS-to-RAS, same bank group delay These parameters are only applied when bank group timing is enabled. Bank group timing is currently enabled only for DDR4 memories. For all other memories, these delays will default to '0 ns'. In the DRAM controller model, applied the bank group timing to the per bank parameters actAllowedAt and colAllowedAt. The actAllowedAt will be updated based on bank group when an ACT is issued. The colAllowedAt will be updated based on bank group when a RD/WR burst is issued. At the moment no modifications are made to the scheduling. 
--- diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py index b06b8e7eb..02fa4fa46 100644 --- a/src/mem/DRAMCtrl.py +++ b/src/mem/DRAMCtrl.py @@ -111,6 +111,11 @@ class DRAMCtrl(AbstractMemory): "device/chip") devices_per_rank = Param.Unsigned("Number of devices/chips per rank") ranks_per_channel = Param.Unsigned("Number of ranks per channel") + + # default to 0 bank groups per rank, indicating bank group architecture + # is not used + # update per memory class when bank group architecture is supported + bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank") banks_per_rank = Param.Unsigned("Number of banks per rank") # only used for the address mapping as the controller by # construction is a single channel and multiple controllers have @@ -147,8 +152,17 @@ class DRAMCtrl(AbstractMemory): # This parameter has to account for burst length. # Read/Write requests with data size larger than one full burst are broken # down into multiple requests in the controller + # tBURST is equivalent to the CAS-to-CAS delay (tCCD) + # With bank group architectures, tBURST represents the CAS-to-CAS + # delay for bursts to different bank groups (tCCD_S) tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)") + # CAS-to-CAS delay for bursts to the same bank group + # only utilized with bank group architectures; set to 0 for default case + # tBURST is equivalent to tCCD_S; no explicit parameter required + # for CAS-to-CAS delay for bursts to different bank groups + tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay") + # time taken to complete one refresh cycle (N rows in all banks) tRFC = Param.Latency("Refresh cycle time") @@ -171,6 +185,9 @@ class DRAMCtrl(AbstractMemory): # minimum row activate to row activate delay time tRRD = Param.Latency("ACT to ACT delay") + # only utilized with bank group architectures; set to 0 for default case + tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay") + # time window in which a 
maximum number of activates are allowed # to take place, set to 0 to disable tXAW = Param.Latency("X activation window") @@ -274,6 +291,10 @@ class DDR4_2400_x64(DRAMCtrl): # Use a single rank ranks_per_channel = 1 + # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups + # Set to 4 for x4, x8 case + bank_groups_per_rank = 4 + # DDR4 has 16 banks (4 bank groups) in all # configurations. Currently we do not capture the additional # constraints incurred by the bank groups @@ -283,16 +304,29 @@ class DDR4_2400_x64(DRAMCtrl): tCK = '0.833ns' # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz + # tBURST is equivalent to the CAS-to-CAS delay (tCCD) + # With bank group architectures, tBURST represents the CAS-to-CAS + # delay for bursts to different bank groups (tCCD_S) tBURST = '3.333ns' + # @2400 data rate, tCCD_L is 6 CK + # CAS-to-CAS delay for bursts to the same bank group + # tBURST is equivalent to tCCD_S; no explicit parameter required + # for CAS-to-CAS delay for bursts to different bank groups + tCCD_L = '5ns'; + # DDR4-2400 17-17-17 tRCD = '14.16ns' tCL = '14.16ns' tRP = '14.16ns' tRAS = '32ns' - # Here using the average of RRD_S and RRD_L - tRRD = '4.1ns' + # RRD_S (different bank group) for 1K page is MAX(4 CK, 3.3ns) + tRRD = '3.3ns' + + # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns) + tRRD_L = '4.9ns'; + tXAW = '21ns' activation_limit = 4 tRFC = '260ns' diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc index ca562f4f7..38c240fcf 100644 --- a/src/mem/dram_ctrl.cc +++ b/src/mem/dram_ctrl.cc @@ -69,6 +69,8 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : columnsPerRowBuffer(rowBufferSize / burstSize), columnsPerStripe(range.granularity() / burstSize), ranksPerChannel(p->ranks_per_channel), + bankGroupsPerRank(p->bank_groups_per_rank), + bankGroupArch(p->bank_groups_per_rank > 0), banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0), readBufferSize(p->read_buffer_size), writeBufferSize(p->write_buffer_size), @@ 
-77,9 +79,9 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : minWritesPerSwitch(p->min_writes_per_switch), writesThisTime(0), readsThisTime(0), tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST), - tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), tWR(p->tWR), - tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD), - tXAW(p->tXAW), activationLimit(p->activation_limit), + tCCD_L(p->tCCD_L), tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), + tWR(p->tWR), tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD), + tRRD_L(p->tRRD_L), tXAW(p->tXAW), activationLimit(p->activation_limit), memSchedPolicy(p->mem_sched_policy), addrMapping(p->addr_mapping), pageMgmt(p->page_policy), maxAccessesPerRow(p->max_accesses_per_row), @@ -104,6 +106,19 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : for (int b = 0; b < banksPerRank; b++) { banks[r][b].rank = r; banks[r][b].bank = b; + if (bankGroupArch) { + // Simply assign lower bits to bank group in order to + // rotate across bank groups as banks are incremented + // e.g. 
with 4 banks per bank group and 16 banks total: + // banks 0,4,8,12 are in bank group 0 + // banks 1,5,9,13 are in bank group 1 + // banks 2,6,10,14 are in bank group 2 + // banks 3,7,11,15 are in bank group 3 + banks[r][b].bankgr = b % bankGroupsPerRank; + } else { + // No bank groups; simply assign to bank number + banks[r][b].bankgr = b; + } } } @@ -168,6 +183,35 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n", tREFI, tRP, tRFC); } + + // basic bank group architecture checks -> + if (bankGroupArch) { + // must have at least one bank per bank group + if (bankGroupsPerRank > banksPerRank) { + fatal("banks per rank (%d) must be equal to or larger than " + "banks groups per rank (%d)\n", + banksPerRank, bankGroupsPerRank); + } + // must have same number of banks in each bank group + if ((banksPerRank % bankGroupsPerRank) != 0) { + fatal("Banks per rank (%d) must be evenly divisible by bank groups " + "per rank (%d) for equal banks per bank group\n", + banksPerRank, bankGroupsPerRank); + } + // tCCD_L should be greater than minimal, back-to-back burst delay + if (tCCD_L <= tBURST) { + fatal("tCCD_L (%d) should be larger than tBURST (%d) when " + "bank groups per rank (%d) is greater than 1\n", + tCCD_L, tBURST, bankGroupsPerRank); + } + // tRRD_L is greater than minimal, same bank group ACT-to-ACT delay + if (tRRD_L <= tRRD) { + fatal("tRRD_L (%d) should be larger than tRRD (%d) when " + "bank groups per rank (%d) is greater than 1\n", + tRRD_L, tRRD, bankGroupsPerRank); + } + } + } void @@ -824,14 +868,25 @@ DRAMCtrl::activateBank(Bank& bank, Tick act_tick, uint32_t row) bank.preAllowedAt = act_tick + tRAS; // Respect the row-to-column command delay - bank.colAllowedAt = act_tick + tRCD; + bank.colAllowedAt = std::max(act_tick + tRCD, bank.colAllowedAt); // start by enforcing tRRD for(int i = 0; i < banksPerRank; i++) { // next activate to any bank in this rank must not happen // before tRRD - 
banks[rank][i].actAllowedAt = std::max(act_tick + tRRD, - banks[rank][i].actAllowedAt); + if (bankGroupArch && (bank.bankgr == banks[rank][i].bankgr)) { + // bank group architecture requires longer delays between + // ACT commands within the same bank group. Use tRRD_L + // in this case + banks[rank][i].actAllowedAt = std::max(act_tick + tRRD_L, + banks[rank][i].actAllowedAt); + } else { + // use shorter tRRD value when either + // 1) bank group architecture is not supported + // 2) bank is in a different bank group + banks[rank][i].actAllowedAt = std::max(act_tick + tRRD, + banks[rank][i].actAllowedAt); + } } // next, we deal with tXAW, if the activation limit is disabled @@ -986,9 +1041,38 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt) // only one burst can use the bus at any one point in time assert(dram_pkt->readyTime - busBusyUntil >= tBURST); - // not strictly necessary, but update the time for the next - // read/write (add a max with tCCD here) - bank.colAllowedAt = cmd_at + tBURST; + // update the time for the next read/write burst for each + // bank (add a max with tCCD/tCCD_L here) + Tick cmd_dly; + for(int j = 0; j < ranksPerChannel; j++) { + for(int i = 0; i < banksPerRank; i++) { + // next burst to same bank group in this rank must not happen + // before tCCD_L. Different bank group timing requirement is + // tBURST; Add tCS for different ranks + if (dram_pkt->rank == j) { + if (bankGroupArch && (bank.bankgr == banks[j][i].bankgr)) { + // bank group architecture requires longer delays between + // RD/WR burst commands to the same bank group. 
+ // Use tCCD_L in this case + cmd_dly = tCCD_L; + } else { + // use tBURST (equivalent to tCCD_S), the shorter + // cas-to-cas delay value, when either: + // 1) bank group architecture is not supported + // 2) bank is in a different bank group + cmd_dly = tBURST; + } + } else { + // different rank is by default in a different bank group + // use tBURST (equivalent to tCCD_S), which is the shorter + // cas-to-cas delay in this case + // Add tCS to account for rank-to-rank bus delay requirements + cmd_dly = tBURST + tCS; + } + banks[j][i].colAllowedAt = std::max(cmd_at + cmd_dly, + banks[j][i].colAllowedAt); + } + } // Save rank of current access activeRank = dram_pkt->rank; @@ -1184,15 +1268,8 @@ DRAMCtrl::processNextReqEvent() // that we are allowed to prepare a new bank, but not issue a // read command until after tWTR, in essence we capture a // bubble on the data bus that is tWTR + tCL - if (switched_cmd_type) { - // add a bubble to the data bus for write-to-read turn around - // or tCS (different rank bus delay). - busBusyUntil += (dram_pkt->rank == activeRank) ? tWTR + tCL : - tCS; - } else if (dram_pkt->rank != activeRank) { - // add a bubble to the data bus, as defined by the - // tCS parameter for rank-to-rank delay - busBusyUntil += tCS; + if (switched_cmd_type && dram_pkt->rank == activeRank) { + busBusyUntil += tWTR + tCL; } doDRAMAccess(dram_pkt); @@ -1235,14 +1312,12 @@ DRAMCtrl::processNextReqEvent() // sanity check assert(dram_pkt->size <= burstSize); - if (switched_cmd_type) { - // add a bubble to the data bus, as defined by the - // tRTW or tCS parameter, depending on whether changing ranks - busBusyUntil += (dram_pkt->rank == activeRank) ? 
tRTW : tCS; - } else if (dram_pkt->rank != activeRank) { - // add a bubble to the data bus, as defined by the - // tCS parameter for rank-to-rank delay - busBusyUntil += tCS; + // add a bubble to the data bus, as defined by the + // tRTW when access is to the same rank as previous burst + // Different rank timing is handled with tCS, which is + // applied to colAllowedAt + if (switched_cmd_type && dram_pkt->rank == activeRank) { + busBusyUntil += tRTW; } doDRAMAccess(dram_pkt); diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh index 8dcbe817b..cc2bd13fd 100644 --- a/src/mem/dram_ctrl.hh +++ b/src/mem/dram_ctrl.hh @@ -158,6 +158,7 @@ class DRAMCtrl : public AbstractMemory uint32_t openRow; uint8_t rank; uint8_t bank; + uint8_t bankgr; Tick colAllowedAt; Tick preAllowedAt; @@ -167,7 +168,7 @@ class DRAMCtrl : public AbstractMemory uint32_t bytesAccessed; Bank() : - openRow(NO_ROW), rank(0), bank(0), + openRow(NO_ROW), rank(0), bank(0), bankgr(0), colAllowedAt(0), preAllowedAt(0), actAllowedAt(0), rowAccesses(0), bytesAccessed(0) { } @@ -470,6 +471,8 @@ class DRAMCtrl : public AbstractMemory const uint32_t columnsPerRowBuffer; const uint32_t columnsPerStripe; const uint32_t ranksPerChannel; + const uint32_t bankGroupsPerRank; + const bool bankGroupArch; const uint32_t banksPerRank; const uint32_t channels; uint32_t rowsPerBank; @@ -490,6 +493,7 @@ class DRAMCtrl : public AbstractMemory const Tick tRTW; const Tick tCS; const Tick tBURST; + const Tick tCCD_L; const Tick tRCD; const Tick tCL; const Tick tRP; @@ -499,6 +503,7 @@ class DRAMCtrl : public AbstractMemory const Tick tRFC; const Tick tREFI; const Tick tRRD; + const Tick tRRD_L; const Tick tXAW; const uint32_t activationLimit;