mem: Add DDR4 bank group timing

author Wendy Elsasser <wendy.elsasser@arm.com>

Sat, 20 Sep 2014 21:18:21 +0000 (17:18 -0400)

committer Wendy Elsasser <wendy.elsasser@arm.com>

Sat, 20 Sep 2014 21:18:21 +0000 (17:18 -0400)
author Wendy Elsasser <wendy.elsasser@arm.com>
Sat, 20 Sep 2014 21:18:21 +0000 (17:18 -0400)
committer Wendy Elsasser <wendy.elsasser@arm.com>
Sat, 20 Sep 2014 21:18:21 +0000 (17:18 -0400)
diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py

index b06b8e7ebfea4ad5a3d571d4f479b887ee71a837..02fa4fa46c5af51e0022f750aa5efd86c7b1e9ff 100644 (file)
--- a/src/mem/DRAMCtrl.py
+++ b/src/mem/DRAMCtrl.py
@@ -111,6 +111,11 @@ class DRAMCtrl(AbstractMemory):
                                             "device/chip")
      devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
      ranks_per_channel = Param.Unsigned("Number of ranks per channel")
+
+    # default to 0 bank groups per rank, indicating bank group architecture
+    # is not used
+    # update per memory class when bank group architecture is supported
+    bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank")
      banks_per_rank = Param.Unsigned("Number of banks per rank")
      # only used for the address mapping as the controller by
      # construction is a single channel and multiple controllers have
@@ -147,8 +152,17 @@ class DRAMCtrl(AbstractMemory):
      # This parameter has to account for burst length.
      # Read/Write requests with data size larger than one full burst are broken
      # down into multiple requests in the controller
+    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+    # With bank group architectures, tBURST represents the CAS-to-CAS
+    # delay for bursts to different bank groups (tCCD_S)
      tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")
  
+    # CAS-to-CAS delay for bursts to the same bank group
+    # only utilized with bank group architectures; set to 0 for default case
+    # tBURST is equivalent to tCCD_S; no explicit parameter required
+    # for CAS-to-CAS delay for bursts to different bank groups
+    tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
+
      # time taken to complete one refresh cycle (N rows in all banks)
      tRFC = Param.Latency("Refresh cycle time")
  
@@ -171,6 +185,9 @@ class DRAMCtrl(AbstractMemory):
      # minimum row activate to row activate delay time
      tRRD = Param.Latency("ACT to ACT delay")
  
+    # only utilized with bank group architectures; set to 0 for default case
+    tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
+
      # time window in which a maximum number of activates are allowed
      # to take place, set to 0 to disable
      tXAW = Param.Latency("X activation window")
@@ -274,6 +291,10 @@ class DDR4_2400_x64(DRAMCtrl):
      # Use a single rank
      ranks_per_channel = 1
  
+    # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
+    # Set to 4 for x4, x8 case
+    bank_groups_per_rank = 4
+
      # DDR4 has 16 banks (4 bank groups) in all
      # configurations. Currently we do not capture the additional
      # constraints incurred by the bank groups
@@ -283,16 +304,29 @@ class DDR4_2400_x64(DRAMCtrl):
      tCK = '0.833ns'
  
      # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
+    # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+    # With bank group architectures, tBURST represents the CAS-to-CAS
+    # delay for bursts to different bank groups (tCCD_S)
      tBURST = '3.333ns'
  
+    # @2400 data rate, tCCD_L is 6 CK
+    # CAS-to-CAS delay for bursts to the same bank group
+    # tBURST is equivalent to tCCD_S; no explicit parameter required
+    # for CAS-to-CAS delay for bursts to different bank groups
+    tCCD_L = '5ns';
+
      # DDR4-2400 17-17-17
      tRCD = '14.16ns'
      tCL = '14.16ns'
      tRP = '14.16ns'
      tRAS = '32ns'
  
-    # Here using the average of RRD_S and RRD_L
-    tRRD = '4.1ns'
+    # RRD_S (different bank group) for 1K page is MAX(4 CK, 3.3ns)
+    tRRD = '3.3ns'
+
+    # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
+    tRRD_L = '4.9ns';
+
      tXAW = '21ns'
      activation_limit = 4
      tRFC = '260ns'
diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc

index ca562f4f741cf0aeaa05bc8a2377ba2068bf974e..38c240fcff216839a3b06085a10ad65bc8e83de7 100644 (file)
--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@@ -69,6 +69,8 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
      columnsPerRowBuffer(rowBufferSize / burstSize),
      columnsPerStripe(range.granularity() / burstSize),
      ranksPerChannel(p->ranks_per_channel),
+    bankGroupsPerRank(p->bank_groups_per_rank),
+    bankGroupArch(p->bank_groups_per_rank > 0),
      banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0),
      readBufferSize(p->read_buffer_size),
      writeBufferSize(p->write_buffer_size),
@@ -77,9 +79,9 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
      minWritesPerSwitch(p->min_writes_per_switch),
      writesThisTime(0), readsThisTime(0),
      tCK(p->tCK), tWTR(p->tWTR), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST),
-    tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS), tWR(p->tWR),
-    tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
-    tXAW(p->tXAW), activationLimit(p->activation_limit),
+    tCCD_L(p->tCCD_L), tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS),
+    tWR(p->tWR), tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
+    tRRD_L(p->tRRD_L), tXAW(p->tXAW), activationLimit(p->activation_limit),
      memSchedPolicy(p->mem_sched_policy), addrMapping(p->addr_mapping),
      pageMgmt(p->page_policy),
      maxAccessesPerRow(p->max_accesses_per_row),
@@ -104,6 +106,19 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
          for (int b = 0; b < banksPerRank; b++) {
              banks[r][b].rank = r;
              banks[r][b].bank = b;
+            if (bankGroupArch) {
+                // Simply assign lower bits to bank group in order to
+                // rotate across bank groups as banks are incremented
+                // e.g. with 4 banks per bank group and 16 banks total:
+                //    banks 0,4,8,12  are in bank group 0
+                //    banks 1,5,9,13  are in bank group 1
+                //    banks 2,6,10,14 are in bank group 2
+                //    banks 3,7,11,15 are in bank group 3
+                banks[r][b].bankgr = b % bankGroupsPerRank;
+            } else {
+                // No bank groups; simply assign to bank number
+                banks[r][b].bankgr = b;
+            }
          }
      }
  
@@ -168,6 +183,35 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) :
          fatal("tREFI (%d) must be larger than tRP (%d) and tRFC (%d)\n",
                tREFI, tRP, tRFC);
      }
+
+    // basic bank group architecture checks ->
+    if (bankGroupArch) {
+        // must have at least one bank per bank group
+        if (bankGroupsPerRank > banksPerRank) {
+            fatal("banks per rank (%d) must be equal to or larger than "
+                  "banks groups per rank (%d)\n",
+                  banksPerRank, bankGroupsPerRank);
+        }
+        // must have same number of banks in each bank group
+        if ((banksPerRank % bankGroupsPerRank) != 0) {
+            fatal("Banks per rank (%d) must be evenly divisible by bank groups "
+                  "per rank (%d) for equal banks per bank group\n",
+                  banksPerRank, bankGroupsPerRank);
+        }
+        // tCCD_L should be greater than minimal, back-to-back burst delay
+        if (tCCD_L <= tBURST) {
+            fatal("tCCD_L (%d) should be larger than tBURST (%d) when "
+                  "bank groups per rank (%d) is greater than 1\n",
+                  tCCD_L, tBURST, bankGroupsPerRank);
+        }
+        // same bank group tRRD_L should be greater than minimal tRRD
+        if (tRRD_L <= tRRD) {
+            fatal("tRRD_L (%d) should be larger than tRRD (%d) when "
+                  "bank groups per rank (%d) is greater than 1\n",
+                  tRRD_L, tRRD, bankGroupsPerRank);
+        }
+    }
+
  }
  
  void
@@ -824,14 +868,25 @@ DRAMCtrl::activateBank(Bank& bank, Tick act_tick, uint32_t row)
      bank.preAllowedAt = act_tick + tRAS;
  
      // Respect the row-to-column command delay
-    bank.colAllowedAt = act_tick + tRCD;
+    bank.colAllowedAt = std::max(act_tick + tRCD, bank.colAllowedAt);
  
      // start by enforcing tRRD
      for(int i = 0; i < banksPerRank; i++) {
          // next activate to any bank in this rank must not happen
          // before tRRD
-        banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
-                                               banks[rank][i].actAllowedAt);
+        if (bankGroupArch && (bank.bankgr == banks[rank][i].bankgr)) {
+            // bank group architecture requires longer delays between
+            // ACT commands within the same bank group.  Use tRRD_L
+            // in this case
+            banks[rank][i].actAllowedAt = std::max(act_tick + tRRD_L,
+                                                   banks[rank][i].actAllowedAt);
+        } else {
+            // use shorter tRRD value when either
+            // 1) bank group architecture is not supported
+            // 2) bank is in a different bank group
+            banks[rank][i].actAllowedAt = std::max(act_tick + tRRD,
+                                                   banks[rank][i].actAllowedAt);
+        }
      }
  
      // next, we deal with tXAW, if the activation limit is disabled
@@ -986,9 +1041,38 @@ DRAMCtrl::doDRAMAccess(DRAMPacket* dram_pkt)
      // only one burst can use the bus at any one point in time
      assert(dram_pkt->readyTime - busBusyUntil >= tBURST);
  
-    // not strictly necessary, but update the time for the next
-    // read/write (add a max with tCCD here)
-    bank.colAllowedAt = cmd_at + tBURST;
+    // update the time for the next read/write burst for each
+    // bank (add a max with tCCD/tCCD_L here)
+    Tick cmd_dly;
+    for(int j = 0; j < ranksPerChannel; j++) {
+        for(int i = 0; i < banksPerRank; i++) {
+            // next burst to same bank group in this rank must not happen
+            // before tCCD_L.  Different bank group timing requirement is
+            // tBURST; Add tCS for different ranks
+            if (dram_pkt->rank == j) {
+                if (bankGroupArch && (bank.bankgr == banks[j][i].bankgr)) {
+                    // bank group architecture requires longer delays between
+                    // RD/WR burst commands to the same bank group.
+                    // Use tCCD_L in this case
+                    cmd_dly = tCCD_L;
+                } else {
+                    // use tBURST (equivalent to tCCD_S), the shorter
+                    // cas-to-cas delay value, when either:
+                    // 1) bank group architecture is not supported
+                    // 2) bank is in a different bank group
+                    cmd_dly = tBURST;
+                }
+            } else {
+                // different rank is by default in a different bank group
+                // use tBURST (equivalent to tCCD_S), which is the shorter
+                // cas-to-cas delay in this case
+                // Add tCS to account for rank-to-rank bus delay requirements
+                cmd_dly = tBURST + tCS;
+            }
+            banks[j][i].colAllowedAt = std::max(cmd_at + cmd_dly,
+                                                banks[j][i].colAllowedAt);
+        }
+    }
  
      // Save rank of current access
      activeRank = dram_pkt->rank;
@@ -1184,15 +1268,8 @@ DRAMCtrl::processNextReqEvent()
              // that we are allowed to prepare a new bank, but not issue a
              // read command until after tWTR, in essence we capture a
              // bubble on the data bus that is tWTR + tCL
-            if (switched_cmd_type) {
-                // add a bubble to the data bus for write-to-read turn around
-                // or tCS (different rank bus delay).
-                busBusyUntil += (dram_pkt->rank == activeRank) ? tWTR + tCL :
-                                                                 tCS;
-            } else if (dram_pkt->rank != activeRank) {
-                // add a bubble to the data bus, as defined by the
-                // tCS parameter for rank-to-rank delay
-                busBusyUntil += tCS;
+            if (switched_cmd_type && dram_pkt->rank == activeRank) {
+                busBusyUntil += tWTR + tCL;
              }
  
              doDRAMAccess(dram_pkt);
@@ -1235,14 +1312,12 @@ DRAMCtrl::processNextReqEvent()
          // sanity check
          assert(dram_pkt->size <= burstSize);
  
-        if (switched_cmd_type) {
-            // add a bubble to the data bus, as defined by the
-            // tRTW or tCS parameter, depending on whether changing ranks
-            busBusyUntil += (dram_pkt->rank == activeRank) ? tRTW : tCS;
-        } else if (dram_pkt->rank != activeRank) {
-            // add a bubble to the data bus, as defined by the
-            // tCS parameter for rank-to-rank delay
-            busBusyUntil += tCS;
+        // add a bubble to the data bus, as defined by tRTW, when the
+        // access is to the same rank as the previous burst.
+        // Different rank timing is handled with tCS, which is
+        // applied to colAllowedAt
+        if (switched_cmd_type && dram_pkt->rank == activeRank) {
+            busBusyUntil += tRTW;
          }
  
          doDRAMAccess(dram_pkt);
diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh

index 8dcbe817b6372f78616772fb6b643d662c3f28e4..cc2bd13fd2303ca941b96015dbf88e3ba0cb6b2c 100644 (file)
--- a/src/mem/dram_ctrl.hh
+++ b/src/mem/dram_ctrl.hh
@@ -158,6 +158,7 @@ class DRAMCtrl : public AbstractMemory
          uint32_t openRow;
          uint8_t rank;
          uint8_t bank;
+        uint8_t bankgr;
  
          Tick colAllowedAt;
          Tick preAllowedAt;
@@ -167,7 +168,7 @@ class DRAMCtrl : public AbstractMemory
          uint32_t bytesAccessed;
  
          Bank() :
-            openRow(NO_ROW), rank(0), bank(0),
+            openRow(NO_ROW), rank(0), bank(0), bankgr(0),
              colAllowedAt(0), preAllowedAt(0), actAllowedAt(0),
              rowAccesses(0), bytesAccessed(0)
          { }
@@ -470,6 +471,8 @@ class DRAMCtrl : public AbstractMemory
      const uint32_t columnsPerRowBuffer;
      const uint32_t columnsPerStripe;
      const uint32_t ranksPerChannel;
+    const uint32_t bankGroupsPerRank;
+    const bool bankGroupArch;
      const uint32_t banksPerRank;
      const uint32_t channels;
      uint32_t rowsPerBank;
@@ -490,6 +493,7 @@ class DRAMCtrl : public AbstractMemory
      const Tick tRTW;
      const Tick tCS;
      const Tick tBURST;
+    const Tick tCCD_L;
      const Tick tRCD;
      const Tick tCL;
      const Tick tRP;
@@ -499,6 +503,7 @@ class DRAMCtrl : public AbstractMemory
      const Tick tRFC;
      const Tick tREFI;
      const Tick tRRD;
+    const Tick tRRD_L;
      const Tick tXAW;
      const uint32_t activationLimit;
author	Wendy Elsasser <wendy.elsasser@arm.com>
	Sat, 20 Sep 2014 21:18:21 +0000 (17:18 -0400)
committer	Wendy Elsasser <wendy.elsasser@arm.com>
	Sat, 20 Sep 2014 21:18:21 +0000 (17:18 -0400)
src/mem/DRAMCtrl.py		patch \| blob \| history
src/mem/dram_ctrl.cc		patch \| blob \| history
src/mem/dram_ctrl.hh		patch \| blob \| history