-# Copyright (c) 2012-2019 ARM Limited
+# Copyright (c) 2012-2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# tBURST is equivalent to the CAS-to-CAS delay (tCCD)
# With bank group architectures, tBURST represents the CAS-to-CAS
# delay for bursts to different bank groups (tCCD_S)
- tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")
+ tBURST = Param.Latency("Burst duration "
+ "(typically burst length / 2 cycles)")
+
+ # tBURST_MAX is the column array cycle delay required before next access,
+ # which could be greater than tBURST when the memory access time is greater
+ # than tBURST
+ tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
+
+ # tBURST_MIN is the minimum delay between bursts, which could be less than
+ # tBURST when interleaving is supported
+ tBURST_MIN = Param.Latency(Self.tBURST, "Minimum delay between bursts")
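+ # e.g. with LPDDR5 bank-group interleaving, tBURST_MIN is a single 16-beat
+ # sub-burst while tBURST_MAX covers the full same-bank-group burst window
+ # (see the LPDDR5 configurations below)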
# CAS-to-CAS delay for bursts to the same bank group
# only utilized with bank group architectures; set to 0 for default case
# write-to-read, same rank turnaround penalty
tWTR = Param.Latency("Write to read, same rank switching time")
+ # write-to-read, same rank turnaround penalty for same bank group
+ tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
+ "time, same bank group")
+
# read-to-write, same rank turnaround penalty
tRTW = Param.Latency("Read to write, same rank switching time")
# different rank bus delay
tCS = Param.Latency("Rank to rank switching time")
+ # minimum precharge to precharge delay time
+ tPPD = Param.Latency("0ns", "PRE to PRE delay")
+
+ # maximum delay between two-cycle ACT command phases
+ tAAD = Param.Latency(Self.tCK,
+ "Maximum delay between two-cycle ACT commands")
+
+ two_cycle_activate = Param.Bool(False,
+ "Two cycles required to send activate")
+
# minimum row activate to row activate delay time
tRRD = Param.Latency("ACT to ACT delay")
# time to exit self-refresh mode with locked DLL
tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
+ # number of data beats per clock. With DDR, the default is 2, one per edge
+ beats_per_clock = Param.Unsigned(2, "Data beats per clock")
+
+ data_clock_sync = Param.Bool(False, "Synchronization commands required")
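+ # e.g. LPDDR5 with a 4:1 WCK:CK ratio and DDR on WCK transfers 8 beats per
+ # CK and needs data clock sync commands when WCK is not free-running (see
+ # the LPDDR5 configurations below)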
+
# Currently rolled into other params
######################################################################
# self refresh exit time
tXS = '65ns'
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
+class LPDDR5_5500_1x16_BG_BL32(DRAMCtrl):
+
+ # Increase buffer size to account for more bank resources
+ read_buffer_size = 64
+
+ # Set page policy to better suit DMC Huxley
+ page_policy = 'close_adaptive'
+
+ # 16-bit channel interface
+ device_bus_width = 16
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL32 for higher command bandwidth
+ burst_length = 32
+
+ # size of device in bytes
+ device_size = '1GB'
+
+ # 2kB page with BG mode
+ device_rowbuffer_size = '2kB'
+
+ # Use a 1x16 configuration
+ devices_per_rank = 1
+
+ # Use a single rank
+ ranks_per_channel = 1
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Initial configuration will have 16 banks with Bank Group Arch
+ # to maximize resources and enable higher data rates
+ banks_per_rank = 16
+ bank_groups_per_rank = 4
+
+ # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
+ tCK = '1.455ns'
+
+ # Greater of 2 CK or 18ns
+ tRCD = '18ns'
+
+ # Base RL is 16 CK @ 687.5 MHz = 23.28ns
+ tCL = '23.280ns'
+
+ # Greater of 2 CK or 18ns
+ tRP = '18ns'
+
+ # Greater of 3 CK or 42ns
+ tRAS = '42ns'
+
+ # Greater of 3 CK or 34ns
+ tWR = '34ns'
+
+ # active powerdown and precharge powerdown exit time
+ # Greater of 3 CK or 7ns
+ tXP = '7ns'
+
+ # self refresh exit time (tRFCab + 7.5ns)
+ tXS = '217.5ns'
+
+ # Greater of 2 CK or 7.5 ns minus 2 CK
+ tRTP = '4.59ns'
+
+ # With BG architecture, burst of 32 transferred in two 16-beat
+ # sub-bursts, with a 16-beat gap in between.
+ # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
+ # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
+ tBURST = '8.73ns'
+ # can interleave a Bstof32 from another bank group at tBURST_MIN
+ # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
+ tBURST_MIN = '2.91ns'
+ # tBURST_MAX is the maximum burst delay for same bank group timing
+ # this is 8 CK @ 687.5 MHz
+ tBURST_MAX = '11.64ns'
+
+ # 8 CK @ 687.5 MHz
+ tCCD_L = "11.64ns"
+
+ # LPDDR5, 8 Gbit/channel for 210ns tRFCab
+ tRFC = '210ns'
+ tREFI = '3.9us'
+
+ # Greater of 4 CK or 6.25 ns
+ tWTR = '6.25ns'
+ # Greater of 4 CK or 12 ns
+ tWTR_L = '12ns'
+
+ # Required RD-to-WR timing is RL + BL/n + tWCKDQ0/tCK - WL
+ # tWCKDQ0/tCK will be 1 CK for most cases
+ # For gem5, RL = WL and BL/n is already accounted for with tBURST
+ # Result is that an additional 1 CK is required
+ tRTW = '1.455ns'
+
+ # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
+ tCS = '2.91ns'
+
+ # 2 CK
+ tPPD = '2.91ns'
+
+ # Greater of 2 CK or 5 ns
+ tRRD = '5ns'
+ tRRD_L = '5ns'
+
+ # With Bank Group Arch mode tFAW is 20 ns
+ tXAW = '20ns'
+ activation_limit = 4
+
+ # at 5.5Gbps, 4:1 WCK to CK ratio required
+ # 2 data beats per WCK (DDR) -> 8 per CK
+ beats_per_clock = 8
+
+ # 2 cycles required to send activate command
+ # 2 command phases can be sent back-to-back or
+ # with a gap up to tAAD = 8 CK
+ two_cycle_activate = True
+ tAAD = '11.640ns'
+
+ data_clock_sync = True
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
+class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL16 for smaller access granularity
+ burst_length = 16
+
+ # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST = '2.91ns'
+ tBURST_MIN = '2.91ns'
+ # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST_MAX = '5.82ns'
+
+ # 4 CK @ 687.5 MHz
+ tCCD_L = "5.82ns"
+
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
+class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
+
+ # 4kB page with 8B mode
+ device_rowbuffer_size = '4kB'
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Select 8B
+ banks_per_rank = 8
+ bank_groups_per_rank = 0
+
+ # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST = '5.82ns'
+ tBURST_MIN = '5.82ns'
+ tBURST_MAX = '5.82ns'
+
+ # Greater of 4 CK or 12 ns
+ tWTR = '12ns'
+
+ # Greater of 2 CK or 10 ns
+ tRRD = '10ns'
+
+ # With 8B mode tFAW is 40 ns
+ tXAW = '40ns'
+ activation_limit = 4
+
+ # Reset BG arch timing for 8B mode
+ tCCD_L = "0ns"
+ tRRD_L = "0ns"
+ tWTR_L = "0ns"
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
+class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
+
+ # 6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK
+ tCK = '1.25ns'
+
+ # Base RL is 17 CK @ 800 MHz = 21.25ns
+ tCL = '21.25ns'
+
+ # With BG architecture, burst of 32 transferred in two 16-beat
+ # sub-bursts, with a 16-beat gap in between.
+ # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
+ # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
+ tBURST = '7.5ns'
+ # can interleave a Bstof32 from another bank group at tBURST_MIN
+ # 16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
+ tBURST_MIN = '2.5ns'
+ # tBURST_MAX is the maximum burst delay for same bank group timing
+ # this is 8 CK @ 800 MHz
+ tBURST_MAX = '10ns'
+
+ # 8 CK @ 800 MHz
+ tCCD_L = "10ns"
+
+ # Required RD-to-WR timing is RL + BL/n + tWCKDQ0/tCK - WL
+ # tWCKDQ0/tCK will be 1 CK for most cases
+ # For gem5, RL = WL and BL/n is already accounted for with tBURST
+ # Result is that an additional 1 CK is required
+ tRTW = '1.25ns'
+
+ # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+ tCS = '2.5ns'
+
+ # 2 CK
+ tPPD = '2.5ns'
+
+ # 2 command phases can be sent back-to-back or
+ # with a gap up to tAAD = 8 CK
+ tAAD = '10ns'
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on initial
+# JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
+class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL16 for smaller access granularity
+ burst_length = 16
+
+ # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
+ tBURST = '2.5ns'
+ tBURST_MIN = '2.5ns'
+ # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
+ tBURST_MAX = '5ns'
+
+ # 4 CK @ 800 MHz
+ tCCD_L = "5ns"
+
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
+class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
+
+ # 4kB page with 8B mode
+ device_rowbuffer_size = '4kB'
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Select 8B
+ banks_per_rank = 8
+ bank_groups_per_rank = 0
+
+ # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
+ tBURST = '5ns'
+ tBURST_MIN = '5ns'
+ tBURST_MAX = '5ns'
+
+ # Greater of 4 CK or 12 ns
+ tWTR = '12ns'
+
+ # Greater of 2 CK or 10 ns
+ tRRD = '10ns'
+
+ # With 8B mode tFAW is 40 ns
+ tXAW = '40ns'
+ activation_limit = 4
+
+ # Reset BG arch timing for 8B mode
+ tCCD_L = "0ns"
+ tRRD_L = "0ns"
+ tWTR_L = "0ns"
/*
- * Copyright (c) 2010-2019 ARM Limited
+ * Copyright (c) 2010-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
minWritesPerSwitch(p->min_writes_per_switch),
writesThisTime(0), readsThisTime(0),
tCK(p->tCK), tRTW(p->tRTW), tCS(p->tCS), tBURST(p->tBURST),
+ tBURST_MIN(p->tBURST_MIN),
tCCD_L_WR(p->tCCD_L_WR),
tCCD_L(p->tCCD_L), tRCD(p->tRCD), tCL(p->tCL), tRP(p->tRP), tRAS(p->tRAS),
tWR(p->tWR), tRTP(p->tRTP), tRFC(p->tRFC), tREFI(p->tREFI), tRRD(p->tRRD),
- tRRD_L(p->tRRD_L), tXAW(p->tXAW), tXP(p->tXP), tXS(p->tXS),
+ tRRD_L(p->tRRD_L), tPPD(p->tPPD), tAAD(p->tAAD), tXAW(p->tXAW),
+ tXP(p->tXP), tXS(p->tXS),
+ clkResyncDelay(tCL + p->tBURST_MAX),
+ maxCommandsPerBurst(burstLength / p->beats_per_clock),
+ dataClockSync(p->data_clock_sync),
+ twoCycleActivate(p->two_cycle_activate),
activationLimit(p->activation_limit), rankToRankDly(tCS + tBURST),
wrToRdDly(tCL + tBURST + p->tWTR), rdToWrDly(tRTW + tBURST),
+ wrToRdDlySameBG(tCL + p->tBURST_MAX + p->tWTR_L),
+ rdToWrDlySameBG(tRTW + p->tBURST_MAX),
+ burstInterleave(tBURST != tBURST_MIN),
+ burstDataCycles(burstInterleave ? p->tBURST_MAX / 2 : tBURST),
memSchedPolicy(p->mem_sched_policy), addrMapping(p->addr_mapping),
pageMgmt(p->page_policy),
maxAccessesPerRow(p->max_accesses_per_row),
readQueue.resize(p->qos_priorities);
writeQueue.resize(p->qos_priorities);
-
for (int i = 0; i < ranksPerChannel; i++) {
Rank* rank = new Rank(*this, p, i);
ranks.push_back(rank);
// track if this is the last packet before idling
// and that there are no outstanding commands to this rank
if (dram_pkt->rankRef.isQueueEmpty() &&
- dram_pkt->rankRef.outstandingEvents == 0 && enableDRAMPowerdown) {
+ dram_pkt->rankRef.outstandingEvents == 0 &&
+ dram_pkt->rankRef.inRefIdleState() && enableDRAMPowerdown) {
// verify that there are no events scheduled
assert(!dram_pkt->rankRef.activateEvent.scheduled());
assert(!dram_pkt->rankRef.prechargeEvent.scheduled());
DPRINTF(Drain, "DRAM controller done draining\n");
signalDrainDone();
+ } else if ((dram_pkt->rankRef.refreshState == REF_PRE) &&
+ !dram_pkt->rankRef.prechargeEvent.scheduled()) {
+ // kick the refresh event loop into action again if banks already
+ // closed and just waiting for read to complete
+ schedule(dram_pkt->rankRef.refreshEvent, curTick());
}
}
const Tick col_allowed_at = dram_pkt->isRead() ? bank.rdAllowedAt :
bank.wrAllowedAt;
- DPRINTF(DRAM, "%s checking packet in bank %d\n",
- __func__, dram_pkt->bankRef.bank);
+ DPRINTF(DRAM, "%s checking packet in bank %d, row %d\n",
+ __func__, dram_pkt->bankRef.bank, dram_pkt->row);
// check if rank is not doing a refresh and thus is available, if not,
// jump to the next packet
return;
}
+void
+DRAMCtrl::pruneBurstTick()
+{
+ auto it = burstTicks.begin();
+ while (it != burstTicks.end()) {
+ auto current_it = it++;
+ if (curTick() > *current_it) {
+ DPRINTF(DRAM, "Removing burstTick for %d\n", *current_it);
+ burstTicks.erase(current_it);
+ }
+ }
+}
+
+Tick
+DRAMCtrl::getBurstWindow(Tick cmd_tick)
+{
+ // get tick aligned to burst window
+ Tick burst_offset = cmd_tick % burstDataCycles;
+ return (cmd_tick - burst_offset);
+}
+
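+// Note: maxCommandsPerBurst (burstLength / beats_per_clock) is effectively
+// one command slot per CK of the burst; e.g. the BL32 LPDDR5 configurations
+// (burst_length = 32, beats_per_clock = 8) allow up to 4 commands per
+// burstDataCycles-wide window in burstTicks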
+Tick
+DRAMCtrl::verifySingleCmd(Tick cmd_tick)
+{
+ // start with assumption that there is no contention on command bus
+ Tick cmd_at = cmd_tick;
+
+ // get tick aligned to burst window
+ Tick burst_tick = getBurstWindow(cmd_tick);
+
+ // verify that we have command bandwidth to issue the command
+ // if not, iterate over next window(s) until slot found
+ while (burstTicks.count(burst_tick) >= maxCommandsPerBurst) {
+ DPRINTF(DRAM, "Contention found on command bus at %d\n", burst_tick);
+ burst_tick += burstDataCycles;
+ cmd_at = burst_tick;
+ }
+
+ // add command into burst window and return corresponding Tick
+ burstTicks.insert(burst_tick);
+ return cmd_at;
+}
+
+Tick
+DRAMCtrl::verifyMultiCmd(Tick cmd_tick, Tick max_multi_cmd_split)
+{
+ // start with assumption that there is no contention on command bus
+ Tick cmd_at = cmd_tick;
+
+ // get tick aligned to burst window
+ Tick burst_tick = getBurstWindow(cmd_tick);
+
+ // Command timing requirements are from 2nd command
+ // Start with assumption that 2nd command will issue at cmd_at and
+ // find prior slot for 1st command to issue
+ // Given a maximum latency of max_multi_cmd_split between the commands,
+ // find the burst at the maximum latency prior to cmd_at
+ Tick burst_offset = 0;
+ Tick first_cmd_offset = cmd_tick % burstDataCycles;
+ while (max_multi_cmd_split > (first_cmd_offset + burst_offset)) {
+ burst_offset += burstDataCycles;
+ }
+ // get the earliest burst aligned address for first command
+ // ensure that the time does not go negative
+ Tick first_cmd_tick = burst_tick - std::min(burst_offset, burst_tick);
+
+ // Can required commands issue?
+ bool first_can_issue = false;
+ bool second_can_issue = false;
+ // verify that we have command bandwidth to issue the command(s)
+ while (!first_can_issue || !second_can_issue) {
+ bool same_burst = (burst_tick == first_cmd_tick);
+ auto first_cmd_count = burstTicks.count(first_cmd_tick);
+ auto second_cmd_count = same_burst ? first_cmd_count + 1 :
+ burstTicks.count(burst_tick);
+
+ first_can_issue = first_cmd_count < maxCommandsPerBurst;
+ second_can_issue = second_cmd_count < maxCommandsPerBurst;
+
+ if (!second_can_issue) {
+ DPRINTF(DRAM, "Contention (cmd2) found on command bus at %d\n",
+ burst_tick);
+ burst_tick += burstDataCycles;
+ cmd_at = burst_tick;
+ }
+
+ // Verify max_multi_cmd_split isn't violated when command 2 is shifted
+ // If commands initially were issued in same burst, they are
+ // now in consecutive bursts and can still issue B2B
+ bool gap_violated = !same_burst &&
+ ((burst_tick - first_cmd_tick) > max_multi_cmd_split);
+
+ if (!first_can_issue || (!second_can_issue && gap_violated)) {
+ DPRINTF(DRAM, "Contention (cmd1) found on command bus at %d\n",
+ first_cmd_tick);
+ first_cmd_tick += burstDataCycles;
+ }
+ }
+
+ // Add command to burstTicks
+ burstTicks.insert(burst_tick);
+ burstTicks.insert(first_cmd_tick);
+
+ return cmd_at;
+}
+
void
DRAMCtrl::activateBank(Rank& rank_ref, Bank& bank_ref,
Tick act_tick, uint32_t row)
{
assert(rank_ref.actTicks.size() == activationLimit);
- DPRINTF(DRAM, "Activate at tick %d\n", act_tick);
+ // verify that we have command bandwidth to issue the activate
+ // if not, shift to next burst window
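+ // for a two-cycle activate (e.g. LPDDR5 ACT-1/ACT-2), the two command
+ // phases may issue back-to-back or separated by up to tAAD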
+ Tick act_at;
+ if (twoCycleActivate)
+ act_at = verifyMultiCmd(act_tick, tAAD);
+ else
+ act_at = verifySingleCmd(act_tick);
+
+ DPRINTF(DRAM, "Activate at tick %d\n", act_at);
// update the open row
assert(bank_ref.openRow == Bank::NO_ROW);
assert(rank_ref.numBanksActive <= banksPerRank);
DPRINTF(DRAM, "Activate bank %d, rank %d at tick %lld, now got %d active\n",
- bank_ref.bank, rank_ref.rank, act_tick,
+ bank_ref.bank, rank_ref.rank, act_at,
ranks[rank_ref.rank]->numBanksActive);
rank_ref.cmdList.push_back(Command(MemCommand::ACT, bank_ref.bank,
- act_tick));
+ act_at));
- DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_tick, tCK) -
+ DPRINTF(DRAMPower, "%llu,ACT,%d,%d\n", divCeil(act_at, tCK) -
timeStampOffset, bank_ref.bank, rank_ref.rank);
// The next access has to respect tRAS for this bank
- bank_ref.preAllowedAt = act_tick + tRAS;
+ bank_ref.preAllowedAt = act_at + tRAS;
// Respect the row-to-column command delay for both read and write cmds
- bank_ref.rdAllowedAt = std::max(act_tick + tRCD, bank_ref.rdAllowedAt);
- bank_ref.wrAllowedAt = std::max(act_tick + tRCD, bank_ref.wrAllowedAt);
+ bank_ref.rdAllowedAt = std::max(act_at + tRCD, bank_ref.rdAllowedAt);
+ bank_ref.wrAllowedAt = std::max(act_at + tRCD, bank_ref.wrAllowedAt);
// start by enforcing tRRD
for (int i = 0; i < banksPerRank; i++) {
// bank group architecture requires longer delays between
// ACT commands within the same bank group. Use tRRD_L
// in this case
- rank_ref.banks[i].actAllowedAt = std::max(act_tick + tRRD_L,
+ rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD_L,
rank_ref.banks[i].actAllowedAt);
} else {
// use shorter tRRD value when either
// 1) bank group architecture is not supportted
// 2) bank is in a different bank group
- rank_ref.banks[i].actAllowedAt = std::max(act_tick + tRRD,
+ rank_ref.banks[i].actAllowedAt = std::max(act_at + tRRD,
rank_ref.banks[i].actAllowedAt);
}
}
if (!rank_ref.actTicks.empty()) {
// sanity check
if (rank_ref.actTicks.back() &&
- (act_tick - rank_ref.actTicks.back()) < tXAW) {
+ (act_at - rank_ref.actTicks.back()) < tXAW) {
panic("Got %d activates in window %d (%llu - %llu) which "
- "is smaller than %llu\n", activationLimit, act_tick -
- rank_ref.actTicks.back(), act_tick,
+ "is smaller than %llu\n", activationLimit, act_at -
+ rank_ref.actTicks.back(), act_at,
rank_ref.actTicks.back(), tXAW);
}
rank_ref.actTicks.pop_back();
// record an new activation (in the future)
- rank_ref.actTicks.push_front(act_tick);
+ rank_ref.actTicks.push_front(act_at);
// cannot activate more than X times in time window tXAW, push the
// next one (the X + 1'st activate) to be tXAW away from the
// oldest in our window of X
if (rank_ref.actTicks.back() &&
- (act_tick - rank_ref.actTicks.back()) < tXAW) {
+ (act_at - rank_ref.actTicks.back()) < tXAW) {
DPRINTF(DRAM, "Enforcing tXAW with X = %d, next activate "
"no earlier than %llu\n", activationLimit,
rank_ref.actTicks.back() + tXAW);
// at the point when this activate takes place, make sure we
// transition to the active power state
if (!rank_ref.activateEvent.scheduled())
- schedule(rank_ref.activateEvent, act_tick);
- else if (rank_ref.activateEvent.when() > act_tick)
+ schedule(rank_ref.activateEvent, act_at);
+ else if (rank_ref.activateEvent.when() > act_at)
// move it sooner in time
- reschedule(rank_ref.activateEvent, act_tick);
+ reschedule(rank_ref.activateEvent, act_at);
}
void
-DRAMCtrl::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_at, bool trace)
+DRAMCtrl::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick,
+ bool auto_or_preall, bool trace)
{
// make sure the bank has an open row
assert(bank.openRow != Bank::NO_ROW);
bank.openRow = Bank::NO_ROW;
- // no precharge allowed before this one
- bank.preAllowedAt = pre_at;
+ Tick pre_at = pre_tick;
+ if (auto_or_preall) {
+ // no precharge allowed before this one
+ bank.preAllowedAt = pre_at;
+ } else {
+ // Issuing an explicit PRE command
+ // Verify that we have command bandwidth to issue the precharge
+ // if not, shift to next burst window
+ pre_at = verifySingleCmd(pre_tick);
+ // enforce tPPD
+ for (int i = 0; i < banksPerRank; i++) {
+ rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
+ rank_ref.banks[i].preAllowedAt);
+ }
+ }
Tick pre_done_at = pre_at + tRP;
DPRINTF(DRAMPower, "%llu,PRE,%d,%d\n", divCeil(pre_at, tCK) -
timeStampOffset, bank.bank, rank_ref.rank);
}
+
// if we look at the current number of active banks we might be
// tempted to think the DRAM is now idle, however this can be
// undone by an activate that is scheduled to happen before we
DPRINTF(DRAM, "Timing access to addr %lld, rank/bank/row %d %d %d\n",
dram_pkt->addr, dram_pkt->rank, dram_pkt->bank, dram_pkt->row);
+ // first clean up the burstTick set, removing old entries
+ // before adding new entries for next burst
+ pruneBurstTick();
+
// get the rank
Rank& rank = dram_pkt->rankRef;
// the command; need minimum of tBURST between commands
Tick cmd_at = std::max({col_allowed_at, nextBurstAt, curTick()});
+ // verify that we have command bandwidth to issue the burst
+ // if not, shift to next burst window
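+ // if synchronization commands are required and the data clock has been
+ // idle for longer than clkResyncDelay, a sync command precedes the burst
+ // and two command slots are reserved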
+ if (dataClockSync && ((cmd_at - rank.lastBurstTick) > clkResyncDelay))
+ cmd_at = verifyMultiCmd(cmd_at, tCK);
+ else
+ cmd_at = verifySingleCmd(cmd_at);
+
+ // if we are interleaving bursts, ensure that
+ // 1) we don't double interleave on next burst issue
+ // 2) we are at an interleave boundary; if not, shift to next boundary
+ Tick burst_gap = tBURST_MIN;
+ if (burstInterleave) {
+ if (cmd_at == (rank.lastBurstTick + tBURST_MIN)) {
+ // already interleaving, push next command to end of full burst
+ burst_gap = tBURST;
+ } else if (cmd_at < (rank.lastBurstTick + tBURST)) {
+ // not at an interleave boundary after bandwidth check
+ // Shift command to tBURST boundary to avoid data contention
+ // Command will remain in the same burstTicks window given that
+ // tBURST is less than tBURST_MAX
+ cmd_at = rank.lastBurstTick + tBURST;
+ }
+ }
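+ // e.g. with the BL32 bank-group configurations, a second burst to a
+ // different bank group can start tBURST_MIN (2 CK) after the first,
+ // after which the next burst must wait the full tBURST (6 CK)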
+ DPRINTF(DRAM, "Schedule RD/WR burst at tick %d\n", cmd_at);
+
// update the packet ready time
dram_pkt->readyTime = cmd_at + tCL + tBURST;
+ rank.lastBurstTick = cmd_at;
+
// update the time for the next read/write burst for each
// bank (add a max with tCCD/tCCD_L/tCCD_L_WR here)
Tick dly_to_rd_cmd;
// tCCD_L_WR is required for write-to-write
// Need to also take bus turnaround delays into account
dly_to_rd_cmd = dram_pkt->isRead() ?
- tCCD_L : std::max(tCCD_L, wrToRdDly);
+ tCCD_L : std::max(tCCD_L, wrToRdDlySameBG);
dly_to_wr_cmd = dram_pkt->isRead() ?
- std::max(tCCD_L, rdToWrDly) : tCCD_L_WR;
+ std::max(tCCD_L, rdToWrDlySameBG) :
+ tCCD_L_WR;
} else {
// tBURST is default requirement for diff BG timing
// Need to also take bus turnaround delays into account
- dly_to_rd_cmd = dram_pkt->isRead() ? tBURST : wrToRdDly;
- dly_to_wr_cmd = dram_pkt->isRead() ? rdToWrDly : tBURST;
+ dly_to_rd_cmd = dram_pkt->isRead() ? burst_gap : wrToRdDly;
+ dly_to_wr_cmd = dram_pkt->isRead() ? rdToWrDly : burst_gap;
}
} else {
// different rank is by default in a different bank group and
MemCommand::WR;
// Update bus state to reflect when previous command was issued
- nextBurstAt = cmd_at + tBURST;
-
+ nextBurstAt = cmd_at + burst_gap;
DPRINTF(DRAM, "Access to %lld, ready at %lld next burst at %lld.\n",
dram_pkt->addr, dram_pkt->readyTime, nextBurstAt);
if (auto_precharge) {
// if auto-precharge push a PRE command at the correct tick to the
// list used by DRAMPower library to calculate power
- prechargeBank(rank, bank, std::max(curTick(), bank.preAllowedAt));
+ prechargeBank(rank, bank, std::max(curTick(), bank.preAllowedAt),
+ true);
DPRINTF(DRAM, "Auto-precharged bank: %d\n", dram_pkt->bankId);
}
refreshState(REF_IDLE), inLowPowerState(false), rank(rank),
readEntries(0), writeEntries(0), outstandingEvents(0),
wakeUpAllowedAt(0), power(_p, false), banks(_p->banks_per_rank),
- numBanksActive(0), actTicks(_p->activation_limit, 0),
+ numBanksActive(0), actTicks(_p->activation_limit, 0), lastBurstTick(0),
writeDoneEvent([this]{ processWriteDoneEvent(); }, name()),
activateEvent([this]{ processActivateEvent(); }, name()),
prechargeEvent([this]{ processPrechargeEvent(); }, name()),
for (auto &b : banks) {
if (b.openRow != Bank::NO_ROW) {
- memory.prechargeBank(*this, b, pre_at, false);
+ memory.prechargeBank(*this, b, pre_at, true, false);
} else {
b.actAllowedAt = std::max(b.actAllowedAt, act_allowed_at);
b.preAllowedAt = std::max(b.preAllowedAt, pre_at);
} else {
// banks state is closed but haven't transitioned pwrState to IDLE
// or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
- // should have outstanding precharge event in this case
- assert(prechargeEvent.scheduled());
+ // should have outstanding precharge or read response event
+ assert(prechargeEvent.scheduled() ||
+ memory.respondEvent.scheduled());
// will start refresh when pwrState transitions to IDLE
}
assert(numBanksActive == 0);
- // wait for all banks to be precharged, at which point the
- // power state machine will transition to the idle state, and
- // automatically move to a refresh, at that point it will also
- // call this method to get the refresh event loop going again
+ // wait for all banks to be precharged or for the read to complete.
+ // When the precharge commands are done, the power state machine will
+ // transition to the idle state and automatically move to a refresh;
+ // at that point it will also call this method to get the refresh
+ // event loop going again.
+ // Similarly, when the read response completes and all banks are
+ // precharged, this method is called to re-start the loop
return;
}
avgWrBW = (bytesWritten / 1000000) / simSeconds;
avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
- peakBW = (SimClock::Frequency / dram.tBURST) * dram.burstSize / 1000000;
+ peakBW = (SimClock::Frequency / dram.burstDataCycles) *
+ dram.burstSize / 1000000;
busUtil = (avgRdBW + avgWrBW) / peakBW * 100;