from common import ObjectList
from common import HMC
-def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,\
+def create_mem_intf(intf, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,
xor_low_bit):
"""
-    Helper function for creating a single memoy controller from the given
+    Helper function for creating a single memory interface from the given
# Create an instance so we can figure out the address
# mapping and row-buffer size
- ctrl = cls()
+ interface = intf()
# Only do this for DRAMs
- if issubclass(cls, m5.objects.DRAMCtrl):
+ if issubclass(intf, m5.objects.DRAMInterface):
# If the channel bits are appearing after the column
# bits, we need to add the appropriate number of bits
# for the row buffer size
- if ctrl.addr_mapping.value == 'RoRaBaChCo':
+ if interface.addr_mapping.value == 'RoRaBaChCo':
# This computation only really needs to happen
# once, but as we rely on having an instance we
# end up having to repeat it for each and every
# one
- rowbuffer_size = ctrl.device_rowbuffer_size.value * \
- ctrl.devices_per_rank.value
+ rowbuffer_size = interface.device_rowbuffer_size.value * \
+ interface.devices_per_rank.value
intlv_low_bit = int(math.log(rowbuffer_size, 2))
# We got all we need to configure the appropriate address
# range
- ctrl.range = m5.objects.AddrRange(r.start, size = r.size(),
+ interface.range = m5.objects.AddrRange(r.start, size = r.size(),
intlvHighBit = \
intlv_low_bit + intlv_bits - 1,
xorHighBit = xor_high_bit,
intlvBits = intlv_bits,
intlvMatch = i)
- return ctrl
+ return interface
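
# A sketch (not part of the patch) to make the interleaving arithmetic
# concrete, using the DDR3_1600_8x8 values defined later in this patch
# (1kB row buffer per device, 8 devices per rank) and a hypothetical
# four-channel system with the RoRaBaChCo mapping:
#
#     rowbuffer_size = 1024 * 8                         # 8kB per rank
#     intlv_low_bit = int(math.log(rowbuffer_size, 2))  # bit 13
#     intlv_bits = int(math.log(4, 2))                  # 2 channel bits
#     intlv_high_bit = intlv_low_bit + intlv_bits - 1   # bit 14
#
# When channel hashing is in use, the xor_high_bit passed to AddrRange is
# presumably derived from the xor_low_bit argument the same way:
# xor_low_bit + intlv_bits - 1.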
def config_mem(options, system):
"""
if 2 ** intlv_bits != nbr_mem_ctrls:
fatal("Number of memory channels must be a power of 2")
- cls = ObjectList.mem_list.get(opt_mem_type)
+ intf = ObjectList.mem_list.get(opt_mem_type)
mem_ctrls = []
- if opt_elastic_trace_en and not issubclass(cls, m5.objects.SimpleMemory):
+ if opt_elastic_trace_en and not issubclass(intf, m5.objects.SimpleMemory):
fatal("When elastic trace is enabled, configure mem-type as "
"simple-mem.")
intlv_size = max(opt_mem_channels_intlv, system.cache_line_size.value)
# For every range (most systems will only have one), create an
- # array of controllers and set their parameters to match their
- # address mapping in the case of a DRAM
+ # array of memory interfaces and set their parameters to match
+ # their address mapping in the case of a DRAM
for r in system.mem_ranges:
for i in range(nbr_mem_ctrls):
- mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits,
+ # Create the DRAM interface
+ dram_intf = create_mem_intf(intf, r, i, nbr_mem_ctrls, intlv_bits,
intlv_size, opt_xor_low_bit)
+
# Set the number of ranks based on the command-line
# options if it was explicitly set
- if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks:
- mem_ctrl.ranks_per_channel = opt_mem_ranks
+ if issubclass(intf, m5.objects.DRAMInterface) and opt_mem_ranks:
+ dram_intf.ranks_per_channel = opt_mem_ranks
# Enable low-power DRAM states if option is set
- if issubclass(cls, m5.objects.DRAMCtrl):
- mem_ctrl.enable_dram_powerdown = opt_dram_powerdown
+ if issubclass(intf, m5.objects.DRAMInterface):
+ dram_intf.enable_dram_powerdown = opt_dram_powerdown
if opt_elastic_trace_en:
- mem_ctrl.latency = '1ns'
+ dram_intf.latency = '1ns'
print("For elastic trace, over-riding Simple Memory "
"latency to 1ns.")
+ # Create the controller that will drive the interface
+ if opt_mem_type == "HMC_2500_1x32":
+ # The static latency of the vault controllers is estimated
+ # to be smaller than a full DRAM channel controller
+ mem_ctrl = m5.objects.DRAMCtrl(min_writes_per_switch = 8,
+ static_backend_latency = '4ns',
+ static_frontend_latency = '4ns')
+ else:
+ mem_ctrl = m5.objects.DRAMCtrl()
+
+ # Hookup the controller to the interface and add to the list
+ mem_ctrl.dram = dram_intf
mem_ctrls.append(mem_ctrl)
- subsystem.mem_ctrls = mem_ctrls
-
- # Connect the controllers to the membus
- for i in range(len(subsystem.mem_ctrls)):
+    # Connect the controllers to the membus or, for HMC, the vault crossbars
+ for i in range(len(mem_ctrls)):
if opt_mem_type == "HMC_2500_1x32":
- subsystem.mem_ctrls[i].port = xbar[i/4].master
+ # Connect the controllers to the membus
+            mem_ctrls[i].port = xbar[i // 4].master
# Set memory device size. There is an independent controller for
            # each vault. All vaults are the same size.
- subsystem.mem_ctrls[i].device_size = options.hmc_dev_vault_size
+ mem_ctrls[i].dram.device_size = options.hmc_dev_vault_size
else:
- subsystem.mem_ctrls[i].port = xbar.master
+ # Connect the controllers to the membus
+ mem_ctrls[i].port = xbar.master
+
+ subsystem.mem_ctrls = mem_ctrls
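
# The result, sketched here (not part of the patch), is a two-level
# hierarchy: each entry in mem_ctrls is a DRAMCtrl whose 'dram' child is
# the interface that owns the address range and timing parameters. For
# HMC, i // 4 spreads the 16 vault controllers across the 4 crossbars.
#
#     for ctrl in subsystem.mem_ctrls:
#         assert isinstance(ctrl, m5.objects.DRAMCtrl)
#         assert isinstance(ctrl.dram, m5.objects.DRAMInterface)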
+
# Sanity check for memory controller class.
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
+ fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
+ fatal("This script assumes the memory is a DRAMInterface subclass")
# There is no point slowing things down by saving any data.
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
+
+# enable DRAM low power states
+system.mem_ctrls[0].dram.enable_dram_powerdown = True
# Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = args.addr_map
-system.mem_ctrls[0].page_policy = args.page_policy
+system.mem_ctrls[0].dram.addr_mapping = args.addr_map
+system.mem_ctrls[0].dram.page_policy = args.page_policy
# We create a traffic generator state for each param combination we want to
# test. Each traffic generator state is specified in the config file and the
cfg_file = open(cfg_file_path, 'w')
# Get the number of banks
-nbr_banks = int(system.mem_ctrls[0].banks_per_rank.value)
+nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value)
# determine the burst size in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
- system.mem_ctrls[0].device_bus_width.value *
- system.mem_ctrls[0].burst_length.value) / 8)
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
+ system.mem_ctrls[0].dram.device_bus_width.value *
+ system.mem_ctrls[0].dram.burst_length.value) / 8)
# next, get the page size in bytes (the rowbuffer size is already in bytes)
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
+ system.mem_ctrls[0].dram.device_rowbuffer_size.value
# Inter-request delay should be such that we can hit as many transitions
# to/from low power states as possible. We provide a min and max itt to the
# traffic generator and it randomises in the range. The parameter is in
# seconds and we need it in ticks (ps).
-itt_min = system.mem_ctrls[0].tBURST.value * 1000000000000
+itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000
# The itt value when set to (tRAS + tRP + tCK) covers the case where
# a read command is delayed beyond the delay from ACT to PRE_PDN entry of the
# previous command. For a write command followed by precharge, the delay
# between a write and power down entry will be tRCD + tCL + tWR + tRP + tCK.
# As we use this delay as a unit and create multiples of it as bigger delays
# for the sweep, this parameter works for reads, writes and mix of them.
-pd_entry_time = (system.mem_ctrls[0].tRAS.value +
- system.mem_ctrls[0].tRP.value +
- system.mem_ctrls[0].tCK.value) * 1000000000000
+pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value +
+ system.mem_ctrls[0].dram.tRP.value +
+ system.mem_ctrls[0].dram.tCK.value) * 1000000000000
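
# Plugging in the DDR3-1600 timings removed further down in this patch
# (tBURST = 5ns, tRAS = 35ns, tRP = 13.75ns, tCK = 1.25ns) gives a feel
# for the magnitudes; a worked example, not part of the patch:
#
#     burst_size    = 8 devices * 8 bits * BL8 / 8      = 64 bytes
#     page_size     = 8 devices * 1kB row buffer        = 8kB
#     itt_min       = 5 ns                              = 5000 ticks (ps)
#     pd_entry_time = (35 + 13.75 + 1.25) ns = 50 ns    = 50000 ticks (ps)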
# We sweep itt max using the multipliers specified by the user.
itt_max_str = args.itt_list.strip().split()
# the following assumes that we are using the native DRAM
# controller, check to be sure
if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl):
- fatal("This script assumes the memory is a DRAMCtrl subclass")
+ fatal("This script assumes the controller is a DRAMCtrl subclass")
+if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface):
+ fatal("This script assumes the memory is a DRAMInterface subclass")
# there is no point slowing things down by saving any data
-system.mem_ctrls[0].null = True
+system.mem_ctrls[0].dram.null = True
# Set the address mapping based on input argument
-system.mem_ctrls[0].addr_mapping = options.addr_map
+system.mem_ctrls[0].dram.addr_mapping = options.addr_map
# stay in each state for 0.25 ms, long enough to warm things up, and
# short enough to avoid hitting a refresh
# the injected traffic should match the DRAM maximum bandwidth to ensure
# that it is saturated
# get the number of banks
-nbr_banks = system.mem_ctrls[0].banks_per_rank.value
+nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value
# determine the burst length in bytes
-burst_size = int((system.mem_ctrls[0].devices_per_rank.value *
- system.mem_ctrls[0].device_bus_width.value *
- system.mem_ctrls[0].burst_length.value) / 8)
+burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value *
+ system.mem_ctrls[0].dram.device_bus_width.value *
+ system.mem_ctrls[0].dram.burst_length.value) / 8)
# next, get the page size in bytes
-page_size = system.mem_ctrls[0].devices_per_rank.value * \
- system.mem_ctrls[0].device_rowbuffer_size.value
+page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \
+ system.mem_ctrls[0].dram.device_rowbuffer_size.value
# match the maximum bandwidth of the memory, the parameter is in seconds
# and we need it in ticks (ps)
-itt = getattr(system.mem_ctrls[0].tBURST_MIN, 'value',
- system.mem_ctrls[0].tBURST.value) * 1000000000000
+itt = getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value',
+ system.mem_ctrls[0].dram.tBURST.value) * 1000000000000
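
# tBURST_MIN defaults to tBURST in the parameter definitions further down,
# so the getattr fallback only matters if the attribute is missing or
# unresolved; either way the generator paces one burst per tBURST. A rough
# check (not part of the patch) with the DDR3-1600 numbers used above:
#
#     itt     = 5 ns = 5000 ticks (ps)
#     peak bw = 64 bytes / 5 ns = 12.8 GB/s, the theoretical maximum of
#               an x64 DDR3-1600 channel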
# assume we start at 0
max_addr = mem_range.end
proto_tester = TrafficGen(config_file = cfg_file_path)
# Set up the system along with a DRAM controller
-system = System(physmem = DDR3_1600_8x8())
+system = System(physmem = DRAMCtrl(dram = DDR3_1600_8x8()))
system.voltage_domain = VoltageDomain(voltage = '1V')
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
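
# Since gem5 SimObjects accept children as constructor keywords, the same
# hookup can be collapsed into one expression, as the physmem line above
# already does; an equivalent sketch (not part of the patch):
#
#     system.mem_ctrl = DRAMCtrl(dram = DDR3_1600_8x8(
#         range = system.mem_ranges[0]))
#     system.mem_ctrl.port = system.membus.master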
# Connect the system up to the membus
system.system_port = system.membus.slave
# Create a DDR3 memory controller
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Create a process for a simple "Hello World" application
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Connect the system up to the membus
system.cpu.interrupts[0].int_slave = system.membus.master
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
system.mem_ctrl.port = system.membus.master
# Connect the system up to the membus
system.cpu = [TimingSimpleCPU() for i in range(2)]
# Create a DDR3 memory controller and connect it to the membus
-system.mem_ctrl = DDR3_1600_8x8()
-system.mem_ctrl.range = system.mem_ranges[0]
+system.mem_ctrl = DRAMCtrl()
+system.mem_ctrl.dram = DDR3_1600_8x8()
+system.mem_ctrl.dram.range = system.mem_ranges[0]
# create the interrupt controller for the CPU and connect to the membus
for cpu in system.cpu:
dir_ranges = []
for r in system.mem_ranges:
mem_type = ObjectList.mem_list.get(options.mem_type)
- mem_ctrl = MemConfig.create_mem_ctrl(mem_type, r, index,
+ dram_intf = MemConfig.create_mem_intf(mem_type, r, index,
options.num_dirs, int(math.log(options.num_dirs, 2)),
intlv_size, options.xor_low_bit)
+ mem_ctrl = m5.objects.DRAMCtrl(dram = dram_intf)
if options.access_backing_store:
mem_ctrl.kvm_map=False
mem_ctrls.append(mem_ctrl)
- dir_ranges.append(mem_ctrl.range)
+ dir_ranges.append(mem_ctrl.dram.range)
if crossbar != None:
mem_ctrl.port = crossbar.master
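
# For Ruby, the directory count plays the role of the channel count; a
# sketch (hypothetical values, not part of the patch) of the arguments
# this loop passes for a four-directory system:
#
#     num_dirs = 4                               # must be a power of two
#     intlv_bits = int(math.log(num_dirs, 2))    # 2 interleave bits
#
# Each directory then claims every fourth intlv_size-sized chunk of the
# range r, and dir_ranges records those chunks for the directory controllers.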
from m5.params import *
from m5.proxy import *
-from m5.objects.AbstractMemory import *
from m5.objects.QoSMemCtrl import *
# Enum for memory scheduling algorithms, currently First-Come
# First-Served and a First-Row Hit then First-Come First-Served
class MemSched(Enum): vals = ['fcfs', 'frfcfs']
-# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
-# channel, rank, bank, row and column, respectively, and going from
-# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
-# suitable for an open-page policy, optimising for sequential accesses
-# hitting in the open row. For a closed-page policy, RoCoRaBaCh
-# maximises parallelism.
-class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
-
-# Enum for the page policy, either open, open_adaptive, close, or
-# close_adaptive.
-class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
- 'close_adaptive']
-
# DRAMCtrl is a single-channel single-ported DRAM controller model
# that aims to model the most important system-level performance
# effects of a DRAM without getting into too much detail of the DRAM
# itself. The controller is single ported; to provide multiple ports,
# instantiate a bus or crossbar in front of the controller.
port = SlavePort("Slave port")
- # the basic configuration of the controller architecture, note
- # that each entry corresponds to a burst for the specific DRAM
- # configuration (e.g. x32 with burst length 8 is 32 bytes) and not
- # the cacheline size or request/packet size
- write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
- read_buffer_size = Param.Unsigned(32, "Number of read queue entries")
+    # Interface to the volatile DRAM media
+ dram = Param.DRAMInterface("DRAM interface")
+
+ # read and write buffer depths are set in the interface
+ # the controller will read these values when instantiated
# threshold in percent for when to forcefully trigger writes and
# start emptying the write buffer
# scheduler, address map and page policy
mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
- addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
- page_policy = Param.PageManage('open_adaptive', "Page management policy")
-
- # enforce a limit on the number of accesses per row
- max_accesses_per_row = Param.Unsigned(16, "Max accesses per row before "
- "closing");
-
- # size of DRAM Chip in Bytes
- device_size = Param.MemorySize("Size of DRAM chip")
# pipeline latency of the controller and PHY, split into a
# frontend part and a backend part, with reads and writes serviced
    # by the memory seeing the sum of the two
static_frontend_latency = Param.Latency("10ns", "Static frontend latency")
static_backend_latency = Param.Latency("10ns", "Static backend latency")
-
- # the physical organisation of the DRAM
- device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "\
- "device/chip")
- burst_length = Param.Unsigned("Burst lenght (BL) in beats")
- device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
- "device/chip")
- devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
- ranks_per_channel = Param.Unsigned("Number of ranks per channel")
-
- # default to 0 bank groups per rank, indicating bank group architecture
- # is not used
- # update per memory class when bank group architecture is supported
- bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank")
- banks_per_rank = Param.Unsigned("Number of banks per rank")
-
- # Enable DRAM powerdown states if True. This is False by default due to
- # performance being lower when enabled
- enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
-
- # For power modelling we need to know if the DRAM has a DLL or not
- dll = Param.Bool(True, "DRAM has DLL or not")
-
- # DRAMPower provides in addition to the core power, the possibility to
- # include RD/WR termination and IO power. This calculation assumes some
- # default values. The integration of DRAMPower with gem5 does not include
- # IO and RD/WR termination power by default. This might be added as an
- # additional feature in the future.
-
- # timing behaviour and constraints - all in nanoseconds
-
- # the base clock period of the DRAM
- tCK = Param.Latency("Clock period")
-
- # the amount of time in nanoseconds from issuing an activate command
- # to the data being available in the row buffer for a read/write
- tRCD = Param.Latency("RAS to CAS delay")
-
- # the time from issuing a read/write command to seeing the actual data
- tCL = Param.Latency("CAS latency")
-
- # minimum time between a precharge and subsequent activate
- tRP = Param.Latency("Row precharge time")
-
- # minimum time between an activate and a precharge to the same row
- tRAS = Param.Latency("ACT to PRE delay")
-
- # minimum time between a write data transfer and a precharge
- tWR = Param.Latency("Write recovery time")
-
- # minimum time between a read and precharge command
- tRTP = Param.Latency("Read to precharge")
-
- # time to complete a burst transfer, typically the burst length
- # divided by two due to the DDR bus, but by making it a parameter
- # it is easier to also evaluate SDR memories like WideIO.
- # This parameter has to account for burst length.
- # Read/Write requests with data size larger than one full burst are broken
- # down into multiple requests in the controller
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = Param.Latency("Burst duration "
- "(typically burst length / 2 cycles)")
-
- # tBURST_MAX is the column array cycle delay required before next access,
- # which could be greater than tBURST when the memory access time is greater
- # than tBURST
- tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
-
- # tBURST_MIN is the minimum delay between bursts, which could be less than
- # tBURST when interleaving is supported
- tBURST_MIN = Param.Latency(Self.tBURST, "Minimim delay between bursts")
-
- # CAS-to-CAS delay for bursts to the same bank group
- # only utilized with bank group architectures; set to 0 for default case
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
-
- # Write-to-Write delay for bursts to the same bank group
- # only utilized with bank group architectures; set to 0 for default case
- # This will be used to enable different same bank group delays
- # for writes versus reads
- tCCD_L_WR = Param.Latency(Self.tCCD_L,
- "Same bank group Write to Write delay")
-
- # time taken to complete one refresh cycle (N rows in all banks)
- tRFC = Param.Latency("Refresh cycle time")
-
- # refresh command interval, how often a "ref" command needs
- # to be sent. It is 7.8 us for a 64ms refresh requirement
- tREFI = Param.Latency("Refresh command interval")
-
- # write-to-read, same rank turnaround penalty
- tWTR = Param.Latency("Write to read, same rank switching time")
-
- # write-to-read, same rank turnaround penalty for same bank group
- tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
- "time, same bank group")
-
- # read-to-write, same rank turnaround penalty
- tRTW = Param.Latency("Read to write, same rank switching time")
-
- # rank-to-rank bus delay penalty
- # this does not correlate to a memory timing parameter and encompasses:
- # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
- # different rank bus delay
- tCS = Param.Latency("Rank to rank switching time")
-
- # minimum precharge to precharge delay time
- tPPD = Param.Latency("0ns", "PRE to PRE delay")
-
- # maximum delay between two-cycle ACT command phases
- tAAD = Param.Latency(Self.tCK,
- "Maximum delay between two-cycle ACT commands")
-
- two_cycle_activate = Param.Bool(False,
- "Two cycles required to send activate")
-
- # minimum row activate to row activate delay time
- tRRD = Param.Latency("ACT to ACT delay")
-
- # only utilized with bank group architectures; set to 0 for default case
- tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
-
- # time window in which a maximum number of activates are allowed
- # to take place, set to 0 to disable
- tXAW = Param.Latency("X activation window")
- activation_limit = Param.Unsigned("Max number of activates in window")
-
- # time to exit power-down mode
- # Exit power-down to next valid command delay
- tXP = Param.Latency("0ns", "Power-up Delay")
-
- # Exit Powerdown to commands requiring a locked DLL
- tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
-
- # time to exit self-refresh mode
- tXS = Param.Latency("0ns", "Self-refresh exit latency")
-
- # time to exit self-refresh mode with locked DLL
- tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
-
- # number of data beats per clock. with DDR, default is 2, one per edge
- beats_per_clock = Param.Unsigned(2, "Data beats per clock")
-
- data_clock_sync = Param.Bool(False, "Synchronization commands required")
-
- # Currently rolled into other params
- ######################################################################
-
- # tRC - assumed to be tRAS + tRP
-
- # Power Behaviour and Constraints
- # DRAMs like LPDDR and WideIO have 2 external voltage domains. These are
- # defined as VDD and VDD2. Each current is defined for each voltage domain
- # separately. For example, current IDD0 is active-precharge current for
- # voltage domain VDD and current IDD02 is active-precharge current for
- # voltage domain VDD2.
- # By default all currents are set to 0mA. Users who are only interested in
- # the performance of DRAMs can leave them at 0.
-
- # Operating 1 Bank Active-Precharge current
- IDD0 = Param.Current("0mA", "Active precharge current")
-
- # Operating 1 Bank Active-Precharge current multiple voltage Range
- IDD02 = Param.Current("0mA", "Active precharge current VDD2")
-
- # Precharge Power-down Current: Slow exit
- IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
-
- # Precharge Power-down Current: Slow exit multiple voltage Range
- IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
-
- # Precharge Power-down Current: Fast exit
- IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
-
- # Precharge Power-down Current: Fast exit multiple voltage Range
- IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
-
- # Precharge Standby current
- IDD2N = Param.Current("0mA", "Precharge Standby current")
-
- # Precharge Standby current multiple voltage range
- IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
-
- # Active Power-down current: slow exit
- IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
-
- # Active Power-down current: slow exit multiple voltage range
- IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
-
- # Active Power-down current : fast exit
- IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
-
- # Active Power-down current : fast exit multiple voltage range
- IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
-
- # Active Standby current
- IDD3N = Param.Current("0mA", "Active Standby current")
-
- # Active Standby current multiple voltage range
- IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
-
- # Burst Read Operating Current
- IDD4R = Param.Current("0mA", "READ current")
-
- # Burst Read Operating Current multiple voltage range
- IDD4R2 = Param.Current("0mA", "READ current VDD2")
-
- # Burst Write Operating Current
- IDD4W = Param.Current("0mA", "WRITE current")
-
- # Burst Write Operating Current multiple voltage range
- IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
-
- # Refresh Current
- IDD5 = Param.Current("0mA", "Refresh current")
-
- # Refresh Current multiple voltage range
- IDD52 = Param.Current("0mA", "Refresh current VDD2")
-
- # Self-Refresh Current
- IDD6 = Param.Current("0mA", "Self-refresh Current")
-
- # Self-Refresh Current multiple voltage range
- IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
-
- # Main voltage range of the DRAM
- VDD = Param.Voltage("0V", "Main Voltage Range")
-
- # Second voltage range defined by some DRAMs
- VDD2 = Param.Voltage("0V", "2nd Voltage Range")
-
-# A single DDR3-1600 x64 channel (one command and address bus), with
-# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
-# an 8x8 configuration.
-class DDR3_1600_8x8(DRAMCtrl):
- # size of device in bytes
- device_size = '512MB'
-
- # 8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
- # DDR3 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
- # 8x8 configuration, so 8 devices
- devices_per_rank = 8
-
- # Use two ranks
- ranks_per_channel = 2
-
- # DDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 800 MHz
- tCK = '1.25ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz
- tBURST = '5ns'
-
- # DDR3-1600 11-11-11
- tRCD = '13.75ns'
- tCL = '13.75ns'
- tRP = '13.75ns'
- tRAS = '35ns'
- tRRD = '6ns'
- tXAW = '30ns'
- activation_limit = 4
- tRFC = '260ns'
-
- tWR = '15ns'
-
- # Greater of 4 CK or 7.5 ns
- tWTR = '7.5ns'
-
- # Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
- # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # <=85C, half for >85C
- tREFI = '7.8us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
- # self refresh exit time
- tXS = '270ns'
-
- # Current values from datasheet Die Rev E,J
- IDD0 = '55mA'
- IDD2N = '32mA'
- IDD3N = '38mA'
- IDD4W = '125mA'
- IDD4R = '157mA'
- IDD5 = '235mA'
- IDD3P1 = '38mA'
- IDD2P1 = '32mA'
- IDD6 = '20mA'
- VDD = '1.5V'
-
-# A single HMC-2500 x32 model based on:
-# [1] DRAMSpec: a high-level DRAM bank modelling tool
-# developed at the University of Kaiserslautern. This high level tool
-# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
-# estimate the DRAM bank latency and power numbers.
-# [2] High performance AXI-4.0 based interconnect for extensible smart memory
-# cubes (E. Azarkhish et. al)
-# Assumed for the HMC model is a 30 nm technology node.
-# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory (4
-# layers).
-# Each layer has 16 vaults and each vault consists of 2 banks per layer.
-# In order to be able to use the same controller used for 2D DRAM generations
-# for HMC, the following analogy is done:
-# Channel (DDR) => Vault (HMC)
-# device_size (DDR) => size of a single layer in a vault
-# ranks per channel (DDR) => number of layers
-# banks per rank (DDR) => banks per layer
-# devices per rank (DDR) => devices per layer ( 1 for HMC).
-# The parameters for which no input is available are inherited from the DDR3
-# configuration.
-# This configuration includes the latencies from the DRAM to the logic layer
-# of the HMC
-class HMC_2500_1x32(DDR3_1600_8x8):
- # size of device
- # two banks per device with each bank 4MB [2]
- device_size = '8MB'
-
- # 1x32 configuration, 1 device with 32 TSVs [2]
- device_bus_width = 32
-
- # HMC is a BL8 device [2]
- burst_length = 8
-
- # Each device has a page (row buffer) size of 256 bytes [2]
- device_rowbuffer_size = '256B'
-
- # 1x32 configuration, so 1 device [2]
- devices_per_rank = 1
-
- # 4 layers so 4 ranks [2]
- ranks_per_channel = 4
-
- # HMC has 2 banks per layer [2]
- # Each layer represents a rank. With 4 layers and 8 banks in total, each
- # layer has 2 banks; thus 2 banks per rank.
- banks_per_rank = 2
-
- # 1250 MHz [2]
- tCK = '0.8ns'
-
- # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
- tBURST = '3.2ns'
-
- # Values using DRAMSpec HMC model [1]
- tRCD = '10.2ns'
- tCL = '9.9ns'
- tRP = '7.7ns'
- tRAS = '21.6ns'
-
- # tRRD depends on the power supply network for each vendor.
- # We assume a tRRD of a double bank approach to be equal to 4 clock
- # cycles (Assumption)
- tRRD = '3.2ns'
-
- # activation limit is set to 0 since there are only 2 banks per vault
- # layer.
- activation_limit = 0
-
- # Values using DRAMSpec HMC model [1]
- tRFC = '59ns'
- tWR = '8ns'
- tRTP = '4.9ns'
-
- # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz =
- # 0.8 ns (Assumption)
- tCS = '0.8ns'
-
- # Value using DRAMSpec HMC model [1]
- tREFI = '3.9us'
-
- # The default page policy in the vault controllers is simple closed page
- # [2] nevertheless 'close' policy opens and closes the row multiple times
- # for bursts largers than 32Bytes. For this reason we use 'close_adaptive'
- page_policy = 'close_adaptive'
-
- # RoCoRaBaCh resembles the default address mapping in HMC
- addr_mapping = 'RoCoRaBaCh'
- min_writes_per_switch = 8
-
- # These parameters do not directly correlate with buffer_size in real
- # hardware. Nevertheless, their value has been tuned to achieve a
- # bandwidth similar to the cycle-accurate model in [2]
- write_buffer_size = 32
- read_buffer_size = 32
-
- # The static latency of the vault controllers is estimated to be smaller
- # than a full DRAM channel controller
- static_backend_latency='4ns'
- static_frontend_latency='4ns'
-
-# A single DDR3-2133 x64 channel refining a selected subset of the
-# options for the DDR-1600 configuration, based on the same DDR3-1600
-# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
-# consistent across the two configurations.
-class DDR3_2133_8x8(DDR3_1600_8x8):
- # 1066 MHz
- tCK = '0.938ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
- tBURST = '3.752ns'
-
- # DDR3-2133 14-14-14
- tRCD = '13.09ns'
- tCL = '13.09ns'
- tRP = '13.09ns'
- tRAS = '33ns'
- tRRD = '5ns'
- tXAW = '25ns'
-
- # Current values from datasheet
- IDD0 = '70mA'
- IDD2N = '37mA'
- IDD3N = '44mA'
- IDD4W = '157mA'
- IDD4R = '191mA'
- IDD5 = '250mA'
- IDD3P1 = '44mA'
- IDD2P1 = '43mA'
- IDD6 ='20mA'
- VDD = '1.5V'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
-# in an 16x4 configuration.
-# Total channel capacity is 32GB
-# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
-class DDR4_2400_16x4(DRAMCtrl):
- # size of device
- device_size = '1GB'
-
- # 16x4 configuration, 16 devices each with a 4-bit interface
- device_bus_width = 4
-
- # DDR4 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 512 byte (1K columns x4)
- device_rowbuffer_size = '512B'
-
- # 16x4 configuration, so 16 devices
- devices_per_rank = 16
-
- # Match our DDR3 configurations which is dual rank
- ranks_per_channel = 2
-
- # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
- # Set to 4 for x4 case
- bank_groups_per_rank = 4
-
- # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
- # configurations). Currently we do not capture the additional
- # constraints incurred by the bank groups
- banks_per_rank = 16
-
- # override the default buffer sizes and go for something larger to
- # accommodate the larger bank count
- write_buffer_size = 128
- read_buffer_size = 64
-
- # 1200 MHz
- tCK = '0.833ns'
-
- # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = '3.332ns'
-
- # @2400 data rate, tCCD_L is 6 CK
- # CAS-to-CAS delay for bursts to the same bank group
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '5ns';
-
- # DDR4-2400 17-17-17
- tRCD = '14.16ns'
- tCL = '14.16ns'
- tRP = '14.16ns'
- tRAS = '32ns'
-
- # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
- tRRD = '3.332ns'
-
- # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
- # tFAW for 512B page is MAX(16 CK, 13ns)
- tXAW = '13.328ns'
- activation_limit = 4
- # tRFC is 350ns
- tRFC = '350ns'
-
- tWR = '15ns'
-
- # Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
- # Greater of 4 CK or 7.5 ns
- tRTP = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666 ns
- tRTW = '1.666ns'
-
- # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
- tCS = '1.666ns'
-
- # <=85C, half for >85C
- tREFI = '7.8us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '6ns'
-
- # self refresh exit time
- # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
- # tRFC + 10ns = 340ns
- tXS = '340ns'
-
- # Current values from datasheet
- IDD0 = '43mA'
- IDD02 = '3mA'
- IDD2N = '34mA'
- IDD3N = '38mA'
- IDD3N2 = '3mA'
- IDD4W = '103mA'
- IDD4R = '110mA'
- IDD5 = '250mA'
- IDD3P1 = '32mA'
- IDD2P1 = '25mA'
- IDD6 = '30mA'
- VDD = '1.2V'
- VDD2 = '2.5V'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
-# in an 8x8 configuration.
-# Total channel capacity is 16GB
-# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
-class DDR4_2400_8x8(DDR4_2400_16x4):
- # 8x8 configuration, 8 devices each with an 8-bit interface
- device_bus_width = 8
-
- # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
- device_rowbuffer_size = '1kB'
-
- # 8x8 configuration, so 8 devices
- devices_per_rank = 8
-
- # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
- tRRD_L = '4.9ns';
-
- tXAW = '21ns'
-
- # Current values from datasheet
- IDD0 = '48mA'
- IDD3N = '43mA'
- IDD4W = '123mA'
- IDD4R = '135mA'
- IDD3P1 = '37mA'
-
-# A single DDR4-2400 x64 channel (one command and address bus), with
-# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
-# in an 4x16 configuration.
-# Total channel capacity is 4GB
-# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
-class DDR4_2400_4x16(DDR4_2400_16x4):
- # 4x16 configuration, 4 devices each with an 16-bit interface
- device_bus_width = 16
-
- # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
- device_rowbuffer_size = '2kB'
-
- # 4x16 configuration, so 4 devices
- devices_per_rank = 4
-
- # Single rank for x16
- ranks_per_channel = 1
-
- # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
- # Set to 2 for x16 case
- bank_groups_per_rank = 2
-
- # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
- # configurations). Currently we do not capture the additional
- # constraints incurred by the bank groups
- banks_per_rank = 8
-
- # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
- tRRD = '5.3ns'
-
- # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
- tRRD_L = '6.4ns';
-
- tXAW = '30ns'
-
- # Current values from datasheet
- IDD0 = '80mA'
- IDD02 = '4mA'
- IDD2N = '34mA'
- IDD3N = '47mA'
- IDD4W = '228mA'
- IDD4R = '243mA'
- IDD5 = '280mA'
- IDD3P1 = '41mA'
-
-# A single LPDDR2-S4 x32 interface (one command/address bus), with
-# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
-# in a 1x32 configuration.
-class LPDDR2_S4_1066_1x32(DRAMCtrl):
- # No DLL in LPDDR2
- dll = False
-
- # size of device
- device_size = '512MB'
-
- # 1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # LPDDR2_S4 is a BL4 and BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 1KB
- # (this depends on the memory density)
- device_rowbuffer_size = '1kB'
-
- # 1x32 configuration, so 1 device
- devices_per_rank = 1
-
- # Use a single rank
- ranks_per_channel = 1
-
- # LPDDR2-S4 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 533 MHz
- tCK = '1.876ns'
-
- # Fixed at 15 ns
- tRCD = '15ns'
-
- # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
- tCL = '15ns'
-
- # Pre-charge one bank 15 ns (all banks 18 ns)
- tRP = '15ns'
-
- tRAS = '42ns'
- tWR = '15ns'
-
- tRTP = '7.5ns'
-
- # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
- # Note this is a BL8 DDR device.
- # Requests larger than 32 bytes are broken down into multiple requests
- # in the controller
- tBURST = '7.5ns'
-
- # LPDDR2-S4, 4 Gbit
- tRFC = '130ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
- # self refresh exit time
- tXS = '140ns'
-
- # Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
- tRTW = '3.75ns'
-
- # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
- tCS = '3.75ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Irrespective of density, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
- # Current values from datasheet
- IDD0 = '15mA'
- IDD02 = '70mA'
- IDD2N = '2mA'
- IDD2N2 = '30mA'
- IDD3N = '2.5mA'
- IDD3N2 = '30mA'
- IDD4W = '10mA'
- IDD4W2 = '190mA'
- IDD4R = '3mA'
- IDD4R2 = '220mA'
- IDD5 = '40mA'
- IDD52 = '150mA'
- IDD3P1 = '1.2mA'
- IDD3P12 = '8mA'
- IDD2P1 = '0.6mA'
- IDD2P12 = '0.8mA'
- IDD6 = '1mA'
- IDD62 = '3.2mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-
-# A single WideIO x128 interface (one command and address bus), with
-# default timings based on an estimated WIO-200 8 Gbit part.
-class WideIO_200_1x128(DRAMCtrl):
- # No DLL for WideIO
- dll = False
-
- # size of device
- device_size = '1024MB'
-
- # 1x128 configuration, 1 device with a 128-bit interface
- device_bus_width = 128
-
- # This is a BL4 device
- burst_length = 4
-
- # Each device has a page (row buffer) size of 4KB
- # (this depends on the memory density)
- device_rowbuffer_size = '4kB'
-
- # 1x128 configuration, so 1 device
- devices_per_rank = 1
-
- # Use one rank for a one-high die stack
- ranks_per_channel = 1
-
- # WideIO has 4 banks in all configurations
- banks_per_rank = 4
-
- # 200 MHz
- tCK = '5ns'
-
- # WIO-200
- tRCD = '18ns'
- tCL = '18ns'
- tRP = '18ns'
- tRAS = '42ns'
- tWR = '15ns'
- # Read to precharge is same as the burst
- tRTP = '20ns'
-
- # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz.
- # Note this is a BL4 SDR device.
- tBURST = '20ns'
-
- # WIO 8 Gb
- tRFC = '210ns'
-
- # WIO 8 Gb, <=85C, half for >85C
- tREFI = '3.9us'
-
- # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
- tWTR = '15ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
- tRTW = '10ns'
-
- # Default different rank bus delay to 2 CK, @200 MHz = 10 ns
- tCS = '10ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Two instead of four activation window
- tXAW = '50ns'
- activation_limit = 2
-
- # The WideIO specification does not provide current information
-
-# A single LPDDR3 x32 interface (one command/address bus), with
-# default timings based on a LPDDR3-1600 4 Gbit part (Micron
-# EDF8132A1MC) in a 1x32 configuration.
-class LPDDR3_1600_1x32(DRAMCtrl):
- # No DLL for LPDDR3
- dll = False
-
- # size of device
- device_size = '512MB'
-
- # 1x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # LPDDR3 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 4KB
- device_rowbuffer_size = '4kB'
-
- # 1x32 configuration, so 1 device
- devices_per_rank = 1
-
- # Technically the datasheet is a dual-rank package, but for
- # comparison with the LPDDR2 config we stick to a single rank
- ranks_per_channel = 1
-
- # LPDDR3 has 8 banks in all configurations
- banks_per_rank = 8
-
- # 800 MHz
- tCK = '1.25ns'
-
- tRCD = '18ns'
-
- # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
- tCL = '15ns'
-
- tRAS = '42ns'
- tWR = '15ns'
-
- # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
- tRTP = '7.5ns'
-
- # Pre-charge one bank 18 ns (all banks 21 ns)
- tRP = '18ns'
-
- # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
- # Note this is a BL8 DDR device.
- # Requests larger than 32 bytes are broken down into multiple requests
- # in the controller
- tBURST = '5ns'
-
- # LPDDR3, 4 Gb
- tRFC = '130ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '7.5ns'
-
- # self refresh exit time
- tXS = '140ns'
-
- # Irrespective of speed grade, tWTR is 7.5 ns
- tWTR = '7.5ns'
-
- # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
- tRTW = '2.5ns'
-
- # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # Activate to activate irrespective of density and speed grade
- tRRD = '10.0ns'
-
- # Irrespective of size, tFAW is 50 ns
- tXAW = '50ns'
- activation_limit = 4
-
- # Current values from datasheet
- IDD0 = '8mA'
- IDD02 = '60mA'
- IDD2N = '0.8mA'
- IDD2N2 = '26mA'
- IDD3N = '2mA'
- IDD3N2 = '34mA'
- IDD4W = '2mA'
- IDD4W2 = '190mA'
- IDD4R = '2mA'
- IDD4R2 = '230mA'
- IDD5 = '28mA'
- IDD52 = '150mA'
- IDD3P1 = '1.4mA'
- IDD3P12 = '11mA'
- IDD2P1 = '0.8mA'
- IDD2P12 = '1.8mA'
- IDD6 = '0.5mA'
- IDD62 = '1.8mA'
- VDD = '1.8V'
- VDD2 = '1.2V'
-
-# A single GDDR5 x64 interface, with
-# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
-# H5GQ1H24AFR) in a 2x32 configuration.
-class GDDR5_4000_2x32(DRAMCtrl):
- # size of device
- device_size = '128MB'
-
- # 2x32 configuration, 1 device with a 32-bit interface
- device_bus_width = 32
-
- # GDDR5 is a BL8 device
- burst_length = 8
-
- # Each device has a page (row buffer) size of 2Kbits (256Bytes)
- device_rowbuffer_size = '256B'
-
- # 2x32 configuration, so 2 devices
- devices_per_rank = 2
-
- # assume single rank
- ranks_per_channel = 1
-
- # GDDR5 has 4 bank groups
- bank_groups_per_rank = 4
-
- # GDDR5 has 16 banks with 4 bank groups
- banks_per_rank = 16
-
- # 1000 MHz
- tCK = '1ns'
-
- # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
- # Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz )
- # 8 beats at 4000 MHz = 2 beats at 1000 MHz
- # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
- # With bank group architectures, tBURST represents the CAS-to-CAS
- # delay for bursts to different bank groups (tCCD_S)
- tBURST = '2ns'
-
- # @1000MHz data rate, tCCD_L is 3 CK
- # CAS-to-CAS delay for bursts to the same bank group
- # tBURST is equivalent to tCCD_S; no explicit parameter required
- # for CAS-to-CAS delay for bursts to different bank groups
- tCCD_L = '3ns';
-
- tRCD = '12ns'
-
- # tCL is not directly found in datasheet and assumed equal tRCD
- tCL = '12ns'
-
- tRP = '12ns'
- tRAS = '28ns'
-
- # RRD_S (different bank group)
- # RRD_S is 5.5 ns in datasheet.
- # rounded to the next multiple of tCK
- tRRD = '6ns'
-
- # RRD_L (same bank group)
- # RRD_L is 5.5 ns in datasheet.
- # rounded to the next multiple of tCK
- tRRD_L = '6ns'
-
- tXAW = '23ns'
-
- # tXAW < 4 x tRRD.
- # Therefore, activation limit is set to 0
- activation_limit = 0
-
- tRFC = '65ns'
- tWR = '12ns'
-
- # Here using the average of WTR_S and WTR_L
- tWTR = '5ns'
-
- # Read-to-Precharge 2 CK
- tRTP = '2ns'
-
- # Assume 2 cycles
- tRTW = '2ns'
-
-# A single HBM x128 interface (one command and address bus), with
-# default timings based on data publically released
-# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
-# IDD measurement values, and by extrapolating data from other classes.
-# Architecture values based on published HBM spec
-# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
-class HBM_1000_4H_1x128(DRAMCtrl):
- # HBM gen1 supports up to 8 128-bit physical channels
- # Configuration defines a single channel, with the capacity
- # set to (full_ stack_capacity / 8) based on 2Gb dies
- # To use all 8 channels, set 'channels' parameter to 8 in
- # system configuration
-
- # 128-bit interface legacy mode
- device_bus_width = 128
-
- # HBM supports BL4 and BL2 (legacy mode only)
- burst_length = 4
-
- # size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
- # with 8 channels, 128MB per channel
- device_size = '128MB'
-
- device_rowbuffer_size = '2kB'
-
- # 1x128 configuration
- devices_per_rank = 1
-
- # HBM does not have a CS pin; set rank to 1
- ranks_per_channel = 1
-
- # HBM has 8 or 16 banks depending on capacity
- # 2Gb dies have 8 banks
- banks_per_rank = 8
-
- # depending on frequency, bank groups may be required
- # will always have 4 bank groups when enabled
- # current specifications do not define the minimum frequency for
- # bank group architecture
- # setting bank_groups_per_rank to 0 to disable until range is defined
- bank_groups_per_rank = 0
-
- # 500 MHz for 1Gbps DDR data rate
- tCK = '2ns'
-
- # use values from IDD measurement in JEDEC spec
- # use tRP value for tRCD and tCL similar to other classes
- tRP = '15ns'
- tRCD = '15ns'
- tCL = '15ns'
- tRAS = '33ns'
-
- # BL2 and BL4 supported, default to BL4
- # DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
- tBURST = '4ns'
-
- # value for 2Gb device from JEDEC spec
- tRFC = '160ns'
-
- # value for 2Gb device from JEDEC spec
- tREFI = '3.9us'
-
- # extrapolate the following from LPDDR configs, using ns values
- # to minimize burst length, prefetch differences
- tWR = '18ns'
- tRTP = '7.5ns'
- tWTR = '10ns'
-
- # start with 2 cycles turnaround, similar to other memory classes
- # could be more with variations across the stack
- tRTW = '4ns'
-
- # single rank device, set to 0
- tCS = '0ns'
-
- # from MemCon example, tRRD is 4ns with 2ns tCK
- tRRD = '4ns'
-
- # from MemCon example, tFAW is 30ns with 2ns tCK
- tXAW = '30ns'
- activation_limit = 4
-
- # 4tCK
- tXP = '8ns'
-
- # start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '168ns'
-
-# A single HBM x64 interface (one command and address bus), with
-# default timings based on HBM gen1 and data publically released
-# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
-# Note: This defines a pseudo-channel with a unique controller
-# instantiated per pseudo-channel
-# Stay at same IO rate (1Gbps) to maintain timing relationship with
-# HBM gen1 class (HBM_1000_4H_x128) where possible
-class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
- # For HBM gen2 with pseudo-channel mode, configure 2X channels.
- # Configuration defines a single pseudo channel, with the capacity
- # set to (full_ stack_capacity / 16) based on 8Gb dies
- # To use all 16 pseudo channels, set 'channels' parameter to 16 in
- # system configuration
-
- # 64-bit pseudo-channle interface
- device_bus_width = 64
-
- # HBM pseudo-channel only supports BL4
- burst_length = 4
-
- # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
- # with 16 channels, 256MB per channel
- device_size = '256MB'
-
- # page size is halved with pseudo-channel; maintaining the same same number
- # of rows per pseudo-channel with 2X banks across 2 channels
- device_rowbuffer_size = '1kB'
-
- # HBM has 8 or 16 banks depending on capacity
- # Starting with 4Gb dies, 16 banks are defined
- banks_per_rank = 16
-
- # reset tRFC for larger, 8Gb device
- # use HBM1 4Gb value as a starting point
- tRFC = '260ns'
-
- # start with tRFC + tXP -> 160ns + 8ns = 168ns
- tXS = '268ns'
- # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
- tCS = '2ns'
- tREFI = '3.9us'
-
- # active powerdown and precharge powerdown exit time
- tXP = '10ns'
-
- # self refresh exit time
- tXS = '65ns'
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_5500_1x16_BG_BL32(DRAMCtrl):
-
- # Increase buffer size to account for more bank resources
- read_buffer_size = 64
-
- # Set page policy to better suit DMC Huxley
- page_policy = 'close_adaptive'
-
- # 16-bit channel interface
- device_bus_width = 16
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL32 for higher command bandwidth
- burst_length = 32
-
- # size of device in bytes
- device_size = '1GB'
-
- # 2kB page with BG mode
- device_rowbuffer_size = '2kB'
-
- # Use a 1x16 configuration
- devices_per_rank = 1
-
- # Use a single rank
- ranks_per_channel = 1
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Initial configuration will have 16 banks with Bank Group Arch
- # to maximim resources and enable higher data rates
- banks_per_rank = 16
- bank_groups_per_rank = 4
-
- # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.455ns'
-
- # Greater of 2 CK or 18ns
- tRCD = '18ns'
-
- # Base RL is 16 CK @ 687.5 MHz = 23.28ns
- tCL = '23.280ns'
-
- # Greater of 2 CK or 18ns
- tRP = '18ns'
-
- # Greater of 3 CK or 42ns
- tRAS = '42ns'
-
- # Greater of 3 CK or 34ns
- tWR = '34ns'
-
- # active powerdown and precharge powerdown exit time
- # Greater of 3 CK or 7ns
- tXP = '7ns'
-
- # self refresh exit time (tRFCab + 7.5ns)
- tXS = '217.5ns'
-
- # Greater of 2 CK or 7.5 ns minus 2 CK
- tRTP = '4.59ns'
-
- # With BG architecture, burst of 32 transferred in two 16-beat
- # sub-bursts, with a 16-beat gap in between.
- # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
- tBURST = '8.73ns'
- # can interleave a Bstof32 from another bank group at tBURST_MIN
- # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
- tBURST_MIN = '2.91ns'
- # tBURST_MAX is the maximum burst delay for same bank group timing
- # this is 8 CK @ 687.5 MHz
- tBURST_MAX = '11.64ns'
-
- # 8 CK @ 687.5 MHz
- tCCD_L = "11.64ns"
-
- # LPDDR5, 8 Gbit/channel for 280ns tRFCab
- tRFC = '210ns'
- tREFI = '3.9us'
-
- # Greater of 4 CK or 6.25 ns
- tWTR = '6.25ns'
- # Greater of 4 CK or 12 ns
- tWTR_L = '12ns'
-
- # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
- # tWCKDQ0/tCK will be 1 CK for most cases
- # For gem5 RL = WL and BL/n is already accounted for with tBURST
- # Result is and additional 1 CK is required
- tRTW = '1.455ns'
-
- # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
- tCS = '2.91ns'
-
- # 2 CK
- tPPD = '2.91ns'
-
- # Greater of 2 CK or 5 ns
- tRRD = '5ns'
- tRRD_L = '5ns'
-
- # With Bank Group Arch mode tFAW is 20 ns
- tXAW = '20ns'
- activation_limit = 4
-
- # at 5Gbps, 4:1 WCK to CK ratio required
- # 2 data beats per WCK (DDR) -> 8 per CK
- beats_per_clock = 8
-
- # 2 cycles required to send activate command
- # 2 command phases can be sent back-to-back or
- # with a gap up to tAAD = 8 CK
- two_cycle_activate = True
- tAAD = '11.640ns'
-
- data_clock_sync = True
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL16 for smaller access granularity
- burst_length = 16
-
- # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '2.91ns'
- tBURST_MIN = '2.91ns'
- # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST_MAX = '5.82ns'
-
- # 4 CK @ 687.5 MHz
- tCCD_L = "5.82ns"
-
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# Starting with 5.5Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
-
- # 4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
- # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
- tBURST = '5.82ns'
- tBURST_MIN = '5.82ns'
- tBURST_MAX = '5.82ns'
-
- # Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
- # Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
- # With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
- # Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture
-# burst of 32, which means bursts can be interleaved
-class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
-
- # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
- tCK = '1.25ns'
-
- # Base RL is 17 CK @ 800 MHz = 21.25ns
- tCL = '21.25ns'
-
- # With BG architecture, burst of 32 transferred in two 16-beat
- # sub-bursts, with a 16-beat gap in between.
- # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
- # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
- tBURST = '7.5ns'
- # can interleave a Bstof32 from another bank group at tBURST_MIN
- # 16-beats is 8 WCK @2.3 GHz or 2 CK @ 800 MHz
- tBURST_MIN = '2.5ns'
- # tBURST_MAX is the maximum burst delay for same bank group timing
- # this is 8 CK @ 800 MHz
- tBURST_MAX = '10ns'
-
- # 8 CK @ 800 MHz
- tCCD_L = "10ns"
-
- # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL
- # tWCKDQ0/tCK will be 1 CK for most cases
- # For gem5 RL = WL and BL/n is already accounted for with tBURST
- # Result is and additional 1 CK is required
- tRTW = '1.25ns'
-
- # Default different rank bus delay to 2 CK, @687.5 MHz = 2.5 ns
- tCS = '2.5ns'
-
- # 2 CK
- tPPD = '2.5ns'
-
- # 2 command phases can be sent back-to-back or
- # with a gap up to tAAD = 8 CK
- tAAD = '10ns'
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on initial
-# JEDEC specifcation
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 16-bank mode with bank-group architecture, burst of 16
-class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
-
- # LPDDR5 is a BL16 or BL32 device
- # With BG mode, BL16 and BL32 are supported
- # Use BL16 for smaller access granularity
- burst_length = 16
-
- # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '2.5ns'
- tBURST_MIN = '2.5ns'
- # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST_MAX = '5ns'
-
- # 4 CK @ 800 MHz
- tCCD_L = "5ns"
-
-
-# A single LPDDR5 x16 interface (one command/address bus)
-# for a single x16 channel with default timings based on
-# initial JEDEC specification
-# 6.4Gbps data rates and 8Gbit die
-# Configuring for 8-bank mode, burst of 32
-class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
-
- # 4kB page with 8B mode
- device_rowbuffer_size = '4kB'
-
- # LPDDR5 supports configurable bank options
- # 8B : BL32, all frequencies
- # 16B : BL32 or BL16, <=3.2Gbps
- # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
- # Select 8B
- banks_per_rank = 8
- bank_groups_per_rank = 0
-
- # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
- tBURST = '5ns'
- tBURST_MIN = '5ns'
- tBURST_MAX = '5ns'
-
- # Greater of 4 CK or 12 ns
- tWTR = '12ns'
-
- # Greater of 2 CK or 10 ns
- tRRD = '10ns'
-
- # With 8B mode tFAW is 40 ns
- tXAW = '40ns'
- activation_limit = 4
-
- # Reset BG arch timing for 8B mode
- tCCD_L = "0ns"
- tRRD_L = "0ns"
- tWTR_L = "0ns"
--- /dev/null
+# Copyright (c) 2012-2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2013 Amin Farmahini-Farahani
+# Copyright (c) 2015 University of Kaiserslautern
+# Copyright (c) 2015 The University of Bologna
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.params import *
+from m5.proxy import *
+
+from m5.objects.AbstractMemory import AbstractMemory
+
+# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
+# channel, rank, bank, row and column, respectively, and going from
+# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
+# suitable for an open-page policy, optimising for sequential accesses
+# hitting in the open row. For a closed-page policy, RoCoRaBaCh
+# maximises parallelism.
+class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh']
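# Editorial sketch (not part of the patch): how a RoRaBaCoCh-style map
# slices a physical address, reading channel, column, bank, rank, row
# from LSB to MSB. The field widths below are hypothetical, chosen only
# to illustrate the decode order; real widths come from the interface
# geometry.
def decode_rorabacoch(addr, ch_bits=1, co_bits=6, ba_bits=3, ra_bits=1):
    fields = {}
    for name, bits in [('channel', ch_bits), ('column', co_bits),
                       ('bank', ba_bits), ('rank', ra_bits)]:
        fields[name] = addr & ((1 << bits) - 1)
        addr >>= bits
    fields['row'] = addr  # remaining MSBs select the row
    return fields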
+
+# Enum for the page policy, either open, open_adaptive, close, or
+# close_adaptive.
+class PageManage(Enum): vals = ['open', 'open_adaptive', 'close',
+ 'close_adaptive']
+
+class DRAMInterface(AbstractMemory):
+ type = 'DRAMInterface'
+ cxx_header = "mem/dram_ctrl.hh"
+
+ # Allow the interface to set required controller buffer sizes;
+ # each entry corresponds to a burst for the specific DRAM
+ # configuration (e.g. x32 with burst length 8 is 32 bytes) and not
+ # the cacheline size or request/packet size
+ write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
+ read_buffer_size = Param.Unsigned(32, "Number of read queue entries")
+
+ # scheduler, address map and page policy
+ addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy")
+ page_policy = Param.PageManage('open_adaptive', "Page management policy")
+
+ # enforce a limit on the number of accesses per row
+ max_accesses_per_row = Param.Unsigned(16, "Max accesses per row before "
+ "closing");
+
+ # size of DRAM Chip in Bytes
+ device_size = Param.MemorySize("Size of DRAM chip")
+ # the physical organisation of the DRAM
+ device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "\
+ "device/chip")
+ burst_length = Param.Unsigned("Burst length (BL) in beats")
+ device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\
+ "device/chip")
+ devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
+ ranks_per_channel = Param.Unsigned("Number of ranks per channel")
+
+ # default to 0 bank groups per rank, indicating bank group architecture
+ # is not used
+ # update per memory class when bank group architecture is supported
+ bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank")
+ banks_per_rank = Param.Unsigned("Number of banks per rank")
+
+ # Enable DRAM powerdown states if True. This is False by default due to
+ # performance being lower when enabled
+ enable_dram_powerdown = Param.Bool(False, "Enable powerdown states")
+
+ # For power modelling we need to know if the DRAM has a DLL or not
+ dll = Param.Bool(True, "DRAM has DLL or not")
+
+ # DRAMPower provides in addition to the core power, the possibility to
+ # include RD/WR termination and IO power. This calculation assumes some
+ # default values. The integration of DRAMPower with gem5 does not include
+ # IO and RD/WR termination power by default. This might be added as an
+ # additional feature in the future.
+
+ # timing behaviour and constraints - all in nanoseconds
+
+ # the base clock period of the DRAM
+ tCK = Param.Latency("Clock period")
+
+ # the amount of time in nanoseconds from issuing an activate command
+ # to the data being available in the row buffer for a read/write
+ tRCD = Param.Latency("RAS to CAS delay")
+
+ # the time from issuing a read/write command to seeing the actual data
+ tCL = Param.Latency("CAS latency")
+
+ # minimum time between a precharge and subsequent activate
+ tRP = Param.Latency("Row precharge time")
+
+ # minimum time between an activate and a precharge to the same row
+ tRAS = Param.Latency("ACT to PRE delay")
+
+ # minimum time between a write data transfer and a precharge
+ tWR = Param.Latency("Write recovery time")
+
+ # minimum time between a read and precharge command
+ tRTP = Param.Latency("Read to precharge")
+
+ # time to complete a burst transfer, typically the burst length
+ # divided by two due to the DDR bus, but by making it a parameter
+ # it is easier to also evaluate SDR memories like WideIO.
+ # This parameter has to account for burst length.
+ # Read/Write requests with data size larger than one full burst are broken
+ # down into multiple requests in the controller
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
+ tBURST = Param.Latency("Burst duration "
+ "(typically burst length / 2 cycles)")
+
+ # tBURST_MAX is the column array cycle delay required before next access,
+ # which could be greater than tBURST when the memory access time is greater
+ # than tBURST
+ tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay")
+
+ # tBURST_MIN is the minimum delay between bursts, which could be less than
+ # tBURST when interleaving is supported
+ tBURST_MIN = Param.Latency(Self.tBURST, "Minimum delay between bursts")
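# Editorial sketch (not part of the patch): for a DDR device, tBURST
# is burst_length / beats_per_clock command-clock periods. Using the
# DDR3-1600 numbers that appear further down:
burst_length = 8
beats_per_clock = 2          # DDR: one beat per clock edge
tCK_ns = 1.25                # 800 MHz command clock
assert burst_length / beats_per_clock * tCK_ns == 5.0   # tBURST = 5 ns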
+
+ # CAS-to-CAS delay for bursts to the same bank group
+ # only utilized with bank group architectures; set to 0 for default case
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay")
+
+ # Write-to-Write delay for bursts to the same bank group
+ # only utilized with bank group architectures; set to 0 for default case
+ # This will be used to enable different same bank group delays
+ # for writes versus reads
+ tCCD_L_WR = Param.Latency(Self.tCCD_L,
+ "Same bank group Write to Write delay")
+
+ # time taken to complete one refresh cycle (N rows in all banks)
+ tRFC = Param.Latency("Refresh cycle time")
+
+ # refresh command interval, how often a "ref" command needs
+ # to be sent. It is 7.8 us for a 64ms refresh requirement
+ tREFI = Param.Latency("Refresh command interval")
+
+ # write-to-read, same rank turnaround penalty
+ tWTR = Param.Latency("Write to read, same rank switching time")
+
+ # write-to-read, same rank turnaround penalty for same bank group
+ tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching "
+ "time, same bank group")
+
+ # read-to-write, same rank turnaround penalty
+ tRTW = Param.Latency("Read to write, same rank switching time")
+
+ # rank-to-rank bus delay penalty
+ # this does not correlate to a memory timing parameter and encompasses:
+ # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
+ # different rank bus delay
+ tCS = Param.Latency("Rank to rank switching time")
+
+ # minimum precharge to precharge delay time
+ tPPD = Param.Latency("0ns", "PRE to PRE delay")
+
+ # maximum delay between two-cycle ACT command phases
+ tAAD = Param.Latency(Self.tCK,
+ "Maximum delay between two-cycle ACT commands")
+
+ two_cycle_activate = Param.Bool(False,
+ "Two cycles required to send activate")
+
+ # minimum row activate to row activate delay time
+ tRRD = Param.Latency("ACT to ACT delay")
+
+ # only utilized with bank group architectures; set to 0 for default case
+ tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay")
+
+ # time window in which a maximum number of activates are allowed
+ # to take place, set to 0 to disable
+ tXAW = Param.Latency("X activation window")
+ activation_limit = Param.Unsigned("Max number of activates in window")
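# Editorial sketch (not part of the patch): tXAW and activation_limit
# describe a rolling window (tFAW when the limit is 4). A controller
# tracks the ticks of the most recent ACTs and delays a new ACT until
# the oldest one falls out of the window:
def next_act_allowed(act_ticks, now, tXAW, limit):
    """act_ticks holds the most recent ACT times, oldest first."""
    if limit and len(act_ticks) >= limit:
        return max(now, act_ticks[0] + tXAW)
    return now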
+
+ # time to exit power-down mode
+ # Exit power-down to next valid command delay
+ tXP = Param.Latency("0ns", "Power-up Delay")
+
+ # Exit Powerdown to commands requiring a locked DLL
+ tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL")
+
+ # time to exit self-refresh mode
+ tXS = Param.Latency("0ns", "Self-refresh exit latency")
+
+ # time to exit self-refresh mode with locked DLL
+ tXSDLL = Param.Latency("0ns", "Self-refresh exit latency DLL")
+
+ # number of data beats per clock. with DDR, default is 2, one per edge
+ beats_per_clock = Param.Unsigned(2, "Data beats per clock")
+
+ data_clock_sync = Param.Bool(False, "Synchronization commands required")
+
+ # Currently rolled into other params
+ ######################################################################
+
+ # tRC - assumed to be tRAS + tRP
+
+ # Power Behaviour and Constraints
+ # DRAMs like LPDDR and WideIO have 2 external voltage domains. These are
+ # defined as VDD and VDD2. Each current is defined for each voltage domain
+ # separately. For example, current IDD0 is active-precharge current for
+ # voltage domain VDD and current IDD02 is active-precharge current for
+ # voltage domain VDD2.
+ # By default all currents are set to 0mA. Users who are only interested in
+ # the performance of DRAMs can leave them at 0.
+
+ # Operating 1 Bank Active-Precharge current
+ IDD0 = Param.Current("0mA", "Active precharge current")
+
+ # Operating 1 Bank Active-Precharge current multiple voltage Range
+ IDD02 = Param.Current("0mA", "Active precharge current VDD2")
+
+ # Precharge Power-down Current: Slow exit
+ IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow")
+
+ # Precharge Power-down Current: Slow exit multiple voltage Range
+ IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2")
+
+ # Precharge Power-down Current: Fast exit
+ IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast")
+
+ # Precharge Power-down Current: Fast exit multiple voltage Range
+ IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2")
+
+ # Precharge Standby current
+ IDD2N = Param.Current("0mA", "Precharge Standby current")
+
+ # Precharge Standby current multiple voltage range
+ IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2")
+
+ # Active Power-down current: slow exit
+ IDD3P0 = Param.Current("0mA", "Active Powerdown slow")
+
+ # Active Power-down current: slow exit multiple voltage range
+ IDD3P02 = Param.Current("0mA", "Active Powerdown slow VDD2")
+
+ # Active Power-down current : fast exit
+ IDD3P1 = Param.Current("0mA", "Active Powerdown fast")
+
+ # Active Power-down current : fast exit multiple voltage range
+ IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2")
+
+ # Active Standby current
+ IDD3N = Param.Current("0mA", "Active Standby current")
+
+ # Active Standby current multiple voltage range
+ IDD3N2 = Param.Current("0mA", "Active Standby current VDD2")
+
+ # Burst Read Operating Current
+ IDD4R = Param.Current("0mA", "READ current")
+
+ # Burst Read Operating Current multiple voltage range
+ IDD4R2 = Param.Current("0mA", "READ current VDD2")
+
+ # Burst Write Operating Current
+ IDD4W = Param.Current("0mA", "WRITE current")
+
+ # Burst Write Operating Current multiple voltage range
+ IDD4W2 = Param.Current("0mA", "WRITE current VDD2")
+
+ # Refresh Current
+ IDD5 = Param.Current("0mA", "Refresh current")
+
+ # Refresh Current multiple voltage range
+ IDD52 = Param.Current("0mA", "Refresh current VDD2")
+
+ # Self-Refresh Current
+ IDD6 = Param.Current("0mA", "Self-refresh Current")
+
+ # Self-Refresh Current multiple voltage range
+ IDD62 = Param.Current("0mA", "Self-refresh Current VDD2")
+
+ # Main voltage range of the DRAM
+ VDD = Param.Voltage("0V", "Main Voltage Range")
+
+ # Second voltage range defined by some DRAMs
+ VDD2 = Param.Voltage("0V", "2nd Voltage Range")
+
+# A single DDR3-1600 x64 channel (one command and address bus), with
+# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
+# an 8x8 configuration.
+class DDR3_1600_8x8(DRAMInterface):
+ # size of device in bytes
+ device_size = '512MB'
+
+ # 8x8 configuration, 8 devices each with an 8-bit interface
+ device_bus_width = 8
+
+ # DDR3 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
+ device_rowbuffer_size = '1kB'
+
+ # 8x8 configuration, so 8 devices
+ devices_per_rank = 8
+
+ # Use two ranks
+ ranks_per_channel = 2
+
+ # DDR3 has 8 banks in all configurations
+ banks_per_rank = 8
+
+ # 800 MHz
+ tCK = '1.25ns'
+
+ # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz
+ tBURST = '5ns'
+
+ # DDR3-1600 11-11-11
+ tRCD = '13.75ns'
+ tCL = '13.75ns'
+ tRP = '13.75ns'
+ tRAS = '35ns'
+ tRRD = '6ns'
+ tXAW = '30ns'
+ activation_limit = 4
+ tRFC = '260ns'
+
+ tWR = '15ns'
+
+ # Greater of 4 CK or 7.5 ns
+ tWTR = '7.5ns'
+
+ # Greater of 4 CK or 7.5 ns
+ tRTP = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
+ tRTW = '2.5ns'
+
+ # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+ tCS = '2.5ns'
+
+ # <=85C, half for >85C
+ tREFI = '7.8us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '6ns'
+
+ # self refresh exit time
+ tXS = '270ns'
+
+ # Current values from datasheet Die Rev E,J
+ IDD0 = '55mA'
+ IDD2N = '32mA'
+ IDD3N = '38mA'
+ IDD4W = '125mA'
+ IDD4R = '157mA'
+ IDD5 = '235mA'
+ IDD3P1 = '38mA'
+ IDD2P1 = '32mA'
+ IDD6 = '20mA'
+ VDD = '1.5V'
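# Editorial sketch (not part of the patch): the channel organisation
# implied by the values above, in plain arithmetic.
device_size_MB = 512
devices_per_rank = 8
ranks_per_channel = 2
assert device_size_MB * devices_per_rank * ranks_per_channel == 8192
# -> 8 GB per channel. One burst moves devices * burst_length *
# bus_width / 8 bytes, i.e. a full 64-byte cache line:
assert 8 * 8 * 8 // 8 == 64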
+
+# A single HMC-2500 x32 model based on:
+# [1] DRAMSpec: a high-level DRAM bank modelling tool
+# developed at the University of Kaiserslautern. This high level tool
+# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to
+# estimate the DRAM bank latency and power numbers.
+# [2] High performance AXI-4.0 based interconnect for extensible smart memory
+# cubes (E. Azarkhish et. al)
+# Assumed for the HMC model is a 30 nm technology node.
+# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory (4
+# layers).
+# Each layer has 16 vaults and each vault consists of 2 banks per layer.
+# In order to be able to use the same controller used for 2D DRAM generations
+# for HMC, the following analogy is done:
+# Channel (DDR) => Vault (HMC)
+# device_size (DDR) => size of a single layer in a vault
+# ranks per channel (DDR) => number of layers
+# banks per rank (DDR) => banks per layer
+# devices per rank (DDR) => devices per layer (1 for HMC).
+# The parameters for which no input is available are inherited from the DDR3
+# configuration.
+# This configuration includes the latencies from the DRAM to the logic layer
+# of the HMC
+class HMC_2500_1x32(DDR3_1600_8x8):
+ # size of device
+ # two banks per device with each bank 4MB [2]
+ device_size = '8MB'
+
+ # 1x32 configuration, 1 device with 32 TSVs [2]
+ device_bus_width = 32
+
+ # HMC is a BL8 device [2]
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 256 bytes [2]
+ device_rowbuffer_size = '256B'
+
+ # 1x32 configuration, so 1 device [2]
+ devices_per_rank = 1
+
+ # 4 layers so 4 ranks [2]
+ ranks_per_channel = 4
+
+ # HMC has 2 banks per layer [2]
+ # Each layer represents a rank. With 4 layers and 8 banks in total, each
+ # layer has 2 banks; thus 2 banks per rank.
+ banks_per_rank = 2
+
+ # 1250 MHz [2]
+ tCK = '0.8ns'
+
+ # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz
+ tBURST = '3.2ns'
+
+ # Values using DRAMSpec HMC model [1]
+ tRCD = '10.2ns'
+ tCL = '9.9ns'
+ tRP = '7.7ns'
+ tRAS = '21.6ns'
+
+ # tRRD depends on the power supply network for each vendor.
+ # We assume a tRRD of a double bank approach to be equal to 4 clock
+ # cycles (Assumption)
+ tRRD = '3.2ns'
+
+ # activation limit is set to 0 since there are only 2 banks per vault
+ # layer.
+ activation_limit = 0
+
+ # Values using DRAMSpec HMC model [1]
+ tRFC = '59ns'
+ tWR = '8ns'
+ tRTP = '4.9ns'
+
+ # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz =
+ # 0.8 ns (Assumption)
+ tCS = '0.8ns'
+
+ # Value using DRAMSpec HMC model [1]
+ tREFI = '3.9us'
+
+ # The default page policy in the vault controllers is simple closed page
+ # [2]; nevertheless, the 'close' policy opens and closes the row multiple
+ # times for bursts larger than 32 bytes. For this reason we use
+ # 'close_adaptive'
+ page_policy = 'close_adaptive'
+
+ # RoCoRaBaCh resembles the default address mapping in HMC
+ addr_mapping = 'RoCoRaBaCh'
+
+ # These parameters do not directly correlate with buffer_size in real
+ # hardware. Nevertheless, their value has been tuned to achieve a
+ # bandwidth similar to the cycle-accurate model in [2]
+ write_buffer_size = 32
+ read_buffer_size = 32
+
+# A single DDR3-2133 x64 channel refining a selected subset of the
+# options for the DDR-1600 configuration, based on the same DDR3-1600
+# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
+# consistent across the two configurations.
+class DDR3_2133_8x8(DDR3_1600_8x8):
+ # 1066 MHz
+ tCK = '0.938ns'
+
+ # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz
+ tBURST = '3.752ns'
+
+ # DDR3-2133 14-14-14
+ tRCD = '13.09ns'
+ tCL = '13.09ns'
+ tRP = '13.09ns'
+ tRAS = '33ns'
+ tRRD = '5ns'
+ tXAW = '25ns'
+
+ # Current values from datasheet
+ IDD0 = '70mA'
+ IDD2N = '37mA'
+ IDD3N = '44mA'
+ IDD4W = '157mA'
+ IDD4R = '191mA'
+ IDD5 = '250mA'
+ IDD3P1 = '44mA'
+ IDD2P1 = '43mA'
+ IDD6 = '20mA'
+ VDD = '1.5V'
+
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4)
+# in a 16x4 configuration.
+# Total channel capacity is 32GB
+# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel
+class DDR4_2400_16x4(DRAMInterface):
+ # size of device
+ device_size = '1GB'
+
+ # 16x4 configuration, 16 devices each with a 4-bit interface
+ device_bus_width = 4
+
+ # DDR4 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 512 byte (1K columns x4)
+ device_rowbuffer_size = '512B'
+
+ # 16x4 configuration, so 16 devices
+ devices_per_rank = 16
+
+ # Match our DDR3 configurations which is dual rank
+ ranks_per_channel = 2
+
+ # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
+ # Set to 4 for x4 case
+ bank_groups_per_rank = 4
+
+ # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
+ # configurations). Currently we do not capture the additional
+ # constraints incurred by the bank groups
+ banks_per_rank = 16
+
+ # override the default buffer sizes and go for something larger to
+ # accommodate the larger bank count
+ write_buffer_size = 128
+ read_buffer_size = 64
+
+ # 1200 MHz
+ tCK = '0.833ns'
+
+ # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
+ tBURST = '3.332ns'
+
+ # @2400 data rate, tCCD_L is 6 CK
+ # CAS-to-CAS delay for bursts to the same bank group
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ tCCD_L = '5ns'
+
+ # DDR4-2400 17-17-17
+ tRCD = '14.16ns'
+ tCL = '14.16ns'
+ tRP = '14.16ns'
+ tRAS = '32ns'
+
+ # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns)
+ tRRD = '3.332ns'
+
+ # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns)
+ tRRD_L = '4.9ns'
+
+ # tFAW for 512B page is MAX(16 CK, 13ns)
+ tXAW = '13.328ns'
+ activation_limit = 4
+ # tRFC is 350ns
+ tRFC = '350ns'
+
+ tWR = '15ns'
+
+ # Here using the average of WTR_S and WTR_L
+ tWTR = '5ns'
+
+ # Greater of 4 CK or 7.5 ns
+ tRTP = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666 ns
+ tRTW = '1.666ns'
+
+ # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns
+ tCS = '1.666ns'
+
+ # <=85C, half for >85C
+ tREFI = '7.8us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '6ns'
+
+ # self refresh exit time
+ # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is:
+ # tRFC + 10ns = 340ns
+ tXS = '340ns'
+
+ # Current values from datasheet
+ IDD0 = '43mA'
+ IDD02 = '3mA'
+ IDD2N = '34mA'
+ IDD3N = '38mA'
+ IDD3N2 = '3mA'
+ IDD4W = '103mA'
+ IDD4R = '110mA'
+ IDD5 = '250mA'
+ IDD3P1 = '32mA'
+ IDD2P1 = '25mA'
+ IDD6 = '30mA'
+ VDD = '1.2V'
+ VDD2 = '2.5V'
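# Editorial sketch (not part of the patch): cross-checking two of the
# derived values above.
tCK_ns = 0.833                       # 1200 MHz command clock
assert round(6 * tCK_ns, 2) == 5.0   # tCCD_L of 6 CK ~= the 5 ns above
assert 16 * 2 * 1 == 32              # devices * ranks * GB/device = 32 GB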
+
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8)
+# in an 8x8 configuration.
+# Total channel capacity is 16GB
+# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel
+class DDR4_2400_8x8(DDR4_2400_16x4):
+ # 8x8 configuration, 8 devices each with an 8-bit interface
+ device_bus_width = 8
+
+ # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8)
+ device_rowbuffer_size = '1kB'
+
+ # 8x8 configuration, so 8 devices
+ devices_per_rank = 8
+
+ # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns)
+ tRRD_L = '4.9ns'
+
+ tXAW = '21ns'
+
+ # Current values from datasheet
+ IDD0 = '48mA'
+ IDD3N = '43mA'
+ IDD4W = '123mA'
+ IDD4R = '135mA'
+ IDD3P1 = '37mA'
+
+# A single DDR4-2400 x64 channel (one command and address bus), with
+# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16)
+# in a 4x16 configuration.
+# Total channel capacity is 4GB
+# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel
+class DDR4_2400_4x16(DDR4_2400_16x4):
+ # 4x16 configuration, 4 devices each with a 16-bit interface
+ device_bus_width = 16
+
+ # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16)
+ device_rowbuffer_size = '2kB'
+
+ # 4x16 configuration, so 4 devices
+ devices_per_rank = 4
+
+ # Single rank for x16
+ ranks_per_channel = 1
+
+ # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups
+ # Set to 2 for x16 case
+ bank_groups_per_rank = 2
+
+ # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all
+ # configurations). Currently we do not capture the additional
+ # constraints incurred by the bank groups
+ banks_per_rank = 8
+
+ # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns)
+ tRRD = '5.3ns'
+
+ # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns)
+ tRRD_L = '6.4ns'
+
+ tXAW = '30ns'
+
+ # Current values from datasheet
+ IDD0 = '80mA'
+ IDD02 = '4mA'
+ IDD2N = '34mA'
+ IDD3N = '47mA'
+ IDD4W = '228mA'
+ IDD4R = '243mA'
+ IDD5 = '280mA'
+ IDD3P1 = '41mA'
+
+# A single LPDDR2-S4 x32 interface (one command/address bus), with
+# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1)
+# in a 1x32 configuration.
+class LPDDR2_S4_1066_1x32(DRAMInterface):
+ # No DLL in LPDDR2
+ dll = False
+
+ # size of device
+ device_size = '512MB'
+
+ # 1x32 configuration, 1 device with a 32-bit interface
+ device_bus_width = 32
+
+ # LPDDR2_S4 is a BL4 and BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 1KB
+ # (this depends on the memory density)
+ device_rowbuffer_size = '1kB'
+
+ # 1x32 configuration, so 1 device
+ devices_per_rank = 1
+
+ # Use a single rank
+ ranks_per_channel = 1
+
+ # LPDDR2-S4 has 8 banks in all configurations
+ banks_per_rank = 8
+
+ # 533 MHz
+ tCK = '1.876ns'
+
+ # Fixed at 15 ns
+ tRCD = '15ns'
+
+ # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time
+ tCL = '15ns'
+
+ # Pre-charge one bank 15 ns (all banks 18 ns)
+ tRP = '15ns'
+
+ tRAS = '42ns'
+ tWR = '15ns'
+
+ tRTP = '7.5ns'
+
+ # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz.
+ # Note this is a BL8 DDR device.
+ # Requests larger than 32 bytes are broken down into multiple requests
+ # in the controller
+ tBURST = '7.5ns'
+
+ # LPDDR2-S4, 4 Gbit
+ tRFC = '130ns'
+ tREFI = '3.9us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '7.5ns'
+
+ # self refresh exit time
+ tXS = '140ns'
+
+ # Irrespective of speed grade, tWTR is 7.5 ns
+ tWTR = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns
+ tRTW = '3.75ns'
+
+ # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns
+ tCS = '3.75ns'
+
+ # Activate to activate irrespective of density and speed grade
+ tRRD = '10.0ns'
+
+ # Irrespective of density, tFAW is 50 ns
+ tXAW = '50ns'
+ activation_limit = 4
+
+ # Current values from datasheet
+ IDD0 = '15mA'
+ IDD02 = '70mA'
+ IDD2N = '2mA'
+ IDD2N2 = '30mA'
+ IDD3N = '2.5mA'
+ IDD3N2 = '30mA'
+ IDD4W = '10mA'
+ IDD4W2 = '190mA'
+ IDD4R = '3mA'
+ IDD4R2 = '220mA'
+ IDD5 = '40mA'
+ IDD52 = '150mA'
+ IDD3P1 = '1.2mA'
+ IDD3P12 = '8mA'
+ IDD2P1 = '0.6mA'
+ IDD2P12 = '0.8mA'
+ IDD6 = '1mA'
+ IDD62 = '3.2mA'
+ VDD = '1.8V'
+ VDD2 = '1.2V'
+
+# A single WideIO x128 interface (one command and address bus), with
+# default timings based on an estimated WIO-200 8 Gbit part.
+class WideIO_200_1x128(DRAMInterface):
+ # No DLL for WideIO
+ dll = False
+
+ # size of device
+ device_size = '1024MB'
+
+ # 1x128 configuration, 1 device with a 128-bit interface
+ device_bus_width = 128
+
+ # This is a BL4 device
+ burst_length = 4
+
+ # Each device has a page (row buffer) size of 4KB
+ # (this depends on the memory density)
+ device_rowbuffer_size = '4kB'
+
+ # 1x128 configuration, so 1 device
+ devices_per_rank = 1
+
+ # Use one rank for a one-high die stack
+ ranks_per_channel = 1
+
+ # WideIO has 4 banks in all configurations
+ banks_per_rank = 4
+
+ # 200 MHz
+ tCK = '5ns'
+
+ # WIO-200
+ tRCD = '18ns'
+ tCL = '18ns'
+ tRP = '18ns'
+ tRAS = '42ns'
+ tWR = '15ns'
+ # Read to precharge is same as the burst
+ tRTP = '20ns'
+
+ # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz.
+ # Note this is a BL4 SDR device.
+ tBURST = '20ns'
+
+ # WIO 8 Gb
+ tRFC = '210ns'
+
+ # WIO 8 Gb, <=85C, half for >85C
+ tREFI = '3.9us'
+
+ # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns
+ tWTR = '15ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns
+ tRTW = '10ns'
+
+ # Default different rank bus delay to 2 CK, @200 MHz = 10 ns
+ tCS = '10ns'
+
+ # Activate to activate irrespective of density and speed grade
+ tRRD = '10.0ns'
+
+ # Two instead of four activation window
+ tXAW = '50ns'
+ activation_limit = 2
+
+ # The WideIO specification does not provide current information
+
+# A single LPDDR3 x32 interface (one command/address bus), with
+# default timings based on a LPDDR3-1600 4 Gbit part (Micron
+# EDF8132A1MC) in a 1x32 configuration.
+class LPDDR3_1600_1x32(DRAMInterface):
+ # No DLL for LPDDR3
+ dll = False
+
+ # size of device
+ device_size = '512MB'
+
+ # 1x32 configuration, 1 device with a 32-bit interface
+ device_bus_width = 32
+
+ # LPDDR3 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 4KB
+ device_rowbuffer_size = '4kB'
+
+ # 1x32 configuration, so 1 device
+ devices_per_rank = 1
+
+ # Technically the datasheet is a dual-rank package, but for
+ # comparison with the LPDDR2 config we stick to a single rank
+ ranks_per_channel = 1
+
+ # LPDDR3 has 8 banks in all configurations
+ banks_per_rank = 8
+
+ # 800 MHz
+ tCK = '1.25ns'
+
+ tRCD = '18ns'
+
+ # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time
+ tCL = '15ns'
+
+ tRAS = '42ns'
+ tWR = '15ns'
+
+ # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns
+ tRTP = '7.5ns'
+
+ # Pre-charge one bank 18 ns (all banks 21 ns)
+ tRP = '18ns'
+
+ # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz.
+ # Note this is a BL8 DDR device.
+ # Requests larger than 32 bytes are broken down into multiple requests
+ # in the controller
+ tBURST = '5ns'
+
+ # LPDDR3, 4 Gb
+ tRFC = '130ns'
+ tREFI = '3.9us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '7.5ns'
+
+ # self refresh exit time
+ tXS = '140ns'
+
+ # Irrespective of speed grade, tWTR is 7.5 ns
+ tWTR = '7.5ns'
+
+ # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns
+ tRTW = '2.5ns'
+
+ # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+ tCS = '2.5ns'
+
+ # Activate to activate irrespective of density and speed grade
+ tRRD = '10.0ns'
+
+ # Irrespective of size, tFAW is 50 ns
+ tXAW = '50ns'
+ activation_limit = 4
+
+ # Current values from datasheet
+ IDD0 = '8mA'
+ IDD02 = '60mA'
+ IDD2N = '0.8mA'
+ IDD2N2 = '26mA'
+ IDD3N = '2mA'
+ IDD3N2 = '34mA'
+ IDD4W = '2mA'
+ IDD4W2 = '190mA'
+ IDD4R = '2mA'
+ IDD4R2 = '230mA'
+ IDD5 = '28mA'
+ IDD52 = '150mA'
+ IDD3P1 = '1.4mA'
+ IDD3P12 = '11mA'
+ IDD2P1 = '0.8mA'
+ IDD2P12 = '1.8mA'
+ IDD6 = '0.5mA'
+ IDD62 = '1.8mA'
+ VDD = '1.8V'
+ VDD2 = '1.2V'
+
+# A single GDDR5 x64 interface, with
+# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix
+# H5GQ1H24AFR) in a 2x32 configuration.
+class GDDR5_4000_2x32(DRAMInterface):
+ # size of device
+ device_size = '128MB'
+
+ # 2x32 configuration, 1 device with a 32-bit interface
+ device_bus_width = 32
+
+ # GDDR5 is a BL8 device
+ burst_length = 8
+
+ # Each device has a page (row buffer) size of 2Kbits (256Bytes)
+ device_rowbuffer_size = '256B'
+
+ # 2x32 configuration, so 2 devices
+ devices_per_rank = 2
+
+ # assume single rank
+ ranks_per_channel = 1
+
+ # GDDR5 has 4 bank groups
+ bank_groups_per_rank = 4
+
+ # GDDR5 has 16 banks with 4 bank groups
+ banks_per_rank = 16
+
+ # 1000 MHz
+ tCK = '1ns'
+
+ # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz
+ # Data bus runs @2000 MHz => DDR (data runs at 4000 MHz)
+ # 8 beats at 4000 MHz = 2 beats at 1000 MHz
+ # tBURST is equivalent to the CAS-to-CAS delay (tCCD)
+ # With bank group architectures, tBURST represents the CAS-to-CAS
+ # delay for bursts to different bank groups (tCCD_S)
+ tBURST = '2ns'
+
+ # @1000MHz data rate, tCCD_L is 3 CK
+ # CAS-to-CAS delay for bursts to the same bank group
+ # tBURST is equivalent to tCCD_S; no explicit parameter required
+ # for CAS-to-CAS delay for bursts to different bank groups
+ tCCD_L = '3ns'
+
+ tRCD = '12ns'
+
+ # tCL is not directly found in datasheet and assumed equal tRCD
+ tCL = '12ns'
+
+ tRP = '12ns'
+ tRAS = '28ns'
+
+ # RRD_S (different bank group)
+ # RRD_S is 5.5 ns in datasheet.
+ # rounded to the next multiple of tCK
+ tRRD = '6ns'
+
+ # RRD_L (same bank group)
+ # RRD_L is 5.5 ns in datasheet.
+ # rounded to the next multiple of tCK
+ tRRD_L = '6ns'
+
+ tXAW = '23ns'
+
+ # tXAW < 4 x tRRD.
+ # Therefore, activation limit is set to 0
+ activation_limit = 0
+
+ tRFC = '65ns'
+ tWR = '12ns'
+
+ # Here using the average of WTR_S and WTR_L
+ tWTR = '5ns'
+
+ # Read-to-Precharge 2 CK
+ tRTP = '2ns'
+
+ # Assume 2 cycles
+ tRTW = '2ns'
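# Editorial sketch (not part of the patch): the GDDR5 burst arithmetic
# from the comment above. The data bus is DDR at 2000 MHz, i.e. an
# effective 4000 MT/s:
beats = 8
data_rate_GTs = 4.0
assert beats / data_rate_GTs == 2.0  # tBURST: 2 ns = 2 command clocks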
+
+# A single HBM x128 interface (one command and address bus), with
+# default timings based on data publically released
+# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014),
+# IDD measurement values, and by extrapolating data from other classes.
+# Architecture values based on published HBM spec
+# A 4H stack is defined, 2Gb per die for a total of 1GB of memory.
+class HBM_1000_4H_1x128(DRAMInterface):
+ # HBM gen1 supports up to 8 128-bit physical channels
+ # Configuration defines a single channel, with the capacity
+ # set to (full_stack_capacity / 8) based on 2Gb dies
+ # To use all 8 channels, set 'channels' parameter to 8 in
+ # system configuration
+
+ # 128-bit interface legacy mode
+ device_bus_width = 128
+
+ # HBM supports BL4 and BL2 (legacy mode only)
+ burst_length = 4
+
+ # size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack;
+ # with 8 channels, 128MB per channel
+ device_size = '128MB'
+
+ device_rowbuffer_size = '2kB'
+
+ # 1x128 configuration
+ devices_per_rank = 1
+
+ # HBM does not have a CS pin; set rank to 1
+ ranks_per_channel = 1
+
+ # HBM has 8 or 16 banks depending on capacity
+ # 2Gb dies have 8 banks
+ banks_per_rank = 8
+
+ # depending on frequency, bank groups may be required
+ # will always have 4 bank groups when enabled
+ # current specifications do not define the minimum frequency for
+ # bank group architecture
+ # setting bank_groups_per_rank to 0 to disable until range is defined
+ bank_groups_per_rank = 0
+
+ # 500 MHz for 1Gbps DDR data rate
+ tCK = '2ns'
+
+ # use values from IDD measurement in JEDEC spec
+ # use tRP value for tRCD and tCL similar to other classes
+ tRP = '15ns'
+ tRCD = '15ns'
+ tCL = '15ns'
+ tRAS = '33ns'
+
+ # BL2 and BL4 supported, default to BL4
+ # DDR @ 500 MHz means 4 * 2ns / 2 = 4ns
+ tBURST = '4ns'
+
+ # value for 2Gb device from JEDEC spec
+ tRFC = '160ns'
+
+ # value for 2Gb device from JEDEC spec
+ tREFI = '3.9us'
+
+ # extrapolate the following from LPDDR configs, using ns values
+ # to minimize burst length, prefetch differences
+ tWR = '18ns'
+ tRTP = '7.5ns'
+ tWTR = '10ns'
+
+ # start with 2 cycles turnaround, similar to other memory classes
+ # could be more with variations across the stack
+ tRTW = '4ns'
+
+ # single rank device, set to 0
+ tCS = '0ns'
+
+ # from MemCon example, tRRD is 4ns with 2ns tCK
+ tRRD = '4ns'
+
+ # from MemCon example, tFAW is 30ns with 2ns tCK
+ tXAW = '30ns'
+ activation_limit = 4
+
+ # 4tCK
+ tXP = '8ns'
+
+ # start with tRFC + tXP -> 160ns + 8ns = 168ns
+ tXS = '168ns'
+
+# A single HBM x64 interface (one command and address bus), with
+# default timings based on HBM gen1 and data publicly released
+# A 4H stack is defined, 8Gb per die for a total of 4GB of memory.
+# Note: This defines a pseudo-channel with a unique controller
+# instantiated per pseudo-channel
+# Stay at the same IO rate (1Gbps) to maintain the timing relationship
+# with the HBM gen1 class (HBM_1000_4H_1x128) where possible
+class HBM_1000_4H_1x64(HBM_1000_4H_1x128):
+ # For HBM gen2 with pseudo-channel mode, configure 2X channels.
+ # Configuration defines a single pseudo channel, with the capacity
+ # set to (full_stack_capacity / 16) based on 8Gb dies
+ # To use all 16 pseudo channels, set 'channels' parameter to 16 in
+ # system configuration
+
+ # 64-bit pseudo-channel interface
+ device_bus_width = 64
+
+ # HBM pseudo-channel only supports BL4
+ burst_length = 4
+
+ # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack;
+ # with 16 channels, 256MB per channel
+ device_size = '256MB'
+
+ # page size is halved with pseudo-channel; maintaining the same number
+ # of rows per pseudo-channel with 2X banks across 2 channels
+ device_rowbuffer_size = '1kB'
+
+ # HBM has 8 or 16 banks depending on capacity
+ # From 4Gb dies and above, 16 banks are defined
+ banks_per_rank = 16
+
+ # reset tRFC for larger, 8Gb device
+ # use HBM1 4Gb value as a starting point
+ tRFC = '260ns'
+
+ # start with tRFC + tXP -> 260ns + 8ns = 268ns
+ tXS = '268ns'
+ # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns
+ tCS = '2ns'
+ tREFI = '3.9us'
+
+ # active powerdown and precharge powerdown exit time
+ tXP = '10ns'
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
+class LPDDR5_5500_1x16_BG_BL32(DRAMInterface):
+
+ # Increase buffer size to account for more bank resources
+ read_buffer_size = 64
+
+ # Set page policy to better suit DMC Huxley
+ page_policy = 'close_adaptive'
+
+ # 16-bit channel interface
+ device_bus_width = 16
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL32 for higher command bandwidth
+ burst_length = 32
+
+ # size of device in bytes
+ device_size = '1GB'
+
+ # 2kB page with BG mode
+ device_rowbuffer_size = '2kB'
+
+ # Use a 1x16 configuration
+ devices_per_rank = 1
+
+ # Use a single rank
+ ranks_per_channel = 1
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Initial configuration will have 16 banks with Bank Group Arch
+ # to maximize resources and enable higher data rates
+ banks_per_rank = 16
+ bank_groups_per_rank = 4
+
+ # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK
+ tCK = '1.455ns'
+
+ # Greater of 2 CK or 18ns
+ tRCD = '18ns'
+
+ # Base RL is 16 CK @ 687.5 MHz = 23.28ns
+ tCL = '23.280ns'
+
+ # Greater of 2 CK or 18ns
+ tRP = '18ns'
+
+ # Greater of 3 CK or 42ns
+ tRAS = '42ns'
+
+ # Greater of 3 CK or 34ns
+ tWR = '34ns'
+
+ # active powerdown and precharge powerdown exit time
+ # Greater of 3 CK or 7ns
+ tXP = '7ns'
+
+ # self refresh exit time (tRFCab + 7.5ns)
+ tXS = '217.5ns'
+
+ # Greater of 2 CK or 7.5 ns minus 2 CK
+ tRTP = '4.59ns'
+
+ # With BG architecture, burst of 32 transferred in two 16-beat
+ # sub-bursts, with a 16-beat gap in between.
+ # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
+ # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz
+ tBURST = '8.73ns'
+ # can interleave a Bstof32 from another bank group at tBURST_MIN
+ # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz
+ tBURST_MIN = '2.91ns'
+ # tBURST_MAX is the maximum burst delay for same bank group timing
+ # this is 8 CK @ 687.5 MHz
+ tBURST_MAX = '11.64ns'
+
+ # 8 CK @ 687.5 MHz
+ tCCD_L = "11.64ns"
+
+ # LPDDR5, 8 Gbit/channel for 210ns tRFCab
+ tRFC = '210ns'
+ tREFI = '3.9us'
+
+ # Greater of 4 CK or 6.25 ns
+ tWTR = '6.25ns'
+ # Greater of 4 CK or 12 ns
+ tWTR_L = '12ns'
+
+ # Required RD-to-WR timing is RL + BL/n + tWCKDQ0/tCK - WL
+ # tWCKDQ0/tCK will be 1 CK for most cases
+ # For gem5, RL = WL and BL/n is already accounted for with tBURST
+ # The result is that an additional 1 CK is required
+ tRTW = '1.455ns'
+
+ # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns
+ tCS = '2.91ns'
+
+ # 2 CK
+ tPPD = '2.91ns'
+
+ # Greater of 2 CK or 5 ns
+ tRRD = '5ns'
+ tRRD_L = '5ns'
+
+ # With Bank Group Arch mode tFAW is 20 ns
+ tXAW = '20ns'
+ activation_limit = 4
+
+ # at 5Gbps, 4:1 WCK to CK ratio required
+ # 2 data beats per WCK (DDR) -> 8 per CK
+ beats_per_clock = 8
+
+ # 2 cycles required to send activate command
+ # 2 command phases can be sent back-to-back or
+ # with a gap up to tAAD = 8 CK
+ two_cycle_activate = True
+ tAAD = '11.640ns'
+
+ data_clock_sync = True
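# Editorial sketch (not part of the patch): the LPDDR5-5500 clocking.
# With a 4:1 WCK:CK ratio and DDR on WCK, one CK carries 8 data beats,
# so the command clock is the data rate divided by 8:
ck_MHz = 5500 / 8.0                     # 687.5 MHz
tCK_ns = round(1000.0 / ck_MHz, 3)      # 1.455 ns, as configured above
assert ck_MHz == 687.5 and tCK_ns == 1.455
assert round(16 * tCK_ns, 2) == 23.28   # base RL of 16 CK -> tCL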
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
+class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32):
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL16 for smaller access granularity
+ burst_length = 16
+
+ # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST = '2.91ns'
+ tBURST_MIN = '2.91ns'
+ # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST_MAX = '5.82ns'
+
+ # 4 CK @ 687.5 MHz
+ tCCD_L = "5.82ns"
+
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# Starting with 5.5Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
+class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32):
+
+ # 4kB page with 8B mode
+ device_rowbuffer_size = '4kB'
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Select 8B
+ banks_per_rank = 8
+ bank_groups_per_rank = 0
+
+ # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio
+ tBURST = '5.82ns'
+ tBURST_MIN = '5.82ns'
+ tBURST_MAX = '5.82ns'
+
+ # Greater of 4 CK or 12 ns
+ tWTR = '12ns'
+
+ # Greater of 2 CK or 10 ns
+ tRRD = '10ns'
+
+ # With 8B mode tFAW is 40 ns
+ tXAW = '40ns'
+ activation_limit = 4
+
+ # Reset BG arch timing for 8B mode
+ tCCD_L = "0ns"
+ tRRD_L = "0ns"
+ tWTR_L = "0ns"
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture
+# burst of 32, which means bursts can be interleaved
+class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32):
+
+ # 6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK
+ tCK = '1.25ns'
+
+ # Base RL is 17 CK @ 800 MHz = 21.25ns
+ tCL = '21.25ns'
+
+ # With BG architecture, burst of 32 transferred in two 16-beat
+ # sub-bursts, with a 16-beat gap in between.
+ # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
+ # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz
+ tBURST = '7.5ns'
+ # can interleave a Bstof32 from another bank group at tBURST_MIN
+ # 16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz
+ tBURST_MIN = '2.5ns'
+ # tBURST_MAX is the maximum burst delay for same bank group timing
+ # this is 8 CK @ 800 MHz
+ tBURST_MAX = '10ns'
+
+ # 8 CK @ 800 MHz
+ tCCD_L = "10ns"
+
+ # Required RD-to-WR timing is RL + BL/n + tWCKDQ0/tCK - WL
+ # tWCKDQ0/tCK will be 1 CK for most cases
+ # For gem5, RL = WL and BL/n is already accounted for with tBURST
+ # The result is that an additional 1 CK is required
+ tRTW = '1.25ns'
+
+ # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns
+ tCS = '2.5ns'
+
+ # 2 CK
+ tPPD = '2.5ns'
+
+ # 2 command phases can be sent back-to-back or
+ # with a gap up to tAAD = 8 CK
+ tAAD = '10ns'
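# Editorial sketch (not part of the patch): the 6.4 Gb/s clocking used
# by this class.
ck_MHz = 6400 / 8.0           # 4:1 WCK:CK with DDR on WCK -> 800 MHz
tCK_ns = 1000.0 / ck_MHz      # 1.25 ns
assert tCK_ns == 1.25
assert 17 * tCK_ns == 21.25   # base RL of 17 CK -> the tCL above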
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 16-bank mode with bank-group architecture, burst of 16
+class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32):
+
+ # LPDDR5 is a BL16 or BL32 device
+ # With BG mode, BL16 and BL32 are supported
+ # Use BL16 for smaller access granularity
+ burst_length = 16
+
+ # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio
+ tBURST = '2.5ns'
+ tBURST_MIN = '2.5ns'
+ # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio
+ tBURST_MAX = '5ns'
+
+ # 4 CK @ 800 MHz
+ tCCD_L = "5ns"
+
+
+# A single LPDDR5 x16 interface (one command/address bus)
+# for a single x16 channel with default timings based on
+# initial JEDEC specification
+# 6.4Gbps data rates and 8Gbit die
+# Configuring for 8-bank mode, burst of 32
+class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32):
+
+ # 4kB page with 8B mode
+ device_rowbuffer_size = '4kB'
+
+ # LPDDR5 supports configurable bank options
+ # 8B : BL32, all frequencies
+ # 16B : BL32 or BL16, <=3.2Gbps
+ # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps
+ # Select 8B
+ banks_per_rank = 8
+ bank_groups_per_rank = 0
+
+ # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio
+ tBURST = '5ns'
+ tBURST_MIN = '5ns'
+ tBURST_MAX = '5ns'
+
+ # Greater of 4 CK or 12 ns
+ tWTR = '12ns'
+
+ # Greater of 2 CK or 10 ns
+ tRRD = '10ns'
+
+ # With 8B mode tFAW is 40 ns
+ tXAW = '40ns'
+ activation_limit = 4
+
+ # Reset BG arch timing for 8B mode
+ tCCD_L = "0ns"
+ tRRD_L = "0ns"
+ tWTR_L = "0ns"
# -*- mode:python -*-
#
-# Copyright (c) 2018-2019 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
SimObject('AddrMapper.py')
SimObject('Bridge.py')
SimObject('DRAMCtrl.py')
+SimObject('DRAMInterface.py')
SimObject('ExternalMaster.py')
SimObject('ExternalSlave.py')
SimObject('MemObject.py')
#include "debug/DRAMState.hh"
#include "debug/Drain.hh"
#include "debug/QOS.hh"
+#include "params/DRAMInterface.hh"
#include "sim/system.hh"
using namespace std;
retryRdReq(false), retryWrReq(false),
nextReqEvent([this]{ processNextReqEvent(); }, name()),
respondEvent([this]{ processRespondEvent(); }, name()),
- readBufferSize(p->read_buffer_size),
- writeBufferSize(p->write_buffer_size),
+ dram(p->dram),
+ readBufferSize(dram->readBufferSize),
+ writeBufferSize(dram->writeBufferSize),
writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0),
writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0),
minWritesPerSwitch(p->min_writes_per_switch),
- writesThisTime(0), readsThisTime(0), tCS(p->tCS),
+ writesThisTime(0), readsThisTime(0),
memSchedPolicy(p->mem_sched_policy),
frontendLatency(p->static_frontend_latency),
backendLatency(p->static_backend_latency),
readQueue.resize(p->qos_priorities);
writeQueue.resize(p->qos_priorities);
+ dram->setCtrl(this);
+
// perform a basic check of the write thresholds
if (p->write_low_thresh_perc >= p->write_high_thresh_perc)
fatal("Write buffer low threshold %d must be smaller than the "
"high threshold %d\n", p->write_low_thresh_perc,
p->write_high_thresh_perc);
-
- // determine the rows per bank by looking at the total capacity
- uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
-
- DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
- AbstractMemory::size());
-
- // create a DRAM interface
- // will only populate the ranks if DRAM is configured
- dram = new DRAMInterface(*this, p, capacity, range);
- DPRINTF(DRAM, "Created DRAM interface \n");
}
void
DRAMCtrl::init()
{
- MemCtrl::init();
-
if (!port.isConnected()) {
fatal("DRAMCtrl %s is unconnected!\n", name());
} else {
port.sendRangeChange();
}
-
- dram->init(range);
-
}
void
isTimingMode = system()->isTimingMode();
if (isTimingMode) {
- dram->startupRanks();
-
// shift the bus busy time sufficiently far ahead that we never
// have to worry about negative values when computing the time for
// the next request, this will add an insignificant bubble at the
"is responding");
// do the actual memory access and turn the packet into a response
- access(pkt);
+ dram->access(pkt);
Tick latency = 0;
if (pkt->hasData()) {
// address of first DRAM packet is kept unaliged. Subsequent DRAM packets
// are aligned to burst size boundaries. This is to ensure we accurately
// check read packets against packets in write queue.
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
+ const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
unsigned pktsServicedByWrQ = 0;
BurstHelper* burst_helper = NULL;
// if the request size is larger than burst size, the pkt is split into
// multiple DRAM packets
- const Addr base_addr = getCtrlAddr(pkt->getAddr());
+ const Addr base_addr = dram->getCtrlAddr(pkt->getAddr());
Addr addr = base_addr;
uint32_t burstSize = dram->bytesPerBurst();
for (int cnt = 0; cnt < pktCount; ++cnt) {
DRAMPacket* dram_pkt = respQueue.front();
// media specific checks and functions when read response is complete
- dram->respondEventDRAM(dram_pkt->rank);
+ dram->respondEvent(dram_pkt->rank);
if (dram_pkt->burstHelper) {
// it is a split packet
void
DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
{
- DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr());
+ DPRINTF(DRAM, "Responding to Address %lld.. \n",pkt->getAddr());
bool needsResponse = pkt->needsResponse();
// do the actual memory access which also turns the packet into a
// response
- access(pkt);
+ dram->access(pkt);
// turn packet around to go back to requester if response expected
if (needsResponse) {
// if not, shift to next burst window
Tick act_at;
if (twoCycleActivate)
- act_at = ctrl.verifyMultiCmd(act_tick, tAAD);
+ act_at = ctrl->verifyMultiCmd(act_tick, tAAD);
else
- act_at = ctrl.verifySingleCmd(act_tick);
+ act_at = ctrl->verifySingleCmd(act_tick);
DPRINTF(DRAM, "Activate at tick %d\n", act_at);
// Issuing an explicit PRE command
// Verify that we have command bandwidth to issue the precharge
// if not, shift to next burst window
- pre_at = ctrl.verifySingleCmd(pre_tick);
+ pre_at = ctrl->verifySingleCmd(pre_tick);
// enforce tPPD
for (int i = 0; i < banksPerRank; i++) {
rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD,
// verify that we have command bandwidth to issue the burst
// if not, shift to next burst window
if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay))
- cmd_at = ctrl.verifyMultiCmd(cmd_at, tCK);
+ cmd_at = ctrl->verifyMultiCmd(cmd_at, tCK);
else
- cmd_at = ctrl.verifySingleCmd(cmd_at);
+ cmd_at = ctrl->verifySingleCmd(cmd_at);
// if we are interleaving bursts, ensure that
// 1) we don't double interleave on next burst issue
bool got_more_hits = false;
bool got_bank_conflict = false;
- for (uint8_t i = 0; i < ctrl.numPriorities(); ++i) {
+ for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) {
auto p = queue[i].begin();
// keep on looking until we find a hit or reach the end of the
// queue
// Update latency stats
stats.totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime;
stats.totQLat += cmd_at - dram_pkt->entryTime;
+ stats.totBusLat += tBURST;
} else {
// Schedule write done event to decrement event count
// after the readyTime has been reached
// Update latency stats
stats.masterReadTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
-
- stats.bytesRead += dram->bytesPerBurst();
- stats.totBusLat += dram->burstDelay();
stats.masterReadBytes[dram_pkt->masterId()] += dram_pkt->size;
} else {
++writesThisTime;
- stats.bytesWritten += dram->bytesPerBurst();
stats.masterWriteBytes[dram_pkt->masterId()] += dram_pkt->size;
stats.masterWriteTotalLat[dram_pkt->masterId()] +=
dram_pkt->readyTime - dram_pkt->entryTime;
// Figure out which read request goes next
// If we are changing command type, incorporate the minimum
- // bus turnaround delay which will be tCS (different rank) case
- to_read = chooseNext((*queue), switched_cmd_type ? tCS : 0);
+ // bus turnaround delay which will be rank to rank delay
+ to_read = chooseNext((*queue), switched_cmd_type ?
+ dram->rankDelay() : 0);
if (to_read != queue->end()) {
// candidate read found
// If we are changing command type, incorporate the minimum
// bus turnaround delay
to_write = chooseNext((*queue),
- switched_cmd_type ? std::min(dram->minRdToWr(), tCS) : 0);
+ switched_cmd_type ? std::min(dram->minRdToWr(),
+ dram->rankDelay()) : 0);
if (to_write != queue->end()) {
write_found = true;
}
}
-DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl,
- const DRAMCtrlParams* _p,
- const uint64_t capacity,
- const AddrRange range)
- : SimObject(_p), ctrl(_ctrl),
+DRAMInterface::DRAMInterface(const DRAMInterfaceParams* _p)
+ : AbstractMemory(_p),
addrMapping(_p->addr_mapping),
burstSize((_p->devices_per_rank * _p->burst_length *
_p->device_bus_width) / 8),
bankGroupsPerRank(_p->bank_groups_per_rank),
bankGroupArch(_p->bank_groups_per_rank > 0),
banksPerRank(_p->banks_per_rank), rowsPerBank(0),
- tCK(_p->tCK), tCL(_p->tCL), tBURST(_p->tBURST),
+ tCK(_p->tCK), tCS(_p->tCS), tCL(_p->tCL), tBURST(_p->tBURST),
tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX), tRTW(_p->tRTW),
tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD),
tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP),
wrToRdDly(tCL + tBURST + _p->tWTR), rdToWrDly(tBURST + tRTW),
wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L),
rdToWrDlySameBG(tRTW + _p->tBURST_MAX),
- rankToRankDly(ctrl.rankDelay() + tBURST),
+ rankToRankDly(tCS + tBURST),
pageMgmt(_p->page_policy),
maxAccessesPerRow(_p->max_accesses_per_row),
timeStampOffset(0), activeRank(0),
enableDRAMPowerdown(_p->enable_dram_powerdown),
lastStatsResetTick(0),
- stats(_ctrl, *this)
+ stats(*this),
+ readBufferSize(_p->read_buffer_size),
+ writeBufferSize(_p->write_buffer_size)
{
fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, "
"must be a power of two\n", burstSize);
for (int i = 0; i < ranksPerChannel; i++) {
DPRINTF(DRAM, "Creating DRAM rank %d \n", i);
- Rank* rank = new Rank(ctrl, _p, i, *this);
+ Rank* rank = new Rank(_p, i, *this);
ranks.push_back(rank);
}
uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank *
ranksPerChannel;
+ uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size());
+
+ DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity,
+ AbstractMemory::size());
+
// if actual DRAM size does not match memory capacity in system warn!
if (deviceCapacity != capacity / (1024 * 1024))
warn("DRAM device capacity (%d Mbytes) does not match the "
}
void
-DRAMInterface::init(AddrRange range)
+DRAMInterface::init()
{
+ AbstractMemory::init();
+
// a bit of sanity checks on the interleaving, save it for here to
// ensure that the system pointer is initialised
if (range.interleaved()) {
// channel striping has to be done at a granularity that
// is equal or larger to a cache line
- if (ctrl.system()->cacheLineSize() > range.granularity()) {
+ if (system()->cacheLineSize() > range.granularity()) {
fatal("Channel interleaving of %s must be at least as large "
"as the cache line size\n", name());
}
}
void
-DRAMInterface::startupRanks()
+DRAMInterface::startup()
{
- // timestamp offset should be in clock cycles for DRAMPower
- timeStampOffset = divCeil(curTick(), tCK);
+ if (system()->isTimingMode()) {
+ // timestamp offset should be in clock cycles for DRAMPower
+ timeStampOffset = divCeil(curTick(), tCK);
+ }
for (auto r : ranks) {
r->startup(curTick() + tREFI - tRP);
}
void
-DRAMInterface::respondEventDRAM(uint8_t rank)
+DRAMInterface::respondEvent(uint8_t rank)
{
Rank& rank_ref = *ranks[rank];
std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP;
// When is the earliest the R/W burst can issue?
- const Tick col_allowed_at = ctrl.inReadBusState(false) ?
+ const Tick col_allowed_at = ctrl->inReadBusState(false) ?
ranks[i]->banks[j].rdAllowedAt :
ranks[i]->banks[j].wrAllowedAt;
Tick col_at = std::max(col_allowed_at, act_at + tRCD);
return make_pair(bank_mask, hidden_bank_prep);
}
-DRAMInterface::Rank::Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int _rank,
- DRAMInterface& _dram)
- : EventManager(&_ctrl), ctrl(_ctrl), dram(_dram),
+DRAMInterface*
+DRAMInterfaceParams::create()
+{
+ return new DRAMInterface(this);
+}
+
+DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p,
+ int _rank, DRAMInterface& _dram)
+ : EventManager(&_dram), dram(_dram),
pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE),
pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE),
refreshState(REF_IDLE), inLowPowerState(false), rank(_rank),
refreshEvent([this]{ processRefreshEvent(); }, name()),
powerEvent([this]{ processPowerEvent(); }, name()),
wakeUpEvent([this]{ processWakeUpEvent(); }, name()),
- stats(_ctrl, *this)
+ stats(_dram, *this)
{
for (int b = 0; b < _p->banks_per_rank; b++) {
banks[b].bank = b;
DRAMInterface::Rank::isQueueEmpty() const
{
// check commmands in Q based on current bus direction
- bool no_queued_cmds = (ctrl.inReadBusState(true) && (readEntries == 0))
- || (ctrl.inWriteBusState(true) && (writeEntries == 0));
+ bool no_queued_cmds = (dram.ctrl->inReadBusState(true) &&
+ (readEntries == 0))
+ || (dram.ctrl->inWriteBusState(true) &&
+ (writeEntries == 0));
return no_queued_cmds;
}
// if a request is at the moment being handled and this request is
// accessing the current rank then wait for it to finish
if ((rank == dram.activeRank)
- && (ctrl.requestEventScheduled())) {
+ && (dram.ctrl->requestEventScheduled())) {
// hand control over to the request loop until it is
// evaluated next
DPRINTF(DRAM, "Refresh awaiting draining\n");
// or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled
// should have outstanding precharge or read response event
assert(prechargeEvent.scheduled() ||
- ctrl.respondEventScheduled());
+ dram.ctrl->respondEventScheduled());
// will start refresh when pwrState transitions to IDLE
}
assert(!powerEvent.scheduled());
- if ((ctrl.drainState() == DrainState::Draining) ||
- (ctrl.drainState() == DrainState::Drained)) {
+ if ((dram.ctrl->drainState() == DrainState::Draining) ||
+ (dram.ctrl->drainState() == DrainState::Drained)) {
// if draining, do not re-enter low-power mode.
// simply go to IDLE and wait
schedulePowerEvent(PWR_IDLE, curTick());
}
// completed refresh event, ensure next request is scheduled
- if (!ctrl.requestEventScheduled()) {
+ if (!dram.ctrl->requestEventScheduled()) {
DPRINTF(DRAM, "Scheduling next request after refreshing"
" rank %d\n", rank);
- ctrl.restartScheduler(curTick());
+ dram.ctrl->restartScheduler(curTick());
}
}
// bypass auto-refresh and go straight to SREF, where memory
// will issue refresh immediately upon entry
if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() &&
- (ctrl.drainState() != DrainState::Draining) &&
- (ctrl.drainState() != DrainState::Drained) &&
+ (dram.ctrl->drainState() != DrainState::Draining) &&
+ (dram.ctrl->drainState() != DrainState::Drained) &&
dram.enableDRAMPowerdown) {
DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning "
"to self refresh at %11u tick\n", rank, curTick());
bool
DRAMInterface::Rank::forceSelfRefreshExit() const {
return (readEntries != 0) ||
- (ctrl.inWriteBusState(true) && (writeEntries != 0));
+ (dram.ctrl->inWriteBusState(true) && (writeEntries != 0));
}
DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl)
ADD_STAT(writeReqs, "Number of write requests accepted"),
ADD_STAT(readBursts,
- "Number of DRAM read bursts, "
+ "Number of controller read bursts, "
"including those serviced by the write queue"),
ADD_STAT(writeBursts,
- "Number of DRAM write bursts, "
+ "Number of controller write bursts, "
"including those merged in the write queue"),
ADD_STAT(servicedByWrQ,
- "Number of DRAM read bursts serviced by the write queue"),
+ "Number of controller read bursts serviced by the write queue"),
ADD_STAT(mergedWrBursts,
- "Number of DRAM write bursts merged with an existing one"),
+ "Number of controller write bursts merged with an existing one"),
ADD_STAT(neitherReadNorWriteReqs,
"Number of requests that are neither read nor write"),
ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"),
ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"),
- ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
- ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
-
ADD_STAT(numRdRetry, "Number of times read queue was full causing retry"),
ADD_STAT(numWrRetry, "Number of times write queue was full causing retry"),
ADD_STAT(wrPerTurnAround,
"Writes before turning the bus around for reads"),
- ADD_STAT(bytesRead, "Total number of bytes read from memory"),
ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"),
- ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
ADD_STAT(bytesReadSys, "Total read bytes from the system interface side"),
ADD_STAT(bytesWrittenSys,
"Total written bytes from the system interface side"),
- ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiByte/s"),
- ADD_STAT(avgWrBW, "Average achieved write bandwidth in MiByte/s"),
ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"),
ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"),
- ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
-
- ADD_STAT(busUtil, "Data bus utilization in percentage"),
- ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
- ADD_STAT(busUtilWrite, "Data bus utilization in percentage for writes"),
ADD_STAT(totGap, "Total gap between requests"),
ADD_STAT(avgGap, "Average gap between requests"),
{
using namespace Stats;
- assert(ctrl._system);
- const auto max_masters = ctrl._system->maxMasters();
+ assert(ctrl.system());
+ const auto max_masters = ctrl.system()->maxMasters();
avgRdQLen.precision(2);
avgWrQLen.precision(2);
- avgBusLat.precision(2);
readPktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
writePktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1);
.init(ctrl.writeBufferSize)
.flags(nozero);
- avgRdBW.precision(2);
- avgWrBW.precision(2);
avgRdBWSys.precision(2);
avgWrBWSys.precision(2);
- peakBW.precision(2);
- busUtil.precision(2);
avgGap.precision(2);
- busUtilWrite.precision(2);
// per-master bytes read and written to memory
masterReadBytes
.flags(nonan)
.precision(2);
- busUtilRead
- .precision(2);
-
masterWriteRate
.flags(nozero | nonan)
.precision(12);
.precision(2);
for (int i = 0; i < max_masters; i++) {
- const std::string master = ctrl._system->getMasterName(i);
+ const std::string master = ctrl.system()->getMasterName(i);
masterReadBytes.subname(i, master);
masterReadRate.subname(i, master);
masterWriteBytes.subname(i, master);
}
// Formula stats
- avgBusLat = totBusLat / (readBursts - servicedByWrQ);
-
- avgRdBW = (bytesRead / 1000000) / simSeconds;
- avgWrBW = (bytesWritten / 1000000) / simSeconds;
avgRdBWSys = (bytesReadSys / 1000000) / simSeconds;
avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds;
- peakBW = (SimClock::Frequency / ctrl.dram->burstDataDelay()) *
- ctrl.dram->bytesPerBurst() / 1000000;
-
- busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
avgGap = totGap / (readReqs + writeReqs);
- busUtilRead = avgRdBW / peakBW * 100;
- busUtilWrite = avgWrBW / peakBW * 100;
-
masterReadRate = masterReadBytes / simSeconds;
masterWriteRate = masterWriteBytes / simSeconds;
masterReadAvgLat = masterReadTotalLat / masterReadAccesses;
dram.lastStatsResetTick = curTick();
}
-DRAMInterface::DRAMStats::DRAMStats(DRAMCtrl &_ctrl, DRAMInterface &_dram)
- : Stats::Group(&_ctrl, csprintf("dram").c_str()),
+DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram)
+ : Stats::Group(&_dram),
dram(_dram),
ADD_STAT(readBursts, "Number of DRAM read bursts"),
ADD_STAT(perBankWrBursts, "Per bank write bursts"),
ADD_STAT(totQLat, "Total ticks spent queuing"),
+ ADD_STAT(totBusLat, "Total ticks spent in databus transfers"),
ADD_STAT(totMemAccLat,
"Total ticks spent from burst creation until serviced "
"by the DRAM"),
+
ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"),
+ ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"),
ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"),
ADD_STAT(readRowHits, "Number of row buffer hits during reads"),
ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"),
    ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiByte/s"),
    ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiByte/s"),
+ ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"),
+
+ ADD_STAT(busUtil, "Data bus utilization in percentage"),
+ ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"),
+ ADD_STAT(busUtilWrite, "Data bus utilization in percentage for writes"),
+
ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined")
{
using namespace Stats;
avgQLat.precision(2);
+ avgBusLat.precision(2);
avgMemAccLat.precision(2);
readRowHitRate.precision(2);
dram.maxAccessesPerRow : dram.rowBufferSize)
.flags(nozero);
+ peakBW.precision(2);
+ busUtil.precision(2);
+ busUtilWrite.precision(2);
+ busUtilRead.precision(2);
+
pageHitRate.precision(2);
// Formula stats
avgQLat = totQLat / readBursts;
+ avgBusLat = totBusLat / readBursts;
avgMemAccLat = totMemAccLat / readBursts;
readRowHitRate = (readRowHits / readBursts) * 100;
avgRdBW = (bytesRead / 1000000) / simSeconds;
avgWrBW = (bytesWritten / 1000000) / simSeconds;
+ peakBW = (SimClock::Frequency / dram.burstDataDelay()) *
+ dram.bytesPerBurst() / 1000000;
+
+ busUtil = (avgRdBW + avgWrBW) / peakBW * 100;
+ busUtilRead = avgRdBW / peakBW * 100;
+ busUtilWrite = avgWrBW / peakBW * 100;
pageHitRate = (writeRowHits + readRowHits) /
(writeBursts + readBursts) * 100;
}
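
The bandwidth and utilization formulas that moved from CtrlStats into DRAMStats are easy to sanity-check by hand. Worked numbers in plain Python, assuming a DDR3-1600-like x64 interface (64-byte bursts, tBURST = 5 ns) and gem5's tick frequency of 1e12 ticks/s:

    SIM_FREQ = 10**12      # gem5 ticks per second (SimClock::Frequency)
    tBURST = 5000          # burst data delay in ticks (5 ns)
    bytes_per_burst = 64

    # peakBW = (SimClock::Frequency / burstDataDelay()) * bytesPerBurst / 1e6
    peak_bw = (SIM_FREQ // tBURST) * bytes_per_burst / 1_000_000
    # => 12800.0 MiByte/s, the familiar 12.8 GB/s of DDR3-1600 x64

    # with, say, 6000 MB/s read and 2000 MB/s write achieved:
    avg_rd_bw, avg_wr_bw = 6000.0, 2000.0
    bus_util = (avg_rd_bw + avg_wr_bw) / peak_bw * 100   # 62.5 %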
-DRAMInterface::RankStats::RankStats(DRAMCtrl &_ctrl, Rank &_rank)
- : Stats::Group(&_ctrl, csprintf("dram_rank%d", _rank.rank).c_str()),
+DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank)
+ : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()),
rank(_rank),
ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"),
DRAMCtrl::recvFunctional(PacketPtr pkt)
{
// rely on the abstract memory
- functionalAccess(pkt);
+ dram->functionalAccess(pkt);
}
Port &
// if we switched to timing mode, kick things into action,
// and behave as if we restored from a checkpoint
startup();
+ dram->startup();
} else if (isTimingMode && !system()->isTimingMode()) {
// if we switch from timing mode, stop the refresh events to
// not cause issues with KVM
DRAMCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(ctrl.getAddrRange());
+ ranges.push_back(ctrl.dram->getAddrRange());
return ranges;
}
#include "enums/AddrMap.hh"
#include "enums/MemSched.hh"
#include "enums/PageManage.hh"
+#include "mem/abstract_mem.hh"
#include "mem/drampower.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/DRAMCtrl.hh"
#include "sim/eventq.hh"
+class DRAMInterfaceParams;
+
/**
* A basic class to track the bank state, i.e. what row is
* currently open (if any), when is the bank free to accept a new
* The DRAMInterface includes a class for individual ranks
* and per rank functions.
*/
-class DRAMInterface : public SimObject
+class DRAMInterface : public AbstractMemory
{
private:
/**
class Rank;
struct RankStats : public Stats::Group
{
- RankStats(DRAMCtrl &ctrl, Rank &rank);
+ RankStats(DRAMInterface &dram, Rank &rank);
void regStats() override;
void resetStats() override;
*/
class Rank : public EventManager
{
- protected:
-
- /**
- * A reference to the parent DRAMCtrl instance
- */
- DRAMCtrl& ctrl;
-
private:
/**
*/
Tick lastBurstTick;
- Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int _rank,
+ Rank(const DRAMInterfaceParams* _p, int _rank,
DRAMInterface& _dram);
- const std::string name() const { return csprintf("dram_%d", rank); }
+ const std::string name() const { return csprintf("%d", rank); }
/**
* Kick off accounting for power and refresh states and
* @param next Memory Command
* @return true if timeStamp of Command 1 < timeStamp of Command 2
*/
- static bool sortTime(const Command& cmd, const Command& cmd_next)
+ static bool
+ sortTime(const Command& cmd, const Command& cmd_next)
{
return cmd.timeStamp < cmd_next.timeStamp;
- };
+ }
/**
- * A reference to the parent DRAMCtrl instance
+ * A pointer to the parent DRAMCtrl instance
*/
- DRAMCtrl& ctrl;
+ DRAMCtrl* ctrl;
/**
* Memory controller configuration initialized based on parameter
* DRAM timing requirements
*/
const Tick M5_CLASS_VAR_USED tCK;
+ const Tick tCS;
const Tick tCL;
const Tick tBURST;
const Tick tBURST_MIN;
struct DRAMStats : public Stats::Group
{
- DRAMStats(DRAMCtrl &ctrl, DRAMInterface &dram);
+ DRAMStats(DRAMInterface &dram);
void regStats() override;
void resetStats() override;
// Latencies summed over all requests
Stats::Scalar totQLat;
+ Stats::Scalar totBusLat;
Stats::Scalar totMemAccLat;
// Average latencies per request
Stats::Formula avgQLat;
+ Stats::Formula avgBusLat;
Stats::Formula avgMemAccLat;
// Row hit count and rate
// Average bandwidth
Stats::Formula avgRdBW;
Stats::Formula avgWrBW;
+ Stats::Formula peakBW;
+ // bus utilization
+ Stats::Formula busUtil;
+ Stats::Formula busUtilRead;
+ Stats::Formula busUtilWrite;
Stats::Formula pageHitRate;
};
std::vector<Rank*> ranks;
public:
+
+ /**
+ * Buffer sizes for read and write queues in the controller
+ * These are passed to the controller on instantiation
+ * Defining them here allows for buffers to be resized based
+ * on memory type / configuration.
+ */
+ const uint32_t readBufferSize;
+ const uint32_t writeBufferSize;
+
+ /** Setting a pointer to the controller */
+ void setCtrl(DRAMCtrl* _ctrl) { ctrl = _ctrl; }
+
/**
* Initialize the DRAM interface and verify parameters
- * @param range is the address range for this interface
*/
- void init(AddrRange range);
+ void init() override;
/**
* Iterate through dram ranks and instantiate per rank startup routine
*/
- void startupRanks();
+ void startup() override;
/**
* Iterate through dram ranks to exit self-refresh in order to drain
*/
void suspend();
+ /**
+ * Get an address in a dense range which starts from 0. The input
+ * address is the physical address of the request in an address
+ * space that contains other SimObjects apart from this
+ * controller.
+ *
+     * @param addr The input address which should be in the addrRange
+     * @return An address in the contiguous range [0, max)
+ */
+ Addr getCtrlAddr(Addr addr) { return range.getOffset(addr); }
+
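
getCtrlAddr() moving onto the interface means the media model, not the controller, translates system physical addresses into the dense [0, size) space that the bank/row/column decode works on. A toy illustration in plain Python (hypothetical helper; gem5's AddrRange::getOffset() additionally strips channel-interleave bits, which this sketch omits):

    def get_offset(addr, start, size):
        # dense mapping for a plain, non-interleaved range
        assert start <= addr < start + size
        return addr - start

    # a request at 0x80001000 in a range starting at 0x80000000
    print(hex(get_offset(0x80001000, start=0x80000000, size=0x20000000)))
    # => 0x1000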
/**
* @return number of bytes in a burst for this interface
*/
- uint32_t bytesPerBurst() const { return burstSize; };
+ uint32_t bytesPerBurst() const { return burstSize; }
/**
*
* @return number of ranks per channel for this interface
*/
- uint32_t numRanks() const { return ranksPerChannel; };
+ uint32_t numRanks() const { return ranksPerChannel; }
/*
* @return time to send a burst of data
/*
* @return time to send a burst of data without gaps
*/
- Tick burstDataDelay() const
+ Tick
+ burstDataDelay() const
{
return (burstInterleave ? tBURST_MAX / 2 : tBURST);
}
*
* @return additional bus turnaround required for read-to-write
*/
- Tick minRdToWr() const { return tRTW; };
+ Tick minRdToWr() const { return tRTW; }
+
+ /**
+ * Determine the required delay for an access to a different rank
+ *
+ * @return required rank to rank delay
+ */
+ Tick rankDelay() const { return tCS; }
/*
     * Function to calculate RAS cycle time for use within and
* This requires the DRAM to be in the
* REF IDLE state
*/
- bool burstReady(uint8_t rank) const
+ bool
+ burstReady(uint8_t rank) const
{
return ranks[rank]->inRefIdleState();
}
*
* @param rank Specifies rank associated with read burst
*/
- void respondEventDRAM(uint8_t rank);
+ void respondEvent(uint8_t rank);
/**
* Check the refresh state to determine if refresh needs
*/
void checkRefreshState(uint8_t rank);
- DRAMInterface(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p,
- uint64_t capacity, AddrRange range);
+ DRAMInterface(const DRAMInterfaceParams* _p);
};
/**
*/
void accessAndRespond(PacketPtr pkt, Tick static_latency);
- /**
- * Get an address in a dense range which starts from 0. The input
- * address is the physical address of the request in an address
- * space that contains other SimObjects apart from this
- * controller.
- *
- * @param addr The intput address which should be in the addrRange
- * @return An address in the continues range [0, max)
- */
- Addr getCtrlAddr(Addr addr)
- {
- return range.getOffset(addr);
- }
-
/**
     * The memory scheduler/arbiter - picks which request needs to
* go next, based on the specified policy such as FCFS or FR-FCFS
*/
std::unordered_multiset<Tick> burstTicks;
+ /**
+     * Pointer to the interface of the actual DRAM media
+ */
+ DRAMInterface* const dram;
+
/**
* The following are basic design parameters of the memory
* controller, and are initialized based on parameter values.
uint32_t writesThisTime;
uint32_t readsThisTime;
- /**
- * Basic memory timing parameters initialized based on parameter
- * values. These will be used across memory interfaces.
- */
- const Tick tCS;
-
/**
* Memory controller configuration initialized based on parameter
* values.
// Average queue lengths
Stats::Average avgRdQLen;
Stats::Average avgWrQLen;
- // Latencies summed over all requests
- Stats::Scalar totBusLat;
- // Average latencies per request
- Stats::Formula avgBusLat;
Stats::Scalar numRdRetry;
Stats::Scalar numWrRetry;
Stats::Histogram rdPerTurnAround;
Stats::Histogram wrPerTurnAround;
- Stats::Scalar bytesRead;
Stats::Scalar bytesReadWrQ;
- Stats::Scalar bytesWritten;
Stats::Scalar bytesReadSys;
Stats::Scalar bytesWrittenSys;
// Average bandwidth
- Stats::Formula avgRdBW;
- Stats::Formula avgWrBW;
Stats::Formula avgRdBWSys;
Stats::Formula avgWrBWSys;
- Stats::Formula peakBW;
- // bus utilization
- Stats::Formula busUtil;
- Stats::Formula busUtilRead;
- Stats::Formula busUtilWrite;
Stats::Scalar totGap;
Stats::Formula avgGap;
CtrlStats stats;
- /**
- * Create pointer to interfasce to the actual media
- */
- DRAMInterface* dram;
-
/**
* Upstream caches need this packet until true is returned, so
* hold it for deletion until a subsequent call
*/
void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); }
- /**
- * Determine the required delay for an access to a different rank
- *
- * @return required rank to rank delay
- */
- Tick rankDelay() const { return tCS; }
-
/**
* Check the current direction of the memory channel
*
#include "base/intmath.hh"
#include "sim/core.hh"
-DRAMPower::DRAMPower(const DRAMCtrlParams* p, bool include_io) :
+DRAMPower::DRAMPower(const DRAMInterfaceParams* p, bool include_io) :
powerlib(libDRAMPower(getMemSpec(p), include_io))
{
}
Data::MemArchitectureSpec
-DRAMPower::getArchParams(const DRAMCtrlParams* p)
+DRAMPower::getArchParams(const DRAMInterfaceParams* p)
{
Data::MemArchitectureSpec archSpec;
archSpec.burstLength = p->burst_length;
}
Data::MemTimingSpec
-DRAMPower::getTimingParams(const DRAMCtrlParams* p)
+DRAMPower::getTimingParams(const DRAMInterfaceParams* p)
{
// Set the values that are used for power calculations and ignore
// the ones only used by the controller functionality in DRAMPower
}
Data::MemPowerSpec
-DRAMPower::getPowerParams(const DRAMCtrlParams* p)
+DRAMPower::getPowerParams(const DRAMInterfaceParams* p)
{
// All DRAMPower currents are in mA
Data::MemPowerSpec powerSpec;
}
Data::MemorySpecification
-DRAMPower::getMemSpec(const DRAMCtrlParams* p)
+DRAMPower::getMemSpec(const DRAMInterfaceParams* p)
{
Data::MemorySpecification memSpec;
memSpec.memArchSpec = getArchParams(p);
}
bool
-DRAMPower::hasTwoVDD(const DRAMCtrlParams* p)
+DRAMPower::hasTwoVDD(const DRAMInterfaceParams* p)
{
return p->VDD2 == 0 ? false : true;
}
+
+uint8_t
+DRAMPower::getDataRate(const DRAMInterfaceParams* p)
+{
+ uint32_t burst_cycles = divCeil(p->tBURST_MAX, p->tCK);
+ uint8_t data_rate = p->burst_length / burst_cycles;
+ // 4 for GDDR5
+ if (data_rate != 1 && data_rate != 2 && data_rate != 4 && data_rate != 8)
+        fatal("Got unexpected data rate %d, should be 1 or 2 or 4 or 8\n",
+              data_rate);
+ return data_rate;
+}
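
getDataRate() recovers the transfers-per-clock figure from the burst timing. A worked example in plain Python with DDR4-2400-like numbers (assumed, not from this patch): a burst of 8 beats delivered over 4 clocks is double data rate.

    from math import ceil

    tCK = 833            # clock period in ticks, ~0.833 ns
    tBURST_MAX = 3332    # full-burst time in ticks, ~3.332 ns
    burst_length = 8

    burst_cycles = ceil(tBURST_MAX / tCK)       # 4
    data_rate = burst_length // burst_cycles    # 8 / 4 = 2 (DDR)
    assert data_rate in (1, 2, 4, 8)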
#define __MEM_DRAM_POWER_HH__
#include "libdrampower/LibDRAMPower.h"
-#include "params/DRAMCtrl.hh"
+#include "params/DRAMInterface.hh"
/**
* DRAMPower is a standalone tool which calculates the power consumed by a
/**
     * Transform the architecture parameters defined in
- * DRAMCtrlParams to the memSpec of DRAMPower
+ * DRAMInterfaceParams to the memSpec of DRAMPower
*/
- static Data::MemArchitectureSpec getArchParams(const DRAMCtrlParams* p);
+ static Data::MemArchitectureSpec getArchParams(
+ const DRAMInterfaceParams* p);
/**
- * Transforms the timing parameters defined in DRAMCtrlParams to
+ * Transforms the timing parameters defined in DRAMInterfaceParams to
* the memSpec of DRAMPower
*/
- static Data::MemTimingSpec getTimingParams(const DRAMCtrlParams* p);
+ static Data::MemTimingSpec getTimingParams(const DRAMInterfaceParams* p);
/**
* Transforms the power and current parameters defined in
- * DRAMCtrlParam to the memSpec of DRAMPower
+ * DRAMInterfaceParams to the memSpec of DRAMPower
*/
- static Data::MemPowerSpec getPowerParams(const DRAMCtrlParams* p);
+ static Data::MemPowerSpec getPowerParams(const DRAMInterfaceParams* p);
+
+ /**
+     * Determine the data rate: 1, 2, 4, or 8 transfers per clock.
+ */
+ static uint8_t getDataRate(const DRAMInterfaceParams* p);
/**
* Determine if DRAM has two voltage domains (or one)
*/
- static bool hasTwoVDD(const DRAMCtrlParams* p);
+ static bool hasTwoVDD(const DRAMInterfaceParams* p);
/**
- * Return an instance of MemSpec based on the DRAMCtrlParams
+ * Return an instance of MemSpec based on the DRAMInterfaceParams
*/
- static Data::MemorySpecification getMemSpec(const DRAMCtrlParams* p);
+ static Data::MemorySpecification getMemSpec(const DRAMInterfaceParams* p);
public:
// Instance of DRAMPower Library
libDRAMPower powerlib;
- DRAMPower(const DRAMCtrlParams* p, bool include_io);
+ DRAMPower(const DRAMInterfaceParams* p, bool include_io);
};
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import *
-from m5.objects.AbstractMemory import AbstractMemory
+from m5.proxy import *
+from m5.objects.ClockedObject import ClockedObject
from m5.objects.QoSTurnaround import *
# QoS Queue Selection policy used to select packets among same-QoS queues
class QoSQPolicy(Enum): vals = ["fifo", "lifo", "lrg"]
-class QoSMemCtrl(AbstractMemory):
+class QoSMemCtrl(ClockedObject):
type = 'QoSMemCtrl'
cxx_header = "mem/qos/mem_ctrl.hh"
cxx_class = 'QoS::MemCtrl'
abstract = True
+ system = Param.System(Parent.any, "System that the controller belongs to.")
+
##### QoS support parameters ####
# Number of priorities in the system
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
from m5.params import *
from m5.objects.QoSMemCtrl import *
+from m5.objects.QoSMemSinkInterface import *
class QoSMemSinkCtrl(QoSMemCtrl):
type = 'QoSMemSinkCtrl'
cxx_class = "QoS::MemSinkCtrl"
port = ResponsePort("Response ports")
+
+ interface = Param.QoSMemSinkInterface(QoSMemSinkInterface(),
+ "Interface to memory")
+
# the basic configuration of the controller architecture, note
# that each entry corresponds to a burst for the specific DRAM
# configuration (e.g. x32 with burst length 8 is 32 bytes) and not
# response latency - time to issue a response once a request is serviced
response_latency = Param.Latency("20ns", "Memory response latency")
-
-
--- /dev/null
+# Copyright (c) 2020 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder. You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.objects.AbstractMemory import AbstractMemory
+
+class QoSMemSinkInterface(AbstractMemory):
+ type = 'QoSMemSinkInterface'
+ cxx_header = "mem/qos/mem_sink.hh"
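
On the config side the sink controller now carries its interface as a child parameter, defaulting to a fresh QoSMemSinkInterface. A hedged usage sketch (gem5 Python; parameter names as defined in this patch):

    from m5.objects import *

    sink = QoSMemSinkCtrl()
    # the backing store and address range live on the interface now,
    # not on the controller
    sink.interface.range = AddrRange('512MB')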
-# Copyright (c) 2018 ARM Limited
+# Copyright (c) 2018-2020 ARM Limited
# All rights reserved
#
# The license below extends only to copyright in the software and shall
SimObject('QoSMemCtrl.py')
SimObject('QoSMemSinkCtrl.py')
+SimObject('QoSMemSinkInterface.py')
SimObject('QoSPolicy.py')
SimObject('QoSTurnaround.py')
/*
- * Copyright (c) 2017-2019 ARM Limited
+ * Copyright (c) 2017-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
namespace QoS {
MemCtrl::MemCtrl(const QoSMemCtrlParams * p)
- : AbstractMemory(p),
+ : ClockedObject(p),
policy(p->qos_policy),
turnPolicy(p->qos_turnaround_policy),
queuePolicy(QueuePolicy::create(p)),
qosSyncroScheduler(p->qos_syncro_scheduler),
totalReadQueueSize(0), totalWriteQueueSize(0),
busState(READ), busStateNext(READ),
- stats(*this)
+ stats(*this),
+ _system(p->system)
{
// Set the priority policy
if (policy) {
MemCtrl::~MemCtrl()
{}
-void
-MemCtrl::init()
-{
- AbstractMemory::init();
-}
-
void
MemCtrl::logRequest(BusState dir, MasterID m_id, uint8_t qos,
Addr addr, uint64_t entries)
/*
- * Copyright (c) 2019 ARM Limited
+ * Copyright (c) 2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
*/
#include "debug/QOS.hh"
-#include "mem/abstract_mem.hh"
-#include "mem/qos/q_policy.hh"
#include "mem/qos/policy.hh"
+#include "mem/qos/q_policy.hh"
#include "params/QoSMemCtrl.hh"
+#include "sim/clocked_object.hh"
#include "sim/system.hh"
#include <unordered_map>
* which support QoS - it provides access to a set of QoS
* scheduling policies
*/
-class MemCtrl: public AbstractMemory
+class MemCtrl : public ClockedObject
{
public:
/** Bus Direction */
Stats::Scalar numStayWriteState;
} stats;
+ /** Pointer to the System object */
+ System* _system;
+
/**
* Initializes dynamically counters and
* statistics for a given Master
virtual ~MemCtrl();
- /**
- * Initializes this object
- */
- void init() override;
-
/**
* Gets the current bus state
*
* @return total number of priority levels
*/
uint8_t numPriorities() const { return _numPriorities; }
+
+    /**
+     * Read the system pointer
+     * @return pointer to the system object
+     */
+ System* system() const { return _system; }
};
template<typename Queues>
/*
- * Copyright (c) 2018 ARM Limited
+ * Copyright (c) 2018-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
#include "debug/Drain.hh"
#include "debug/QOS.hh"
#include "mem_sink.hh"
+#include "params/QoSMemSinkInterface.hh"
#include "sim/system.hh"
namespace QoS {
memoryPacketSize(p->memory_packet_size),
readBufferSize(p->read_buffer_size),
writeBufferSize(p->write_buffer_size), port(name() + ".port", *this),
+ interface(p->interface),
retryRdReq(false), retryWrReq(false), nextRequest(0), nextReqEvent(this)
{
// Resize read and write queue to allocate space
// for configured QoS priorities
readQueue.resize(numPriorities());
writeQueue.resize(numPriorities());
+
+ interface->setMemCtrl(this);
}
MemSinkCtrl::~MemSinkCtrl()
"%s Should not see packets where cache is responding\n",
__func__);
- access(pkt);
+ interface->access(pkt);
return responseLatency;
}
{
pkt->pushLabel(name());
- functionalAccess(pkt);
+ interface->functionalAccess(pkt);
pkt->popLabel();
}
// Do the actual memory access which also turns the packet
// into a response
- access(pkt);
+ interface->access(pkt);
// Log the response
logResponse(pkt->isRead()? READ : WRITE,
MemSinkCtrl::MemoryPort::getAddrRanges() const
{
AddrRangeList ranges;
- ranges.push_back(memory.getAddrRange());
+ ranges.push_back(memory.interface->getAddrRange());
return ranges;
}
return new QoS::MemSinkCtrl(this);
}
+QoSMemSinkInterface::QoSMemSinkInterface(const QoSMemSinkInterfaceParams* _p)
+ : AbstractMemory(_p)
+{
+}
+
+QoSMemSinkInterface*
+QoSMemSinkInterfaceParams::create()
+{
+ return new QoSMemSinkInterface(this);
+}
/*
- * Copyright (c) 2018 ARM Limited
+ * Copyright (c) 2018-2020 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
#ifndef __MEM_QOS_MEM_SINK_HH__
#define __MEM_QOS_MEM_SINK_HH__
+#include "mem/abstract_mem.hh"
#include "mem/qos/mem_ctrl.hh"
#include "mem/qport.hh"
#include "params/QoSMemSinkCtrl.hh"
+class QoSMemSinkInterfaceParams;
+class QoSMemSinkInterface;
+
namespace QoS {
/**
/** Memory slave port */
MemoryPort port;
+ /**
+     * Pointer to the interface of the actual media
+ */
+ QoSMemSinkInterface* const interface;
+
/** Read request pending */
bool retryRdReq;
} // namespace QoS
+class QoSMemSinkInterface : public AbstractMemory
+{
+ public:
+ /** Setting a pointer to the interface */
+    void setMemCtrl(QoS::MemSinkCtrl* _ctrl) { ctrl = _ctrl; }
+
+ /** Pointer to the controller */
+ QoS::MemSinkCtrl* ctrl;
+
+ QoSMemSinkInterface(const QoSMemSinkInterfaceParams* _p);
+};
+
+
#endif /* __MEM_QOS_MEM_SINK_HH__ */
-# Copyright (c) 2012-2013, 2017-2018 ARM Limited
+# Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
super(BaseSESystem, self).init_system(system)
def create_system(self):
- system = System(physmem = self.mem_class(),
+ if issubclass(self.mem_class, m5.objects.DRAMInterface):
+ mem_ctrl = DRAMCtrl()
+ mem_ctrl.dram = self.mem_class()
+ else:
+ mem_ctrl = self.mem_class()
+ system = System(physmem = mem_ctrl,
membus = SystemXBar(),
mem_mode = self.mem_mode,
multi_thread = (self.num_threads > 1))
else:
# create the memory controllers and connect them, stick with
# the physmem name to avoid bumping all the reference stats
- system.physmem = [self.mem_class(range = r)
- for r in system.mem_ranges]
+ if issubclass(self.mem_class, m5.objects.DRAMInterface):
+ mem_ctrls = []
+ for r in system.mem_ranges:
+ mem_ctrl = DRAMCtrl()
+ mem_ctrl.dram = self.mem_class(range = r)
+ mem_ctrls.append(mem_ctrl)
+ system.physmem = mem_ctrls
+ else:
+ system.physmem = [self.mem_class(range = r)
+ for r in system.mem_ranges]
for i in range(len(system.physmem)):
system.physmem[i].port = system.membus.master
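
The wrap-or-pass-through pattern the test configs use can be factored into a helper; a sketch under the same assumptions (m5.objects.DRAMInterface and the DRAMCtrl.dram parameter as introduced by this patch; the helper name is hypothetical):

    from m5.objects import DRAMCtrl, DRAMInterface

    def make_mem_ctrl(mem_class, rng=None):
        # wrap DRAM interfaces in a DRAMCtrl; pass other memories
        # (e.g. SimpleMemory) through unchanged
        mem = mem_class() if rng is None else mem_class(range=rng)
        if issubclass(mem_class, DRAMInterface):
            ctrl = DRAMCtrl()
            ctrl.dram = mem
            return ctrl
        return mem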