From: Wendy Elsasser Date: Sat, 8 Feb 2020 00:00:57 +0000 (-0600) Subject: mem: Make MemCtrl a ClockedObject X-Git-Tag: v20.1.0.0~73 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4acc419b6fa5ea7bbc1cf128e75be1cc263557b0;hp=518e79ad2df51c6abe0af15259c5477ec0c1425c;p=gem5.git mem: Make MemCtrl a ClockedObject Made DRAMCtrl a ClockedObject, with DRAMInterface defined as an AbstractMemory. The address ranges are now defined per interface. Currently the model only includes a DRAMInterface but this can be expanded for other media types. The controller object includes a parameter to the interface, which is setup when gem5 is configured. Change-Id: I6a368b845d574a713c7196c5671188ca8c1dc5e8 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/28968 Reviewed-by: Jason Lowe-Power Maintainer: Jason Lowe-Power Tested-by: kokoro --- diff --git a/configs/common/MemConfig.py b/configs/common/MemConfig.py index b53014524..1ace87518 100644 --- a/configs/common/MemConfig.py +++ b/configs/common/MemConfig.py @@ -40,7 +40,7 @@ import m5.objects from common import ObjectList from common import HMC -def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,\ +def create_mem_intf(intf, r, i, nbr_mem_ctrls, intlv_bits, intlv_size, xor_low_bit): """ Helper function for creating a single memoy controller from the given @@ -63,32 +63,32 @@ def create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, intlv_size,\ # Create an instance so we can figure out the address # mapping and row-buffer size - ctrl = cls() + interface = intf() # Only do this for DRAMs - if issubclass(cls, m5.objects.DRAMCtrl): + if issubclass(intf, m5.objects.DRAMInterface): # If the channel bits are appearing after the column # bits, we need to add the appropriate number of bits # for the row buffer size - if ctrl.addr_mapping.value == 'RoRaBaChCo': + if interface.addr_mapping.value == 'RoRaBaChCo': # This computation only really needs to happen # once, but as we rely on having an 
instance we # end up having to repeat it for each and every # one - rowbuffer_size = ctrl.device_rowbuffer_size.value * \ - ctrl.devices_per_rank.value + rowbuffer_size = interface.device_rowbuffer_size.value * \ + interface.devices_per_rank.value intlv_low_bit = int(math.log(rowbuffer_size, 2)) # We got all we need to configure the appropriate address # range - ctrl.range = m5.objects.AddrRange(r.start, size = r.size(), + interface.range = m5.objects.AddrRange(r.start, size = r.size(), intlvHighBit = \ intlv_low_bit + intlv_bits - 1, xorHighBit = xor_high_bit, intlvBits = intlv_bits, intlvMatch = i) - return ctrl + return interface def config_mem(options, system): """ @@ -148,10 +148,10 @@ def config_mem(options, system): if 2 ** intlv_bits != nbr_mem_ctrls: fatal("Number of memory channels must be a power of 2") - cls = ObjectList.mem_list.get(opt_mem_type) + intf = ObjectList.mem_list.get(opt_mem_type) mem_ctrls = [] - if opt_elastic_trace_en and not issubclass(cls, m5.objects.SimpleMemory): + if opt_elastic_trace_en and not issubclass(intf, m5.objects.SimpleMemory): fatal("When elastic trace is enabled, configure mem-type as " "simple-mem.") @@ -162,36 +162,53 @@ def config_mem(options, system): intlv_size = max(opt_mem_channels_intlv, system.cache_line_size.value) # For every range (most systems will only have one), create an - # array of controllers and set their parameters to match their - # address mapping in the case of a DRAM + # array of memory interfaces and set their parameters to match + # their address mapping in the case of a DRAM for r in system.mem_ranges: for i in range(nbr_mem_ctrls): - mem_ctrl = create_mem_ctrl(cls, r, i, nbr_mem_ctrls, intlv_bits, + # Create the DRAM interface + dram_intf = create_mem_intf(intf, r, i, nbr_mem_ctrls, intlv_bits, intlv_size, opt_xor_low_bit) + # Set the number of ranks based on the command-line # options if it was explicitly set - if issubclass(cls, m5.objects.DRAMCtrl) and opt_mem_ranks: - 
mem_ctrl.ranks_per_channel = opt_mem_ranks + if issubclass(intf, m5.objects.DRAMInterface) and opt_mem_ranks: + dram_intf.ranks_per_channel = opt_mem_ranks # Enable low-power DRAM states if option is set - if issubclass(cls, m5.objects.DRAMCtrl): - mem_ctrl.enable_dram_powerdown = opt_dram_powerdown + if issubclass(intf, m5.objects.DRAMInterface): + dram_intf.enable_dram_powerdown = opt_dram_powerdown if opt_elastic_trace_en: - mem_ctrl.latency = '1ns' + dram_intf.latency = '1ns' print("For elastic trace, over-riding Simple Memory " "latency to 1ns.") + # Create the controller that will drive the interface + if opt_mem_type == "HMC_2500_1x32": + # The static latency of the vault controllers is estimated + # to be smaller than a full DRAM channel controller + mem_ctrl = m5.objects.DRAMCtrl(min_writes_per_switch = 8, + static_backend_latency = '4ns', + static_frontend_latency = '4ns') + else: + mem_ctrl = m5.objects.DRAMCtrl() + + # Hookup the controller to the interface and add to the list + mem_ctrl.dram = dram_intf mem_ctrls.append(mem_ctrl) - subsystem.mem_ctrls = mem_ctrls - - # Connect the controllers to the membus - for i in range(len(subsystem.mem_ctrls)): + # Create a controller and connect the interfaces to a controller + for i in range(len(mem_ctrls)): if opt_mem_type == "HMC_2500_1x32": - subsystem.mem_ctrls[i].port = xbar[i/4].master + # Connect the controllers to the membus + mem_ctrls[i].port = xbar[i/4].master # Set memory device size. There is an independent controller for # each vault. All vaults are same size. 
- subsystem.mem_ctrls[i].device_size = options.hmc_dev_vault_size + mem_ctrls[i].dram.device_size = options.hmc_dev_vault_size else: - subsystem.mem_ctrls[i].port = xbar.master + # Connect the controllers to the membus + mem_ctrls[i].port = xbar.master + + subsystem.mem_ctrls = mem_ctrls + diff --git a/configs/dram/low_power_sweep.py b/configs/dram/low_power_sweep.py index 9a6239362..0da2b935b 100644 --- a/configs/dram/low_power_sweep.py +++ b/configs/dram/low_power_sweep.py @@ -111,14 +111,19 @@ MemConfig.config_mem(args, system) # Sanity check for memory controller class. if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl): - fatal("This script assumes the memory is a DRAMCtrl subclass") + fatal("This script assumes the controller is a DRAMCtrl subclass") +if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface): + fatal("This script assumes the memory is a DRAMInterface subclass") # There is no point slowing things down by saving any data. -system.mem_ctrls[0].null = True +system.mem_ctrls[0].dram.null = True + +# enable DRAM low power states +system.mem_ctrls[0].dram.enable_dram_powerdown = True # Set the address mapping based on input argument -system.mem_ctrls[0].addr_mapping = args.addr_map -system.mem_ctrls[0].page_policy = args.page_policy +system.mem_ctrls[0].dram.addr_mapping = args.addr_map +system.mem_ctrls[0].dram.page_policy = args.page_policy # We create a traffic generator state for each param combination we want to # test. 
Each traffic generator state is specified in the config file and the @@ -132,22 +137,22 @@ cfg_file_path = os.path.dirname(__file__) + "/" +cfg_file_name cfg_file = open(cfg_file_path, 'w') # Get the number of banks -nbr_banks = int(system.mem_ctrls[0].banks_per_rank.value) +nbr_banks = int(system.mem_ctrls[0].dram.banks_per_rank.value) # determine the burst size in bytes -burst_size = int((system.mem_ctrls[0].devices_per_rank.value * - system.mem_ctrls[0].device_bus_width.value * - system.mem_ctrls[0].burst_length.value) / 8) +burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value * + system.mem_ctrls[0].dram.device_bus_width.value * + system.mem_ctrls[0].dram.burst_length.value) / 8) # next, get the page size in bytes (the rowbuffer size is already in bytes) -page_size = system.mem_ctrls[0].devices_per_rank.value * \ - system.mem_ctrls[0].device_rowbuffer_size.value +page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \ + system.mem_ctrls[0].dram.device_rowbuffer_size.value # Inter-request delay should be such that we can hit as many transitions # to/from low power states as possible to. We provide a min and max itt to the # traffic generator and it randomises in the range. The parameter is in # seconds and we need it in ticks (ps). -itt_min = system.mem_ctrls[0].tBURST.value * 1000000000000 +itt_min = system.mem_ctrls[0].dram.tBURST.value * 1000000000000 #The itt value when set to (tRAS + tRP + tCK) covers the case where # a read command is delayed beyond the delay from ACT to PRE_PDN entry of the @@ -155,9 +160,9 @@ itt_min = system.mem_ctrls[0].tBURST.value * 1000000000000 # between a write and power down entry will be tRCD + tCL + tWR + tRP + tCK. # As we use this delay as a unit and create multiples of it as bigger delays # for the sweep, this parameter works for reads, writes and mix of them. 
-pd_entry_time = (system.mem_ctrls[0].tRAS.value + - system.mem_ctrls[0].tRP.value + - system.mem_ctrls[0].tCK.value) * 1000000000000 +pd_entry_time = (system.mem_ctrls[0].dram.tRAS.value + + system.mem_ctrls[0].dram.tRP.value + + system.mem_ctrls[0].dram.tCK.value) * 1000000000000 # We sweep itt max using the multipliers specified by the user. itt_max_str = args.itt_list.strip().split() diff --git a/configs/dram/sweep.py b/configs/dram/sweep.py index a340b46a5..a771c5c9f 100644 --- a/configs/dram/sweep.py +++ b/configs/dram/sweep.py @@ -116,13 +116,15 @@ MemConfig.config_mem(options, system) # the following assumes that we are using the native DRAM # controller, check to be sure if not isinstance(system.mem_ctrls[0], m5.objects.DRAMCtrl): - fatal("This script assumes the memory is a DRAMCtrl subclass") + fatal("This script assumes the controller is a DRAMCtrl subclass") +if not isinstance(system.mem_ctrls[0].dram, m5.objects.DRAMInterface): + fatal("This script assumes the memory is a DRAMInterface subclass") # there is no point slowing things down by saving any data -system.mem_ctrls[0].null = True +system.mem_ctrls[0].dram.null = True # Set the address mapping based on input argument -system.mem_ctrls[0].addr_mapping = options.addr_map +system.mem_ctrls[0].dram.addr_mapping = options.addr_map # stay in each state for 0.25 ms, long enough to warm things up, and # short enough to avoid hitting a refresh @@ -133,21 +135,21 @@ period = 250000000 # the DRAM maximum bandwidth to ensure that it is saturated # get the number of banks -nbr_banks = system.mem_ctrls[0].banks_per_rank.value +nbr_banks = system.mem_ctrls[0].dram.banks_per_rank.value # determine the burst length in bytes -burst_size = int((system.mem_ctrls[0].devices_per_rank.value * - system.mem_ctrls[0].device_bus_width.value * - system.mem_ctrls[0].burst_length.value) / 8) +burst_size = int((system.mem_ctrls[0].dram.devices_per_rank.value * + system.mem_ctrls[0].dram.device_bus_width.value * + 
system.mem_ctrls[0].dram.burst_length.value) / 8) # next, get the page size in bytes -page_size = system.mem_ctrls[0].devices_per_rank.value * \ - system.mem_ctrls[0].device_rowbuffer_size.value +page_size = system.mem_ctrls[0].dram.devices_per_rank.value * \ + system.mem_ctrls[0].dram.device_rowbuffer_size.value # match the maximum bandwidth of the memory, the parameter is in seconds # and we need it in ticks (ps) -itt = getattr(system.mem_ctrls[0].tBURST_MIN, 'value', - system.mem_ctrls[0].tBURST.value) * 1000000000000 +itt = getattr(system.mem_ctrls[0].dram.tBURST_MIN, 'value', + system.mem_ctrls[0].dram.tBURST.value) * 1000000000000 # assume we start at 0 max_addr = mem_range.end diff --git a/configs/example/memcheck.py b/configs/example/memcheck.py index 6d80d603a..6bccd54db 100644 --- a/configs/example/memcheck.py +++ b/configs/example/memcheck.py @@ -217,7 +217,7 @@ cfg_file.close() proto_tester = TrafficGen(config_file = cfg_file_path) # Set up the system along with a DRAM controller -system = System(physmem = DDR3_1600_8x8()) +system = System(physmem = DRAMCtrl(dram = DDR3_1600_8x8())) system.voltage_domain = VoltageDomain(voltage = '1V') diff --git a/configs/learning_gem5/part1/simple.py b/configs/learning_gem5/part1/simple.py index ef73a06ee..cfd15bebb 100644 --- a/configs/learning_gem5/part1/simple.py +++ b/configs/learning_gem5/part1/simple.py @@ -77,8 +77,9 @@ if m5.defines.buildEnv['TARGET_ISA'] == "x86": system.cpu.interrupts[0].int_slave = system.membus.master # Create a DDR3 memory controller and connect it to the membus -system.mem_ctrl = DDR3_1600_8x8() -system.mem_ctrl.range = system.mem_ranges[0] +system.mem_ctrl = DRAMCtrl() +system.mem_ctrl.dram = DDR3_1600_8x8() +system.mem_ctrl.dram.range = system.mem_ranges[0] system.mem_ctrl.port = system.membus.master # Connect the system up to the membus diff --git a/configs/learning_gem5/part1/two_level.py b/configs/learning_gem5/part1/two_level.py index 564c785b5..0dbcfc7ff 100644 --- 
a/configs/learning_gem5/part1/two_level.py +++ b/configs/learning_gem5/part1/two_level.py @@ -132,8 +132,9 @@ if m5.defines.buildEnv['TARGET_ISA'] == "x86": system.system_port = system.membus.slave # Create a DDR3 memory controller -system.mem_ctrl = DDR3_1600_8x8() -system.mem_ctrl.range = system.mem_ranges[0] +system.mem_ctrl = DRAMCtrl() +system.mem_ctrl.dram = DDR3_1600_8x8() +system.mem_ctrl.dram.range = system.mem_ranges[0] system.mem_ctrl.port = system.membus.master # Create a process for a simple "Hello World" application diff --git a/configs/learning_gem5/part2/simple_cache.py b/configs/learning_gem5/part2/simple_cache.py index 8d98d9244..fbea73d97 100644 --- a/configs/learning_gem5/part2/simple_cache.py +++ b/configs/learning_gem5/part2/simple_cache.py @@ -76,8 +76,9 @@ system.cpu.interrupts[0].int_master = system.membus.slave system.cpu.interrupts[0].int_slave = system.membus.master # Create a DDR3 memory controller and connect it to the membus -system.mem_ctrl = DDR3_1600_8x8() -system.mem_ctrl.range = system.mem_ranges[0] +system.mem_ctrl = DRAMCtrl() +system.mem_ctrl.dram = DDR3_1600_8x8() +system.mem_ctrl.dram.range = system.mem_ranges[0] system.mem_ctrl.port = system.membus.master # Connect the system up to the membus diff --git a/configs/learning_gem5/part2/simple_memobj.py b/configs/learning_gem5/part2/simple_memobj.py index d30977ce4..e792eb9bb 100644 --- a/configs/learning_gem5/part2/simple_memobj.py +++ b/configs/learning_gem5/part2/simple_memobj.py @@ -74,8 +74,9 @@ system.cpu.interrupts[0].int_master = system.membus.slave system.cpu.interrupts[0].int_slave = system.membus.master # Create a DDR3 memory controller and connect it to the membus -system.mem_ctrl = DDR3_1600_8x8() -system.mem_ctrl.range = system.mem_ranges[0] +system.mem_ctrl = DRAMCtrl() +system.mem_ctrl.dram = DDR3_1600_8x8() +system.mem_ctrl.dram.range = system.mem_ranges[0] system.mem_ctrl.port = system.membus.master # Connect the system up to the membus diff --git 
a/configs/learning_gem5/part3/simple_ruby.py b/configs/learning_gem5/part3/simple_ruby.py index c47ee7e67..7f70a8c7d 100644 --- a/configs/learning_gem5/part3/simple_ruby.py +++ b/configs/learning_gem5/part3/simple_ruby.py @@ -68,8 +68,9 @@ system.mem_ranges = [AddrRange('512MB')] # Create an address range system.cpu = [TimingSimpleCPU() for i in range(2)] # Create a DDR3 memory controller and connect it to the membus -system.mem_ctrl = DDR3_1600_8x8() -system.mem_ctrl.range = system.mem_ranges[0] +system.mem_ctrl = DRAMCtrl() +system.mem_ctrl.dram = DDR3_1600_8x8() +system.mem_ctrl.dram.range = system.mem_ranges[0] # create the interrupt controller for the CPU and connect to the membus for cpu in system.cpu: diff --git a/configs/ruby/Ruby.py b/configs/ruby/Ruby.py index 9bceaa346..9f400a8a9 100644 --- a/configs/ruby/Ruby.py +++ b/configs/ruby/Ruby.py @@ -130,15 +130,16 @@ def setup_memory_controllers(system, ruby, dir_cntrls, options): dir_ranges = [] for r in system.mem_ranges: mem_type = ObjectList.mem_list.get(options.mem_type) - mem_ctrl = MemConfig.create_mem_ctrl(mem_type, r, index, + dram_intf = MemConfig.create_mem_intf(mem_type, r, index, options.num_dirs, int(math.log(options.num_dirs, 2)), intlv_size, options.xor_low_bit) + mem_ctrl = m5.objects.DRAMCtrl(dram = dram_intf) if options.access_backing_store: mem_ctrl.kvm_map=False mem_ctrls.append(mem_ctrl) - dir_ranges.append(mem_ctrl.range) + dir_ranges.append(mem_ctrl.dram.range) if crossbar != None: mem_ctrl.port = crossbar.master diff --git a/src/mem/DRAMCtrl.py b/src/mem/DRAMCtrl.py index 0f70dffec..b7b43dca7 100644 --- a/src/mem/DRAMCtrl.py +++ b/src/mem/DRAMCtrl.py @@ -40,26 +40,12 @@ from m5.params import * from m5.proxy import * -from m5.objects.AbstractMemory import * from m5.objects.QoSMemCtrl import * # Enum for memory scheduling algorithms, currently First-Come # First-Served and a First-Row Hit then First-Come First-Served class MemSched(Enum): vals = ['fcfs', 'frfcfs'] -# Enum for the address 
mapping. With Ch, Ra, Ba, Ro and Co denoting -# channel, rank, bank, row and column, respectively, and going from -# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are -# suitable for an open-page policy, optimising for sequential accesses -# hitting in the open row. For a closed-page policy, RoCoRaBaCh -# maximises parallelism. -class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh'] - -# Enum for the page policy, either open, open_adaptive, close, or -# close_adaptive. -class PageManage(Enum): vals = ['open', 'open_adaptive', 'close', - 'close_adaptive'] - # DRAMCtrl is a single-channel single-ported DRAM controller model # that aims to model the most important system-level performance # effects of a DRAM without getting into too much detail of the DRAM @@ -72,12 +58,11 @@ class DRAMCtrl(QoSMemCtrl): # bus in front of the controller for multiple ports port = SlavePort("Slave port") - # the basic configuration of the controller architecture, note - # that each entry corresponds to a burst for the specific DRAM - # configuration (e.g. 
x32 with burst length 8 is 32 bytes) and not - # the cacheline size or request/packet size - write_buffer_size = Param.Unsigned(64, "Number of write queue entries") - read_buffer_size = Param.Unsigned(32, "Number of read queue entries") + # Interface to volatile, DRAM media + dram = Param.DRAMInterface("DRAM interface") + + # read and write buffer depths are set in the interface + # the controller will read these values when instantiated # threshold in percent for when to forcefully trigger writes and # start emptying the write buffer @@ -93,15 +78,6 @@ class DRAMCtrl(QoSMemCtrl): # scheduler, address map and page policy mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy") - addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy") - page_policy = Param.PageManage('open_adaptive', "Page management policy") - - # enforce a limit on the number of accesses per row - max_accesses_per_row = Param.Unsigned(16, "Max accesses per row before " - "closing"); - - # size of DRAM Chip in Bytes - device_size = Param.MemorySize("Size of DRAM chip") # pipeline latency of the controller and PHY, split into a # frontend part and a backend part, with reads and writes serviced @@ -109,1404 +85,3 @@ class DRAMCtrl(QoSMemCtrl): # serviced by the memory seeing the sum of the two static_frontend_latency = Param.Latency("10ns", "Static frontend latency") static_backend_latency = Param.Latency("10ns", "Static backend latency") - - # the physical organisation of the DRAM - device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "\ - "device/chip") - burst_length = Param.Unsigned("Burst lenght (BL) in beats") - device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\ - "device/chip") - devices_per_rank = Param.Unsigned("Number of devices/chips per rank") - ranks_per_channel = Param.Unsigned("Number of ranks per channel") - - # default to 0 bank groups per rank, indicating bank group architecture - # is not used - # update per memory 
class when bank group architecture is supported - bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank") - banks_per_rank = Param.Unsigned("Number of banks per rank") - - # Enable DRAM powerdown states if True. This is False by default due to - # performance being lower when enabled - enable_dram_powerdown = Param.Bool(False, "Enable powerdown states") - - # For power modelling we need to know if the DRAM has a DLL or not - dll = Param.Bool(True, "DRAM has DLL or not") - - # DRAMPower provides in addition to the core power, the possibility to - # include RD/WR termination and IO power. This calculation assumes some - # default values. The integration of DRAMPower with gem5 does not include - # IO and RD/WR termination power by default. This might be added as an - # additional feature in the future. - - # timing behaviour and constraints - all in nanoseconds - - # the base clock period of the DRAM - tCK = Param.Latency("Clock period") - - # the amount of time in nanoseconds from issuing an activate command - # to the data being available in the row buffer for a read/write - tRCD = Param.Latency("RAS to CAS delay") - - # the time from issuing a read/write command to seeing the actual data - tCL = Param.Latency("CAS latency") - - # minimum time between a precharge and subsequent activate - tRP = Param.Latency("Row precharge time") - - # minimum time between an activate and a precharge to the same row - tRAS = Param.Latency("ACT to PRE delay") - - # minimum time between a write data transfer and a precharge - tWR = Param.Latency("Write recovery time") - - # minimum time between a read and precharge command - tRTP = Param.Latency("Read to precharge") - - # time to complete a burst transfer, typically the burst length - # divided by two due to the DDR bus, but by making it a parameter - # it is easier to also evaluate SDR memories like WideIO. - # This parameter has to account for burst length. 
- # Read/Write requests with data size larger than one full burst are broken - # down into multiple requests in the controller - # tBURST is equivalent to the CAS-to-CAS delay (tCCD) - # With bank group architectures, tBURST represents the CAS-to-CAS - # delay for bursts to different bank groups (tCCD_S) - tBURST = Param.Latency("Burst duration " - "(typically burst length / 2 cycles)") - - # tBURST_MAX is the column array cycle delay required before next access, - # which could be greater than tBURST when the memory access time is greater - # than tBURST - tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay") - - # tBURST_MIN is the minimum delay between bursts, which could be less than - # tBURST when interleaving is supported - tBURST_MIN = Param.Latency(Self.tBURST, "Minimim delay between bursts") - - # CAS-to-CAS delay for bursts to the same bank group - # only utilized with bank group architectures; set to 0 for default case - # tBURST is equivalent to tCCD_S; no explicit parameter required - # for CAS-to-CAS delay for bursts to different bank groups - tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay") - - # Write-to-Write delay for bursts to the same bank group - # only utilized with bank group architectures; set to 0 for default case - # This will be used to enable different same bank group delays - # for writes versus reads - tCCD_L_WR = Param.Latency(Self.tCCD_L, - "Same bank group Write to Write delay") - - # time taken to complete one refresh cycle (N rows in all banks) - tRFC = Param.Latency("Refresh cycle time") - - # refresh command interval, how often a "ref" command needs - # to be sent. 
It is 7.8 us for a 64ms refresh requirement - tREFI = Param.Latency("Refresh command interval") - - # write-to-read, same rank turnaround penalty - tWTR = Param.Latency("Write to read, same rank switching time") - - # write-to-read, same rank turnaround penalty for same bank group - tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching " - "time, same bank group") - - # read-to-write, same rank turnaround penalty - tRTW = Param.Latency("Read to write, same rank switching time") - - # rank-to-rank bus delay penalty - # this does not correlate to a memory timing parameter and encompasses: - # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD - # different rank bus delay - tCS = Param.Latency("Rank to rank switching time") - - # minimum precharge to precharge delay time - tPPD = Param.Latency("0ns", "PRE to PRE delay") - - # maximum delay between two-cycle ACT command phases - tAAD = Param.Latency(Self.tCK, - "Maximum delay between two-cycle ACT commands") - - two_cycle_activate = Param.Bool(False, - "Two cycles required to send activate") - - # minimum row activate to row activate delay time - tRRD = Param.Latency("ACT to ACT delay") - - # only utilized with bank group architectures; set to 0 for default case - tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay") - - # time window in which a maximum number of activates are allowed - # to take place, set to 0 to disable - tXAW = Param.Latency("X activation window") - activation_limit = Param.Unsigned("Max number of activates in window") - - # time to exit power-down mode - # Exit power-down to next valid command delay - tXP = Param.Latency("0ns", "Power-up Delay") - - # Exit Powerdown to commands requiring a locked DLL - tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL") - - # time to exit self-refresh mode - tXS = Param.Latency("0ns", "Self-refresh exit latency") - - # time to exit self-refresh mode with locked DLL - tXSDLL = Param.Latency("0ns", "Self-refresh exit 
latency DLL") - - # number of data beats per clock. with DDR, default is 2, one per edge - beats_per_clock = Param.Unsigned(2, "Data beats per clock") - - data_clock_sync = Param.Bool(False, "Synchronization commands required") - - # Currently rolled into other params - ###################################################################### - - # tRC - assumed to be tRAS + tRP - - # Power Behaviour and Constraints - # DRAMs like LPDDR and WideIO have 2 external voltage domains. These are - # defined as VDD and VDD2. Each current is defined for each voltage domain - # separately. For example, current IDD0 is active-precharge current for - # voltage domain VDD and current IDD02 is active-precharge current for - # voltage domain VDD2. - # By default all currents are set to 0mA. Users who are only interested in - # the performance of DRAMs can leave them at 0. - - # Operating 1 Bank Active-Precharge current - IDD0 = Param.Current("0mA", "Active precharge current") - - # Operating 1 Bank Active-Precharge current multiple voltage Range - IDD02 = Param.Current("0mA", "Active precharge current VDD2") - - # Precharge Power-down Current: Slow exit - IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow") - - # Precharge Power-down Current: Slow exit multiple voltage Range - IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2") - - # Precharge Power-down Current: Fast exit - IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast") - - # Precharge Power-down Current: Fast exit multiple voltage Range - IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2") - - # Precharge Standby current - IDD2N = Param.Current("0mA", "Precharge Standby current") - - # Precharge Standby current multiple voltage range - IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2") - - # Active Power-down current: slow exit - IDD3P0 = Param.Current("0mA", "Active Powerdown slow") - - # Active Power-down current: slow exit multiple voltage range - IDD3P02 = 
Param.Current("0mA", "Active Powerdown slow VDD2") - - # Active Power-down current : fast exit - IDD3P1 = Param.Current("0mA", "Active Powerdown fast") - - # Active Power-down current : fast exit multiple voltage range - IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2") - - # Active Standby current - IDD3N = Param.Current("0mA", "Active Standby current") - - # Active Standby current multiple voltage range - IDD3N2 = Param.Current("0mA", "Active Standby current VDD2") - - # Burst Read Operating Current - IDD4R = Param.Current("0mA", "READ current") - - # Burst Read Operating Current multiple voltage range - IDD4R2 = Param.Current("0mA", "READ current VDD2") - - # Burst Write Operating Current - IDD4W = Param.Current("0mA", "WRITE current") - - # Burst Write Operating Current multiple voltage range - IDD4W2 = Param.Current("0mA", "WRITE current VDD2") - - # Refresh Current - IDD5 = Param.Current("0mA", "Refresh current") - - # Refresh Current multiple voltage range - IDD52 = Param.Current("0mA", "Refresh current VDD2") - - # Self-Refresh Current - IDD6 = Param.Current("0mA", "Self-refresh Current") - - # Self-Refresh Current multiple voltage range - IDD62 = Param.Current("0mA", "Self-refresh Current VDD2") - - # Main voltage range of the DRAM - VDD = Param.Voltage("0V", "Main Voltage Range") - - # Second voltage range defined by some DRAMs - VDD2 = Param.Voltage("0V", "2nd Voltage Range") - -# A single DDR3-1600 x64 channel (one command and address bus), with -# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in -# an 8x8 configuration. 
-class DDR3_1600_8x8(DRAMCtrl): - # size of device in bytes - device_size = '512MB' - - # 8x8 configuration, 8 devices each with an 8-bit interface - device_bus_width = 8 - - # DDR3 is a BL8 device - burst_length = 8 - - # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8) - device_rowbuffer_size = '1kB' - - # 8x8 configuration, so 8 devices - devices_per_rank = 8 - - # Use two ranks - ranks_per_channel = 2 - - # DDR3 has 8 banks in all configurations - banks_per_rank = 8 - - # 800 MHz - tCK = '1.25ns' - - # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz - tBURST = '5ns' - - # DDR3-1600 11-11-11 - tRCD = '13.75ns' - tCL = '13.75ns' - tRP = '13.75ns' - tRAS = '35ns' - tRRD = '6ns' - tXAW = '30ns' - activation_limit = 4 - tRFC = '260ns' - - tWR = '15ns' - - # Greater of 4 CK or 7.5 ns - tWTR = '7.5ns' - - # Greater of 4 CK or 7.5 ns - tRTP = '7.5ns' - - # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns - tRTW = '2.5ns' - - # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns - tCS = '2.5ns' - - # <=85C, half for >85C - tREFI = '7.8us' - - # active powerdown and precharge powerdown exit time - tXP = '6ns' - - # self refresh exit time - tXS = '270ns' - - # Current values from datasheet Die Rev E,J - IDD0 = '55mA' - IDD2N = '32mA' - IDD3N = '38mA' - IDD4W = '125mA' - IDD4R = '157mA' - IDD5 = '235mA' - IDD3P1 = '38mA' - IDD2P1 = '32mA' - IDD6 = '20mA' - VDD = '1.5V' - -# A single HMC-2500 x32 model based on: -# [1] DRAMSpec: a high-level DRAM bank modelling tool -# developed at the University of Kaiserslautern. This high level tool -# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to -# estimate the DRAM bank latency and power numbers. -# [2] High performance AXI-4.0 based interconnect for extensible smart memory -# cubes (E. Azarkhish et. al) -# Assumed for the HMC model is a 30 nm technology node. 
-# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory (4 -# layers). -# Each layer has 16 vaults and each vault consists of 2 banks per layer. -# In order to be able to use the same controller used for 2D DRAM generations -# for HMC, the following analogy is done: -# Channel (DDR) => Vault (HMC) -# device_size (DDR) => size of a single layer in a vault -# ranks per channel (DDR) => number of layers -# banks per rank (DDR) => banks per layer -# devices per rank (DDR) => devices per layer ( 1 for HMC). -# The parameters for which no input is available are inherited from the DDR3 -# configuration. -# This configuration includes the latencies from the DRAM to the logic layer -# of the HMC -class HMC_2500_1x32(DDR3_1600_8x8): - # size of device - # two banks per device with each bank 4MB [2] - device_size = '8MB' - - # 1x32 configuration, 1 device with 32 TSVs [2] - device_bus_width = 32 - - # HMC is a BL8 device [2] - burst_length = 8 - - # Each device has a page (row buffer) size of 256 bytes [2] - device_rowbuffer_size = '256B' - - # 1x32 configuration, so 1 device [2] - devices_per_rank = 1 - - # 4 layers so 4 ranks [2] - ranks_per_channel = 4 - - # HMC has 2 banks per layer [2] - # Each layer represents a rank. With 4 layers and 8 banks in total, each - # layer has 2 banks; thus 2 banks per rank. - banks_per_rank = 2 - - # 1250 MHz [2] - tCK = '0.8ns' - - # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz - tBURST = '3.2ns' - - # Values using DRAMSpec HMC model [1] - tRCD = '10.2ns' - tCL = '9.9ns' - tRP = '7.7ns' - tRAS = '21.6ns' - - # tRRD depends on the power supply network for each vendor. - # We assume a tRRD of a double bank approach to be equal to 4 clock - # cycles (Assumption) - tRRD = '3.2ns' - - # activation limit is set to 0 since there are only 2 banks per vault - # layer. 
- activation_limit = 0 - - # Values using DRAMSpec HMC model [1] - tRFC = '59ns' - tWR = '8ns' - tRTP = '4.9ns' - - # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz = - # 0.8 ns (Assumption) - tCS = '0.8ns' - - # Value using DRAMSpec HMC model [1] - tREFI = '3.9us' - - # The default page policy in the vault controllers is simple closed page - # [2] nevertheless 'close' policy opens and closes the row multiple times - # for bursts largers than 32Bytes. For this reason we use 'close_adaptive' - page_policy = 'close_adaptive' - - # RoCoRaBaCh resembles the default address mapping in HMC - addr_mapping = 'RoCoRaBaCh' - min_writes_per_switch = 8 - - # These parameters do not directly correlate with buffer_size in real - # hardware. Nevertheless, their value has been tuned to achieve a - # bandwidth similar to the cycle-accurate model in [2] - write_buffer_size = 32 - read_buffer_size = 32 - - # The static latency of the vault controllers is estimated to be smaller - # than a full DRAM channel controller - static_backend_latency='4ns' - static_frontend_latency='4ns' - -# A single DDR3-2133 x64 channel refining a selected subset of the -# options for the DDR-1600 configuration, based on the same DDR3-1600 -# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept -# consistent across the two configurations. 
-class DDR3_2133_8x8(DDR3_1600_8x8): - # 1066 MHz - tCK = '0.938ns' - - # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz - tBURST = '3.752ns' - - # DDR3-2133 14-14-14 - tRCD = '13.09ns' - tCL = '13.09ns' - tRP = '13.09ns' - tRAS = '33ns' - tRRD = '5ns' - tXAW = '25ns' - - # Current values from datasheet - IDD0 = '70mA' - IDD2N = '37mA' - IDD3N = '44mA' - IDD4W = '157mA' - IDD4R = '191mA' - IDD5 = '250mA' - IDD3P1 = '44mA' - IDD2P1 = '43mA' - IDD6 ='20mA' - VDD = '1.5V' - -# A single DDR4-2400 x64 channel (one command and address bus), with -# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4) -# in an 16x4 configuration. -# Total channel capacity is 32GB -# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel -class DDR4_2400_16x4(DRAMCtrl): - # size of device - device_size = '1GB' - - # 16x4 configuration, 16 devices each with a 4-bit interface - device_bus_width = 4 - - # DDR4 is a BL8 device - burst_length = 8 - - # Each device has a page (row buffer) size of 512 byte (1K columns x4) - device_rowbuffer_size = '512B' - - # 16x4 configuration, so 16 devices - devices_per_rank = 16 - - # Match our DDR3 configurations which is dual rank - ranks_per_channel = 2 - - # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups - # Set to 4 for x4 case - bank_groups_per_rank = 4 - - # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all - # configurations). 
Currently we do not capture the additional - # constraints incurred by the bank groups - banks_per_rank = 16 - - # override the default buffer sizes and go for something larger to - # accommodate the larger bank count - write_buffer_size = 128 - read_buffer_size = 64 - - # 1200 MHz - tCK = '0.833ns' - - # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz - # tBURST is equivalent to the CAS-to-CAS delay (tCCD) - # With bank group architectures, tBURST represents the CAS-to-CAS - # delay for bursts to different bank groups (tCCD_S) - tBURST = '3.332ns' - - # @2400 data rate, tCCD_L is 6 CK - # CAS-to-CAS delay for bursts to the same bank group - # tBURST is equivalent to tCCD_S; no explicit parameter required - # for CAS-to-CAS delay for bursts to different bank groups - tCCD_L = '5ns'; - - # DDR4-2400 17-17-17 - tRCD = '14.16ns' - tCL = '14.16ns' - tRP = '14.16ns' - tRAS = '32ns' - - # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns) - tRRD = '3.332ns' - - # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns) - tRRD_L = '4.9ns'; - - # tFAW for 512B page is MAX(16 CK, 13ns) - tXAW = '13.328ns' - activation_limit = 4 - # tRFC is 350ns - tRFC = '350ns' - - tWR = '15ns' - - # Here using the average of WTR_S and WTR_L - tWTR = '5ns' - - # Greater of 4 CK or 7.5 ns - tRTP = '7.5ns' - - # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666 ns - tRTW = '1.666ns' - - # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns - tCS = '1.666ns' - - # <=85C, half for >85C - tREFI = '7.8us' - - # active powerdown and precharge powerdown exit time - tXP = '6ns' - - # self refresh exit time - # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is: - # tRFC + 10ns = 340ns - tXS = '340ns' - - # Current values from datasheet - IDD0 = '43mA' - IDD02 = '3mA' - IDD2N = '34mA' - IDD3N = '38mA' - IDD3N2 = '3mA' - IDD4W = '103mA' - IDD4R = '110mA' - IDD5 = '250mA' - IDD3P1 = '32mA' - IDD2P1 = '25mA' - IDD6 = '30mA' - 
VDD = '1.2V' - VDD2 = '2.5V' - -# A single DDR4-2400 x64 channel (one command and address bus), with -# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8) -# in an 8x8 configuration. -# Total channel capacity is 16GB -# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel -class DDR4_2400_8x8(DDR4_2400_16x4): - # 8x8 configuration, 8 devices each with an 8-bit interface - device_bus_width = 8 - - # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8) - device_rowbuffer_size = '1kB' - - # 8x8 configuration, so 8 devices - devices_per_rank = 8 - - # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns) - tRRD_L = '4.9ns'; - - tXAW = '21ns' - - # Current values from datasheet - IDD0 = '48mA' - IDD3N = '43mA' - IDD4W = '123mA' - IDD4R = '135mA' - IDD3P1 = '37mA' - -# A single DDR4-2400 x64 channel (one command and address bus), with -# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16) -# in an 4x16 configuration. -# Total channel capacity is 4GB -# 4 devices/rank * 1 ranks/channel * 1GB/device = 4GB/channel -class DDR4_2400_4x16(DDR4_2400_16x4): - # 4x16 configuration, 4 devices each with an 16-bit interface - device_bus_width = 16 - - # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16) - device_rowbuffer_size = '2kB' - - # 4x16 configuration, so 4 devices - devices_per_rank = 4 - - # Single rank for x16 - ranks_per_channel = 1 - - # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups - # Set to 2 for x16 case - bank_groups_per_rank = 2 - - # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups in all - # configurations). 
Currently we do not capture the additional - # constraints incurred by the bank groups - banks_per_rank = 8 - - # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns) - tRRD = '5.3ns' - - # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns) - tRRD_L = '6.4ns'; - - tXAW = '30ns' - - # Current values from datasheet - IDD0 = '80mA' - IDD02 = '4mA' - IDD2N = '34mA' - IDD3N = '47mA' - IDD4W = '228mA' - IDD4R = '243mA' - IDD5 = '280mA' - IDD3P1 = '41mA' - -# A single LPDDR2-S4 x32 interface (one command/address bus), with -# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1) -# in a 1x32 configuration. -class LPDDR2_S4_1066_1x32(DRAMCtrl): - # No DLL in LPDDR2 - dll = False - - # size of device - device_size = '512MB' - - # 1x32 configuration, 1 device with a 32-bit interface - device_bus_width = 32 - - # LPDDR2_S4 is a BL4 and BL8 device - burst_length = 8 - - # Each device has a page (row buffer) size of 1KB - # (this depends on the memory density) - device_rowbuffer_size = '1kB' - - # 1x32 configuration, so 1 device - devices_per_rank = 1 - - # Use a single rank - ranks_per_channel = 1 - - # LPDDR2-S4 has 8 banks in all configurations - banks_per_rank = 8 - - # 533 MHz - tCK = '1.876ns' - - # Fixed at 15 ns - tRCD = '15ns' - - # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time - tCL = '15ns' - - # Pre-charge one bank 15 ns (all banks 18 ns) - tRP = '15ns' - - tRAS = '42ns' - tWR = '15ns' - - tRTP = '7.5ns' - - # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz. - # Note this is a BL8 DDR device. 
- # Requests larger than 32 bytes are broken down into multiple requests - # in the controller - tBURST = '7.5ns' - - # LPDDR2-S4, 4 Gbit - tRFC = '130ns' - tREFI = '3.9us' - - # active powerdown and precharge powerdown exit time - tXP = '7.5ns' - - # self refresh exit time - tXS = '140ns' - - # Irrespective of speed grade, tWTR is 7.5 ns - tWTR = '7.5ns' - - # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns - tRTW = '3.75ns' - - # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns - tCS = '3.75ns' - - # Activate to activate irrespective of density and speed grade - tRRD = '10.0ns' - - # Irrespective of density, tFAW is 50 ns - tXAW = '50ns' - activation_limit = 4 - - # Current values from datasheet - IDD0 = '15mA' - IDD02 = '70mA' - IDD2N = '2mA' - IDD2N2 = '30mA' - IDD3N = '2.5mA' - IDD3N2 = '30mA' - IDD4W = '10mA' - IDD4W2 = '190mA' - IDD4R = '3mA' - IDD4R2 = '220mA' - IDD5 = '40mA' - IDD52 = '150mA' - IDD3P1 = '1.2mA' - IDD3P12 = '8mA' - IDD2P1 = '0.6mA' - IDD2P12 = '0.8mA' - IDD6 = '1mA' - IDD62 = '3.2mA' - VDD = '1.8V' - VDD2 = '1.2V' - -# A single WideIO x128 interface (one command and address bus), with -# default timings based on an estimated WIO-200 8 Gbit part. 
-class WideIO_200_1x128(DRAMCtrl): - # No DLL for WideIO - dll = False - - # size of device - device_size = '1024MB' - - # 1x128 configuration, 1 device with a 128-bit interface - device_bus_width = 128 - - # This is a BL4 device - burst_length = 4 - - # Each device has a page (row buffer) size of 4KB - # (this depends on the memory density) - device_rowbuffer_size = '4kB' - - # 1x128 configuration, so 1 device - devices_per_rank = 1 - - # Use one rank for a one-high die stack - ranks_per_channel = 1 - - # WideIO has 4 banks in all configurations - banks_per_rank = 4 - - # 200 MHz - tCK = '5ns' - - # WIO-200 - tRCD = '18ns' - tCL = '18ns' - tRP = '18ns' - tRAS = '42ns' - tWR = '15ns' - # Read to precharge is same as the burst - tRTP = '20ns' - - # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz. - # Note this is a BL4 SDR device. - tBURST = '20ns' - - # WIO 8 Gb - tRFC = '210ns' - - # WIO 8 Gb, <=85C, half for >85C - tREFI = '3.9us' - - # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns - tWTR = '15ns' - - # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns - tRTW = '10ns' - - # Default different rank bus delay to 2 CK, @200 MHz = 10 ns - tCS = '10ns' - - # Activate to activate irrespective of density and speed grade - tRRD = '10.0ns' - - # Two instead of four activation window - tXAW = '50ns' - activation_limit = 2 - - # The WideIO specification does not provide current information - -# A single LPDDR3 x32 interface (one command/address bus), with -# default timings based on a LPDDR3-1600 4 Gbit part (Micron -# EDF8132A1MC) in a 1x32 configuration. 
-class LPDDR3_1600_1x32(DRAMCtrl): - # No DLL for LPDDR3 - dll = False - - # size of device - device_size = '512MB' - - # 1x32 configuration, 1 device with a 32-bit interface - device_bus_width = 32 - - # LPDDR3 is a BL8 device - burst_length = 8 - - # Each device has a page (row buffer) size of 4KB - device_rowbuffer_size = '4kB' - - # 1x32 configuration, so 1 device - devices_per_rank = 1 - - # Technically the datasheet is a dual-rank package, but for - # comparison with the LPDDR2 config we stick to a single rank - ranks_per_channel = 1 - - # LPDDR3 has 8 banks in all configurations - banks_per_rank = 8 - - # 800 MHz - tCK = '1.25ns' - - tRCD = '18ns' - - # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle time - tCL = '15ns' - - tRAS = '42ns' - tWR = '15ns' - - # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns - tRTP = '7.5ns' - - # Pre-charge one bank 18 ns (all banks 21 ns) - tRP = '18ns' - - # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz. - # Note this is a BL8 DDR device. 
- # Requests larger than 32 bytes are broken down into multiple requests - # in the controller - tBURST = '5ns' - - # LPDDR3, 4 Gb - tRFC = '130ns' - tREFI = '3.9us' - - # active powerdown and precharge powerdown exit time - tXP = '7.5ns' - - # self refresh exit time - tXS = '140ns' - - # Irrespective of speed grade, tWTR is 7.5 ns - tWTR = '7.5ns' - - # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns - tRTW = '2.5ns' - - # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns - tCS = '2.5ns' - - # Activate to activate irrespective of density and speed grade - tRRD = '10.0ns' - - # Irrespective of size, tFAW is 50 ns - tXAW = '50ns' - activation_limit = 4 - - # Current values from datasheet - IDD0 = '8mA' - IDD02 = '60mA' - IDD2N = '0.8mA' - IDD2N2 = '26mA' - IDD3N = '2mA' - IDD3N2 = '34mA' - IDD4W = '2mA' - IDD4W2 = '190mA' - IDD4R = '2mA' - IDD4R2 = '230mA' - IDD5 = '28mA' - IDD52 = '150mA' - IDD3P1 = '1.4mA' - IDD3P12 = '11mA' - IDD2P1 = '0.8mA' - IDD2P12 = '1.8mA' - IDD6 = '0.5mA' - IDD62 = '1.8mA' - VDD = '1.8V' - VDD2 = '1.2V' - -# A single GDDR5 x64 interface, with -# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix -# H5GQ1H24AFR) in a 2x32 configuration. 
-class GDDR5_4000_2x32(DRAMCtrl): - # size of device - device_size = '128MB' - - # 2x32 configuration, 1 device with a 32-bit interface - device_bus_width = 32 - - # GDDR5 is a BL8 device - burst_length = 8 - - # Each device has a page (row buffer) size of 2Kbits (256Bytes) - device_rowbuffer_size = '256B' - - # 2x32 configuration, so 2 devices - devices_per_rank = 2 - - # assume single rank - ranks_per_channel = 1 - - # GDDR5 has 4 bank groups - bank_groups_per_rank = 4 - - # GDDR5 has 16 banks with 4 bank groups - banks_per_rank = 16 - - # 1000 MHz - tCK = '1ns' - - # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz - # Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz ) - # 8 beats at 4000 MHz = 2 beats at 1000 MHz - # tBURST is equivalent to the CAS-to-CAS delay (tCCD) - # With bank group architectures, tBURST represents the CAS-to-CAS - # delay for bursts to different bank groups (tCCD_S) - tBURST = '2ns' - - # @1000MHz data rate, tCCD_L is 3 CK - # CAS-to-CAS delay for bursts to the same bank group - # tBURST is equivalent to tCCD_S; no explicit parameter required - # for CAS-to-CAS delay for bursts to different bank groups - tCCD_L = '3ns'; - - tRCD = '12ns' - - # tCL is not directly found in datasheet and assumed equal tRCD - tCL = '12ns' - - tRP = '12ns' - tRAS = '28ns' - - # RRD_S (different bank group) - # RRD_S is 5.5 ns in datasheet. - # rounded to the next multiple of tCK - tRRD = '6ns' - - # RRD_L (same bank group) - # RRD_L is 5.5 ns in datasheet. - # rounded to the next multiple of tCK - tRRD_L = '6ns' - - tXAW = '23ns' - - # tXAW < 4 x tRRD. 
- # Therefore, activation limit is set to 0 - activation_limit = 0 - - tRFC = '65ns' - tWR = '12ns' - - # Here using the average of WTR_S and WTR_L - tWTR = '5ns' - - # Read-to-Precharge 2 CK - tRTP = '2ns' - - # Assume 2 cycles - tRTW = '2ns' - -# A single HBM x128 interface (one command and address bus), with -# default timings based on data publically released -# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014), -# IDD measurement values, and by extrapolating data from other classes. -# Architecture values based on published HBM spec -# A 4H stack is defined, 2Gb per die for a total of 1GB of memory. -class HBM_1000_4H_1x128(DRAMCtrl): - # HBM gen1 supports up to 8 128-bit physical channels - # Configuration defines a single channel, with the capacity - # set to (full_ stack_capacity / 8) based on 2Gb dies - # To use all 8 channels, set 'channels' parameter to 8 in - # system configuration - - # 128-bit interface legacy mode - device_bus_width = 128 - - # HBM supports BL4 and BL2 (legacy mode only) - burst_length = 4 - - # size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack; - # with 8 channels, 128MB per channel - device_size = '128MB' - - device_rowbuffer_size = '2kB' - - # 1x128 configuration - devices_per_rank = 1 - - # HBM does not have a CS pin; set rank to 1 - ranks_per_channel = 1 - - # HBM has 8 or 16 banks depending on capacity - # 2Gb dies have 8 banks - banks_per_rank = 8 - - # depending on frequency, bank groups may be required - # will always have 4 bank groups when enabled - # current specifications do not define the minimum frequency for - # bank group architecture - # setting bank_groups_per_rank to 0 to disable until range is defined - bank_groups_per_rank = 0 - - # 500 MHz for 1Gbps DDR data rate - tCK = '2ns' - - # use values from IDD measurement in JEDEC spec - # use tRP value for tRCD and tCL similar to other classes - tRP = '15ns' - tRCD = '15ns' - tCL = '15ns' - tRAS = '33ns' - - # BL2 and BL4 supported, 
default to BL4 - # DDR @ 500 MHz means 4 * 2ns / 2 = 4ns - tBURST = '4ns' - - # value for 2Gb device from JEDEC spec - tRFC = '160ns' - - # value for 2Gb device from JEDEC spec - tREFI = '3.9us' - - # extrapolate the following from LPDDR configs, using ns values - # to minimize burst length, prefetch differences - tWR = '18ns' - tRTP = '7.5ns' - tWTR = '10ns' - - # start with 2 cycles turnaround, similar to other memory classes - # could be more with variations across the stack - tRTW = '4ns' - - # single rank device, set to 0 - tCS = '0ns' - - # from MemCon example, tRRD is 4ns with 2ns tCK - tRRD = '4ns' - - # from MemCon example, tFAW is 30ns with 2ns tCK - tXAW = '30ns' - activation_limit = 4 - - # 4tCK - tXP = '8ns' - - # start with tRFC + tXP -> 160ns + 8ns = 168ns - tXS = '168ns' - -# A single HBM x64 interface (one command and address bus), with -# default timings based on HBM gen1 and data publically released -# A 4H stack is defined, 8Gb per die for a total of 4GB of memory. -# Note: This defines a pseudo-channel with a unique controller -# instantiated per pseudo-channel -# Stay at same IO rate (1Gbps) to maintain timing relationship with -# HBM gen1 class (HBM_1000_4H_x128) where possible -class HBM_1000_4H_1x64(HBM_1000_4H_1x128): - # For HBM gen2 with pseudo-channel mode, configure 2X channels. 
- # Configuration defines a single pseudo channel, with the capacity - # set to (full_ stack_capacity / 16) based on 8Gb dies - # To use all 16 pseudo channels, set 'channels' parameter to 16 in - # system configuration - - # 64-bit pseudo-channle interface - device_bus_width = 64 - - # HBM pseudo-channel only supports BL4 - burst_length = 4 - - # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack; - # with 16 channels, 256MB per channel - device_size = '256MB' - - # page size is halved with pseudo-channel; maintaining the same same number - # of rows per pseudo-channel with 2X banks across 2 channels - device_rowbuffer_size = '1kB' - - # HBM has 8 or 16 banks depending on capacity - # Starting with 4Gb dies, 16 banks are defined - banks_per_rank = 16 - - # reset tRFC for larger, 8Gb device - # use HBM1 4Gb value as a starting point - tRFC = '260ns' - - # start with tRFC + tXP -> 160ns + 8ns = 168ns - tXS = '268ns' - # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns - tCS = '2ns' - tREFI = '3.9us' - - # active powerdown and precharge powerdown exit time - tXP = '10ns' - - # self refresh exit time - tXS = '65ns' - -# A single LPDDR5 x16 interface (one command/address bus) -# for a single x16 channel with default timings based on -# initial JEDEC specification -# Starting with 5.5Gbps data rates and 8Gbit die -# Configuring for 16-bank mode with bank-group architecture -# burst of 32, which means bursts can be interleaved -class LPDDR5_5500_1x16_BG_BL32(DRAMCtrl): - - # Increase buffer size to account for more bank resources - read_buffer_size = 64 - - # Set page policy to better suit DMC Huxley - page_policy = 'close_adaptive' - - # 16-bit channel interface - device_bus_width = 16 - - # LPDDR5 is a BL16 or BL32 device - # With BG mode, BL16 and BL32 are supported - # Use BL32 for higher command bandwidth - burst_length = 32 - - # size of device in bytes - device_size = '1GB' - - # 2kB page with BG mode - device_rowbuffer_size = '2kB' - - # 
Use a 1x16 configuration - devices_per_rank = 1 - - # Use a single rank - ranks_per_channel = 1 - - # LPDDR5 supports configurable bank options - # 8B : BL32, all frequencies - # 16B : BL32 or BL16, <=3.2Gbps - # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps - # Initial configuration will have 16 banks with Bank Group Arch - # to maximim resources and enable higher data rates - banks_per_rank = 16 - bank_groups_per_rank = 4 - - # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK - tCK = '1.455ns' - - # Greater of 2 CK or 18ns - tRCD = '18ns' - - # Base RL is 16 CK @ 687.5 MHz = 23.28ns - tCL = '23.280ns' - - # Greater of 2 CK or 18ns - tRP = '18ns' - - # Greater of 3 CK or 42ns - tRAS = '42ns' - - # Greater of 3 CK or 34ns - tWR = '34ns' - - # active powerdown and precharge powerdown exit time - # Greater of 3 CK or 7ns - tXP = '7ns' - - # self refresh exit time (tRFCab + 7.5ns) - tXS = '217.5ns' - - # Greater of 2 CK or 7.5 ns minus 2 CK - tRTP = '4.59ns' - - # With BG architecture, burst of 32 transferred in two 16-beat - # sub-bursts, with a 16-beat gap in between. 
- # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz - # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz - tBURST = '8.73ns' - # can interleave a Bstof32 from another bank group at tBURST_MIN - # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz - tBURST_MIN = '2.91ns' - # tBURST_MAX is the maximum burst delay for same bank group timing - # this is 8 CK @ 687.5 MHz - tBURST_MAX = '11.64ns' - - # 8 CK @ 687.5 MHz - tCCD_L = "11.64ns" - - # LPDDR5, 8 Gbit/channel for 280ns tRFCab - tRFC = '210ns' - tREFI = '3.9us' - - # Greater of 4 CK or 6.25 ns - tWTR = '6.25ns' - # Greater of 4 CK or 12 ns - tWTR_L = '12ns' - - # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL - # tWCKDQ0/tCK will be 1 CK for most cases - # For gem5 RL = WL and BL/n is already accounted for with tBURST - # Result is and additional 1 CK is required - tRTW = '1.455ns' - - # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns - tCS = '2.91ns' - - # 2 CK - tPPD = '2.91ns' - - # Greater of 2 CK or 5 ns - tRRD = '5ns' - tRRD_L = '5ns' - - # With Bank Group Arch mode tFAW is 20 ns - tXAW = '20ns' - activation_limit = 4 - - # at 5Gbps, 4:1 WCK to CK ratio required - # 2 data beats per WCK (DDR) -> 8 per CK - beats_per_clock = 8 - - # 2 cycles required to send activate command - # 2 command phases can be sent back-to-back or - # with a gap up to tAAD = 8 CK - two_cycle_activate = True - tAAD = '11.640ns' - - data_clock_sync = True - -# A single LPDDR5 x16 interface (one command/address bus) -# for a single x16 channel with default timings based on -# initial JEDEC specification -# Starting with 5.5Gbps data rates and 8Gbit die -# Configuring for 16-bank mode with bank-group architecture, burst of 16 -class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32): - - # LPDDR5 is a BL16 or BL32 device - # With BG mode, BL16 and BL32 are supported - # Use BL16 for smaller access granularity - burst_length = 16 - - # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 
clock ratio - tBURST = '2.91ns' - tBURST_MIN = '2.91ns' - # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio - tBURST_MAX = '5.82ns' - - # 4 CK @ 687.5 MHz - tCCD_L = "5.82ns" - - -# A single LPDDR5 x16 interface (one command/address bus) -# for a single x16 channel with default timings based on -# initial JEDEC specification -# Starting with 5.5Gbps data rates and 8Gbit die -# Configuring for 8-bank mode, burst of 32 -class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32): - - # 4kB page with 8B mode - device_rowbuffer_size = '4kB' - - # LPDDR5 supports configurable bank options - # 8B : BL32, all frequencies - # 16B : BL32 or BL16, <=3.2Gbps - # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps - # Select 8B - banks_per_rank = 8 - bank_groups_per_rank = 0 - - # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio - tBURST = '5.82ns' - tBURST_MIN = '5.82ns' - tBURST_MAX = '5.82ns' - - # Greater of 4 CK or 12 ns - tWTR = '12ns' - - # Greater of 2 CK or 10 ns - tRRD = '10ns' - - # With 8B mode tFAW is 40 ns - tXAW = '40ns' - activation_limit = 4 - - # Reset BG arch timing for 8B mode - tCCD_L = "0ns" - tRRD_L = "0ns" - tWTR_L = "0ns" - -# A single LPDDR5 x16 interface (one command/address bus) -# for a single x16 channel with default timings based on -# initial JEDEC specification -# 6.4Gbps data rates and 8Gbit die -# Configuring for 16-bank mode with bank-group architecture -# burst of 32, which means bursts can be interleaved -class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32): - - # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK - tCK = '1.25ns' - - # Base RL is 17 CK @ 800 MHz = 21.25ns - tCL = '21.25ns' - - # With BG architecture, burst of 32 transferred in two 16-beat - # sub-bursts, with a 16-beat gap in between. 
- # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz - # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz - tBURST = '7.5ns' - # can interleave a Bstof32 from another bank group at tBURST_MIN - # 16-beats is 8 WCK @2.3 GHz or 2 CK @ 800 MHz - tBURST_MIN = '2.5ns' - # tBURST_MAX is the maximum burst delay for same bank group timing - # this is 8 CK @ 800 MHz - tBURST_MAX = '10ns' - - # 8 CK @ 800 MHz - tCCD_L = "10ns" - - # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL - # tWCKDQ0/tCK will be 1 CK for most cases - # For gem5 RL = WL and BL/n is already accounted for with tBURST - # Result is and additional 1 CK is required - tRTW = '1.25ns' - - # Default different rank bus delay to 2 CK, @687.5 MHz = 2.5 ns - tCS = '2.5ns' - - # 2 CK - tPPD = '2.5ns' - - # 2 command phases can be sent back-to-back or - # with a gap up to tAAD = 8 CK - tAAD = '10ns' - -# A single LPDDR5 x16 interface (one command/address bus) -# for a single x16 channel with default timings based on initial -# JEDEC specifcation -# 6.4Gbps data rates and 8Gbit die -# Configuring for 16-bank mode with bank-group architecture, burst of 16 -class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32): - - # LPDDR5 is a BL16 or BL32 device - # With BG mode, BL16 and BL32 are supported - # Use BL16 for smaller access granularity - burst_length = 16 - - # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio - tBURST = '2.5ns' - tBURST_MIN = '2.5ns' - # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio - tBURST_MAX = '5ns' - - # 4 CK @ 800 MHz - tCCD_L = "5ns" - - -# A single LPDDR5 x16 interface (one command/address bus) -# for a single x16 channel with default timings based on -# initial JEDEC specification -# 6.4Gbps data rates and 8Gbit die -# Configuring for 8-bank mode, burst of 32 -class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32): - - # 4kB page with 8B mode - device_rowbuffer_size = '4kB' - - # LPDDR5 supports configurable bank options - # 8B 
: BL32, all frequencies - # 16B : BL32 or BL16, <=3.2Gbps - # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps - # Select 8B - banks_per_rank = 8 - bank_groups_per_rank = 0 - - # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio - tBURST = '5ns' - tBURST_MIN = '5ns' - tBURST_MAX = '5ns' - - # Greater of 4 CK or 12 ns - tWTR = '12ns' - - # Greater of 2 CK or 10 ns - tRRD = '10ns' - - # With 8B mode tFAW is 40 ns - tXAW = '40ns' - activation_limit = 4 - - # Reset BG arch timing for 8B mode - tCCD_L = "0ns" - tRRD_L = "0ns" - tWTR_L = "0ns" diff --git a/src/mem/DRAMInterface.py b/src/mem/DRAMInterface.py new file mode 100644 index 000000000..f571920c2 --- /dev/null +++ b/src/mem/DRAMInterface.py @@ -0,0 +1,1473 @@ +# Copyright (c) 2012-2020 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Copyright (c) 2013 Amin Farmahini-Farahani +# Copyright (c) 2015 University of Kaiserslautern +# Copyright (c) 2015 The University of Bologna +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.params import * +from m5.proxy import * + +from m5.objects.AbstractMemory import AbstractMemory + +# Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting +# channel, rank, bank, row and column, respectively, and going from +# MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are +# suitable for an open-page policy, optimising for sequential accesses +# hitting in the open row. For a closed-page policy, RoCoRaBaCh +# maximises parallelism. 
+class AddrMap(Enum): vals = ['RoRaBaChCo', 'RoRaBaCoCh', 'RoCoRaBaCh'] + +# Enum for the page policy, either open, open_adaptive, close, or +# close_adaptive. +class PageManage(Enum): vals = ['open', 'open_adaptive', 'close', + 'close_adaptive'] + +class DRAMInterface(AbstractMemory): + type = 'DRAMInterface' + cxx_header = "mem/dram_ctrl.hh" + + # Allow the interface to set required controller buffer sizes + # each entry corresponds to a burst for the specific DRAM + # configuration (e.g. x32 with burst length 8 is 32 bytes) and not + # the cacheline size or request/packet size + write_buffer_size = Param.Unsigned(64, "Number of write queue entries") + read_buffer_size = Param.Unsigned(32, "Number of read queue entries") + + # address map and page policy + addr_mapping = Param.AddrMap('RoRaBaCoCh', "Address mapping policy") + page_policy = Param.PageManage('open_adaptive', "Page management policy") + + # enforce a limit on the number of accesses per row + max_accesses_per_row = Param.Unsigned(16, "Max accesses per row before " + "closing"); + + # size of DRAM Chip in Bytes + device_size = Param.MemorySize("Size of DRAM chip") + # the physical organisation of the DRAM + device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "\ + "device/chip") + burst_length = Param.Unsigned("Burst lenght (BL) in beats") + device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "\ + "device/chip") + devices_per_rank = Param.Unsigned("Number of devices/chips per rank") + ranks_per_channel = Param.Unsigned("Number of ranks per channel") + + # default to 0 bank groups per rank, indicating bank group architecture + # is not used + # update per memory class when bank group architecture is supported + bank_groups_per_rank = Param.Unsigned(0, "Number of bank groups per rank") + banks_per_rank = Param.Unsigned("Number of banks per rank") + + # Enable DRAM powerdown states if True.
This is False by default due to + # performance being lower when enabled + enable_dram_powerdown = Param.Bool(False, "Enable powerdown states") + + # For power modelling we need to know if the DRAM has a DLL or not + dll = Param.Bool(True, "DRAM has DLL or not") + + # DRAMPower provides in addition to the core power, the possibility to + # include RD/WR termination and IO power. This calculation assumes some + # default values. The integration of DRAMPower with gem5 does not include + # IO and RD/WR termination power by default. This might be added as an + # additional feature in the future. + + # timing behaviour and constraints - all in nanoseconds + + # the base clock period of the DRAM + tCK = Param.Latency("Clock period") + + # the amount of time in nanoseconds from issuing an activate command + # to the data being available in the row buffer for a read/write + tRCD = Param.Latency("RAS to CAS delay") + + # the time from issuing a read/write command to seeing the actual data + tCL = Param.Latency("CAS latency") + + # minimum time between a precharge and subsequent activate + tRP = Param.Latency("Row precharge time") + + # minimum time between an activate and a precharge to the same row + tRAS = Param.Latency("ACT to PRE delay") + + # minimum time between a write data transfer and a precharge + tWR = Param.Latency("Write recovery time") + + # minimum time between a read and precharge command + tRTP = Param.Latency("Read to precharge") + + # time to complete a burst transfer, typically the burst length + # divided by two due to the DDR bus, but by making it a parameter + # it is easier to also evaluate SDR memories like WideIO. + # This parameter has to account for burst length.
+ # Read/Write requests with data size larger than one full burst are broken + # down into multiple requests in the controller + # tBURST is equivalent to the CAS-to-CAS delay (tCCD) + # With bank group architectures, tBURST represents the CAS-to-CAS + # delay for bursts to different bank groups (tCCD_S) + tBURST = Param.Latency("Burst duration " + "(typically burst length / 2 cycles)") + + # tBURST_MAX is the column array cycle delay required before next access, + # which could be greater than tBURST when the memory access time is greater + # than tBURST + tBURST_MAX = Param.Latency(Self.tBURST, "Column access delay") + + # tBURST_MIN is the minimum delay between bursts, which could be less than + # tBURST when interleaving is supported + tBURST_MIN = Param.Latency(Self.tBURST, "Minimim delay between bursts") + + # CAS-to-CAS delay for bursts to the same bank group + # only utilized with bank group architectures; set to 0 for default case + # tBURST is equivalent to tCCD_S; no explicit parameter required + # for CAS-to-CAS delay for bursts to different bank groups + tCCD_L = Param.Latency("0ns", "Same bank group CAS to CAS delay") + + # Write-to-Write delay for bursts to the same bank group + # only utilized with bank group architectures; set to 0 for default case + # This will be used to enable different same bank group delays + # for writes versus reads + tCCD_L_WR = Param.Latency(Self.tCCD_L, + "Same bank group Write to Write delay") + + # time taken to complete one refresh cycle (N rows in all banks) + tRFC = Param.Latency("Refresh cycle time") + + # refresh command interval, how often a "ref" command needs + # to be sent.
It is 7.8 us for a 64ms refresh requirement + tREFI = Param.Latency("Refresh command interval") + + # write-to-read, same rank turnaround penalty + tWTR = Param.Latency("Write to read, same rank switching time") + + # write-to-read, same rank turnaround penalty for same bank group + tWTR_L = Param.Latency(Self.tWTR, "Write to read, same rank switching " + "time, same bank group") + + # read-to-write, same rank turnaround penalty + tRTW = Param.Latency("Read to write, same rank switching time") + + # rank-to-rank bus delay penalty + # this does not correlate to a memory timing parameter and encompasses: + # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD + # different rank bus delay + tCS = Param.Latency("Rank to rank switching time") + + # minimum precharge to precharge delay time + tPPD = Param.Latency("0ns", "PRE to PRE delay") + + # maximum delay between two-cycle ACT command phases + tAAD = Param.Latency(Self.tCK, + "Maximum delay between two-cycle ACT commands") + + two_cycle_activate = Param.Bool(False, + "Two cycles required to send activate") + + # minimum row activate to row activate delay time + tRRD = Param.Latency("ACT to ACT delay") + + # only utilized with bank group architectures; set to 0 for default case + tRRD_L = Param.Latency("0ns", "Same bank group ACT to ACT delay") + + # time window in which a maximum number of activates are allowed + # to take place, set to 0 to disable + tXAW = Param.Latency("X activation window") + activation_limit = Param.Unsigned("Max number of activates in window") + + # time to exit power-down mode + # Exit power-down to next valid command delay + tXP = Param.Latency("0ns", "Power-up Delay") + + # Exit Powerdown to commands requiring a locked DLL + tXPDLL = Param.Latency("0ns", "Power-up Delay with locked DLL") + + # time to exit self-refresh mode + tXS = Param.Latency("0ns", "Self-refresh exit latency") + + # time to exit self-refresh mode with locked DLL + tXSDLL = Param.Latency("0ns", "Self-refresh exit
latency DLL") + + # number of data beats per clock. with DDR, default is 2, one per edge + beats_per_clock = Param.Unsigned(2, "Data beats per clock") + + data_clock_sync = Param.Bool(False, "Synchronization commands required") + + # Currently rolled into other params + ###################################################################### + + # tRC - assumed to be tRAS + tRP + + # Power Behaviour and Constraints + # DRAMs like LPDDR and WideIO have 2 external voltage domains. These are + # defined as VDD and VDD2. Each current is defined for each voltage domain + # separately. For example, current IDD0 is active-precharge current for + # voltage domain VDD and current IDD02 is active-precharge current for + # voltage domain VDD2. + # By default all currents are set to 0mA. Users who are only interested in + # the performance of DRAMs can leave them at 0. + + # Operating 1 Bank Active-Precharge current + IDD0 = Param.Current("0mA", "Active precharge current") + + # Operating 1 Bank Active-Precharge current multiple voltage Range + IDD02 = Param.Current("0mA", "Active precharge current VDD2") + + # Precharge Power-down Current: Slow exit + IDD2P0 = Param.Current("0mA", "Precharge Powerdown slow") + + # Precharge Power-down Current: Slow exit multiple voltage Range + IDD2P02 = Param.Current("0mA", "Precharge Powerdown slow VDD2") + + # Precharge Power-down Current: Fast exit + IDD2P1 = Param.Current("0mA", "Precharge Powerdown fast") + + # Precharge Power-down Current: Fast exit multiple voltage Range + IDD2P12 = Param.Current("0mA", "Precharge Powerdown fast VDD2") + + # Precharge Standby current + IDD2N = Param.Current("0mA", "Precharge Standby current") + + # Precharge Standby current multiple voltage range + IDD2N2 = Param.Current("0mA", "Precharge Standby current VDD2") + + # Active Power-down current: slow exit + IDD3P0 = Param.Current("0mA", "Active Powerdown slow") + + # Active Power-down current: slow exit multiple voltage range + IDD3P02 =
Param.Current("0mA", "Active Powerdown slow VDD2") + + # Active Power-down current : fast exit + IDD3P1 = Param.Current("0mA", "Active Powerdown fast") + + # Active Power-down current : fast exit multiple voltage range + IDD3P12 = Param.Current("0mA", "Active Powerdown fast VDD2") + + # Active Standby current + IDD3N = Param.Current("0mA", "Active Standby current") + + # Active Standby current multiple voltage range + IDD3N2 = Param.Current("0mA", "Active Standby current VDD2") + + # Burst Read Operating Current + IDD4R = Param.Current("0mA", "READ current") + + # Burst Read Operating Current multiple voltage range + IDD4R2 = Param.Current("0mA", "READ current VDD2") + + # Burst Write Operating Current + IDD4W = Param.Current("0mA", "WRITE current") + + # Burst Write Operating Current multiple voltage range + IDD4W2 = Param.Current("0mA", "WRITE current VDD2") + + # Refresh Current + IDD5 = Param.Current("0mA", "Refresh current") + + # Refresh Current multiple voltage range + IDD52 = Param.Current("0mA", "Refresh current VDD2") + + # Self-Refresh Current + IDD6 = Param.Current("0mA", "Self-refresh Current") + + # Self-Refresh Current multiple voltage range + IDD62 = Param.Current("0mA", "Self-refresh Current VDD2") + + # Main voltage range of the DRAM + VDD = Param.Voltage("0V", "Main Voltage Range") + + # Second voltage range defined by some DRAMs + VDD2 = Param.Voltage("0V", "2nd Voltage Range") + +# A single DDR3-1600 x64 channel (one command and address bus), with +# timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in +# an 8x8 configuration.
+class DDR3_1600_8x8(DRAMInterface): + # size of device in bytes + device_size = '512MB' + + # 8x8 configuration, 8 devices each with an 8-bit interface + device_bus_width = 8 + + # DDR3 is a BL8 device + burst_length = 8 + + # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8) + device_rowbuffer_size = '1kB' + + # 8x8 configuration, so 8 devices + devices_per_rank = 8 + + # Use two ranks + ranks_per_channel = 2 + + # DDR3 has 8 banks in all configurations + banks_per_rank = 8 + + # 800 MHz + tCK = '1.25ns' + + # 8 beats across an x64 interface translates to 4 clocks @ 800 MHz (5 ns) + tBURST = '5ns' + + # DDR3-1600 11-11-11 + tRCD = '13.75ns' + tCL = '13.75ns' + tRP = '13.75ns' + tRAS = '35ns' + tRRD = '6ns' + tXAW = '30ns' + activation_limit = 4 + tRFC = '260ns' + + tWR = '15ns' + + # Greater of 4 CK or 7.5 ns + tWTR = '7.5ns' + + # Greater of 4 CK or 7.5 ns + tRTP = '7.5ns' + + # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns + tRTW = '2.5ns' + + # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns + tCS = '2.5ns' + + # <=85C, half for >85C + tREFI = '7.8us' + + # active powerdown and precharge powerdown exit time + tXP = '6ns' + + # self refresh exit time + tXS = '270ns' + + # Current values from datasheet Die Rev E,J + IDD0 = '55mA' + IDD2N = '32mA' + IDD3N = '38mA' + IDD4W = '125mA' + IDD4R = '157mA' + IDD5 = '235mA' + IDD3P1 = '38mA' + IDD2P1 = '32mA' + IDD6 = '20mA' + VDD = '1.5V' + +# A single HMC-2500 x32 model based on: +# [1] DRAMSpec: a high-level DRAM bank modelling tool +# developed at the University of Kaiserslautern. This high level tool +# uses RC (resistance-capacitance) and CV (capacitance-voltage) models to +# estimate the DRAM bank latency and power numbers. +# [2] High performance AXI-4.0 based interconnect for extensible smart memory +# cubes (E. Azarkhish et al.) +# A 30 nm technology node is assumed for the HMC model.
+# The modelled HMC consists of 4 Gbit layers which sum up to 2GB of memory (4 +# layers). +# Each layer has 16 vaults and each vault consists of 2 banks per layer. +# In order to be able to use the same controller used for 2D DRAM generations +# for HMC, the following analogy is done: +# Channel (DDR) => Vault (HMC) +# device_size (DDR) => size of a single layer in a vault +# ranks per channel (DDR) => number of layers +# banks per rank (DDR) => banks per layer +# devices per rank (DDR) => devices per layer (1 for HMC). +# The parameters for which no input is available are inherited from the DDR3 +# configuration. +# This configuration includes the latencies from the DRAM to the logic layer +# of the HMC +class HMC_2500_1x32(DDR3_1600_8x8): + # size of device + # two banks per device with each bank 4MB [2] + device_size = '8MB' + + # 1x32 configuration, 1 device with 32 TSVs [2] + device_bus_width = 32 + + # HMC is a BL8 device [2] + burst_length = 8 + + # Each device has a page (row buffer) size of 256 bytes [2] + device_rowbuffer_size = '256B' + + # 1x32 configuration, so 1 device [2] + devices_per_rank = 1 + + # 4 layers so 4 ranks [2] + ranks_per_channel = 4 + + # HMC has 2 banks per layer [2] + # Each layer represents a rank. With 4 layers and 8 banks in total, each + # layer has 2 banks; thus 2 banks per rank. + banks_per_rank = 2 + + # 1250 MHz [2] + tCK = '0.8ns' + + # 8 beats across an x32 interface translates to 4 clocks @ 1250 MHz + tBURST = '3.2ns' + + # Values using DRAMSpec HMC model [1] + tRCD = '10.2ns' + tCL = '9.9ns' + tRP = '7.7ns' + tRAS = '21.6ns' + + # tRRD depends on the power supply network for each vendor. + # We assume a tRRD of a double bank approach to be equal to 4 clock + # cycles (Assumption) + tRRD = '3.2ns' + + # activation limit is set to 0 since there are only 2 banks per vault + # layer.
+ activation_limit = 0 + + # Values using DRAMSpec HMC model [1] + tRFC = '59ns' + tWR = '8ns' + tRTP = '4.9ns' + + # Default different rank bus delay assumed to 1 CK for TSVs, @1250 MHz = + # 0.8 ns (Assumption) + tCS = '0.8ns' + + # Value using DRAMSpec HMC model [1] + tREFI = '3.9us' + + # The default page policy in the vault controllers is simple closed page + # [2] nevertheless 'close' policy opens and closes the row multiple times + # for bursts larger than 32 bytes. For this reason we use 'close_adaptive' + page_policy = 'close_adaptive' + + # RoCoRaBaCh resembles the default address mapping in HMC + addr_mapping = 'RoCoRaBaCh' + + # These parameters do not directly correlate with buffer_size in real + # hardware. Nevertheless, their value has been tuned to achieve a + # bandwidth similar to the cycle-accurate model in [2] + write_buffer_size = 32 + read_buffer_size = 32 + +# A single DDR3-2133 x64 channel refining a selected subset of the +# options for the DDR3-1600 configuration, based on the same DDR3-1600 +# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept +# consistent across the two configurations. +class DDR3_2133_8x8(DDR3_1600_8x8): + # 1066 MHz + tCK = '0.938ns' + + # 8 beats across an x64 interface translates to 4 clocks @ 1066 MHz + tBURST = '3.752ns' + + # DDR3-2133 14-14-14 + tRCD = '13.09ns' + tCL = '13.09ns' + tRP = '13.09ns' + tRAS = '33ns' + tRRD = '5ns' + tXAW = '25ns' + + # Current values from datasheet + IDD0 = '70mA' + IDD2N = '37mA' + IDD3N = '44mA' + IDD4W = '157mA' + IDD4R = '191mA' + IDD5 = '250mA' + IDD3P1 = '44mA' + IDD2P1 = '43mA' + IDD6 ='20mA' + VDD = '1.5V' + +# A single DDR4-2400 x64 channel (one command and address bus), with +# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A2G4) +# in a 16x4 configuration.
+# Total channel capacity is 32GB +# 16 devices/rank * 2 ranks/channel * 1GB/device = 32GB/channel +class DDR4_2400_16x4(DRAMInterface): + # size of device + device_size = '1GB' + + # 16x4 configuration, 16 devices each with a 4-bit interface + device_bus_width = 4 + + # DDR4 is a BL8 device + burst_length = 8 + + # Each device has a page (row buffer) size of 512 byte (1K columns x4) + device_rowbuffer_size = '512B' + + # 16x4 configuration, so 16 devices + devices_per_rank = 16 + + # Match our DDR3 configurations which is dual rank + ranks_per_channel = 2 + + # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups + # Set to 4 for x4 case + bank_groups_per_rank = 4 + + # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups for x4/x8, + # 2 for x16). Currently we do not capture the additional + # constraints incurred by the bank groups + banks_per_rank = 16 + + # override the default buffer sizes and go for something larger to + # accommodate the larger bank count + write_buffer_size = 128 + read_buffer_size = 64 + + # 1200 MHz + tCK = '0.833ns' + + # 8 beats across an x64 interface translates to 4 clocks @ 1200 MHz + # tBURST is equivalent to the CAS-to-CAS delay (tCCD) + # With bank group architectures, tBURST represents the CAS-to-CAS + # delay for bursts to different bank groups (tCCD_S) + tBURST = '3.332ns' + + # @2400 data rate, tCCD_L is 6 CK + # CAS-to-CAS delay for bursts to the same bank group + # tBURST is equivalent to tCCD_S; no explicit parameter required + # for CAS-to-CAS delay for bursts to different bank groups + tCCD_L = '5ns'; + + # DDR4-2400 17-17-17 + tRCD = '14.16ns' + tCL = '14.16ns' + tRP = '14.16ns' + tRAS = '32ns' + + # RRD_S (different bank group) for 512B page is MAX(4 CK, 3.3ns) + tRRD = '3.332ns' + + # RRD_L (same bank group) for 512B page is MAX(4 CK, 4.9ns) + tRRD_L = '4.9ns'; + + # tFAW for 512B page is MAX(16 CK, 13ns) + tXAW = '13.328ns' + activation_limit = 4 + # tRFC is 350ns + tRFC = '350ns' + + tWR = '15ns' + + # Here using the
average of WTR_S and WTR_L + tWTR = '5ns' + + # Greater of 4 CK or 7.5 ns + tRTP = '7.5ns' + + # Default same rank rd-to-wr bus turnaround to 2 CK, @1200 MHz = 1.666 ns + tRTW = '1.666ns' + + # Default different rank bus delay to 2 CK, @1200 MHz = 1.666 ns + tCS = '1.666ns' + + # <=85C, half for >85C + tREFI = '7.8us' + + # active powerdown and precharge powerdown exit time + tXP = '6ns' + + # self refresh exit time + # exit delay to ACT, PRE, PREALL, REF, SREF Enter, and PD Enter is: + # tRFC + 10ns = 340ns; NOTE(review): tRFC above is 350ns - confirm + tXS = '340ns' + + # Current values from datasheet + IDD0 = '43mA' + IDD02 = '3mA' + IDD2N = '34mA' + IDD3N = '38mA' + IDD3N2 = '3mA' + IDD4W = '103mA' + IDD4R = '110mA' + IDD5 = '250mA' + IDD3P1 = '32mA' + IDD2P1 = '25mA' + IDD6 = '30mA' + VDD = '1.2V' + VDD2 = '2.5V' + +# A single DDR4-2400 x64 channel (one command and address bus), with +# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A1G8) +# in an 8x8 configuration. +# Total channel capacity is 16GB +# 8 devices/rank * 2 ranks/channel * 1GB/device = 16GB/channel +class DDR4_2400_8x8(DDR4_2400_16x4): + # 8x8 configuration, 8 devices each with an 8-bit interface + device_bus_width = 8 + + # Each device has a page (row buffer) size of 1 Kbyte (1K columns x8) + device_rowbuffer_size = '1kB' + + # 8x8 configuration, so 8 devices + devices_per_rank = 8 + + # RRD_L (same bank group) for 1K page is MAX(4 CK, 4.9ns) + tRRD_L = '4.9ns'; + + tXAW = '21ns' + + # Current values from datasheet + IDD0 = '48mA' + IDD3N = '43mA' + IDD4W = '123mA' + IDD4R = '135mA' + IDD3P1 = '37mA' + +# A single DDR4-2400 x64 channel (one command and address bus), with +# timings based on a DDR4-2400 8 Gbit datasheet (Micron MT40A512M16) +# in a 4x16 configuration.
+# Total channel capacity is 4GB +# 4 devices/rank * 1 rank/channel * 1GB/device = 4GB/channel +class DDR4_2400_4x16(DDR4_2400_16x4): + # 4x16 configuration, 4 devices each with a 16-bit interface + device_bus_width = 16 + + # Each device has a page (row buffer) size of 2 Kbyte (1K columns x16) + device_rowbuffer_size = '2kB' + + # 4x16 configuration, so 4 devices + devices_per_rank = 4 + + # Single rank for x16 + ranks_per_channel = 1 + + # DDR4 has 2 (x16) or 4 (x4 and x8) bank groups + # Set to 2 for x16 case + bank_groups_per_rank = 2 + + # DDR4 has 16 banks(x4,x8) and 8 banks(x16) (4 bank groups for x4/x8, + # 2 for x16). Currently we do not capture the additional + # constraints incurred by the bank groups + banks_per_rank = 8 + + # RRD_S (different bank group) for 2K page is MAX(4 CK, 5.3ns) + tRRD = '5.3ns' + + # RRD_L (same bank group) for 2K page is MAX(4 CK, 6.4ns) + tRRD_L = '6.4ns'; + + tXAW = '30ns' + + # Current values from datasheet + IDD0 = '80mA' + IDD02 = '4mA' + IDD2N = '34mA' + IDD3N = '47mA' + IDD4W = '228mA' + IDD4R = '243mA' + IDD5 = '280mA' + IDD3P1 = '41mA' + +# A single LPDDR2-S4 x32 interface (one command/address bus), with +# default timings based on a LPDDR2-1066 4 Gbit part (Micron MT42L128M32D1) +# in a 1x32 configuration.
+class LPDDR2_S4_1066_1x32(DRAMInterface): + # No DLL in LPDDR2 + dll = False + + # size of device + device_size = '512MB' + + # 1x32 configuration, 1 device with a 32-bit interface + device_bus_width = 32 + + # LPDDR2-S4 is a BL4 and BL8 device; BL8 is used here + burst_length = 8 + + # Each device has a page (row buffer) size of 1KB + # (this depends on the memory density) + device_rowbuffer_size = '1kB' + + # 1x32 configuration, so 1 device + devices_per_rank = 1 + + # Use a single rank + ranks_per_channel = 1 + + # LPDDR2-S4 has 8 banks in all configurations + banks_per_rank = 8 + + # 533 MHz + tCK = '1.876ns' + + # Fixed at 15 ns + tRCD = '15ns' + + # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle time + tCL = '15ns' + + # Pre-charge one bank 15 ns (all banks 18 ns) + tRP = '15ns' + + tRAS = '42ns' + tWR = '15ns' + + tRTP = '7.5ns' + + # 8 beats across an x32 DDR interface translates to 4 clocks @ 533 MHz. + # Note this is a BL8 DDR device. + # Requests larger than 32 bytes are broken down into multiple requests + # in the controller + tBURST = '7.5ns' + + # LPDDR2-S4, 4 Gbit + tRFC = '130ns' + tREFI = '3.9us' + + # active powerdown and precharge powerdown exit time + tXP = '7.5ns' + + # self refresh exit time + tXS = '140ns' + + # Irrespective of speed grade, tWTR is 7.5 ns + tWTR = '7.5ns' + + # Default same rank rd-to-wr bus turnaround to 2 CK, @533 MHz = 3.75 ns + tRTW = '3.75ns' + + # Default different rank bus delay to 2 CK, @533 MHz = 3.75 ns + tCS = '3.75ns' + + # Activate to activate irrespective of density and speed grade + tRRD = '10.0ns' + + # Irrespective of density, tFAW is 50 ns + tXAW = '50ns' + activation_limit = 4 + + # Current values from datasheet + IDD0 = '15mA' + IDD02 = '70mA' + IDD2N = '2mA' + IDD2N2 = '30mA' + IDD3N = '2.5mA' + IDD3N2 = '30mA' + IDD4W = '10mA' + IDD4W2 = '190mA' + IDD4R = '3mA' + IDD4R2 = '220mA' + IDD5 = '40mA' + IDD52 = '150mA' + IDD3P1 = '1.2mA' + IDD3P12 = '8mA' + IDD2P1 = '0.6mA' + IDD2P12 = '0.8mA' + IDD6 = '1mA'
+ IDD62 = '3.2mA' + VDD = '1.8V' + VDD2 = '1.2V' + +# A single WideIO x128 interface (one command and address bus), with +# default timings based on an estimated WIO-200 8 Gbit part. +class WideIO_200_1x128(DRAMInterface): + # No DLL for WideIO + dll = False + + # size of device + device_size = '1024MB' + + # 1x128 configuration, 1 device with a 128-bit interface + device_bus_width = 128 + + # This is a BL4 device + burst_length = 4 + + # Each device has a page (row buffer) size of 4KB + # (this depends on the memory density) + device_rowbuffer_size = '4kB' + + # 1x128 configuration, so 1 device + devices_per_rank = 1 + + # Use one rank for a one-high die stack + ranks_per_channel = 1 + + # WideIO has 4 banks in all configurations + banks_per_rank = 4 + + # 200 MHz + tCK = '5ns' + + # WIO-200 + tRCD = '18ns' + tCL = '18ns' + tRP = '18ns' + tRAS = '42ns' + tWR = '15ns' + # Read to precharge is same as the burst + tRTP = '20ns' + + # 4 beats across an x128 SDR interface translates to 4 clocks @ 200 MHz. + # Note this is a BL4 SDR device. + tBURST = '20ns' + + # WIO 8 Gb + tRFC = '210ns' + + # WIO 8 Gb, <=85C, half for >85C + tREFI = '3.9us' + + # Greater of 2 CK or 15 ns, 2 CK @ 200 MHz = 10 ns + tWTR = '15ns' + + # Default same rank rd-to-wr bus turnaround to 2 CK, @200 MHz = 10 ns + tRTW = '10ns' + + # Default different rank bus delay to 2 CK, @200 MHz = 10 ns + tCS = '10ns' + + # Activate to activate irrespective of density and speed grade + tRRD = '10.0ns' + + # Two-activation window instead of four + tXAW = '50ns' + activation_limit = 2 + + # The WideIO specification does not provide current information + +# A single LPDDR3 x32 interface (one command/address bus), with +# default timings based on a LPDDR3-1600 4 Gbit part (Micron +# EDF8132A1MC) in a 1x32 configuration.
+class LPDDR3_1600_1x32(DRAMInterface): + # No DLL for LPDDR3 + dll = False + + # size of device + device_size = '512MB' + + # 1x32 configuration, 1 device with a 32-bit interface + device_bus_width = 32 + + # LPDDR3 is a BL8 device + burst_length = 8 + + # Each device has a page (row buffer) size of 4KB + device_rowbuffer_size = '4kB' + + # 1x32 configuration, so 1 device + devices_per_rank = 1 + + # Technically the datasheet is a dual-rank package, but for + # comparison with the LPDDR2 config we stick to a single rank + ranks_per_channel = 1 + + # LPDDR3 has 8 banks in all configurations + banks_per_rank = 8 + + # 800 MHz + tCK = '1.25ns' + + tRCD = '18ns' + + # 12 CK read latency (12 x 1.25 ns = 15 ns), 6 CK write latency @ 800 MHz + tCL = '15ns' + + tRAS = '42ns' + tWR = '15ns' + + # Greater of 4 CK or 7.5 ns, 4 CK @ 800 MHz = 5 ns + tRTP = '7.5ns' + + # Pre-charge one bank 18 ns (all banks 21 ns) + tRP = '18ns' + + # 8 beats across a x32 DDR interface translates to 4 clocks @ 800 MHz. + # Note this is a BL8 DDR device.
+ # Requests larger than 32 bytes are broken down into multiple requests + # in the controller + tBURST = '5ns' + + # LPDDR3, 4 Gb + tRFC = '130ns' + tREFI = '3.9us' + + # active powerdown and precharge powerdown exit time + tXP = '7.5ns' + + # self refresh exit time + tXS = '140ns' + + # Irrespective of speed grade, tWTR is 7.5 ns + tWTR = '7.5ns' + + # Default same rank rd-to-wr bus turnaround to 2 CK, @800 MHz = 2.5 ns + tRTW = '2.5ns' + + # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns + tCS = '2.5ns' + + # Activate to activate irrespective of density and speed grade + tRRD = '10.0ns' + + # Irrespective of size, tFAW is 50 ns + tXAW = '50ns' + activation_limit = 4 + + # Current values from datasheet + IDD0 = '8mA' + IDD02 = '60mA' + IDD2N = '0.8mA' + IDD2N2 = '26mA' + IDD3N = '2mA' + IDD3N2 = '34mA' + IDD4W = '2mA' + IDD4W2 = '190mA' + IDD4R = '2mA' + IDD4R2 = '230mA' + IDD5 = '28mA' + IDD52 = '150mA' + IDD3P1 = '1.4mA' + IDD3P12 = '11mA' + IDD2P1 = '0.8mA' + IDD2P12 = '1.8mA' + IDD6 = '0.5mA' + IDD62 = '1.8mA' + VDD = '1.8V' + VDD2 = '1.2V' + +# A single GDDR5 x64 interface, with +# default timings based on a GDDR5-4000 1 Gbit part (SK Hynix +# H5GQ1H24AFR) in a 2x32 configuration.
+class GDDR5_4000_2x32(DRAMInterface): + # size of device + device_size = '128MB' + + # 2x32 configuration, 1 device with a 32-bit interface + device_bus_width = 32 + + # GDDR5 is a BL8 device + burst_length = 8 + + # Each device has a page (row buffer) size of 2Kbits (256Bytes) + device_rowbuffer_size = '256B' + + # 2x32 configuration, so 2 devices + devices_per_rank = 2 + + # assume single rank + ranks_per_channel = 1 + + # GDDR5 has 4 bank groups + bank_groups_per_rank = 4 + + # GDDR5 has 16 banks with 4 bank groups + banks_per_rank = 16 + + # 1000 MHz + tCK = '1ns' + + # 8 beats across an x64 interface translates to 2 clocks @ 1000 MHz + # Data bus runs @2000 Mhz => DDR ( data runs at 4000 MHz ) + # 8 beats at 4000 MHz = 2 beats at 1000 MHz + # tBURST is equivalent to the CAS-to-CAS delay (tCCD) + # With bank group architectures, tBURST represents the CAS-to-CAS + # delay for bursts to different bank groups (tCCD_S) + tBURST = '2ns' + + # @1000MHz data rate, tCCD_L is 3 CK + # CAS-to-CAS delay for bursts to the same bank group + # tBURST is equivalent to tCCD_S; no explicit parameter required + # for CAS-to-CAS delay for bursts to different bank groups + tCCD_L = '3ns'; + + tRCD = '12ns' + + # tCL is not directly found in datasheet and assumed equal tRCD + tCL = '12ns' + + tRP = '12ns' + tRAS = '28ns' + + # RRD_S (different bank group) + # RRD_S is 5.5 ns in datasheet. + # rounded to the next multiple of tCK + tRRD = '6ns' + + # RRD_L (same bank group) + # RRD_L is 5.5 ns in datasheet. + # rounded to the next multiple of tCK + tRRD_L = '6ns' + + tXAW = '23ns' + + # tXAW < 4 x tRRD. 
+ # Therefore, activation limit is set to 0 + activation_limit = 0 + + tRFC = '65ns' + tWR = '12ns' + + # Here using the average of WTR_S and WTR_L + tWTR = '5ns' + + # Read-to-Precharge 2 CK + tRTP = '2ns' + + # Assume 2 cycles + tRTW = '2ns' + +# A single HBM x128 interface (one command and address bus), with +# default timings based on data publically released +# ("HBM: Memory Solution for High Performance Processors", MemCon, 2014), +# IDD measurement values, and by extrapolating data from other classes. +# Architecture values based on published HBM spec +# A 4H stack is defined, 2Gb per die for a total of 1GB of memory. +class HBM_1000_4H_1x128(DRAMInterface): + # HBM gen1 supports up to 8 128-bit physical channels + # Configuration defines a single channel, with the capacity + # set to (full_ stack_capacity / 8) based on 2Gb dies + # To use all 8 channels, set 'channels' parameter to 8 in + # system configuration + + # 128-bit interface legacy mode + device_bus_width = 128 + + # HBM supports BL4 and BL2 (legacy mode only) + burst_length = 4 + + # size of channel in bytes, 4H stack of 2Gb dies is 1GB per stack; + # with 8 channels, 128MB per channel + device_size = '128MB' + + device_rowbuffer_size = '2kB' + + # 1x128 configuration + devices_per_rank = 1 + + # HBM does not have a CS pin; set rank to 1 + ranks_per_channel = 1 + + # HBM has 8 or 16 banks depending on capacity + # 2Gb dies have 8 banks + banks_per_rank = 8 + + # depending on frequency, bank groups may be required + # will always have 4 bank groups when enabled + # current specifications do not define the minimum frequency for + # bank group architecture + # setting bank_groups_per_rank to 0 to disable until range is defined + bank_groups_per_rank = 0 + + # 500 MHz for 1Gbps DDR data rate + tCK = '2ns' + + # use values from IDD measurement in JEDEC spec + # use tRP value for tRCD and tCL similar to other classes + tRP = '15ns' + tRCD = '15ns' + tCL = '15ns' + tRAS = '33ns' + + # BL2 and BL4 
supported, default to BL4 + # DDR @ 500 MHz means 4 * 2ns / 2 = 4ns + tBURST = '4ns' + + # value for 2Gb device from JEDEC spec + tRFC = '160ns' + + # value for 2Gb device from JEDEC spec + tREFI = '3.9us' + + # extrapolate the following from LPDDR configs, using ns values + # to minimize burst length, prefetch differences + tWR = '18ns' + tRTP = '7.5ns' + tWTR = '10ns' + + # start with 2 cycles turnaround, similar to other memory classes + # could be more with variations across the stack + tRTW = '4ns' + + # single rank device, set to 0 + tCS = '0ns' + + # from MemCon example, tRRD is 4ns with 2ns tCK + tRRD = '4ns' + + # from MemCon example, tFAW is 30ns with 2ns tCK + tXAW = '30ns' + activation_limit = 4 + + # 4tCK + tXP = '8ns' + + # start with tRFC + tXP -> 160ns + 8ns = 168ns + tXS = '168ns' + +# A single HBM x64 interface (one command and address bus), with +# default timings based on HBM gen1 and data publically released +# A 4H stack is defined, 8Gb per die for a total of 4GB of memory. +# Note: This defines a pseudo-channel with a unique controller +# instantiated per pseudo-channel +# Stay at same IO rate (1Gbps) to maintain timing relationship with +# HBM gen1 class (HBM_1000_4H_x128) where possible +class HBM_1000_4H_1x64(HBM_1000_4H_1x128): + # For HBM gen2 with pseudo-channel mode, configure 2X channels. 
+ # Configuration defines a single pseudo channel, with the capacity + # set to (full_ stack_capacity / 16) based on 8Gb dies + # To use all 16 pseudo channels, set 'channels' parameter to 16 in + # system configuration + + # 64-bit pseudo-channle interface + device_bus_width = 64 + + # HBM pseudo-channel only supports BL4 + burst_length = 4 + + # size of channel in bytes, 4H stack of 8Gb dies is 4GB per stack; + # with 16 channels, 256MB per channel + device_size = '256MB' + + # page size is halved with pseudo-channel; maintaining the same same number + # of rows per pseudo-channel with 2X banks across 2 channels + device_rowbuffer_size = '1kB' + + # HBM has 8 or 16 banks depending on capacity + # Starting with 4Gb dies, 16 banks are defined + banks_per_rank = 16 + + # reset tRFC for larger, 8Gb device + # use HBM1 4Gb value as a starting point + tRFC = '260ns' + + # start with tRFC + tXP -> 160ns + 8ns = 168ns + tXS = '268ns' + # Default different rank bus delay to 2 CK, @1000 MHz = 2 ns + tCS = '2ns' + tREFI = '3.9us' + + # active powerdown and precharge powerdown exit time + tXP = '10ns' + + # self refresh exit time + tXS = '65ns' + +# A single LPDDR5 x16 interface (one command/address bus) +# for a single x16 channel with default timings based on +# initial JEDEC specification +# Starting with 5.5Gbps data rates and 8Gbit die +# Configuring for 16-bank mode with bank-group architecture +# burst of 32, which means bursts can be interleaved +class LPDDR5_5500_1x16_BG_BL32(DRAMInterface): + + # Increase buffer size to account for more bank resources + read_buffer_size = 64 + + # Set page policy to better suit DMC Huxley + page_policy = 'close_adaptive' + + # 16-bit channel interface + device_bus_width = 16 + + # LPDDR5 is a BL16 or BL32 device + # With BG mode, BL16 and BL32 are supported + # Use BL32 for higher command bandwidth + burst_length = 32 + + # size of device in bytes + device_size = '1GB' + + # 2kB page with BG mode + device_rowbuffer_size = '2kB' + + 
# Use a 1x16 configuration + devices_per_rank = 1 + + # Use a single rank + ranks_per_channel = 1 + + # LPDDR5 supports configurable bank options + # 8B : BL32, all frequencies + # 16B : BL32 or BL16, <=3.2Gbps + # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps + # Initial configuration will have 16 banks with Bank Group Arch + # to maximize resources and enable higher data rates + banks_per_rank = 16 + bank_groups_per_rank = 4 + + # 5.5Gb/s DDR with 4:1 WCK:CK ratio for 687.5 MHz CK + tCK = '1.455ns' + + # Greater of 2 CK or 18ns + tRCD = '18ns' + + # Base RL is 16 CK @ 687.5 MHz = 23.28ns + tCL = '23.280ns' + + # Greater of 2 CK or 18ns + tRP = '18ns' + + # Greater of 3 CK or 42ns + tRAS = '42ns' + + # Greater of 3 CK or 34ns + tWR = '34ns' + + # active powerdown and precharge powerdown exit time + # Greater of 3 CK or 7ns + tXP = '7ns' + + # self refresh exit time (tRFCab + 7.5ns) + tXS = '217.5ns' + + # Greater of 2 CK or 7.5 ns minus 2 CK + tRTP = '4.59ns' + + # With BG architecture, burst of 32 transferred in two 16-beat + # sub-bursts, with a 16-beat gap in between. 
+ # Each 16-beat sub-burst is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz + # tBURST is the delay to transfer the Bstof32 = 6 CK @ 687.5 MHz + tBURST = '8.73ns' + # can interleave a Bstof32 from another bank group at tBURST_MIN + # 16-beats is 8 WCK @2.75 GHz or 2 CK @ 687.5 MHz + tBURST_MIN = '2.91ns' + # tBURST_MAX is the maximum burst delay for same bank group timing + # this is 8 CK @ 687.5 MHz + tBURST_MAX = '11.64ns' + + # 8 CK @ 687.5 MHz + tCCD_L = "11.64ns" + + # LPDDR5, 8 Gbit/channel for 210ns tRFCab + tRFC = '210ns' + tREFI = '3.9us' + + # Greater of 4 CK or 6.25 ns + tWTR = '6.25ns' + # Greater of 4 CK or 12 ns + tWTR_L = '12ns' + + # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL + # tWCKDQ0/tCK will be 1 CK for most cases + # For gem5 RL = WL and BL/n is already accounted for with tBURST + # Result is that an additional 1 CK is required + tRTW = '1.455ns' + + # Default different rank bus delay to 2 CK, @687.5 MHz = 2.91 ns + tCS = '2.91ns' + + # 2 CK + tPPD = '2.91ns' + + # Greater of 2 CK or 5 ns + tRRD = '5ns' + tRRD_L = '5ns' + + # With Bank Group Arch mode tFAW is 20 ns + tXAW = '20ns' + activation_limit = 4 + + # at 5Gbps, 4:1 WCK to CK ratio required + # 2 data beats per WCK (DDR) -> 8 per CK + beats_per_clock = 8 + + # 2 cycles required to send activate command + # 2 command phases can be sent back-to-back or + # with a gap up to tAAD = 8 CK + two_cycle_activate = True + tAAD = '11.640ns' + + data_clock_sync = True + +# A single LPDDR5 x16 interface (one command/address bus) +# for a single x16 channel with default timings based on +# initial JEDEC specification +# Starting with 5.5Gbps data rates and 8Gbit die +# Configuring for 16-bank mode with bank-group architecture, burst of 16 +class LPDDR5_5500_1x16_BG_BL16(LPDDR5_5500_1x16_BG_BL32): + + # LPDDR5 is a BL16 or BL32 device + # With BG mode, BL16 and BL32 are supported + # Use BL16 for smaller access granularity + burst_length = 16 + + # For Bstof16 with BG arch, 2 CK @ 687.5 MHz with 4:1 
clock ratio + tBURST = '2.91ns' + tBURST_MIN = '2.91ns' + # For Bstof16 with BG arch, 4 CK @ 687.5 MHz with 4:1 clock ratio + tBURST_MAX = '5.82ns' + + # 4 CK @ 687.5 MHz + tCCD_L = "5.82ns" + + +# A single LPDDR5 x16 interface (one command/address bus) +# for a single x16 channel with default timings based on +# initial JEDEC specification +# Starting with 5.5Gbps data rates and 8Gbit die +# Configuring for 8-bank mode, burst of 32 +class LPDDR5_5500_1x16_8B_BL32(LPDDR5_5500_1x16_BG_BL32): + + # 4kB page with 8B mode + device_rowbuffer_size = '4kB' + + # LPDDR5 supports configurable bank options + # 8B : BL32, all frequencies + # 16B : BL32 or BL16, <=3.2Gbps + # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps + # Select 8B + banks_per_rank = 8 + bank_groups_per_rank = 0 + + # For Bstof32 with 8B mode, 4 CK @ 687.5 MHz with 4:1 clock ratio + tBURST = '5.82ns' + tBURST_MIN = '5.82ns' + tBURST_MAX = '5.82ns' + + # Greater of 4 CK or 12 ns + tWTR = '12ns' + + # Greater of 2 CK or 10 ns + tRRD = '10ns' + + # With 8B mode tFAW is 40 ns + tXAW = '40ns' + activation_limit = 4 + + # Reset BG arch timing for 8B mode + tCCD_L = "0ns" + tRRD_L = "0ns" + tWTR_L = "0ns" + +# A single LPDDR5 x16 interface (one command/address bus) +# for a single x16 channel with default timings based on +# initial JEDEC specification +# 6.4Gbps data rates and 8Gbit die +# Configuring for 16-bank mode with bank-group architecture +# burst of 32, which means bursts can be interleaved +class LPDDR5_6400_1x16_BG_BL32(LPDDR5_5500_1x16_BG_BL32): + + # 6.4Gb/s DDR with 4:1 WCK:CK ratio for 800 MHz CK + tCK = '1.25ns' + + # Base RL is 17 CK @ 800 MHz = 21.25ns + tCL = '21.25ns' + + # With BG architecture, burst of 32 transferred in two 16-beat + # sub-bursts, with a 16-beat gap in between. 
+ # Each 16-beat sub-burst is 8 WCK @3.2 GHz or 2 CK @ 800 MHz + # tBURST is the delay to transfer the Bstof32 = 6 CK @ 800 MHz + tBURST = '7.5ns' + # can interleave a Bstof32 from another bank group at tBURST_MIN + # 16-beats is 8 WCK @3.2 GHz or 2 CK @ 800 MHz + tBURST_MIN = '2.5ns' + # tBURST_MAX is the maximum burst delay for same bank group timing + # this is 8 CK @ 800 MHz + tBURST_MAX = '10ns' + + # 8 CK @ 800 MHz + tCCD_L = "10ns" + + # Required RD-to-WR timing is RL+ BL/n + tWCKDQ0/tCK - WL + # tWCKDQ0/tCK will be 1 CK for most cases + # For gem5 RL = WL and BL/n is already accounted for with tBURST + # Result is that an additional 1 CK is required + tRTW = '1.25ns' + + # Default different rank bus delay to 2 CK, @800 MHz = 2.5 ns + tCS = '2.5ns' + + # 2 CK + tPPD = '2.5ns' + + # 2 command phases can be sent back-to-back or + # with a gap up to tAAD = 8 CK + tAAD = '10ns' + +# A single LPDDR5 x16 interface (one command/address bus) +# for a single x16 channel with default timings based on initial +# JEDEC specification +# 6.4Gbps data rates and 8Gbit die +# Configuring for 16-bank mode with bank-group architecture, burst of 16 +class LPDDR5_6400_1x16_BG_BL16(LPDDR5_6400_1x16_BG_BL32): + + # LPDDR5 is a BL16 or BL32 device + # With BG mode, BL16 and BL32 are supported + # Use BL16 for smaller access granularity + burst_length = 16 + + # For Bstof16 with BG arch, 2 CK @ 800 MHz with 4:1 clock ratio + tBURST = '2.5ns' + tBURST_MIN = '2.5ns' + # For Bstof16 with BG arch, 4 CK @ 800 MHz with 4:1 clock ratio + tBURST_MAX = '5ns' + + # 4 CK @ 800 MHz + tCCD_L = "5ns" + + +# A single LPDDR5 x16 interface (one command/address bus) +# for a single x16 channel with default timings based on +# initial JEDEC specification +# 6.4Gbps data rates and 8Gbit die +# Configuring for 8-bank mode, burst of 32 +class LPDDR5_6400_1x16_8B_BL32(LPDDR5_6400_1x16_BG_BL32): + + # 4kB page with 8B mode + device_rowbuffer_size = '4kB' + + # LPDDR5 supports configurable bank options + # 8B 
: BL32, all frequencies + # 16B : BL32 or BL16, <=3.2Gbps + # 16B with Bank Group Arch (4B/BG): BL32 or BL16, >3.2Gbps + # Select 8B + banks_per_rank = 8 + bank_groups_per_rank = 0 + + # For Bstof32 with 8B mode, 4 CK @ 800 MHz with 4:1 clock ratio + tBURST = '5ns' + tBURST_MIN = '5ns' + tBURST_MAX = '5ns' + + # Greater of 4 CK or 12 ns + tWTR = '12ns' + + # Greater of 2 CK or 10 ns + tRRD = '10ns' + + # With 8B mode tFAW is 40 ns + tXAW = '40ns' + activation_limit = 4 + + # Reset BG arch timing for 8B mode + tCCD_L = "0ns" + tRRD_L = "0ns" + tWTR_L = "0ns" diff --git a/src/mem/SConscript b/src/mem/SConscript index 2fe179d37..ceeed980a 100644 --- a/src/mem/SConscript +++ b/src/mem/SConscript @@ -1,6 +1,6 @@ # -*- mode:python -*- # -# Copyright (c) 2018-2019 ARM Limited +# Copyright (c) 2018-2020 ARM Limited # All rights reserved # # The license below extends only to copyright in the software and shall @@ -47,6 +47,7 @@ SimObject('AbstractMemory.py') SimObject('AddrMapper.py') SimObject('Bridge.py') SimObject('DRAMCtrl.py') +SimObject('DRAMInterface.py') SimObject('ExternalMaster.py') SimObject('ExternalSlave.py') SimObject('MemObject.py') diff --git a/src/mem/dram_ctrl.cc b/src/mem/dram_ctrl.cc index b6465818d..4055505b3 100644 --- a/src/mem/dram_ctrl.cc +++ b/src/mem/dram_ctrl.cc @@ -47,6 +47,7 @@ #include "debug/DRAMState.hh" #include "debug/Drain.hh" #include "debug/QOS.hh" +#include "params/DRAMInterface.hh" #include "sim/system.hh" using namespace std; @@ -58,12 +59,13 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : retryRdReq(false), retryWrReq(false), nextReqEvent([this]{ processNextReqEvent(); }, name()), respondEvent([this]{ processRespondEvent(); }, name()), - readBufferSize(p->read_buffer_size), - writeBufferSize(p->write_buffer_size), + dram(p->dram), + readBufferSize(dram->readBufferSize), + writeBufferSize(dram->writeBufferSize), writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0), writeLowThreshold(writeBufferSize * 
p->write_low_thresh_perc / 100.0), minWritesPerSwitch(p->min_writes_per_switch), - writesThisTime(0), readsThisTime(0), tCS(p->tCS), + writesThisTime(0), readsThisTime(0), memSchedPolicy(p->mem_sched_policy), frontendLatency(p->static_frontend_latency), backendLatency(p->static_backend_latency), @@ -74,37 +76,23 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : readQueue.resize(p->qos_priorities); writeQueue.resize(p->qos_priorities); + dram->setCtrl(this); + // perform a basic check of the write thresholds if (p->write_low_thresh_perc >= p->write_high_thresh_perc) fatal("Write buffer low threshold %d must be smaller than the " "high threshold %d\n", p->write_low_thresh_perc, p->write_high_thresh_perc); - - // determine the rows per bank by looking at the total capacity - uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size()); - - DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity, - AbstractMemory::size()); - - // create a DRAM interface - // will only populate the ranks if DRAM is configured - dram = new DRAMInterface(*this, p, capacity, range); - DPRINTF(DRAM, "Created DRAM interface \n"); } void DRAMCtrl::init() { - MemCtrl::init(); - if (!port.isConnected()) { fatal("DRAMCtrl %s is unconnected!\n", name()); } else { port.sendRangeChange(); } - - dram->init(range); - } void @@ -114,8 +102,6 @@ DRAMCtrl::startup() isTimingMode = system()->isTimingMode(); if (isTimingMode) { - dram->startupRanks(); - // shift the bus busy time sufficiently far ahead that we never // have to worry about negative values when computing the time for // the next request, this will add an insignificant bubble at the @@ -133,7 +119,7 @@ DRAMCtrl::recvAtomic(PacketPtr pkt) "is responding"); // do the actual memory access and turn the packet into a response - access(pkt); + dram->access(pkt); Tick latency = 0; if (pkt->hasData()) { @@ -263,7 +249,7 @@ DRAMCtrl::addToReadQueue(PacketPtr pkt, unsigned int pktCount) // address of first DRAM packet is kept unaliged. 
Subsequent DRAM packets // are aligned to burst size boundaries. This is to ensure we accurately // check read packets against packets in write queue. - const Addr base_addr = getCtrlAddr(pkt->getAddr()); + const Addr base_addr = dram->getCtrlAddr(pkt->getAddr()); Addr addr = base_addr; unsigned pktsServicedByWrQ = 0; BurstHelper* burst_helper = NULL; @@ -363,7 +349,7 @@ DRAMCtrl::addToWriteQueue(PacketPtr pkt, unsigned int pktCount) // if the request size is larger than burst size, the pkt is split into // multiple DRAM packets - const Addr base_addr = getCtrlAddr(pkt->getAddr()); + const Addr base_addr = dram->getCtrlAddr(pkt->getAddr()); Addr addr = base_addr; uint32_t burstSize = dram->bytesPerBurst(); for (int cnt = 0; cnt < pktCount; ++cnt) { @@ -526,7 +512,7 @@ DRAMCtrl::processRespondEvent() DRAMPacket* dram_pkt = respQueue.front(); // media specific checks and functions when read response is complete - dram->respondEventDRAM(dram_pkt->rank); + dram->respondEvent(dram_pkt->rank); if (dram_pkt->burstHelper) { // it is a split packet @@ -727,12 +713,12 @@ DRAMCtrl::chooseNextFRFCFS(DRAMPacketQueue& queue, Tick extra_col_delay) void DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency) { - DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr()); + DPRINTF(DRAM, "Responding to Address %lld.. 
\n",pkt->getAddr()); bool needsResponse = pkt->needsResponse(); // do the actual memory access which also turns the packet into a // response - access(pkt); + dram->access(pkt); // turn packet around to go back to requester if response expected if (needsResponse) { @@ -877,9 +863,9 @@ DRAMInterface::activateBank(Rank& rank_ref, Bank& bank_ref, // if not, shift to next burst window Tick act_at; if (twoCycleActivate) - act_at = ctrl.verifyMultiCmd(act_tick, tAAD); + act_at = ctrl->verifyMultiCmd(act_tick, tAAD); else - act_at = ctrl.verifySingleCmd(act_tick); + act_at = ctrl->verifySingleCmd(act_tick); DPRINTF(DRAM, "Activate at tick %d\n", act_at); @@ -997,7 +983,7 @@ DRAMInterface::prechargeBank(Rank& rank_ref, Bank& bank, Tick pre_tick, // Issuing an explicit PRE command // Verify that we have command bandwidth to issue the precharge // if not, shift to next burst window - pre_at = ctrl.verifySingleCmd(pre_tick); + pre_at = ctrl->verifySingleCmd(pre_tick); // enforce tPPD for (int i = 0; i < banksPerRank; i++) { rank_ref.banks[i].preAllowedAt = std::max(pre_at + tPPD, @@ -1096,9 +1082,9 @@ DRAMInterface::doBurstAccess(DRAMPacket* dram_pkt, Tick next_burst_at, // verify that we have command bandwidth to issue the burst // if not, shift to next burst window if (dataClockSync && ((cmd_at - rank_ref.lastBurstTick) > clkResyncDelay)) - cmd_at = ctrl.verifyMultiCmd(cmd_at, tCK); + cmd_at = ctrl->verifyMultiCmd(cmd_at, tCK); else - cmd_at = ctrl.verifySingleCmd(cmd_at); + cmd_at = ctrl->verifySingleCmd(cmd_at); // if we are interleaving bursts, ensure that // 1) we don't double interleave on next burst issue @@ -1196,7 +1182,7 @@ DRAMInterface::doBurstAccess(DRAMPacket* dram_pkt, Tick next_burst_at, bool got_more_hits = false; bool got_bank_conflict = false; - for (uint8_t i = 0; i < ctrl.numPriorities(); ++i) { + for (uint8_t i = 0; i < ctrl->numPriorities(); ++i) { auto p = queue[i].begin(); // keep on looking until we find a hit or reach the end of the // queue @@ 
-1267,6 +1253,7 @@ DRAMInterface::doBurstAccess(DRAMPacket* dram_pkt, Tick next_burst_at, // Update latency stats stats.totMemAccLat += dram_pkt->readyTime - dram_pkt->entryTime; stats.totQLat += cmd_at - dram_pkt->entryTime; + stats.totBusLat += tBURST; } else { // Schedule write done event to decrement event count // after the readyTime has been reached @@ -1350,13 +1337,9 @@ DRAMCtrl::doBurstAccess(DRAMPacket* dram_pkt) // Update latency stats stats.masterReadTotalLat[dram_pkt->masterId()] += dram_pkt->readyTime - dram_pkt->entryTime; - - stats.bytesRead += dram->bytesPerBurst(); - stats.totBusLat += dram->burstDelay(); stats.masterReadBytes[dram_pkt->masterId()] += dram_pkt->size; } else { ++writesThisTime; - stats.bytesWritten += dram->bytesPerBurst(); stats.masterWriteBytes[dram_pkt->masterId()] += dram_pkt->size; stats.masterWriteTotalLat[dram_pkt->masterId()] += dram_pkt->readyTime - dram_pkt->entryTime; @@ -1458,8 +1441,9 @@ DRAMCtrl::processNextReqEvent() // Figure out which read request goes next // If we are changing command type, incorporate the minimum - // bus turnaround delay which will be tCS (different rank) case - to_read = chooseNext((*queue), switched_cmd_type ? tCS : 0); + // bus turnaround delay which will be rank to rank delay + to_read = chooseNext((*queue), switched_cmd_type ? + dram->rankDelay() : 0); if (to_read != queue->end()) { // candidate read found @@ -1538,7 +1522,8 @@ DRAMCtrl::processNextReqEvent() // If we are changing command type, incorporate the minimum // bus turnaround delay to_write = chooseNext((*queue), - switched_cmd_type ? std::min(dram->minRdToWr(), tCS) : 0); + switched_cmd_type ? 
std::min(dram->minRdToWr(), + dram->rankDelay()) : 0); if (to_write != queue->end()) { write_found = true; @@ -1611,11 +1596,8 @@ DRAMCtrl::processNextReqEvent() } } -DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl, - const DRAMCtrlParams* _p, - const uint64_t capacity, - const AddrRange range) - : SimObject(_p), ctrl(_ctrl), +DRAMInterface::DRAMInterface(const DRAMInterfaceParams* _p) + : AbstractMemory(_p), addrMapping(_p->addr_mapping), burstSize((_p->devices_per_rank * _p->burst_length * _p->device_bus_width) / 8), @@ -1630,7 +1612,7 @@ DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl, bankGroupsPerRank(_p->bank_groups_per_rank), bankGroupArch(_p->bank_groups_per_rank > 0), banksPerRank(_p->banks_per_rank), rowsPerBank(0), - tCK(_p->tCK), tCL(_p->tCL), tBURST(_p->tBURST), + tCK(_p->tCK), tCS(_p->tCS), tCL(_p->tCL), tBURST(_p->tBURST), tBURST_MIN(_p->tBURST_MIN), tBURST_MAX(_p->tBURST_MAX), tRTW(_p->tRTW), tCCD_L_WR(_p->tCCD_L_WR), tCCD_L(_p->tCCD_L), tRCD(_p->tRCD), tRP(_p->tRP), tRAS(_p->tRAS), tWR(_p->tWR), tRTP(_p->tRTP), @@ -1646,13 +1628,15 @@ DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl, wrToRdDly(tCL + tBURST + _p->tWTR), rdToWrDly(tBURST + tRTW), wrToRdDlySameBG(tCL + _p->tBURST_MAX + _p->tWTR_L), rdToWrDlySameBG(tRTW + _p->tBURST_MAX), - rankToRankDly(ctrl.rankDelay() + tBURST), + rankToRankDly(tCS + tBURST), pageMgmt(_p->page_policy), maxAccessesPerRow(_p->max_accesses_per_row), timeStampOffset(0), activeRank(0), enableDRAMPowerdown(_p->enable_dram_powerdown), lastStatsResetTick(0), - stats(_ctrl, *this) + stats(*this), + readBufferSize(_p->read_buffer_size), + writeBufferSize(_p->write_buffer_size) { fatal_if(!isPowerOf2(burstSize), "DRAM burst size %d is not allowed, " "must be a power of two\n", burstSize); @@ -1664,7 +1648,7 @@ DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl, for (int i = 0; i < ranksPerChannel; i++) { DPRINTF(DRAM, "Creating DRAM rank %d \n", i); - Rank* rank = new Rank(ctrl, _p, i, *this); + Rank* rank = new Rank(_p, i, *this); 
ranks.push_back(rank); } @@ -1672,6 +1656,11 @@ DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl, uint64_t deviceCapacity = deviceSize / (1024 * 1024) * devicesPerRank * ranksPerChannel; + uint64_t capacity = ULL(1) << ceilLog2(AbstractMemory::size()); + + DPRINTF(DRAM, "Memory capacity %lld (%lld) bytes\n", capacity, + AbstractMemory::size()); + // if actual DRAM size does not match memory capacity in system warn! if (deviceCapacity != capacity / (1024 * 1024)) warn("DRAM device capacity (%d Mbytes) does not match the " @@ -1726,8 +1715,10 @@ DRAMInterface::DRAMInterface(DRAMCtrl& _ctrl, } void -DRAMInterface::init(AddrRange range) +DRAMInterface::init() { + AbstractMemory::init(); + // a bit of sanity checks on the interleaving, save it for here to // ensure that the system pointer is initialised if (range.interleaved()) { @@ -1749,7 +1740,7 @@ DRAMInterface::init(AddrRange range) // channel striping has to be done at a granularity that // is equal or larger to a cache line - if (ctrl.system()->cacheLineSize() > range.granularity()) { + if (system()->cacheLineSize() > range.granularity()) { fatal("Channel interleaving of %s must be at least as large " "as the cache line size\n", name()); } @@ -1766,10 +1757,12 @@ DRAMInterface::init(AddrRange range) } void -DRAMInterface::startupRanks() +DRAMInterface::startup() { - // timestamp offset should be in clock cycles for DRAMPower - timeStampOffset = divCeil(curTick(), tCK); + if (system()->isTimingMode()) { + // timestamp offset should be in clock cycles for DRAMPower + timeStampOffset = divCeil(curTick(), tCK); + } for (auto r : ranks) { r->startup(curTick() + tREFI - tRP); @@ -1815,7 +1808,7 @@ DRAMInterface::isBusy() } void -DRAMInterface::respondEventDRAM(uint8_t rank) +DRAMInterface::respondEvent(uint8_t rank) { Rank& rank_ref = *ranks[rank]; @@ -1956,7 +1949,7 @@ DRAMInterface::minBankPrep(const DRAMPacketQueue& queue, std::max(ranks[i]->banks[j].preAllowedAt, curTick()) + tRP; // When is the earliest the R/W burst 
can issue? - const Tick col_allowed_at = ctrl.inReadBusState(false) ? + const Tick col_allowed_at = ctrl->inReadBusState(false) ? ranks[i]->banks[j].rdAllowedAt : ranks[i]->banks[j].wrAllowedAt; Tick col_at = std::max(col_allowed_at, act_at + tRCD); @@ -1996,9 +1989,15 @@ DRAMInterface::minBankPrep(const DRAMPacketQueue& queue, return make_pair(bank_mask, hidden_bank_prep); } -DRAMInterface::Rank::Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int _rank, - DRAMInterface& _dram) - : EventManager(&_ctrl), ctrl(_ctrl), dram(_dram), +DRAMInterface* +DRAMInterfaceParams::create() +{ + return new DRAMInterface(this); +} + +DRAMInterface::Rank::Rank(const DRAMInterfaceParams* _p, + int _rank, DRAMInterface& _dram) + : EventManager(&_dram), dram(_dram), pwrStateTrans(PWR_IDLE), pwrStatePostRefresh(PWR_IDLE), pwrStateTick(0), refreshDueAt(0), pwrState(PWR_IDLE), refreshState(REF_IDLE), inLowPowerState(false), rank(_rank), @@ -2011,7 +2010,7 @@ DRAMInterface::Rank::Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int _rank, refreshEvent([this]{ processRefreshEvent(); }, name()), powerEvent([this]{ processPowerEvent(); }, name()), wakeUpEvent([this]{ processWakeUpEvent(); }, name()), - stats(_ctrl, *this) + stats(_dram, *this) { for (int b = 0; b < _p->banks_per_rank; b++) { banks[b].bank = b; @@ -2062,8 +2061,10 @@ bool DRAMInterface::Rank::isQueueEmpty() const { // check commmands in Q based on current bus direction - bool no_queued_cmds = (ctrl.inReadBusState(true) && (readEntries == 0)) - || (ctrl.inWriteBusState(true) && (writeEntries == 0)); + bool no_queued_cmds = (dram.ctrl->inReadBusState(true) && + (readEntries == 0)) + || (dram.ctrl->inWriteBusState(true) && + (writeEntries == 0)); return no_queued_cmds; } @@ -2187,7 +2188,7 @@ DRAMInterface::Rank::processRefreshEvent() // if a request is at the moment being handled and this request is // accessing the current rank then wait for it to finish if ((rank == dram.activeRank) - && (ctrl.requestEventScheduled())) { + && 
(dram.ctrl->requestEventScheduled())) { // hand control over to the request loop until it is // evaluated next DPRINTF(DRAM, "Refresh awaiting draining\n"); @@ -2262,7 +2263,7 @@ DRAMInterface::Rank::processRefreshEvent() // or have outstanding ACT,RD/WR,Auto-PRE sequence scheduled // should have outstanding precharge or read response event assert(prechargeEvent.scheduled() || - ctrl.respondEventScheduled()); + dram.ctrl->respondEventScheduled()); // will start refresh when pwrState transitions to IDLE } @@ -2322,8 +2323,8 @@ DRAMInterface::Rank::processRefreshEvent() assert(!powerEvent.scheduled()); - if ((ctrl.drainState() == DrainState::Draining) || - (ctrl.drainState() == DrainState::Drained)) { + if ((dram.ctrl->drainState() == DrainState::Draining) || + (dram.ctrl->drainState() == DrainState::Drained)) { // if draining, do not re-enter low-power mode. // simply go to IDLE and wait schedulePowerEvent(PWR_IDLE, curTick()); @@ -2548,10 +2549,10 @@ DRAMInterface::Rank::processPowerEvent() } // completed refresh event, ensure next request is scheduled - if (!ctrl.requestEventScheduled()) { + if (!dram.ctrl->requestEventScheduled()) { DPRINTF(DRAM, "Scheduling next request after refreshing" " rank %d\n", rank); - ctrl.restartScheduler(curTick()); + dram.ctrl->restartScheduler(curTick()); } } @@ -2610,8 +2611,8 @@ DRAMInterface::Rank::processPowerEvent() // bypass auto-refresh and go straight to SREF, where memory // will issue refresh immediately upon entry if (pwrStatePostRefresh == PWR_PRE_PDN && isQueueEmpty() && - (ctrl.drainState() != DrainState::Draining) && - (ctrl.drainState() != DrainState::Drained) && + (dram.ctrl->drainState() != DrainState::Draining) && + (dram.ctrl->drainState() != DrainState::Drained) && dram.enableDRAMPowerdown) { DPRINTF(DRAMState, "Rank %d bypassing refresh and transitioning " "to self refresh at %11u tick\n", rank, curTick()); @@ -2712,7 +2713,7 @@ DRAMInterface::Rank::resetStats() { bool 
DRAMInterface::Rank::forceSelfRefreshExit() const { return (readEntries != 0) || - (ctrl.inWriteBusState(true) && (writeEntries != 0)); + (dram.ctrl->inWriteBusState(true) && (writeEntries != 0)); } DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl) @@ -2723,15 +2724,15 @@ DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl) ADD_STAT(writeReqs, "Number of write requests accepted"), ADD_STAT(readBursts, - "Number of DRAM read bursts, " + "Number of controller read bursts, " "including those serviced by the write queue"), ADD_STAT(writeBursts, - "Number of DRAM write bursts, " + "Number of controller write bursts, " "including those merged in the write queue"), ADD_STAT(servicedByWrQ, - "Number of DRAM read bursts serviced by the write queue"), + "Number of controller read bursts serviced by the write queue"), ADD_STAT(mergedWrBursts, - "Number of DRAM write bursts merged with an existing one"), + "Number of controller write bursts merged with an existing one"), ADD_STAT(neitherReadNorWriteReqs, "Number of requests that are neither read nor write"), @@ -2739,9 +2740,6 @@ DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl) ADD_STAT(avgRdQLen, "Average read queue length when enqueuing"), ADD_STAT(avgWrQLen, "Average write queue length when enqueuing"), - ADD_STAT(totBusLat, "Total ticks spent in databus transfers"), - ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"), - ADD_STAT(numRdRetry, "Number of times read queue was full causing retry"), ADD_STAT(numWrRetry, "Number of times write queue was full causing retry"), @@ -2756,22 +2754,13 @@ DRAMCtrl::CtrlStats::CtrlStats(DRAMCtrl &_ctrl) ADD_STAT(wrPerTurnAround, "Writes before turning the bus around for reads"), - ADD_STAT(bytesRead, "Total number of bytes read from memory"), ADD_STAT(bytesReadWrQ, "Total number of bytes read from write queue"), - ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"), ADD_STAT(bytesReadSys, "Total read bytes from the system interface side"), ADD_STAT(bytesWrittenSys, "Total 
written bytes from the system interface side"), - ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiByte/s"), - ADD_STAT(avgWrBW, "Average achieved write bandwidth in MiByte/s"), ADD_STAT(avgRdBWSys, "Average system read bandwidth in MiByte/s"), ADD_STAT(avgWrBWSys, "Average system write bandwidth in MiByte/s"), - ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"), - - ADD_STAT(busUtil, "Data bus utilization in percentage"), - ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"), - ADD_STAT(busUtilWrite, "Data bus utilization in percentage for writes"), ADD_STAT(totGap, "Total gap between requests"), ADD_STAT(avgGap, "Average gap between requests"), @@ -2803,12 +2792,11 @@ DRAMCtrl::CtrlStats::regStats() { using namespace Stats; - assert(ctrl._system); - const auto max_masters = ctrl._system->maxMasters(); + assert(ctrl.system()); + const auto max_masters = ctrl.system()->maxMasters(); avgRdQLen.precision(2); avgWrQLen.precision(2); - avgBusLat.precision(2); readPktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1); writePktSize.init(ceilLog2(ctrl.dram->bytesPerBurst()) + 1); @@ -2823,14 +2811,9 @@ DRAMCtrl::CtrlStats::regStats() .init(ctrl.writeBufferSize) .flags(nozero); - avgRdBW.precision(2); - avgWrBW.precision(2); avgRdBWSys.precision(2); avgWrBWSys.precision(2); - peakBW.precision(2); - busUtil.precision(2); avgGap.precision(2); - busUtilWrite.precision(2); // per-master bytes read and written to memory masterReadBytes @@ -2862,9 +2845,6 @@ DRAMCtrl::CtrlStats::regStats() .flags(nonan) .precision(2); - busUtilRead - .precision(2); - masterWriteRate .flags(nozero | nonan) .precision(12); @@ -2878,7 +2858,7 @@ DRAMCtrl::CtrlStats::regStats() .precision(2); for (int i = 0; i < max_masters; i++) { - const std::string master = ctrl._system->getMasterName(i); + const std::string master = ctrl.system()->getMasterName(i); masterReadBytes.subname(i, master); masterReadRate.subname(i, master); masterWriteBytes.subname(i, master); @@ 
-2892,22 +2872,11 @@ DRAMCtrl::CtrlStats::regStats() } // Formula stats - avgBusLat = totBusLat / (readBursts - servicedByWrQ); - - avgRdBW = (bytesRead / 1000000) / simSeconds; - avgWrBW = (bytesWritten / 1000000) / simSeconds; avgRdBWSys = (bytesReadSys / 1000000) / simSeconds; avgWrBWSys = (bytesWrittenSys / 1000000) / simSeconds; - peakBW = (SimClock::Frequency / ctrl.dram->burstDataDelay()) * - ctrl.dram->bytesPerBurst() / 1000000; - - busUtil = (avgRdBW + avgWrBW) / peakBW * 100; avgGap = totGap / (readReqs + writeReqs); - busUtilRead = avgRdBW / peakBW * 100; - busUtilWrite = avgWrBW / peakBW * 100; - masterReadRate = masterReadBytes / simSeconds; masterWriteRate = masterWriteBytes / simSeconds; masterReadAvgLat = masterReadTotalLat / masterReadAccesses; @@ -2920,8 +2889,8 @@ DRAMInterface::DRAMStats::resetStats() dram.lastStatsResetTick = curTick(); } -DRAMInterface::DRAMStats::DRAMStats(DRAMCtrl &_ctrl, DRAMInterface &_dram) - : Stats::Group(&_ctrl, csprintf("dram").c_str()), +DRAMInterface::DRAMStats::DRAMStats(DRAMInterface &_dram) + : Stats::Group(&_dram), dram(_dram), ADD_STAT(readBursts, "Number of DRAM read bursts"), @@ -2931,10 +2900,13 @@ DRAMInterface::DRAMStats::DRAMStats(DRAMCtrl &_ctrl, DRAMInterface &_dram) ADD_STAT(perBankWrBursts, "Per bank write bursts"), ADD_STAT(totQLat, "Total ticks spent queuing"), + ADD_STAT(totBusLat, "Total ticks spent in databus transfers"), ADD_STAT(totMemAccLat, "Total ticks spent from burst creation until serviced " "by the DRAM"), + ADD_STAT(avgQLat, "Average queueing delay per DRAM burst"), + ADD_STAT(avgBusLat, "Average bus latency per DRAM burst"), ADD_STAT(avgMemAccLat, "Average memory access latency per DRAM burst"), ADD_STAT(readRowHits, "Number of row buffer hits during reads"), @@ -2947,6 +2919,12 @@ DRAMInterface::DRAMStats::DRAMStats(DRAMCtrl &_ctrl, DRAMInterface &_dram) ADD_STAT(bytesWritten, "Total number of bytes written to DRAM"), ADD_STAT(avgRdBW, "Average DRAM read bandwidth in MiBytes/s"), 
ADD_STAT(avgWrBW, "Average DRAM write bandwidth in MiBytes/s"), + ADD_STAT(peakBW, "Theoretical peak bandwidth in MiByte/s"), + + ADD_STAT(busUtil, "Data bus utilization in percentage"), + ADD_STAT(busUtilRead, "Data bus utilization in percentage for reads"), + ADD_STAT(busUtilWrite, "Data bus utilization in percentage for writes"), + ADD_STAT(pageHitRate, "Row buffer hit rate, read and write combined") { @@ -2958,6 +2936,7 @@ DRAMInterface::DRAMStats::regStats() using namespace Stats; avgQLat.precision(2); + avgBusLat.precision(2); avgMemAccLat.precision(2); readRowHitRate.precision(2); @@ -2971,10 +2950,16 @@ DRAMInterface::DRAMStats::regStats() dram.maxAccessesPerRow : dram.rowBufferSize) .flags(nozero); + peakBW.precision(2); + busUtil.precision(2); + busUtilWrite.precision(2); + busUtilRead.precision(2); + pageHitRate.precision(2); // Formula stats avgQLat = totQLat / readBursts; + avgBusLat = totBusLat / readBursts; avgMemAccLat = totMemAccLat / readBursts; readRowHitRate = (readRowHits / readBursts) * 100; @@ -2982,13 +2967,19 @@ DRAMInterface::DRAMStats::regStats() avgRdBW = (bytesRead / 1000000) / simSeconds; avgWrBW = (bytesWritten / 1000000) / simSeconds; + peakBW = (SimClock::Frequency / dram.burstDataDelay()) * + dram.bytesPerBurst() / 1000000; + + busUtil = (avgRdBW + avgWrBW) / peakBW * 100; + busUtilRead = avgRdBW / peakBW * 100; + busUtilWrite = avgWrBW / peakBW * 100; pageHitRate = (writeRowHits + readRowHits) / (writeBursts + readBursts) * 100; } -DRAMInterface::RankStats::RankStats(DRAMCtrl &_ctrl, Rank &_rank) - : Stats::Group(&_ctrl, csprintf("dram_rank%d", _rank.rank).c_str()), +DRAMInterface::RankStats::RankStats(DRAMInterface &_dram, Rank &_rank) + : Stats::Group(&_dram, csprintf("rank%d", _rank.rank).c_str()), rank(_rank), ADD_STAT(actEnergy, "Energy for activate commands per rank (pJ)"), @@ -3047,7 +3038,7 @@ void DRAMCtrl::recvFunctional(PacketPtr pkt) { // rely on the abstract memory - functionalAccess(pkt); + 
dram->functionalAccess(pkt); } Port & @@ -3093,6 +3084,7 @@ DRAMCtrl::drainResume() // if we switched to timing mode, kick things into action, // and behave as if we restored from a checkpoint startup(); + dram->startup(); } else if (isTimingMode && !system()->isTimingMode()) { // if we switch from timing mode, stop the refresh events to // not cause issues with KVM @@ -3112,7 +3104,7 @@ AddrRangeList DRAMCtrl::MemoryPort::getAddrRanges() const { AddrRangeList ranges; - ranges.push_back(ctrl.getAddrRange()); + ranges.push_back(ctrl.dram->getAddrRange()); return ranges; } diff --git a/src/mem/dram_ctrl.hh b/src/mem/dram_ctrl.hh index dc030b1ea..417e93554 100644 --- a/src/mem/dram_ctrl.hh +++ b/src/mem/dram_ctrl.hh @@ -55,12 +55,15 @@ #include "enums/AddrMap.hh" #include "enums/MemSched.hh" #include "enums/PageManage.hh" +#include "mem/abstract_mem.hh" #include "mem/drampower.hh" #include "mem/qos/mem_ctrl.hh" #include "mem/qport.hh" #include "params/DRAMCtrl.hh" #include "sim/eventq.hh" +class DRAMInterfaceParams; + /** * A basic class to track the bank state, i.e. what row is * currently open (if any), when is the bank free to accept a new @@ -242,7 +245,7 @@ typedef std::deque DRAMPacketQueue; * The DRAMInterface includes a class for individual ranks * and per rank functions. 
*/ -class DRAMInterface : public SimObject +class DRAMInterface : public AbstractMemory { private: /** @@ -342,7 +345,7 @@ class DRAMInterface : public SimObject class Rank; struct RankStats : public Stats::Group { - RankStats(DRAMCtrl &ctrl, Rank &rank); + RankStats(DRAMInterface &dram, Rank &rank); void regStats() override; void resetStats() override; @@ -408,13 +411,6 @@ class DRAMInterface : public SimObject */ class Rank : public EventManager { - protected: - - /** - * A reference to the parent DRAMCtrl instance - */ - DRAMCtrl& ctrl; - private: /** @@ -534,10 +530,10 @@ class DRAMInterface : public SimObject */ Tick lastBurstTick; - Rank(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, int _rank, + Rank(const DRAMInterfaceParams* _p, int _rank, DRAMInterface& _dram); - const std::string name() const { return csprintf("dram_%d", rank); } + const std::string name() const { return csprintf("%d", rank); } /** * Kick off accounting for power and refresh states and @@ -659,15 +655,16 @@ class DRAMInterface : public SimObject * @param next Memory Command * @return true if timeStamp of Command 1 < timeStamp of Command 2 */ - static bool sortTime(const Command& cmd, const Command& cmd_next) + static bool + sortTime(const Command& cmd, const Command& cmd_next) { return cmd.timeStamp < cmd_next.timeStamp; - }; + } /** - * A reference to the parent DRAMCtrl instance + * A pointer to the parent DRAMCtrl instance */ - DRAMCtrl& ctrl; + DRAMCtrl* ctrl; /** * Memory controller configuration initialized based on parameter @@ -698,6 +695,7 @@ class DRAMInterface : public SimObject * DRAM timing requirements */ const Tick M5_CLASS_VAR_USED tCK; + const Tick tCS; const Tick tCL; const Tick tBURST; const Tick tBURST_MIN; @@ -781,7 +779,7 @@ class DRAMInterface : public SimObject struct DRAMStats : public Stats::Group { - DRAMStats(DRAMCtrl &ctrl, DRAMInterface &dram); + DRAMStats(DRAMInterface &dram); void regStats() override; void resetStats() override; @@ -798,10 +796,12 @@ class 
DRAMInterface : public SimObject // Latencies summed over all requests Stats::Scalar totQLat; + Stats::Scalar totBusLat; Stats::Scalar totMemAccLat; // Average latencies per request Stats::Formula avgQLat; + Stats::Formula avgBusLat; Stats::Formula avgMemAccLat; // Row hit count and rate @@ -817,6 +817,11 @@ class DRAMInterface : public SimObject // Average bandwidth Stats::Formula avgRdBW; Stats::Formula avgWrBW; + Stats::Formula peakBW; + // bus utilization + Stats::Formula busUtil; + Stats::Formula busUtilRead; + Stats::Formula busUtilWrite; Stats::Formula pageHitRate; }; @@ -828,16 +833,28 @@ class DRAMInterface : public SimObject std::vector ranks; public: + + /** + * Buffer sizes for read and write queues in the controller + * These are passed to the controller on instantiation + * Defining them here allows for buffers to be resized based + * on memory type / configuration. + */ + const uint32_t readBufferSize; + const uint32_t writeBufferSize; + + /** Setting a pointer to the controller */ + void setCtrl(DRAMCtrl* _ctrl) { ctrl = _ctrl; } + /** * Initialize the DRAM interface and verify parameters - * @param range is the address range for this interface */ - void init(AddrRange range); + void init() override; /** * Iterate through dram ranks and instantiate per rank startup routine */ - void startupRanks(); + void startup() override; /** * Iterate through dram ranks to exit self-refresh in order to drain @@ -860,16 +877,27 @@ class DRAMInterface : public SimObject */ void suspend(); + /** + * Get an address in a dense range which starts from 0. The input + * address is the physical address of the request in an address + * space that contains other SimObjects apart from this + * controller. 
+ * + * @param addr The input address which should be in the addrRange + * @return An address in the continuous range [0, max) + */ + Addr getCtrlAddr(Addr addr) { return range.getOffset(addr); } + /** * @return number of bytes in a burst for this interface */ - uint32_t bytesPerBurst() const { return burstSize; }; + uint32_t bytesPerBurst() const { return burstSize; } /** * * @return number of ranks per channel for this interface */ - uint32_t numRanks() const { return ranksPerChannel; }; + uint32_t numRanks() const { return ranksPerChannel; } /* * @return time to send a burst of data @@ -879,7 +907,8 @@ class DRAMInterface : public SimObject /* * @return time to send a burst of data without gaps */ - Tick burstDataDelay() const + Tick + burstDataDelay() const { return (burstInterleave ? tBURST_MAX / 2 : tBURST); } @@ -893,7 +922,14 @@ class DRAMInterface : public SimObject * * @return additional bus turnaround required for read-to-write */ - Tick minRdToWr() const { return tRTW; }; + Tick minRdToWr() const { return tRTW; } + + /** + * Determine the required delay for an access to a different rank + * + * @return required rank to rank delay + */ + Tick rankDelay() const { return tCS; } /* * Function to calulate RAS cycle time for use within and @@ -957,7 +993,8 @@ class DRAMInterface : public SimObject * This requires the DRAM to be in the * REF IDLE state */ - bool burstReady(uint8_t rank) const + bool + burstReady(uint8_t rank) const { return ranks[rank]->inRefIdleState(); } @@ -979,7 +1016,7 @@ class DRAMInterface : public SimObject * * @param rank Specifies rank associated with read burst */ - void respondEventDRAM(uint8_t rank); + void respondEvent(uint8_t rank); /** * Check the refresh state to determine if refresh needs @@ -989,8 +1026,7 @@ class DRAMInterface : public SimObject */ void checkRefreshState(uint8_t rank); - DRAMInterface(DRAMCtrl& _ctrl, const DRAMCtrlParams* _p, - uint64_t capacity, AddrRange range); + DRAMInterface(const DRAMInterfaceParams*
_p); }; /** @@ -1140,20 +1176,6 @@ class DRAMCtrl : public QoS::MemCtrl */ void accessAndRespond(PacketPtr pkt, Tick static_latency); - /** - * Get an address in a dense range which starts from 0. The input - * address is the physical address of the request in an address - * space that contains other SimObjects apart from this - * controller. - * - * @param addr The intput address which should be in the addrRange - * @return An address in the continues range [0, max) - */ - Addr getCtrlAddr(Addr addr) - { - return range.getOffset(addr); - } - /** * The memory schduler/arbiter - picks which request needs to * go next, based on the specified policy such as FCFS or FR-FCFS @@ -1236,6 +1258,11 @@ class DRAMCtrl : public QoS::MemCtrl */ std::unordered_multiset burstTicks; + /** + * Create pointer to interface of the actual dram media + */ + DRAMInterface* const dram; + /** * The following are basic design parameters of the memory * controller, and are initialized based on parameter values. @@ -1250,12 +1277,6 @@ class DRAMCtrl : public QoS::MemCtrl uint32_t writesThisTime; uint32_t readsThisTime; - /** - * Basic memory timing parameters initialized based on parameter - * values. These will be used across memory interfaces. - */ - const Tick tCS; - /** * Memory controller configuration initialized based on parameter * values. 
@@ -1310,10 +1331,6 @@ class DRAMCtrl : public QoS::MemCtrl // Average queue lengths Stats::Average avgRdQLen; Stats::Average avgWrQLen; - // Latencies summed over all requests - Stats::Scalar totBusLat; - // Average latencies per request - Stats::Formula avgBusLat; Stats::Scalar numRdRetry; Stats::Scalar numWrRetry; @@ -1324,21 +1341,12 @@ class DRAMCtrl : public QoS::MemCtrl Stats::Histogram rdPerTurnAround; Stats::Histogram wrPerTurnAround; - Stats::Scalar bytesRead; Stats::Scalar bytesReadWrQ; - Stats::Scalar bytesWritten; Stats::Scalar bytesReadSys; Stats::Scalar bytesWrittenSys; // Average bandwidth - Stats::Formula avgRdBW; - Stats::Formula avgWrBW; Stats::Formula avgRdBWSys; Stats::Formula avgWrBWSys; - Stats::Formula peakBW; - // bus utilization - Stats::Formula busUtil; - Stats::Formula busUtilRead; - Stats::Formula busUtilWrite; Stats::Scalar totGap; Stats::Formula avgGap; @@ -1366,11 +1374,6 @@ class DRAMCtrl : public QoS::MemCtrl CtrlStats stats; - /** - * Create pointer to interfasce to the actual media - */ - DRAMInterface* dram; - /** * Upstream caches need this packet until true is returned, so * hold it for deletion until a subsequent call @@ -1448,13 +1451,6 @@ class DRAMCtrl : public QoS::MemCtrl */ void restartScheduler(Tick tick) { schedule(nextReqEvent, tick); } - /** - * Determine the required delay for an access to a different rank - * - * @return required rank to rank delay - */ - Tick rankDelay() const { return tCS; } - /** * Check the current direction of the memory channel * diff --git a/src/mem/drampower.cc b/src/mem/drampower.cc index 13551a0b4..96dcb5518 100644 --- a/src/mem/drampower.cc +++ b/src/mem/drampower.cc @@ -40,13 +40,13 @@ #include "base/intmath.hh" #include "sim/core.hh" -DRAMPower::DRAMPower(const DRAMCtrlParams* p, bool include_io) : +DRAMPower::DRAMPower(const DRAMInterfaceParams* p, bool include_io) : powerlib(libDRAMPower(getMemSpec(p), include_io)) { } Data::MemArchitectureSpec -DRAMPower::getArchParams(const 
DRAMCtrlParams* p) +DRAMPower::getArchParams(const DRAMInterfaceParams* p) { Data::MemArchitectureSpec archSpec; archSpec.burstLength = p->burst_length; @@ -68,7 +68,7 @@ DRAMPower::getArchParams(const DRAMCtrlParams* p) } Data::MemTimingSpec -DRAMPower::getTimingParams(const DRAMCtrlParams* p) +DRAMPower::getTimingParams(const DRAMInterfaceParams* p) { // Set the values that are used for power calculations and ignore // the ones only used by the controller functionality in DRAMPower @@ -100,7 +100,7 @@ DRAMPower::getTimingParams(const DRAMCtrlParams* p) } Data::MemPowerSpec -DRAMPower::getPowerParams(const DRAMCtrlParams* p) +DRAMPower::getPowerParams(const DRAMInterfaceParams* p) { // All DRAMPower currents are in mA Data::MemPowerSpec powerSpec; @@ -132,7 +132,7 @@ DRAMPower::getPowerParams(const DRAMCtrlParams* p) } Data::MemorySpecification -DRAMPower::getMemSpec(const DRAMCtrlParams* p) +DRAMPower::getMemSpec(const DRAMInterfaceParams* p) { Data::MemorySpecification memSpec; memSpec.memArchSpec = getArchParams(p); @@ -142,7 +142,18 @@ DRAMPower::getMemSpec(const DRAMCtrlParams* p) } bool -DRAMPower::hasTwoVDD(const DRAMCtrlParams* p) +DRAMPower::hasTwoVDD(const DRAMInterfaceParams* p) { return p->VDD2 == 0 ? 
false : true; } + +uint8_t +DRAMPower::getDataRate(const DRAMInterfaceParams* p) +{ + uint32_t burst_cycles = divCeil(p->tBURST_MAX, p->tCK); + uint8_t data_rate = p->burst_length / burst_cycles; + // 4 for GDDR5 + if (data_rate != 1 && data_rate != 2 && data_rate != 4 && data_rate != 8) + fatal("Got unexpected data rate %d, should be 1 or 2 or 4 or 8\n", data_rate); + return data_rate; +} diff --git a/src/mem/drampower.hh b/src/mem/drampower.hh index da24bcadb..da68a787f 100644 --- a/src/mem/drampower.hh +++ b/src/mem/drampower.hh @@ -44,7 +44,7 @@ #define __MEM_DRAM_POWER_HH__ #include "libdrampower/LibDRAMPower.h" -#include "params/DRAMCtrl.hh" +#include "params/DRAMInterface.hh" /** * DRAMPower is a standalone tool which calculates the power consumed by a @@ -57,38 +57,44 @@ class DRAMPower /** * Transform the architechture parameters defined in - * DRAMCtrlParams to the memSpec of DRAMPower + * DRAMInterfaceParams to the memSpec of DRAMPower */ - static Data::MemArchitectureSpec getArchParams(const DRAMCtrlParams* p); + static Data::MemArchitectureSpec getArchParams( + const DRAMInterfaceParams* p); /** - * Transforms the timing parameters defined in DRAMCtrlParams to + * Transforms the timing parameters defined in DRAMInterfaceParams to * the memSpec of DRAMPower */ - static Data::MemTimingSpec getTimingParams(const DRAMCtrlParams* p); + static Data::MemTimingSpec getTimingParams(const DRAMInterfaceParams* p); /** - * Transforms the power and current parameters defined in - * DRAMCtrlParam to the memSpec of DRAMPower + * DRAMInterfaceParams to the memSpec of DRAMPower */ - static Data::MemPowerSpec getPowerParams(const DRAMCtrlParams* p); + static Data::MemPowerSpec getPowerParams(const DRAMInterfaceParams* p); + + /** + * Determine data rate; valid values are 1, 2, 4 or 8.
+ */ + static uint8_t getDataRate(const DRAMInterfaceParams* p); /** * Determine if DRAM has two voltage domains (or one) */ - static bool hasTwoVDD(const DRAMCtrlParams* p); + static bool hasTwoVDD(const DRAMInterfaceParams* p); /** - * Return an instance of MemSpec based on the DRAMCtrlParams + * Return an instance of MemSpec based on the DRAMInterfaceParams */ - static Data::MemorySpecification getMemSpec(const DRAMCtrlParams* p); + static Data::MemorySpecification getMemSpec(const DRAMInterfaceParams* p); public: // Instance of DRAMPower Library libDRAMPower powerlib; - DRAMPower(const DRAMCtrlParams* p, bool include_io); + DRAMPower(const DRAMInterfaceParams* p, bool include_io); }; diff --git a/src/mem/qos/QoSMemCtrl.py b/src/mem/qos/QoSMemCtrl.py index 1cd3f0b33..f55105bef 100644 --- a/src/mem/qos/QoSMemCtrl.py +++ b/src/mem/qos/QoSMemCtrl.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 ARM Limited +# Copyright (c) 2018-2020 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -34,18 +34,21 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from m5.params import * -from m5.objects.AbstractMemory import AbstractMemory +from m5.proxy import * +from m5.objects.ClockedObject import ClockedObject from m5.objects.QoSTurnaround import * # QoS Queue Selection policy used to select packets among same-QoS queues class QoSQPolicy(Enum): vals = ["fifo", "lifo", "lrg"] -class QoSMemCtrl(AbstractMemory): +class QoSMemCtrl(ClockedObject): type = 'QoSMemCtrl' cxx_header = "mem/qos/mem_ctrl.hh" cxx_class = 'QoS::MemCtrl' abstract = True + system = Param.System(Parent.any, "System that the controller belongs to.") + ##### QoS support parameters #### # Number of priorities in the system diff --git a/src/mem/qos/QoSMemSinkCtrl.py b/src/mem/qos/QoSMemSinkCtrl.py index 6c4f2632d..fafac64a6 100644 --- a/src/mem/qos/QoSMemSinkCtrl.py +++ b/src/mem/qos/QoSMemSinkCtrl.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 ARM Limited +# Copyright (c) 2018-2020 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -37,6 +37,7 @@ from m5.params import * from m5.objects.QoSMemCtrl import * +from m5.objects.QoSMemSinkInterface import * class QoSMemSinkCtrl(QoSMemCtrl): type = 'QoSMemSinkCtrl' @@ -44,6 +45,10 @@ class QoSMemSinkCtrl(QoSMemCtrl): cxx_class = "QoS::MemSinkCtrl" port = ResponsePort("Response ports") + + interface = Param.QoSMemSinkInterface(QoSMemSinkInterface(), + "Interface to memory") + # the basic configuration of the controller architecture, note # that each entry corresponds to a burst for the specific DRAM # configuration (e.g. 
x32 with burst length 8 is 32 bytes) and not @@ -59,5 +64,3 @@ class QoSMemSinkCtrl(QoSMemCtrl): # response latency - time to issue a response once a request is serviced response_latency = Param.Latency("20ns", "Memory response latency") - - diff --git a/src/mem/qos/QoSMemSinkInterface.py b/src/mem/qos/QoSMemSinkInterface.py new file mode 100644 index 000000000..5c79f64ec --- /dev/null +++ b/src/mem/qos/QoSMemSinkInterface.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020 ARM Limited +# All rights reserved. +# +# The license below extends only to copyright in the software and shall +# not be construed as granting a license to any other intellectual +# property including but not limited to intellectual property relating +# to a hardware implementation of the functionality of the software +# licensed hereunder. You may use the software subject to the license +# terms below provided that you ensure that this notice is replicated +# unmodified and in its entirety in all distributions of the software, +# modified or unmodified, in source code or in binary form. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.objects.AbstractMemory import AbstractMemory + +class QoSMemSinkInterface(AbstractMemory): + type = 'QoSMemSinkInterface' + cxx_header = "mem/qos/mem_sink.hh" diff --git a/src/mem/qos/SConscript b/src/mem/qos/SConscript index f8601b68f..1d90f9ce8 100644 --- a/src/mem/qos/SConscript +++ b/src/mem/qos/SConscript @@ -1,4 +1,4 @@ -# Copyright (c) 2018 ARM Limited +# Copyright (c) 2018-2020 ARM Limited # All rights reserved # # The license below extends only to copyright in the software and shall @@ -37,6 +37,7 @@ Import('*') SimObject('QoSMemCtrl.py') SimObject('QoSMemSinkCtrl.py') +SimObject('QoSMemSinkInterface.py') SimObject('QoSPolicy.py') SimObject('QoSTurnaround.py') diff --git a/src/mem/qos/mem_ctrl.cc b/src/mem/qos/mem_ctrl.cc index 50e6035e9..190960bc1 100644 --- a/src/mem/qos/mem_ctrl.cc +++ b/src/mem/qos/mem_ctrl.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited + * Copyright (c) 2017-2020 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -42,7 +42,7 @@ namespace QoS { MemCtrl::MemCtrl(const QoSMemCtrlParams * p) - : AbstractMemory(p), + : ClockedObject(p), policy(p->qos_policy), 
turnPolicy(p->qos_turnaround_policy), queuePolicy(QueuePolicy::create(p)), @@ -51,7 +51,8 @@ MemCtrl::MemCtrl(const QoSMemCtrlParams * p) qosSyncroScheduler(p->qos_syncro_scheduler), totalReadQueueSize(0), totalWriteQueueSize(0), busState(READ), busStateNext(READ), - stats(*this) + stats(*this), + _system(p->system) { // Set the priority policy if (policy) { @@ -76,12 +77,6 @@ MemCtrl::MemCtrl(const QoSMemCtrlParams * p) MemCtrl::~MemCtrl() {} -void -MemCtrl::init() -{ - AbstractMemory::init(); -} - void MemCtrl::logRequest(BusState dir, MasterID m_id, uint8_t qos, Addr addr, uint64_t entries) diff --git a/src/mem/qos/mem_ctrl.hh b/src/mem/qos/mem_ctrl.hh index 0e29fcc6c..5d7c9d632 100644 --- a/src/mem/qos/mem_ctrl.hh +++ b/src/mem/qos/mem_ctrl.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited + * Copyright (c) 2020 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -36,10 +36,10 @@ */ #include "debug/QOS.hh" -#include "mem/abstract_mem.hh" -#include "mem/qos/q_policy.hh" #include "mem/qos/policy.hh" +#include "mem/qos/q_policy.hh" #include "params/QoSMemCtrl.hh" +#include "sim/clocked_object.hh" #include "sim/system.hh" #include @@ -56,7 +56,7 @@ namespace QoS { * which support QoS - it provides access to a set of QoS * scheduling policies */ -class MemCtrl: public AbstractMemory +class MemCtrl : public ClockedObject { public: /** Bus Direction */ @@ -151,6 +151,9 @@ class MemCtrl: public AbstractMemory Stats::Scalar numStayWriteState; } stats; + /** Pointer to the System object */ + System* _system; + /** * Initializes dynamically counters and * statistics for a given Master @@ -265,11 +268,6 @@ class MemCtrl: public AbstractMemory virtual ~MemCtrl(); - /** - * Initializes this object - */ - void init() override; - /** * Gets the current bus state * @@ -346,6 +344,10 @@ class MemCtrl: public AbstractMemory * @return total number of priority levels */ uint8_t numPriorities() const { return 
_numPriorities; } + + /** read the system pointer + * @return pointer to the system object */ + System* system() const { return _system; } }; template diff --git a/src/mem/qos/mem_sink.cc b/src/mem/qos/mem_sink.cc index 1f104e432..dbdf54812 100644 --- a/src/mem/qos/mem_sink.cc +++ b/src/mem/qos/mem_sink.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited + * Copyright (c) 2018-2020 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -40,6 +40,7 @@ #include "debug/Drain.hh" #include "debug/QOS.hh" #include "mem_sink.hh" +#include "params/QoSMemSinkInterface.hh" #include "sim/system.hh" namespace QoS { @@ -50,12 +51,15 @@ MemSinkCtrl::MemSinkCtrl(const QoSMemSinkCtrlParams* p) memoryPacketSize(p->memory_packet_size), readBufferSize(p->read_buffer_size), writeBufferSize(p->write_buffer_size), port(name() + ".port", *this), + interface(p->interface), retryRdReq(false), retryWrReq(false), nextRequest(0), nextReqEvent(this) { // Resize read and write queue to allocate space // for configured QoS priorities readQueue.resize(numPriorities()); writeQueue.resize(numPriorities()); + + interface->setMemCtrl(this); } MemSinkCtrl::~MemSinkCtrl() @@ -92,7 +96,7 @@ MemSinkCtrl::recvAtomic(PacketPtr pkt) "%s Should not see packets where cache is responding\n", __func__); - access(pkt); + interface->access(pkt); return responseLatency; } @@ -101,7 +105,7 @@ MemSinkCtrl::recvFunctional(PacketPtr pkt) { pkt->pushLabel(name()); - functionalAccess(pkt); + interface->functionalAccess(pkt); pkt->popLabel(); } @@ -279,7 +283,7 @@ MemSinkCtrl::processNextReqEvent() // Do the actual memory access which also turns the packet // into a response - access(pkt); + interface->access(pkt); // Log the response logResponse(pkt->isRead()? 
READ : WRITE, @@ -351,7 +355,7 @@ AddrRangeList MemSinkCtrl::MemoryPort::getAddrRanges() const { AddrRangeList ranges; - ranges.push_back(memory.getAddrRange()); + ranges.push_back(memory.interface->getAddrRange()); return ranges; } @@ -390,3 +394,13 @@ QoSMemSinkCtrlParams::create() return new QoS::MemSinkCtrl(this); } +QoSMemSinkInterface::QoSMemSinkInterface(const QoSMemSinkInterfaceParams* _p) + : AbstractMemory(_p) +{ +} + +QoSMemSinkInterface* +QoSMemSinkInterfaceParams::create() +{ + return new QoSMemSinkInterface(this); +} diff --git a/src/mem/qos/mem_sink.hh b/src/mem/qos/mem_sink.hh index 9a51269dc..5f6c1be76 100644 --- a/src/mem/qos/mem_sink.hh +++ b/src/mem/qos/mem_sink.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited + * Copyright (c) 2018-2020 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -41,10 +41,14 @@ #ifndef __MEM_QOS_MEM_SINK_HH__ #define __MEM_QOS_MEM_SINK_HH__ +#include "mem/abstract_mem.hh" #include "mem/qos/mem_ctrl.hh" #include "mem/qport.hh" #include "params/QoSMemSinkCtrl.hh" +class QoSMemSinkInterfaceParams; +class QoSMemSinkInterface; + namespace QoS { /** @@ -163,6 +167,11 @@ class MemSinkCtrl : public MemCtrl /** Memory slave port */ MemoryPort port; + /** + * Create pointer to interface of actual media + */ + QoSMemSinkInterface* const interface; + /** Read request pending */ bool retryRdReq; @@ -244,4 +253,17 @@ class MemSinkCtrl : public MemCtrl } // namespace QoS +class QoSMemSinkInterface : public AbstractMemory +{ + public: + /** Setting a pointer to the controller */ + void setMemCtrl(QoS::MemSinkCtrl* _ctrl) { ctrl = _ctrl; } + + /** Pointer to the controller */ + QoS::MemSinkCtrl* ctrl; + + QoSMemSinkInterface(const QoSMemSinkInterfaceParams* _p); +}; + + #endif /* __MEM_QOS_MEM_SINK_HH__ */ diff --git a/tests/gem5/configs/base_config.py b/tests/gem5/configs/base_config.py index b5bddf4ce..cbea76874 100644 --- a/tests/gem5/configs/base_config.py +++
b/tests/gem5/configs/base_config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2012-2013, 2017-2018 ARM Limited +# Copyright (c) 2012-2013, 2017-2018, 2020 ARM Limited # All rights reserved. # # The license below extends only to copyright in the software and shall @@ -220,7 +220,12 @@ class BaseSESystem(BaseSystem): super(BaseSESystem, self).init_system(system) def create_system(self): - system = System(physmem = self.mem_class(), + if issubclass(self.mem_class, m5.objects.DRAMInterface): + mem_ctrl = DRAMCtrl() + mem_ctrl.dram = self.mem_class() + else: + mem_ctrl = self.mem_class() + system = System(physmem = mem_ctrl, membus = SystemXBar(), mem_mode = self.mem_mode, multi_thread = (self.num_threads > 1)) @@ -272,8 +277,16 @@ class BaseFSSystem(BaseSystem): else: # create the memory controllers and connect them, stick with # the physmem name to avoid bumping all the reference stats - system.physmem = [self.mem_class(range = r) - for r in system.mem_ranges] + if issubclass(self.mem_class, m5.objects.DRAMInterface): + mem_ctrls = [] + for r in system.mem_ranges: + mem_ctrl = DRAMCtrl() + mem_ctrl.dram = self.mem_class(range = r) + mem_ctrls.append(mem_ctrl) + system.physmem = mem_ctrls + else: + system.physmem = [self.mem_class(range = r) + for r in system.mem_ranges] for i in range(len(system.physmem)): system.physmem[i].port = system.membus.master