mem: Add memory rank-to-rank delay
[gem5.git] / src / mem / DRAMCtrl.py
1 # Copyright (c) 2012-2014 ARM Limited
2 # All rights reserved.
3 #
4 # The license below extends only to copyright in the software and shall
5 # not be construed as granting a license to any other intellectual
6 # property including but not limited to intellectual property relating
7 # to a hardware implementation of the functionality of the software
8 # licensed hereunder. You may use the software subject to the license
9 # terms below provided that you ensure that this notice is replicated
10 # unmodified and in its entirety in all distributions of the software,
11 # modified or unmodified, in source code or in binary form.
12 #
13 # Copyright (c) 2013 Amin Farmahini-Farahani
14 # All rights reserved.
15 #
16 # Redistribution and use in source and binary forms, with or without
17 # modification, are permitted provided that the following conditions are
18 # met: redistributions of source code must retain the above copyright
19 # notice, this list of conditions and the following disclaimer;
20 # redistributions in binary form must reproduce the above copyright
21 # notice, this list of conditions and the following disclaimer in the
22 # documentation and/or other materials provided with the distribution;
23 # neither the name of the copyright holders nor the names of its
24 # contributors may be used to endorse or promote products derived from
25 # this software without specific prior written permission.
26 #
27 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #
39 # Authors: Andreas Hansson
40 # Ani Udipi
41
42 from m5.params import *
43 from AbstractMemory import *
44
45 # Enum for memory scheduling algorithms, currently First-Come
46 # First-Served and a First-Row Hit then First-Come First-Served
class MemSched(Enum):
    # Scheduling policies: 'fcfs' is First-Come First-Served and
    # 'frfcfs' is First-Row-Hit then First-Come First-Served
    vals = [
        'fcfs',
        'frfcfs',
    ]
48
49 # Enum for the address mapping. With Ch, Ra, Ba, Ro and Co denoting
50 # channel, rank, bank, row and column, respectively, and going from
51 # MSB to LSB. Available are RoRaBaChCo and RoRaBaCoCh, that are
52 # suitable for an open-page policy, optimising for sequential accesses
53 # hitting in the open row. For a closed-page policy, RoCoRaBaCh
54 # maximises parallelism.
class AddrMap(Enum):
    # Each name spells out the address fields from MSB to LSB, using
    # Ro = row, Ra = rank, Ba = bank, Ch = channel and Co = column
    vals = [
        'RoRaBaChCo',
        'RoRaBaCoCh',
        'RoCoRaBaCh',
    ]
56
57 # Enum for the page policy, either open, open_adaptive, close, or
58 # close_adaptive.
class PageManage(Enum):
    # The selectable page policies, one entry per line
    vals = [
        'open',
        'open_adaptive',
        'close',
        'close_adaptive',
    ]
61
62 # DRAMCtrl is a single-channel single-ported DRAM controller model
63 # that aims to model the most important system-level performance
64 # effects of a DRAM without getting into too much detail of the DRAM
65 # itself.
class DRAMCtrl(AbstractMemory):
    # Parameter declarations for the DRAM controller. Parameters that
    # are declared with only a description carry no default value here;
    # the concrete configurations further down in this file assign them.
    type = 'DRAMCtrl'
    cxx_header = "mem/dram_ctrl.hh"

    # single-ported on the system interface side, instantiate with a
    # bus in front of the controller for multiple ports
    port = SlavePort("Slave port")

    # the basic configuration of the controller architecture
    write_buffer_size = Param.Unsigned(64, "Number of write queue entries")
    read_buffer_size = Param.Unsigned(32, "Number of read queue entries")

    # threshold in percent for when to forcefully trigger writes and
    # start emptying the write buffer
    write_high_thresh_perc = Param.Percent(85, "Threshold to force writes")

    # threshold in percentage for when to start writes if the read
    # queue is empty
    write_low_thresh_perc = Param.Percent(50, "Threshold to start writes")

    # minimum write bursts to schedule before switching back to reads
    min_writes_per_switch = Param.Unsigned(16, "Minimum write bursts before "
                                           "switching to reads")

    # scheduler, address map and page policy
    mem_sched_policy = Param.MemSched('frfcfs', "Memory scheduling policy")
    addr_mapping = Param.AddrMap('RoRaBaChCo', "Address mapping policy")
    page_policy = Param.PageManage('open_adaptive', "Page management policy")

    # enforce a limit on the number of accesses per row
    max_accesses_per_row = Param.Unsigned(16, "Max accesses per row before "
                                          "closing")

    # pipeline latency of the controller and PHY, split into a
    # frontend part and a backend part, with reads and writes serviced
    # by the queues only seeing the frontend contribution, and reads
    # serviced by the memory seeing the sum of the two
    static_frontend_latency = Param.Latency("10ns", "Static frontend latency")
    static_backend_latency = Param.Latency("10ns", "Static backend latency")

    # the physical organisation of the DRAM
    device_bus_width = Param.Unsigned("data bus width in bits for each DRAM "
                                      "device/chip")
    burst_length = Param.Unsigned("Burst length (BL) in beats")
    device_rowbuffer_size = Param.MemorySize("Page (row buffer) size per "
                                             "device/chip")
    devices_per_rank = Param.Unsigned("Number of devices/chips per rank")
    ranks_per_channel = Param.Unsigned("Number of ranks per channel")
    banks_per_rank = Param.Unsigned("Number of banks per rank")
    # only used for the address mapping as the controller by
    # construction is a single channel and multiple controllers have
    # to be instantiated for a multi-channel configuration
    channels = Param.Unsigned(1, "Number of channels")

    # timing behaviour and constraints - all given as latencies with an
    # explicit unit (the configurations in this file use ns and us)

    # the base clock period of the DRAM
    tCK = Param.Latency("Clock period")

    # the amount of time from issuing an activate command to the data
    # being available in the row buffer for a read/write
    tRCD = Param.Latency("RAS to CAS delay")

    # the time from issuing a read/write command to seeing the actual data
    tCL = Param.Latency("CAS latency")

    # minimum time between a precharge and subsequent activate
    tRP = Param.Latency("Row precharge time")

    # minimum time between an activate and a precharge to the same row
    tRAS = Param.Latency("ACT to PRE delay")

    # minimum time between a write data transfer and a precharge
    tWR = Param.Latency("Write recovery time")

    # minimum time between a read and precharge command
    tRTP = Param.Latency("Read to precharge")

    # time to complete a burst transfer, typically the burst length
    # divided by two due to the DDR bus, but by making it a parameter
    # it is easier to also evaluate SDR memories like WideIO.
    # This parameter has to account for burst length.
    # Read/Write requests with data size larger than one full burst are
    # broken down into multiple requests in the controller
    tBURST = Param.Latency("Burst duration (for DDR burst length / 2 cycles)")

    # time taken to complete one refresh cycle (N rows in all banks)
    tRFC = Param.Latency("Refresh cycle time")

    # refresh command interval, how often a "ref" command needs
    # to be sent. It is 7.8 us for a 64ms refresh requirement
    tREFI = Param.Latency("Refresh command interval")

    # write-to-read, same rank turnaround penalty
    tWTR = Param.Latency("Write to read, same rank switching time")

    # read-to-write, same rank turnaround penalty
    tRTW = Param.Latency("Read to write, same rank switching time")

    # rank-to-rank bus delay penalty
    # this does not correlate to a memory timing parameter and encompasses:
    # 1) RD-to-RD, 2) WR-to-WR, 3) RD-to-WR, and 4) WR-to-RD
    # different rank bus delay
    tCS = Param.Latency("Rank to rank switching time")

    # minimum row activate to row activate delay time
    tRRD = Param.Latency("ACT to ACT delay")

    # time window in which a maximum number of activates are allowed
    # to take place, set to 0 to disable
    tXAW = Param.Latency("X activation window")
    activation_limit = Param.Unsigned("Max number of activates in window")

    # Currently rolled into other params
    ######################################################################

    # tRC - assumed to be tRAS + tRP
183
184 # A single DDR3-1600 x64 channel (one command and address bus), with
185 # timings based on a DDR3-1600 4 Gbit datasheet (Micron MT41J512M8) in
186 # an 8x8 configuration, amounting to 4 Gbyte of memory.
class DDR3_1600_x64(DRAMCtrl):
    # --- organisation -------------------------------------------------
    # 8x8 configuration: eight devices per rank, each with an 8-bit
    # interface and a 1 Kbyte page (1K columns x8)
    device_bus_width = 8
    devices_per_rank = 8
    device_rowbuffer_size = '1kB'

    # DDR3 is a BL8 device with 8 banks in all configurations
    burst_length = 8
    banks_per_rank = 8

    # two ranks on the channel
    ranks_per_channel = 2

    # --- clock and burst ----------------------------------------------
    # 800 MHz clock; 8 beats across the x64 interface take 4 clocks
    tCK = '1.25ns'
    tBURST = '5ns'

    # --- core timings (DDR3-1600 11-11-11) ----------------------------
    tRCD = '13.75ns'
    tCL = '13.75ns'
    tRP = '13.75ns'
    tRAS = '35ns'
    tWR = '15ns'

    # read to precharge: greater of 4 CK or 7.5 ns
    tRTP = '7.5ns'

    # --- activation constraints ---------------------------------------
    tRRD = '6ns'
    tXAW = '30ns'
    activation_limit = 4

    # --- refresh ------------------------------------------------------
    tRFC = '260ns'

    # refresh interval for <=85C, half of this for >85C
    tREFI = '7.8us'

    # --- bus turnaround -----------------------------------------------
    # write to read, same rank: greater of 4 CK or 7.5 ns
    tWTR = '7.5ns'

    # read to write, same rank: defaults to 2 CK, 2.5 ns @ 800 MHz
    tRTW = '2.5ns'

    # different rank bus delay: defaults to 2 CK, 2.5 ns @ 800 MHz
    tCS = '2.5ns'
238
# A single DDR3-2133 x64 channel refining a selected subset of the
# options for the DDR3-1600 configuration, based on the same DDR3-1600
# 4 Gbit datasheet (Micron MT41J512M8). Most parameters are kept
# consistent across the two configurations.
class DDR3_2133_x64(DDR3_1600_x64):
    # Only the values listed here differ from DDR3_1600_x64; everything
    # else is inherited unchanged.
    # NOTE(review): tRTW and tCS are not overridden, so the parent's
    # 2.5 ns (2 CK at 800 MHz) still applies; 2 CK at this clock would
    # be roughly 1.876 ns - confirm this is intended.

    # 1066 MHz clock; 8 beats across the x64 interface take 4 clocks
    tCK = '0.938ns'
    tBURST = '3.752ns'

    # core timings for DDR3-2133 14-14-14
    tRCD = '13.09ns'
    tCL = '13.09ns'
    tRP = '13.09ns'
    tRAS = '33ns'

    # activation constraints
    tRRD = '5ns'
    tXAW = '25ns'
257
258 # A single DDR4-2400 x64 channel (one command and address bus), with
259 # timings based on a DDR4-2400 4 Gbit datasheet (Samsung K4A4G085WD)
260 # in an 8x8 configuration, amounting to 4 Gbyte of memory.
class DDR4_2400_x64(DRAMCtrl):
    # --- organisation -------------------------------------------------
    # 8x8 configuration: eight devices per rank, each with an 8-bit
    # interface and a 1 Kbyte page (1K columns x8)
    device_bus_width = 8
    devices_per_rank = 8
    device_rowbuffer_size = '1kB'

    # DDR4 is a BL8 device
    burst_length = 8

    # DDR4 has 16 banks (4 bank groups) in all configurations; the
    # extra constraints imposed by the bank groups are currently not
    # captured
    banks_per_rank = 16

    # a single rank on the channel
    ranks_per_channel = 1

    # --- clock and burst ----------------------------------------------
    # 1200 MHz clock; 8 beats across the x64 interface take 4 clocks
    tCK = '0.833ns'
    tBURST = '3.333ns'

    # --- core timings (DDR4-2400 17-17-17) ----------------------------
    tRCD = '14.16ns'
    tCL = '14.16ns'
    tRP = '14.16ns'
    tRAS = '32ns'
    tWR = '15ns'

    # read to precharge: greater of 4 CK or 7.5 ns
    tRTP = '7.5ns'

    # --- activation constraints ---------------------------------------
    # average of RRD_S and RRD_L
    tRRD = '4.1ns'
    tXAW = '21ns'
    activation_limit = 4

    # --- refresh ------------------------------------------------------
    tRFC = '260ns'

    # refresh interval for <=85C, half of this for >85C
    tREFI = '7.8us'

    # --- bus turnaround -----------------------------------------------
    # write to read, same rank: average of WTR_S and WTR_L
    tWTR = '5ns'

    # read to write, same rank: defaults to 2 CK, 1.666 ns @ 1200 MHz
    tRTW = '1.666ns'

    # different rank bus delay: defaults to 2 CK, 1.666 ns @ 1200 MHz
    tCS = '1.666ns'
316
317 # A single DDR3 x64 interface (one command and address bus), with
318 # default timings based on DDR3-1333 4 Gbit parts in an 8x8
319 # configuration, which would amount to 4 GByte of memory. This
320 # configuration is primarily for comparing with DRAMSim2, and all the
321 # parameters except ranks_per_channel are based on the DRAMSim2 config
322 # file DDR3_micron_32M_8B_x8_sg15.ini. Note that ranks_per_channel has
323 # to be manually set, depending on size of the memory to be
324 # simulated. By default DRAMSim2 has 2048MB of memory with a single
325 # rank. Therefore for 4 GByte memory, set ranks_per_channel = 2
class DDR3_1333_x64_DRAMSim2(DRAMCtrl):
    # --- organisation -------------------------------------------------
    # 8x8 configuration: eight devices per rank, each with an 8-bit
    # interface
    device_bus_width = 8
    devices_per_rank = 8

    # 1KB page (row buffer) per device; this depends on the memory
    # density
    device_rowbuffer_size = '1kB'

    # DDR3 is a BL8 device with 8 banks in all configurations
    burst_length = 8
    banks_per_rank = 8

    # two ranks on the channel
    ranks_per_channel = 2

    # --- clock and burst ----------------------------------------------
    # 666 MHz clock
    tCK = '1.5ns'

    # BL8 DDR device: 8 beats across the x64 interface take 4 clocks
    # @ 666.66 MHz
    tBURST = '6ns'

    # --- core timings -------------------------------------------------
    tRCD = '15ns'
    tCL = '15ns'
    tRP = '15ns'
    tRAS = '36ns'
    tWR = '15ns'
    tRTP = '7.5ns'

    # --- activation constraints ---------------------------------------
    tRRD = '6.0ns'
    tXAW = '30ns'
    activation_limit = 4

    # --- refresh ------------------------------------------------------
    tRFC = '160ns'

    # DDR3 refresh interval for <=85C, half of this for >85C
    tREFI = '7.8us'

    # --- bus turnaround -----------------------------------------------
    # write to read, same rank: greater of 4 CK or 7.5 ns, where 4 CK
    # @ 666.66 MHz is 6 ns
    tWTR = '7.5ns'

    # read to write, same rank: defaults to 2 CK, 3 ns @ 666.66 MHz
    tRTW = '3ns'

    # different rank bus delay: defaults to 2 CK, 3 ns @ 666.66 MHz
    tCS = '3ns'
378
379
380 # A single LPDDR2-S4 x32 interface (one command/address bus), with
381 # default timings based on a LPDDR2-1066 4 Gbit part in a 1x32
382 # configuration.
class LPDDR2_S4_1066_x32(DRAMCtrl):
    # --- organisation -------------------------------------------------
    # 1x32 configuration: a single device with a 32-bit interface
    device_bus_width = 32
    devices_per_rank = 1

    # 1KB page (row buffer) per device; this depends on the memory
    # density
    device_rowbuffer_size = '1kB'

    # LPDDR2_S4 is a BL4 and BL8 device; BL8 is used here
    burst_length = 8

    # LPDDR2-S4 has 8 banks in all configurations
    banks_per_rank = 8

    # a single rank on the channel
    ranks_per_channel = 1

    # --- clock and burst ----------------------------------------------
    # 533 MHz clock
    tCK = '1.876ns'

    # BL8 DDR device: 8 beats across the x32 DDR interface take
    # 4 clocks @ 533 MHz. Requests larger than 32 bytes are broken
    # down into multiple requests in the controller
    tBURST = '7.5ns'

    # --- core timings -------------------------------------------------
    # fixed at 15 ns
    tRCD = '15ns'

    # 8 CK read latency, 4 CK write latency @ 533 MHz, 1.876 ns cycle
    # time
    tCL = '15ns'

    # precharging one bank takes 15 ns (all banks 18 ns)
    tRP = '15ns'

    tRAS = '42ns'
    tWR = '15ns'

    # read to precharge delay of 6 CK
    tRTP = '11.256ns'

    # --- activation constraints ---------------------------------------
    # activate to activate, irrespective of density and speed grade
    tRRD = '10.0ns'

    # tFAW is 50 ns irrespective of density
    tXAW = '50ns'
    activation_limit = 4

    # --- refresh (LPDDR2-S4, 4 Gbit) ----------------------------------
    tRFC = '130ns'
    tREFI = '3.9us'

    # --- bus turnaround -----------------------------------------------
    # write to read, same rank: 7.5 ns irrespective of speed grade
    tWTR = '7.5ns'

    # read to write, same rank: defaults to 2 CK, 3.75 ns @ 533 MHz
    tRTW = '3.75ns'

    # different rank bus delay: defaults to 2 CK, 3.75 ns @ 533 MHz
    tCS = '3.75ns'
446
447 # A single WideIO x128 interface (one command and address bus), with
448 # default timings based on an estimated WIO-200 8 Gbit part.
class WideIO_200_x128(DRAMCtrl):
    # --- organisation -------------------------------------------------
    # 1x128 configuration: a single device with a 128-bit interface
    device_bus_width = 128
    devices_per_rank = 1

    # 4KB page (row buffer) per device; this depends on the memory
    # density
    device_rowbuffer_size = '4kB'

    # this is a BL4 device
    burst_length = 4

    # WideIO has 4 banks in all configurations
    banks_per_rank = 4

    # one rank for a one-high die stack
    ranks_per_channel = 1

    # --- clock and burst ----------------------------------------------
    # 200 MHz clock
    tCK = '5ns'

    # BL4 SDR device: 4 beats across the x128 SDR interface take
    # 4 clocks @ 200 MHz
    tBURST = '20ns'

    # --- core timings (WIO-200) ---------------------------------------
    tRCD = '18ns'
    tCL = '18ns'
    tRP = '18ns'
    tRAS = '42ns'
    tWR = '15ns'

    # read to precharge is the same as the burst
    tRTP = '20ns'

    # --- activation constraints ---------------------------------------
    # activate to activate, irrespective of density and speed grade
    tRRD = '10.0ns'

    # a two-activate window instead of a four-activate one
    tXAW = '50ns'
    activation_limit = 2

    # --- refresh (WIO 8 Gb) -------------------------------------------
    tRFC = '210ns'

    # refresh interval for <=85C, half of this for >85C
    tREFI = '3.9us'

    # --- bus turnaround -----------------------------------------------
    # write to read, same rank: greater of 2 CK or 15 ns, where 2 CK
    # @ 200 MHz is 10 ns
    tWTR = '15ns'

    # read to write, same rank: defaults to 2 CK, 10 ns @ 200 MHz
    tRTW = '10ns'

    # different rank bus delay: defaults to 2 CK, 10 ns @ 200 MHz
    tCS = '10ns'
506
507 # A single LPDDR3 x32 interface (one command/address bus), with
508 # default timings based on a LPDDR3-1600 4 Gbit part in a 1x32
509 # configuration
class LPDDR3_1600_x32(DRAMCtrl):
    # --- organisation -------------------------------------------------
    # 1x32 configuration: a single device with a 32-bit interface and
    # a 4KB page (row buffer)
    device_bus_width = 32
    devices_per_rank = 1
    device_rowbuffer_size = '4kB'

    # LPDDR3 is a BL8 device with 8 banks in all configurations
    burst_length = 8
    banks_per_rank = 8

    # a single rank on the channel
    ranks_per_channel = 1

    # --- clock and burst ----------------------------------------------
    # 800 MHz clock
    tCK = '1.25ns'

    # BL8 DDR device: 8 beats across the x32 DDR interface take
    # 4 clocks @ 800 MHz. Requests larger than 32 bytes are broken
    # down into multiple requests in the controller
    tBURST = '5ns'

    # --- core timings -------------------------------------------------
    # fixed at 15 ns
    tRCD = '15ns'

    # 12 CK read latency, 6 CK write latency @ 800 MHz, 1.25 ns cycle
    # time
    tCL = '15ns'

    # precharging one bank takes 15 ns (all banks 18 ns)
    tRP = '15ns'

    tRAS = '42ns'
    tWR = '15ns'

    # read to precharge: greater of 4 CK or 7.5 ns, where 4 CK
    # @ 800 MHz is 5 ns
    tRTP = '7.5ns'

    # --- activation constraints ---------------------------------------
    # activate to activate, irrespective of density and speed grade
    tRRD = '10.0ns'

    # tFAW is 50 ns irrespective of size
    tXAW = '50ns'
    activation_limit = 4

    # --- refresh (LPDDR3, 4 Gb) ---------------------------------------
    tRFC = '130ns'
    tREFI = '3.9us'

    # --- bus turnaround -----------------------------------------------
    # write to read, same rank: 7.5 ns irrespective of speed grade
    tWTR = '7.5ns'

    # read to write, same rank: defaults to 2 CK, 2.5 ns @ 800 MHz
    tRTW = '2.5ns'

    # different rank bus delay: defaults to 2 CK, 2.5 ns @ 800 MHz
    tCS = '2.5ns'