mem: Split the hit_latency into tag_latency and data_latency
[gem5.git] / configs / common / HMC.py
1 # Copyright (c) 2012-2013 ARM Limited
2 # All rights reserved.
3 #
4 # The license below extends only to copyright in the software and shall
5 # not be construed as granting a license to any other intellectual
6 # property including but not limited to intellectual property relating
7 # to a hardware implementation of the functionality of the software
8 # licensed hereunder. You may use the software subject to the license
9 # terms below provided that you ensure that this notice is replicated
10 # unmodified and in its entirety in all distributions of the software,
11 # modified or unmodified, in source code or in binary form.
12 #
13 # Copyright (c) 2015 The University of Bologna
14 # All rights reserved.
15 #
16 # Redistribution and use in source and binary forms, with or without
17 # modification, are permitted provided that the following conditions are
18 # met: redistributions of source code must retain the above copyright
19 # notice, this list of conditions and the following disclaimer;
20 # redistributions in binary form must reproduce the above copyright
21 # notice, this list of conditions and the following disclaimer in the
22 # documentation and/or other materials provided with the distribution;
23 # neither the name of the copyright holders nor the names of its
24 # contributors may be used to endorse or promote products derived from
25 # this software without specific prior written permission.
26 #
27 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #
39 # Authors: Erfan Azarkhish
40 # Abdul Mutaal Ahmad
41
42 # A Simplified model of a complete HMC device. Based on:
43 # [1] http://www.hybridmemorycube.org/specification-download/
44 # [2] High performance AXI-4.0 based interconnect for extensible smart memory
45 # cubes(E. Azarkhish et. al)
46 # [3] Low-Power Hybrid Memory Cubes With Link Power Management and Two-Level
47 # Prefetching (J. Ahn et. al)
48 # [4] Memory-centric system interconnect design with Hybrid Memory Cubes
49 # (G. Kim et. al)
50 # [5] Near Data Processing, Are we there yet? (M. Gokhale)
51 # http://www.cs.utah.edu/wondp/gokhale.pdf
52 # [6] openHMC - A Configurable Open-Source Hybrid Memory Cube Controller
53 # (J. Schmidt)
54 # [7] Hybrid Memory Cube performance characterization on data-centric
55 # workloads (M. Gokhale)
56 #
57 # This script builds a complete HMC device composed of vault controllers,
58 # serial links, the main internal crossbar, and an external hmc controller.
59 #
60 # - VAULT CONTROLLERS:
61 # Instances of the HMC_2500_x32 class with their functionality specified in
62 # dram_ctrl.cc
63 #
64 # - THE MAIN XBAR:
65 # This component is simply an instance of the NoncoherentXBar class, and its
66 # parameters are tuned to [2].
67 #
68 # - SERIAL LINKS CONTROLLER:
69 # SerialLink is a simple variation of the Bridge class, with the ability to
70 # account for the latency of packet serialization and controller latency. We
71 # assume that the serializer component at the transmitter side does not need
72 # to receive the whole packet to start the serialization. But the
73 # deserializer waits for the complete packet to check its integrity first.
74 #
75 # * Bandwidth of the serial links is not modeled in the SerialLink component
76 # itself.
77 #
78 # * Latency of serial link controller is composed of SerDes latency + link
79 # controller
80 #
81 # * It is inferred from the standard [1] and the literature [3] that serial
82 # links share the same address range and packets can travel over any of
83 # them so a load distribution mechanism is required among them.
84 #
85 # -----------------------------------------
86 # | Host/HMC Controller |
87 # | ---------------------- |
88 # | | Link Aggregator | opt |
89 # | ---------------------- |
90 # | ---------------------- |
91 # | | Serial Link + Ser | * 4 |
92 # | ---------------------- |
93 # |---------------------------------------
94 # -----------------------------------------
95 # | Device
96 # | ---------------------- |
97 # | | Xbar | * 4 |
98 # | ---------------------- |
99 # | ---------------------- |
100 # | | Vault Controller | * 16 |
101 # | ---------------------- |
102 # | ---------------------- |
103 # | | Memory | |
104 # | ---------------------- |
105 # |---------------------------------------|
106 #
107 # In this version we present 3 different HMC architectures along with
108 # their corresponding test scripts.
109 #
110 # same: It has 4 crossbars in HMC memory. All the crossbars are connected
111 # to each other, providing complete memory range. This architecture also
112 # covers the added latency for sending a request to a non-local vault
113 # (bridge between crossbars). All the 4 serial links can access complete
114 # memory, so each link can be connected to a separate processor.
115 #
116 # distributed: It has 4 crossbars inside the HMC. Crossbars are not
117 # connected. Through each crossbar only local vaults can be accessed. But to
118 # support this architecture we need a crossbar between serial links and
119 # processor.
120 #
121 # mixed: This is a hybrid architecture. It has 4 crossbars inside the HMC.
122 # 2 Crossbars are connected to only local vaults. From other 2 crossbar, a
123 # request can be forwarded to any other vault.
124
125 import optparse
126
127 import m5
128 from m5.objects import *
129
130 # A single Hybrid Memory Cube (HMC)
class HMCSystem(SubSystem):
    """Parameter container for a single Hybrid Memory Cube (HMC).

    The class only declares configuration parameters (crossbar, serial link,
    performance monitoring and architecture settings). The components that
    use them are instantiated by config_host_hmc() and config_hmc().
    """

    #*****************************CROSSBAR PARAMETERS*************************
    # Flit size of the main interconnect [1]
    xbar_width = Param.Unsigned(32, "Data width of the main XBar (Bytes)")

    # Clock frequency of the main interconnect [1]
    # This crossbar is placed on the logic base of the HMC and it has its
    # own voltage and clock domains, different from the DRAM dies or from the
    # host.
    xbar_frequency = Param.Frequency('1GHz', "Clock Frequency of the main "
        "XBar")

    # Arbitration latency of the HMC XBar [1]
    xbar_frontend_latency = Param.Cycles(1, "Arbitration latency of the XBar")

    # Latency to forward a packet via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    xbar_forward_latency = Param.Cycles(2, "Forward latency of the XBar")

    # Latency to forward a response via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    xbar_response_latency = Param.Cycles(2, "Response latency of the XBar")

    # Number of crossbars which connect the 16 vaults to the serial links [7]
    number_mem_crossbar = Param.Unsigned(4, "Number of crossbar in HMC")

    #*****************************SERIAL LINK PARAMETERS**********************
    # Number of serial links controllers [1]
    num_links_controllers = Param.Unsigned(4, "Number of serial links")

    # Number of packets (not flits) to store at the request side of the serial
    # link. This number should be adjusted to achieve required bandwidth
    link_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
        "at the request side of the serial link")

    # Number of packets (not flits) to store at the response side of the
    # serial link. This number should be adjusted to achieve required
    # bandwidth
    link_buffer_size_rsp = Param.Unsigned(10, "Number of packets to buffer "
        "at the response side of the serial link")

    # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
    # the PCB trace latency (3ns Estimated based on [5])
    link_latency = Param.Latency('4.6ns', "Latency of the serial links")

    # Clock frequency of each serial link (SerDes) [1]
    link_frequency = Param.Frequency('10GHz', "Clock Frequency of the serial "
        "links")

    # Clock frequency of serial link Controller [6]
    # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] /
    # data_path_width * 10^6
    # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz
    link_controller_frequency = Param.Frequency('625MHz',
        "Clock Frequency of the link controller")

    # Latency of the serial link controller to process the packets [1][6]
    # (ClockDomain = 625 Mhz)
    # used here for calculations only
    link_ctrl_latency = Param.Cycles(4, "The number of cycles required for "
        "the controller to process the packet")

    # total_ctrl_latency = link_ctrl_latency + link_latency
    # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns
    total_ctrl_latency = Param.Latency('11ns', "The latency experienced by "
        "every packet regardless of size of packet")

    # Number of parallel lanes in each serial link [1]
    num_lanes_per_link = Param.Unsigned(16, "Number of lanes per each link")

    # Number of serial links [1]
    num_serial_links = Param.Unsigned(4, "Number of serial links")

    # Speed of each lane of serial link - SerDes serial interface 10 Gb/s
    serial_link_speed = Param.UInt64(10, "Gbs/s speed of each lane of "
        "serial link")

    #*****************************PERFORMANCE MONITORING**********************
    # The main monitor behind the HMC Controller
    enable_global_monitor = Param.Bool(False, "The main monitor behind the "
        "HMC Controller")

    # The link performance monitors
    enable_link_monitor = Param.Bool(False, "The link monitors")

    # Link aggregator enable - put a crossbar between buffers & links
    enable_link_aggr = Param.Bool(False, "The crossbar between port and "
        "Link Controller")

    enable_buff_div = Param.Bool(True, "Memory Range of Buffer is "
        "divided between total range")

    #*****************************HMC ARCHITECTURE ***************************
    # Memory chunk for 16 vaults - number of vaults / number of crossbars
    mem_chunk = Param.Unsigned(4, "Chunk of memory range for each cross bar "
        "in arch 0")

    # Size of request buffer within crossbar, used for modelling extra
    # latency when the request goes to a non-local vault
    xbar_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
        "at the request side of the crossbar")

    # Size of response buffer within crossbar, used for modelling extra
    # latency when the response is received from a non-local vault
    xbar_buffer_size_resp = Param.Unsigned(10, "Number of packets to buffer "
        "at the response side of the crossbar")
237
238 # configure host system with Serial Links
def config_host_hmc(options, system):
    """Create the host-side HMC controller and attach it to `system`.

    Instantiates an HMCSystem as system.hmc_host, creates its serial links
    (each optionally preceded by a CommMonitor) and connects the link inputs
    to system.membus and/or system.tgen according to options.arch
    ("distributed", "mixed" or "same").

    options -- parsed options; ser_ranges and arch are read unconditionally,
               enable_global_monitor and enable_link_monitor are optional.
    system  -- the system being configured.

    Returns the same system object.
    """
    system.hmc_host = HMCSystem()
    host = system.hmc_host

    # The monitor switches are optional: when the options object does not
    # define them the HMCSystem defaults are kept. Only a missing option is
    # tolerated; any other failure is reported instead of being swallowed.
    try:
        host.enable_global_monitor = options.enable_global_monitor
    except AttributeError:
        pass

    try:
        host.enable_link_monitor = options.enable_link_monitor
    except AttributeError:
        pass

    # Serial link Controller with 16 SerDes links at 10 Gbps
    # with serial link ranges w.r.t. the architecture
    host.seriallink = [
        SerialLink(ranges=options.ser_ranges[i],
                   req_size=host.link_buffer_size_req,
                   resp_size=host.link_buffer_size_rsp,
                   num_lanes=host.num_lanes_per_link,
                   link_speed=host.serial_link_speed,
                   delay=host.total_ctrl_latency)
        for i in xrange(host.num_serial_links)]

    # enable global monitor
    if host.enable_global_monitor:
        host.lmonitor = [CommMonitor()
                         for i in xrange(host.num_serial_links)]

    # set the clock frequency for serial link
    for i in xrange(host.num_serial_links):
        host.seriallink[i].clk_domain = SrcClockDomain(
            clock=host.link_controller_frequency,
            voltage_domain=VoltageDomain(voltage='1V'))

    # Connect membus/traffic gen to Serial Link Controller for different HMC
    # architectures
    if options.arch == "distributed":
        # Every link is fed by the membus
        for i in xrange(host.num_links_controllers):
            if host.enable_global_monitor:
                system.membus.master = host.lmonitor[i].slave
                host.lmonitor[i].master = host.seriallink[i].slave
            else:
                system.membus.master = host.seriallink[i].slave
    elif options.arch == "mixed":
        # Links 0 and 1 are fed by the membus, links 2 and 3 by the traffic
        # generators with the same index
        if host.enable_global_monitor:
            for i in (0, 1):
                system.membus.master = host.lmonitor[i].slave
                host.lmonitor[i].master = host.seriallink[i].slave
            for i in (2, 3):
                system.tgen[i].port = host.lmonitor[i].slave
                host.lmonitor[i].master = host.seriallink[i].slave
        else:
            for i in (0, 1):
                system.membus.master = host.seriallink[i].slave
            for i in (2, 3):
                system.tgen[i].port = host.seriallink[i].slave
    elif options.arch == "same":
        # Every link is fed by its own traffic generator
        for i in xrange(host.num_links_controllers):
            if host.enable_global_monitor:
                system.tgen[i].port = host.lmonitor[i].slave
                host.lmonitor[i].master = host.seriallink[i].slave
            else:
                system.tgen[i].port = host.seriallink[i].slave

    return system
316
317 # Create an HMC device and attach it to the current system
def config_hmc(options, system, hmc_host):
    """Create an HMC device and attach it to the current system.

    Instantiates an HMCSystem as system.hmc_dev, creates its crossbars,
    connects the serial links of system.hmc_host to them and, depending on
    options.arch ("same" or "mixed"), bridges the crossbars with each other.

    options  -- parsed options; arch is read unconditionally,
                enable_global_monitor and enable_link_monitor are optional.
    system   -- the system being configured; system.hmc_host.seriallink and
                system.mem_ranges are read here, so they must already exist.
    hmc_host -- not used by this function; kept so that existing callers
                keep working.
    """
    # Create HMC device
    system.hmc_dev = HMCSystem()
    dev = system.hmc_dev

    # The monitor switches are optional: when the options object does not
    # define them the HMCSystem defaults are kept. Only a missing option is
    # tolerated; any other failure is reported instead of being swallowed.
    try:
        dev.enable_global_monitor = options.enable_global_monitor
    except AttributeError:
        pass

    try:
        dev.enable_link_monitor = options.enable_link_monitor
    except AttributeError:
        pass

    if dev.enable_link_monitor:
        dev.lmonitor = [CommMonitor()
                        for i in xrange(dev.num_links_controllers)]

    # 4 HMC Crossbars located in its logic-base (LoB). The count is taken
    # from the device itself so that it always matches the clock-domain loop
    # below.
    dev.xbar = [
        NoncoherentXBar(width=dev.xbar_width,
                        frontend_latency=dev.xbar_frontend_latency,
                        forward_latency=dev.xbar_forward_latency,
                        response_latency=dev.xbar_response_latency)
        for i in xrange(dev.number_mem_crossbar)]

    for i in xrange(dev.number_mem_crossbar):
        dev.xbar[i].clk_domain = SrcClockDomain(
            clock=dev.xbar_frequency,
            voltage_domain=VoltageDomain(voltage='1V'))

    # Attach 4 serial links to 4 crossbars
    for i in xrange(dev.num_serial_links):
        if dev.enable_link_monitor:
            system.hmc_host.seriallink[i].master = dev.lmonitor[i].slave
            dev.lmonitor[i].master = dev.xbar[i].slave
        else:
            system.hmc_host.seriallink[i].master = dev.xbar[i].slave

    # Connect the xbars with each other so that a request arriving at the
    # wrong xbar is forwarded to the correct one. Bridges are used to connect
    # the xbars.
    if options.arch == "same":
        numx = len(dev.xbar)
        chunk = int(dev.mem_chunk)

        # One bridge is needed per ordered pair of distinct crossbars
        dev.buffers = [
            Bridge(req_size=dev.xbar_buffer_size_req,
                   resp_size=dev.xbar_buffer_size_resp)
            for i in xrange(numx * (numx - 1))]

        # necessary to add system_port to one of the xbars
        system.system_port = dev.xbar[3].slave

        # iterate over all the crossbars and connect them as required
        index = 0
        for i in range(numx):
            for j in range(numx):
                # Don't connect the xbar to itself
                if i == j:
                    continue

                # The bridge only forwards the ranges local to xbar j
                dev.buffers[index].ranges = system.mem_ranges[
                    j * chunk:(j + 1) * chunk]

                # Connect the bridge between crossbars
                dev.xbar[i].master = dev.buffers[index].slave
                dev.buffers[index].master = dev.xbar[j].slave

                index += 1

    # Two crossbars (2 and 3) are connected to all other crossbars. The other
    # two can only direct traffic to their local vaults.
    elif options.arch == "mixed":
        # Each bridge forwards the 4 memory ranges local to the destination
        # crossbar and is stored as system.hmc_dev.buffer<src><dst>
        for src, dst in ((3, 0), (3, 1), (3, 2), (2, 0), (2, 1), (2, 3)):
            bridge = Bridge(ranges=system.mem_ranges[4 * dst:4 * (dst + 1)])
            setattr(dev, 'buffer%d%d' % (src, dst), bridge)
            dev.xbar[src].master = bridge.slave
            bridge.master = dev.xbar[dst].slave
427