1 # Copyright (c) 2012-2013 ARM Limited
4 # The license below extends only to copyright in the software and shall
5 # not be construed as granting a license to any other intellectual
6 # property including but not limited to intellectual property relating
7 # to a hardware implementation of the functionality of the software
8 # licensed hereunder. You may use the software subject to the license
9 # terms below provided that you ensure that this notice is replicated
10 # unmodified and in its entirety in all distributions of the software,
11 # modified or unmodified, in source code or in binary form.
13 # Copyright (c) 2015 The University of Bologna
14 # All rights reserved.
16 # Redistribution and use in source and binary forms, with or without
17 # modification, are permitted provided that the following conditions are
18 # met: redistributions of source code must retain the above copyright
19 # notice, this list of conditions and the following disclaimer;
20 # redistributions in binary form must reproduce the above copyright
21 # notice, this list of conditions and the following disclaimer in the
22 # documentation and/or other materials provided with the distribution;
23 # neither the name of the copyright holders nor the names of its
24 # contributors may be used to endorse or promote products derived from
25 # this software without specific prior written permission.
27 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 # Authors: Erfan Azarkhish
42 # A Simplified model of a complete HMC device. Based on:
43 # [1] http://www.hybridmemorycube.org/specification-download/
44 # [2] High performance AXI-4.0 based interconnect for extensible smart memory
45 # cubes(E. Azarkhish et. al)
46 # [3] Low-Power Hybrid Memory Cubes With Link Power Management and Two-Level
47 # Prefetching (J. Ahn et. al)
48 # [4] Memory-centric system interconnect design with Hybrid Memory Cubes
50 # [5] Near Data Processing, Are we there yet? (M. Gokhale)
51 # http://www.cs.utah.edu/wondp/gokhale.pdf
52 # [6] openHMC - A Configurable Open-Source Hybrid Memory Cube Controller
54 # [7] Hybrid Memory Cube performance characterization on data-centric
55 # workloads (M. Gokhale)
57 # This script builds a complete HMC device composed of vault controllers,
58 # serial links, the main internal crossbar, and an external hmc controller.
60 # - VAULT CONTROLLERS:
61 # Instances of the HMC_2500_x32 class with their functionality specified in
65 # This component is simply an instance of the NoncoherentXBar class, and its
66 # parameters are tuned to [2].
68 # - SERIAL LINKS CONTROLLER:
69 # SerialLink is a simple variation of the Bridge class, with the ability to
70 # account for the latency of packet serialization and controller latency. We
71 # assume that the serializer component at the transmitter side does not need
72 # to receive the whole packet to start the serialization. But the
73 # deserializer waits for the complete packet to check its integrity first.
75 # * Bandwidth of the serial links is not modeled in the SerialLink component
78 # * Latency of serial link controller is composed of SerDes latency + link
81 # * It is inferred from the standard [1] and the literature [3] that serial
82 # links share the same address range and packets can travel over any of
83 # them so a load distribution mechanism is required among them.
85 # -----------------------------------------
86 # | Host/HMC Controller |
87 # | ---------------------- |
88 # | | Link Aggregator | opt |
89 # | ---------------------- |
90 # | ---------------------- |
91 # | | Serial Link + Ser | * 4 |
92 # | ---------------------- |
93 # |---------------------------------------
94 # -----------------------------------------
96 # | ---------------------- |
98 # | ---------------------- |
99 # | ---------------------- |
100 # | | Vault Controller | * 16 |
101 # | ---------------------- |
102 # | ---------------------- |
104 # | ---------------------- |
105 # |---------------------------------------|
# In this version we present 3 different HMC architectures along with
# their corresponding test scripts.
# same: It has 4 crossbars in HMC memory. All the crossbars are connected
# to each other, providing the complete memory range. This architecture also
# covers the added latency for sending a request to a non-local vault (bridge
# in between crossbars). All 4 serial links can access the complete memory,
# so each link can be connected to a separate processor.
116 # distributed: It has 4 crossbars inside the HMC. Crossbars are not
117 # connected.Through each crossbar only local vaults can be accessed. But to
118 # support this architecture we need a crossbar between serial links and
# mixed: This is a hybrid architecture. It has 4 crossbars inside the HMC.
# 2 crossbars are connected only to their local vaults. From the other 2
# crossbars, a request can be forwarded to any other vault.
128 from m5
.objects
import *
# A single Hybrid Memory Cube (HMC).
#
# This class only declares parameters; the functions in this file
# instantiate it once for the host side (system.hmc_host) and once for the
# device side (system.hmc_dev) and read the values below while wiring up the
# serial links, monitors, crossbars and bridges.
class HMCSystem(SubSystem):
    #*****************************CROSSBAR PARAMETERS*************************
    # Flit size of the main interconnect [1]
    xbar_width = Param.Unsigned(32, "Data width of the main XBar (Bytes)")

    # Clock frequency of the main interconnect [1]
    # This crossbar is placed on the logic base of the HMC and it has its
    # own voltage and clock domains, different from the DRAM dies or from the
    # host.
    xbar_frequency = Param.Frequency('1GHz', "Clock Frequency of the main "
                                     "interconnect")

    # Arbitration latency of the HMC XBar [1]
    xbar_frontend_latency = Param.Cycles(1, "Arbitration latency of the XBar")

    # Latency to forward a packet via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    xbar_forward_latency = Param.Cycles(2, "Forward latency of the XBar")

    # Latency to forward a response via the interconnect [1] (two levels of
    # FIFOs at the input and output of the interconnect)
    xbar_response_latency = Param.Cycles(2, "Response latency of the XBar")

    # Number of crossbars which connect the 16 vaults to the serial links [7]
    number_mem_crossbar = Param.Unsigned(4, "Number of crossbar in HMC")

    #*****************************SERIAL LINK PARAMETERS***********************
    # Number of serial links controllers [1]
    num_links_controllers = Param.Unsigned(4, "Number of serial links")

    # Number of packets (not flits) to store at the request side of the serial
    # link. This number should be adjusted to achieve the required bandwidth
    link_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
                                          "at the request side of the serial "
                                          "link")

    # Number of packets (not flits) to store at the response side of the
    # serial link. This number should be adjusted to achieve the required
    # bandwidth
    link_buffer_size_rsp = Param.Unsigned(10, "Number of packets to buffer "
                                          "at the response side of the "
                                          "serial link")

    # Latency of the serial link composed by SER/DES latency (1.6ns [4]) plus
    # the PCB trace latency (3ns Estimated based on [5])
    link_latency = Param.Latency('4.6ns', "Latency of the serial links")

    # Clock frequency of each serial link (SerDes) [1]
    # (the two string pieces need the separating space, otherwise the help
    # text reads "seriallinks")
    link_frequency = Param.Frequency('10GHz', "Clock Frequency of the serial "
                                     "links")

    # Clock frequency of serial link Controller[6]
    # clk_hmc[Mhz]= num_lanes_per_link * lane_speed [Gbits/s] /
    # data_path_width * 10^6
    # clk_hmc[Mhz]= 16 * 10 Gbps / 256 * 10^6 = 625 Mhz
    link_controller_frequency = Param.Frequency(
        '625MHz', "Clock Frequency of the link controller")

    # Latency of the serial link controller to process the packets[1][6]
    # (ClockDomain = 625 Mhz )
    # used here for calculations only
    link_ctrl_latency = Param.Cycles(4, "The number of cycles required for "
                                     "the controller to process the packet")

    # total_ctrl_latency = link_ctrl_latency + link_latency
    # total_ctrl_latency = 4(Cycles) * 1.6 ns + 4.6 ns = 11 ns
    total_ctrl_latency = Param.Latency('11ns', "The latency experienced by "
                                       "every packet regardless of size of "
                                       "packet")

    # Number of parallel lanes in each serial link [1]
    num_lanes_per_link = Param.Unsigned(16, "Number of lanes per each link")

    # Number of serial links [1]
    num_serial_links = Param.Unsigned(4, "Number of serial links")

    # Speed of each lane of serial link - SerDes serial interface 10 Gb/s
    serial_link_speed = Param.UInt64(10, "Gbs/s speed of each lane of "
                                     "serial link")

    #*****************************PERFORMANCE MONITORING***********************
    # The main monitor behind the HMC Controller
    enable_global_monitor = Param.Bool(False, "The main monitor behind the "
                                       "HMC Controller")

    # The link performance monitors
    enable_link_monitor = Param.Bool(False, "The link monitors")

    # Link aggregator enable - put a crossbar between buffers & links
    enable_link_aggr = Param.Bool(False, "The crossbar between port and "
                                  "Link Controller")

    enable_buff_div = Param.Bool(True, "Memory Range of Buffer is "
                                 "divided between total range")

    #*****************************HMC ARCHITECTURE ************************
    # Memory chunk for 16 vaults - number of vaults / number of crossbars
    mem_chunk = Param.Unsigned(4, "Chunk of memory range for each cross bar "
                               "in arch 0")

    # Size of the request buffer within the crossbar, used for modelling the
    # extra latency when a request goes to a non-local vault
    xbar_buffer_size_req = Param.Unsigned(10, "Number of packets to buffer "
                                          "at the request side of the "
                                          "crossbar")

    # Size of the response buffer within the crossbar, used for modelling the
    # extra latency when a response is received from a non-local vault
    xbar_buffer_size_resp = Param.Unsigned(10, "Number of packets to buffer "
                                           "at the response side of the "
                                           "crossbar")
def _host_side_port(hmc_host, idx):
    """Return the port an initiator binds to in order to reach link `idx`.

    With the global monitor enabled, monitor `idx` is placed in front of
    serial link `idx` (monitor.master -> link.slave) and the monitor's slave
    port is returned. Otherwise the serial link's own slave port is returned.
    """
    link = hmc_host.seriallink[idx]
    if hmc_host.enable_global_monitor:
        monitor = hmc_host.lmonitor[idx]
        monitor.master = link.slave
        return monitor.slave
    return link.slave


# Configure the host system with serial links
def config_host_hmc(options, system):
    """Create the host-side HMC subsystem and attach it to `system`.

    Reads from `options`:
      * ser_ranges            -- indexable; entry i is the address range(s)
                                 handed to serial link i
      * arch                  -- "distributed", "mixed" or "same"
      * enable_global_monitor -- optional, copied to the subsystem if present
      * enable_link_monitor   -- optional, copied to the subsystem if present

    Reads from `system`: `membus` ("distributed" and "mixed") and `tgen`
    ("mixed" and "same"). Creates `system.hmc_host`.

    Returns the same `system` object that was passed in.
    """
    system.hmc_host = HMCSystem()
    hmc_host = system.hmc_host

    # Only override the HMCSystem defaults for flags the options object
    # actually defines, so option parsers without these flags still work.
    for flag in ("enable_global_monitor", "enable_link_monitor"):
        if hasattr(options, flag):
            setattr(hmc_host, flag, getattr(options, flag))

    # int() turns the parameter values into plain integers so that range()
    # can be used on both Python 2 and Python 3.
    n_links = int(hmc_host.num_serial_links)
    n_ctrl = int(hmc_host.num_links_controllers)

    # Serial link Controller with 16 SerDes links at 10 Gbps
    # with serial link ranges w.r.t. the architecture
    hmc_host.seriallink = [
        SerialLink(ranges=options.ser_ranges[i],
                   req_size=hmc_host.link_buffer_size_req,
                   resp_size=hmc_host.link_buffer_size_rsp,
                   num_lanes=hmc_host.num_lanes_per_link,
                   link_speed=hmc_host.serial_link_speed,
                   delay=hmc_host.total_ctrl_latency)
        for i in range(n_links)]

    # One monitor per serial link when the global monitor is enabled
    if hmc_host.enable_global_monitor:
        hmc_host.lmonitor = [CommMonitor() for _ in range(n_links)]

    # Set the clock frequency for each serial link; every link gets its own
    # clock and voltage domain objects.
    for i in range(n_links):
        hmc_host.seriallink[i].clk_domain = SrcClockDomain(
            clock=hmc_host.link_controller_frequency,
            voltage_domain=VoltageDomain(voltage='1V'))

    # Connect membus/traffic gen to the Serial Link Controller for the
    # different HMC architectures
    arch = options.arch
    if arch == "distributed":
        # Every link is fed from the memory bus
        for i in range(n_ctrl):
            system.membus.master = _host_side_port(hmc_host, i)
    elif arch == "mixed":
        # Links 0 and 1 are fed from the memory bus, links 2 and 3 from
        # their own traffic generators
        system.membus.master = _host_side_port(hmc_host, 0)
        system.membus.master = _host_side_port(hmc_host, 1)
        system.tgen[2].port = _host_side_port(hmc_host, 2)
        system.tgen[3].port = _host_side_port(hmc_host, 3)
    elif arch == "same":
        # Every link is fed from its own traffic generator
        for i in range(n_ctrl):
            system.tgen[i].port = _host_side_port(hmc_host, i)

    return system

# Create an HMC device and attach it to the current system
def config_hmc(options, system, hmc_host):
    """Create the device-side HMC subsystem and wire it to the host links.

    Reads from `options`:
      * arch                  -- "same" and "mixed" add crossbar-to-crossbar
                                 bridges; any other value adds none
      * enable_global_monitor -- optional, copied to the subsystem if present
      * enable_link_monitor   -- optional, copied to the subsystem if present

    Reads from `system`: `hmc_host.seriallink` (the host-side serial links,
    which must already exist) and `mem_ranges`. Creates `system.hmc_dev` and,
    for the "same" architecture, binds `system.system_port`.

    `hmc_host` is accepted for interface compatibility; the host subsystem
    is read from `system.hmc_host`.
    """
    # Create HMC device
    system.hmc_dev = HMCSystem()
    hmc_dev = system.hmc_dev

    # Only override the HMCSystem defaults for flags the options object
    # actually defines, so option parsers without these flags still work.
    for flag in ("enable_global_monitor", "enable_link_monitor"):
        if hasattr(options, flag):
            setattr(hmc_dev, flag, getattr(options, flag))

    if hmc_dev.enable_link_monitor:
        hmc_dev.lmonitor = [
            CommMonitor()
            for _ in range(int(hmc_dev.num_links_controllers))]

    # 4 HMC Crossbars located in its logic-base (LoB). The count is taken
    # from the device itself so it always matches the clock-domain loop below.
    n_xbars = int(hmc_dev.number_mem_crossbar)
    hmc_dev.xbar = [
        NoncoherentXBar(width=hmc_dev.xbar_width,
                        frontend_latency=hmc_dev.xbar_frontend_latency,
                        forward_latency=hmc_dev.xbar_forward_latency,
                        response_latency=hmc_dev.xbar_response_latency)
        for _ in range(n_xbars)]

    for i in range(n_xbars):
        hmc_dev.xbar[i].clk_domain = SrcClockDomain(
            clock=hmc_dev.xbar_frequency,
            voltage_domain=VoltageDomain(voltage='1V'))

    # Attach 4 serial links to 4 crossbars, optionally through a monitor
    for i in range(int(hmc_dev.num_serial_links)):
        link = system.hmc_host.seriallink[i]
        if hmc_dev.enable_link_monitor:
            link.master = hmc_dev.lmonitor[i].slave
            hmc_dev.lmonitor[i].master = hmc_dev.xbar[i].slave
        else:
            link.master = hmc_dev.xbar[i].slave

    # Number of entries of system.mem_ranges served by each crossbar
    chunk = int(hmc_dev.mem_chunk)

    # Connect the crossbars with each other so that a request arriving at
    # the wrong crossbar is forwarded to the correct one. A Bridge is used
    # for every directed (source, destination) crossbar pair.
    if options.arch == "same":
        numx = len(hmc_dev.xbar)

        # Every ordered pair of distinct crossbars; a crossbar is never
        # connected to itself.
        routes = [(src, dst)
                  for src in range(numx)
                  for dst in range(numx)
                  if src != dst]

        # Exactly one bridge per route, so the pool can neither run out nor
        # leave bridges unconnected.
        hmc_dev.buffers = [
            Bridge(req_size=hmc_dev.xbar_buffer_size_req,
                   resp_size=hmc_dev.xbar_buffer_size_resp)
            for _ in routes]

        # Necessary to add system_port to one of the crossbars
        system.system_port = hmc_dev.xbar[3].slave

        for index, (src, dst) in enumerate(routes):
            bridge = hmc_dev.buffers[index]

            # The bridge only forwards the ranges local to the destination
            bridge.ranges = system.mem_ranges[dst * chunk:(dst + 1) * chunk]

            # Connect the bridge between the crossbars
            hmc_dev.xbar[src].master = bridge.slave
            bridge.master = hmc_dev.xbar[dst].slave

    # Two crossbars are connected to all other crossbars; the other two
    # can only direct traffic to their local vaults
    if options.arch == "mixed":
        # (source crossbar, destination crossbar); the bridge is stored as
        # hmc_dev.buffer<src><dst>
        mixed_routes = ((3, 0), (3, 1), (3, 2),
                        (2, 0), (2, 1), (2, 3))

        for src, dst in mixed_routes:
            bridge = Bridge(
                ranges=system.mem_ranges[dst * chunk:(dst + 1) * chunk])
            setattr(hmc_dev, "buffer%d%d" % (src, dst), bridge)
            hmc_dev.xbar[src].master = bridge.slave
            bridge.master = hmc_dev.xbar[dst].slave