1 /*****************************************************************************
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 ***************************************************************************/
40 #include "parameter.h"
44 double ArrayST::area_efficiency_threshold
= 20.0;
46 //Fixed number, make sure timing can be satisfied.
47 int ArrayST::delay_wt
= 100;
48 int ArrayST::cycle_time_wt
= 1000;
49 //Fixed number, This is used to exhaustive search for individual components.
50 int ArrayST::area_wt
= 10;
51 //Fixed number, This is used to exhaustive search for individual components.
52 int ArrayST::dynamic_power_wt
= 10;
53 int ArrayST::leakage_power_wt
= 10;
54 //Fixed number, make sure timing can be satisfied.
55 int ArrayST::delay_dev
= 1000000;
56 int ArrayST::cycle_time_dev
= 100;
57 //Fixed number, This is used to exhaustive search for individual components.
58 int ArrayST::area_dev
= 1000000;
59 //Fixed number, This is used to exhaustive search for individual components.
60 int ArrayST::dynamic_power_dev
= 1000000;
61 int ArrayST::leakage_power_dev
= 1000000;
62 int ArrayST::cycle_time_dev_threshold
= 10;
// Constructs an ArrayST.
//
// Initializer list (as visible here):
//   - _xml_data is forwarded to the McPATComponent base.
//   - l_ip is initialized from *configure_interface.
//   - device_ty_, opt_local_, core_ty_ and _is_default are stored in the
//     members of the same name (without the underscore).
// Body (as visible here): stores _clockRate, enforces a minimum on
// l_ip.cache_sz, then checks l_ip.error_checking(name).
//
// NOTE(review): _name is not referenced in the lines visible in this view,
// and the body of the error_checking failure branch plus the rest of the
// constructor are not visible either -- confirm against the complete file.
65 ArrayST::ArrayST(XMLNode
* _xml_data
,
66 const InputParameter
*configure_interface
, string _name
,
67 enum Device_ty device_ty_
, double _clockRate
,
68 bool opt_local_
, enum Core_type core_ty_
, bool _is_default
)
69 : McPATComponent(_xml_data
), l_ip(*configure_interface
),
70 device_ty(device_ty_
), opt_local(opt_local_
), core_ty(core_ty_
),
71 is_default(_is_default
) {
// Remember the clock rate; computeEnergy() multiplies by it when it fills in
// output_data.peak_dynamic_power.
73 clockRate
= _clockRate
;
// Raise cache_sz to MIN_BUFFER_SIZE when the configured value is smaller.
74 if (l_ip
.cache_sz
< MIN_BUFFER_SIZE
)
75 l_ip
.cache_sz
= MIN_BUFFER_SIZE
;
// Branch taken when error_checking() returns false for this configuration.
77 if (!l_ip
.error_checking(name
)) {
87 void ArrayST::compute_base_power() {
88 local_result
= cacti_interface(&l_ip
);
91 void ArrayST::computeArea() {
92 area
.set_area(local_result
.area
);
93 output_data
.area
= local_result
.area
/ 1e6
;
// Finds a result for this array and fills in its power numbers.
//
// Steps visible in this view:
//   1. Run compute_base_power() once and compare local_result.cycle_time and
//      local_result.access_time against l_ip.throughput and l_ip.latency.
//   2. When opt_for_clk && opt_local and at least one target is missed, load
//      the class-wide weights/deviations into l_ip and re-run
//      compute_base_power() in a loop. Each pass lowers l_ip.cycle_time_dev by
//      cycle_time_dev_threshold; the loop ends once both targets are met or
//      cycle_time_dev is no longer above the threshold. A result is pushed to
//      candidate_solutions when both targets are met, or when
//      data_array2->area_efficiency is below area_efficiency_threshold and
//      l_ip.assoc == 0.
//   3. For l_ip.assoc > 0, print a warning to cout for each target that is
//      still missed.
//   4. If any candidates were saved, use the one with the smallest
//      power.readOp.dynamic as local_result.
//   5. Scale local_result's area and the power of local_result, its
//      data_array2 and (for caches) its tag_array2.
//   6. Copy the result into `power` and output_data.
//
// NOTE(review): a number of lines of this function (closing braces, else
// branches, some statements) are not visible in this view; the summary above
// covers only what is visible -- confirm against the complete file.
96 void ArrayST::computeEnergy() {
// Results collected during the optimization loop.
97 list
<uca_org_t
> candidate_solutions(0);
98 list
<uca_org_t
>::iterator candidate_iter
, min_dynamic_energy_iter
;
100 uca_org_t
* temp_res
= NULL
;
101 local_result
.valid
= false;
// Timing targets read from the input parameters.
103 double throughput
= l_ip
.throughput
;
104 double latency
= l_ip
.latency
;
105 bool throughput_overflow
= true;
106 bool latency_overflow
= true;
// First attempt, using the parameters exactly as configured.
107 compute_base_power();
// A target counts as met when the result exceeds it by at most 1e-10.
109 if ((local_result
.cycle_time
- throughput
) <= 1e-10 )
110 throughput_overflow
= false;
111 if ((local_result
.access_time
- latency
) <= 1e-10)
112 latency_overflow
= false;
114 if (opt_for_clk
&& opt_local
) {
115 if (throughput_overflow
|| latency_overflow
) {
// Load the class-wide optimization weights and deviations into l_ip.
118 l_ip
.delay_wt
= delay_wt
;
119 l_ip
.cycle_time_wt
= cycle_time_wt
;
121 l_ip
.area_wt
= area_wt
;
122 l_ip
.dynamic_power_wt
= dynamic_power_wt
;
123 l_ip
.leakage_power_wt
= leakage_power_wt
;
125 l_ip
.delay_dev
= delay_dev
;
126 l_ip
.cycle_time_dev
= cycle_time_dev
;
128 l_ip
.area_dev
= area_dev
;
129 l_ip
.dynamic_power_dev
= dynamic_power_dev
;
130 l_ip
.leakage_power_dev
= leakage_power_dev
;
132 //Reset overflow flag before start optimization iterations
133 throughput_overflow
= true;
134 latency_overflow
= true;
136 //Clean up the result for optimized for ED^2P
137 temp_res
= &local_result
;
// Retry while a target is missed and cycle_time_dev is still above the
// threshold.
142 while ((throughput_overflow
|| latency_overflow
) &&
143 l_ip
.cycle_time_dev
> cycle_time_dev_threshold
) {
144 compute_base_power();
146 //This is the time_dev to be used for next iteration
147 l_ip
.cycle_time_dev
-= cycle_time_dev_threshold
;
149 // from best area to worst area -->worst timing to best timing
150 if ((((local_result
.cycle_time
- throughput
) <= 1e-10 ) &&
151 (local_result
.access_time
- latency
) <= 1e-10) ||
152 (local_result
.data_array2
->area_efficiency
<
153 area_efficiency_threshold
&& l_ip
.assoc
== 0)) {
154 //if no satisfiable solution is found,the most aggressive one
156 candidate_solutions
.push_back(local_result
);
157 if (((local_result
.cycle_time
- throughput
) <= 1e-10) &&
158 ((local_result
.access_time
- latency
) <= 1e-10)) {
159 //ensure stop opt not because of cam
160 throughput_overflow
= false;
161 latency_overflow
= false;
165 if ((local_result
.cycle_time
- throughput
) <= 1e-10)
166 throughput_overflow
= false;
167 if ((local_result
.access_time
- latency
) <= 1e-10)
168 latency_overflow
= false;
170 //if not >10 local_result is the last result, it cannot be
172 if (l_ip
.cycle_time_dev
> cycle_time_dev_threshold
) {
173 //Only solutions not saved in the list need to be
175 temp_res
= &local_result
;
182 if (l_ip
.assoc
> 0) {
183 //For array structures except CAM and FA, Give warning but still
184 //provide a result with best timing found
185 if (throughput_overflow
== true)
186 cout
<< "Warning: " << name
187 << " array structure cannot satisfy throughput constraint."
189 if (latency_overflow
== true)
190 cout
<< "Warning: " << name
191 << " array structure cannot satisfy latency constraint."
// Select the saved candidate with the smallest power.readOp.dynamic.
195 double min_dynamic_energy
= BIGNUM
;
196 if (candidate_solutions
.empty() == false) {
197 local_result
.valid
= true;
198 for (candidate_iter
= candidate_solutions
.begin();
199 candidate_iter
!= candidate_solutions
.end();
201 if (min_dynamic_energy
>
202 (candidate_iter
)->power
.readOp
.dynamic
) {
204 (candidate_iter
)->power
.readOp
.dynamic
;
205 min_dynamic_energy_iter
= candidate_iter
;
206 local_result
= *(min_dynamic_energy_iter
);
208 candidate_iter
->cleanup() ;
215 candidate_solutions
.clear();
// Factors used to scale the chosen result below.
218 double long_channel_device_reduction
=
219 longer_channel_device_reduction(device_ty
, core_ty
);
221 double macro_layout_overhead
= g_tp
.macro_layout_overhead
;
222 double chip_PR_overhead
= g_tp
.chip_layout_overhead
;
223 double total_overhead
= macro_layout_overhead
* chip_PR_overhead
;
224 local_result
.area
*= total_overhead
;
226 //maintain constant power density
227 double pppm_t
[4] = {total_overhead
, 1, 1, total_overhead
};
// Top-level power: dynamic terms are multiplied by g_tp.sckt_co_eff, read
// leakage by l_ip.nbanks, then the whole power object by pppm_t.
229 double sckRation
= g_tp
.sckt_co_eff
;
230 local_result
.power
.readOp
.dynamic
*= sckRation
;
231 local_result
.power
.writeOp
.dynamic
*= sckRation
;
232 local_result
.power
.searchOp
.dynamic
*= sckRation
;
233 local_result
.power
.readOp
.leakage
*= l_ip
.nbanks
;
234 local_result
.power
.readOp
.longer_channel_leakage
=
235 local_result
.power
.readOp
.leakage
* long_channel_device_reduction
;
236 local_result
.power
= local_result
.power
* pppm_t
;
// Same scaling sequence for the data array.
238 local_result
.data_array2
->power
.readOp
.dynamic
*= sckRation
;
239 local_result
.data_array2
->power
.writeOp
.dynamic
*= sckRation
;
240 local_result
.data_array2
->power
.searchOp
.dynamic
*= sckRation
;
241 local_result
.data_array2
->power
.readOp
.leakage
*= l_ip
.nbanks
;
242 local_result
.data_array2
->power
.readOp
.longer_channel_leakage
=
243 local_result
.data_array2
->power
.readOp
.leakage
*
244 long_channel_device_reduction
;
245 local_result
.data_array2
->power
= local_result
.data_array2
->power
* pppm_t
;
// Same scaling sequence for the tag array; applied only when is_cache is set
// and the structure is not a pure CAM, pure RAM or fully associative.
248 if (!(l_ip
.pure_cam
|| l_ip
.pure_ram
|| l_ip
.fully_assoc
) && l_ip
.is_cache
) {
249 local_result
.tag_array2
->power
.readOp
.dynamic
*= sckRation
;
250 local_result
.tag_array2
->power
.writeOp
.dynamic
*= sckRation
;
251 local_result
.tag_array2
->power
.searchOp
.dynamic
*= sckRation
;
252 local_result
.tag_array2
->power
.readOp
.leakage
*= l_ip
.nbanks
;
253 local_result
.tag_array2
->power
.readOp
.longer_channel_leakage
=
254 local_result
.tag_array2
->power
.readOp
.leakage
*
255 long_channel_device_reduction
;
256 local_result
.tag_array2
->power
=
257 local_result
.tag_array2
->power
* pppm_t
;
// Publish the scaled result through `power` and output_data.
260 power
= local_result
.power
;
262 output_data
.peak_dynamic_power
= power
.readOp
.dynamic
* clockRate
;
263 output_data
.subthreshold_leakage_power
= power
.readOp
.leakage
;
264 output_data
.gate_leakage_power
= power
.readOp
.gate_leakage
;
// Re-evaluates the array for a new temperature and re-applies power scaling.
//
// temperature: rounded to the nearest multiple of 10 and stored in l_ip.temp.
//
// After updating l_ip.temp the function calls reconfigure(&l_ip, &local_result)
// and then scales the power of local_result, its data_array2 and (for caches)
// its tag_array2 with the same sequence of operations used in
// ArrayST::computeEnergy().
//
// NOTE(review): the function's opening/closing braces and its final lines
// are not visible in this view; in the visible lines neither `power` nor
// output_data is refreshed -- confirm against the complete file.
267 void ArrayST::leakage_feedback(double temperature
)
269 // Update the temperature. l_ip is already set and error-checked in the creator function.
270 l_ip
.temp
= (unsigned int)round(temperature
/10.0)*10;
272 // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here.
273 reconfigure(&l_ip
,&local_result
);
275 // Scale the power values. This is part of ArrayST::optimize_array().
// NOTE(review): the same scaling sequence appears in ArrayST::computeEnergy();
// optimize_array() is not visible in this view -- confirm the reference above.
276 double long_channel_device_reduction
= longer_channel_device_reduction(device_ty
,core_ty
);
278 double macro_layout_overhead
= g_tp
.macro_layout_overhead
;
279 double chip_PR_overhead
= g_tp
.chip_layout_overhead
;
280 double total_overhead
= macro_layout_overhead
*chip_PR_overhead
;
282 double pppm_t
[4] = {total_overhead
,1,1,total_overhead
};
// Top-level power: dynamic terms are multiplied by g_tp.sckt_co_eff, read
// leakage by l_ip.nbanks, then the whole power object by pppm_t.
284 double sckRation
= g_tp
.sckt_co_eff
;
285 local_result
.power
.readOp
.dynamic
*= sckRation
;
286 local_result
.power
.writeOp
.dynamic
*= sckRation
;
287 local_result
.power
.searchOp
.dynamic
*= sckRation
;
288 local_result
.power
.readOp
.leakage
*= l_ip
.nbanks
;
289 local_result
.power
.readOp
.longer_channel_leakage
= local_result
.power
.readOp
.leakage
*long_channel_device_reduction
;
290 local_result
.power
= local_result
.power
* pppm_t
;
// Same scaling sequence for the data array.
292 local_result
.data_array2
->power
.readOp
.dynamic
*= sckRation
;
293 local_result
.data_array2
->power
.writeOp
.dynamic
*= sckRation
;
294 local_result
.data_array2
->power
.searchOp
.dynamic
*= sckRation
;
295 local_result
.data_array2
->power
.readOp
.leakage
*= l_ip
.nbanks
;
296 local_result
.data_array2
->power
.readOp
.longer_channel_leakage
= local_result
.data_array2
->power
.readOp
.leakage
*long_channel_device_reduction
;
297 local_result
.data_array2
->power
= local_result
.data_array2
->power
* pppm_t
;
// Same scaling sequence for the tag array; guarded by is_cache and by the
// structure not being a pure CAM, pure RAM or fully associative.
299 if (!(l_ip
.pure_cam
|| l_ip
.pure_ram
|| l_ip
.fully_assoc
) && l_ip
.is_cache
)
301 local_result
.tag_array2
->power
.readOp
.dynamic
*= sckRation
;
302 local_result
.tag_array2
->power
.writeOp
.dynamic
*= sckRation
;
303 local_result
.tag_array2
->power
.searchOp
.dynamic
*= sckRation
;
304 local_result
.tag_array2
->power
.readOp
.leakage
*= l_ip
.nbanks
;
305 local_result
.tag_array2
->power
.readOp
.longer_channel_leakage
= local_result
.tag_array2
->power
.readOp
.leakage
*long_channel_device_reduction
;
306 local_result
.tag_array2
->power
= local_result
.tag_array2
->power
* pppm_t
;
// Destructor: calls cleanup() on local_result.
// NOTE(review): the end of this definition is not visible in this view.
310 ArrayST::~ArrayST() {
311 local_result
.cleanup();