1 /*****************************************************************************
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
30 ***************************************************************************/
40 #include "globalvar.h"
41 #include "parameter.h"
// Constructor (as far as this listing shows): copies *configure_interface
// into l_ip and initializes device_ty, opt_local and is_default from the
// corresponding arguments, then raises l_ip.cache_sz to at least 64 and
// calls l_ip.error_checking().
// NOTE(review): opt_local_ and _is_default are used in the initializer list
// but are not declared in the visible parameter list, and no braces are
// visible; this listing appears to be missing lines -- confirm against the
// complete file before relying on this description.
45 ArrayST::ArrayST(const InputParameter
*configure_interface
,
47 enum Device_ty device_ty_
,
49 enum Core_type core_ty_
,
51 :l_ip(*configure_interface
),
53 device_ty(device_ty_
),
54 opt_local(opt_local_
),
56 is_default(_is_default
)
// Enforce a lower bound of 64 on the configured cache size.
59 if (l_ip
.cache_sz
<64) l_ip
.cache_sz
=64;
60 l_ip
.error_checking();//not only does the error checking but also fills in some missing parameters
// Runs cacti_interface() on the current input parameters (l_ip) and stores
// the returned result in local_result.
66 void ArrayST::compute_base_power()
68 //l_ip.out_w =l_ip.line_sz*8;
69 local_result
=cacti_interface(&l_ip
);
// Selects the array result kept in local_result and scales its area/power.
// Steps visible in this listing:
//   1. Compare local_result.cycle_time / access_time with the l_ip.throughput
//      and l_ip.latency targets (tolerance 1e-10).
//   2. Under opt_for_clk && opt_local, when a target is missed, set fixed
//      weights/deviations on l_ip and repeat compute_base_power() while
//      lowering l_ip.cycle_time_dev by 10 per pass, collecting candidates.
//   3. Among the candidates, keep the one with the lowest
//      power.readOp.dynamic.
//   4. Scale area and power by the layout overheads, g_tp.sckt_co_eff,
//      l_ip.nbanks and the longer-channel reduction factor.
// NOTE(review): braces and some statements (e.g. any `else`, and whatever
// uses temp_res) are not visible in this listing, so the exact nesting of
// the steps above cannot be confirmed from here.
73 void ArrayST::optimize_array()
75 list
<uca_org_t
> candidate_solutions(0);
76 list
<uca_org_t
>::iterator candidate_iter
, min_dynamic_energy_iter
;
78 uca_org_t
* temp_res
= 0;
79 local_result
.valid
=false;
// Timing targets taken from the input parameters.
81 double throughput
=l_ip
.throughput
, latency
=l_ip
.latency
;
// Compared against data_array2->area_efficiency below; the commented-out
// warnings further down print this value followed by "%".
82 double area_efficiency_threshold
= 20.0;
83 bool throughput_overflow
=true, latency_overflow
=true;
// A target counts as met when the result exceeds it by no more than 1e-10.
86 if ((local_result
.cycle_time
- throughput
) <= 1e-10 )
87 throughput_overflow
=false;
88 if ((local_result
.access_time
- latency
)<= 1e-10)
89 latency_overflow
=false;
91 if (opt_for_clk
&& opt_local
)
93 if (throughput_overflow
|| latency_overflow
)
// Fixed optimization weights written into l_ip.
97 l_ip
.delay_wt
= 100;//Fixed number, make sure timing can be satisfied.
98 l_ip
.cycle_time_wt
= 1000;
100 l_ip
.area_wt
= 10;//Fixed number; this is used for the exhaustive search over individual components.
101 l_ip
.dynamic_power_wt
= 10;//Fixed number; this is used for the exhaustive search over individual components.
102 l_ip
.leakage_power_wt
= 10;
// Fixed deviation values; cycle_time_dev starts at 100 and is the only one
// the loop below changes.
104 l_ip
.delay_dev
= 1000000;//Fixed number, make sure timing can be satisfied.
105 l_ip
.cycle_time_dev
= 100;
107 l_ip
.area_dev
= 1000000;//Fixed number; this is used for the exhaustive search over individual components.
108 l_ip
.dynamic_power_dev
= 1000000;//Fixed number; this is used for the exhaustive search over individual components.
109 l_ip
.leakage_power_dev
= 1000000;
111 throughput_overflow
=true; //Reset the overflow flags before starting the optimization iterations
112 latency_overflow
=true;
114 temp_res
= &local_result
; //Clean up the result for optimized for ED^2P
// Iterate while a target is still unmet and cycle_time_dev is above 10.
119 while ((throughput_overflow
|| latency_overflow
)&&l_ip
.cycle_time_dev
> 10)// && l_ip.delay_dev > 10
121 compute_base_power();
123 l_ip
.cycle_time_dev
-=10;//This is the time_dev to be used for next iteration
125 // from best area to worst area -->worst timing to best timing
// Candidate condition: both timing targets met, or the data array's area
// efficiency is below the threshold while l_ip.assoc == 0.
126 if ((((local_result
.cycle_time
- throughput
) <= 1e-10 ) && (local_result
.access_time
- latency
)<= 1e-10)||
127 (local_result
.data_array2
->area_efficiency
< area_efficiency_threshold
&& l_ip
.assoc
== 0))
128 { //if no satisfiable solution is found, the most aggressive one is left
129 candidate_solutions
.push_back(local_result
);
130 //output_data_csv(candidate_solutions.back());
131 if (((local_result
.cycle_time
- throughput
) <= 1e-10) && ((local_result
.access_time
- latency
)<= 1e-10))
132 //make sure the optimization did not stop because of CAM
134 throughput_overflow
=false;
135 latency_overflow
=false;
141 //TODO: decide whether to check the partially satisfied results too, or just change the mark???
142 if ((local_result
.cycle_time
- throughput
) <= 1e-10)
143 throughput_overflow
=false;
144 if ((local_result
.access_time
- latency
)<= 1e-10)
145 latency_overflow
=false;
147 if (l_ip
.cycle_time_dev
> 10)
148 { //if not >10 local_result is the last result, it cannot be cleaned up
149 temp_res
= &local_result
; //Only solutions not saved in the list need to be cleaned up
153 // l_ip.cycle_time_dev-=10;
154 // l_ip.delay_dev-=10;
161 //For array structures except CAM and FA, Give warning but still provide a result with best timing found
162 if (throughput_overflow
==true)
163 cout
<< "Warning: " << name
<<" array structure cannot satisfy throughput constraint." << endl
;
164 if (latency_overflow
==true)
165 cout
<< "Warning: " << name
<<" array structure cannot satisfy latency constraint." << endl
;
170 // /*According to "Content-Addressable Memory (CAM) Circuits and
171 // Architectures": A Tutorial and Survey
172 // by Kostas Pagiamtzis et al.
173 // CAM structures can be heavily pipelined and use look-ahead techniques,
174 // therefore timing can be relaxed. But McPAT does not model the advanced
175 // techniques. If continue optimizing, the area efficiency will be too low
177 // //For CAM and FA, stop opt if area efficiency is too low
178 // if (throughput_overflow==true)
179 // cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name
180 // <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
181 // if (latency_overflow==true)
182 // cout<< "Warning: " <<" McPAT stopped optimization on latency for "<< name
183 // <<" array structure because its area efficiency is below "<<area_efficiency_threshold<<"% " << endl;
186 //double min_dynamic_energy, min_dynamic_power, min_leakage_power, min_cycle_time;
// Pick the candidate with the smallest power.readOp.dynamic; BIGNUM is the
// starting value for the running minimum.
187 double min_dynamic_energy
=BIGNUM
;
188 if (candidate_solutions
.empty()==false)
190 local_result
.valid
=true;
191 for (candidate_iter
= candidate_solutions
.begin(); candidate_iter
!= candidate_solutions
.end(); ++candidate_iter
)
194 if (min_dynamic_energy
> (candidate_iter
)->power
.readOp
.dynamic
)
196 min_dynamic_energy
= (candidate_iter
)->power
.readOp
.dynamic
;
197 min_dynamic_energy_iter
= candidate_iter
;
198 local_result
= *(min_dynamic_energy_iter
);
199 //TODO: since results are reordered, results and l_ip may mismatch; therefore the final output spreadsheets may show the mismatch.
204 candidate_iter
->cleanup() ;
211 candidate_solutions
.clear();
// Post-processing of the result held in local_result: layout overheads and
// power scaling.
214 double long_channel_device_reduction
= longer_channel_device_reduction(device_ty
,core_ty
);
216 double macro_layout_overhead
= g_tp
.macro_layout_overhead
;
217 double chip_PR_overhead
= g_tp
.chip_layout_overhead
;
218 double total_overhead
= macro_layout_overhead
*chip_PR_overhead
;
219 local_result
.area
*= total_overhead
;
221 //maintain constant power density
// Four multipliers applied to the power objects through operator* below: the
// first and last entries are total_overhead, the middle two are 1.
// NOTE(review): which power component each entry scales is defined by that
// operator, which is not visible here -- confirm.
222 double pppm_t
[4] = {total_overhead
,1,1,total_overhead
};
// Dynamic components are multiplied by g_tp.sckt_co_eff and readOp.leakage by
// l_ip.nbanks; longer_channel_leakage is then derived from the scaled leakage.
224 double sckRation
= g_tp
.sckt_co_eff
;
225 local_result
.power
.readOp
.dynamic
*= sckRation
;
226 local_result
.power
.writeOp
.dynamic
*= sckRation
;
227 local_result
.power
.searchOp
.dynamic
*= sckRation
;
228 local_result
.power
.readOp
.leakage
*= l_ip
.nbanks
;
229 local_result
.power
.readOp
.longer_channel_leakage
=
230 local_result
.power
.readOp
.leakage
*long_channel_device_reduction
;
231 local_result
.power
= local_result
.power
* pppm_t
;
// Same scaling applied to the data array's power.
233 local_result
.data_array2
->power
.readOp
.dynamic
*= sckRation
;
234 local_result
.data_array2
->power
.writeOp
.dynamic
*= sckRation
;
235 local_result
.data_array2
->power
.searchOp
.dynamic
*= sckRation
;
236 local_result
.data_array2
->power
.readOp
.leakage
*= l_ip
.nbanks
;
237 local_result
.data_array2
->power
.readOp
.longer_channel_leakage
=
238 local_result
.data_array2
->power
.readOp
.leakage
*long_channel_device_reduction
;
239 local_result
.data_array2
->power
= local_result
.data_array2
->power
* pppm_t
;
// Same scaling for the tag array, guarded by: l_ip.is_cache is set and none
// of pure_cam / pure_ram / fully_assoc is.
242 if (!(l_ip
.pure_cam
|| l_ip
.pure_ram
|| l_ip
.fully_assoc
) && l_ip
.is_cache
)
244 local_result
.tag_array2
->power
.readOp
.dynamic
*= sckRation
;
245 local_result
.tag_array2
->power
.writeOp
.dynamic
*= sckRation
;
246 local_result
.tag_array2
->power
.searchOp
.dynamic
*= sckRation
;
247 local_result
.tag_array2
->power
.readOp
.leakage
*= l_ip
.nbanks
;
248 local_result
.tag_array2
->power
.readOp
.longer_channel_leakage
=
249 local_result
.tag_array2
->power
.readOp
.leakage
*long_channel_device_reduction
;
250 local_result
.tag_array2
->power
= local_result
.tag_array2
->power
* pppm_t
;
// Recomputes the result for a new temperature and re-applies power scaling.
// Sets l_ip.temp to the temperature rounded to a multiple of 10, calls
// reconfigure(&l_ip, &local_result), then repeats the power multiplications
// that optimize_array() performs (sckt_co_eff, nbanks, longer-channel
// reduction, pppm_t) on the overall, data-array and (conditionally) tag-array
// power.
// temperature: the new temperature. NOTE(review): units are not visible in
// this listing -- confirm against the caller.
// NOTE(review): braces are not visible in this listing, so the extent of the
// tag-array condition near the end cannot be confirmed from here.
256 void ArrayST::leakage_feedback(double temperature
)
258 // Update the temperature. l_ip is already set and error-checked in the constructor.
// round(temperature/10.0) is cast to unsigned int and then multiplied by 10.
259 l_ip
.temp
= (unsigned int)round(temperature
/10.0)*10;
261 // This corresponds to cacti_interface() in the initialization process. Leakage power is updated here.
262 reconfigure(&l_ip
,&local_result
);
264 // Scale the power values. This is part of ArrayST::optimize_array().
265 double long_channel_device_reduction
= longer_channel_device_reduction(device_ty
,core_ty
);
267 double macro_layout_overhead
= g_tp
.macro_layout_overhead
;
268 double chip_PR_overhead
= g_tp
.chip_layout_overhead
;
269 double total_overhead
= macro_layout_overhead
*chip_PR_overhead
;
// Four multipliers applied to the power objects through operator* below: the
// first and last entries are total_overhead, the middle two are 1.
271 double pppm_t
[4] = {total_overhead
,1,1,total_overhead
};
// Dynamic components are multiplied by g_tp.sckt_co_eff and readOp.leakage by
// l_ip.nbanks; longer_channel_leakage is then derived from the scaled leakage.
273 double sckRation
= g_tp
.sckt_co_eff
;
274 local_result
.power
.readOp
.dynamic
*= sckRation
;
275 local_result
.power
.writeOp
.dynamic
*= sckRation
;
276 local_result
.power
.searchOp
.dynamic
*= sckRation
;
277 local_result
.power
.readOp
.leakage
*= l_ip
.nbanks
;
278 local_result
.power
.readOp
.longer_channel_leakage
= local_result
.power
.readOp
.leakage
*long_channel_device_reduction
;
279 local_result
.power
= local_result
.power
* pppm_t
;
// Same scaling applied to the data array's power.
281 local_result
.data_array2
->power
.readOp
.dynamic
*= sckRation
;
282 local_result
.data_array2
->power
.writeOp
.dynamic
*= sckRation
;
283 local_result
.data_array2
->power
.searchOp
.dynamic
*= sckRation
;
284 local_result
.data_array2
->power
.readOp
.leakage
*= l_ip
.nbanks
;
285 local_result
.data_array2
->power
.readOp
.longer_channel_leakage
= local_result
.data_array2
->power
.readOp
.leakage
*long_channel_device_reduction
;
286 local_result
.data_array2
->power
= local_result
.data_array2
->power
* pppm_t
;
// Same scaling for the tag array, guarded by: l_ip.is_cache is set and none
// of pure_cam / pure_ram / fully_assoc is.
288 if (!(l_ip
.pure_cam
|| l_ip
.pure_ram
|| l_ip
.fully_assoc
) && l_ip
.is_cache
)
290 local_result
.tag_array2
->power
.readOp
.dynamic
*= sckRation
;
291 local_result
.tag_array2
->power
.writeOp
.dynamic
*= sckRation
;
292 local_result
.tag_array2
->power
.searchOp
.dynamic
*= sckRation
;
293 local_result
.tag_array2
->power
.readOp
.leakage
*= l_ip
.nbanks
;
294 local_result
.tag_array2
->power
.readOp
.longer_channel_leakage
= local_result
.tag_array2
->power
.readOp
.leakage
*long_channel_device_reduction
;
295 local_result
.tag_array2
->power
= local_result
.tag_array2
->power
* pppm_t
;
301 local_result
.cleanup();