style: change C/C++ source permissions to noexec
[gem5.git] / ext / mcpat / cacti / uca.cc
1 /*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35 #include <cmath>
36 #include <iostream>
37
38 #include "uca.h"
39
40 UCA::UCA(const DynamicParameter & dyn_p)
41 : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) {
42 int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)
43 / 2 : (_log2(nbanks) - _log2(nbanks) / 2));
44 int num_banks_hor_dir = nbanks / num_banks_ver_dir;
45
46 if (dp.use_inp_params) {
47 RWP = dp.num_rw_ports;
48 ERP = dp.num_rd_ports;
49 EWP = dp.num_wr_ports;
50 SCHP = dp.num_search_ports;
51 } else {
52 RWP = g_ip->num_rw_ports;
53 ERP = g_ip->num_rd_ports;
54 EWP = g_ip->num_wr_ports;
55 SCHP = g_ip->num_search_ports;
56 }
57
58 num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) *
59 (RWP + ERP + EWP);
60 num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP);
61 num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP);
62 num_si_b_bank = dp.num_si_b_bank_per_port * SCHP;
63 num_so_b_bank = dp.num_so_b_bank_per_port * SCHP;
64
65 if (!dp.fully_assoc && !dp.pure_cam) {
66
67 if (g_ip->fast_access && dp.is_tag == false) {
68 num_do_b_bank *= g_ip->data_assoc;
69 }
70
71 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
72 num_addr_b_bank, num_di_b_bank, 0,
73 num_do_b_bank, 0, num_banks_ver_dir * 2,
74 num_banks_hor_dir * 2, Add_htree, true);
75 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
76 num_addr_b_bank, num_di_b_bank, 0,
77 num_do_b_bank, 0, num_banks_ver_dir * 2,
78 num_banks_hor_dir * 2, Data_in_htree, true);
79 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
80 num_addr_b_bank, num_di_b_bank, 0,
81 num_do_b_bank, 0, num_banks_ver_dir * 2,
82 num_banks_hor_dir * 2, Data_out_htree, true);
83 }
84
85 else {
86
87 htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
88 num_addr_b_bank, num_di_b_bank,
89 num_si_b_bank, num_do_b_bank, num_so_b_bank,
90 num_banks_ver_dir * 2, num_banks_hor_dir * 2,
91 Add_htree, true);
92 htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
93 num_addr_b_bank, num_di_b_bank,
94 num_si_b_bank, num_do_b_bank, num_so_b_bank,
95 num_banks_ver_dir * 2, num_banks_hor_dir * 2,
96 Data_in_htree, true);
97 htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
98 num_addr_b_bank, num_di_b_bank,
99 num_si_b_bank, num_do_b_bank,
100 num_so_b_bank, num_banks_ver_dir * 2,
101 num_banks_hor_dir * 2, Data_out_htree, true);
102 htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
103 num_addr_b_bank, num_di_b_bank,
104 num_si_b_bank, num_do_b_bank,
105 num_so_b_bank, num_banks_ver_dir * 2,
106 num_banks_hor_dir * 2, Data_in_htree, true);
107 htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h,
108 num_addr_b_bank, num_di_b_bank,
109 num_si_b_bank, num_do_b_bank,
110 num_so_b_bank, num_banks_ver_dir * 2,
111 num_banks_hor_dir * 2, Data_out_htree,
112 true);
113 }
114
115 area.w = htree_in_data->area.w;
116 area.h = htree_in_data->area.h;
117
118 area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks;
119 // cout<<"area cell"<<area_all_dataramcells<<endl;
120 // cout<<area.get_area()<<endl;
121 // delay calculation
122 double inrisetime = 0.0;
123 compute_delays(inrisetime);
124 compute_power_energy();
125 }
126
127
128
129 UCA::~UCA() {
130 delete htree_in_add;
131 delete htree_in_data;
132 delete htree_out_data;
133 }
134
135
136
137 double UCA::compute_delays(double inrisetime) {
138 double outrisetime = bank.compute_delays(inrisetime);
139
140 double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay;
141 double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay;
142 delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat +
143 bank.mat.sa_mux_lev_1_predec->delay +
144 bank.mat.sa_mux_lev_1_dec->delay;
145 delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat +
146 bank.mat.sa_mux_lev_2_predec->delay +
147 bank.mat.sa_mux_lev_2_dec->delay;
148 double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa;
149
150 delay_before_subarray_output_driver =
151 MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path
152 delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path
153 MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path
154 delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path
155 delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree +
156 bank.htree_out_data->delay + htree_out_data->delay;
157 access_time = bank.mat.delay_comparator;
158
159 double ram_delay_inside_mat;
160 if (dp.fully_assoc) {
161 //delay of FA contains both CAM tag and RAM data
162 { //delay of CAM
163 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
164 access_time = htree_in_add->delay + bank.htree_in_add->delay;
165 //delay of fully-associative data array
166 access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out;
167 }
168 } else {
169 access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path
170 }
171
172 if (dp.is_main_mem) {
173 double t_rcd = max_delay_before_row_decoder + delay_inside_mat;
174 double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) +
175 delay_from_subarray_out_drv_to_out;
176 access_time = t_rcd + cas_latency;
177 }
178
179 double temp;
180
181 if (!dp.fully_assoc) {
182 temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit
183 if (dp.is_dram) {
184 temp += bank.mat.delay_writeback; // temp stores random cycle time
185 }
186
187
188 temp = MAX(temp, bank.mat.r_predec->delay);
189 temp = MAX(temp, bank.mat.b_mux_predec->delay);
190 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
191 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
192 } else {
193 ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline;
194 temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore
195 + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset;
196
197 temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc.
198 temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay);
199 temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay);
200 }
201
202 // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav
203 if (g_ip->rpters_in_htree == false) {
204 temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay);
205 }
206 cycle_time = temp;
207
208 double delay_req_network = max_delay_before_row_decoder;
209 double delay_rep_network = delay_from_subarray_out_drv_to_out;
210 multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network);
211
212 if (dp.is_main_mem) {
213 multisubbank_interleave_cycle_time = htree_in_add->delay;
214 precharge_delay = htree_in_add->delay +
215 bank.htree_in_add->delay + bank.mat.delay_writeback +
216 bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;
217 cycle_time = access_time + precharge_delay;
218 } else {
219 precharge_delay = 0;
220 }
221
222 double dram_array_availability = 0;
223 if (dp.is_dram) {
224 dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100;
225 }
226
227 return outrisetime;
228 }
229
230
231
232 // note: currently, power numbers are for a bank of an array
233 void UCA::compute_power_energy() {
234 bank.compute_power_energy();
235 power = bank.power;
236
237 power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic;
238 power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic;
239 if (dp.fully_assoc || dp.pure_cam)
240 power_routing_to_bank.searchOp.dynamic =
241 htree_in_search->power.searchOp.dynamic +
242 htree_out_search->power.searchOp.dynamic;
243
244 power_routing_to_bank.readOp.leakage +=
245 htree_in_add->power.readOp.leakage +
246 htree_in_data->power.readOp.leakage +
247 htree_out_data->power.readOp.leakage;
248
249 power_routing_to_bank.readOp.gate_leakage +=
250 htree_in_add->power.readOp.gate_leakage +
251 htree_in_data->power.readOp.gate_leakage +
252 htree_out_data->power.readOp.gate_leakage;
253 if (dp.fully_assoc || dp.pure_cam) {
254 power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
255 power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
256 }
257
258 power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic;
259 power.readOp.dynamic += power_routing_to_bank.readOp.dynamic;
260 power.readOp.leakage += power_routing_to_bank.readOp.leakage;
261 power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage;
262
263 // calculate total write energy per access
264 power.writeOp.dynamic = power.readOp.dynamic
265 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
266 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
267 - power_routing_to_bank.readOp.dynamic
268 + power_routing_to_bank.writeOp.dynamic
269 + bank.htree_in_data->power.readOp.dynamic
270 - bank.htree_out_data->power.readOp.dynamic;
271
272 if (dp.is_dram == false) {
273 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
274 }
275
276 dyn_read_energy_from_closed_page = power.readOp.dynamic;
277 dyn_read_energy_from_open_page = power.readOp.dynamic -
278 (bank.mat.r_predec->power.readOp.dynamic +
279 bank.mat.power_row_decoders.readOp.dynamic +
280 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic +
281 bank.mat.power_sa.readOp.dynamic +
282 bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir;
283
284 dyn_read_energy_remaining_words_in_burst =
285 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) *
286 ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
287 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
288 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
289 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
290 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
291 bank.htree_out_data->power.readOp.dynamic +
292 power_routing_to_bank.readOp.dynamic);
293 dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst;
294 dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst;
295
296 activate_energy = htree_in_add->power.readOp.dynamic +
297 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act +
298 (bank.mat.r_predec->power.readOp.dynamic +
299 bank.mat.power_row_decoders.readOp.dynamic +
300 bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir;
301 read_energy = (htree_in_add->power.readOp.dynamic +
302 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
303 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
304 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
305 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
306 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic +
307 bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir +
308 bank.htree_out_data->power.readOp.dynamic +
309 htree_in_data->power.readOp.dynamic) * g_ip->burst_len;
310 write_energy = (htree_in_add->power.readOp.dynamic +
311 bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr +
312 htree_in_data->power.readOp.dynamic +
313 bank.htree_in_data->power.readOp.dynamic +
314 (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic +
315 bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic +
316 bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic +
317 bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len;
318 precharge_energy = (bank.mat.power_bitline.readOp.dynamic +
319 bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir;
320
321 leak_power_subbank_closed_page =
322 (bank.mat.r_predec->power.readOp.leakage +
323 bank.mat.b_mux_predec->power.readOp.leakage +
324 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
325 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
326 bank.mat.power_row_decoders.readOp.leakage +
327 bank.mat.power_bit_mux_decoders.readOp.leakage +
328 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
329 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
330 bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
331
332 leak_power_subbank_closed_page +=
333 (bank.mat.r_predec->power.readOp.gate_leakage +
334 bank.mat.b_mux_predec->power.readOp.gate_leakage +
335 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
336 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
337 bank.mat.power_row_decoders.readOp.gate_leakage +
338 bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
339 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
340 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+
341 //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir;
342
343 leak_power_subbank_open_page =
344 (bank.mat.r_predec->power.readOp.leakage +
345 bank.mat.b_mux_predec->power.readOp.leakage +
346 bank.mat.sa_mux_lev_1_predec->power.readOp.leakage +
347 bank.mat.sa_mux_lev_2_predec->power.readOp.leakage +
348 bank.mat.power_row_decoders.readOp.leakage +
349 bank.mat.power_bit_mux_decoders.readOp.leakage +
350 bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage +
351 bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage +
352 bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
353
354 leak_power_subbank_open_page +=
355 (bank.mat.r_predec->power.readOp.gate_leakage +
356 bank.mat.b_mux_predec->power.readOp.gate_leakage +
357 bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage +
358 bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage +
359 bank.mat.power_row_decoders.readOp.gate_leakage +
360 bank.mat.power_bit_mux_decoders.readOp.gate_leakage +
361 bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage +
362 bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir;
363 //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir;
364
365 leak_power_request_and_reply_networks =
366 power_routing_to_bank.readOp.leakage +
367 bank.htree_in_add->power.readOp.leakage +
368 bank.htree_in_data->power.readOp.leakage +
369 bank.htree_out_data->power.readOp.leakage;
370
371 leak_power_request_and_reply_networks +=
372 power_routing_to_bank.readOp.gate_leakage +
373 bank.htree_in_add->power.readOp.gate_leakage +
374 bank.htree_in_data->power.readOp.gate_leakage +
375 bank.htree_out_data->power.readOp.gate_leakage;
376
377 if (dp.fully_assoc || dp.pure_cam) {
378 leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage;
379 leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage;
380 }
381
382
383 // if DRAM, add contribution of power spent in row predecoder drivers,
384 // blocks and decoders to refresh power
385 if (dp.is_dram) {
386 refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir +
387 bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays;
388 refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays;
389 refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir;
390 refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
391 refresh_power /= dp.dram_refresh_period;
392 }
393
394
395 if (dp.is_tag == false) {
396 power.readOp.dynamic = dyn_read_energy_from_closed_page;
397 power.writeOp.dynamic = dyn_read_energy_from_closed_page
398 - dyn_read_energy_remaining_words_in_burst
399 - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir
400 + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir
401 + (power_routing_to_bank.writeOp.dynamic -
402 power_routing_to_bank.readOp.dynamic -
403 bank.htree_out_data->power.readOp.dynamic +
404 bank.htree_in_data->power.readOp.dynamic) *
405 (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME
406
407 if (dp.is_dram == false) {
408 power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir;
409 }
410 }
411
412 // if DRAM, add refresh power to total leakage
413 if (dp.is_dram) {
414 power.readOp.leakage += refresh_power;
415 }
416
417 // TODO: below should be avoided.
418 /*if (dp.is_main_mem)
419 {
420 power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks;
421 }*/
422
423 assert(power.readOp.dynamic > 0);
424 assert(power.writeOp.dynamic > 0);
425 assert(power.readOp.leakage > 0);
426 }
427