ef98107c70775a2d57165f6d90e4a3e1ab3be26d
[gem5.git] / ext / mcpat / cacti / mat.cc
1 /*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * All Rights Reserved
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 ***************************************************************************/
31
32
33
34 #include <cassert>
35
36 #include "mat.h"
37
38 Mat::Mat(const DynamicParameter & dyn_p)
39 :dp(dyn_p),
40 power_subarray_out_drv(),
41 delay_fa_tag(0), delay_cam(0),
42 delay_before_decoder(0), delay_bitline(0),
43 delay_wl_reset(0), delay_bl_restore(0),
44 delay_searchline(0), delay_matchchline(0),
45 delay_cam_sl_restore(0), delay_cam_ml_reset(0),
46 delay_fa_ram_wl(0),delay_hit_miss_reset(0),
47 delay_hit_miss(0),
48 subarray(dp, dp.fully_assoc),
49 power_bitline(), per_bitline_read_energy(0),
50 deg_bl_muxing(dp.deg_bl_muxing),
51 num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
52 delay_writeback(0),
53 cell(subarray.cell), cam_cell(subarray.cam_cell),
54 is_dram(dyn_p.is_dram),
55 pure_cam(dyn_p.pure_cam),
56 num_mats(dp.num_mats),
57 power_sa(), delay_sa(0),
58 leak_power_sense_amps_closed_page_state(0),
59 leak_power_sense_amps_open_page_state(0),
60 delay_subarray_out_drv(0),
61 delay_comparator(0), power_comparator(),
62 num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
63 num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
64 num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
65 {
66 assert(num_subarrays_per_mat <= 4);
67 assert(num_subarrays_per_row <= 2);
68 is_fa = (dp.fully_assoc) ? true : false;
69 camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
70
71 if (is_fa || pure_cam)
72 num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
73
74 if (dp.use_inp_params == 1) {
75 RWP = dp.num_rw_ports;
76 ERP = dp.num_rd_ports;
77 EWP = dp.num_wr_ports;
78 SCHP = dp.num_search_ports;
79 }
80 else {
81 RWP = g_ip->num_rw_ports;
82 ERP = g_ip->num_rd_ports;
83 EWP = g_ip->num_wr_ports;
84 SCHP = g_ip->num_search_ports;
85
86 }
87
88 double number_sa_subarray;
89
90 if (!is_fa && !pure_cam)
91 {
92 number_sa_subarray = subarray.num_cols / deg_bl_muxing;
93 }
94 else if (is_fa && !pure_cam)
95 {
96 number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
97 }
98
99 else
100 {
101 number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing;
102 }
103
104 int num_dec_signals = subarray.num_rows;
105 double C_ld_bit_mux_dec_out = 0;
106 double C_ld_sa_mux_lev_1_dec_out = 0;
107 double C_ld_sa_mux_lev_2_dec_out = 0;
108 double R_wire_wl_drv_out;
109
110 if (!is_fa && !pure_cam)
111 {
112 R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
113 }
114 else if (is_fa && !pure_cam)
115 {
116 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
117 }
118 else
119 {
120 R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
121 }
122
123 double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
124 double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
125
126 if (deg_bl_muxing > 1)
127 {
128 C_ld_bit_mux_dec_out =
129 (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell
130 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
131 }
132
133 if (dp.Ndsam_lev_1 > 1)
134 {
135 C_ld_sa_mux_lev_1_dec_out =
136 (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
137 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
138 }
139 if (dp.Ndsam_lev_2 > 1)
140 {
141 C_ld_sa_mux_lev_2_dec_out =
142 (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
143 num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
144 }
145
146 if (num_subarrays_per_row >= 2)
147 {
148 // wire heads for both right and left side of a mat, so half the resistance
149 R_wire_bit_mux_dec_out /= 2.0;
150 R_wire_sa_mux_dec_out /= 2.0;
151 }
152
153
154 row_dec = new Decoder(
155 num_dec_signals,
156 false,
157 subarray.C_wl,
158 R_wire_wl_drv_out,
159 false/*is_fa*/,
160 is_dram,
161 true,
162 camFlag? cam_cell:cell);
163 // if (is_fa && (!dp.is_tag))
164 // {
165 // row_dec->exist = true;
166 // }
167 bit_mux_dec = new Decoder(
168 deg_bl_muxing,// This number is 1 for FA or CAM
169 false,
170 C_ld_bit_mux_dec_out,
171 R_wire_bit_mux_dec_out,
172 false/*is_fa*/,
173 is_dram,
174 false,
175 camFlag? cam_cell:cell);
176 sa_mux_lev_1_dec = new Decoder(
177 dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
178 dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
179 C_ld_sa_mux_lev_1_dec_out,
180 R_wire_sa_mux_dec_out,
181 false/*is_fa*/,
182 is_dram,
183 false,
184 camFlag? cam_cell:cell);
185 sa_mux_lev_2_dec = new Decoder(
186 dp.Ndsam_lev_2, // This number is 1 for FA or CAM
187 false,
188 C_ld_sa_mux_lev_2_dec_out,
189 R_wire_sa_mux_dec_out,
190 false/*is_fa*/,
191 is_dram,
192 false,
193 camFlag? cam_cell:cell);
194
195 double C_wire_predec_blk_out;
196 double R_wire_predec_blk_out;
197
198 if (!is_fa && !pure_cam)
199 {
200
201 C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
202 R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
203
204 }
205 else //for pre-decode block's load is same for both FA and CAM
206 {
207 C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
208 R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
209 }
210
211
212 if (is_fa||pure_cam)
213 num_dec_signals += _log2(num_subarrays_per_mat);
214
215 PredecBlk * r_predec_blk1 = new PredecBlk(
216 num_dec_signals,
217 row_dec,
218 C_wire_predec_blk_out,
219 R_wire_predec_blk_out,
220 num_subarrays_per_mat,
221 is_dram,
222 true);
223 PredecBlk * r_predec_blk2 = new PredecBlk(
224 num_dec_signals,
225 row_dec,
226 C_wire_predec_blk_out,
227 R_wire_predec_blk_out,
228 num_subarrays_per_mat,
229 is_dram,
230 false);
231 PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
232 PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
233 PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
234 PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
235 PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
236 PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
237 dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
238 dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
239
240 PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
241 PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
242 PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
243 PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
244 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
245 PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
246 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
247 PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
248 way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
249 dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
250
251 r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
252 b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
253 sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
254 sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
255
256 subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
257
258 double driver_c_gate_load;
259 double driver_c_wire_load;
260 double driver_r_wire_load;
261
262 if (is_fa || pure_cam)
263
264 { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
265 driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
266 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
267 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
268 cam_bl_precharge_eq_drv = new Driver(
269 driver_c_gate_load,
270 driver_c_wire_load,
271 driver_r_wire_load,
272 is_dram);
273
274 if (!pure_cam)
275 {
276 //This is only used for fully asso not pure CAM
277 driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
278 driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
279 driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
280 bl_precharge_eq_drv = new Driver(
281 driver_c_gate_load,
282 driver_c_wire_load,
283 driver_r_wire_load,
284 is_dram);
285 }
286 }
287
288 else
289 {
290 driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
291 driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
292 driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
293 bl_precharge_eq_drv = new Driver(
294 driver_c_gate_load,
295 driver_c_wire_load,
296 driver_r_wire_load,
297 is_dram);
298 }
299 double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
300 double w_row_decoder = area_row_decoder / subarray.area.get_h();
301
302 double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
303 compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
304
305 double h_subarray_out_drv = subarray_out_wire->area.get_area() *
306 (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
307
308
309 h_subarray_out_drv *= (RWP + ERP + SCHP);
310
311 double h_comparators = 0.0;
312 double w_row_predecode_output_wires = 0.0;
313 double h_bit_mux_dec_out_wires = 0.0;
314 double h_senseamp_mux_dec_out_wires = 0.0;
315
316 if ((!is_fa)&&(dp.is_tag))
317 {
318 //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
319 h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
320 h_comparators *= (RWP + ERP);
321 }
322
323
324 int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
325 int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
326 w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
327 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
328
329
330 double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
331 (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
332 h_subarray_out_drv + h_comparators);
333
334 double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
335
336 if (deg_bl_muxing > 1)
337 {
338 h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
339 }
340 if (dp.Ndsam_lev_1 > 1)
341 {
342 h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
343 }
344 if (dp.Ndsam_lev_2 > 1)
345 {
346 h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
347 }
348
349 double h_addr_datain_wires;
350 if (!g_ip->ver_htree_wires_over_array)
351 {
352 h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
353 (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
354 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
355
356 if (is_fa || pure_cam)
357 {
358 h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit
359 (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
360 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
361 (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
362 }
363 //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
364 //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
365 h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
366 h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
367 h_addr_datain_wires +
368 h_bit_mux_dec_out_wires +
369 h_senseamp_mux_dec_out_wires;
370
371 }
372
373 // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
374 double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
375 b_mux_predec_blk_drv1->area.get_area() +
376 sa_mux_lev_1_predec_blk_drv1->area.get_area() +
377 sa_mux_lev_2_predec_blk_drv1->area.get_area() +
378 way_sel_drv1->area.get_area() +
379 r_predec_blk_drv2->area.get_area() +
380 b_mux_predec_blk_drv2->area.get_area() +
381 sa_mux_lev_1_predec_blk_drv2->area.get_area() +
382 sa_mux_lev_2_predec_blk_drv2->area.get_area() +
383 r_predec_blk1->area.get_area() +
384 b_mux_predec_blk1->area.get_area() +
385 sa_mux_lev_1_predec_blk1->area.get_area() +
386 sa_mux_lev_2_predec_blk1->area.get_area() +
387 r_predec_blk2->area.get_area() +
388 b_mux_predec_blk2->area.get_area() +
389 sa_mux_lev_1_predec_blk2->area.get_area() +
390 sa_mux_lev_2_predec_blk2->area.get_area() +
391 bit_mux_dec->area.get_area() +
392 sa_mux_lev_1_dec->area.get_area() +
393 sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
394
395 double area_efficiency_mat;
396
397 // if (!is_fa)
398 // {
399 assert(num_subarrays_per_mat/num_subarrays_per_row>0);
400 area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
401 area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
402 area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
403 area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
404
405 // cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
406 // cout<<"h_comparators"<<h_comparators<<endl;
407 // cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl;
408 // cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl;
409 // cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl;
410 // cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl;
411 // cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
412 // cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
413 // cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
414 // cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
415
416 assert(area.h>0);
417 assert(area.w>0);
418 // }
419 // else
420 // {
421 // area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
422 // area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
423 // area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
424 // area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
425 // }
426 }
427
428
429
430 Mat::~Mat()
431 {
432 delete row_dec;
433 delete bit_mux_dec;
434 delete sa_mux_lev_1_dec;
435 delete sa_mux_lev_2_dec;
436
437 delete r_predec->blk1;
438 delete r_predec->blk2;
439 delete b_mux_predec->blk1;
440 delete b_mux_predec->blk2;
441 delete sa_mux_lev_1_predec->blk1;
442 delete sa_mux_lev_1_predec->blk2;
443 delete sa_mux_lev_2_predec->blk1;
444 delete sa_mux_lev_2_predec->blk2;
445 delete dummy_way_sel_predec_blk1;
446 delete dummy_way_sel_predec_blk2;
447
448 delete r_predec->drv1;
449 delete r_predec->drv2;
450 delete b_mux_predec->drv1;
451 delete b_mux_predec->drv2;
452 delete sa_mux_lev_1_predec->drv1;
453 delete sa_mux_lev_1_predec->drv2;
454 delete sa_mux_lev_2_predec->drv1;
455 delete sa_mux_lev_2_predec->drv2;
456 delete way_sel_drv1;
457 delete dummy_way_sel_predec_blk_drv2;
458
459 delete r_predec;
460 delete b_mux_predec;
461 delete sa_mux_lev_1_predec;
462 delete sa_mux_lev_2_predec;
463
464 delete subarray_out_wire;
465 if (!pure_cam)
466 delete bl_precharge_eq_drv;
467
468 if (is_fa || pure_cam)
469 {
470 delete sl_precharge_eq_drv ;
471 delete sl_data_drv ;
472 delete cam_bl_precharge_eq_drv;
473 delete ml_precharge_drv;
474 delete ml_to_ram_wl_drv;
475 }
476 }
477
478
479
480 double Mat::compute_delays(double inrisetime)
481 {
482 int k;
483 double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
484 double outrisetime_search, outrisetime, row_dec_outrisetime;
485 // delay calculation for tags of fully associative cache
486 if (is_fa || pure_cam)
487 {
488 //Compute search access time
489 outrisetime_search = compute_cam_delay(inrisetime);
490 if (is_fa)
491 {
492 bl_precharge_eq_drv->compute_delay(0);
493 k = ml_to_ram_wl_drv->number_gates - 1;
494 rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
495 C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
496 drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
497 C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
498 tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
499 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
500
501 R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
502 r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
503 R_bl = subarray.num_rows * r_b_metal;
504 C_bl = subarray.C_bl;
505 delay_bl_restore = bl_precharge_eq_drv->delay +
506 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
507 (R_bl_precharge * C_bl + R_bl * C_bl / 2);
508
509
510 outrisetime_search = compute_bitline_delay(outrisetime_search);
511 outrisetime_search = compute_sa_delay(outrisetime_search);
512 }
513 outrisetime_search = compute_subarray_out_drv(outrisetime_search);
514 subarray_out_wire->set_in_rise_time(outrisetime_search);
515 outrisetime_search = subarray_out_wire->signal_rise_time();
516 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
517
518
519 //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
520 outrisetime = r_predec->compute_delays(inrisetime);
521 row_dec_outrisetime = row_dec->compute_delays(outrisetime);
522
523 outrisetime = b_mux_predec->compute_delays(inrisetime);
524 bit_mux_dec->compute_delays(outrisetime);
525
526 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
527 sa_mux_lev_1_dec->compute_delays(outrisetime);
528
529 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
530 sa_mux_lev_2_dec->compute_delays(outrisetime);
531
532 if (pure_cam)
533 {
534 outrisetime = compute_bitline_delay(row_dec_outrisetime);
535 outrisetime = compute_sa_delay(outrisetime);
536 }
537 return outrisetime_search;
538 }
539 else
540 {
541 bl_precharge_eq_drv->compute_delay(0);
542 if (row_dec->exist == true)
543 {
544 int k = row_dec->num_gates - 1;
545 double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
546 // TODO: this 4*cell.h number must be revisited
547 double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
548 drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
549 double C_ld = row_dec->C_ld_dec_out;
550 double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
551 delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
552 }
553 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
554 double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
555 double R_bl = subarray.num_rows * r_b_metal;
556 double C_bl = subarray.C_bl;
557
558 if (is_dram)
559 {
560 delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
561 }
562 else
563 {
564 delay_bl_restore = bl_precharge_eq_drv->delay +
565 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
566 (R_bl_precharge * C_bl + R_bl * C_bl / 2);
567 }
568 }
569
570
571
572 outrisetime = r_predec->compute_delays(inrisetime);
573 row_dec_outrisetime = row_dec->compute_delays(outrisetime);
574
575 outrisetime = b_mux_predec->compute_delays(inrisetime);
576 bit_mux_dec->compute_delays(outrisetime);
577
578 outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
579 sa_mux_lev_1_dec->compute_delays(outrisetime);
580
581 outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
582 sa_mux_lev_2_dec->compute_delays(outrisetime);
583
584 outrisetime = compute_bitline_delay(row_dec_outrisetime);
585 outrisetime = compute_sa_delay(outrisetime);
586 outrisetime = compute_subarray_out_drv(outrisetime);
587 subarray_out_wire->set_in_rise_time(outrisetime);
588 outrisetime = subarray_out_wire->signal_rise_time();
589
590 delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
591
592 if (dp.is_tag == true && dp.fully_assoc == false)
593 {
594 compute_comparator_delay(0);
595 }
596
597 if (row_dec->exist == false)
598 {
599 delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
600 }
601 return outrisetime;
602 }
603
604
605
606 double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
607 {
608
609 double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
610 compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry
611
612 if (deg_bl_muxing > 1)
613 {
614 height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height
615 // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
616 }
617
618 height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height
619
620 if (dp.Ndsam_lev_1 > 1)
621 {
622 height += compute_tr_width_after_folding(
623 g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
624 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
625 }
626
627 if (dp.Ndsam_lev_2 > 1)
628 {
629 height += compute_tr_width_after_folding(
630 g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height
631 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
632
633 // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
634 height += 2 * compute_tr_width_after_folding(
635 pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
636 height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
637 }
638
639 // TODO: this should be uncommented...
640 /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
641 {
642 //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
643 double width_write_driver_write_mux = width_write_driver_or_write_mux();
644 double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
645 cell.w *
646 // deg_bl_muxing *
647 dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
648 height += height_write_driver_write_mux;
649 }*/
650
651 return height;
652 }
653
654
655
656 double Mat::compute_cam_delay(double inrisetime)
657 {
658
659 double out_time_ramp, this_delay;
660 double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
661
662
663 double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
664 Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
665 Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
666 Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
667
668 double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng;
669 int Htagbits;
670
671 double driver_c_gate_load;
672 double driver_c_wire_load;
673 double driver_r_wire_load;
674 //double searchline_precharge_time;
675
676 double leak_power_cc_inverters_sram_cell = 0;
677 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
678 double leak_power_RD_port_sram_cell = 0;
679 double leak_power_SCHP_port_sram_cell = 0;
680 double leak_comparator_cam_cell =0;
681
682 double gate_leak_comparator_cam_cell = 0;
683 double gate_leak_power_cc_inverters_sram_cell = 0;
684 double gate_leak_power_RD_port_sram_cell = 0;
685 double gate_leak_power_SCHP_port_sram_cell = 0;
686
687 c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um;
688 c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um;
689 r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um;
690 r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um;
691
692 dynSearchEng = 0.0;
693 delay_matchchline = 0.0;
694 double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
695 bool linear_scaling = false;
696
697 if (linear_scaling)
698 {
699 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
700 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
701 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
702 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
703 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
704 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
705 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
706 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
707 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
708 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
709 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
710 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
711 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
712 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
713 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
714 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
715 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
716 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
717 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
718
719 Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
720 Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
721 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
722 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
723 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
724 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
725 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
726 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
727 W_hit_miss_n = Wdummyn;
728 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
729 //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
730 }
731 else
732 {
733 Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
734 Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
735 Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
736 Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
737 Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
738 Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
739 Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
740 Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
741 Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
742 Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process
743 Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
744 Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
745 Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process
746 Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
747 Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
748 Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process
749 Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
750 Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
751 Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process
752
753 Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process
754 Wdummyn = g_tp.cam.cell_nmos_w;
755 Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process
756 Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process
757 Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
758 Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process
759 Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process
760 Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process
761 W_hit_miss_n = Wdummyn;
762 W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r;
763 }
764
765 Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
766
767 /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
768 search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
769 From the driver(am and an) to the comparators in all the rows including the dummy row,
770 Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
771
772 //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
773 //Searchline precharge routes horizontally
774 driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
775 driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
776 driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
777
778 sl_precharge_eq_drv = new Driver(
779 driver_c_gate_load,
780 driver_c_wire_load,
781 driver_r_wire_load,
782 is_dram);
783
784 //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
785 //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
786 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
787 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
788 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
789 sl_data_drv = new Driver(
790 driver_c_gate_load,
791 driver_c_wire_load,
792 driver_r_wire_load,
793 is_dram);
794
795 sl_precharge_eq_drv->compute_delay(0);
796 double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
797 double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
798 double R_bl = (subarray.num_rows + 1) * r_b_metal;
799 double C_bl = subarray.C_bl_cam;
800 delay_cam_sl_restore = sl_precharge_eq_drv->delay
801 + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
802
803 out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
804
805 //matchline ops delay
806 delay_matchchline += sl_data_drv->delay;
807
808 /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
809 //matchline delay, matchline power, matchline_reset for cycle time computation,
810
811 ////matchline precharge circuitry routes vertically
812 //There are two matchline precharge driver chains per subarray.
813 driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
814 driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
815 driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
816
817 ml_precharge_drv = new Driver(
818 driver_c_gate_load,
819 driver_c_wire_load,
820 driver_r_wire_load,
821 is_dram);
822
823 ml_precharge_drv->compute_delay(0);
824
825
826 rd = tr_R_on(Wdummyn, NCH, 2, is_dram);
827 c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
828 + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
829
830 Cwire = c_matchline_metal * Htagbits;
831 Rwire = r_matchline_metal * Htagbits;
832 c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
833
834 double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
835 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
836 double R_ml = Rwire;
837 double C_ml = Cwire + c_intrinsic;
838 delay_cam_ml_reset = ml_precharge_drv->delay
839 + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
840
841 //matchline ops delay
842 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
843 this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
844 delay_matchchline += this_delay;
845 out_time_ramp = this_delay / VTHFA3;
846
847 dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
848 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
849
850 /* third stage, from the NAND2 gates to the drivers in the dummy row */
851 rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
852 c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
853 drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
854 c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
855 tf = rd * (c_intrinsic + c_gate_load);
856 this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
857 out_time_ramp = this_delay / (1 - VTHFA4);
858 delay_matchchline += this_delay;
859
860 //only the dummy row has the extra inverter between NAND and NOR gates
861 dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl;
862
863 /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
864 rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
865 c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
866 Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2;
867 Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2;
868 c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
869 tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
870 this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
871 out_time_ramp = this_delay / VTHFA5;
872 delay_matchchline += this_delay;
873
874 dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
875
876 /*final statge from the NOR gate to drive the wordline of the data portion */
877
878 //searchline data driver There are two matchline precharge driver chains per subarray.
879 driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
880 driver_c_wire_load = subarray.C_wl_ram;
881 driver_r_wire_load = subarray.R_wl_ram;
882
883 ml_to_ram_wl_drv = new Driver(
884 driver_c_gate_load,
885 driver_c_wire_load,
886 driver_r_wire_load,
887 is_dram);
888
889
890
891 rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
892 c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
893 c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
894 tf = rd * (c_intrinsic + c_gate_load);
895 this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
896 out_time_ramp = this_delay / (1-0.5);
897 delay_matchchline += this_delay;
898
899 out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
900
901 //c_gate_load energy is computed in ml_to_ram_wl_drv
902 dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
903
904
905 /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
906 /*Precharge the hitting logic */
907 c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
908 Cwire = c_searchline_metal * subarray.num_rows;
909 Rwire = r_searchline_metal * subarray.num_rows;
910 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
911
912 rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
913 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
914 double R_hit_miss = Rwire;
915 double C_hit_miss = Cwire + c_intrinsic;
916 delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
917 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
918
919 /*hitting logic evaluation */
920 c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
921 Cwire = c_searchline_metal * subarray.num_rows;
922 Rwire = r_searchline_metal * subarray.num_rows;
923 c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
924
925 rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
926 tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
927
928 delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
929
930 if (is_fa)
931 delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
932
933 dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
934
935 /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
936
937 power_matchline.searchOp.dynamic = dynSearchEng;
938
939 //leakage in one subarray
940 double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2?
941 double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true);
942 double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
943 double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
944
945 leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd;
946 leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd;
947 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
948 leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd;
949 leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports
950
951 power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
952 leak_comparator_cam_cell +
953 leak_power_acc_tr_RW_or_WR_port_sram_cell +
954 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
955 leak_power_RD_port_sram_cell * ERP +
956 leak_power_SCHP_port_sram_cell*SCHP;
957 // power_matchline.searchOp.leakage += leak_comparator_cam_cell;
958 power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
959 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
960 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
961 power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
962 //In idle states, the hit/miss txs are closed (on) therefore no Isub
963 power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
964 // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
965
966 //in idle state, Ig_on only possibly exist in access transistors of read only ports
967 double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
968 double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
969 double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
970
971 gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd;
972 gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
973 gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
974 gate_leak_power_SCHP_port_sram_cell = 0;
975
976 //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
977
978 power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
979 power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
980 power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
981 power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
982 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
983 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
984 power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
985 power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
986 + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
987
988
989 return out_time_ramp;
990 }
991
992
993 double Mat::width_write_driver_or_write_mux()
994 {
995 // calculate resistance of SRAM cell pull-up PMOS transistor
996 // cam and sram have same cell trasistor properties
997 double R_sram_cell_pull_up_tr = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
998 double R_access_tr = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, is_dram, true);
999 double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
1000 double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
1001
1002 return width_write_driver_nmos;
1003 }
1004
1005
1006
1007 double Mat::compute_comparators_height(
1008 int tagbits,
1009 int number_ways_in_mat,
1010 double subarray_mem_cell_area_width)
1011 {
1012 double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
1013 double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
1014 return cumulative_area / subarray_mem_cell_area_width;
1015 }
1016
1017
1018
1019 double Mat::compute_bitline_delay(double inrisetime)
1020 {
1021 double V_b_pre, v_th_mem_cell, V_wl;
1022 double tstep;
1023 double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
1024 double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
1025 int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
1026
1027 double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
1028 double R_bl = subarray.num_rows * R_b_metal;
1029 double C_bl = subarray.C_bl;
1030
1031 // TODO: no leakage for DRAMs?
1032 double leak_power_cc_inverters_sram_cell = 0;
1033 double gate_leak_power_cc_inverters_sram_cell = 0;
1034 double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
1035 double leak_power_RD_port_sram_cell = 0;
1036 double gate_leak_power_RD_port_sram_cell = 0;
1037
1038 if (is_dram == true)
1039 {
1040 V_b_pre = g_tp.dram.Vbitpre;
1041 v_th_mem_cell = g_tp.dram_acc.Vth;
1042 V_wl = g_tp.vpp;
1043 //The access transistor is not folded. So we just need to specify a threshold value for the
1044 //folding width that is equal to or greater than Wmemcella.
1045 R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
1046 r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
1047 }
1048 else
1049 { //SRAM
1050 V_b_pre = g_tp.sram.Vbitpre;
1051 v_th_mem_cell = g_tp.sram_cell.Vth;
1052 V_wl = g_tp.sram_cell.Vdd;
1053 R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
1054 R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
1055
1056 //Leakage current of an SRAM cell
1057 double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);//TODO: how much is the idle time? just by *2?
1058 double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0, 2, nmos,false, true);
1059 double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
1060
1061 leak_power_cc_inverters_sram_cell = Icell * g_tp.sram_cell.Vdd;
1062 leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
1063 leak_power_RD_port_sram_cell = Iport_erp * g_tp.sram_cell.Vdd;
1064
1065
1066 //in idle state, Ig_on only possibly exist in access transistors of read only ports
1067 double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
1068 double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
1069
1070 gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
1071 gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd;
1072 }
1073
1074
1075 double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
1076 double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
1077 double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1078 double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
1079 double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
1080 drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1081 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1082 double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1083
1084 if (is_dram)
1085 {
1086 double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
1087 tstep = 2.3 * fraction * r_dev *
1088 (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
1089 (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
1090 delay_writeback = tstep;
1091 dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1092 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1093 dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
1094 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
1095 per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1096 (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
1097 }
1098 else
1099 {
1100 double tau;
1101
1102 if (deg_bl_muxing > 1)
1103 {
1104 tau = (R_cell_pull_down + R_cell_acc) *
1105 (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1106 R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1107 R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
1108 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
1109 dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
1110 subarray.num_cols * num_subarrays_per_mat*/;
1111 dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1112 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
1113 dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
1114 num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
1115 //Write Ops are differential for SRAM
1116 }
1117 else
1118 {
1119 tau = (R_cell_pull_down + R_cell_acc) *
1120 (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
1121 R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
1122 dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
1123 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
1124 dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
1125 num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
1126
1127 }
1128 tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
1129 power_bitline.readOp.leakage =
1130 leak_power_cc_inverters_sram_cell +
1131 leak_power_acc_tr_RW_or_WR_port_sram_cell +
1132 leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
1133 leak_power_RD_port_sram_cell * ERP;
1134 power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
1135 gate_leak_power_RD_port_sram_cell * ERP;
1136
1137 }
1138
1139 // cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
1140 // cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1141 // cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1142 // cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
1143
1144
1145 /* take input rise time into account */
1146 double m = V_wl / inrisetime;
1147 if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
1148 {
1149 delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
1150 }
1151 else
1152 {
1153 delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
1154 }
1155
1156 bool is_fa = (dp.fully_assoc) ? true : false;
1157
1158 if (dp.is_tag == false || is_fa == false)
1159 {
1160 power_bitline.readOp.dynamic = dynRdEnergy;
1161 power_bitline.writeOp.dynamic = dynWriteEnergy;
1162 }
1163
1164 double outrisetime = 0;
1165 return outrisetime;
1166 }
1167
1168
1169
1170 double Mat::compute_sa_delay(double inrisetime)
1171 {
1172 //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
1173
1174 //Bitline circuitry leakage.
1175 double Iiso = simplified_pmos_leakage(g_tp.w_iso, is_dram);
1176 double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
1177 double IsenseN = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
1178 double IsenseP = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
1179
1180 double lkgIdlePh = IsenseEn;//+ 2*IoBufP;
1181 //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
1182 double lkgReadPh = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
1183 //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
1184 // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
1185 double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
1186 leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1187 leak_power_sense_amps_open_page_state = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
1188
1189 // sense amplifier has to drive logic in "data out driver" and sense precharge load.
1190 // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
1191 //constant as well as the magnitude of input differential voltage.
1192 double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
1193 drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1194 drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1195 drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1196 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
1197 double tau = C_ld / g_tp.gm_sense_amp_latch;
1198 delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
1199 power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
1200 num_subarrays_per_mat * num_act_mats_hor_dir*/;
1201 power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
1202
1203 double outrisetime = 0;
1204 return outrisetime;
1205 }
1206
1207
1208
1209 double Mat::compute_subarray_out_drv(double inrisetime)
1210 {
1211 double C_ld, rd, tf, this_delay;
1212 double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
1213
1214 // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
1215 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1216 C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
1217 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1218 tf = rd * C_ld;
1219 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1220 delay_subarray_out_drv += this_delay;
1221 inrisetime = this_delay/(1.0 - 0.5);
1222 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1223 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
1224 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
1225 // delay of signal through inverter-buffer to second level of sense-amp mux.
1226 // internal delay of buffer
1227 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1228 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1229 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1230 gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
1231 tf = rd * C_ld;
1232 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1233 delay_subarray_out_drv += this_delay;
1234 inrisetime = this_delay/(1.0 - 0.5);
1235 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1236 power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
1237 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1238
1239 // inverter driving drain of pass transistor of second level of sense-amp mux.
1240 rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
1241 C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
1242 drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1243 drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
1244 tf = rd * C_ld;
1245 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1246 delay_subarray_out_drv += this_delay;
1247 inrisetime = this_delay/(1.0 - 0.5);
1248 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1249 power_subarray_out_drv.readOp.leakage += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1250 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
1251
1252
1253 // delay of signal through pass-transistor to input of subarray output driver.
1254 rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
1255 C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
1256 //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1257 gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1258 tf = rd * C_ld;
1259 this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
1260 delay_subarray_out_drv += this_delay;
1261 inrisetime = this_delay/(1.0 - 0.5);
1262 power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
1263 power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0
1264 power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
1265
1266
1267 return inrisetime;
1268 }
1269
1270
1271
1272 double Mat::compute_comparator_delay(double inrisetime)
1273 {
1274 int A = g_ip->tag_assoc;
1275
1276 int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
1277 // a multiple of 4.
1278
1279 /* First Inverter */
1280 double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
1281 drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1282 drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1283 double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
1284 double tf = Req*Ceq;
1285 double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
1286 double nextinputtime = st1del/VTHCOMPINV;
1287 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1288
1289 //For each degree of associativity
1290 //there are 4 such quarter comparators
1291 double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
1292 double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
1293 /* Second Inverter */
1294 Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
1295 drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1296 drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1297 Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
1298 tf = Req*Ceq;
1299 double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
1300 nextinputtime = st2del/(1.0-VTHCOMPINV);
1301 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1302 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
1303 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
1304
1305 /* Third Inverter */
1306 Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
1307 drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
1308 drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
1309 Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
1310 tf = Req*Ceq;
1311 double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
1312 nextinputtime = st3del/(VTHEVALINV);
1313 power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1314 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
1315 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
1316
1317 /* Final Inverter (virtual ground driver) discharging compare part */
1318 double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
1319 double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
1320 double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
1321 drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
1322 drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
1323 drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
1324 double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
1325 drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
1326 drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
1327 gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
1328 power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
1329 power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1);
1330 lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
1331 lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2
1332
1333 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
1334 gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
1335
1336 /* time to go to threshold of mux driver */
1337 double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
1338 /* take into account non-zero input rise time */
1339 double m = g_tp.peri_global.Vdd/nextinputtime;
1340 double Tcomparatorni;
1341
1342 if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
1343 {
1344 double a = m;
1345 double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
1346 double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
1347 Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
1348 }
1349 else
1350 {
1351 Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
1352 }
1353 delay_comparator = Tcomparatorni+st1del+st2del+st3del;
1354 power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
1355 power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
1356
1357 return Tcomparatorni / (1.0 - VTHMUXNAND);;
1358 }
1359
1360
1361
1362 void Mat::compute_power_energy()
1363 {
1364 //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
1365 //when search all subarrays and all mats are fully active
1366 //when plain read/write only one subarray in a single mat is active.
1367
1368 // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
1369 power.readOp.dynamic += r_predec->power.readOp.dynamic +
1370 b_mux_predec->power.readOp.dynamic +
1371 sa_mux_lev_1_predec->power.readOp.dynamic +
1372 sa_mux_lev_2_predec->power.readOp.dynamic;
1373
1374 // add energy consumed in decoders
1375 power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic;
1376 if (!(is_fa||pure_cam))
1377 power_row_decoders.readOp.dynamic *= num_subarrays_per_mat;
1378
1379 // add energy consumed in bitline prechagers, SAs, and bitlines
1380 if (!(is_fa||pure_cam))
1381 {
1382 // add energy consumed in bitline prechagers
1383 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1384 power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1385
1386 //Add sense amps energy
1387 num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1388 power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
1389
1390 // add energy consumed in bitlines
1391 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
1392 power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
1393 power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
1394 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
1395 //Add subarray output energy
1396 power_subarray_out_drv.readOp.dynamic =
1397 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1398
1399 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1400 power_sa.readOp.dynamic +
1401 power_bitline.readOp.dynamic +
1402 power_subarray_out_drv.readOp.dynamic;
1403
1404 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1405 bit_mux_dec->power.readOp.dynamic +
1406 sa_mux_lev_1_dec->power.readOp.dynamic +
1407 sa_mux_lev_2_dec->power.readOp.dynamic +
1408 power_comparator.readOp.dynamic;
1409 }
1410
1411 else if (is_fa)
1412 {
1413 //for plain read/write only one subarray in a mat is active
1414 // add energy consumed in bitline prechagers
1415 power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
1416 + cam_bl_precharge_eq_drv->power.readOp.dynamic;
1417 power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
1418
1419 //Add sense amps energy
1420 num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
1421 num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
1422 power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
1423 power_sa.readOp.dynamic *= num_sa_subarray;
1424
1425
1426 // add energy consumed in bitlines
1427 power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
1428 power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
1429 power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
1430 power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
1431
1432 //Add subarray output energy
1433 power_subarray_out_drv.searchOp.dynamic =
1434 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1435 power_subarray_out_drv.readOp.dynamic =
1436 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1437
1438
1439 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1440 power_sa.readOp.dynamic +
1441 power_bitline.readOp.dynamic +
1442 power_subarray_out_drv.readOp.dynamic;
1443
1444 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1445 bit_mux_dec->power.readOp.dynamic +
1446 sa_mux_lev_1_dec->power.readOp.dynamic +
1447 sa_mux_lev_2_dec->power.readOp.dynamic +
1448 power_comparator.readOp.dynamic;
1449
1450 //add energy consumed inside cam
1451 power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1452 power_searchline_precharge = sl_precharge_eq_drv->power;
1453 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1454 power_searchline = sl_data_drv->power;
1455 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
1456 power_matchline_precharge = ml_precharge_drv->power;
1457 power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
1458 power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
1459 power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
1460
1461 power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
1462 power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
1463 power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
1464 power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
1465
1466 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1467 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1468
1469 }
1470 else
1471 {
1472 // add energy consumed in bitline prechagers
1473 power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1474 //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1475 //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1476 //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
1477
1478 //Add sense amps energy
1479 num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
1480 power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
1481 power_sa.searchOp.dynamic = 0;
1482
1483 power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
1484 power_bitline.searchOp.dynamic = 0;
1485 power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
1486
1487 power_subarray_out_drv.searchOp.dynamic =
1488 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
1489 power_subarray_out_drv.readOp.dynamic =
1490 (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
1491
1492 power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
1493 power_sa.readOp.dynamic +
1494 power_bitline.readOp.dynamic +
1495 power_subarray_out_drv.readOp.dynamic;
1496
1497 power.readOp.dynamic += power_row_decoders.readOp.dynamic +
1498 bit_mux_dec->power.readOp.dynamic +
1499 sa_mux_lev_1_dec->power.readOp.dynamic +
1500 sa_mux_lev_2_dec->power.readOp.dynamic +
1501 power_comparator.readOp.dynamic;
1502
1503
1504 ////add energy consumed inside cam
1505 power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
1506 power_searchline_precharge = sl_precharge_eq_drv->power;
1507 power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
1508 power_searchline = sl_data_drv->power;
1509 power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
1510 power_matchline_precharge = ml_precharge_drv->power;
1511 power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
1512 power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
1513 power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
1514
1515 power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
1516 power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
1517 power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
1518 power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
1519
1520 power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
1521 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1522
1523 }
1524
1525
1526
1527 // calculate leakage power
1528 if (!(is_fa || pure_cam))
1529 {
1530 int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1531
1532 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1533 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1534 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
1535
1536 //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1537 power_subarray_out_drv.readOp.leakage =
1538 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1539 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1540
1541 power.readOp.leakage += power_bitline.readOp.leakage +
1542 power_bl_precharge_eq_drv.readOp.leakage +
1543 power_sa.readOp.leakage +
1544 power_subarray_out_drv.readOp.leakage;
1545 //cout<<"leakage"<<power.readOp.leakage<<endl;
1546
1547 power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
1548 power.readOp.leakage += power_comparator.readOp.leakage;
1549
1550 //cout<<"leakage1"<<power.readOp.leakage<<endl;
1551
1552 // leakage power
1553 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1554 power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
1555 power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
1556 power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
1557
1558 power.readOp.leakage += r_predec->power.readOp.leakage +
1559 b_mux_predec->power.readOp.leakage +
1560 sa_mux_lev_1_predec->power.readOp.leakage +
1561 sa_mux_lev_2_predec->power.readOp.leakage +
1562 power_row_decoders.readOp.leakage +
1563 power_bit_mux_decoders.readOp.leakage +
1564 power_sa_mux_lev_1_decoders.readOp.leakage +
1565 power_sa_mux_lev_2_decoders.readOp.leakage;
1566 //cout<<"leakage2"<<power.readOp.leakage<<endl;
1567
1568 //++++Below is gate leakage
1569 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1570 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1571 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
1572
1573 //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1574 power_subarray_out_drv.readOp.gate_leakage =
1575 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1576 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
1577
1578 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1579 power_bl_precharge_eq_drv.readOp.gate_leakage +
1580 power_sa.readOp.gate_leakage +
1581 power_subarray_out_drv.readOp.gate_leakage;
1582 //cout<<"leakage"<<power.readOp.leakage<<endl;
1583
1584 power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
1585 power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
1586
1587 //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
1588
1589 // gate_leakage power
1590 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1591 power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
1592 power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
1593 power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
1594
1595 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1596 b_mux_predec->power.readOp.gate_leakage +
1597 sa_mux_lev_1_predec->power.readOp.gate_leakage +
1598 sa_mux_lev_2_predec->power.readOp.gate_leakage +
1599 power_row_decoders.readOp.gate_leakage +
1600 power_bit_mux_decoders.readOp.gate_leakage +
1601 power_sa_mux_lev_1_decoders.readOp.gate_leakage +
1602 power_sa_mux_lev_2_decoders.readOp.gate_leakage;
1603 }
1604 else if (is_fa)
1605 {
1606 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1607
1608 power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1609 power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1610 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1611 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1612
1613 //cout<<"leakage3"<<power.readOp.leakage<<endl;
1614
1615
1616 power_subarray_out_drv.readOp.leakage =
1617 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1618 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1619
1620 power.readOp.leakage += power_bitline.readOp.leakage +
1621 power_bl_precharge_eq_drv.readOp.leakage +
1622 power_bl_precharge_eq_drv.searchOp.leakage +
1623 power_sa.readOp.leakage +
1624 power_subarray_out_drv.readOp.leakage;
1625
1626 //cout<<"leakage4"<<power.readOp.leakage<<endl;
1627
1628 // leakage power
1629 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
1630 power.readOp.leakage += r_predec->power.readOp.leakage +
1631 power_row_decoders.readOp.leakage;
1632
1633 //cout<<"leakage5"<<power.readOp.leakage<<endl;
1634
1635 //inside cam
1636 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1637 power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
1638 power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
1639 power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
1640 power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1641
1642 power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1643
1644 // cout<<"leakage6"<<power.readOp.leakage<<endl;
1645
1646 //+++Below is gate leakage
1647 power_bitline.readOp.gate_leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1648 power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1649 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1650 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1651
1652 //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
1653
1654
1655 power_subarray_out_drv.readOp.gate_leakage =
1656 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1657 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1658
1659 power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
1660 power_bl_precharge_eq_drv.readOp.gate_leakage +
1661 power_bl_precharge_eq_drv.searchOp.gate_leakage +
1662 power_sa.readOp.gate_leakage +
1663 power_subarray_out_drv.readOp.gate_leakage;
1664
1665 //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
1666
1667 // gate_leakage power
1668 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
1669 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1670 power_row_decoders.readOp.gate_leakage;
1671
1672 //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
1673
1674 //inside cam
1675 power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
1676 power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
1677 power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
1678 power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
1679 power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1680
1681 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1682
1683 }
1684 else
1685 {
1686 int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1687
1688 //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1689 //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1690 power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1691 power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1692
1693
1694 power_subarray_out_drv.readOp.leakage =
1695 (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
1696 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1697
1698 power.readOp.leakage += //power_bitline.readOp.leakage +
1699 //power_bl_precharge_eq_drv.readOp.leakage +
1700 power_bl_precharge_eq_drv.searchOp.leakage +
1701 power_sa.readOp.leakage +
1702 power_subarray_out_drv.readOp.leakage;
1703
1704 // leakage power
1705 power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
1706 power.readOp.leakage += r_predec->power.readOp.leakage +
1707 power_row_decoders.readOp.leakage;
1708
1709 //inside cam
1710 power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
1711 power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
1712 power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
1713 power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
1714 power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
1715
1716 power.readOp.leakage += power_cam_all_active.searchOp.leakage;
1717
1718 //+++Below is gate leakage
1719 power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
1720 power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
1721
1722
1723 power_subarray_out_drv.readOp.gate_leakage =
1724 (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
1725 number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
1726
1727 power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
1728 //power_bl_precharge_eq_drv.readOp.gate_leakage +
1729 power_bl_precharge_eq_drv.searchOp.gate_leakage +
1730 power_sa.readOp.gate_leakage +
1731 power_subarray_out_drv.readOp.gate_leakage;
1732
1733 // gate_leakage power
1734 power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
1735 power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
1736 power_row_decoders.readOp.gate_leakage;
1737
1738 //inside cam
1739 power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
1740 power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
1741 power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
1742 power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
1743 power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
1744
1745 power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
1746 }
1747 }
1748