1 /*****************************************************************************
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met: redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer;
11 * redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution;
14 * neither the name of the copyright holders nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.”
30 ***************************************************************************/
// Mat constructor.
// Copies sizing parameters from the DynamicParameter argument into members,
// allocates the row / bit-mux / sense-amp-mux decoders, their predecode blocks
// and drivers, the subarray output wire and the bitline precharge drivers, and
// finally derives the mat dimensions (area.h, area.w) from the subarray area
// plus the non-cell circuitry.
// NOTE(review): this listing appears to have lost short lines (braces, `else`
// keywords, some constructor arguments). Confirm against the canonical file
// before changing any code in this block.
38 Mat::Mat(const DynamicParameter
& dyn_p
)
40 power_subarray_out_drv(),
41 delay_fa_tag(0), delay_cam(0),
42 delay_before_decoder(0), delay_bitline(0),
43 delay_wl_reset(0), delay_bl_restore(0),
44 delay_searchline(0), delay_matchchline(0),
45 delay_cam_sl_restore(0), delay_cam_ml_reset(0),
46 delay_fa_ram_wl(0),delay_hit_miss_reset(0),
48 subarray(dp
, dp
.fully_assoc
),
49 power_bitline(), per_bitline_read_energy(0),
50 deg_bl_muxing(dp
.deg_bl_muxing
),
51 num_act_mats_hor_dir(dyn_p
.num_act_mats_hor_dir
),
53 cell(subarray
.cell
), cam_cell(subarray
.cam_cell
),
54 is_dram(dyn_p
.is_dram
),
55 pure_cam(dyn_p
.pure_cam
),
56 num_mats(dp
.num_mats
),
57 power_sa(), delay_sa(0),
58 leak_power_sense_amps_closed_page_state(0),
59 leak_power_sense_amps_open_page_state(0),
60 delay_subarray_out_drv(0),
61 delay_comparator(0), power_comparator(),
62 num_do_b_mat(dyn_p
.num_do_b_mat
), num_so_b_mat(dyn_p
.num_so_b_mat
),
63 num_subarrays_per_mat(dp
.num_subarrays
/dp
.num_mats
),
64 num_subarrays_per_row(dp
.Ndwl
/dp
.num_mats_h_dir
)
// The rest of the constructor assumes at most 4 subarrays per mat, arranged
// at most 2 per row.
66 assert(num_subarrays_per_mat
<= 4);
67 assert(num_subarrays_per_row
<= 2);
68 is_fa
= (dp
.fully_assoc
) ? true : false;
69 camFlag
= (is_fa
|| pure_cam
);//although cam_cell.w = cell.w for fa, we still differentiate them.
// For FA / CAM the subarrays-per-row count is re-derived from
// num_subarrays_per_mat instead of Ndwl / num_mats_h_dir.
71 if (is_fa
|| pure_cam
)
72 num_subarrays_per_row
= num_subarrays_per_mat
>2?num_subarrays_per_mat
/2:num_subarrays_per_mat
;
// Port counts (read/write, exclusive read, exclusive write, search) are taken
// from dp when dp.use_inp_params == 1.
74 if (dp
.use_inp_params
== 1) {
75 RWP
= dp
.num_rw_ports
;
76 ERP
= dp
.num_rd_ports
;
77 EWP
= dp
.num_wr_ports
;
78 SCHP
= dp
.num_search_ports
;
// NOTE(review): the assignments from g_ip below are presumably the else branch
// of the test above (no `else` is visible in this listing) - confirm.
81 RWP
= g_ip
->num_rw_ports
;
82 ERP
= g_ip
->num_rd_ports
;
83 EWP
= g_ip
->num_wr_ports
;
84 SCHP
= g_ip
->num_search_ports
;
// Number of sense amplifiers per subarray: column count divided by the bitline
// mux degree. The column count is num_cols for plain RAM and CAM + RAM columns
// for FA; the last assignment uses the CAM columns only (presumably the
// pure-CAM case - its guard is not visible here).
88 double number_sa_subarray
;
90 if (!is_fa
&& !pure_cam
)
92 number_sa_subarray
= subarray
.num_cols
/ deg_bl_muxing
;
94 else if (is_fa
&& !pure_cam
)
96 number_sa_subarray
= (subarray
.num_cols_fa_cam
+ subarray
.num_cols_fa_ram
) / deg_bl_muxing
;
101 number_sa_subarray
= (subarray
.num_cols_fa_cam
) / deg_bl_muxing
;
104 int num_dec_signals
= subarray
.num_rows
;
105 double C_ld_bit_mux_dec_out
= 0;
106 double C_ld_sa_mux_lev_1_dec_out
= 0;
107 double C_ld_sa_mux_lev_2_dec_out
= 0;
// Wire resistance seen by the wordline driver: total cell width along one row
// times the local-wire resistance per micron, with the same three-way split
// (RAM / FA / CAM-columns-only) as above.
108 double R_wire_wl_drv_out
;
110 if (!is_fa
&& !pure_cam
)
112 R_wire_wl_drv_out
= subarray
.num_cols
* cell
.w
* g_tp
.wire_local
.R_per_um
;
114 else if (is_fa
&& !pure_cam
)
116 R_wire_wl_drv_out
= (subarray
.num_cols_fa_cam
* cam_cell
.w
+ subarray
.num_cols_fa_ram
* cell
.w
) * g_tp
.wire_local
.R_per_um
;
120 R_wire_wl_drv_out
= (subarray
.num_cols_fa_cam
* cam_cell
.w
) * g_tp
.wire_local
.R_per_um
;
// Wire resistance of the bit-mux and sense-amp-mux decoder outputs, which run
// across all subarrays in a row.
123 double R_wire_bit_mux_dec_out
= num_subarrays_per_row
* subarray
.num_cols
* g_tp
.wire_inside_mat
.R_per_um
* cell
.w
;//TODO:revisit for FA
124 double R_wire_sa_mux_dec_out
= num_subarrays_per_row
* subarray
.num_cols
* g_tp
.wire_inside_mat
.R_per_um
* cell
.w
;
// Capacitive loads on the mux decoder outputs: gate capacitance of the pass
// transistors they drive plus the wire capacitance across the row. Each load
// stays 0 when the corresponding mux degree is 1.
126 if (deg_bl_muxing
> 1)
128 C_ld_bit_mux_dec_out
=
129 (2 * num_subarrays_per_mat
* subarray
.num_cols
/ deg_bl_muxing
)*gate_C(g_tp
.w_nmos_b_mux
, 0, is_dram
) + // 2 transistor per cell
130 num_subarrays_per_row
* subarray
.num_cols
*g_tp
.wire_inside_mat
.C_per_um
*cell
.get_w();
133 if (dp
.Ndsam_lev_1
> 1)
135 C_ld_sa_mux_lev_1_dec_out
=
136 (num_subarrays_per_mat
* number_sa_subarray
/ dp
.Ndsam_lev_1
)*gate_C(g_tp
.w_nmos_sa_mux
, 0, is_dram
) +
137 num_subarrays_per_row
* subarray
.num_cols
*g_tp
.wire_inside_mat
.C_per_um
*cell
.get_w();
139 if (dp
.Ndsam_lev_2
> 1)
141 C_ld_sa_mux_lev_2_dec_out
=
142 (num_subarrays_per_mat
* number_sa_subarray
/ (dp
.Ndsam_lev_1
*dp
.Ndsam_lev_2
))*gate_C(g_tp
.w_nmos_sa_mux
, 0, is_dram
) +
143 num_subarrays_per_row
* subarray
.num_cols
*g_tp
.wire_inside_mat
.C_per_um
*cell
.get_w();
146 if (num_subarrays_per_row
>= 2)
148 // wire heads for both right and left side of a mat, so half the resistance
149 R_wire_bit_mux_dec_out
/= 2.0;
150 R_wire_sa_mux_dec_out
/= 2.0;
// Decoders. Each is built for the CAM cell when camFlag is set and for the RAM
// cell otherwise. NOTE(review): several constructor arguments are not visible
// in this listing.
154 row_dec
= new Decoder(
162 camFlag
? cam_cell
:cell
);
163 // if (is_fa && (!dp.is_tag))
165 // row_dec->exist = true;
167 bit_mux_dec
= new Decoder(
168 deg_bl_muxing
,// This number is 1 for FA or CAM
170 C_ld_bit_mux_dec_out
,
171 R_wire_bit_mux_dec_out
,
175 camFlag
? cam_cell
:cell
);
176 sa_mux_lev_1_dec
= new Decoder(
177 dp
.deg_senseamp_muxing_non_associativity
, // This number is 1 for FA or CAM
178 dp
.number_way_select_signals_mat
? true : false,//only sa_mux_lev_1_dec needs way select signal
179 C_ld_sa_mux_lev_1_dec_out
,
180 R_wire_sa_mux_dec_out
,
184 camFlag
? cam_cell
:cell
);
185 sa_mux_lev_2_dec
= new Decoder(
186 dp
.Ndsam_lev_2
, // This number is 1 for FA or CAM
188 C_ld_sa_mux_lev_2_dec_out
,
189 R_wire_sa_mux_dec_out
,
193 camFlag
? cam_cell
:cell
);
// Wire load on the row predecode block outputs, which run along the height of
// the subarray rows (cell.h for RAM, cam_cell.h for FA/CAM).
195 double C_wire_predec_blk_out
;
196 double R_wire_predec_blk_out
;
198 if (!is_fa
&& !pure_cam
)
201 C_wire_predec_blk_out
= num_subarrays_per_row
* subarray
.num_rows
* g_tp
.wire_inside_mat
.C_per_um
* cell
.h
;
202 R_wire_predec_blk_out
= num_subarrays_per_row
* subarray
.num_rows
* g_tp
.wire_inside_mat
.R_per_um
* cell
.h
;
205 else //for pre-decode block's load is same for both FA and CAM
207 C_wire_predec_blk_out
= subarray
.num_rows
* g_tp
.wire_inside_mat
.C_per_um
* cam_cell
.h
;
208 R_wire_predec_blk_out
= subarray
.num_rows
* g_tp
.wire_inside_mat
.R_per_um
* cam_cell
.h
;
// NOTE(review): num_dec_signals was initialised from subarray.num_rows above;
// statements between that point and this increment may be missing from this
// listing.
213 num_dec_signals
+= _log2(num_subarrays_per_mat
);
// Predecode blocks (two per decoder) followed by one driver per block. The
// local pointers are handed to Predec objects below; the delete statements
// that follow this constructor reach them again through r_predec->blk1,
// r_predec->drv1 and so on.
215 PredecBlk
* r_predec_blk1
= new PredecBlk(
218 C_wire_predec_blk_out
,
219 R_wire_predec_blk_out
,
220 num_subarrays_per_mat
,
223 PredecBlk
* r_predec_blk2
= new PredecBlk(
226 C_wire_predec_blk_out
,
227 R_wire_predec_blk_out
,
228 num_subarrays_per_mat
,
231 PredecBlk
* b_mux_predec_blk1
= new PredecBlk(deg_bl_muxing
, bit_mux_dec
, 0, 0, 1, is_dram
, true);
232 PredecBlk
* b_mux_predec_blk2
= new PredecBlk(deg_bl_muxing
, bit_mux_dec
, 0, 0, 1, is_dram
, false);
233 PredecBlk
* sa_mux_lev_1_predec_blk1
= new PredecBlk(dyn_p
.deg_senseamp_muxing_non_associativity
, sa_mux_lev_1_dec
, 0, 0, 1, is_dram
, true);
234 PredecBlk
* sa_mux_lev_1_predec_blk2
= new PredecBlk(dyn_p
.deg_senseamp_muxing_non_associativity
, sa_mux_lev_1_dec
, 0, 0, 1, is_dram
, false);
235 PredecBlk
* sa_mux_lev_2_predec_blk1
= new PredecBlk(dp
.Ndsam_lev_2
, sa_mux_lev_2_dec
, 0, 0, 1, is_dram
, true);
236 PredecBlk
* sa_mux_lev_2_predec_blk2
= new PredecBlk(dp
.Ndsam_lev_2
, sa_mux_lev_2_dec
, 0, 0, 1, is_dram
, false);
237 dummy_way_sel_predec_blk1
= new PredecBlk(1, sa_mux_lev_1_dec
, 0, 0, 0, is_dram
, true);
238 dummy_way_sel_predec_blk2
= new PredecBlk(1, sa_mux_lev_1_dec
, 0, 0, 0, is_dram
, false);
240 PredecBlkDrv
* r_predec_blk_drv1
= new PredecBlkDrv(0, r_predec_blk1
, is_dram
);
241 PredecBlkDrv
* r_predec_blk_drv2
= new PredecBlkDrv(0, r_predec_blk2
, is_dram
);
242 PredecBlkDrv
* b_mux_predec_blk_drv1
= new PredecBlkDrv(0, b_mux_predec_blk1
, is_dram
);
243 PredecBlkDrv
* b_mux_predec_blk_drv2
= new PredecBlkDrv(0, b_mux_predec_blk2
, is_dram
);
244 PredecBlkDrv
* sa_mux_lev_1_predec_blk_drv1
= new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1
, is_dram
);
245 PredecBlkDrv
* sa_mux_lev_1_predec_blk_drv2
= new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2
, is_dram
);
246 PredecBlkDrv
* sa_mux_lev_2_predec_blk_drv1
= new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1
, is_dram
);
247 PredecBlkDrv
* sa_mux_lev_2_predec_blk_drv2
= new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2
, is_dram
);
248 way_sel_drv1
= new PredecBlkDrv(dyn_p
.number_way_select_signals_mat
, dummy_way_sel_predec_blk1
, is_dram
);
249 dummy_way_sel_predec_blk_drv2
= new PredecBlkDrv(1, dummy_way_sel_predec_blk2
, is_dram
);
251 r_predec
= new Predec(r_predec_blk_drv1
, r_predec_blk_drv2
);
252 b_mux_predec
= new Predec(b_mux_predec_blk_drv1
, b_mux_predec_blk_drv2
);
253 sa_mux_lev_1_predec
= new Predec(sa_mux_lev_1_predec_blk_drv1
, sa_mux_lev_1_predec_blk_drv2
);
254 sa_mux_lev_2_predec
= new Predec(sa_mux_lev_2_predec_blk_drv1
, sa_mux_lev_2_predec_blk_drv2
);
// NOTE(review): the trailing comment on the next statement records a known
// issue - the wire length is taken from subarray.area.h although the authors
// state it should be subarray.area.w. Behaviour is left as is.
256 subarray_out_wire
= new Wire(g_ip
->wt
, subarray
.area
.h
);//Bug should be subarray.area.w Owen and Sheng
// Loads for the bitline precharge/equalisation drivers: gate capacitance of
// two precharge devices plus one equaliser per column, and the wire RC across
// the columns.
258 double driver_c_gate_load
;
259 double driver_c_wire_load
;
260 double driver_r_wire_load
;
262 if (is_fa
|| pure_cam
)
264 { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
265 driver_c_gate_load
= (subarray
.num_cols_fa_cam
)* gate_C(2 * g_tp
.w_pmos_bl_precharge
+ g_tp
.w_pmos_bl_eq
, 0, is_dram
, false, false);
266 driver_c_wire_load
= subarray
.num_cols_fa_cam
* cam_cell
.w
* g_tp
.wire_outside_mat
.C_per_um
;
267 driver_r_wire_load
= subarray
.num_cols_fa_cam
* cam_cell
.w
* g_tp
.wire_outside_mat
.R_per_um
;
268 cam_bl_precharge_eq_drv
= new Driver(
276 //This is only used for fully asso not pure CAM
277 driver_c_gate_load
= (subarray
.num_cols_fa_ram
)* gate_C(2 * g_tp
.w_pmos_bl_precharge
+ g_tp
.w_pmos_bl_eq
, 0, is_dram
, false, false);
278 driver_c_wire_load
= subarray
.num_cols_fa_ram
* cell
.w
* g_tp
.wire_outside_mat
.C_per_um
;
279 driver_r_wire_load
= subarray
.num_cols_fa_ram
* cell
.w
* g_tp
.wire_outside_mat
.R_per_um
;
280 bl_precharge_eq_drv
= new Driver(
// NOTE(review): the block below sizes bl_precharge_eq_drv from num_cols and is
// presumably the plain-RAM alternative to the FA/CAM branch above - confirm.
290 driver_c_gate_load
= subarray
.num_cols
* gate_C(2 * g_tp
.w_pmos_bl_precharge
+ g_tp
.w_pmos_bl_eq
, 0, is_dram
, false, false);
291 driver_c_wire_load
= subarray
.num_cols
* cell
.w
* g_tp
.wire_outside_mat
.C_per_um
;
292 driver_r_wire_load
= subarray
.num_cols
* cell
.w
* g_tp
.wire_outside_mat
.R_per_um
;
293 bl_precharge_eq_drv
= new Driver(
// ---- Area estimation ----
// Row decoder: one decoder per row and per RWP+ERP+EWP port, expressed as a
// width alongside the subarray height.
299 double area_row_decoder
= row_dec
->area
.get_area() * subarray
.num_rows
* (RWP
+ ERP
+ EWP
);
300 double w_row_decoder
= area_row_decoder
/ subarray
.area
.get_h();
302 double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux
=
303 compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
// Subarray output drivers: one per column remaining after all muxing levels,
// expressed as a height across the subarray width.
305 double h_subarray_out_drv
= subarray_out_wire
->area
.get_area() *
306 (subarray
.num_cols
/ (deg_bl_muxing
* dp
.Ndsam_lev_1
* dp
.Ndsam_lev_2
)) / subarray
.area
.get_w();
309 h_subarray_out_drv
*= (RWP
+ ERP
+ SCHP
);
311 double h_comparators
= 0.0;
312 double w_row_predecode_output_wires
= 0.0;
313 double h_bit_mux_dec_out_wires
= 0.0;
314 double h_senseamp_mux_dec_out_wires
= 0.0;
// Comparators are only counted for tag arrays that are not fully associative.
316 if ((!is_fa
)&&(dp
.is_tag
))
318 //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
319 h_comparators
= compute_comparators_height(dp
.tagbits
, dyn_p
.num_do_b_mat
, subarray
.area
.get_w());
320 h_comparators
*= (RWP
+ ERP
);
// The branch effort at the output of one predecode block is 2^(address bits of
// the other block); the number of predecode output wires follows from the sum.
324 int branch_effort_predec_blk1_out
= (1 << r_predec_blk2
->number_input_addr_bits
);
325 int branch_effort_predec_blk2_out
= (1 << r_predec_blk1
->number_input_addr_bits
);
326 w_row_predecode_output_wires
= (branch_effort_predec_blk1_out
+ branch_effort_predec_blk2_out
) *
327 g_tp
.wire_inside_mat
.pitch
* (RWP
+ ERP
+ EWP
);
// NOTE(review): h_non_cell_area is assigned again further down; this first
// value is only a provisional estimate.
330 double h_non_cell_area
= (num_subarrays_per_mat
/ num_subarrays_per_row
) *
331 (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux
+
332 h_subarray_out_drv
+ h_comparators
);
334 double w_non_cell_area
= MAX(w_row_predecode_output_wires
, num_subarrays_per_row
* w_row_decoder
);
// Heights of the decoder output wire tracks (one track per mux input and per
// RWP+ERP port).
336 if (deg_bl_muxing
> 1)
338 h_bit_mux_dec_out_wires
= deg_bl_muxing
* g_tp
.wire_inside_mat
.pitch
* (RWP
+ ERP
);
340 if (dp
.Ndsam_lev_1
> 1)
342 h_senseamp_mux_dec_out_wires
= dp
.Ndsam_lev_1
* g_tp
.wire_inside_mat
.pitch
* (RWP
+ ERP
);
344 if (dp
.Ndsam_lev_2
> 1)
346 h_senseamp_mux_dec_out_wires
+= dp
.Ndsam_lev_2
* g_tp
.wire_inside_mat
.pitch
* (RWP
+ ERP
);
// Address / data-in wire tracks are only counted when the vertical H-tree
// wires are not routed over the array. For FA/CAM the search-in and search-out
// bits are added, scaled by the number of search ports.
349 double h_addr_datain_wires
;
350 if (!g_ip
->ver_htree_wires_over_array
)
352 h_addr_datain_wires
= (dp
.number_addr_bits_mat
+ dp
.number_way_select_signals_mat
+
353 (dp
.num_di_b_mat
+ dp
.num_do_b_mat
)/num_subarrays_per_row
) *
354 g_tp
.wire_inside_mat
.pitch
* (RWP
+ ERP
+ EWP
);
356 if (is_fa
|| pure_cam
)
358 h_addr_datain_wires
= (dp
.number_addr_bits_mat
+ dp
.number_way_select_signals_mat
+ //TODO: revisit
359 (dp
.num_di_b_mat
+ dp
.num_do_b_mat
)/num_subarrays_per_row
) *
360 g_tp
.wire_inside_mat
.pitch
* (RWP
+ ERP
+ EWP
) +
361 (dp
.num_si_b_mat
+ dp
.num_so_b_mat
)/num_subarrays_per_row
* g_tp
.wire_inside_mat
.pitch
* SCHP
;
363 //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
364 //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
365 h_non_cell_area
= (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux
+ h_comparators
+
366 h_subarray_out_drv
) * (num_subarrays_per_mat
/ num_subarrays_per_row
) +
367 h_addr_datain_wires
+
368 h_bit_mux_dec_out_wires
+
369 h_senseamp_mux_dec_out_wires
;
// Area of the predecoders, their drivers and the mux decoders placed in the
// centre of the mat, replicated per RWP+ERP+EWP port.
373 // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
374 double area_mat_center_circuitry
= (r_predec_blk_drv1
->area
.get_area() +
375 b_mux_predec_blk_drv1
->area
.get_area() +
376 sa_mux_lev_1_predec_blk_drv1
->area
.get_area() +
377 sa_mux_lev_2_predec_blk_drv1
->area
.get_area() +
378 way_sel_drv1
->area
.get_area() +
379 r_predec_blk_drv2
->area
.get_area() +
380 b_mux_predec_blk_drv2
->area
.get_area() +
381 sa_mux_lev_1_predec_blk_drv2
->area
.get_area() +
382 sa_mux_lev_2_predec_blk_drv2
->area
.get_area() +
383 r_predec_blk1
->area
.get_area() +
384 b_mux_predec_blk1
->area
.get_area() +
385 sa_mux_lev_1_predec_blk1
->area
.get_area() +
386 sa_mux_lev_2_predec_blk1
->area
.get_area() +
387 r_predec_blk2
->area
.get_area() +
388 b_mux_predec_blk2
->area
.get_area() +
389 sa_mux_lev_1_predec_blk2
->area
.get_area() +
390 sa_mux_lev_2_predec_blk2
->area
.get_area() +
391 bit_mux_dec
->area
.get_area() +
392 sa_mux_lev_1_dec
->area
.get_area() +
393 sa_mux_lev_2_dec
->area
.get_area()) * (RWP
+ ERP
+ EWP
);
// Final mat dimensions: stack the subarray rows plus non-cell height, lay out
// the subarrays plus non-cell width, then widen the mat so that the centre
// circuitry area is absorbed at constant height.
395 double area_efficiency_mat
;
399 assert(num_subarrays_per_mat
/num_subarrays_per_row
>0);
400 area
.h
= (num_subarrays_per_mat
/num_subarrays_per_row
)* subarray
.area
.h
+ h_non_cell_area
;
401 area
.w
= num_subarrays_per_row
* subarray
.area
.get_w() + w_non_cell_area
;
402 area
.w
= (area
.h
*area
.w
+ area_mat_center_circuitry
) / area
.h
;
403 area_efficiency_mat
= subarray
.area
.get_area() * num_subarrays_per_mat
* 100.0 / area
.get_area();
405 // cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
406 // cout<<"h_comparators"<<h_comparators<<endl;
407 // cout<<"h_subarray_out_drv"<<h_subarray_out_drv<<endl;
408 // cout<<"h_addr_datain_wires"<<h_addr_datain_wires<<endl;
409 // cout<<"h_bit_mux_dec_out_wires"<<h_bit_mux_dec_out_wires<<endl;
410 // cout<<"h_senseamp_mux_dec_out_wires"<<h_senseamp_mux_dec_out_wires<<endl;
411 // cout<<"h_non_cell_area"<<h_non_cell_area<<endl;
412 // cout<<"area.h =" << (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h<<endl;
413 // cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
414 // cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
421 // area.h = (num_subarrays_per_mat / num_subarrays_per_row) * subarray.area.get_h() + h_non_cell_area;
422 // area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
423 // area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
424 // area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
// Clean-up of the heap objects allocated with `new` by the Mat constructor and
// by compute_cam_delay().
// NOTE(review): this is presumably the body of Mat::~Mat(); the function
// header, and possibly some short delete statements, are not visible in this
// listing - confirm against the canonical file.
// The two mux decoders allocated by the constructor.
434 delete sa_mux_lev_1_dec
;
435 delete sa_mux_lev_2_dec
;
// Predecode blocks are reached through the Predec objects that were given
// their drivers in the constructor, so they must be released before the
// Predec objects themselves are deleted.
437 delete r_predec
->blk1
;
438 delete r_predec
->blk2
;
439 delete b_mux_predec
->blk1
;
440 delete b_mux_predec
->blk2
;
441 delete sa_mux_lev_1_predec
->blk1
;
442 delete sa_mux_lev_1_predec
->blk2
;
443 delete sa_mux_lev_2_predec
->blk1
;
444 delete sa_mux_lev_2_predec
->blk2
;
445 delete dummy_way_sel_predec_blk1
;
446 delete dummy_way_sel_predec_blk2
;
// Predecode block drivers, again reached through the Predec objects.
448 delete r_predec
->drv1
;
449 delete r_predec
->drv2
;
450 delete b_mux_predec
->drv1
;
451 delete b_mux_predec
->drv2
;
452 delete sa_mux_lev_1_predec
->drv1
;
453 delete sa_mux_lev_1_predec
->drv2
;
454 delete sa_mux_lev_2_predec
->drv1
;
455 delete sa_mux_lev_2_predec
->drv2
;
457 delete dummy_way_sel_predec_blk_drv2
;
// The Predec containers themselves, after their members were released above.
461 delete sa_mux_lev_1_predec
;
462 delete sa_mux_lev_2_predec
;
464 delete subarray_out_wire
;
466 delete bl_precharge_eq_drv
;
// Drivers that are deleted only for fully-associative or pure-CAM mats.
468 if (is_fa
|| pure_cam
)
470 delete sl_precharge_eq_drv
;
472 delete cam_bl_precharge_eq_drv
;
473 delete ml_precharge_drv
;
474 delete ml_to_ram_wl_drv
;
// Computes the delays along the access path of this mat and stores them in
// the delay_* members (delay_wl_reset, delay_bl_restore,
// delay_subarray_out_drv_htree, ...).
//
// inrisetime: rise time of the signal arriving at the mat inputs; it is passed
//             to compute_cam_delay() and to each predecoder.
// Returns the rise time at the mat output. The FA/CAM path returns the
// search-path value (outrisetime_search); the return statement of the other
// path is not visible in this listing.
// NOTE(review): short lines (braces, `else`, some `if` conditions, the
// declaration of `k`) appear to be missing here - confirm against the
// canonical file before editing code.
480 double Mat::compute_delays(double inrisetime
)
483 double rd
, C_intrinsic
, C_ld
, tf
, R_bl_precharge
,r_b_metal
, R_bl
, C_bl
;
484 double outrisetime_search
, outrisetime
, row_dec_outrisetime
;
485 // delay calculation for tags of fully associative cache
486 if (is_fa
|| pure_cam
)
488 //Compute search access time
489 outrisetime_search
= compute_cam_delay(inrisetime
);
// Wordline reset delay of the RAM part, driven by ml_to_ram_wl_drv: last-stage
// on-resistance times (intrinsic + load capacitance) plus the distributed wire
// RC term, converted to a delay with horowitz().
492 bl_precharge_eq_drv
->compute_delay(0);
493 k
= ml_to_ram_wl_drv
->number_gates
- 1;
494 rd
= tr_R_on(ml_to_ram_wl_drv
->width_n
[k
], NCH
, 1, is_dram
, false, true);
// NOTE(review): the PCH drain capacitance below is computed from width_n[k],
// whereas the equivalent code in the non-FA path of this function uses the
// PMOS width (w_dec_p[k]) for PCH. This looks like it should use the driver's
// PMOS width - confirm that Driver exposes one before changing it.
495 C_intrinsic
= drain_C_(ml_to_ram_wl_drv
->width_n
[k
], PCH
, 1, 1, 4*cell
.h
, is_dram
, false, true) +
496 drain_C_(ml_to_ram_wl_drv
->width_n
[k
], NCH
, 1, 1, 4*cell
.h
, is_dram
, false, true);
497 C_ld
= ml_to_ram_wl_drv
->c_gate_load
+ ml_to_ram_wl_drv
->c_wire_load
;
498 tf
= rd
* (C_intrinsic
+ C_ld
) + ml_to_ram_wl_drv
->r_wire_load
* C_ld
/ 2;
499 delay_wl_reset
= horowitz(0, tf
, 0.5, 0.5, RISE
);
// Bitline restore delay: precharge driver delay plus an RC time constant
// (precharge device resistance and half the bitline wire resistance, times the
// bitline capacitance) scaled by a log factor of the SRAM precharge and sense
// voltages.
501 R_bl_precharge
= tr_R_on(g_tp
.w_pmos_bl_precharge
, PCH
, 1, is_dram
, false, false);
502 r_b_metal
= cam_cell
.h
* g_tp
.wire_local
.R_per_um
;//dummy rows in sram are filled in
503 R_bl
= subarray
.num_rows
* r_b_metal
;
504 C_bl
= subarray
.C_bl
;
505 delay_bl_restore
= bl_precharge_eq_drv
->delay
+
506 log((g_tp
.sram
.Vbitpre
- 0.1 * dp
.V_b_sense
) / (g_tp
.sram
.Vbitpre
- dp
.V_b_sense
))*
507 (R_bl_precharge
* C_bl
+ R_bl
* C_bl
/ 2);
// Search path continues through bitline, sense amp, subarray output driver and
// the subarray output wire; each stage receives the previous stage's rise time.
510 outrisetime_search
= compute_bitline_delay(outrisetime_search
);
511 outrisetime_search
= compute_sa_delay(outrisetime_search
);
513 outrisetime_search
= compute_subarray_out_drv(outrisetime_search
);
514 subarray_out_wire
->set_in_rise_time(outrisetime_search
);
515 outrisetime_search
= subarray_out_wire
->signal_rise_time();
516 delay_subarray_out_drv_htree
= delay_subarray_out_drv
+ subarray_out_wire
->delay
;
519 //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
520 outrisetime
= r_predec
->compute_delays(inrisetime
);
521 row_dec_outrisetime
= row_dec
->compute_delays(outrisetime
);
523 outrisetime
= b_mux_predec
->compute_delays(inrisetime
);
524 bit_mux_dec
->compute_delays(outrisetime
);
526 outrisetime
= sa_mux_lev_1_predec
->compute_delays(inrisetime
);
527 sa_mux_lev_1_dec
->compute_delays(outrisetime
);
529 outrisetime
= sa_mux_lev_2_predec
->compute_delays(inrisetime
);
530 sa_mux_lev_2_dec
->compute_delays(outrisetime
);
534 outrisetime
= compute_bitline_delay(row_dec_outrisetime
);
535 outrisetime
= compute_sa_delay(outrisetime
);
// FA/CAM mats report the search-path rise time.
537 return outrisetime_search
;
// ---- Path for mats that are neither FA nor CAM ----
// (presumably the else branch of the is_fa || pure_cam test above - confirm.)
541 bl_precharge_eq_drv
->compute_delay(0);
542 if (row_dec
->exist
== true)
// Wordline reset delay from the last row-decoder stage, using the same
// formula as above but with the decoder's own NMOS/PMOS widths and loads.
544 int k
= row_dec
->num_gates
- 1;
545 double rd
= tr_R_on(row_dec
->w_dec_n
[k
], NCH
, 1, is_dram
, false, true);
546 // TODO: this 4*cell.h number must be revisited
547 double C_intrinsic
= drain_C_(row_dec
->w_dec_p
[k
], PCH
, 1, 1, 4*cell
.h
, is_dram
, false, true) +
548 drain_C_(row_dec
->w_dec_n
[k
], NCH
, 1, 1, 4*cell
.h
, is_dram
, false, true);
549 double C_ld
= row_dec
->C_ld_dec_out
;
550 double tf
= rd
* (C_intrinsic
+ C_ld
) + row_dec
->R_wire_dec_out
* C_ld
/ 2;
551 delay_wl_reset
= horowitz(0, tf
, 0.5, 0.5, RISE
);
553 double R_bl_precharge
= tr_R_on(g_tp
.w_pmos_bl_precharge
, PCH
, 1, is_dram
, false, false);
554 double r_b_metal
= cell
.h
* g_tp
.wire_local
.R_per_um
;
555 double R_bl
= subarray
.num_rows
* r_b_metal
;
556 double C_bl
= subarray
.C_bl
;
// Two alternative bitline-restore formulas follow: a fixed 2.3 * RC factor,
// and a log factor derived from the SRAM precharge and sense voltages.
// NOTE(review): the condition selecting between them is not visible in this
// listing - confirm.
560 delay_bl_restore
= bl_precharge_eq_drv
->delay
+ 2.3 * (R_bl_precharge
* C_bl
+ R_bl
* C_bl
/ 2);
564 delay_bl_restore
= bl_precharge_eq_drv
->delay
+
565 log((g_tp
.sram
.Vbitpre
- 0.1 * dp
.V_b_sense
) / (g_tp
.sram
.Vbitpre
- dp
.V_b_sense
))*
566 (R_bl_precharge
* C_bl
+ R_bl
* C_bl
/ 2);
// Predecoders are all driven from inrisetime; only the row decoder's output
// rise time is carried forward into the bitline stage.
572 outrisetime
= r_predec
->compute_delays(inrisetime
);
573 row_dec_outrisetime
= row_dec
->compute_delays(outrisetime
);
575 outrisetime
= b_mux_predec
->compute_delays(inrisetime
);
576 bit_mux_dec
->compute_delays(outrisetime
);
578 outrisetime
= sa_mux_lev_1_predec
->compute_delays(inrisetime
);
579 sa_mux_lev_1_dec
->compute_delays(outrisetime
);
581 outrisetime
= sa_mux_lev_2_predec
->compute_delays(inrisetime
);
582 sa_mux_lev_2_dec
->compute_delays(outrisetime
);
// Data path: bitline -> sense amp -> subarray output driver -> output wire.
584 outrisetime
= compute_bitline_delay(row_dec_outrisetime
);
585 outrisetime
= compute_sa_delay(outrisetime
);
586 outrisetime
= compute_subarray_out_drv(outrisetime
);
587 subarray_out_wire
->set_in_rise_time(outrisetime
);
588 outrisetime
= subarray_out_wire
->signal_rise_time();
590 delay_subarray_out_drv_htree
= delay_subarray_out_drv
+ subarray_out_wire
->delay
;
// The comparator delay is only computed for tag arrays that are not fully
// associative.
592 if (dp
.is_tag
== true && dp
.fully_assoc
== false)
594 compute_comparator_delay(0);
// Without a row decoder, the wordline reset delay is taken as the slower of
// the two row predecode blocks.
597 if (row_dec
->exist
== false)
599 delay_wl_reset
= MAX(r_predec
->blk1
->delay
, r_predec
->blk2
->delay
);
606 double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
609 double height
= compute_tr_width_after_folding(g_tp
.w_pmos_bl_precharge
, camFlag
? cam_cell
.w
:cell
.w
/ (2 *(RWP
+ ERP
+ SCHP
))) +
610 compute_tr_width_after_folding(g_tp
.w_pmos_bl_eq
, camFlag
? cam_cell
.w
:cell
.w
/ (RWP
+ ERP
+ SCHP
)); // precharge circuitry
612 if (deg_bl_muxing
> 1)
614 height
+= compute_tr_width_after_folding(g_tp
.w_nmos_b_mux
, cell
.w
/ (2 *(RWP
+ ERP
))); // col mux tr height
615 // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height
618 height
+= height_sense_amplifier(/*camFlag? sram_cell.w:*/cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
)); // sense_amp_height
620 if (dp
.Ndsam_lev_1
> 1)
622 height
+= compute_tr_width_after_folding(
623 g_tp
.w_nmos_sa_mux
, cell
.w
* dp
.Ndsam_lev_1
/ (RWP
+ ERP
)); // sense_amp_mux_height
624 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
627 if (dp
.Ndsam_lev_2
> 1)
629 height
+= compute_tr_width_after_folding(
630 g_tp
.w_nmos_sa_mux
, cell
.w
* deg_bl_muxing
* dp
.Ndsam_lev_1
/ (RWP
+ ERP
)); // sense_amp_mux_height
631 //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP);
633 // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
634 height
+= 2 * compute_tr_width_after_folding(
635 pmos_to_nmos_sz_ratio(is_dram
) * g_tp
.min_w_nmos_
, cell
.w
* dp
.Ndsam_lev_2
/ (RWP
+ ERP
));
636 height
+= 2 * compute_tr_width_after_folding(g_tp
.min_w_nmos_
, cell
.w
* dp
.Ndsam_lev_2
/ (RWP
+ ERP
));
639 // TODO: this should be uncommented...
640 /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
642 //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
643 double width_write_driver_write_mux = width_write_driver_or_write_mux();
644 double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
647 dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
648 height += height_write_driver_write_mux;
656 double Mat::compute_cam_delay(double inrisetime
)
659 double out_time_ramp
, this_delay
;
660 double Rwire
, tf
, c_intrinsic
, rd
, Cwire
, c_gate_load
;
663 double Wdecdrivep
, Wdecdriven
, Wfadriven
, Wfadrivep
, Wfadrive2n
, Wfadrive2p
, Wfadecdrive1n
, Wfadecdrive1p
,
664 Wfadecdrive2n
, Wfadecdrive2p
, Wfadecdriven
, Wfadecdrivep
, Wfaprechn
, Wfaprechp
,
665 Wdummyn
, Wdummyinvn
, Wdummyinvp
, Wfainvn
, Wfainvp
, Waddrnandn
, Waddrnandp
,
666 Wfanandn
, Wfanandp
, Wfanorn
, Wfanorp
, Wdecnandn
, Wdecnandp
, W_hit_miss_n
, W_hit_miss_p
;
668 double c_matchline_metal
, r_matchline_metal
, c_searchline_metal
, r_searchline_metal
, dynSearchEng
;
671 double driver_c_gate_load
;
672 double driver_c_wire_load
;
673 double driver_r_wire_load
;
674 //double searchline_precharge_time;
676 double leak_power_cc_inverters_sram_cell
= 0;
677 double leak_power_acc_tr_RW_or_WR_port_sram_cell
= 0;
678 double leak_power_RD_port_sram_cell
= 0;
679 double leak_power_SCHP_port_sram_cell
= 0;
680 double leak_comparator_cam_cell
=0;
682 double gate_leak_comparator_cam_cell
= 0;
683 double gate_leak_power_cc_inverters_sram_cell
= 0;
684 double gate_leak_power_RD_port_sram_cell
= 0;
685 double gate_leak_power_SCHP_port_sram_cell
= 0;
687 c_matchline_metal
= cam_cell
.get_w() * g_tp
.wire_local
.C_per_um
;
688 c_searchline_metal
= cam_cell
.get_h() * g_tp
.wire_local
.C_per_um
;
689 r_matchline_metal
= cam_cell
.get_w() * g_tp
.wire_local
.R_per_um
;
690 r_searchline_metal
= cam_cell
.get_h() * g_tp
.wire_local
.R_per_um
;
693 delay_matchchline
= 0.0;
694 double p_to_n_sizing_r
= pmos_to_nmos_sz_ratio(is_dram
);
695 bool linear_scaling
= false;
699 Wdecdrivep
= 450 * g_ip
->F_sz_um
;//this was 360 micron for the 0.8 micron process
700 Wdecdriven
= 300 * g_ip
->F_sz_um
;//this was 240 micron for the 0.8 micron process
701 Wfadriven
= 62.5 * g_ip
->F_sz_um
;//this was 50 micron for the 0.8 micron process
702 Wfadrivep
= 125 * g_ip
->F_sz_um
;//this was 100 micron for the 0.8 micron process
703 Wfadrive2n
= 250 * g_ip
->F_sz_um
;//this was 200 micron for the 0.8 micron process
704 Wfadrive2p
= 500 * g_ip
->F_sz_um
;//this was 400 micron for the 0.8 micron process
705 Wfadecdrive1n
= 6.25 * g_ip
->F_sz_um
;//this was 5 micron for the 0.8 micron process
706 Wfadecdrive1p
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
707 Wfadecdrive2n
= 25 * g_ip
->F_sz_um
;//this was 20 micron for the 0.8 micron process
708 Wfadecdrive2p
= 50 * g_ip
->F_sz_um
;//this was 40 micron for the 0.8 micron process
709 Wfadecdriven
= 62.5 * g_ip
->F_sz_um
;//this was 50 micron for the 0.8 micron process
710 Wfadecdrivep
= 125 * g_ip
->F_sz_um
;//this was 100 micron for the 0.8 micron process
711 Wfaprechn
= 7.5 * g_ip
->F_sz_um
;//this was 6 micron for the 0.8 micron process
712 Wfainvn
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
713 Wfainvp
= 25 * g_ip
->F_sz_um
;//this was 20 micron for the 0.8 micron process
714 Wfanandn
= 25 * g_ip
->F_sz_um
;//this was 20 micron for the 0.8 micron process
715 Wfanandp
= 37.5 * g_ip
->F_sz_um
;//this was 30 micron for the 0.8 micron process
716 Wdecnandn
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
717 Wdecnandp
= 37.5 * g_ip
->F_sz_um
;//this was 30 micron for the 0.8 micron process
719 Wfaprechp
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
720 Wdummyn
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
721 Wdummyinvn
= 75 * g_ip
->F_sz_um
;//this was 60 micron for the 0.8 micron process
722 Wdummyinvp
= 100 * g_ip
->F_sz_um
;//this was 80 micron for the 0.8 micron process
723 Waddrnandn
= 62.5 * g_ip
->F_sz_um
;//this was 50 micron for the 0.8 micron process
724 Waddrnandp
= 62.5 * g_ip
->F_sz_um
;//this was 50 micron for the 0.8 micron process
725 Wfanorn
= 6.25 * g_ip
->F_sz_um
;//this was 5 micron for the 0.8 micron process
726 Wfanorp
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
727 W_hit_miss_n
= Wdummyn
;
728 W_hit_miss_p
= g_tp
.min_w_nmos_
*p_to_n_sizing_r
;
729 //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
733 Wdecdrivep
= 450 * g_ip
->F_sz_um
;//this was 360 micron for the 0.8 micron process
734 Wdecdriven
= 300 * g_ip
->F_sz_um
;//this was 240 micron for the 0.8 micron process
735 Wfadriven
= 62.5 * g_ip
->F_sz_um
;//this was 50 micron for the 0.8 micron process
736 Wfadrivep
= 125 * g_ip
->F_sz_um
;//this was 100 micron for the 0.8 micron process
737 Wfadrive2n
= 250 * g_ip
->F_sz_um
;//this was 200 micron for the 0.8 micron process
738 Wfadrive2p
= 500 * g_ip
->F_sz_um
;//this was 400 micron for the 0.8 micron process
739 Wfadecdrive1n
= 6.25 * g_ip
->F_sz_um
;//this was 5 micron for the 0.8 micron process
740 Wfadecdrive1p
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
741 Wfadecdrive2n
= 25 * g_ip
->F_sz_um
;//this was 20 micron for the 0.8 micron process
742 Wfadecdrive2p
= 50 * g_ip
->F_sz_um
;//this was 40 micron for the 0.8 micron process
743 Wfadecdriven
= 62.5 * g_ip
->F_sz_um
;//this was 50 micron for the 0.8 micron process
744 Wfadecdrivep
= 125 * g_ip
->F_sz_um
;//this was 100 micron for the 0.8 micron process
745 Wfaprechn
= 7.5 * g_ip
->F_sz_um
;//this was 6 micron for the 0.8 micron process
746 Wfainvn
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
747 Wfainvp
= 25 * g_ip
->F_sz_um
;//this was 20 micron for the 0.8 micron process
748 Wfanandn
= 25 * g_ip
->F_sz_um
;//this was 20 micron for the 0.8 micron process
749 Wfanandp
= 37.5 * g_ip
->F_sz_um
;//this was 30 micron for the 0.8 micron process
750 Wdecnandn
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
751 Wdecnandp
= 37.5 * g_ip
->F_sz_um
;//this was 30 micron for the 0.8 micron process
753 Wfaprechp
= g_tp
.w_pmos_bl_precharge
;//this was 10 micron for the 0.8 micron process
754 Wdummyn
= g_tp
.cam
.cell_nmos_w
;
755 Wdummyinvn
= 75 * g_ip
->F_sz_um
;//this was 60 micron for the 0.8 micron process
756 Wdummyinvp
= 100 * g_ip
->F_sz_um
;//this was 80 micron for the 0.8 micron process
757 Waddrnandn
= 62.5 * g_ip
->F_sz_um
;//this was 50 micron for the 0.8 micron process
758 Waddrnandp
= 62.5 * g_ip
->F_sz_um
;//this was 50 micron for the 0.8 micron process
759 Wfanorn
= 6.25 * g_ip
->F_sz_um
;//this was 5 micron for the 0.8 micron process
760 Wfanorp
= 12.5 * g_ip
->F_sz_um
;//this was 10 micron for the 0.8 micron process
761 W_hit_miss_n
= Wdummyn
;
762 W_hit_miss_p
= g_tp
.min_w_nmos_
*p_to_n_sizing_r
;
765 Htagbits
= (int)(ceil ((double) (subarray
.num_cols_fa_cam
) / 2.0));
767 /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
768 search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
769 From the driver(am and an) to the comparators in all the rows including the dummy row,
770 Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
772 //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
773 //Searchline precharge routes horizontally
774 driver_c_gate_load
= subarray
.num_cols_fa_cam
* gate_C(2 * g_tp
.w_pmos_bl_precharge
+ g_tp
.w_pmos_bl_eq
, 0, is_dram
, false, false);
775 driver_c_wire_load
= subarray
.num_cols_fa_cam
* cam_cell
.w
* g_tp
.wire_outside_mat
.C_per_um
;
776 driver_r_wire_load
= subarray
.num_cols_fa_cam
* cam_cell
.w
* g_tp
.wire_outside_mat
.R_per_um
;
778 sl_precharge_eq_drv
= new Driver(
784 //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
785 //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
786 driver_c_gate_load
= (subarray
.num_rows
+ 1) * gate_C(Wdummyn
, 0, is_dram
, false, false);
787 driver_c_wire_load
= (subarray
.num_rows
+ 1) * c_searchline_metal
;
788 driver_r_wire_load
= (subarray
.num_rows
+ 1) * r_searchline_metal
;
789 sl_data_drv
= new Driver(
795 sl_precharge_eq_drv
->compute_delay(0);
796 double R_bl_precharge
= tr_R_on(g_tp
.w_pmos_bl_precharge
, PCH
, 1, is_dram
, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
797 double r_b_metal
= cam_cell
.h
* g_tp
.wire_local
.R_per_um
;
798 double R_bl
= (subarray
.num_rows
+ 1) * r_b_metal
;
799 double C_bl
= subarray
.C_bl_cam
;
800 delay_cam_sl_restore
= sl_precharge_eq_drv
->delay
801 + log(g_tp
.cam
.Vbitpre
)* (R_bl_precharge
* C_bl
+ R_bl
* C_bl
/ 2);
803 out_time_ramp
= sl_data_drv
->compute_delay(inrisetime
);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
805 //matchline ops delay
806 delay_matchchline
+= sl_data_drv
->delay
;
808 /* second stage, from the transistors in the comparators (both normal row and dummy row) to the NAND gates that combine both halves */
809 //matchline delay, matchline power, matchline_reset for cycle time computation,
811 ////matchline precharge circuitry routes vertically
812 //There are two matchline precharge driver chains per subarray.
813 driver_c_gate_load
= (subarray
.num_rows
+ 1) * gate_C(Wfaprechp
, 0, is_dram
);
814 driver_c_wire_load
= (subarray
.num_rows
+ 1) * c_searchline_metal
;
815 driver_r_wire_load
= (subarray
.num_rows
+ 1) * r_searchline_metal
;
817 ml_precharge_drv
= new Driver(
823 ml_precharge_drv
->compute_delay(0);
826 rd
= tr_R_on(Wdummyn
, NCH
, 2, is_dram
);
827 c_intrinsic
= Htagbits
*(2*drain_C_(Wdummyn
, NCH
, 2, 1, g_tp
.cell_h_def
, is_dram
)//TODO: the cell_h_def should be revisit
828 + drain_C_(Wfaprechp
, PCH
, 1, 1, g_tp
.cell_h_def
, is_dram
)/Htagbits
);//since each halve only has one precharge tx per matchline
830 Cwire
= c_matchline_metal
* Htagbits
;
831 Rwire
= r_matchline_metal
* Htagbits
;
832 c_gate_load
= gate_C(Waddrnandn
+ Waddrnandp
, 0, is_dram
);
834 double R_ml_precharge
= tr_R_on(Wfaprechp
, PCH
, 1, is_dram
);
835 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
837 double C_ml
= Cwire
+ c_intrinsic
;
838 delay_cam_ml_reset
= ml_precharge_drv
->delay
839 + log(g_tp
.cam
.Vbitpre
)* (R_ml_precharge
* C_ml
+ R_ml
* C_ml
/ 2);//TODO: latest CAM has sense amps on matchlines too
841 //matchline ops delay
842 tf
= rd
* (c_intrinsic
+ Cwire
/ 2 + c_gate_load
) + Rwire
* (Cwire
/ 2 + c_gate_load
);
843 this_delay
= horowitz(out_time_ramp
, tf
, VTHFA2
, VTHFA3
, FALL
);
844 delay_matchchline
+= this_delay
;
845 out_time_ramp
= this_delay
/ VTHFA3
;
847 dynSearchEng
+= ((c_intrinsic
+ Cwire
+ c_gate_load
)*(subarray
.num_rows
+1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
848 * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
*2;//* Ntbl;//each subarry has two halves
850 /* third stage, from the NAND2 gates to the drivers in the dummy row */
851 rd
= tr_R_on(Waddrnandn
, NCH
, 2, is_dram
);
852 c_intrinsic
= drain_C_(Waddrnandn
, NCH
, 2, 1, g_tp
.cell_h_def
, is_dram
) +
853 drain_C_(Waddrnandp
, PCH
, 1, 1, g_tp
.cell_h_def
, is_dram
)*2;
854 c_gate_load
= gate_C(Wdummyinvn
+ Wdummyinvp
, 0, is_dram
);
855 tf
= rd
* (c_intrinsic
+ c_gate_load
);
856 this_delay
= horowitz(out_time_ramp
, tf
, VTHFA3
, VTHFA4
, RISE
);
857 out_time_ramp
= this_delay
/ (1 - VTHFA4
);
858 delay_matchchline
+= this_delay
;
860 //only the dummy row has the extra inverter between NAND and NOR gates
861 dynSearchEng
+= (c_intrinsic
* (subarray
.num_rows
+1)+ c_gate_load
*2) * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;// * Ntbl;
863 /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
864 rd
= tr_R_on(Wdummyinvn
, NCH
, 1, is_dram
);
865 c_intrinsic
= drain_C_(Wdummyinvn
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) + drain_C_(Wdummyinvp
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
);
866 Cwire
= c_matchline_metal
* Htagbits
+ c_searchline_metal
* (subarray
.num_rows
+1)/2;
867 Rwire
= r_matchline_metal
* Htagbits
+ r_searchline_metal
* (subarray
.num_rows
+1)/2;
868 c_gate_load
= gate_C(Wfanorn
+ Wfanorp
, 0, is_dram
);
869 tf
= rd
* (c_intrinsic
+ Cwire
+ c_gate_load
) + Rwire
* (Cwire
/ 2 + c_gate_load
);
870 this_delay
= horowitz (out_time_ramp
, tf
, VTHFA4
, VTHFA5
, FALL
);
871 out_time_ramp
= this_delay
/ VTHFA5
;
872 delay_matchchline
+= this_delay
;
874 dynSearchEng
+= (c_intrinsic
+ Cwire
+ subarray
.num_rows
*c_gate_load
) * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;//* Ntbl;
876 /* final stage, from the NOR gate to drive the wordline of the data portion */
878 //searchline data driver There are two matchline precharge driver chains per subarray.
879 driver_c_gate_load
= gate_C(W_hit_miss_n
, 0, is_dram
, false, false);//nmos of the pull down logic
880 driver_c_wire_load
= subarray
.C_wl_ram
;
881 driver_r_wire_load
= subarray
.R_wl_ram
;
883 ml_to_ram_wl_drv
= new Driver(
891 rd
= tr_R_on(Wfanorn
, NCH
, 1, is_dram
);
892 c_intrinsic
= 2* drain_C_(Wfanorn
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) + drain_C_(Wfanorp
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
);
893 c_gate_load
= gate_C(ml_to_ram_wl_drv
->width_n
[0] + ml_to_ram_wl_drv
->width_p
[0], 0, is_dram
);
894 tf
= rd
* (c_intrinsic
+ c_gate_load
);
895 this_delay
= horowitz (out_time_ramp
, tf
, 0.5, 0.5, RISE
);
896 out_time_ramp
= this_delay
/ (1-0.5);
897 delay_matchchline
+= this_delay
;
899 out_time_ramp
= ml_to_ram_wl_drv
->compute_delay(out_time_ramp
);
901 //c_gate_load energy is computed in ml_to_ram_wl_drv
902 dynSearchEng
+= (c_intrinsic
) * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;//* Ntbl;
905 /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
906 /*Precharge the hitting logic */
907 c_intrinsic
= 2*drain_C_(W_hit_miss_p
, NCH
, 2, 1, g_tp
.cell_h_def
, is_dram
);
908 Cwire
= c_searchline_metal
* subarray
.num_rows
;
909 Rwire
= r_searchline_metal
* subarray
.num_rows
;
910 c_gate_load
= drain_C_(W_hit_miss_n
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
)* subarray
.num_rows
;
912 rd
= tr_R_on(W_hit_miss_p
, PCH
, 1, is_dram
, false, false);
913 //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
914 double R_hit_miss
= Rwire
;
915 double C_hit_miss
= Cwire
+ c_intrinsic
;
916 delay_hit_miss_reset
= log(g_tp
.cam
.Vbitpre
)* (rd
* C_hit_miss
+ R_hit_miss
* C_hit_miss
/ 2);
917 dynSearchEng
+= (c_intrinsic
+ Cwire
+ c_gate_load
) * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;
919 /*hitting logic evaluation */
920 c_intrinsic
= 2*drain_C_(W_hit_miss_n
, NCH
, 2, 1, g_tp
.cell_h_def
, is_dram
);
921 Cwire
= c_searchline_metal
* subarray
.num_rows
;
922 Rwire
= r_searchline_metal
* subarray
.num_rows
;
923 c_gate_load
= drain_C_(W_hit_miss_n
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
)* subarray
.num_rows
;
925 rd
= tr_R_on(W_hit_miss_n
, PCH
, 1, is_dram
, false, false);
926 tf
= rd
* (c_intrinsic
+ Cwire
/ 2 + c_gate_load
) + Rwire
* (Cwire
/ 2 + c_gate_load
);
928 delay_hit_miss
= horowitz(0, tf
, 0.5, 0.5, FALL
);
931 delay_matchchline
+= MAX(ml_to_ram_wl_drv
->delay
, delay_hit_miss
);
933 dynSearchEng
+= (c_intrinsic
+ Cwire
+ c_gate_load
) * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;
935 /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
937 power_matchline
.searchOp
.dynamic
= dynSearchEng
;
939 //leakage in one subarray
940 double Iport
= cmos_Isub_leakage(g_tp
.cam
.cell_a_w
, 0, 1, nmos
, false, true);//TODO: how much is the idle time? just by *2?
941 double Iport_erp
= cmos_Isub_leakage(g_tp
.cam
.cell_a_w
, 0, 2, nmos
, false, true);
942 double Icell
= cmos_Isub_leakage(g_tp
.cam
.cell_nmos_w
, g_tp
.cam
.cell_pmos_w
, 1, inv
, false, true)*2;
943 double Icell_comparator
= cmos_Isub_leakage(Wdummyn
, Wdummyn
, 1, inv
, false, true)*2;//approx XOR with Inv
945 leak_power_cc_inverters_sram_cell
= Icell
* g_tp
.cam_cell
.Vdd
;
946 leak_comparator_cam_cell
= Icell_comparator
* g_tp
.cam_cell
.Vdd
;
947 leak_power_acc_tr_RW_or_WR_port_sram_cell
= Iport
* g_tp
.cam_cell
.Vdd
;
948 leak_power_RD_port_sram_cell
= Iport_erp
* g_tp
.cam_cell
.Vdd
;
949 leak_power_SCHP_port_sram_cell
= 0;//search port and r/w port are sperate, therefore no access txs in search ports
951 power_matchline
.searchOp
.leakage
+= leak_power_cc_inverters_sram_cell
+
952 leak_comparator_cam_cell
+
953 leak_power_acc_tr_RW_or_WR_port_sram_cell
+
954 leak_power_acc_tr_RW_or_WR_port_sram_cell
* (RWP
+ EWP
- 1) +
955 leak_power_RD_port_sram_cell
* ERP
+
956 leak_power_SCHP_port_sram_cell
*SCHP
;
957 // power_matchline.searchOp.leakage += leak_comparator_cam_cell;
958 power_matchline
.searchOp
.leakage
*= (subarray
.num_rows
+1) * subarray
.num_cols_fa_cam
;//TODO:dumy line precise
959 power_matchline
.searchOp
.leakage
+= (subarray
.num_rows
+1) * cmos_Isub_leakage(0, Wfaprechp
, 1, pmos
) * g_tp
.cam_cell
.Vdd
;
960 power_matchline
.searchOp
.leakage
+= (subarray
.num_rows
+1) * cmos_Isub_leakage(Waddrnandn
, Waddrnandp
, 2, nand
) * g_tp
.cam_cell
.Vdd
;
961 power_matchline
.searchOp
.leakage
+= (subarray
.num_rows
+1) * cmos_Isub_leakage(Wfanorn
, Wfanorp
,2, nor
) * g_tp
.cam_cell
.Vdd
;
962 //In idle states, the hit/miss txs are closed (on) therefore no Isub
963 power_matchline
.searchOp
.leakage
+= 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
964 // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
966 //in idle state, Ig_on only possibly exist in access transistors of read only ports
967 double Ig_port_erp
= cmos_Ig_leakage(g_tp
.cam
.cell_a_w
, 0, 1, nmos
, false, true);
968 double Ig_cell
= cmos_Ig_leakage(g_tp
.cam
.cell_nmos_w
, g_tp
.cam
.cell_pmos_w
, 1, inv
, false, true)*2;
969 double Ig_cell_comparator
= cmos_Ig_leakage(Wdummyn
, Wdummyn
, 1, inv
, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
971 gate_leak_comparator_cam_cell
= Ig_cell_comparator
* g_tp
.cam_cell
.Vdd
;
972 gate_leak_power_cc_inverters_sram_cell
= Ig_cell
*g_tp
.cam_cell
.Vdd
;
973 gate_leak_power_RD_port_sram_cell
= Ig_port_erp
*g_tp
.sram_cell
.Vdd
;
974 gate_leak_power_SCHP_port_sram_cell
= 0;
976 //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
978 power_matchline
.searchOp
.gate_leakage
+= gate_leak_power_cc_inverters_sram_cell
;
979 power_matchline
.searchOp
.gate_leakage
+= gate_leak_comparator_cam_cell
;
980 power_matchline
.searchOp
.gate_leakage
+= gate_leak_power_SCHP_port_sram_cell
*SCHP
+ gate_leak_power_RD_port_sram_cell
* ERP
;
981 power_matchline
.searchOp
.gate_leakage
*= (subarray
.num_rows
+1) * subarray
.num_cols_fa_cam
;//TODO:dumy line precise
982 power_matchline
.searchOp
.gate_leakage
+= (subarray
.num_rows
+1) * cmos_Ig_leakage(0, Wfaprechp
,1, pmos
) * g_tp
.cam_cell
.Vdd
;
983 power_matchline
.searchOp
.gate_leakage
+= (subarray
.num_rows
+1) * cmos_Ig_leakage(Waddrnandn
, Waddrnandp
, 2, nand
) * g_tp
.cam_cell
.Vdd
;
984 power_matchline
.searchOp
.gate_leakage
+= (subarray
.num_rows
+1) * cmos_Ig_leakage(Wfanorn
, Wfanorp
, 2, nor
) * g_tp
.cam_cell
.Vdd
;
985 power_matchline
.searchOp
.gate_leakage
+= subarray
.num_rows
* cmos_Ig_leakage(W_hit_miss_n
, 0,1, nmos
) * g_tp
.cam_cell
.Vdd
+
986 + cmos_Ig_leakage(0, W_hit_miss_p
,1, pmos
) * g_tp
.cam_cell
.Vdd
;
989 return out_time_ramp
;
993 double Mat::width_write_driver_or_write_mux()
995 // calculate resistance of SRAM cell pull-up PMOS transistor
996 // cam and sram have same cell trasistor properties
997 double R_sram_cell_pull_up_tr
= tr_R_on(g_tp
.sram
.cell_pmos_w
, NCH
, 1, is_dram
, true);
998 double R_access_tr
= tr_R_on(g_tp
.sram
.cell_a_w
, NCH
, 1, is_dram
, true);
999 double target_R_write_driver_and_mux
= (2 * R_sram_cell_pull_up_tr
- R_access_tr
) / 2;
1000 double width_write_driver_nmos
= R_to_w(target_R_write_driver_and_mux
, NCH
, is_dram
);
1002 return width_write_driver_nmos
;
// Estimates the height taken by the tag comparators.
// The area of one 2-input NAND gate (compute_gate_area with g_tp.w_comp_n and
// g_tp.cell_h_def) is multiplied by number_ways_in_mat * tagbits / 4, and the
// resulting cumulative area is divided by subarray_mem_cell_area_width.
// NOTE(review): `tagbits` is used below but is not among the parameters visible
// in this listing (original line 1008 is absent) -- presumably a leading
// parameter; confirm against the full source.
1007 double Mat::compute_comparators_height(
1009 int number_ways_in_mat
,
1010 double subarray_mem_cell_area_width
)
1012 double nand2_area
= compute_gate_area(NAND
, 2, 0, g_tp
.w_comp_n
, g_tp
.cell_h_def
);
1013 double cumulative_area
= nand2_area
* number_ways_in_mat
* tagbits
/ 4;
1014 return cumulative_area
/ subarray_mem_cell_area_width
;
// Computes the bitline delay of the mat, taking the input rise time
// `inrisetime` into account, and stores it in delay_bitline. Also fills in the
// bitline bookkeeping: power_bitline (leakage, gate leakage, read/write dynamic
// energy), delay_writeback and per_bitline_read_energy.
// NOTE(review): brace, `else` and a few statement lines are absent from this
// listing (tstep and tau are assigned without a visible declaration, and the
// final return is not shown), so the exact branch structure should be
// confirmed against the full source.
1019 double Mat::compute_bitline_delay(double inrisetime
)
1021 double V_b_pre
, v_th_mem_cell
, V_wl
;
1023 double dynRdEnergy
= 0.0, dynWriteEnergy
= 0.0;
1024 double R_cell_pull_down
=0.0, R_cell_acc
=0.0, r_dev
=0.0;
1025 int deg_senseamp_muxing
= dp
.Ndsam_lev_1
* dp
.Ndsam_lev_2
;
// NOTE(review): `?:` binds looser than `*`, so this parses as
// camFlag ? cam_cell.h : (cell.h * R_per_um); the CAM alternative is not
// multiplied by R_per_um. Confirm whether
// (camFlag ? cam_cell.h : cell.h) * R_per_um was intended.
1027 double R_b_metal
= camFlag
? cam_cell
.h
:cell
.h
* g_tp
.wire_local
.R_per_um
;
1028 double R_bl
= subarray
.num_rows
* R_b_metal
;
1029 double C_bl
= subarray
.C_bl
;
1031 // TODO: no leakage for DRAMs?
1032 double leak_power_cc_inverters_sram_cell
= 0;
1033 double gate_leak_power_cc_inverters_sram_cell
= 0;
1034 double leak_power_acc_tr_RW_or_WR_port_sram_cell
= 0;
1035 double leak_power_RD_port_sram_cell
= 0;
1036 double gate_leak_power_RD_port_sram_cell
= 0;
// Precharge voltage, cell threshold voltage and cell resistances are taken
// from the dram or the sram technology parameters.
// NOTE(review): among the lines visible here V_wl is assigned only together
// with the sram_cell values (original line 1042 is absent); check that the
// DRAM path also sets V_wl before it is read further down.
1038 if (is_dram
== true)
1040 V_b_pre
= g_tp
.dram
.Vbitpre
;
1041 v_th_mem_cell
= g_tp
.dram_acc
.Vth
;
1043 //The access transistor is not folded. So we just need to specify a threshold value for the
1044 //folding width that is equal to or greater than Wmemcella.
1045 R_cell_acc
= tr_R_on(g_tp
.dram
.cell_a_w
, NCH
, 1, true, true);
1046 r_dev
= g_tp
.dram_cell_Vdd
/ g_tp
.dram_cell_I_on
+ R_bl
/ 2;
1050 V_b_pre
= g_tp
.sram
.Vbitpre
;
1051 v_th_mem_cell
= g_tp
.sram_cell
.Vth
;
1052 V_wl
= g_tp
.sram_cell
.Vdd
;
1053 R_cell_pull_down
= tr_R_on(g_tp
.sram
.cell_nmos_w
, NCH
, 1, false, true);
1054 R_cell_acc
= tr_R_on(g_tp
.sram
.cell_a_w
, NCH
, 1, false, true);
1056 //Leakage current of an SRAM cell
1057 double Iport
= cmos_Isub_leakage(g_tp
.sram
.cell_a_w
, 0, 1, nmos
,false, true);//TODO: how much is the idle time? just by *2?
1058 double Iport_erp
= cmos_Isub_leakage(g_tp
.sram
.cell_a_w
, 0, 2, nmos
,false, true);
1059 double Icell
= cmos_Isub_leakage(g_tp
.sram
.cell_nmos_w
, g_tp
.sram
.cell_pmos_w
, 1, inv
,false, true)*2;//two invs per cell
1061 leak_power_cc_inverters_sram_cell
= Icell
* g_tp
.sram_cell
.Vdd
;
1062 leak_power_acc_tr_RW_or_WR_port_sram_cell
= Iport
* g_tp
.sram_cell
.Vdd
;
1063 leak_power_RD_port_sram_cell
= Iport_erp
* g_tp
.sram_cell
.Vdd
;
1066 //in idle state, Ig_on only possibly exist in access transistors of read only ports
1067 double Ig_port_erp
= cmos_Ig_leakage(g_tp
.sram
.cell_a_w
, 0, 1, nmos
,false, true);
1068 double Ig_cell
= cmos_Ig_leakage(g_tp
.sram
.cell_nmos_w
, g_tp
.sram
.cell_pmos_w
, 1, inv
,false, true);
1070 gate_leak_power_cc_inverters_sram_cell
= Ig_cell
*g_tp
.sram_cell
.Vdd
;
1071 gate_leak_power_RD_port_sram_cell
= Ig_port_erp
*g_tp
.sram_cell
.Vdd
;
// Capacitances and resistances of the bitline mux, sense-amp isolation
// transistor, sense-amp latch and sense-amp mux.
// NOTE(review): in the drain_C_ calls below the fifth argument is written
// `camFlag ? cam_cell.w : cell.w * ... / (...)`; because `?:` has lower
// precedence than `*` and `/`, only the cell.w alternative is scaled.
// Confirm whether (camFlag ? cam_cell.w : cell.w) * ... / (...) was intended.
1075 double C_drain_bit_mux
= drain_C_(g_tp
.w_nmos_b_mux
, NCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
/ (2 *(RWP
+ ERP
+ SCHP
)), is_dram
);
1076 double R_bit_mux
= tr_R_on(g_tp
.w_nmos_b_mux
, NCH
, 1, is_dram
);
1077 double C_drain_sense_amp_iso
= drain_C_(g_tp
.w_iso
, PCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
);
1078 double R_sense_amp_iso
= tr_R_on(g_tp
.w_iso
, PCH
, 1, is_dram
);
1079 double C_sense_amp_latch
= gate_C(g_tp
.w_sense_p
+ g_tp
.w_sense_n
, 0, is_dram
) +
1080 drain_C_(g_tp
.w_sense_n
, NCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
) +
1081 drain_C_(g_tp
.w_sense_p
, PCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
);
1082 double C_drain_sense_amp_mux
= drain_C_(g_tp
.w_nmos_sa_mux
, NCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
);
// tstep and energies computed from the dram_cell_* parameters
// (presumably the DRAM branch; the enclosing condition is not visible here).
1086 double fraction
= dp
.V_b_sense
/ ((g_tp
.dram_cell_Vdd
/2) * g_tp
.dram_cell_C
/(g_tp
.dram_cell_C
+ C_bl
));
1087 tstep
= 2.3 * fraction
* r_dev
*
1088 (g_tp
.dram_cell_C
* (C_bl
+ 2*C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
)) /
1089 (g_tp
.dram_cell_C
+ (C_bl
+ 2*C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
));
1090 delay_writeback
= tstep
;
1091 dynRdEnergy
+= (C_bl
+ 2*C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
) *
1092 (g_tp
.dram_cell_Vdd
/ 2) * g_tp
.dram_cell_Vdd
/* subarray.num_cols * num_subarrays_per_mat*/;
1093 dynWriteEnergy
+= (C_bl
+ 2*C_drain_sense_amp_iso
+ C_sense_amp_latch
) *
1094 (g_tp
.dram_cell_Vdd
/ 2) * g_tp
.dram_cell_Vdd
/* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir
*100;
1095 per_bitline_read_energy
= (C_bl
+ 2*C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
) *
1096 (g_tp
.dram_cell_Vdd
/ 2) * g_tp
.dram_cell_Vdd
;
// RC time constant tau and energies computed from the sram_cell parameters.
// With bitline muxing (deg_bl_muxing > 1) the bit-mux resistance and drain
// capacitance are included; the second tau expression omits them.
1102 if (deg_bl_muxing
> 1)
1104 tau
= (R_cell_pull_down
+ R_cell_acc
) *
1105 (C_bl
+ 2*C_drain_bit_mux
+ 2*C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
) +
1106 R_bl
* (C_bl
/2 + 2*C_drain_bit_mux
+ 2*C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
) +
1107 R_bit_mux
* (C_drain_bit_mux
+ 2*C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
) +
1108 R_sense_amp_iso
* (C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
);
1109 dynRdEnergy
+= (C_bl
+ 2 * C_drain_bit_mux
) * 2 * dp
.V_b_sense
* g_tp
.sram_cell
.Vdd
/*
1110 subarray.num_cols * num_subarrays_per_mat*/;
1111 dynRdEnergy
+= (2 * C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
) *
1112 2 * dp
.V_b_sense
* g_tp
.sram_cell
.Vdd
* (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing
);
1113 dynWriteEnergy
+= ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing
) / deg_senseamp_muxing
) *
1114 num_act_mats_hor_dir
* (C_bl
+ 2*C_drain_bit_mux
) * g_tp
.sram_cell
.Vdd
* g_tp
.sram_cell
.Vdd
*2;
1115 //Write Ops are differential for SRAM
1119 tau
= (R_cell_pull_down
+ R_cell_acc
) *
1120 (C_bl
+ C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
) + R_bl
* C_bl
/ 2 +
1121 R_sense_amp_iso
* (C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
);
1122 dynRdEnergy
+= (C_bl
+ 2 * C_drain_sense_amp_iso
+ C_sense_amp_latch
+ C_drain_sense_amp_mux
) *
1123 2 * dp
.V_b_sense
* g_tp
.sram_cell
.Vdd
/* subarray.num_cols * num_subarrays_per_mat*/;
1124 dynWriteEnergy
+= (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing
) / deg_senseamp_muxing
) *
1125 num_act_mats_hor_dir
* C_bl
) * g_tp
.sram_cell
.Vdd
* g_tp
.sram_cell
.Vdd
*2;
// tstep from the RC constant tau and the ratio V_b_pre / (V_b_pre - V_b_sense).
1128 tstep
= tau
* log(V_b_pre
/ (V_b_pre
- dp
.V_b_sense
));
1129 power_bitline
.readOp
.leakage
=
1130 leak_power_cc_inverters_sram_cell
+
1131 leak_power_acc_tr_RW_or_WR_port_sram_cell
+
1132 leak_power_acc_tr_RW_or_WR_port_sram_cell
* (RWP
+ EWP
- 1) +
1133 leak_power_RD_port_sram_cell
* ERP
;
1134 power_bitline
.readOp
.gate_leakage
= gate_leak_power_cc_inverters_sram_cell
+
1135 gate_leak_power_RD_port_sram_cell
* ERP
;
1139 // cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
1140 // cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1141 // cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
1142 // cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
1145 /* take input rise time into account */
// m is V_wl divided by inrisetime (slope of the input ramp); delay_bitline is
// then given by one of the two expressions below depending on how tstep
// compares with 0.5 * (V_wl - v_th_mem_cell) / m.
1146 double m
= V_wl
/ inrisetime
;
1147 if (tstep
<= (0.5 * (V_wl
- v_th_mem_cell
) / m
))
1149 delay_bitline
= sqrt(2 * tstep
* (V_wl
- v_th_mem_cell
)/ m
);
1153 delay_bitline
= tstep
+ (V_wl
- v_th_mem_cell
) / (2 * m
);
1156 bool is_fa
= (dp
.fully_assoc
) ? true : false;
// Dynamic energies are recorded unless this is both a tag array and fully
// associative.
1158 if (dp
.is_tag
== false || is_fa
== false)
1160 power_bitline
.readOp
.dynamic
= dynRdEnergy
;
1161 power_bitline
.writeOp
.dynamic
= dynWriteEnergy
;
// NOTE(review): outrisetime is initialised to 0; the return statement itself is
// not visible in this listing.
1164 double outrisetime
= 0;
1170 double Mat::compute_sa_delay(double inrisetime
)
1172 //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
1174 //Bitline circuitry leakage.
1175 double Iiso
= simplified_pmos_leakage(g_tp
.w_iso
, is_dram
);
1176 double IsenseEn
= simplified_nmos_leakage(g_tp
.w_sense_en
, is_dram
);
1177 double IsenseN
= simplified_nmos_leakage(g_tp
.w_sense_n
, is_dram
);
1178 double IsenseP
= simplified_pmos_leakage(g_tp
.w_sense_p
, is_dram
);
1180 double lkgIdlePh
= IsenseEn
;//+ 2*IoBufP;
1181 //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
1182 double lkgReadPh
= Iiso
+ IsenseN
+ IsenseP
;//+ IoBufN + IoBufP + 2*IsPch ;
1183 //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
1184 // lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
1185 double lkgIdle
= lkgIdlePh
/*num_sa_subarray * num_subarrays_per_mat*/;
1186 leak_power_sense_amps_closed_page_state
= lkgIdlePh
* g_tp
.peri_global
.Vdd
/* num_sa_subarray * num_subarrays_per_mat*/;
1187 leak_power_sense_amps_open_page_state
= lkgReadPh
* g_tp
.peri_global
.Vdd
/* num_sa_subarray * num_subarrays_per_mat*/;
1189 // sense amplifier has to drive logic in "data out driver" and sense precharge load.
1190 // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
1191 //constant as well as the magnitude of input differential voltage.
1192 double C_ld
= gate_C(g_tp
.w_sense_p
+ g_tp
.w_sense_n
, 0, is_dram
) +
1193 drain_C_(g_tp
.w_sense_n
, NCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
) +
1194 drain_C_(g_tp
.w_sense_p
, PCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
) +
1195 drain_C_(g_tp
.w_iso
,PCH
,1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
) +
1196 drain_C_(g_tp
.w_nmos_sa_mux
, NCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
);
1197 double tau
= C_ld
/ g_tp
.gm_sense_amp_latch
;
1198 delay_sa
= tau
* log(g_tp
.peri_global
.Vdd
/ dp
.V_b_sense
);
1199 power_sa
.readOp
.dynamic
= C_ld
* g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
/* num_sa_subarray
1200 num_subarrays_per_mat * num_act_mats_hor_dir*/;
1201 power_sa
.readOp
.leakage
= lkgIdle
* g_tp
.peri_global
.Vdd
;
1203 double outrisetime
= 0;
// Accumulates into delay_subarray_out_drv and power_subarray_out_drv the
// contribution of four stages between the sense amp and the subarray output
// driver: (1) first-level sense-amp mux pass transistor, (2) the inverter
// buffer's internal stage, (3) the inverter driving the second-level mux,
// (4) the second-level pass transistor into the subarray output driver.
// After each stage inrisetime is replaced by this_delay / (1.0 - 0.5).
// NOTE(review): `tf` is passed to horowitz() in every stage, but no assignment
// to it is visible in this listing (original lines 1218, 1231, 1244 and 1258
// are absent), and the return statement is not shown either -- confirm
// against the full source.
1209 double Mat::compute_subarray_out_drv(double inrisetime
)
1211 double C_ld
, rd
, tf
, this_delay
;
1212 double p_to_n_sz_r
= pmos_to_nmos_sz_ratio(is_dram
);
1214 // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
1215 rd
= tr_R_on(g_tp
.w_nmos_sa_mux
, NCH
, 1, is_dram
);
// NOTE(review): here and in the other drain_C_ calls of this function the
// fifth argument is `camFlag ? cam_cell.w : cell.w * ... / (...)`; `?:` has
// lower precedence than `*` and `/`, so only the cell.w alternative is scaled.
// Confirm whether (camFlag ? cam_cell.w : cell.w) * ... / (...) was intended.
1216 C_ld
= dp
.Ndsam_lev_1
* drain_C_(g_tp
.w_nmos_sa_mux
, NCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
/ (RWP
+ ERP
+ SCHP
), is_dram
) +
1217 gate_C(g_tp
.min_w_nmos_
+ p_to_n_sz_r
* g_tp
.min_w_nmos_
, 0.0, is_dram
);
1219 this_delay
= horowitz(inrisetime
, tf
, 0.5, 0.5, RISE
);
1220 delay_subarray_out_drv
+= this_delay
;
1221 inrisetime
= this_delay
/(1.0 - 0.5);
1222 power_subarray_out_drv
.readOp
.dynamic
+= C_ld
* 0.5 * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;
1223 power_subarray_out_drv
.readOp
.leakage
+= 0; // for now, let leakage of the pass transistor be 0
1224 power_subarray_out_drv
.readOp
.gate_leakage
+= cmos_Ig_leakage(g_tp
.w_nmos_sa_mux
, 0, 1, nmos
)* g_tp
.peri_global
.Vdd
;
1225 // delay of signal through inverter-buffer to second level of sense-amp mux.
1226 // internal delay of buffer
1227 rd
= tr_R_on(g_tp
.min_w_nmos_
, NCH
, 1, is_dram
);
1228 C_ld
= drain_C_(g_tp
.min_w_nmos_
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) +
1229 drain_C_(p_to_n_sz_r
* g_tp
.min_w_nmos_
, PCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) +
1230 gate_C(g_tp
.min_w_nmos_
+ p_to_n_sz_r
* g_tp
.min_w_nmos_
, 0.0, is_dram
);
1232 this_delay
= horowitz(inrisetime
, tf
, 0.5, 0.5, RISE
);
1233 delay_subarray_out_drv
+= this_delay
;
1234 inrisetime
= this_delay
/(1.0 - 0.5);
1235 power_subarray_out_drv
.readOp
.dynamic
+= C_ld
* 0.5 * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;
1236 power_subarray_out_drv
.readOp
.leakage
+= cmos_Isub_leakage(g_tp
.min_w_nmos_
, p_to_n_sz_r
* g_tp
.min_w_nmos_
, 1, inv
, is_dram
)* g_tp
.peri_global
.Vdd
;
1237 power_subarray_out_drv
.readOp
.gate_leakage
+= cmos_Ig_leakage(g_tp
.min_w_nmos_
, p_to_n_sz_r
* g_tp
.min_w_nmos_
, 1, inv
)* g_tp
.peri_global
.Vdd
;
1239 // inverter driving drain of pass transistor of second level of sense-amp mux.
1240 rd
= tr_R_on(g_tp
.min_w_nmos_
, NCH
, 1, is_dram
);
1241 C_ld
= drain_C_(g_tp
.min_w_nmos_
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) +
1242 drain_C_(p_to_n_sz_r
* g_tp
.min_w_nmos_
, PCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) +
1243 drain_C_(g_tp
.w_nmos_sa_mux
, NCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
* dp
.Ndsam_lev_1
/ (RWP
+ ERP
+ SCHP
), is_dram
);
1245 this_delay
= horowitz(inrisetime
, tf
, 0.5, 0.5, RISE
);
1246 delay_subarray_out_drv
+= this_delay
;
1247 inrisetime
= this_delay
/(1.0 - 0.5);
1248 power_subarray_out_drv
.readOp
.dynamic
+= C_ld
* 0.5 * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;
// NOTE(review): unlike the previous inverter stage, this cmos_Isub_leakage
// call does not pass is_dram -- confirm whether that is intentional.
1249 power_subarray_out_drv
.readOp
.leakage
+= cmos_Isub_leakage(g_tp
.min_w_nmos_
, p_to_n_sz_r
* g_tp
.min_w_nmos_
, 1, inv
)* g_tp
.peri_global
.Vdd
;
1250 power_subarray_out_drv
.readOp
.gate_leakage
+= cmos_Ig_leakage(g_tp
.min_w_nmos_
, p_to_n_sz_r
* g_tp
.min_w_nmos_
, 1, inv
)* g_tp
.peri_global
.Vdd
;
1253 // delay of signal through pass-transistor to input of subarray output driver.
1254 rd
= tr_R_on(g_tp
.w_nmos_sa_mux
, NCH
, 1, is_dram
);
1255 C_ld
= dp
.Ndsam_lev_2
* drain_C_(g_tp
.w_nmos_sa_mux
, NCH
, 1, 0, camFlag
? cam_cell
.w
:cell
.w
* deg_bl_muxing
* dp
.Ndsam_lev_1
/ (RWP
+ ERP
+ SCHP
), is_dram
) +
1256 //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
1257 gate_C(subarray_out_wire
->repeater_size
*(subarray_out_wire
->wire_length
/subarray_out_wire
->repeater_spacing
) * g_tp
.min_w_nmos_
* (1 + p_to_n_sz_r
), 0.0, is_dram
);
1259 this_delay
= horowitz(inrisetime
, tf
, 0.5, 0.5, RISE
);
1260 delay_subarray_out_drv
+= this_delay
;
1261 inrisetime
= this_delay
/(1.0 - 0.5);
1262 power_subarray_out_drv
.readOp
.dynamic
+= C_ld
* 0.5 * g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
;
1263 power_subarray_out_drv
.readOp
.leakage
+= 0; // for now, let leakage of the pass transistor be 0
1264 power_subarray_out_drv
.readOp
.gate_leakage
+= cmos_Ig_leakage(g_tp
.w_nmos_sa_mux
, 0, 1, nmos
)* g_tp
.peri_global
.Vdd
;
// Models the tag comparator as three inverter stages followed by the final
// evaluate stage (virtual ground driver discharging the compare part).
// Stores Tcomparatorni + st1del + st2del + st3del in delay_comparator,
// accumulates dynamic energy into power_comparator.readOp.dynamic, and sets
// power_comparator.readOp.leakage / gate_leakage from the summed currents
// multiplied by g_tp.peri_global.Vdd.
// Returns Tcomparatorni / (1.0 - VTHMUXNAND).
// NOTE(review): brace/else lines and some statements are absent from this
// listing; see the notes on `tf` and `a` below.
1272 double Mat::compute_comparator_delay(double inrisetime
)
// A is the tag associativity; energy and leakage terms below are scaled by
// 4 * A (four quarter comparators per way).
1274 int A
= g_ip
->tag_assoc
;
1276 int tagbits_
= dp
.tagbits
/ 4; // Assuming there are 4 quarter comparators. input tagbits is already
1279 /* First Inverter */
1280 double Ceq
= gate_C(g_tp
.w_comp_inv_n2
+g_tp
.w_comp_inv_p2
, 0, is_dram
) +
1281 drain_C_(g_tp
.w_comp_inv_p1
, PCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) +
1282 drain_C_(g_tp
.w_comp_inv_n1
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
);
1283 double Req
= tr_R_on(g_tp
.w_comp_inv_p1
, PCH
, 1, is_dram
);
1284 double tf
= Req
*Ceq
;
1285 double st1del
= horowitz(inrisetime
,tf
,VTHCOMPINV
,VTHCOMPINV
,FALL
);
1286 double nextinputtime
= st1del
/VTHCOMPINV
;
1287 power_comparator
.readOp
.dynamic
+= 0.5 * Ceq
* g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
* 4 * A
;
1289 //For each degree of associativity
1290 //there are 4 such quarter comparators
1291 double lkgCurrent
= cmos_Isub_leakage(g_tp
.w_comp_inv_n1
, g_tp
.w_comp_inv_p1
, 1, inv
, is_dram
)* 4 * A
;
1292 double gatelkgCurrent
= cmos_Ig_leakage(g_tp
.w_comp_inv_n1
, g_tp
.w_comp_inv_p1
, 1, inv
, is_dram
)* 4 * A
;
1293 /* Second Inverter */
1294 Ceq
= gate_C(g_tp
.w_comp_inv_n3
+g_tp
.w_comp_inv_p3
, 0, is_dram
) +
1295 drain_C_(g_tp
.w_comp_inv_p2
, PCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) +
1296 drain_C_(g_tp
.w_comp_inv_n2
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
);
1297 Req
= tr_R_on(g_tp
.w_comp_inv_n2
, NCH
, 1, is_dram
);
// NOTE(review): Req and Ceq are recomputed for this stage, but no matching
// update of `tf` is visible in this listing (original line 1298 is absent;
// likewise line 1310 for the third inverter) -- confirm that tf is refreshed
// before each horowitz() call.
1299 double st2del
= horowitz(nextinputtime
,tf
,VTHCOMPINV
,VTHCOMPINV
,RISE
);
1300 nextinputtime
= st2del
/(1.0-VTHCOMPINV
);
1301 power_comparator
.readOp
.dynamic
+= 0.5 * Ceq
* g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
* 4 * A
;
1302 lkgCurrent
+= cmos_Isub_leakage(g_tp
.w_comp_inv_n2
, g_tp
.w_comp_inv_p2
, 1, inv
, is_dram
)* 4 * A
;
1303 gatelkgCurrent
+= cmos_Ig_leakage(g_tp
.w_comp_inv_n2
, g_tp
.w_comp_inv_p2
, 1, inv
, is_dram
)* 4 * A
;
1305 /* Third Inverter */
1306 Ceq
= gate_C(g_tp
.w_eval_inv_n
+g_tp
.w_eval_inv_p
, 0, is_dram
) +
1307 drain_C_(g_tp
.w_comp_inv_p3
, PCH
, 1, 1, g_tp
.cell_h_def
, is_dram
) +
1308 drain_C_(g_tp
.w_comp_inv_n3
, NCH
, 1, 1, g_tp
.cell_h_def
, is_dram
);
1309 Req
= tr_R_on(g_tp
.w_comp_inv_p3
, PCH
, 1, is_dram
);
1311 double st3del
= horowitz(nextinputtime
,tf
,VTHCOMPINV
,VTHEVALINV
,FALL
);
1312 nextinputtime
= st3del
/(VTHEVALINV
);
1313 power_comparator
.readOp
.dynamic
+= 0.5 * Ceq
* g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
* 4 * A
;
1314 lkgCurrent
+= cmos_Isub_leakage(g_tp
.w_comp_inv_n3
, g_tp
.w_comp_inv_p3
, 1, inv
, is_dram
)* 4 * A
;
1315 gatelkgCurrent
+= cmos_Ig_leakage(g_tp
.w_comp_inv_n3
, g_tp
.w_comp_inv_p3
, 1, inv
, is_dram
)* 4 * A
;
1317 /* Final Inverter (virtual ground driver) discharging compare part */
// r1/c1 are built from the compare transistors (w_comp_n, w_comp_p) and the
// mux-driver NAND gate load; r2/c2 from the evaluate inverter (w_eval_inv_*)
// together with the compare transistors' drain capacitance.
1318 double r1
= tr_R_on(g_tp
.w_comp_n
,NCH
,2, is_dram
);
1319 double r2
= tr_R_on(g_tp
.w_eval_inv_n
,NCH
,1, is_dram
); /* was switch */
1320 double c2
= (tagbits_
)*(drain_C_(g_tp
.w_comp_n
,NCH
,1, 1, g_tp
.cell_h_def
, is_dram
) +
1321 drain_C_(g_tp
.w_comp_n
,NCH
,2, 1, g_tp
.cell_h_def
, is_dram
)) +
1322 drain_C_(g_tp
.w_eval_inv_p
,PCH
,1, 1, g_tp
.cell_h_def
, is_dram
) +
1323 drain_C_(g_tp
.w_eval_inv_n
,NCH
,1, 1, g_tp
.cell_h_def
, is_dram
);
1324 double c1
= (tagbits_
)*(drain_C_(g_tp
.w_comp_n
,NCH
,1, 1, g_tp
.cell_h_def
, is_dram
) +
1325 drain_C_(g_tp
.w_comp_n
,NCH
,2, 1, g_tp
.cell_h_def
, is_dram
)) +
1326 drain_C_(g_tp
.w_comp_p
,PCH
,1, 1, g_tp
.cell_h_def
, is_dram
) +
1327 gate_C(WmuxdrvNANDn
+WmuxdrvNANDp
,0, is_dram
);
1328 power_comparator
.readOp
.dynamic
+= 0.5 * c2
* g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
* 4 * A
;
1329 power_comparator
.readOp
.dynamic
+= c1
* g_tp
.peri_global
.Vdd
* g_tp
.peri_global
.Vdd
* (A
- 1);
1330 lkgCurrent
+= cmos_Isub_leakage(g_tp
.w_eval_inv_n
, g_tp
.w_eval_inv_p
, 1, inv
, is_dram
)* 4 * A
;
1331 lkgCurrent
+= cmos_Isub_leakage(g_tp
.w_comp_n
, g_tp
.w_comp_n
, 1, inv
, is_dram
)* 4 * A
; // stack factor of 0.2
1333 gatelkgCurrent
+= cmos_Ig_leakage(g_tp
.w_eval_inv_n
, g_tp
.w_eval_inv_p
, 1, inv
, is_dram
)* 4 * A
;
1334 gatelkgCurrent
+= cmos_Ig_leakage(g_tp
.w_comp_n
, g_tp
.w_comp_n
, 1, inv
, is_dram
)* 4 * A
;//for gate leakage this equals to a inverter
1336 /* time to go to threshold of mux driver */
1337 double tstep
= (r2
*c2
+(r1
+r2
)*c1
)*log(1.0/VTHMUXNAND
);
1338 /* take into account non-zero input rise time */
1339 double m
= g_tp
.peri_global
.Vdd
/nextinputtime
;
1340 double Tcomparatorni
;
1342 if((tstep
) <= (0.5*(g_tp
.peri_global
.Vdd
-g_tp
.peri_global
.Vth
)/m
))
// Tcomparatorni is taken as the root (-b + sqrt(b*b - 4*a*c)) / (2*a) of a
// quadratic with coefficients a, b, c.
// NOTE(review): `a` is used below but its declaration is not visible in this
// listing (original line 1344 is absent) -- confirm its definition.
1345 double b
= 2*((g_tp
.peri_global
.Vdd
*VTHEVALINV
)-g_tp
.peri_global
.Vth
);
1346 double c
= -2*(tstep
)*(g_tp
.peri_global
.Vdd
-g_tp
.peri_global
.Vth
)+1/m
*((g_tp
.peri_global
.Vdd
*VTHEVALINV
)-g_tp
.peri_global
.Vth
)*((g_tp
.peri_global
.Vdd
*VTHEVALINV
)-g_tp
.peri_global
.Vth
);
1347 Tcomparatorni
= (-b
+sqrt(b
*b
-4*a
*c
))/(2*a
);
// Alternative expression for Tcomparatorni (presumably the else branch of the
// test above; the `else` line is not visible in this listing).
1351 Tcomparatorni
= (tstep
) + (g_tp
.peri_global
.Vdd
+g_tp
.peri_global
.Vth
)/(2*m
) - (g_tp
.peri_global
.Vdd
*VTHEVALINV
)/m
;
1353 delay_comparator
= Tcomparatorni
+st1del
+st2del
+st3del
;
1354 power_comparator
.readOp
.leakage
= lkgCurrent
* g_tp
.peri_global
.Vdd
;
1355 power_comparator
.readOp
.gate_leakage
= gatelkgCurrent
* g_tp
.peri_global
.Vdd
;
// The second `;` below is an empty statement and has no effect.
1357 return Tcomparatorni
/ (1.0 - VTHMUXNAND
);;
1362 void Mat::compute_power_energy()
1364 //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
1365 //when search all subarrays and all mats are fully active
1366 //when plain read/write only one subarray in a single mat is active.
1368 // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
1369 power
.readOp
.dynamic
+= r_predec
->power
.readOp
.dynamic
+
1370 b_mux_predec
->power
.readOp
.dynamic
+
1371 sa_mux_lev_1_predec
->power
.readOp
.dynamic
+
1372 sa_mux_lev_2_predec
->power
.readOp
.dynamic
;
1374 // add energy consumed in decoders
1375 power_row_decoders
.readOp
.dynamic
= row_dec
->power
.readOp
.dynamic
;
1376 if (!(is_fa
||pure_cam
))
1377 power_row_decoders
.readOp
.dynamic
*= num_subarrays_per_mat
;
1379 // add energy consumed in bitline prechagers, SAs, and bitlines
1380 if (!(is_fa
||pure_cam
))
1382 // add energy consumed in bitline prechagers
1383 power_bl_precharge_eq_drv
.readOp
.dynamic
= bl_precharge_eq_drv
->power
.readOp
.dynamic
;
1384 power_bl_precharge_eq_drv
.readOp
.dynamic
*= num_subarrays_per_mat
;
1386 //Add sense amps energy
1387 num_sa_subarray
= subarray
.num_cols
/ deg_bl_muxing
;
1388 power_sa
.readOp
.dynamic
*= num_sa_subarray
*num_subarrays_per_mat
;
1390 // add energy consumed in bitlines
1391 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
1392 power_bitline
.readOp
.dynamic
*= num_subarrays_per_mat
*subarray
.num_cols
;
1393 power_bitline
.writeOp
.dynamic
*= num_subarrays_per_mat
*subarray
.num_cols
;
1394 //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
1395 //Add subarray output energy
1396 power_subarray_out_drv
.readOp
.dynamic
=
1397 (power_subarray_out_drv
.readOp
.dynamic
+ subarray_out_wire
->power
.readOp
.dynamic
) * num_do_b_mat
;
1399 power
.readOp
.dynamic
+= power_bl_precharge_eq_drv
.readOp
.dynamic
+
1400 power_sa
.readOp
.dynamic
+
1401 power_bitline
.readOp
.dynamic
+
1402 power_subarray_out_drv
.readOp
.dynamic
;
1404 power
.readOp
.dynamic
+= power_row_decoders
.readOp
.dynamic
+
1405 bit_mux_dec
->power
.readOp
.dynamic
+
1406 sa_mux_lev_1_dec
->power
.readOp
.dynamic
+
1407 sa_mux_lev_2_dec
->power
.readOp
.dynamic
+
1408 power_comparator
.readOp
.dynamic
;
1413 //for plain read/write only one subarray in a mat is active
1414 // add energy consumed in bitline prechagers
1415 power_bl_precharge_eq_drv
.readOp
.dynamic
= bl_precharge_eq_drv
->power
.readOp
.dynamic
1416 + cam_bl_precharge_eq_drv
->power
.readOp
.dynamic
;
1417 power_bl_precharge_eq_drv
.searchOp
.dynamic
= bl_precharge_eq_drv
->power
.readOp
.dynamic
;
1419 //Add sense amps energy
1420 num_sa_subarray
= (subarray
.num_cols_fa_cam
+ subarray
.num_cols_fa_ram
)/ deg_bl_muxing
;
1421 num_sa_subarray_search
= subarray
.num_cols_fa_ram
/ deg_bl_muxing
;
1422 power_sa
.searchOp
.dynamic
= power_sa
.readOp
.dynamic
*num_sa_subarray_search
;
1423 power_sa
.readOp
.dynamic
*= num_sa_subarray
;
1426 // add energy consumed in bitlines
1427 power_bitline
.searchOp
.dynamic
= power_bitline
.readOp
.dynamic
;
1428 power_bitline
.readOp
.dynamic
*= (subarray
.num_cols_fa_cam
+subarray
.num_cols_fa_ram
);
1429 power_bitline
.writeOp
.dynamic
*= (subarray
.num_cols_fa_cam
+subarray
.num_cols_fa_ram
);
1430 power_bitline
.searchOp
.dynamic
*= subarray
.num_cols_fa_ram
;
1432 //Add subarray output energy
1433 power_subarray_out_drv
.searchOp
.dynamic
=
1434 (power_subarray_out_drv
.readOp
.dynamic
+ subarray_out_wire
->power
.readOp
.dynamic
) * num_so_b_mat
;
1435 power_subarray_out_drv
.readOp
.dynamic
=
1436 (power_subarray_out_drv
.readOp
.dynamic
+ subarray_out_wire
->power
.readOp
.dynamic
) * num_do_b_mat
;
1439 power
.readOp
.dynamic
+= power_bl_precharge_eq_drv
.readOp
.dynamic
+
1440 power_sa
.readOp
.dynamic
+
1441 power_bitline
.readOp
.dynamic
+
1442 power_subarray_out_drv
.readOp
.dynamic
;
1444 power
.readOp
.dynamic
+= power_row_decoders
.readOp
.dynamic
+
1445 bit_mux_dec
->power
.readOp
.dynamic
+
1446 sa_mux_lev_1_dec
->power
.readOp
.dynamic
+
1447 sa_mux_lev_2_dec
->power
.readOp
.dynamic
+
1448 power_comparator
.readOp
.dynamic
;
1450 //add energy consumed inside cam
1451 power_matchline
.searchOp
.dynamic
*= num_subarrays_per_mat
;
1452 power_searchline_precharge
= sl_precharge_eq_drv
->power
;
1453 power_searchline_precharge
.searchOp
.dynamic
= power_searchline_precharge
.readOp
.dynamic
* num_subarrays_per_mat
;
1454 power_searchline
= sl_data_drv
->power
;
1455 power_searchline
.searchOp
.dynamic
= power_searchline
.readOp
.dynamic
*subarray
.num_cols_fa_cam
* num_subarrays_per_mat
;;
1456 power_matchline_precharge
= ml_precharge_drv
->power
;
1457 power_matchline_precharge
.searchOp
.dynamic
= power_matchline_precharge
.readOp
.dynamic
* num_subarrays_per_mat
;
1458 power_ml_to_ram_wl_drv
= ml_to_ram_wl_drv
->power
;
1459 power_ml_to_ram_wl_drv
.searchOp
.dynamic
= ml_to_ram_wl_drv
->power
.readOp
.dynamic
;
1461 power_cam_all_active
.searchOp
.dynamic
= power_matchline
.searchOp
.dynamic
;
1462 power_cam_all_active
.searchOp
.dynamic
+=power_searchline_precharge
.searchOp
.dynamic
;
1463 power_cam_all_active
.searchOp
.dynamic
+=power_searchline
.searchOp
.dynamic
;
1464 power_cam_all_active
.searchOp
.dynamic
+=power_matchline_precharge
.searchOp
.dynamic
;
1466 power
.searchOp
.dynamic
+= power_cam_all_active
.searchOp
.dynamic
;
1467 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1472 // add energy consumed in bitline prechagers
1473 power_bl_precharge_eq_drv
.readOp
.dynamic
= cam_bl_precharge_eq_drv
->power
.readOp
.dynamic
;
1474 //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
1475 //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
1476 //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
1478 //Add sense amps energy
1479 num_sa_subarray
= subarray
.num_cols_fa_cam
/ deg_bl_muxing
;
1480 power_sa
.readOp
.dynamic
*= num_sa_subarray
;//*num_subarrays_per_mat;
1481 power_sa
.searchOp
.dynamic
= 0;
1483 power_bitline
.readOp
.dynamic
*= subarray
.num_cols_fa_cam
;
1484 power_bitline
.searchOp
.dynamic
= 0;
1485 power_bitline
.writeOp
.dynamic
*= subarray
.num_cols_fa_cam
;
1487 power_subarray_out_drv
.searchOp
.dynamic
=
1488 (power_subarray_out_drv
.readOp
.dynamic
+ subarray_out_wire
->power
.readOp
.dynamic
) * num_so_b_mat
;
1489 power_subarray_out_drv
.readOp
.dynamic
=
1490 (power_subarray_out_drv
.readOp
.dynamic
+ subarray_out_wire
->power
.readOp
.dynamic
) * num_do_b_mat
;
1492 power
.readOp
.dynamic
+= power_bl_precharge_eq_drv
.readOp
.dynamic
+
1493 power_sa
.readOp
.dynamic
+
1494 power_bitline
.readOp
.dynamic
+
1495 power_subarray_out_drv
.readOp
.dynamic
;
1497 power
.readOp
.dynamic
+= power_row_decoders
.readOp
.dynamic
+
1498 bit_mux_dec
->power
.readOp
.dynamic
+
1499 sa_mux_lev_1_dec
->power
.readOp
.dynamic
+
1500 sa_mux_lev_2_dec
->power
.readOp
.dynamic
+
1501 power_comparator
.readOp
.dynamic
;
1504 ////add energy consumed inside cam
1505 power_matchline
.searchOp
.dynamic
*= num_subarrays_per_mat
;
1506 power_searchline_precharge
= sl_precharge_eq_drv
->power
;
1507 power_searchline_precharge
.searchOp
.dynamic
= power_searchline_precharge
.readOp
.dynamic
* num_subarrays_per_mat
;
1508 power_searchline
= sl_data_drv
->power
;
1509 power_searchline
.searchOp
.dynamic
= power_searchline
.readOp
.dynamic
*subarray
.num_cols_fa_cam
* num_subarrays_per_mat
;;
1510 power_matchline_precharge
= ml_precharge_drv
->power
;
1511 power_matchline_precharge
.searchOp
.dynamic
= power_matchline_precharge
.readOp
.dynamic
* num_subarrays_per_mat
;
1512 power_ml_to_ram_wl_drv
= ml_to_ram_wl_drv
->power
;
1513 power_ml_to_ram_wl_drv
.searchOp
.dynamic
= ml_to_ram_wl_drv
->power
.readOp
.dynamic
;
1515 power_cam_all_active
.searchOp
.dynamic
= power_matchline
.searchOp
.dynamic
;
1516 power_cam_all_active
.searchOp
.dynamic
+=power_searchline_precharge
.searchOp
.dynamic
;
1517 power_cam_all_active
.searchOp
.dynamic
+=power_searchline
.searchOp
.dynamic
;
1518 power_cam_all_active
.searchOp
.dynamic
+=power_matchline_precharge
.searchOp
.dynamic
;
1520 power
.searchOp
.dynamic
+= power_cam_all_active
.searchOp
.dynamic
;
1521 //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
1527 // calculate leakage power
1528 if (!(is_fa
|| pure_cam
))
1530 int number_output_drivers_subarray
= num_sa_subarray
/ (dp
.Ndsam_lev_1
* dp
.Ndsam_lev_2
);
1532 power_bitline
.readOp
.leakage
*= subarray
.num_rows
* subarray
.num_cols
* num_subarrays_per_mat
;
1533 power_bl_precharge_eq_drv
.readOp
.leakage
= bl_precharge_eq_drv
->power
.readOp
.leakage
* num_subarrays_per_mat
;
1534 power_sa
.readOp
.leakage
*= num_sa_subarray
*num_subarrays_per_mat
*(RWP
+ ERP
);
1536 //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1537 power_subarray_out_drv
.readOp
.leakage
=
1538 (power_subarray_out_drv
.readOp
.leakage
+ subarray_out_wire
->power
.readOp
.leakage
) *
1539 number_output_drivers_subarray
* num_subarrays_per_mat
* (RWP
+ ERP
);
1541 power
.readOp
.leakage
+= power_bitline
.readOp
.leakage
+
1542 power_bl_precharge_eq_drv
.readOp
.leakage
+
1543 power_sa
.readOp
.leakage
+
1544 power_subarray_out_drv
.readOp
.leakage
;
1545 //cout<<"leakage"<<power.readOp.leakage<<endl;
1547 power_comparator
.readOp
.leakage
*= num_do_b_mat
* (RWP
+ ERP
);
1548 power
.readOp
.leakage
+= power_comparator
.readOp
.leakage
;
1550 //cout<<"leakage1"<<power.readOp.leakage<<endl;
1553 power_row_decoders
.readOp
.leakage
= row_dec
->power
.readOp
.leakage
* subarray
.num_rows
* num_subarrays_per_mat
;
1554 power_bit_mux_decoders
.readOp
.leakage
= bit_mux_dec
->power
.readOp
.leakage
* deg_bl_muxing
;
1555 power_sa_mux_lev_1_decoders
.readOp
.leakage
= sa_mux_lev_1_dec
->power
.readOp
.leakage
* dp
.Ndsam_lev_1
;
1556 power_sa_mux_lev_2_decoders
.readOp
.leakage
= sa_mux_lev_2_dec
->power
.readOp
.leakage
* dp
.Ndsam_lev_2
;
1558 power
.readOp
.leakage
+= r_predec
->power
.readOp
.leakage
+
1559 b_mux_predec
->power
.readOp
.leakage
+
1560 sa_mux_lev_1_predec
->power
.readOp
.leakage
+
1561 sa_mux_lev_2_predec
->power
.readOp
.leakage
+
1562 power_row_decoders
.readOp
.leakage
+
1563 power_bit_mux_decoders
.readOp
.leakage
+
1564 power_sa_mux_lev_1_decoders
.readOp
.leakage
+
1565 power_sa_mux_lev_2_decoders
.readOp
.leakage
;
1566 //cout<<"leakage2"<<power.readOp.leakage<<endl;
1568 //++++Below is gate leakage
1569 power_bitline
.readOp
.gate_leakage
*= subarray
.num_rows
* subarray
.num_cols
* num_subarrays_per_mat
;
1570 power_bl_precharge_eq_drv
.readOp
.gate_leakage
= bl_precharge_eq_drv
->power
.readOp
.gate_leakage
* num_subarrays_per_mat
;
1571 power_sa
.readOp
.gate_leakage
*= num_sa_subarray
*num_subarrays_per_mat
*(RWP
+ ERP
);
1573 //num_sa_subarray = subarray.num_cols / deg_bl_muxing;
1574 power_subarray_out_drv
.readOp
.gate_leakage
=
1575 (power_subarray_out_drv
.readOp
.gate_leakage
+ subarray_out_wire
->power
.readOp
.gate_leakage
) *
1576 number_output_drivers_subarray
* num_subarrays_per_mat
* (RWP
+ ERP
);
1578 power
.readOp
.gate_leakage
+= power_bitline
.readOp
.gate_leakage
+
1579 power_bl_precharge_eq_drv
.readOp
.gate_leakage
+
1580 power_sa
.readOp
.gate_leakage
+
1581 power_subarray_out_drv
.readOp
.gate_leakage
;
1582 //cout<<"leakage"<<power.readOp.leakage<<endl;
1584 power_comparator
.readOp
.gate_leakage
*= num_do_b_mat
* (RWP
+ ERP
);
1585 power
.readOp
.gate_leakage
+= power_comparator
.readOp
.gate_leakage
;
1587 //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
1589 // gate_leakage power
1590 power_row_decoders
.readOp
.gate_leakage
= row_dec
->power
.readOp
.gate_leakage
* subarray
.num_rows
* num_subarrays_per_mat
;
1591 power_bit_mux_decoders
.readOp
.gate_leakage
= bit_mux_dec
->power
.readOp
.gate_leakage
* deg_bl_muxing
;
1592 power_sa_mux_lev_1_decoders
.readOp
.gate_leakage
= sa_mux_lev_1_dec
->power
.readOp
.gate_leakage
* dp
.Ndsam_lev_1
;
1593 power_sa_mux_lev_2_decoders
.readOp
.gate_leakage
= sa_mux_lev_2_dec
->power
.readOp
.gate_leakage
* dp
.Ndsam_lev_2
;
1595 power
.readOp
.gate_leakage
+= r_predec
->power
.readOp
.gate_leakage
+
1596 b_mux_predec
->power
.readOp
.gate_leakage
+
1597 sa_mux_lev_1_predec
->power
.readOp
.gate_leakage
+
1598 sa_mux_lev_2_predec
->power
.readOp
.gate_leakage
+
1599 power_row_decoders
.readOp
.gate_leakage
+
1600 power_bit_mux_decoders
.readOp
.gate_leakage
+
1601 power_sa_mux_lev_1_decoders
.readOp
.gate_leakage
+
1602 power_sa_mux_lev_2_decoders
.readOp
.gate_leakage
;
1606 int number_output_drivers_subarray
= num_sa_subarray
;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1608 power_bitline
.readOp
.leakage
*= subarray
.num_rows
* subarray
.num_cols
* num_subarrays_per_mat
;
1609 power_bl_precharge_eq_drv
.readOp
.leakage
= bl_precharge_eq_drv
->power
.readOp
.leakage
* num_subarrays_per_mat
;
1610 power_bl_precharge_eq_drv
.searchOp
.leakage
= cam_bl_precharge_eq_drv
->power
.readOp
.leakage
* num_subarrays_per_mat
;
1611 power_sa
.readOp
.leakage
*= num_sa_subarray
*num_subarrays_per_mat
*(RWP
+ ERP
+ SCHP
);
1613 //cout<<"leakage3"<<power.readOp.leakage<<endl;
1616 power_subarray_out_drv
.readOp
.leakage
=
1617 (power_subarray_out_drv
.readOp
.leakage
+ subarray_out_wire
->power
.readOp
.leakage
) *
1618 number_output_drivers_subarray
* num_subarrays_per_mat
* (RWP
+ ERP
+ SCHP
);
1620 power
.readOp
.leakage
+= power_bitline
.readOp
.leakage
+
1621 power_bl_precharge_eq_drv
.readOp
.leakage
+
1622 power_bl_precharge_eq_drv
.searchOp
.leakage
+
1623 power_sa
.readOp
.leakage
+
1624 power_subarray_out_drv
.readOp
.leakage
;
1626 //cout<<"leakage4"<<power.readOp.leakage<<endl;
1629 power_row_decoders
.readOp
.leakage
= row_dec
->power
.readOp
.leakage
* subarray
.num_rows
* num_subarrays_per_mat
;
1630 power
.readOp
.leakage
+= r_predec
->power
.readOp
.leakage
+
1631 power_row_decoders
.readOp
.leakage
;
1633 //cout<<"leakage5"<<power.readOp.leakage<<endl;
1636 power_cam_all_active
.searchOp
.leakage
= power_matchline
.searchOp
.leakage
;
1637 power_cam_all_active
.searchOp
.leakage
+=sl_precharge_eq_drv
->power
.readOp
.leakage
;
1638 power_cam_all_active
.searchOp
.leakage
+=sl_data_drv
->power
.readOp
.leakage
*subarray
.num_cols_fa_cam
;
1639 power_cam_all_active
.searchOp
.leakage
+=ml_precharge_drv
->power
.readOp
.dynamic
;
1640 power_cam_all_active
.searchOp
.leakage
*= num_subarrays_per_mat
;
1642 power
.readOp
.leakage
+= power_cam_all_active
.searchOp
.leakage
;
1644 // cout<<"leakage6"<<power.readOp.leakage<<endl;
1646 //+++Below is gate leakage
1647 power_bitline
.readOp
.gate_leakage
*= subarray
.num_rows
* subarray
.num_cols
* num_subarrays_per_mat
;
1648 power_bl_precharge_eq_drv
.readOp
.gate_leakage
= bl_precharge_eq_drv
->power
.readOp
.gate_leakage
* num_subarrays_per_mat
;
1649 power_bl_precharge_eq_drv
.searchOp
.gate_leakage
= cam_bl_precharge_eq_drv
->power
.readOp
.gate_leakage
* num_subarrays_per_mat
;
1650 power_sa
.readOp
.gate_leakage
*= num_sa_subarray
*num_subarrays_per_mat
*(RWP
+ ERP
+ SCHP
);
1652 //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
1655 power_subarray_out_drv
.readOp
.gate_leakage
=
1656 (power_subarray_out_drv
.readOp
.gate_leakage
+ subarray_out_wire
->power
.readOp
.gate_leakage
) *
1657 number_output_drivers_subarray
* num_subarrays_per_mat
* (RWP
+ ERP
+ SCHP
);
1659 power
.readOp
.gate_leakage
+= power_bitline
.readOp
.gate_leakage
+
1660 power_bl_precharge_eq_drv
.readOp
.gate_leakage
+
1661 power_bl_precharge_eq_drv
.searchOp
.gate_leakage
+
1662 power_sa
.readOp
.gate_leakage
+
1663 power_subarray_out_drv
.readOp
.gate_leakage
;
1665 //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
1667 // gate_leakage power
1668 power_row_decoders
.readOp
.gate_leakage
= row_dec
->power
.readOp
.gate_leakage
* subarray
.num_rows
* num_subarrays_per_mat
;
1669 power
.readOp
.gate_leakage
+= r_predec
->power
.readOp
.gate_leakage
+
1670 power_row_decoders
.readOp
.gate_leakage
;
1672 //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
1675 power_cam_all_active
.searchOp
.gate_leakage
= power_matchline
.searchOp
.gate_leakage
;
1676 power_cam_all_active
.searchOp
.gate_leakage
+=sl_precharge_eq_drv
->power
.readOp
.gate_leakage
;
1677 power_cam_all_active
.searchOp
.gate_leakage
+=sl_data_drv
->power
.readOp
.gate_leakage
*subarray
.num_cols_fa_cam
;
1678 power_cam_all_active
.searchOp
.gate_leakage
+=ml_precharge_drv
->power
.readOp
.dynamic
;
1679 power_cam_all_active
.searchOp
.gate_leakage
*= num_subarrays_per_mat
;
1681 power
.readOp
.gate_leakage
+= power_cam_all_active
.searchOp
.gate_leakage
;
1686 int number_output_drivers_subarray
= num_sa_subarray
;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
1688 //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
1689 //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
1690 power_bl_precharge_eq_drv
.searchOp
.leakage
= cam_bl_precharge_eq_drv
->power
.readOp
.leakage
* num_subarrays_per_mat
;
1691 power_sa
.readOp
.leakage
*= num_sa_subarray
*num_subarrays_per_mat
*(RWP
+ ERP
+ SCHP
);
1694 power_subarray_out_drv
.readOp
.leakage
=
1695 (power_subarray_out_drv
.readOp
.leakage
+ subarray_out_wire
->power
.readOp
.leakage
) *
1696 number_output_drivers_subarray
* num_subarrays_per_mat
* (RWP
+ ERP
+ SCHP
);
1698 power
.readOp
.leakage
+= //power_bitline.readOp.leakage +
1699 //power_bl_precharge_eq_drv.readOp.leakage +
1700 power_bl_precharge_eq_drv
.searchOp
.leakage
+
1701 power_sa
.readOp
.leakage
+
1702 power_subarray_out_drv
.readOp
.leakage
;
1705 power_row_decoders
.readOp
.leakage
= row_dec
->power
.readOp
.leakage
* subarray
.num_rows
* num_subarrays_per_mat
*(RWP
+ ERP
+ EWP
);
1706 power
.readOp
.leakage
+= r_predec
->power
.readOp
.leakage
+
1707 power_row_decoders
.readOp
.leakage
;
1710 power_cam_all_active
.searchOp
.leakage
= power_matchline
.searchOp
.leakage
;
1711 power_cam_all_active
.searchOp
.leakage
+=sl_precharge_eq_drv
->power
.readOp
.leakage
;
1712 power_cam_all_active
.searchOp
.leakage
+=sl_data_drv
->power
.readOp
.leakage
*subarray
.num_cols_fa_cam
;
1713 power_cam_all_active
.searchOp
.leakage
+=ml_precharge_drv
->power
.readOp
.dynamic
;
1714 power_cam_all_active
.searchOp
.leakage
*= num_subarrays_per_mat
;
1716 power
.readOp
.leakage
+= power_cam_all_active
.searchOp
.leakage
;
1718 //+++Below is gate leakage
1719 power_bl_precharge_eq_drv
.searchOp
.gate_leakage
= cam_bl_precharge_eq_drv
->power
.readOp
.gate_leakage
* num_subarrays_per_mat
;
1720 power_sa
.readOp
.gate_leakage
*= num_sa_subarray
*num_subarrays_per_mat
*(RWP
+ ERP
+ SCHP
);
1723 power_subarray_out_drv
.readOp
.gate_leakage
=
1724 (power_subarray_out_drv
.readOp
.gate_leakage
+ subarray_out_wire
->power
.readOp
.gate_leakage
) *
1725 number_output_drivers_subarray
* num_subarrays_per_mat
* (RWP
+ ERP
+ SCHP
);
1727 power
.readOp
.gate_leakage
+= //power_bitline.readOp.gate_leakage +
1728 //power_bl_precharge_eq_drv.readOp.gate_leakage +
1729 power_bl_precharge_eq_drv
.searchOp
.gate_leakage
+
1730 power_sa
.readOp
.gate_leakage
+
1731 power_subarray_out_drv
.readOp
.gate_leakage
;
1733 // gate_leakage power
1734 power_row_decoders
.readOp
.gate_leakage
= row_dec
->power
.readOp
.gate_leakage
* subarray
.num_rows
* num_subarrays_per_mat
*(RWP
+ ERP
+ EWP
);
1735 power
.readOp
.gate_leakage
+= r_predec
->power
.readOp
.gate_leakage
+
1736 power_row_decoders
.readOp
.gate_leakage
;
1739 power_cam_all_active
.searchOp
.gate_leakage
= power_matchline
.searchOp
.gate_leakage
;
1740 power_cam_all_active
.searchOp
.gate_leakage
+=sl_precharge_eq_drv
->power
.readOp
.gate_leakage
;
1741 power_cam_all_active
.searchOp
.gate_leakage
+=sl_data_drv
->power
.readOp
.gate_leakage
*subarray
.num_cols_fa_cam
;
1742 power_cam_all_active
.searchOp
.gate_leakage
+=ml_precharge_drv
->power
.readOp
.dynamic
;
1743 power_cam_all_active
.searchOp
.gate_leakage
*= num_subarrays_per_mat
;
1745 power
.readOp
.gate_leakage
+= power_cam_all_active
.searchOp
.gate_leakage
;