ext: McPAT interface changes and fixes
[gem5.git] / ext / mcpat / cacti / mat.cc
old mode 100755 (executable)
new mode 100644 (file)
index ef98107..4479960
@@ -2,6 +2,7 @@
  *                                McPAT/CACTI
  *                      SOFTWARE LICENSE AGREEMENT
  *            Copyright 2012 Hewlett-Packard Development Company, L.P.
+ *            Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
  *                          All Rights Reserved
  *
  * Redistribution and use in source and binary forms, with or without
@@ -25,7 +26,7 @@
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  ***************************************************************************/
 
 #include "mat.h"
 
 Mat::Mat(const DynamicParameter & dyn_p)
- :dp(dyn_p),
-  power_subarray_out_drv(),
-  delay_fa_tag(0), delay_cam(0),
-  delay_before_decoder(0), delay_bitline(0),
-  delay_wl_reset(0), delay_bl_restore(0),
-  delay_searchline(0), delay_matchchline(0),
-  delay_cam_sl_restore(0), delay_cam_ml_reset(0),
-  delay_fa_ram_wl(0),delay_hit_miss_reset(0),
-  delay_hit_miss(0),
-  subarray(dp, dp.fully_assoc),
-  power_bitline(), per_bitline_read_energy(0),
-  deg_bl_muxing(dp.deg_bl_muxing),
-  num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
-  delay_writeback(0),
-  cell(subarray.cell), cam_cell(subarray.cam_cell),
-  is_dram(dyn_p.is_dram),
-  pure_cam(dyn_p.pure_cam),
-  num_mats(dp.num_mats),
-  power_sa(), delay_sa(0),
-  leak_power_sense_amps_closed_page_state(0),
-  leak_power_sense_amps_open_page_state(0),
-  delay_subarray_out_drv(0),
-  delay_comparator(0), power_comparator(),
-  num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
-  num_subarrays_per_mat(dp.num_subarrays/dp.num_mats),
-  num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir)
-{
-  assert(num_subarrays_per_mat <= 4);
-  assert(num_subarrays_per_row <= 2);
-  is_fa = (dp.fully_assoc) ? true : false;
-  camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
-
-  if (is_fa || pure_cam)
-          num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat;
-
-  if (dp.use_inp_params == 1) {
-          RWP  = dp.num_rw_ports;
-          ERP  = dp.num_rd_ports;
-          EWP  = dp.num_wr_ports;
-          SCHP = dp.num_search_ports;
-  }
-  else {
-    RWP = g_ip->num_rw_ports;
-    ERP = g_ip->num_rd_ports;
-    EWP = g_ip->num_wr_ports;
-    SCHP = g_ip->num_search_ports;
-
-  }
-
-  double number_sa_subarray;
-
-  if (!is_fa && !pure_cam)
-  {
-          number_sa_subarray = subarray.num_cols / deg_bl_muxing;
-  }
-  else if (is_fa && !pure_cam)
-  {
-          number_sa_subarray =  (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
-  }
-
-  else
-  {
-          number_sa_subarray =  (subarray.num_cols_fa_cam) / deg_bl_muxing;
-  }
-
-  int    num_dec_signals           = subarray.num_rows;
-  double C_ld_bit_mux_dec_out      = 0;
-  double C_ld_sa_mux_lev_1_dec_out = 0;
-  double C_ld_sa_mux_lev_2_dec_out = 0;
-  double R_wire_wl_drv_out;
-
-  if (!is_fa && !pure_cam)
-    {
-            R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+    : dp(dyn_p),
+      power_subarray_out_drv(),
+      delay_fa_tag(0), delay_cam(0),
+      delay_before_decoder(0), delay_bitline(0),
+      delay_wl_reset(0), delay_bl_restore(0),
+      delay_searchline(0), delay_matchchline(0),
+      delay_cam_sl_restore(0), delay_cam_ml_reset(0),
+      delay_fa_ram_wl(0), delay_hit_miss_reset(0),
+      delay_hit_miss(0),
+      subarray(dp, dp.fully_assoc),
+      power_bitline(), per_bitline_read_energy(0),
+      deg_bl_muxing(dp.deg_bl_muxing),
+      num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir),
+      delay_writeback(0),
+      cell(subarray.cell), cam_cell(subarray.cam_cell),
+      is_dram(dyn_p.is_dram),
+      pure_cam(dyn_p.pure_cam),
+      num_mats(dp.num_mats),
+      power_sa(), delay_sa(0),
+      leak_power_sense_amps_closed_page_state(0),
+      leak_power_sense_amps_open_page_state(0),
+      delay_subarray_out_drv(0),
+      delay_comparator(0), power_comparator(),
+      num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat),
+      num_subarrays_per_mat(dp.num_subarrays / dp.num_mats),
+      num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) {
+    assert(num_subarrays_per_mat <= 4);
+    assert(num_subarrays_per_row <= 2);
+    is_fa = (dp.fully_assoc) ? true : false;
+    camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them.
+
+    if (is_fa || pure_cam) {
+        num_subarrays_per_row = num_subarrays_per_mat > 2 ?
+            num_subarrays_per_mat / 2 : num_subarrays_per_mat;
     }
-    else if (is_fa && !pure_cam)
-    {
-        R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+
+    if (dp.use_inp_params == 1) {
+        RWP  = dp.num_rw_ports;
+        ERP  = dp.num_rd_ports;
+        EWP  = dp.num_wr_ports;
+        SCHP = dp.num_search_ports;
+    } else {
+        RWP = g_ip->num_rw_ports;
+        ERP = g_ip->num_rd_ports;
+        EWP = g_ip->num_wr_ports;
+        SCHP = g_ip->num_search_ports;
+
+    }
+
+    double number_sa_subarray;
+
+    if (!is_fa && !pure_cam) {
+        number_sa_subarray = subarray.num_cols / deg_bl_muxing;
+    } else if (is_fa && !pure_cam) {
+        number_sa_subarray =  (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing;
+    }
+
+    else {
+        number_sa_subarray =  (subarray.num_cols_fa_cam) / deg_bl_muxing;
     }
-    else
-    {
+
+    int    num_dec_signals           = subarray.num_rows;
+    double C_ld_bit_mux_dec_out      = 0;
+    double C_ld_sa_mux_lev_1_dec_out = 0;
+    double C_ld_sa_mux_lev_2_dec_out = 0;
+    double R_wire_wl_drv_out;
+
+    if (!is_fa && !pure_cam) {
+        R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um;
+    } else if (is_fa && !pure_cam) {
+        R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ;
+    } else {
         R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um;
     }
 
-  double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
-  double R_wire_sa_mux_dec_out  = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
-
-  if (deg_bl_muxing > 1)
-  {
-    C_ld_bit_mux_dec_out =
-      (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) +  // 2 transistor per cell
-      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
-  }
-
-  if (dp.Ndsam_lev_1 > 1)
-  {
-    C_ld_sa_mux_lev_1_dec_out =
-      (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
-      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
-  }
-  if (dp.Ndsam_lev_2 > 1)
-  {
-    C_ld_sa_mux_lev_2_dec_out =
-      (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
-      num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w();
-  }
-
-  if (num_subarrays_per_row >= 2)
-  {
-    // wire heads for both right and left side of a mat, so half the resistance
-    R_wire_bit_mux_dec_out /= 2.0;
-    R_wire_sa_mux_dec_out  /= 2.0;
-  }
-
-
-  row_dec = new Decoder(
-      num_dec_signals,
-      false,
-      subarray.C_wl,
-      R_wire_wl_drv_out,
-      false/*is_fa*/,
-      is_dram,
-      true,
-      camFlag? cam_cell:cell);
+    double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA
+    double R_wire_sa_mux_dec_out  = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;
+
+    if (deg_bl_muxing > 1) {
+        C_ld_bit_mux_dec_out =
+            (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) *
+            gate_C(g_tp.w_nmos_b_mux, 0, is_dram) +  // 2 transistor per cell
+            num_subarrays_per_row * subarray.num_cols *
+            g_tp.wire_inside_mat.C_per_um * cell.get_w();
+    }
+
+    if (dp.Ndsam_lev_1 > 1) {
+        C_ld_sa_mux_lev_1_dec_out =
+            (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) *
+            gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+            num_subarrays_per_row * subarray.num_cols *
+            g_tp.wire_inside_mat.C_per_um * cell.get_w();
+    }
+    if (dp.Ndsam_lev_2 > 1) {
+        C_ld_sa_mux_lev_2_dec_out =
+            (num_subarrays_per_mat * number_sa_subarray /
+             (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) *
+            gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) +
+            num_subarrays_per_row * subarray.num_cols *
+            g_tp.wire_inside_mat.C_per_um * cell.get_w();
+    }
+
+    if (num_subarrays_per_row >= 2) {
+        // wire heads for both right and left side of a mat, so half the resistance
+        R_wire_bit_mux_dec_out /= 2.0;
+        R_wire_sa_mux_dec_out  /= 2.0;
+    }
+
+
+    row_dec = new Decoder(
+        num_dec_signals,
+        false,
+        subarray.C_wl,
+        R_wire_wl_drv_out,
+        false/*is_fa*/,
+        is_dram,
+        true,
+        camFlag ? cam_cell : cell);
 //  if (is_fa && (!dp.is_tag))
 //  {
 //    row_dec->exist = true;
 //  }
-  bit_mux_dec = new Decoder(
-      deg_bl_muxing,// This number is 1 for FA or CAM
-      false,
-      C_ld_bit_mux_dec_out,
-      R_wire_bit_mux_dec_out,
-      false/*is_fa*/,
-      is_dram,
-      false,
-      camFlag? cam_cell:cell);
-  sa_mux_lev_1_dec = new Decoder(
-      dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
-      dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
-      C_ld_sa_mux_lev_1_dec_out,
-      R_wire_sa_mux_dec_out,
-      false/*is_fa*/,
-      is_dram,
-      false,
-      camFlag? cam_cell:cell);
-  sa_mux_lev_2_dec = new Decoder(
-      dp.Ndsam_lev_2, // This number is 1 for FA or CAM
-      false,
-      C_ld_sa_mux_lev_2_dec_out,
-      R_wire_sa_mux_dec_out,
-      false/*is_fa*/,
-      is_dram,
-      false,
-      camFlag? cam_cell:cell);
-
-  double C_wire_predec_blk_out;
-  double R_wire_predec_blk_out;
-
-  if (!is_fa && !pure_cam)
-      {
+    bit_mux_dec = new Decoder(
+        deg_bl_muxing,// This number is 1 for FA or CAM
+        false,
+        C_ld_bit_mux_dec_out,
+        R_wire_bit_mux_dec_out,
+        false/*is_fa*/,
+        is_dram,
+        false,
+        camFlag ? cam_cell : cell);
+    sa_mux_lev_1_dec = new Decoder(
+        dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM
+        dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal
+        C_ld_sa_mux_lev_1_dec_out,
+        R_wire_sa_mux_dec_out,
+        false/*is_fa*/,
+        is_dram,
+        false,
+        camFlag ? cam_cell : cell);
+    sa_mux_lev_2_dec = new Decoder(
+        dp.Ndsam_lev_2, // This number is 1 for FA or CAM
+        false,
+        C_ld_sa_mux_lev_2_dec_out,
+        R_wire_sa_mux_dec_out,
+        false/*is_fa*/,
+        is_dram,
+        false,
+        camFlag ? cam_cell : cell);
+
+    double C_wire_predec_blk_out;
+    double R_wire_predec_blk_out;
+
+    if (!is_fa && !pure_cam) {
+
+        C_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
+        R_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
+
+    } else { //for pre-decode block's load is same for both FA and CAM
+        C_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
+        R_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
+    }
 
-          C_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h;
-          R_wire_predec_blk_out  = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h;
 
-      }
-      else //for pre-decode block's load is same for both FA and CAM
-      {
-          C_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h;
-          R_wire_predec_blk_out  = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h;
-      }
-
-
-  if (is_fa||pure_cam)
-          num_dec_signals += _log2(num_subarrays_per_mat);
-
-  PredecBlk * r_predec_blk1 = new PredecBlk(
-      num_dec_signals,
-      row_dec,
-      C_wire_predec_blk_out,
-      R_wire_predec_blk_out,
-      num_subarrays_per_mat,
-      is_dram,
-      true);
-  PredecBlk * r_predec_blk2 = new PredecBlk(
-      num_dec_signals,
-      row_dec,
-      C_wire_predec_blk_out,
-      R_wire_predec_blk_out,
-      num_subarrays_per_mat,
-      is_dram,
-      false);
-  PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
-  PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
-  PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
-  PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
-  PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
-  PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
-  dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
-  dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
-
-  PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
-  PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
-  PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
-  PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
-  PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
-  PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
-  PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
-  PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
-  way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
-  dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
-
-  r_predec            = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
-  b_mux_predec        = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
-  sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
-  sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
-
-  subarray_out_wire   = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
-
-  double driver_c_gate_load;
-  double driver_c_wire_load;
-  double driver_r_wire_load;
-
-  if (is_fa || pure_cam)
-
-  {   //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
-          driver_c_gate_load =  (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
-          driver_c_wire_load =  subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
-          driver_r_wire_load =  subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
-          cam_bl_precharge_eq_drv = new Driver(
-                          driver_c_gate_load,
-                          driver_c_wire_load,
-                          driver_r_wire_load,
-                          is_dram);
-
-          if (!pure_cam)
-          {
-                  //This is only used for fully asso not pure CAM
-                  driver_c_gate_load =  (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
-                  driver_c_wire_load =  subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um;
-                  driver_r_wire_load =  subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um;
-                  bl_precharge_eq_drv = new Driver(
-                                  driver_c_gate_load,
-                                  driver_c_wire_load,
-                                  driver_r_wire_load,
-                                  is_dram);
-          }
-  }
-
-  else
-  {
-          driver_c_gate_load =  subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
-          driver_c_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
-          driver_r_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
-          bl_precharge_eq_drv = new Driver(
-                          driver_c_gate_load,
-                          driver_c_wire_load,
-                          driver_r_wire_load,
-                          is_dram);
-  }
-  double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
-  double w_row_decoder    = area_row_decoder / subarray.area.get_h();
-
-  double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
-    compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
-
-  double h_subarray_out_drv = subarray_out_wire->area.get_area() *
-    (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
-
-
-  h_subarray_out_drv *= (RWP + ERP + SCHP);
-
-  double h_comparators                = 0.0;
-  double w_row_predecode_output_wires = 0.0;
-  double h_bit_mux_dec_out_wires      = 0.0;
-  double h_senseamp_mux_dec_out_wires = 0.0;
-
-  if ((!is_fa)&&(dp.is_tag))
-  {
-    //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
-    h_comparators  = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
-    h_comparators *= (RWP + ERP);
-  }
+    if (is_fa || pure_cam)
+        num_dec_signals += _log2(num_subarrays_per_mat);
+
+    PredecBlk * r_predec_blk1 = new PredecBlk(
+        num_dec_signals,
+        row_dec,
+        C_wire_predec_blk_out,
+        R_wire_predec_blk_out,
+        num_subarrays_per_mat,
+        is_dram,
+        true);
+    PredecBlk * r_predec_blk2 = new PredecBlk(
+        num_dec_signals,
+        row_dec,
+        C_wire_predec_blk_out,
+        R_wire_predec_blk_out,
+        num_subarrays_per_mat,
+        is_dram,
+        false);
+    PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true);
+    PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false);
+    PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true);
+    PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false);
+    PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true);
+    PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false);
+    dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true);
+    dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false);
+
+    PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram);
+    PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram);
+    PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram);
+    PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram);
+    PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram);
+    PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram);
+    PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram);
+    PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram);
+    way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram);
+    dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram);
+
+    r_predec            = new Predec(r_predec_blk_drv1, r_predec_blk_drv2);
+    b_mux_predec        = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2);
+    sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2);
+    sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2);
+
+    subarray_out_wire   = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng
+
+    double driver_c_gate_load;
+    double driver_c_wire_load;
+    double driver_r_wire_load;
+
+    if (is_fa || pure_cam)
+
+    {   //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same
+        driver_c_gate_load = (subarray.num_cols_fa_cam ) *
+            gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
+                   is_dram, false, false);
+        driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
+            g_tp.wire_outside_mat.C_per_um;
+        driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w *
+            g_tp.wire_outside_mat.R_per_um;
+        cam_bl_precharge_eq_drv = new Driver(
+            driver_c_gate_load,
+            driver_c_wire_load,
+            driver_r_wire_load,
+            is_dram);
+
+        if (!pure_cam) {
+            //This is only used for fully asso not pure CAM
+            driver_c_gate_load = (subarray.num_cols_fa_ram ) *
+                gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0,
+                       is_dram, false, false);
+            driver_c_wire_load = subarray.num_cols_fa_ram * cell.w *
+                g_tp.wire_outside_mat.C_per_um;
+            driver_r_wire_load = subarray.num_cols_fa_ram * cell.w *
+                g_tp.wire_outside_mat.R_per_um;
+            bl_precharge_eq_drv = new Driver(
+                driver_c_gate_load,
+                driver_c_wire_load,
+                driver_r_wire_load,
+                is_dram);
+        }
+    }
+
+    else {
+        driver_c_gate_load =  subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+        driver_c_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um;
+        driver_r_wire_load =  subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um;
+        bl_precharge_eq_drv = new Driver(
+            driver_c_gate_load,
+            driver_c_wire_load,
+            driver_r_wire_load,
+            is_dram);
+    }
+    double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP);
+    double w_row_decoder    = area_row_decoder / subarray.area.get_h();
+
+    double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux =
+        compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h();
+
+    double h_subarray_out_drv = subarray_out_wire->area.get_area() *
+                                (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w();
+
+
+    h_subarray_out_drv *= (RWP + ERP + SCHP);
+
+    double h_comparators                = 0.0;
+    double w_row_predecode_output_wires = 0.0;
+    double h_bit_mux_dec_out_wires      = 0.0;
+    double h_senseamp_mux_dec_out_wires = 0.0;
+
+    if ((!is_fa) && (dp.is_tag)) {
+        //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat;
+        h_comparators  = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w());
+        h_comparators *= (RWP + ERP);
+    }
 
 
     int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits);
     int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits);
     w_row_predecode_output_wires   = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) *
-      g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
-
-
-  double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
-                           (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
-                            h_subarray_out_drv + h_comparators);
-
-  double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
-
-  if (deg_bl_muxing > 1)
-  {
-    h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
-  }
-  if (dp.Ndsam_lev_1 > 1)
-  {
-    h_senseamp_mux_dec_out_wires =  dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
-  }
-  if (dp.Ndsam_lev_2 > 1)
-  {
-    h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
-  }
-
-  double h_addr_datain_wires;
-  if (!g_ip->ver_htree_wires_over_array)
-  {
-    h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +
-                                  (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) *
-                                 g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+                                     g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+
+    double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) *
+                             (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux +
+                              h_subarray_out_drv + h_comparators);
+
+    double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder);
+
+    if (deg_bl_muxing > 1) {
+        h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+    }
+    if (dp.Ndsam_lev_1 > 1) {
+        h_senseamp_mux_dec_out_wires =  dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+    }
+    if (dp.Ndsam_lev_2 > 1) {
+        h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP);
+    }
+
+    double h_addr_datain_wires;
+    if (!g_ip->ver_htree_wires_over_array) {
+        h_addr_datain_wires = (dp.number_addr_bits_mat +
+                               dp.number_way_select_signals_mat +
+                               (dp.num_di_b_mat + dp.num_do_b_mat) /
+                               num_subarrays_per_row) *
+            g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP);
+
+        if (is_fa || pure_cam) {
+            h_addr_datain_wires =
+                (dp.number_addr_bits_mat +
+                 dp.number_way_select_signals_mat +  //TODO: revisit
+                 (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) *
+                g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
+                (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row *
+                g_tp.wire_inside_mat.pitch * SCHP;
+        }
+        //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
+        //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
+        h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
+                           h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
+                          h_addr_datain_wires +
+                          h_bit_mux_dec_out_wires +
+                          h_senseamp_mux_dec_out_wires;
 
-    if (is_fa || pure_cam)
-    {
-        h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat +     //TODO: revisit
-                                      (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) *
-                                       g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) +
-                                       (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP;
     }
-    //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux +
-    //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv);
-    h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators +
-                       h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) +
-                      h_addr_datain_wires +
-                      h_bit_mux_dec_out_wires +
-                      h_senseamp_mux_dec_out_wires;
-
-  }
-
-  // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
-  double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
-                                      b_mux_predec_blk_drv1->area.get_area() +
-                                      sa_mux_lev_1_predec_blk_drv1->area.get_area() +
-                                      sa_mux_lev_2_predec_blk_drv1->area.get_area() +
-                                      way_sel_drv1->area.get_area() +
-                                      r_predec_blk_drv2->area.get_area() +
-                                      b_mux_predec_blk_drv2->area.get_area() +
-                                      sa_mux_lev_1_predec_blk_drv2->area.get_area() +
-                                      sa_mux_lev_2_predec_blk_drv2->area.get_area() +
-                                      r_predec_blk1->area.get_area() +
-                                      b_mux_predec_blk1->area.get_area() +
-                                      sa_mux_lev_1_predec_blk1->area.get_area() +
-                                      sa_mux_lev_2_predec_blk1->area.get_area() +
-                                      r_predec_blk2->area.get_area() +
-                                      b_mux_predec_blk2->area.get_area() +
-                                      sa_mux_lev_1_predec_blk2->area.get_area() +
-                                      sa_mux_lev_2_predec_blk2->area.get_area() +
-                                      bit_mux_dec->area.get_area() +
-                                      sa_mux_lev_1_dec->area.get_area() +
-                                      sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
-
-  double area_efficiency_mat;
+
+    // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area;
+    double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() +
+                                        b_mux_predec_blk_drv1->area.get_area() +
+                                        sa_mux_lev_1_predec_blk_drv1->area.get_area() +
+                                        sa_mux_lev_2_predec_blk_drv1->area.get_area() +
+                                        way_sel_drv1->area.get_area() +
+                                        r_predec_blk_drv2->area.get_area() +
+                                        b_mux_predec_blk_drv2->area.get_area() +
+                                        sa_mux_lev_1_predec_blk_drv2->area.get_area() +
+                                        sa_mux_lev_2_predec_blk_drv2->area.get_area() +
+                                        r_predec_blk1->area.get_area() +
+                                        b_mux_predec_blk1->area.get_area() +
+                                        sa_mux_lev_1_predec_blk1->area.get_area() +
+                                        sa_mux_lev_2_predec_blk1->area.get_area() +
+                                        r_predec_blk2->area.get_area() +
+                                        b_mux_predec_blk2->area.get_area() +
+                                        sa_mux_lev_1_predec_blk2->area.get_area() +
+                                        sa_mux_lev_2_predec_blk2->area.get_area() +
+                                        bit_mux_dec->area.get_area() +
+                                        sa_mux_lev_1_dec->area.get_area() +
+                                        sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP);
+
+    double area_efficiency_mat;
 
 //  if (!is_fa)
 //  {
-    assert(num_subarrays_per_mat/num_subarrays_per_row>0);
-    area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area;
+    assert(num_subarrays_per_mat / num_subarrays_per_row > 0);
+    area.h = (num_subarrays_per_mat / num_subarrays_per_row) *
+        subarray.area.h + h_non_cell_area;
     area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area;
-    area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
-    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area();
+    area.w = (area.h * area.w + area_mat_center_circuitry) / area.h;
+    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat *
+        100.0 / area.get_area();
 
 //    cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<<h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux<<endl;
 //    cout<<"h_comparators"<<h_comparators<<endl;
@@ -413,8 +412,8 @@ Mat::Mat(const DynamicParameter & dyn_p)
 //    cout<<"w_non_cell_area"<<w_non_cell_area<<endl;
 //    cout<<"area_mat_center_circuitry"<<area_mat_center_circuitry<<endl;
 
-    assert(area.h>0);
-    assert(area.w>0);
+    assert(area.h > 0);
+    assert(area.w > 0);
 //  }
 //  else
 //  {
@@ -423,583 +422,609 @@ Mat::Mat(const DynamicParameter & dyn_p)
 //    area.w = (area.h*area.w + area_mat_center_circuitry) / area.h;
 //    area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area();
 //  }
-  }
-
-
-
-Mat::~Mat()
-{
-  delete row_dec;
-  delete bit_mux_dec;
-  delete sa_mux_lev_1_dec;
-  delete sa_mux_lev_2_dec;
-
-  delete r_predec->blk1;
-  delete r_predec->blk2;
-  delete b_mux_predec->blk1;
-  delete b_mux_predec->blk2;
-  delete sa_mux_lev_1_predec->blk1;
-  delete sa_mux_lev_1_predec->blk2;
-  delete sa_mux_lev_2_predec->blk1;
-  delete sa_mux_lev_2_predec->blk2;
-  delete dummy_way_sel_predec_blk1;
-  delete dummy_way_sel_predec_blk2;
-
-  delete r_predec->drv1;
-  delete r_predec->drv2;
-  delete b_mux_predec->drv1;
-  delete b_mux_predec->drv2;
-  delete sa_mux_lev_1_predec->drv1;
-  delete sa_mux_lev_1_predec->drv2;
-  delete sa_mux_lev_2_predec->drv1;
-  delete sa_mux_lev_2_predec->drv2;
-  delete way_sel_drv1;
-  delete dummy_way_sel_predec_blk_drv2;
-
-  delete r_predec;
-  delete b_mux_predec;
-  delete sa_mux_lev_1_predec;
-  delete sa_mux_lev_2_predec;
-
-  delete subarray_out_wire;
-  if (!pure_cam)
-    delete bl_precharge_eq_drv;
-
-  if (is_fa || pure_cam)
-  {
-    delete sl_precharge_eq_drv ;
-    delete sl_data_drv ;
-    delete cam_bl_precharge_eq_drv;
-    delete ml_precharge_drv;
-    delete ml_to_ram_wl_drv;
-  }
 }
 
 
 
-double Mat::compute_delays(double inrisetime)
-{
-        int k;
-        double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl;
-        double outrisetime_search, outrisetime, row_dec_outrisetime;
-        // delay calculation for tags of fully associative cache
-        if (is_fa || pure_cam)
-        {
-                //Compute search access time
-                outrisetime_search = compute_cam_delay(inrisetime);
-                if (is_fa)
-                {
-                        bl_precharge_eq_drv->compute_delay(0);
-                        k = ml_to_ram_wl_drv->number_gates - 1;
-                        rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
-                        C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
-                        drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
-                        C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load;
-                        tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
-                        delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
-
-                        R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
-                        r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
-                        R_bl = subarray.num_rows * r_b_metal;
-                        C_bl = subarray.C_bl;
-                        delay_bl_restore = bl_precharge_eq_drv->delay +
-                                 log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
-                                 (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
-
-                        outrisetime_search = compute_bitline_delay(outrisetime_search);
-                        outrisetime_search = compute_sa_delay(outrisetime_search);
-                }
-                        outrisetime_search = compute_subarray_out_drv(outrisetime_search);
-                        subarray_out_wire->set_in_rise_time(outrisetime_search);
-                        outrisetime_search = subarray_out_wire->signal_rise_time();
-                        delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
-
-                        //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
-                        outrisetime = r_predec->compute_delays(inrisetime);
-                        row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
-                        outrisetime = b_mux_predec->compute_delays(inrisetime);
-                        bit_mux_dec->compute_delays(outrisetime);
-
-                        outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
-                        sa_mux_lev_1_dec->compute_delays(outrisetime);
-
-                        outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
-                        sa_mux_lev_2_dec->compute_delays(outrisetime);
-
-                        if (pure_cam)
-                        {
-                          outrisetime = compute_bitline_delay(row_dec_outrisetime);
-                          outrisetime = compute_sa_delay(outrisetime);
-                        }
-                        return outrisetime_search;
+Mat::~Mat() {
+    delete row_dec;
+    delete bit_mux_dec;
+    delete sa_mux_lev_1_dec;
+    delete sa_mux_lev_2_dec;
+
+    delete r_predec->blk1;
+    delete r_predec->blk2;
+    delete b_mux_predec->blk1;
+    delete b_mux_predec->blk2;
+    delete sa_mux_lev_1_predec->blk1;
+    delete sa_mux_lev_1_predec->blk2;
+    delete sa_mux_lev_2_predec->blk1;
+    delete sa_mux_lev_2_predec->blk2;
+    delete dummy_way_sel_predec_blk1;
+    delete dummy_way_sel_predec_blk2;
+
+    delete r_predec->drv1;
+    delete r_predec->drv2;
+    delete b_mux_predec->drv1;
+    delete b_mux_predec->drv2;
+    delete sa_mux_lev_1_predec->drv1;
+    delete sa_mux_lev_1_predec->drv2;
+    delete sa_mux_lev_2_predec->drv1;
+    delete sa_mux_lev_2_predec->drv2;
+    delete way_sel_drv1;
+    delete dummy_way_sel_predec_blk_drv2;
+
+    delete r_predec;
+    delete b_mux_predec;
+    delete sa_mux_lev_1_predec;
+    delete sa_mux_lev_2_predec;
+
+    delete subarray_out_wire;
+    if (!pure_cam)
+        delete bl_precharge_eq_drv;
+
+    if (is_fa || pure_cam) {
+        delete sl_precharge_eq_drv ;
+        delete sl_data_drv ;
+        delete cam_bl_precharge_eq_drv;
+        delete ml_precharge_drv;
+        delete ml_to_ram_wl_drv;
+    }
+}
+
+
+
+double Mat::compute_delays(double inrisetime) {
+    int k;
+    double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl;
+    double outrisetime_search, outrisetime, row_dec_outrisetime;
+    // delay calculation for tags of fully associative cache
+    if (is_fa || pure_cam) {
+        //Compute search access time
+        outrisetime_search = compute_cam_delay(inrisetime);
+        if (is_fa) {
+            bl_precharge_eq_drv->compute_delay(0);
+            k = ml_to_ram_wl_drv->number_gates - 1;
+            rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true);
+            C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 *
+                                   cell.h, is_dram, false, true) +
+                drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h,
+                         is_dram, false, true);
+            C_ld = ml_to_ram_wl_drv->c_gate_load +
+                ml_to_ram_wl_drv->c_wire_load;
+            tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2;
+            delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+
+            R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+            r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in
+            R_bl = subarray.num_rows * r_b_metal;
+            C_bl = subarray.C_bl;
+            delay_bl_restore = bl_precharge_eq_drv->delay +
+                log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
+                    (g_tp.sram.Vbitpre - dp.V_b_sense)) *
+                (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+
+            outrisetime_search = compute_bitline_delay(outrisetime_search);
+            outrisetime_search = compute_sa_delay(outrisetime_search);
+        }
+        outrisetime_search = compute_subarray_out_drv(outrisetime_search);
+        subarray_out_wire->set_in_rise_time(outrisetime_search);
+        outrisetime_search = subarray_out_wire->signal_rise_time();
+        delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+
+        //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited.
+        outrisetime = r_predec->compute_delays(inrisetime);
+        row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+        outrisetime = b_mux_predec->compute_delays(inrisetime);
+        bit_mux_dec->compute_delays(outrisetime);
+
+        outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+        sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+        outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+        sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+        if (pure_cam) {
+            outrisetime = compute_bitline_delay(row_dec_outrisetime);
+            outrisetime = compute_sa_delay(outrisetime);
+        }
+        return outrisetime_search;
+    } else {
+        bl_precharge_eq_drv->compute_delay(0);
+        if (row_dec->exist == true) {
+            int k = row_dec->num_gates - 1;
+            double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
+            // TODO: this 4*cell.h number must be revisited
+            double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 *
+                                          cell.h, is_dram, false, true) +
+                drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram,
+                         false, true);
+            double C_ld = row_dec->C_ld_dec_out;
+            double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
+            delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
+        }
+        double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
+        double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
+        double R_bl = subarray.num_rows * r_b_metal;
+        double C_bl = subarray.C_bl;
+
+        if (is_dram) {
+            delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+        } else {
+            delay_bl_restore = bl_precharge_eq_drv->delay +
+                log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) /
+                    (g_tp.sram.Vbitpre - dp.V_b_sense)) *
+                (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+        }
+    }
+
+
+
+    outrisetime = r_predec->compute_delays(inrisetime);
+    row_dec_outrisetime = row_dec->compute_delays(outrisetime);
+
+    outrisetime = b_mux_predec->compute_delays(inrisetime);
+    bit_mux_dec->compute_delays(outrisetime);
+
+    outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
+    sa_mux_lev_1_dec->compute_delays(outrisetime);
+
+    outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
+    sa_mux_lev_2_dec->compute_delays(outrisetime);
+
+    outrisetime = compute_bitline_delay(row_dec_outrisetime);
+    outrisetime = compute_sa_delay(outrisetime);
+    outrisetime = compute_subarray_out_drv(outrisetime);
+    subarray_out_wire->set_in_rise_time(outrisetime);
+    outrisetime = subarray_out_wire->signal_rise_time();
+
+    delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
+
+    if (dp.is_tag == true && dp.fully_assoc == false) {
+        compute_comparator_delay(0);
     }
-        else
-        {
-                bl_precharge_eq_drv->compute_delay(0);
-                if (row_dec->exist == true)
-                {
-                        int k = row_dec->num_gates - 1;
-                        double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true);
-                        // TODO: this 4*cell.h number must be revisited
-                        double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) +
-                        drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true);
-                        double C_ld = row_dec->C_ld_dec_out;
-                        double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2;
-                        delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE);
-                }
-                double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);
-                double r_b_metal = cell.h * g_tp.wire_local.R_per_um;
-                double R_bl = subarray.num_rows * r_b_metal;
-                double C_bl = subarray.C_bl;
-
-                if (is_dram)
-                {
-                        delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-                }
-                else
-                {
-                        delay_bl_restore = bl_precharge_eq_drv->delay +
-                        log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))*
-                        (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-                }
-  }
-
-
-
-  outrisetime = r_predec->compute_delays(inrisetime);
-  row_dec_outrisetime = row_dec->compute_delays(outrisetime);
-
-  outrisetime = b_mux_predec->compute_delays(inrisetime);
-  bit_mux_dec->compute_delays(outrisetime);
-
-  outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime);
-  sa_mux_lev_1_dec->compute_delays(outrisetime);
-
-  outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime);
-  sa_mux_lev_2_dec->compute_delays(outrisetime);
-
-  outrisetime = compute_bitline_delay(row_dec_outrisetime);
-  outrisetime = compute_sa_delay(outrisetime);
-  outrisetime = compute_subarray_out_drv(outrisetime);
-  subarray_out_wire->set_in_rise_time(outrisetime);
-  outrisetime = subarray_out_wire->signal_rise_time();
-
-  delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay;
-
-  if (dp.is_tag == true && dp.fully_assoc == false)
-  {
-    compute_comparator_delay(0);
-  }
-
-  if (row_dec->exist == false)
-    {
-      delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
+
+    if (row_dec->exist == false) {
+        delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay);
     }
-  return outrisetime;
+    return outrisetime;
 }
 
 
 
-double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h()
-{
-
-  double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) +
-    compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP));  // precharge circuitry
-
-  if (deg_bl_muxing > 1)
-  {
-    height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP)));  // col mux tr height
-    // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);  // bit mux dec out wires height
-  }
-
-  height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP));  // sense_amp_height
-
-  if (dp.Ndsam_lev_1 > 1)
-  {
-    height += compute_tr_width_after_folding(
-        g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
-    //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
-  }
-
-  if (dp.Ndsam_lev_2 > 1)
-  {
-    height += compute_tr_width_after_folding(
-        g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
-    //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
-
-    // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
-    height += 2 * compute_tr_width_after_folding(
-        pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
-    height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
-  }
-
-  // TODO: this should be uncommented...
-  /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
-    {
-  //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
-  double width_write_driver_write_mux  = width_write_driver_or_write_mux();
-  double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
-  cell.w *
-  // deg_bl_muxing *
-  dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
-  height += height_write_driver_write_mux;
-  }*/
-
-  return height;
+double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() {
+
+    double height =
+        compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge,
+                                       camFlag ? cam_cell.w :
+                                       cell.w / (2 * (RWP + ERP + SCHP))) +
+        // precharge circuitry
+        compute_tr_width_after_folding(g_tp.w_pmos_bl_eq,
+                                       camFlag ? cam_cell.w :
+                                       cell.w / (RWP + ERP + SCHP));
+
+    if (deg_bl_muxing > 1) {
+        // col mux tr height
+        height +=
+            compute_tr_width_after_folding(g_tp.w_nmos_b_mux,
+                                           cell.w / (2 * (RWP + ERP)));
+        // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP);  // bit mux dec out wires height
+    }
+
+    height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP));  // sense_amp_height
+
+    if (dp.Ndsam_lev_1 > 1) {
+        height += compute_tr_width_after_folding(
+                      g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
+        //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+    }
+
+    if (dp.Ndsam_lev_2 > 1) {
+        height += compute_tr_width_after_folding(
+                      g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP));  // sense_amp_mux_height
+        //height_senseamp_mux_decode_output_wires =  Ndsam * wire_inside_mat_pitch * (RWP + ERP);
+
+        // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux
+        height += 2 * compute_tr_width_after_folding(
+                      pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+        height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP));
+    }
+
+    // TODO: this should be uncommented...
+    /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1)
+      {
+    //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP);
+    double width_write_driver_write_mux  = width_write_driver_or_write_mux();
+    double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux,
+    cell.w *
+    // deg_bl_muxing *
+    dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP));
+    height += height_write_driver_write_mux;
+    }*/
+
+    return height;
 }
 
 
 
-double Mat::compute_cam_delay(double inrisetime)
-{
+double Mat::compute_cam_delay(double inrisetime) {
 
-  double out_time_ramp, this_delay;
-  double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
+    double out_time_ramp, this_delay;
+    double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load;
 
 
-  double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
+    double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p,
     Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp,
     Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp,
     Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p;
 
-  double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal,  dynSearchEng;
-  int Htagbits;
-
-  double driver_c_gate_load;
-  double driver_c_wire_load;
-  double driver_r_wire_load;
-  //double searchline_precharge_time;
-
-  double leak_power_cc_inverters_sram_cell         = 0;
-  double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
-  double leak_power_RD_port_sram_cell              = 0;
-  double leak_power_SCHP_port_sram_cell            = 0;
-  double leak_comparator_cam_cell                  =0;
-
-  double gate_leak_comparator_cam_cell          = 0;
-  double gate_leak_power_cc_inverters_sram_cell = 0;
-  double gate_leak_power_RD_port_sram_cell      = 0;
-  double gate_leak_power_SCHP_port_sram_cell    = 0;
-
-  c_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.C_per_um;
-  c_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.C_per_um;
-  r_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.R_per_um;
-  r_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.R_per_um;
-
-  dynSearchEng = 0.0;
-  delay_matchchline = 0.0;
-  double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
-  bool linear_scaling = false;
-
-  if (linear_scaling)
-  {
-          Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
-          Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
-          Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
-          Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
-          Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
-          Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
-          Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
-          Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
-          Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
-          Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
-          Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
-          Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
-          Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
-          Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
-          Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
-          Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
-
-          Wfaprechp     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          Wdummyn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
-          Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
-          Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
-          Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
-          Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
-          Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          W_hit_miss_n    = Wdummyn;
-          W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
-          //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
-  }
-  else
-  {
-          Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
-          Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
-          Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
-          Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
-          Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
-          Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
-          Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
-          Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
-          Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
-          Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
-          Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
-          Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
-          Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
-          Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
-          Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
-          Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
-
-          Wfaprechp     = g_tp.w_pmos_bl_precharge;//this was  10 micron for the 0.8 micron process
-          Wdummyn       = g_tp.cam.cell_nmos_w;
-          Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
-          Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
-          Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
-          Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
-          Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
-          Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
-          W_hit_miss_n    = Wdummyn;
-          W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
-  }
-
-  Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
-
-  /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
-     search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
-     From the driver(am and an) to the comparators in all the rows including the dummy row,
-     Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
-
-  //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
-  //Searchline precharge routes horizontally
-  driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
-  driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
-  driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
-
-  sl_precharge_eq_drv = new Driver(
-      driver_c_gate_load,
-          driver_c_wire_load,
-      driver_r_wire_load,
-      is_dram);
-
-  //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
-  //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
-  driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
-  driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
-  driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
-  sl_data_drv = new Driver(
-      driver_c_gate_load,
-          driver_c_wire_load,
-      driver_r_wire_load,
-      is_dram);
-
-  sl_precharge_eq_drv->compute_delay(0);
-  double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
-  double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
-  double R_bl = (subarray.num_rows + 1) * r_b_metal;
-  double C_bl = subarray.C_bl_cam;
-  delay_cam_sl_restore = sl_precharge_eq_drv->delay
-                         + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2);
-
-  out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
-
-  //matchline ops delay
-  delay_matchchline += sl_data_drv->delay;
-
-  /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
-  //matchline delay, matchline power, matchline_reset for cycle time computation,
-
-  ////matchline precharge circuitry routes vertically
-  //There are two matchline precharge driver chains per subarray.
-  driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
-  driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
-  driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
-
-  ml_precharge_drv = new Driver(
-                                                  driver_c_gate_load,
-                              driver_c_wire_load,
-                          driver_r_wire_load,
-                          is_dram);
-
-  ml_precharge_drv->compute_delay(0);
-
-
-  rd =  tr_R_on(Wdummyn, NCH, 2, is_dram);
-  c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit
-                                  + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline
-
-  Cwire = c_matchline_metal * Htagbits;
-  Rwire = r_matchline_metal * Htagbits;
-  c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
-
-  double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
-  //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
-  double R_ml = Rwire;
-  double C_ml = Cwire + c_intrinsic;
-  delay_cam_ml_reset = ml_precharge_drv->delay
-                           + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too
-
-  //matchline ops delay
-  tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
-  this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
-  delay_matchchline += this_delay;
-  out_time_ramp = this_delay / VTHFA3;
-
-  dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise
-                                          * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves
-
-  /* third stage, from the NAND2 gates to the drivers in the dummy row */
-  rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
-  c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
-                drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram)*2;
-  c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
-  tf = rd * (c_intrinsic + c_gate_load);
-  this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
-  out_time_ramp = this_delay / (1 - VTHFA4);
-  delay_matchchline += this_delay;
-
-  //only the dummy row has the extra inverter between NAND and NOR gates
-  dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//  * Ntbl;
-
-  /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
-  rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
-  c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
-  Cwire = c_matchline_metal * Htagbits +  c_searchline_metal * (subarray.num_rows+1)/2;
-  Rwire = r_matchline_metal * Htagbits +  r_searchline_metal * (subarray.num_rows+1)/2;
-  c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
-  tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
-  this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
-  out_time_ramp = this_delay / VTHFA5;
-  delay_matchchline += this_delay;
-
-  dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
-  /*final statge from the NOR gate to drive the wordline of the data portion */
-
-  //searchline data driver There are two matchline precharge driver chains per subarray.
-  driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
-  driver_c_wire_load = subarray.C_wl_ram;
-  driver_r_wire_load = subarray.R_wl_ram;
-
-  ml_to_ram_wl_drv = new Driver(
-                                                  driver_c_gate_load,
-                              driver_c_wire_load,
-                          driver_r_wire_load,
-                          is_dram);
-
-
-
-  rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
-  c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
-  c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
-  tf = rd * (c_intrinsic + c_gate_load);
-  this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
-  out_time_ramp = this_delay / (1-0.5);
-  delay_matchchline += this_delay;
-
-  out_time_ramp   = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
-
-  //c_gate_load energy is computed in ml_to_ram_wl_drv
-  dynSearchEng  += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
-
-
-  /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
-  /*Precharge the hitting logic */
-  c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
-  Cwire = c_searchline_metal * subarray.num_rows;
-  Rwire = r_searchline_metal * subarray.num_rows;
-  c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
-  rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
-  //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
-  double R_hit_miss = Rwire;
-  double C_hit_miss = Cwire + c_intrinsic;
-  delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
-  dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
-  /*hitting logic evaluation */
-  c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
-  Cwire = c_searchline_metal * subarray.num_rows;
-  Rwire = r_searchline_metal * subarray.num_rows;
-  c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows;
-
-  rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
-  tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
-
-  delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
-
-  if (is_fa)
-      delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
-
-  dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-
-  /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
-
-  power_matchline.searchOp.dynamic = dynSearchEng;
-
-  //leakage in one subarray
-  double Iport     = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  1, nmos, false, true);//TODO: how much is the idle time? just by *2?
-  double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  2, nmos, false, true);
-  double Icell     = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
-  double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv
-
-  leak_power_cc_inverters_sram_cell         = Icell * g_tp.cam_cell.Vdd;
-  leak_comparator_cam_cell                  = Icell_comparator * g_tp.cam_cell.Vdd;
-  leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
-  leak_power_RD_port_sram_cell              = Iport_erp * g_tp.cam_cell.Vdd;
-  leak_power_SCHP_port_sram_cell            = 0;//search port and r/w port are sperate, therefore no access txs in search ports
-
-  power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
-    leak_comparator_cam_cell +
-    leak_power_acc_tr_RW_or_WR_port_sram_cell +
-    leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
-    leak_power_RD_port_sram_cell * ERP +
-    leak_power_SCHP_port_sram_cell*SCHP;
+    double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal,  dynSearchEng;
+    int Htagbits;
+
+    double driver_c_gate_load;
+    double driver_c_wire_load;
+    double driver_r_wire_load;
+    //double searchline_precharge_time;
+
+    double leak_power_cc_inverters_sram_cell         = 0;
+    double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+    double leak_power_RD_port_sram_cell              = 0;
+    double leak_power_SCHP_port_sram_cell            = 0;
+    double leak_comparator_cam_cell                  =0;
+
+    double gate_leak_comparator_cam_cell          = 0;
+    double gate_leak_power_cc_inverters_sram_cell = 0;
+    double gate_leak_power_RD_port_sram_cell      = 0;
+    double gate_leak_power_SCHP_port_sram_cell    = 0;
+
+    c_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.C_per_um;
+    c_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.C_per_um;
+    r_matchline_metal   = cam_cell.get_w() * g_tp.wire_local.R_per_um;
+    r_searchline_metal  = cam_cell.get_h() * g_tp.wire_local.R_per_um;
+
+    dynSearchEng = 0.0;
+    delay_matchchline = 0.0;
+    double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram);
+    bool linear_scaling = false;
+
+    if (linear_scaling) {
+        Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+        Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+        Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
+        Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+        Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+        Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+        Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
+        Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
+        Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
+        Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
+        Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+        Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
+        Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
+        Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
+        Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
+        Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
+
+        Wfaprechp     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        Wdummyn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
+        Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
+        Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
+        Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
+        Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
+        Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        W_hit_miss_n    = Wdummyn;
+        W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
+        //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort
+    } else {
+        Wdecdrivep    =  450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process
+        Wdecdriven    =  300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process
+        Wfadriven     = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
+        Wfadrivep     =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+        Wfadrive2n    =  250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process
+        Wfadrive2p    =  500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process
+        Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
+        Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        Wfadecdrive2n =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
+        Wfadecdrive2p =   50 * g_ip->F_sz_um;//this was  40 micron for the 0.8 micron process
+        Wfadecdriven  = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
+        Wfadecdrivep  =  125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process
+        Wfaprechn     =  7.5 * g_ip->F_sz_um;//this was   6 micron for the 0.8 micron process
+        Wfainvn       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        Wfainvp       =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
+        Wfanandn      =   25 * g_ip->F_sz_um;//this was  20 micron for the 0.8 micron process
+        Wfanandp      = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
+        Wdecnandn     = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        Wdecnandp     = 37.5 * g_ip->F_sz_um;//this was  30 micron for the 0.8 micron process
+
+        Wfaprechp     = g_tp.w_pmos_bl_precharge;//this was  10 micron for the 0.8 micron process
+        Wdummyn       = g_tp.cam.cell_nmos_w;
+        Wdummyinvn    =   75 * g_ip->F_sz_um;//this was  60 micron for the 0.8 micron process
+        Wdummyinvp    =  100 * g_ip->F_sz_um;//this was  80 micron for the 0.8 micron process
+        Waddrnandn    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
+        Waddrnandp    = 62.5 * g_ip->F_sz_um;//this was  50 micron for the 0.8 micron process
+        Wfanorn       = 6.25 * g_ip->F_sz_um;//this was   5 micron for the 0.8 micron process
+        Wfanorp       = 12.5 * g_ip->F_sz_um;//this was  10 micron for the 0.8 micron process
+        W_hit_miss_n    = Wdummyn;
+        W_hit_miss_p    = g_tp.min_w_nmos_*p_to_n_sizing_r;
+    }
+
+    Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0));
+
+    /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators.
+       search_line_delay, search_line_power, search_line_restore_delay for cycle time computation.
+       From the driver(am and an) to the comparators in all the rows including the dummy row,
+       Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */
+
+    //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports
+    //Searchline precharge routes horizontally
+    driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false);
+    driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um;
+    driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um;
+
+    sl_precharge_eq_drv = new Driver(
+        driver_c_gate_load,
+        driver_c_wire_load,
+        driver_r_wire_load,
+        is_dram);
+
+    //searchline data driver ; subarray.num_rows + 1 is because of the dummy row
+    //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines
+    driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false);
+    driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+    driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+    sl_data_drv = new Driver(
+        driver_c_gate_load,
+        driver_c_wire_load,
+        driver_r_wire_load,
+        is_dram);
+
+    sl_precharge_eq_drv->compute_delay(0);
+    double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr
+    double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;
+    double R_bl = (subarray.num_rows + 1) * r_b_metal;
+    double C_bl = subarray.C_bl_cam;
+    delay_cam_sl_restore = sl_precharge_eq_drv->delay
+        + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2);
+
+    out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside)
+
+    //matchline ops delay
+    delay_matchchline += sl_data_drv->delay;
+
+    /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/
+    //matchline delay, matchline power, matchline_reset for cycle time computation,
+
+    ////matchline precharge circuitry routes vertically
+    //There are two matchline precharge driver chains per subarray.
+    driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram);
+    driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal;
+    driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal;
+
+    ml_precharge_drv = new Driver(
+        driver_c_gate_load,
+        driver_c_wire_load,
+        driver_r_wire_load,
+        is_dram);
+
+    ml_precharge_drv->compute_delay(0);
+
+
+    rd =  tr_R_on(Wdummyn, NCH, 2, is_dram);
+    c_intrinsic = Htagbits *
+        (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def,
+                      is_dram)//TODO: the cell_h_def should be revisit
+         + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) /
+         Htagbits);//since each halve only has one precharge tx per matchline
+
+    Cwire = c_matchline_metal * Htagbits;
+    Rwire = r_matchline_metal * Htagbits;
+    c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram);
+
+    double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram);
+    //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+    double R_ml = Rwire;
+    double C_ml = Cwire + c_intrinsic;
+    //TODO: latest CAM has sense amps on matchlines too
+    delay_cam_ml_reset = ml_precharge_drv->delay
+        + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2);
+
+    //matchline ops delay
+    tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+    this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL);
+    delay_matchchline += this_delay;
+    out_time_ramp = this_delay / VTHFA3;
+
+    dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) *
+                     (subarray.num_rows + 1)) //TODO: need to be precise
+        * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *
+        2;//each subarry has two halves
+
+    /* third stage, from the NAND2 gates to the drivers in the dummy row */
+    rd = tr_R_on(Waddrnandn, NCH, 2, is_dram);
+    c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) +
+                  drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2;
+    c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram);
+    tf = rd * (c_intrinsic + c_gate_load);
+    this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE);
+    out_time_ramp = this_delay / (1 - VTHFA4);
+    delay_matchchline += this_delay;
+
+    //only the dummy row has the extra inverter between NAND and NOR gates
+    dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) *
+        g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//  * Ntbl;
+
+    /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */
+    rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram);
+    c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+    Cwire = c_matchline_metal * Htagbits +  c_searchline_metal *
+        (subarray.num_rows + 1) / 2;
+    Rwire = r_matchline_metal * Htagbits +  r_searchline_metal *
+        (subarray.num_rows + 1) / 2;
+    c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram);
+    tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+    this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL);
+    out_time_ramp = this_delay / VTHFA5;
+    delay_matchchline += this_delay;
+
+    dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) *
+        g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+    /*final statge from the NOR gate to drive the wordline of the data portion */
+
+    //searchline data driver There are two matchline precharge driver chains per subarray.
+    driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic
+    driver_c_wire_load = subarray.C_wl_ram;
+    driver_r_wire_load = subarray.R_wl_ram;
+
+    ml_to_ram_wl_drv = new Driver(
+        driver_c_gate_load,
+        driver_c_wire_load,
+        driver_r_wire_load,
+        is_dram);
+
+
+
+    rd = tr_R_on(Wfanorn, NCH, 1, is_dram);
+    c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+        drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+    c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram);
+    tf = rd * (c_intrinsic + c_gate_load);
+    this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE);
+    out_time_ramp = this_delay / (1 - 0.5);
+    delay_matchchline += this_delay;
+
+    out_time_ramp   = ml_to_ram_wl_drv->compute_delay(out_time_ramp);
+
+    //c_gate_load energy is computed in ml_to_ram_wl_drv
+    dynSearchEng  += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl;
+
+
+    /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/
+    /*Precharge the hitting logic */
+    c_intrinsic = 2 *
+        drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+    Cwire = c_searchline_metal * subarray.num_rows;
+    Rwire = r_searchline_metal * subarray.num_rows;
+    c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
+        subarray.num_rows;
+
+    rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false);
+    //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um;
+    double R_hit_miss = Rwire;
+    double C_hit_miss = Cwire + c_intrinsic;
+    delay_hit_miss_reset = log(g_tp.cam.Vbitpre) *
+        (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2);
+    dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+    /*hitting logic evaluation */
+    c_intrinsic = 2 *
+        drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram);
+    Cwire = c_searchline_metal * subarray.num_rows;
+    Rwire = r_searchline_metal * subarray.num_rows;
+    c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) *
+        subarray.num_rows;
+
+    rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false);
+    tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load);
+
+    delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL);
+
+    if (is_fa)
+        delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss);
+
+    dynSearchEng  += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+
+    /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/
+
+    power_matchline.searchOp.dynamic = dynSearchEng;
+
+    //leakage in one subarray
+    double Iport     = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  1, nmos, false, true);//TODO: how much is the idle time? just by *2?
+    double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0,  2, nmos, false, true);
+    double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
+                                     1, inv, false, true) * 2;
+    //approx XOR with Inv
+    double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv,
+                                                false, true) * 2;
+
+    leak_power_cc_inverters_sram_cell         = Icell * g_tp.cam_cell.Vdd;
+    leak_comparator_cam_cell                  = Icell_comparator * g_tp.cam_cell.Vdd;
+    leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd;
+    leak_power_RD_port_sram_cell              = Iport_erp * g_tp.cam_cell.Vdd;
+    leak_power_SCHP_port_sram_cell            = 0;//search port and r/w port are sperate, therefore no access txs in search ports
+
+    power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell +
+                                        leak_comparator_cam_cell +
+                                        leak_power_acc_tr_RW_or_WR_port_sram_cell +
+                                        leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+                                        leak_power_RD_port_sram_cell * ERP +
+                                        leak_power_SCHP_port_sram_cell * SCHP;
 //  power_matchline.searchOp.leakage += leak_comparator_cam_cell;
-  power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
-  power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
-  power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
-  power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd;
-  //In idle states, the hit/miss txs are closed (on) therefore no Isub
-  power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
+    power_matchline.searchOp.leakage *= (subarray.num_rows + 1) *
+        subarray.num_cols_fa_cam;//TODO:dumy line precise
+    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+        cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+        cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+    power_matchline.searchOp.leakage += (subarray.num_rows + 1) *
+        cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+    //In idle states, the hit/miss txs are closed (on) therefore no Isub
+    power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
     // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
 
-  //in idle state, Ig_on only possibly exist in access transistors of read only ports
-  double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
-  double Ig_cell     = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2;
-  double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2;
-
-  gate_leak_comparator_cam_cell          = Ig_cell_comparator* g_tp.cam_cell.Vdd;
-  gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd;
-  gate_leak_power_RD_port_sram_cell      = Ig_port_erp*g_tp.sram_cell.Vdd;
-  gate_leak_power_SCHP_port_sram_cell    = 0;
-
-  //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
-
-  power_matchline.searchOp.gate_leakage += gate_leak_power_cc_inverters_sram_cell;
-  power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
-  power_matchline.searchOp.gate_leakage += gate_leak_power_SCHP_port_sram_cell*SCHP + gate_leak_power_RD_port_sram_cell * ERP;
-  power_matchline.searchOp.gate_leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise
-  power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(0, Wfaprechp,1, pmos) * g_tp.cam_cell.Vdd;
-  power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
-  power_matchline.searchOp.gate_leakage += (subarray.num_rows+1) * cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
-  power_matchline.searchOp.gate_leakage += subarray.num_rows * cmos_Ig_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+
-                                       + cmos_Ig_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd;
-
-
-   return out_time_ramp;
+    //in idle state, Ig_on only possibly exist in access transistors of read only ports
+    double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);
+    double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w,
+                                     1, inv, false, true) * 2;
+    double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv,
+                                                false, true) * 2;
+
+    gate_leak_comparator_cam_cell = Ig_cell_comparator * g_tp.cam_cell.Vdd;
+    gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.cam_cell.Vdd;
+    gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
+    gate_leak_power_SCHP_port_sram_cell = 0;
+
+    //cout<<"power_matchline.searchOp.leakage"<<power_matchline.searchOp.leakage<<endl;
+
+    power_matchline.searchOp.gate_leakage +=
+        gate_leak_power_cc_inverters_sram_cell;
+    power_matchline.searchOp.gate_leakage += gate_leak_comparator_cam_cell;
+    power_matchline.searchOp.gate_leakage +=
+        gate_leak_power_SCHP_port_sram_cell * SCHP +
+        gate_leak_power_RD_port_sram_cell * ERP;
+    power_matchline.searchOp.gate_leakage *= (subarray.num_rows + 1) *
+        subarray.num_cols_fa_cam;//TODO:dumy line precise
+    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+        cmos_Ig_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd;
+    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+        cmos_Ig_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd;
+    power_matchline.searchOp.gate_leakage += (subarray.num_rows + 1) *
+        cmos_Ig_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd;
+    power_matchline.searchOp.gate_leakage += subarray.num_rows *
+        cmos_Ig_leakage(W_hit_miss_n, 0, 1, nmos) * g_tp.cam_cell.Vdd +
+        + cmos_Ig_leakage(0, W_hit_miss_p, 1, pmos) * g_tp.cam_cell.Vdd;
+
+
+    return out_time_ramp;
 }
 
 
-double Mat::width_write_driver_or_write_mux()
-{
-  // calculate resistance of SRAM cell pull-up PMOS transistor
-  // cam and sram have same cell trasistor properties
-  double R_sram_cell_pull_up_tr  = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
-  double R_access_tr             = tr_R_on(g_tp.sram.cell_a_w,    NCH, 1, is_dram, true);
-  double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
-  double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
+double Mat::width_write_driver_or_write_mux() {
+    // calculate resistance of SRAM cell pull-up PMOS transistor
+    // cam and sram have same cell trasistor properties
+    double R_sram_cell_pull_up_tr  = tr_R_on(g_tp.sram.cell_pmos_w, NCH, 1, is_dram, true);
+    double R_access_tr             = tr_R_on(g_tp.sram.cell_a_w,    NCH, 1, is_dram, true);
+    double target_R_write_driver_and_mux = (2 * R_sram_cell_pull_up_tr - R_access_tr) / 2;
+    double width_write_driver_nmos = R_to_w(target_R_write_driver_and_mux, NCH, is_dram);
 
-  return width_write_driver_nmos;
+    return width_write_driver_nmos;
 }
 
 
@@ -1007,134 +1032,164 @@ double Mat::width_write_driver_or_write_mux()
 double Mat::compute_comparators_height(
     int tagbits,
     int number_ways_in_mat,
-    double subarray_mem_cell_area_width)
-{
-  double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
-  double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
-  return cumulative_area / subarray_mem_cell_area_width;
+    double subarray_mem_cell_area_width) {
+    double nand2_area = compute_gate_area(NAND, 2, 0, g_tp.w_comp_n, g_tp.cell_h_def);
+    double cumulative_area = nand2_area * number_ways_in_mat * tagbits / 4;
+    return cumulative_area / subarray_mem_cell_area_width;
 }
 
 
 
-double Mat::compute_bitline_delay(double inrisetime)
-{
-  double V_b_pre, v_th_mem_cell, V_wl;
-  double tstep;
-  double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
-  double R_cell_pull_down=0.0, R_cell_acc =0.0, r_dev=0.0;
-  int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
-
-  double R_b_metal = camFlag? cam_cell.h:cell.h * g_tp.wire_local.R_per_um;
-  double R_bl      = subarray.num_rows * R_b_metal;
-  double C_bl      = subarray.C_bl;
-
-  // TODO: no leakage for DRAMs?
-  double leak_power_cc_inverters_sram_cell = 0;
-  double gate_leak_power_cc_inverters_sram_cell = 0;
-  double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
-  double leak_power_RD_port_sram_cell = 0;
-  double gate_leak_power_RD_port_sram_cell = 0;
-
-  if (is_dram == true)
-  {
-    V_b_pre = g_tp.dram.Vbitpre;
-    v_th_mem_cell = g_tp.dram_acc.Vth;
-    V_wl = g_tp.vpp;
-    //The access transistor is not folded. So we just need to specify a threshold value for the
-    //folding width that is equal to or greater than Wmemcella.
-    R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
-    r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
-  }
-  else
-  { //SRAM
-    V_b_pre = g_tp.sram.Vbitpre;
-    v_th_mem_cell = g_tp.sram_cell.Vth;
-    V_wl = g_tp.sram_cell.Vdd;
-    R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
-    R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
-
-    //Leakage current of an SRAM cell
-    double Iport     = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  1, nmos,false, true);//TODO: how much is the idle time? just by *2?
-    double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  2, nmos,false, true);
-    double Icell     = cmos_Isub_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true)*2;//two invs per cell
-
-    leak_power_cc_inverters_sram_cell         = Icell * g_tp.sram_cell.Vdd;
-    leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
-    leak_power_RD_port_sram_cell              = Iport_erp * g_tp.sram_cell.Vdd;
+double Mat::compute_bitline_delay(double inrisetime) {
+    double V_b_pre, v_th_mem_cell, V_wl;
+    double tstep;
+    double dynRdEnergy = 0.0, dynWriteEnergy = 0.0;
+    double R_cell_pull_down = 0.0, R_cell_acc = 0.0, r_dev = 0.0;
+    int deg_senseamp_muxing = dp.Ndsam_lev_1 * dp.Ndsam_lev_2;
+
+    double R_b_metal = camFlag ? cam_cell.h : cell.h * g_tp.wire_local.R_per_um;
+    double R_bl      = subarray.num_rows * R_b_metal;
+    double C_bl      = subarray.C_bl;
+
+    // TODO: no leakage for DRAMs?
+    double leak_power_cc_inverters_sram_cell = 0;
+    double gate_leak_power_cc_inverters_sram_cell = 0;
+    double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0;
+    double leak_power_RD_port_sram_cell = 0;
+    double gate_leak_power_RD_port_sram_cell = 0;
+
+    if (is_dram == true) {
+        V_b_pre = g_tp.dram.Vbitpre;
+        v_th_mem_cell = g_tp.dram_acc.Vth;
+        V_wl = g_tp.vpp;
+        //The access transistor is not folded. So we just need to specify a
+        // threshold value for the folding width that is equal to or greater
+        // than Wmemcella.
+        R_cell_acc = tr_R_on(g_tp.dram.cell_a_w, NCH, 1, true, true);
+        r_dev = g_tp.dram_cell_Vdd / g_tp.dram_cell_I_on + R_bl / 2;
+    } else { //SRAM
+        V_b_pre = g_tp.sram.Vbitpre;
+        v_th_mem_cell = g_tp.sram_cell.Vth;
+        V_wl = g_tp.sram_cell.Vdd;
+        R_cell_pull_down = tr_R_on(g_tp.sram.cell_nmos_w, NCH, 1, false, true);
+        R_cell_acc = tr_R_on(g_tp.sram.cell_a_w, NCH, 1, false, true);
+
+        //Leakage current of an SRAM cell
+        //TODO: how much is the idle time? just by *2?
+        double Iport = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  1, nmos,
+                                         false, true);
+        double Iport_erp = cmos_Isub_leakage(g_tp.sram.cell_a_w, 0,  2, nmos,
+                                             false, true);
+        double Icell = cmos_Isub_leakage(g_tp.sram.cell_nmos_w,
+                                         g_tp.sram.cell_pmos_w, 1, inv, false,
+                                         true) * 2;//two invs per cell
+
+        leak_power_cc_inverters_sram_cell         = Icell * g_tp.sram_cell.Vdd;
+        leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.sram_cell.Vdd;
+        leak_power_RD_port_sram_cell              = Iport_erp * g_tp.sram_cell.Vdd;
+
+
+        //in idle state, Ig_on only possibly exist in access transistors of read only ports
+        double Ig_port_erp = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,
+                                             false, true);
+        double Ig_cell = cmos_Ig_leakage(g_tp.sram.cell_nmos_w,
+                                         g_tp.sram.cell_pmos_w, 1, inv, false,
+                                         true);
+
+        gate_leak_power_cc_inverters_sram_cell = Ig_cell * g_tp.sram_cell.Vdd;
+        gate_leak_power_RD_port_sram_cell = Ig_port_erp * g_tp.sram_cell.Vdd;
+    }
 
 
-    //in idle state, Ig_on only possibly exist in access transistors of read only ports
-    double Ig_port_erp   = cmos_Ig_leakage(g_tp.sram.cell_a_w, 0, 1, nmos,false, true);
-    double Ig_cell   = cmos_Ig_leakage(g_tp.sram.cell_nmos_w, g_tp.sram.cell_pmos_w, 1, inv,false, true);
-
-    gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.sram_cell.Vdd;
-    gate_leak_power_RD_port_sram_cell      = Ig_port_erp*g_tp.sram_cell.Vdd;
-  }
-
-
-  double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP)), is_dram);
-  double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
-  double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
-  double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
-  double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
-    drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
-    drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
-  double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
-
-  if (is_dram)
-  {
-    double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl));
-    tstep = 2.3 * fraction * r_dev *
-      (g_tp.dram_cell_C * (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux)) /
-      (g_tp.dram_cell_C + (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux));
-    delay_writeback = tstep;
-    dynRdEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
-      (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
-    dynWriteEnergy += (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch) *
-      (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * num_act_mats_hor_dir*100;
-    per_bitline_read_energy = (C_bl + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
-      (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
-  }
-  else
-  {
-    double tau;
-
-    if (deg_bl_muxing > 1)
-    {
-      tau = (R_cell_pull_down + R_cell_acc) *
-        (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
-        R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
-        R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) +
-        R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
-      dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /*
-        subarray.num_cols * num_subarrays_per_mat*/;
-      dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +  C_drain_sense_amp_mux) *
-        2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing);
-      dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
-          num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
-      //Write Ops are differential for SRAM
-    }
-    else
-    {
-      tau = (R_cell_pull_down + R_cell_acc) *
-        (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
-        R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
-      dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
-        2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
-      dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) *
-          num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2;
+    double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0,
+                                      camFlag ? cam_cell.w : cell.w /
+                                      (2 * (RWP + ERP + SCHP)), is_dram);
+    double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram);
+    double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0,
+                                            camFlag ? cam_cell.w :
+                                            cell.w * deg_bl_muxing /
+                                            (RWP + ERP + SCHP), is_dram);
+    double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram);
+    double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0,
+                                      is_dram) +
+        drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w :
+                 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
+        drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w :
+                 cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
+    double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
+                                            camFlag ? cam_cell.w :
+                                            cell.w * deg_bl_muxing /
+                                            (RWP + ERP + SCHP), is_dram);
+
+    if (is_dram) {
+        double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) *
+                                          g_tp.dram_cell_C /
+                                          (g_tp.dram_cell_C + C_bl));
+        tstep = 2.3 * fraction * r_dev *
+            (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso +
+                                 C_sense_amp_latch + C_drain_sense_amp_mux)) /
+            (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso +
+                                 C_sense_amp_latch + C_drain_sense_amp_mux));
+        delay_writeback = tstep;
+        dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch +
+                        C_drain_sense_amp_mux) *
+            (g_tp.dram_cell_Vdd / 2) *
+            g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+        dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) *
+            (g_tp.dram_cell_Vdd / 2) *
+            g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ *
+            num_act_mats_hor_dir * 100;
+        per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso +
+                                   C_sense_amp_latch + C_drain_sense_amp_mux) *
+            (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd;
+    } else {
+        double tau;
+
+        if (deg_bl_muxing > 1) {
+            tau = (R_cell_pull_down + R_cell_acc) *
+                (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
+                 C_sense_amp_latch + C_drain_sense_amp_mux) +
+                R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 *
+                        C_drain_sense_amp_iso + C_sense_amp_latch +
+                        C_drain_sense_amp_mux) +
+                R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso +
+                             C_sense_amp_latch + C_drain_sense_amp_mux) +
+                R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch +
+                                   C_drain_sense_amp_mux);
+            dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense *
+                g_tp.sram_cell.Vdd;
+            dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch +
+                            C_drain_sense_amp_mux) *
+                2 * dp.V_b_sense * g_tp.sram_cell.Vdd *
+                (1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
+                 deg_bl_muxing);
+            dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ /
+                                deg_bl_muxing) / deg_senseamp_muxing) *
+                num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) *
+                g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
+            //Write Ops are differential for SRAM
+        } else {
+            tau = (R_cell_pull_down + R_cell_acc) *
+                  (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 +
+                  R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux);
+            dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) *
+                           2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/;
+            dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ /
+                                 deg_bl_muxing) / deg_senseamp_muxing) *
+                               num_act_mats_hor_dir * C_bl) *
+                g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2;
+
+        }
+        tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
+        power_bitline.readOp.leakage =
+            leak_power_cc_inverters_sram_cell +
+            leak_power_acc_tr_RW_or_WR_port_sram_cell +
+            leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
+            leak_power_RD_port_sram_cell * ERP;
+        power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
+                                            gate_leak_power_RD_port_sram_cell * ERP;
 
     }
-    tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense));
-    power_bitline.readOp.leakage =
-      leak_power_cc_inverters_sram_cell +
-      leak_power_acc_tr_RW_or_WR_port_sram_cell +
-      leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) +
-      leak_power_RD_port_sram_cell * ERP;
-    power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell +
-      gate_leak_power_RD_port_sram_cell * ERP;
-
-  }
 
 //  cout<<"leak_power_cc_inverters_sram_cell"<<leak_power_cc_inverters_sram_cell<<endl;
 //  cout<<"leak_power_acc_tr_RW_or_WR_port_sram_cell"<<leak_power_acc_tr_RW_or_WR_port_sram_cell<<endl;
@@ -1142,607 +1197,684 @@ double Mat::compute_bitline_delay(double inrisetime)
 //  cout<<"leak_power_RD_port_sram_cell"<<leak_power_RD_port_sram_cell<<endl;
 
 
-  /* take input rise time into account */
-  double m = V_wl / inrisetime;
-  if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m))
-  {
-    delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell)/ m);
-  }
-  else
-  {
-    delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
-  }
+    /* take input rise time into account */
+    double m = V_wl / inrisetime;
+    if (tstep <= (0.5 * (V_wl - v_th_mem_cell) / m)) {
+        delay_bitline = sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m);
+    } else {
+        delay_bitline = tstep + (V_wl - v_th_mem_cell) / (2 * m);
+    }
 
-  bool is_fa = (dp.fully_assoc) ? true : false;
+    bool is_fa = (dp.fully_assoc) ? true : false;
 
-  if (dp.is_tag == false || is_fa == false)
-  {
-    power_bitline.readOp.dynamic  = dynRdEnergy;
-    power_bitline.writeOp.dynamic = dynWriteEnergy;
-  }
+    if (dp.is_tag == false || is_fa == false) {
+        power_bitline.readOp.dynamic  = dynRdEnergy;
+        power_bitline.writeOp.dynamic = dynWriteEnergy;
+    }
 
-  double outrisetime = 0;
-  return outrisetime;
+    double outrisetime = 0;
+    return outrisetime;
 }
 
 
 
-double Mat::compute_sa_delay(double inrisetime)
-{
-  //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
-
-  //Bitline circuitry leakage.
-  double Iiso     = simplified_pmos_leakage(g_tp.w_iso, is_dram);
-  double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
-  double IsenseN  = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
-  double IsenseP  = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
-
-  double lkgIdlePh  = IsenseEn;//+ 2*IoBufP;
-  //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
-  double lkgReadPh  = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
-  //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
-  //    lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
-  double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
-  leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
-  leak_power_sense_amps_open_page_state   = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
-
-  // sense amplifier has to drive logic in "data out driver" and sense precharge load.
-  // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
-  //constant as well as the magnitude of input differential voltage.
-  double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
-    drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
-    drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
-    drain_C_(g_tp.w_iso,PCH,1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
-    drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram);
-  double tau = C_ld / g_tp.gm_sense_amp_latch;
-  delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
-  power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
+double Mat::compute_sa_delay(double inrisetime) {
+    //int num_sa_subarray = subarray.num_cols / deg_bl_muxing; //in a subarray
+
+    //Bitline circuitry leakage.
+    double Iiso     = simplified_pmos_leakage(g_tp.w_iso, is_dram);
+    double IsenseEn = simplified_nmos_leakage(g_tp.w_sense_en, is_dram);
+    double IsenseN  = simplified_nmos_leakage(g_tp.w_sense_n, is_dram);
+    double IsenseP  = simplified_pmos_leakage(g_tp.w_sense_p, is_dram);
+
+    double lkgIdlePh  = IsenseEn;//+ 2*IoBufP;
+    //double lkgWritePh = Iiso + IsenseEn;// + 2*IoBufP + 2*Ipch;
+    double lkgReadPh  = Iiso + IsenseN + IsenseP;//+ IoBufN + IoBufP + 2*IsPch ;
+    //double lkgRead = lkgReadPh * num_sa_subarray * 4 * num_act_mats_hor_dir +
+    //    lkgIdlePh * num_sa_subarray * 4 * (num_mats - num_act_mats_hor_dir);
+    double lkgIdle = lkgIdlePh /*num_sa_subarray * num_subarrays_per_mat*/;
+    leak_power_sense_amps_closed_page_state = lkgIdlePh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+    leak_power_sense_amps_open_page_state   = lkgReadPh * g_tp.peri_global.Vdd /* num_sa_subarray * num_subarrays_per_mat*/;
+
+    // sense amplifier has to drive logic in "data out driver" and sense precharge load.
+    // load seen by sense amp. New delay model for sense amp that is sensitive to both the output time
+    //constant as well as the magnitude of input differential voltage.
+    double C_ld = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, is_dram) +
+        drain_C_(g_tp.w_sense_n, NCH, 1, 0,
+                 camFlag ? cam_cell.w : cell.w * deg_bl_muxing /
+                 (RWP + ERP + SCHP), is_dram) +
+        drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ?
+                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+                 is_dram) +
+        drain_C_(g_tp.w_iso, PCH, 1, 0, camFlag ?
+                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+                 is_dram) +
+        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
+                 cam_cell.w : cell.w * deg_bl_muxing / (RWP + ERP + SCHP),
+                 is_dram);
+    double tau = C_ld / g_tp.gm_sense_amp_latch;
+    delay_sa = tau * log(g_tp.peri_global.Vdd / dp.V_b_sense);
+    power_sa.readOp.dynamic = C_ld * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd /* num_sa_subarray
                             num_subarrays_per_mat * num_act_mats_hor_dir*/;
-  power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
+    power_sa.readOp.leakage = lkgIdle * g_tp.peri_global.Vdd;
 
-  double outrisetime = 0;
-  return outrisetime;
+    double outrisetime = 0;
+    return outrisetime;
 }
 
 
 
-double Mat::compute_subarray_out_drv(double inrisetime)
-{
-  double C_ld, rd, tf, this_delay;
-  double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
-
-  // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
-  rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
-  C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) +
-    gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
-  tf = rd * C_ld;
-  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
-  delay_subarray_out_drv += this_delay;
-  inrisetime = this_delay/(1.0 - 0.5);
-  power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-  power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
-  power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
-  // delay of signal through inverter-buffer to second level of sense-amp mux.
-  // internal delay of buffer
-  rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
-  C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
-    drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
-    gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
-  tf = rd * C_ld;
-  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
-  delay_subarray_out_drv += this_delay;
-  inrisetime = this_delay/(1.0 - 0.5);
-  power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-  power_subarray_out_drv.readOp.leakage      += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv, is_dram)* g_tp.peri_global.Vdd;
-  power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-
-  // inverter driving drain of pass transistor of second level of sense-amp mux.
-  rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
-  C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
-    drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
-    drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram);
-  tf = rd * C_ld;
-  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
-  delay_subarray_out_drv += this_delay;
-  inrisetime = this_delay/(1.0 - 0.5);
-  power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-  power_subarray_out_drv.readOp.leakage      += cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-  power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, inv)* g_tp.peri_global.Vdd;
-
-
-  // delay of signal through pass-transistor to input of subarray output driver.
-  rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
-  C_ld = dp.Ndsam_lev_2 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag? cam_cell.w:cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), is_dram) +
-    //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
-    gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
-  tf = rd * C_ld;
-  this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
-  delay_subarray_out_drv += this_delay;
-  inrisetime = this_delay/(1.0 - 0.5);
-  power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
-  power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
-  power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd;
-
-
-  return inrisetime;
+double Mat::compute_subarray_out_drv(double inrisetime) {
+    double C_ld, rd, tf, this_delay;
+    double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram);
+
+    // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer.
+    rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+    C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0,
+                                     camFlag ? cam_cell.w : cell.w *
+                                     deg_bl_muxing / (RWP + ERP + SCHP),
+                                     is_dram) +
+        gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+    tf = rd * C_ld;
+    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+    delay_subarray_out_drv += this_delay;
+    inrisetime = this_delay / (1.0 - 0.5);
+    power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+    power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
+    power_subarray_out_drv.readOp.gate_leakage +=
+        cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
+    // delay of signal through inverter-buffer to second level of sense-amp mux.
+    // internal delay of buffer
+    rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+    C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+           drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+           gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram);
+    tf = rd * C_ld;
+    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+    delay_subarray_out_drv += this_delay;
+    inrisetime = this_delay / (1.0 - 0.5);
+    power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+    power_subarray_out_drv.readOp.leakage +=
+        cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+                          inv, is_dram) * g_tp.peri_global.Vdd;
+    power_subarray_out_drv.readOp.gate_leakage +=
+        cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+                        inv) * g_tp.peri_global.Vdd;
+
+    // inverter driving drain of pass transistor of second level of sense-amp mux.
+    rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram);
+    C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) +
+        drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def,
+                 is_dram) +
+        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ?
+                 cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 /
+                 (RWP + ERP + SCHP), is_dram);
+    tf = rd * C_ld;
+    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+    delay_subarray_out_drv += this_delay;
+    inrisetime = this_delay / (1.0 - 0.5);
+    power_subarray_out_drv.readOp.dynamic      += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+    power_subarray_out_drv.readOp.leakage +=
+        cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+                          inv) * g_tp.peri_global.Vdd;
+    power_subarray_out_drv.readOp.gate_leakage +=
+        cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1,
+                        inv) * g_tp.peri_global.Vdd;
+
+
+    // delay of signal through pass-transistor to input of subarray output driver.
+    rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram);
+    C_ld = dp.Ndsam_lev_2 *
+        drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w :
+                 cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP),
+                 is_dram) +
+           //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram);
+        gate_C(subarray_out_wire->repeater_size *
+               (subarray_out_wire->wire_length /
+                subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ *
+               (1 + p_to_n_sz_r), 0.0, is_dram);
+    tf = rd * C_ld;
+    this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE);
+    delay_subarray_out_drv += this_delay;
+    inrisetime = this_delay / (1.0 - 0.5);
+    power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;
+    power_subarray_out_drv.readOp.leakage += 0;  // for now, let leakage of the pass transistor be 0
+    power_subarray_out_drv.readOp.gate_leakage +=
+        cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd;
+
+
+    return inrisetime;
 }
 
 
 
-double Mat::compute_comparator_delay(double inrisetime)
-{
-  int A = g_ip->tag_assoc;
-
-  int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
-  // a multiple of 4.
-
-  /* First Inverter */
-  double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) +
-               drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
-               drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
-  double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
-  double tf  = Req*Ceq;
-  double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL);
-  double nextinputtime = st1del/VTHCOMPINV;
-  power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
-
-  //For each degree of associativity
-  //there are 4 such quarter comparators
-  double lkgCurrent   = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
-  double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A;
-  /* Second Inverter */
-  Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) +
-    drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
-    drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
-  Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
-  tf = Req*Ceq;
-  double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE);
-  nextinputtime = st2del/(1.0-VTHCOMPINV);
-  power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
-  lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
-  gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A;
-
-  /* Third Inverter */
-  Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) +
-    drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
-    drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
-  Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
-  tf = Req*Ceq;
-  double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL);
-  nextinputtime = st3del/(VTHEVALINV);
-  power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
-  lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
-  gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A;
-
-  /* Final Inverter (virtual ground driver) discharging compare part */
-  double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram);
-  double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */
-  double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
-                   drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
-       drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
-       drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram);
-  double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) +
-                          drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) +
-    drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) +
-    gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram);
-  power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
-  power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *  (A - 1);
-  lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
-  lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;  // stack factor of 0.2
-
-  gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A;
-  gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter
-
-  /* time to go to threshold of mux driver */
-  double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND);
-  /* take into account non-zero input rise time */
-  double m = g_tp.peri_global.Vdd/nextinputtime;
-  double Tcomparatorni;
-
-  if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m))
-  {
-    double a = m;
-    double b = 2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
-    double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth);
-    Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a);
-  }
-  else
-  {
-    Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m;
-  }
-  delay_comparator = Tcomparatorni+st1del+st2del+st3del;
-  power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
-  power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
-
-  return Tcomparatorni / (1.0 - VTHMUXNAND);;
+double Mat::compute_comparator_delay(double inrisetime) {
+    int A = g_ip->tag_assoc;
+
+    int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already
+    // a multiple of 4.
+
+    /* First Inverter */
+    double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) +
+                 drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+                 drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+    double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram);
+    double tf  = Req * Ceq;
+    double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL);
+    double nextinputtime = st1del / VTHCOMPINV;
+    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+
+    //For each degree of associativity
+    //there are 4 such quarter comparators
+    double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1,
+                                          g_tp.w_comp_inv_p1, 1, inv,
+                                          is_dram) * 4 * A;
+    double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1,
+                                            g_tp.w_comp_inv_p1, 1, inv,
+                                            is_dram) * 4 * A;
+    /* Second Inverter */
+    Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) +
+          drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+          drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+    Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram);
+    tf = Req * Ceq;
+    double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE);
+    nextinputtime = st2del / (1.0 - VTHCOMPINV);
+    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
+                                    inv, is_dram) * 4 * A;
+    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1,
+                                      inv, is_dram) * 4 * A;
+
+    /* Third Inverter */
+    Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) +
+          drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+          drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+    Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram);
+    tf = Req * Ceq;
+    double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL);
+    nextinputtime = st3del / (VTHEVALINV);
+    power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1,
+                                    inv, is_dram) * 4 * A;
+    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3,
+                                      1, inv, is_dram) * 4 * A;
+
+    /* Final Inverter (virtual ground driver) discharging compare part */
+    double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram);
+    double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */
+    double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
+                                       g_tp.cell_h_def, is_dram) +
+                              drain_C_(g_tp.w_comp_n, NCH, 2, 1,
+                                       g_tp.cell_h_def, is_dram)) +
+        drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+        drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram);
+    double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1,
+                                       g_tp.cell_h_def, is_dram) +
+                              drain_C_(g_tp.w_comp_n, NCH, 2, 1,
+                                       g_tp.cell_h_def, is_dram)) +
+        drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) +
+        gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, is_dram);
+    power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A;
+    power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *  (A - 1);
+    lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
+                                    inv, is_dram) * 4 * A;
+    lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
+                                    is_dram) * 4 * A; // stack factor of 0.2
+
+    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1,
+                                      inv, is_dram) * 4 * A;
+    //for gate leakage this equals to a inverter
+    gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv,
+                                      is_dram) * 4 * A;
+
+    /* time to go to threshold of mux driver */
+    double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND);
+    /* take into account non-zero input rise time */
+    double m = g_tp.peri_global.Vdd / nextinputtime;
+    double Tcomparatorni;
+
+    if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) {
+        double a = m;
+        double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) -
+                        g_tp.peri_global.Vth);
+        double c = -2 * (tstep) * (g_tp.peri_global.Vdd -
+                                   g_tp.peri_global.Vth) + 1 / m *
+            ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) *
+            ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth);
+        Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a);
+    } else {
+        Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd +
+                                   g_tp.peri_global.Vth) / (2 * m) -
+            (g_tp.peri_global.Vdd * VTHEVALINV) / m;
+    }
+    delay_comparator = Tcomparatorni + st1del + st2del + st3del;
+    power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd;
+    power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd;
+
+    return Tcomparatorni / (1.0 - VTHMUXNAND);;
 }
 
 
 
-void Mat::compute_power_energy()
-{
-        //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
+void Mat::compute_power_energy() {
+    //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power
     //when search all subarrays and all mats are fully active
-        //when plain read/write only one subarray in a single mat is active.
+    //when plain read/write only one subarray in a single mat is active.
 
     // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat.
-  power.readOp.dynamic += r_predec->power.readOp.dynamic +
-                          b_mux_predec->power.readOp.dynamic +
-                          sa_mux_lev_1_predec->power.readOp.dynamic +
-                          sa_mux_lev_2_predec->power.readOp.dynamic;
-
-  // add energy consumed in decoders
-  power_row_decoders.readOp.dynamic        = row_dec->power.readOp.dynamic;
-  if (!(is_fa||pure_cam))
-    power_row_decoders.readOp.dynamic        *= num_subarrays_per_mat;
-
-  // add energy consumed in bitline prechagers, SAs, and bitlines
-  if (!(is_fa||pure_cam))
-  {
-          // add energy consumed in bitline prechagers
-          power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
-          power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
-
-          //Add sense amps energy
-          num_sa_subarray = subarray.num_cols / deg_bl_muxing;
-          power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ;
-
-          // add energy consumed in bitlines
-          //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
-          power_bitline.readOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
-          power_bitline.writeOp.dynamic *= num_subarrays_per_mat*subarray.num_cols;
-          //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
-          //Add subarray output energy
-          power_subarray_out_drv.readOp.dynamic =
-                  (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
-          power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
-                                  power_sa.readOp.dynamic +
-                                  power_bitline.readOp.dynamic +
-                                  power_subarray_out_drv.readOp.dynamic;
-
-          power.readOp.dynamic += power_row_decoders.readOp.dynamic +
-                                  bit_mux_dec->power.readOp.dynamic +
-                                  sa_mux_lev_1_dec->power.readOp.dynamic +
-                                  sa_mux_lev_2_dec->power.readOp.dynamic +
-                                  power_comparator.readOp.dynamic;
-  }
-
-  else if (is_fa)
-  {
-          //for plain read/write only one subarray in a mat is active
-          // add energy consumed in bitline prechagers
-          power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
-                   + cam_bl_precharge_eq_drv->power.readOp.dynamic;
-          power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
-
-          //Add sense amps energy
-          num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing;
-          num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing;
-          power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search;
-          power_sa.readOp.dynamic *= num_sa_subarray;
-
-
-          // add energy consumed in bitlines
-          power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
-          power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
-          power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram);
-          power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
-
-          //Add subarray output energy
-      power_subarray_out_drv.searchOp.dynamic =
-                  (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
-          power_subarray_out_drv.readOp.dynamic =
-                  (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
-
-          power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
-                                  power_sa.readOp.dynamic +
-                                  power_bitline.readOp.dynamic +
-                                  power_subarray_out_drv.readOp.dynamic;
-
-          power.readOp.dynamic += power_row_decoders.readOp.dynamic +
-                                  bit_mux_dec->power.readOp.dynamic +
-                                  sa_mux_lev_1_dec->power.readOp.dynamic +
-                                  sa_mux_lev_2_dec->power.readOp.dynamic +
-                                  power_comparator.readOp.dynamic;
-
-          //add energy consumed inside cam
-          power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
-          power_searchline_precharge = sl_precharge_eq_drv->power;
-      power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
-      power_searchline = sl_data_drv->power;
-      power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
-      power_matchline_precharge  = ml_precharge_drv->power;
-      power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
-      power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
-      power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
-          power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
-          power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
-          power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
-          power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
-          power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
-          //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
-  }
-  else
-  {
-          // add energy consumed in bitline prechagers
-          power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
-          //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
-          //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
-          //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
-
-          //Add sense amps energy
-          num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing;
-          power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
-          power_sa.searchOp.dynamic = 0;
-
-          power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
-          power_bitline.searchOp.dynamic = 0;
-          power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
-
-          power_subarray_out_drv.searchOp.dynamic =
-                  (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
-          power_subarray_out_drv.readOp.dynamic =
-                          (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
-
-          power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
-                                  power_sa.readOp.dynamic +
-                                  power_bitline.readOp.dynamic +
-                                  power_subarray_out_drv.readOp.dynamic;
-
-          power.readOp.dynamic += power_row_decoders.readOp.dynamic +
-                                  bit_mux_dec->power.readOp.dynamic +
-                                  sa_mux_lev_1_dec->power.readOp.dynamic +
-                                  sa_mux_lev_2_dec->power.readOp.dynamic +
-                                  power_comparator.readOp.dynamic;
-
-
-          ////add energy consumed inside cam
-          power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
-          power_searchline_precharge = sl_precharge_eq_drv->power;
-      power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
-      power_searchline = sl_data_drv->power;
-      power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;;
-      power_matchline_precharge  = ml_precharge_drv->power;
-      power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat;
-      power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power;
-      power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic;
-
-          power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
-          power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic;
-          power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic;
-          power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic;
-
-          power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
-          //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
-
-  }
-
-
-
-  // calculate leakage power
-  if (!(is_fa || pure_cam))
-  {
-        int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
-        power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
-    power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
-    power_sa.readOp.leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
-    //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
-    power_subarray_out_drv.readOp.leakage =
-      (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
-      number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
-    power.readOp.leakage += power_bitline.readOp.leakage +
-                            power_bl_precharge_eq_drv.readOp.leakage +
-                            power_sa.readOp.leakage +
-                            power_subarray_out_drv.readOp.leakage;
-    //cout<<"leakage"<<power.readOp.leakage<<endl;
-
-    power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
-    power.readOp.leakage += power_comparator.readOp.leakage;
-
-    //cout<<"leakage1"<<power.readOp.leakage<<endl;
-
-    // leakage power
-    power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
-    power_bit_mux_decoders.readOp.leakage      = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
-    power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
-    power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
-
-    power.readOp.leakage += r_predec->power.readOp.leakage +
-                          b_mux_predec->power.readOp.leakage +
-                          sa_mux_lev_1_predec->power.readOp.leakage +
-                          sa_mux_lev_2_predec->power.readOp.leakage +
-                          power_row_decoders.readOp.leakage +
-                          power_bit_mux_decoders.readOp.leakage +
-                          power_sa_mux_lev_1_decoders.readOp.leakage +
-                          power_sa_mux_lev_2_decoders.readOp.leakage;
-    //cout<<"leakage2"<<power.readOp.leakage<<endl;
-
-    //++++Below is gate leakage
-        power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
-    power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
-    power_sa.readOp.gate_leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP);
-
-    //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
-    power_subarray_out_drv.readOp.gate_leakage =
-      (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
-      number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
-
-    power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
-                            power_bl_precharge_eq_drv.readOp.gate_leakage +
-                            power_sa.readOp.gate_leakage +
-                            power_subarray_out_drv.readOp.gate_leakage;
-    //cout<<"leakage"<<power.readOp.leakage<<endl;
-
-    power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
-    power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
-
-    //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
-
-    // gate_leakage power
-    power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
-    power_bit_mux_decoders.readOp.gate_leakage      = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
-    power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
-    power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
-
-    power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
-                          b_mux_predec->power.readOp.gate_leakage +
-                          sa_mux_lev_1_predec->power.readOp.gate_leakage +
-                          sa_mux_lev_2_predec->power.readOp.gate_leakage +
-                          power_row_decoders.readOp.gate_leakage +
-                          power_bit_mux_decoders.readOp.gate_leakage +
-                          power_sa_mux_lev_1_decoders.readOp.gate_leakage +
-                          power_sa_mux_lev_2_decoders.readOp.gate_leakage;
-  }
-  else if (is_fa)
-  {
-          int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
-
-          power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
-          power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
-          power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
-          power_sa.readOp.leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
-          //cout<<"leakage3"<<power.readOp.leakage<<endl;
-
-
-          power_subarray_out_drv.readOp.leakage =
-                  (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
-                  number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
-          power.readOp.leakage += power_bitline.readOp.leakage +
-                                  power_bl_precharge_eq_drv.readOp.leakage +
-                                  power_bl_precharge_eq_drv.searchOp.leakage +
-                                  power_sa.readOp.leakage +
-                                  power_subarray_out_drv.readOp.leakage;
-
-          //cout<<"leakage4"<<power.readOp.leakage<<endl;
-
-          // leakage power
-          power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
-          power.readOp.leakage += r_predec->power.readOp.leakage +
-                                  power_row_decoders.readOp.leakage;
-
-          //cout<<"leakage5"<<power.readOp.leakage<<endl;
-
-          //inside cam
-          power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
-          power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
-          power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
-          power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
-          power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
-
-          power.readOp.leakage += power_cam_all_active.searchOp.leakage;
-
-//       cout<<"leakage6"<<power.readOp.leakage<<endl;
-
-          //+++Below is gate leakage
-          power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
-          power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
-          power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
-          power_sa.readOp.gate_leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
-
-          //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
-
-
-          power_subarray_out_drv.readOp.gate_leakage =
-                  (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
-                  number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
-
-          power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
-          power_bl_precharge_eq_drv.readOp.gate_leakage +
-          power_bl_precharge_eq_drv.searchOp.gate_leakage +
-          power_sa.readOp.gate_leakage +
-          power_subarray_out_drv.readOp.gate_leakage;
-
-          //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
-
-          // gate_leakage power
-          power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
-          power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
-          power_row_decoders.readOp.gate_leakage;
+    power.readOp.dynamic += r_predec->power.readOp.dynamic +
+                            b_mux_predec->power.readOp.dynamic +
+                            sa_mux_lev_1_predec->power.readOp.dynamic +
+                            sa_mux_lev_2_predec->power.readOp.dynamic;
+
+    // add energy consumed in decoders
+    power_row_decoders.readOp.dynamic        = row_dec->power.readOp.dynamic;
+    if (!(is_fa || pure_cam))
+        power_row_decoders.readOp.dynamic        *= num_subarrays_per_mat;
+
+    // add energy consumed in bitline prechagers, SAs, and bitlines
+    if (!(is_fa || pure_cam)) {
+        // add energy consumed in bitline prechagers
+        power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+        power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+
+        //Add sense amps energy
+        num_sa_subarray = subarray.num_cols / deg_bl_muxing;
+        power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ;
+
+        // add energy consumed in bitlines
+        //cout<<"bitline power"<<power_bitline.readOp.dynamic<<endl;
+        power_bitline.readOp.dynamic *= num_subarrays_per_mat *
+            subarray.num_cols;
+        power_bitline.writeOp.dynamic *= num_subarrays_per_mat *
+            subarray.num_cols;
+        //cout<<"bitline power"<<power_bitline.readOp.dynamic<<"subarray"<<num_subarrays_per_mat<<"cols"<<subarray.num_cols<<endl;
+        //Add subarray output energy
+        power_subarray_out_drv.readOp.dynamic =
+            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+                                power_sa.readOp.dynamic +
+                                power_bitline.readOp.dynamic +
+                                power_subarray_out_drv.readOp.dynamic;
+
+        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+                                bit_mux_dec->power.readOp.dynamic +
+                                sa_mux_lev_1_dec->power.readOp.dynamic +
+                                sa_mux_lev_2_dec->power.readOp.dynamic +
+                                power_comparator.readOp.dynamic;
+    }
 
-          //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+    else if (is_fa) {
+        //for plain read/write only one subarray in a mat is active
+        // add energy consumed in bitline prechagers
+        power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic
+                + cam_bl_precharge_eq_drv->power.readOp.dynamic;
+        power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic;
+
+        //Add sense amps energy
+        num_sa_subarray = (subarray.num_cols_fa_cam +
+                           subarray.num_cols_fa_ram) / deg_bl_muxing;
+        num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing;
+        power_sa.searchOp.dynamic = power_sa.readOp.dynamic *
+            num_sa_subarray_search;
+        power_sa.readOp.dynamic *= num_sa_subarray;
+
+
+        // add energy consumed in bitlines
+        power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic;
+        power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam +
+                                         subarray.num_cols_fa_ram);
+        power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam +
+                                          subarray.num_cols_fa_ram);
+        power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram;
+
+        //Add subarray output energy
+        power_subarray_out_drv.searchOp.dynamic =
+            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+        power_subarray_out_drv.readOp.dynamic =
+            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+
+        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+                                power_sa.readOp.dynamic +
+                                power_bitline.readOp.dynamic +
+                                power_subarray_out_drv.readOp.dynamic;
+
+        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+                                bit_mux_dec->power.readOp.dynamic +
+                                sa_mux_lev_1_dec->power.readOp.dynamic +
+                                sa_mux_lev_2_dec->power.readOp.dynamic +
+                                power_comparator.readOp.dynamic;
+
+        //add energy consumed inside cam
+        power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+        power_searchline_precharge = sl_precharge_eq_drv->power;
+        power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+        power_searchline = sl_data_drv->power;
+        power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
+            subarray.num_cols_fa_cam * num_subarrays_per_mat;;
+        power_matchline_precharge  = ml_precharge_drv->power;
+        power_matchline_precharge.searchOp.dynamic =
+            power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+        power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
+        power_ml_to_ram_wl_drv.searchOp.dynamic =
+            ml_to_ram_wl_drv->power.readOp.dynamic;
+
+        power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic;
+        power_cam_all_active.searchOp.dynamic +=
+            power_searchline_precharge.searchOp.dynamic;
+        power_cam_all_active.searchOp.dynamic +=
+            power_searchline.searchOp.dynamic;
+        power_cam_all_active.searchOp.dynamic +=
+            power_matchline_precharge.searchOp.dynamic;
+
+        power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+        //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
+
+    } else {
+        // add energy consumed in bitline prechagers
+        power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+        //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat;
+        //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic;
+        //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat;
+
+        //Add sense amps energy
+        num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing;
+        power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat;
+        power_sa.searchOp.dynamic = 0;
+
+        power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam;
+        power_bitline.searchOp.dynamic = 0;
+        power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam;
+
+        power_subarray_out_drv.searchOp.dynamic =
+            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat;
+        power_subarray_out_drv.readOp.dynamic =
+            (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat;
+
+        power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic +
+                                power_sa.readOp.dynamic +
+                                power_bitline.readOp.dynamic +
+                                power_subarray_out_drv.readOp.dynamic;
+
+        power.readOp.dynamic += power_row_decoders.readOp.dynamic +
+                                bit_mux_dec->power.readOp.dynamic +
+                                sa_mux_lev_1_dec->power.readOp.dynamic +
+                                sa_mux_lev_2_dec->power.readOp.dynamic +
+                                power_comparator.readOp.dynamic;
+
+
+        ////add energy consumed inside cam
+        power_matchline.searchOp.dynamic *= num_subarrays_per_mat;
+        power_searchline_precharge = sl_precharge_eq_drv->power;
+        power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+        power_searchline = sl_data_drv->power;
+        power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic *
+            subarray.num_cols_fa_cam * num_subarrays_per_mat;;
+        power_matchline_precharge  = ml_precharge_drv->power;
+        power_matchline_precharge.searchOp.dynamic =
+            power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat;
+        power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power;
+        power_ml_to_ram_wl_drv.searchOp.dynamic =
+            ml_to_ram_wl_drv->power.readOp.dynamic;
+
+        power_cam_all_active.searchOp.dynamic =
+            power_matchline.searchOp.dynamic;
+        power_cam_all_active.searchOp.dynamic +=
+            power_searchline_precharge.searchOp.dynamic;
+        power_cam_all_active.searchOp.dynamic +=
+            power_searchline.searchOp.dynamic;
+        power_cam_all_active.searchOp.dynamic +=
+            power_matchline_precharge.searchOp.dynamic;
+
+        power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic;
+        //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic;
 
-          //inside cam
-          power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
-          power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
-          power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
-          power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
-          power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+    }
 
-          power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
 
-  }
-  else
-  {
-          int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
 
-          //power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
-          //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
-          power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
-          power_sa.readOp.leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+    // calculate leakage power
+    if (!(is_fa || pure_cam)) {
+        int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
 
+        power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+        power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+            (RWP + ERP);
+
+        //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
+        power_subarray_out_drv.readOp.leakage =
+            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+        power.readOp.leakage += power_bitline.readOp.leakage +
+                                power_bl_precharge_eq_drv.readOp.leakage +
+                                power_sa.readOp.leakage +
+                                power_subarray_out_drv.readOp.leakage;
+        //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+        power_comparator.readOp.leakage *= num_do_b_mat * (RWP + ERP);
+        power.readOp.leakage += power_comparator.readOp.leakage;
+
+        //cout<<"leakage1"<<power.readOp.leakage<<endl;
+
+        // leakage power
+        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+        power_bit_mux_decoders.readOp.leakage      = bit_mux_dec->power.readOp.leakage * deg_bl_muxing;
+        power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1;
+        power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2;
+
+        power.readOp.leakage += r_predec->power.readOp.leakage +
+                                b_mux_predec->power.readOp.leakage +
+                                sa_mux_lev_1_predec->power.readOp.leakage +
+                                sa_mux_lev_2_predec->power.readOp.leakage +
+                                power_row_decoders.readOp.leakage +
+                                power_bit_mux_decoders.readOp.leakage +
+                                power_sa_mux_lev_1_decoders.readOp.leakage +
+                                power_sa_mux_lev_2_decoders.readOp.leakage;
+        //cout<<"leakage2"<<power.readOp.leakage<<endl;
+
+        //++++Below is gate leakage
+        power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+        power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+        power_sa.readOp.gate_leakage *= num_sa_subarray *
+            num_subarrays_per_mat * (RWP + ERP);
+
+        //num_sa_subarray             = subarray.num_cols / deg_bl_muxing;
+        power_subarray_out_drv.readOp.gate_leakage =
+            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP);
+
+        power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+                                     power_bl_precharge_eq_drv.readOp.gate_leakage +
+                                     power_sa.readOp.gate_leakage +
+                                     power_subarray_out_drv.readOp.gate_leakage;
+        //cout<<"leakage"<<power.readOp.leakage<<endl;
+
+        power_comparator.readOp.gate_leakage *= num_do_b_mat * (RWP + ERP);
+        power.readOp.gate_leakage += power_comparator.readOp.gate_leakage;
+
+        //cout<<"leakage1"<<power.readOp.gate_leakage<<endl;
+
+        // gate_leakage power
+        power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+        power_bit_mux_decoders.readOp.gate_leakage      = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing;
+        power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1;
+        power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2;
+
+        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+                                     b_mux_predec->power.readOp.gate_leakage +
+                                     sa_mux_lev_1_predec->power.readOp.gate_leakage +
+                                     sa_mux_lev_2_predec->power.readOp.gate_leakage +
+                                     power_row_decoders.readOp.gate_leakage +
+                                     power_bit_mux_decoders.readOp.gate_leakage +
+                                     power_sa_mux_lev_1_decoders.readOp.gate_leakage +
+                                     power_sa_mux_lev_2_decoders.readOp.gate_leakage;
+    } else if (is_fa) {
+        int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
 
-          power_subarray_out_drv.readOp.leakage =
-                  (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
-                  number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+        power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+        power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+        power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+            (RWP + ERP + SCHP);
 
-          power.readOp.leakage += //power_bitline.readOp.leakage +
-                                  //power_bl_precharge_eq_drv.readOp.leakage +
-                                  power_bl_precharge_eq_drv.searchOp.leakage +
-                                  power_sa.readOp.leakage +
-                                  power_subarray_out_drv.readOp.leakage;
+        //cout<<"leakage3"<<power.readOp.leakage<<endl;
 
-          // leakage power
-          power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
-          power.readOp.leakage += r_predec->power.readOp.leakage +
-                                  power_row_decoders.readOp.leakage;
 
-          //inside cam
-          power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
-          power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage;
-          power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam;
-          power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic;
-          power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+        power_subarray_out_drv.readOp.leakage =
+            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
 
-          power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+        power.readOp.leakage += power_bitline.readOp.leakage +
+                                power_bl_precharge_eq_drv.readOp.leakage +
+                                power_bl_precharge_eq_drv.searchOp.leakage +
+                                power_sa.readOp.leakage +
+                                power_subarray_out_drv.readOp.leakage;
 
-          //+++Below is gate leakage
-          power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
-          power_sa.readOp.gate_leakage                 *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP);
+        //cout<<"leakage4"<<power.readOp.leakage<<endl;
 
+        // leakage power
+        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat;
+        power.readOp.leakage += r_predec->power.readOp.leakage +
+                                power_row_decoders.readOp.leakage;
 
-          power_subarray_out_drv.readOp.gate_leakage =
-                  (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
-                  number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+        //cout<<"leakage5"<<power.readOp.leakage<<endl;
 
-          power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
-                                  //power_bl_precharge_eq_drv.readOp.gate_leakage +
-                                  power_bl_precharge_eq_drv.searchOp.gate_leakage +
-                                  power_sa.readOp.gate_leakage +
-                                  power_subarray_out_drv.readOp.gate_leakage;
+        //inside cam
+        power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+        power_cam_all_active.searchOp.leakage +=
+            sl_precharge_eq_drv->power.readOp.leakage;
+        power_cam_all_active.searchOp.leakage +=
+            sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
+        power_cam_all_active.searchOp.leakage +=
+            ml_precharge_drv->power.readOp.dynamic;
+        power_cam_all_active.searchOp.leakage *=
+            num_subarrays_per_mat;
 
-          // gate_leakage power
-          power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP);
-          power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
-                                  power_row_decoders.readOp.gate_leakage;
+        power.readOp.leakage += power_cam_all_active.searchOp.leakage;
 
-          //inside cam
-          power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage;
-          power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage;
-          power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam;
-          power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic;
-          power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+//       cout<<"leakage6"<<power.readOp.leakage<<endl;
 
-          power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
-  }
+        //+++Below is gate leakage
+        power_bitline.readOp.gate_leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+        power_bl_precharge_eq_drv.readOp.gate_leakage = bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+        power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+        power_sa.readOp.gate_leakage *= num_sa_subarray *
+            num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+        //cout<<"leakage3"<<power.readOp.gate_leakage<<endl;
+
+
+        power_subarray_out_drv.readOp.gate_leakage =
+            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+        power.readOp.gate_leakage += power_bitline.readOp.gate_leakage +
+                                     power_bl_precharge_eq_drv.readOp.gate_leakage +
+                                     power_bl_precharge_eq_drv.searchOp.gate_leakage +
+                                     power_sa.readOp.gate_leakage +
+                                     power_subarray_out_drv.readOp.gate_leakage;
+
+        //cout<<"leakage4"<<power.readOp.gate_leakage<<endl;
+
+        // gate_leakage power
+        power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat;
+        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+                                     power_row_decoders.readOp.gate_leakage;
+
+        //cout<<"leakage5"<<power.readOp.gate_leakage<<endl;
+
+        //inside cam
+        power_cam_all_active.searchOp.gate_leakage =
+            power_matchline.searchOp.gate_leakage;
+        power_cam_all_active.searchOp.gate_leakage +=
+            sl_precharge_eq_drv->power.readOp.gate_leakage;
+        power_cam_all_active.searchOp.gate_leakage +=
+            sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
+        power_cam_all_active.searchOp.gate_leakage +=
+            ml_precharge_drv->power.readOp.dynamic;
+        power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat;
+
+        power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+
+    } else {
+        int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2);
+
+        //power_bitline.readOp.leakage            *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat;
+        //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+        power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat;
+        power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat *
+            (RWP + ERP + SCHP);
+
+
+        power_subarray_out_drv.readOp.leakage =
+            (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) *
+            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+        power.readOp.leakage += //power_bitline.readOp.leakage +
+            //power_bl_precharge_eq_drv.readOp.leakage +
+            power_bl_precharge_eq_drv.searchOp.leakage +
+            power_sa.readOp.leakage +
+            power_subarray_out_drv.readOp.leakage;
+
+        // leakage power
+        power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage *
+            subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP);
+        power.readOp.leakage += r_predec->power.readOp.leakage +
+                                power_row_decoders.readOp.leakage;
+
+        //inside cam
+        power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage;
+        power_cam_all_active.searchOp.leakage +=
+            sl_precharge_eq_drv->power.readOp.leakage;
+        power_cam_all_active.searchOp.leakage +=
+            sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam;
+        power_cam_all_active.searchOp.leakage +=
+            ml_precharge_drv->power.readOp.dynamic;
+        power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat;
+
+        power.readOp.leakage += power_cam_all_active.searchOp.leakage;
+
+        //+++Below is gate leakage
+        power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat;
+        power_sa.readOp.gate_leakage *= num_sa_subarray *
+            num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+
+        power_subarray_out_drv.readOp.gate_leakage =
+            (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) *
+            number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP);
+
+        power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage +
+            //power_bl_precharge_eq_drv.readOp.gate_leakage +
+            power_bl_precharge_eq_drv.searchOp.gate_leakage +
+            power_sa.readOp.gate_leakage +
+            power_subarray_out_drv.readOp.gate_leakage;
+
+        // gate_leakage power
+        power_row_decoders.readOp.gate_leakage =
+            row_dec->power.readOp.gate_leakage * subarray.num_rows *
+            num_subarrays_per_mat * (RWP + ERP + EWP);
+        power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage +
+                                     power_row_decoders.readOp.gate_leakage;
+
+        //inside cam
+        power_cam_all_active.searchOp.gate_leakage =
+            power_matchline.searchOp.gate_leakage;
+        power_cam_all_active.searchOp.gate_leakage +=
+            sl_precharge_eq_drv->power.readOp.gate_leakage;
+        power_cam_all_active.searchOp.gate_leakage +=
+            sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam;
+        power_cam_all_active.searchOp.gate_leakage +=
+            ml_precharge_drv->power.readOp.dynamic;
+        power_cam_all_active.searchOp.gate_leakage *=
+            num_subarrays_per_mat;
+
+        power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage;
+    }
 }