From: Yasuko Eckert Date: Tue, 3 Jun 2014 20:32:59 +0000 (-0700) Subject: ext: McPAT interface changes and fixes X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0deef376d96bfe0a3a2496714ac22471d9ee818a;p=gem5.git ext: McPAT interface changes and fixes This patch includes software engineering changes and some generic bug fixes that Joel Hestness and Yasuko Eckert made to McPAT 0.8. There are still known issues/concerns we did not have a chance to address in this patch. High-level changes in this patch include: 1) Making XML parsing modular and hierarchical: - Shift parsing responsibility into the components - Read XML in a (mostly) context-free recursive manner so that McPAT input files can contain arbitrary component hierarchies 2) Making power, energy, and area calculations a hierarchical and recursive process - Components track their subcomponents and recursively call compute functions in stages - Make C++ object hierarchy reflect inheritance of classes of components with similar structures - Simplify computeArea() and computeEnergy() functions to eliminate successive calls to calculate separate TDP vs. runtime energy - Remove Processor component (now unnecessary) and introduce a more abstract System component 3) Standardizing McPAT output across all components - Use a single, common data structure for storing and printing McPAT output - Recursively call print functions through component hierarchy 4) For caches, allow splitting data array and tag array reads and writes for better accuracy 5) Improving the usability of CACTI by printing more helpful warning and error messages 6) Minor: Impose more rigorous code style for clarity (more work still to be done) Overall, these changes greatly reduce the amount of replicated code, and they improve McPAT runtime and decrease memory footprint. 
--- diff --git a/ext/mcpat/XML_Parse.cc b/ext/mcpat/XML_Parse.cc deleted file mode 100644 index ae3ee6f17..000000000 --- a/ext/mcpat/XML_Parse.cc +++ /dev/null @@ -1,1798 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - - -#include -#include - -#include "XML_Parse.h" -#include "xmlParser.h" - -using namespace std; - -void ParseXML::parse(char* filepath) -{ - unsigned int i,j,k,m,n; - unsigned int NumofCom_4; - unsigned int itmp; - //Initialize all structures - ParseXML::initialize(); - - // this open and parse the XML file: - XMLNode xMainNode=XMLNode::openFileHelper(filepath,"component"); //the 'component' in the first layer - - XMLNode xNode2=xMainNode.getChildNode("component"); // the 'component' in the second layer - //get all params in the second layer - itmp=xNode2.nChildNode("param"); - for(i=0; iOrderofComponents_3layer) - { - //___________________________get all system.core0-n________________________________________________ - if (sys.homogeneous_cores==1) OrderofComponents_3layer=0; - else OrderofComponents_3layer=sys.number_of_cores-1; - for (i=0; i<=OrderofComponents_3layer; i++) - { - xNode3=xNode2.getChildNode("component",i); - if (xNode3.isEmpty()==1) { - printf("The value of homogeneous_cores or number_of_cores is not correct!"); - exit(0); - } - else{ - if (strstr(xNode3.getAttribute("name"),"core")!=NULL) - { - { //For cpu0-cpui - //Get all params with system.core? 
- itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.mem")!=NULL) - { - - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.mc")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.niu")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.pcie")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k0) OrderofComponents_3layer=OrderofComponents_3layer+1; - xNode3=xNode2.getChildNode("component",OrderofComponents_3layer); - if (xNode3.isEmpty()==1) { - printf("some value(s) of number_of_cores/number_of_L2s/number_of_L3s/number_of_NoCs is/are not correct!"); - exit(0); - } - if (strstr(xNode3.getAttribute("id"),"system.flashc")!=NULL) - { - itmp=xNode3.nChildNode("param"); - for(k=0; k 
-#include - -#include - -#include "xmlParser.h" -using namespace std; - -/* -void myfree(char *t); // {free(t);} -ToXMLStringTool tx,tx2; -*/ -//all subnodes at the level of system.core(0-n) -//cache_policy is added into cache property arrays;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - -typedef struct{ - int prediction_width; - char prediction_scheme[20]; - int predictor_size; - int predictor_entries; - int local_predictor_size[20]; - int local_predictor_entries; - int global_predictor_entries; - int global_predictor_bits; - int chooser_predictor_entries; - int chooser_predictor_bits; - double predictor_accesses; -} predictor_systemcore; -typedef struct{ - int number_entries; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - double total_hits; - double total_accesses; - double total_misses; - double conflicts; -} itlb_systemcore; -typedef struct{ - //params - double icache_config[20]; - int buffer_sizes[20]; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double read_misses; - double replacements; - double read_hits; - double total_hits; - double total_misses; - double miss_buffer_access; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double conflicts; -} icache_systemcore; -typedef struct{ - //params - int number_entries; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double write_hits; - double read_hits; - double read_misses; - double write_misses; - double total_hits; - double total_misses; - double conflicts; -} dtlb_systemcore; -typedef struct{ - //params - double dcache_config[20]; - int 
buffer_sizes[20]; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_access; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; -} dcache_systemcore; -typedef struct{ - //params - int BTB_config[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; -} BTB_systemcore; -typedef struct{ - //all params at the level of system.core(0-n) - int clock_rate; - bool opt_local; - bool x86; - int machine_bits; - int virtual_address_width; - int physical_address_width; - int opcode_width; - int micro_opcode_width; - int instruction_length; - int machine_type; - int internal_datapath_width; - int number_hardware_threads; - int fetch_width; - int number_instruction_fetch_ports; - int decode_width; - int issue_width; - int peak_issue_width; - int commit_width; - int pipelines_per_core[20]; - int pipeline_depth[20]; - char FPU[20]; - char divider_multiplier[20]; - int ALU_per_core; - double FPU_per_core; - int MUL_per_core; - int instruction_buffer_size; - int decoded_stream_buffer_size; - int instruction_window_scheme; - int instruction_window_size; - int fp_instruction_window_size; - int ROB_size; - int archi_Regs_IRF_size; - int archi_Regs_FRF_size; - int phy_Regs_IRF_size; - int phy_Regs_FRF_size; - int rename_scheme; - int register_windows_size; - char LSU_order[20]; - int store_buffer_size; - int 
load_buffer_size; - int memory_ports; - char Dcache_dual_pump[20]; - int RAS_size; - int fp_issue_width; - int prediction_width; - int number_of_BTB; - int number_of_BPT; - - //all stats at the level of system.core(0-n) - double total_instructions; - double int_instructions; - double fp_instructions; - double branch_instructions; - double branch_mispredictions; - double committed_instructions; - double committed_int_instructions; - double committed_fp_instructions; - double load_instructions; - double store_instructions; - double total_cycles; - double idle_cycles; - double busy_cycles; - double instruction_buffer_reads; - double instruction_buffer_write; - double ROB_reads; - double ROB_writes; - double rename_accesses; - double fp_rename_accesses; - double rename_reads; - double rename_writes; - double fp_rename_reads; - double fp_rename_writes; - double inst_window_reads; - double inst_window_writes; - double inst_window_wakeup_accesses; - double inst_window_selections; - double fp_inst_window_reads; - double fp_inst_window_writes; - double fp_inst_window_wakeup_accesses; - double fp_inst_window_selections; - double archi_int_regfile_reads; - double archi_float_regfile_reads; - double phy_int_regfile_reads; - double phy_float_regfile_reads; - double phy_int_regfile_writes; - double phy_float_regfile_writes; - double archi_int_regfile_writes; - double archi_float_regfile_writes; - double int_regfile_reads; - double float_regfile_reads; - double int_regfile_writes; - double float_regfile_writes; - double windowed_reg_accesses; - double windowed_reg_transports; - double function_calls; - double context_switches; - double ialu_accesses; - double fpu_accesses; - double mul_accesses; - double cdb_alu_accesses; - double cdb_mul_accesses; - double cdb_fpu_accesses; - double load_buffer_reads; - double load_buffer_writes; - double load_buffer_cams; - double store_buffer_reads; - double store_buffer_writes; - double store_buffer_cams; - double store_buffer_forwards; - 
double main_memory_access; - double main_memory_read; - double main_memory_write; - double pipeline_duty_cycle; - - double IFU_duty_cycle ; - double BR_duty_cycle ; - double LSU_duty_cycle ; - double MemManU_I_duty_cycle; - double MemManU_D_duty_cycle ; - double ALU_duty_cycle ; - double MUL_duty_cycle ; - double FPU_duty_cycle ; - double ALU_cdb_duty_cycle ; - double MUL_cdb_duty_cycle ; - double FPU_cdb_duty_cycle ; - - //all subnodes at the level of system.core(0-n) - predictor_systemcore predictor; - itlb_systemcore itlb; - icache_systemcore icache; - dtlb_systemcore dtlb; - dcache_systemcore dcache; - BTB_systemcore BTB; - -} system_core; -typedef struct{ - //params - int Directory_type; - double Dir_config[20]; - int buffer_sizes[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double read_misses; - double write_misses; - double conflicts; - double duty_cycle; -} system_L1Directory; -typedef struct{ - //params - int Directory_type; - double Dir_config[20]; - int buffer_sizes[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double read_misses; - double write_misses; - double conflicts; - double duty_cycle; -} system_L2Directory; -typedef struct{ - //params - double L2_config[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - int buffer_sizes[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double 
read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_accesses; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; - double duty_cycle; - - bool merged_dir; - double homenode_read_accesses; - double homenode_write_accesses; - double homenode_read_hits; - double homenode_write_hits; - double homenode_read_misses; - double homenode_write_misses; - double dir_duty_cycle; -} system_L2; -typedef struct{ - //params - double L3_config[20]; - int clockrate; - int ports[20]; - int device_type; - int cache_policy;//0 no write or write-though with non-write allocate;1 write-back with write-allocate - char threeD_stack[20]; - int buffer_sizes[20]; - //stats - double total_accesses; - double read_accesses; - double write_accesses; - double total_hits; - double total_misses; - double read_hits; - double write_hits; - double read_misses; - double write_misses; - double replacements; - double write_backs; - double miss_buffer_accesses; - double fill_buffer_accesses; - double prefetch_buffer_accesses; - double prefetch_buffer_writes; - double prefetch_buffer_reads; - double prefetch_buffer_hits; - double wbb_writes; - double wbb_reads; - double conflicts; - double duty_cycle; - - bool merged_dir; - double homenode_read_accesses; - double homenode_write_accesses; - double homenode_read_hits; - double homenode_write_hits; - double homenode_read_misses; - double homenode_write_misses; - double dir_duty_cycle; -} system_L3; -typedef struct{ - //params - int number_of_inputs_of_crossbars; - int number_of_outputs_of_crossbars; - int flit_bits; - int input_buffer_entries_per_port; - int ports_of_input_buffer[20]; - //stats - double crossbar_accesses; -} xbar0_systemNoC; -typedef struct{ - //params - int clockrate; - bool type; - 
bool has_global_link; - char topology[20]; - int horizontal_nodes; - int vertical_nodes; - int link_throughput; - int link_latency; - int input_ports; - int output_ports; - int virtual_channel_per_port; - int flit_bits; - int input_buffer_entries_per_vc; - int ports_of_input_buffer[20]; - int dual_pump; - int number_of_crossbars; - char crossbar_type[20]; - char crosspoint_type[20]; - xbar0_systemNoC xbar0; - int arbiter_type; - double chip_coverage; - //stats - double total_accesses; - double duty_cycle; - double route_over_perc; -} system_NoC; -typedef struct{ - //params - int mem_tech_node; - int device_clock; - int peak_transfer_rate; - int internal_prefetch_of_DRAM_chip; - int capacity_per_channel; - int number_ranks; - int num_banks_of_DRAM_chip; - int Block_width_of_DRAM_chip; - int output_width_of_DRAM_chip; - int page_size_of_DRAM_chip; - int burstlength_of_DRAM_chip; - //stats - double memory_accesses; - double memory_reads; - double memory_writes; -} system_mem; -typedef struct{ - //params - //Common Param for mc and fc - double peak_transfer_rate; - int number_mcs; - bool withPHY; - int type; - - //FCParam - //stats - double duty_cycle; - double total_load_perc; - - //McParam - int mc_clock; - int llc_line_length; - int memory_channels_per_mc; - int number_ranks; - int req_window_size_per_channel; - int IO_buffer_size_per_channel; - int databus_width; - int addressbus_width; - bool LVDS; - - //stats - double memory_accesses; - double memory_reads; - double memory_writes; -} system_mc; - -typedef struct{ - //params - int clockrate; - int number_units; - int type; - //stats - double duty_cycle; - double total_load_perc; -} system_niu; - -typedef struct{ - //params - int clockrate; - int number_units; - int num_channels; - int type; - bool withPHY; - //stats - double duty_cycle; - double total_load_perc; -} system_pcie; - -typedef struct{ - //All number_of_* at the level of 'system' Ying 03/21/2009 - int number_of_cores; - int number_of_L1Directories; - 
int number_of_L2Directories; - int number_of_L2s; - bool Private_L2; - int number_of_L3s; - int number_of_NoCs; - int number_of_dir_levels; - int domain_size; - int first_level_dir; - // All params at the level of 'system' - int homogeneous_cores; - int homogeneous_L1Directories; - int homogeneous_L2Directories; - double core_tech_node; - int target_core_clockrate; - int target_chip_area; - int temperature; - int number_cache_levels; - int L1_property; - int L2_property; - int homogeneous_L2s; - int L3_property; - int homogeneous_L3s; - int homogeneous_NoCs; - int homogeneous_ccs; - int Max_area_deviation; - int Max_power_deviation; - int device_type; - bool longer_channel_device; - bool Embedded; - bool opt_dynamic_power; - bool opt_lakage_power; - bool opt_clockrate; - bool opt_area; - int interconnect_projection_type; - int machine_bits; - int virtual_address_width; - int physical_address_width; - int virtual_memory_page_size; - double total_cycles; - //system.core(0-n):3rd level - system_core core[64]; - system_L1Directory L1Directory[64]; - system_L2Directory L2Directory[64]; - system_L2 L2[64]; - system_L3 L3[64]; - system_NoC NoC[64]; - system_mem mem; - system_mc mc; - system_mc flashc; - system_niu niu; - system_pcie pcie; -} root_system; - -class ParseXML -{ -public: - void parse(char* filepath); - void initialize(); -public: - root_system sys; -}; - - -#endif /* XML_PARSE_H_ */ - - - - diff --git a/ext/mcpat/array.cc b/ext/mcpat/array.cc index 975f82fad..0e46afe03 100644 --- a/ext/mcpat/array.cc +++ b/ext/mcpat/array.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
* All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,232 +26,242 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ -#define GLOBALVAR -#include -#include #include +#include #include "area.h" #include "array.h" +#include "common.h" #include "decoder.h" -#include "globalvar.h" #include "parameter.h" using namespace std; -ArrayST::ArrayST(const InputParameter *configure_interface, - string _name, - enum Device_ty device_ty_, - bool opt_local_, - enum Core_type core_ty_, - bool _is_default) -:l_ip(*configure_interface), - name(_name), - device_ty(device_ty_), - opt_local(opt_local_), - core_ty(core_ty_), - is_default(_is_default) - { - - if (l_ip.cache_sz<64) l_ip.cache_sz=64; - l_ip.error_checking();//not only do the error checking but also fill some missing parameters - optimize_array(); +double ArrayST::area_efficiency_threshold = 20.0; +int ArrayST::ed = 0; +//Fixed number, make sure timing can be satisfied. +int ArrayST::delay_wt = 100; +int ArrayST::cycle_time_wt = 1000; +//Fixed number, This is used to exhaustive search for individual components. +int ArrayST::area_wt = 10; +//Fixed number, This is used to exhaustive search for individual components. +int ArrayST::dynamic_power_wt = 10; +int ArrayST::leakage_power_wt = 10; +//Fixed number, make sure timing can be satisfied. +int ArrayST::delay_dev = 1000000; +int ArrayST::cycle_time_dev = 100; +//Fixed number, This is used to exhaustive search for individual components. +int ArrayST::area_dev = 1000000; +//Fixed number, This is used to exhaustive search for individual components. 
+int ArrayST::dynamic_power_dev = 1000000; +int ArrayST::leakage_power_dev = 1000000; +int ArrayST::cycle_time_dev_threshold = 10; + + +ArrayST::ArrayST(XMLNode* _xml_data, + const InputParameter *configure_interface, string _name, + enum Device_ty device_ty_, double _clockRate, + bool opt_local_, enum Core_type core_ty_, bool _is_default) + : McPATComponent(_xml_data), l_ip(*configure_interface), + device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_), + is_default(_is_default) { + name = _name; + clockRate = _clockRate; + if (l_ip.cache_sz < MIN_BUFFER_SIZE) + l_ip.cache_sz = MIN_BUFFER_SIZE; + + if (!l_ip.error_checking(name)) { + exit(1); + } -} + output_data.reset(); + computeEnergy(); + computeArea(); +} -void ArrayST::compute_base_power() - { - //l_ip.out_w =l_ip.line_sz*8; - local_result=cacti_interface(&l_ip); +void ArrayST::compute_base_power() { + local_result = cacti_interface(&l_ip); +} - } +void ArrayST::computeArea() { + area.set_area(local_result.area); + output_data.area = local_result.area / 1e6; +} -void ArrayST::optimize_array() -{ - list candidate_solutions(0); - list::iterator candidate_iter, min_dynamic_energy_iter; +void ArrayST::computeEnergy() { + list candidate_solutions(0); + list::iterator candidate_iter, min_dynamic_energy_iter; - uca_org_t * temp_res = 0; - local_result.valid=false; + uca_org_t* temp_res = NULL; + local_result.valid = false; - double throughput=l_ip.throughput, latency=l_ip.latency; - double area_efficiency_threshold = 20.0; - bool throughput_overflow=true, latency_overflow=true; - compute_base_power(); + double throughput = l_ip.throughput; + double latency = l_ip.latency; + bool throughput_overflow = true; + bool latency_overflow = true; + compute_base_power(); - if ((local_result.cycle_time - throughput) <= 1e-10 ) - throughput_overflow=false; - if ((local_result.access_time - latency)<= 1e-10) - latency_overflow=false; + if ((local_result.cycle_time - throughput) <= 1e-10 ) + throughput_overflow = false; 
+ if ((local_result.access_time - latency) <= 1e-10) + latency_overflow = false; - if (opt_for_clk && opt_local) - { - if (throughput_overflow || latency_overflow) - { - l_ip.ed=0; + if (opt_for_clk && opt_local) { + if (throughput_overflow || latency_overflow) { + l_ip.ed = ed; - l_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied. - l_ip.cycle_time_wt = 1000; + l_ip.delay_wt = delay_wt; + l_ip.cycle_time_wt = cycle_time_wt; - l_ip.area_wt = 10;//Fixed number, This is used to exhaustive search for individual components. - l_ip.dynamic_power_wt = 10;//Fixed number, This is used to exhaustive search for individual components. - l_ip.leakage_power_wt = 10; + l_ip.area_wt = area_wt; + l_ip.dynamic_power_wt = dynamic_power_wt; + l_ip.leakage_power_wt = leakage_power_wt; - l_ip.delay_dev = 1000000;//Fixed number, make sure timing can be satisfied. - l_ip.cycle_time_dev = 100; + l_ip.delay_dev = delay_dev; + l_ip.cycle_time_dev = cycle_time_dev; - l_ip.area_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components. - l_ip.dynamic_power_dev = 1000000;//Fixed number, This is used to exhaustive search for individual components. 
- l_ip.leakage_power_dev = 1000000; + l_ip.area_dev = area_dev; + l_ip.dynamic_power_dev = dynamic_power_dev; + l_ip.leakage_power_dev = leakage_power_dev; - throughput_overflow=true; //Reset overflow flag before start optimization iterations - latency_overflow=true; + //Reset overflow flag before start optimization iterations + throughput_overflow = true; + latency_overflow = true; - temp_res = &local_result; //Clean up the result for optimized for ED^2P - temp_res->cleanup(); - } + //Clean up the result for optimized for ED^2P + temp_res = &local_result; + temp_res->cleanup(); + } - while ((throughput_overflow || latency_overflow)&&l_ip.cycle_time_dev > 10)// && l_ip.delay_dev > 10 - { - compute_base_power(); - - l_ip.cycle_time_dev-=10;//This is the time_dev to be used for next iteration - - // from best area to worst area -->worst timing to best timing - if ((((local_result.cycle_time - throughput) <= 1e-10 ) && (local_result.access_time - latency)<= 1e-10)|| - (local_result.data_array2->area_efficiency < area_efficiency_threshold && l_ip.assoc == 0)) - { //if no satisfiable solution is found,the most aggressive one is left - candidate_solutions.push_back(local_result); - //output_data_csv(candidate_solutions.back()); - if (((local_result.cycle_time - throughput) <= 1e-10) && ((local_result.access_time - latency)<= 1e-10)) - //ensure stop opt not because of cam - { - throughput_overflow=false; - latency_overflow=false; - } - - } - else - { - //TODO: whether checking the partial satisfied results too, or just change the mark??? 
- if ((local_result.cycle_time - throughput) <= 1e-10) - throughput_overflow=false; - if ((local_result.access_time - latency)<= 1e-10) - latency_overflow=false; - - if (l_ip.cycle_time_dev > 10) - { //if not >10 local_result is the last result, it cannot be cleaned up - temp_res = &local_result; //Only solutions not saved in the list need to be cleaned up - temp_res->cleanup(); - } - } -// l_ip.cycle_time_dev-=10; -// l_ip.delay_dev-=10; + while ((throughput_overflow || latency_overflow) && + l_ip.cycle_time_dev > cycle_time_dev_threshold) { + compute_base_power(); + + //This is the time_dev to be used for next iteration + l_ip.cycle_time_dev -= cycle_time_dev_threshold; + + // from best area to worst area -->worst timing to best timing + if ((((local_result.cycle_time - throughput) <= 1e-10 ) && + (local_result.access_time - latency) <= 1e-10) || + (local_result.data_array2->area_efficiency < + area_efficiency_threshold && l_ip.assoc == 0)) { + //if no satisfiable solution is found,the most aggressive one + //is left + candidate_solutions.push_back(local_result); + if (((local_result.cycle_time - throughput) <= 1e-10) && + ((local_result.access_time - latency) <= 1e-10)) { + //ensure stop opt not because of cam + throughput_overflow = false; + latency_overflow = false; + } + } else { + if ((local_result.cycle_time - throughput) <= 1e-10) + throughput_overflow = false; + if ((local_result.access_time - latency) <= 1e-10) + latency_overflow = false; + + //if not >10 local_result is the last result, it cannot be + //cleaned up + if (l_ip.cycle_time_dev > cycle_time_dev_threshold) { + //Only solutions not saved in the list need to be + //cleaned up + temp_res = &local_result; + temp_res->cleanup(); } + } + } - if (l_ip.assoc > 0) - { - //For array structures except CAM and FA, Give warning but still provide a result with best timing found - if (throughput_overflow==true) - cout<< "Warning: " << name<<" array structure cannot satisfy throughput constraint." 
<< endl; - if (latency_overflow==true) - cout<< "Warning: " << name<<" array structure cannot satisfy latency constraint." << endl; + if (l_ip.assoc > 0) { + //For array structures except CAM and FA, Give warning but still + //provide a result with best timing found + if (throughput_overflow == true) + cout << "Warning: " << name + << " array structure cannot satisfy throughput constraint." + << endl; + if (latency_overflow == true) + cout << "Warning: " << name + << " array structure cannot satisfy latency constraint." + << endl; } -// else -// { -// /*According to "Content-Addressable Memory (CAM) Circuits and -// Architectures": A Tutorial and Survey -// by Kostas Pagiamtzis et al. -// CAM structures can be heavily pipelined and use look-ahead techniques, -// therefore timing can be relaxed. But McPAT does not model the advanced -// techniques. If continue optimizing, the area efficiency will be too low -// */ -// //For CAM and FA, stop opt if area efficiency is too low -// if (throughput_overflow==true) -// cout<< "Warning: " <<" McPAT stopped optimization on throughput for "<< name -// <<" array structure because its area efficiency is below "< (candidate_iter)->power.readOp.dynamic) - { - min_dynamic_energy = (candidate_iter)->power.readOp.dynamic; - min_dynamic_energy_iter = candidate_iter; - local_result = *(min_dynamic_energy_iter); - //TODO: since results are reordered results and l_ip may miss match. Therefore, the final output spread sheets may show the miss match. 
- - } - else - { - candidate_iter->cleanup() ; - } - - } + double min_dynamic_energy = BIGNUM; + if (candidate_solutions.empty() == false) { + local_result.valid = true; + for (candidate_iter = candidate_solutions.begin(); + candidate_iter != candidate_solutions.end(); + ++candidate_iter) { + if (min_dynamic_energy > + (candidate_iter)->power.readOp.dynamic) { + min_dynamic_energy = + (candidate_iter)->power.readOp.dynamic; + min_dynamic_energy_iter = candidate_iter; + local_result = *(min_dynamic_energy_iter); + } else { + candidate_iter->cleanup() ; + } + } - } - candidate_solutions.clear(); - } - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - - double macro_layout_overhead = g_tp.macro_layout_overhead; - double chip_PR_overhead = g_tp.chip_layout_overhead; - double total_overhead = macro_layout_overhead*chip_PR_overhead; - local_result.area *= total_overhead; - - //maintain constant power density - double pppm_t[4] = {total_overhead,1,1,total_overhead}; - - double sckRation = g_tp.sckt_co_eff; - local_result.power.readOp.dynamic *= sckRation; - local_result.power.writeOp.dynamic *= sckRation; - local_result.power.searchOp.dynamic *= sckRation; - local_result.power.readOp.leakage *= l_ip.nbanks; - local_result.power.readOp.longer_channel_leakage = - local_result.power.readOp.leakage*long_channel_device_reduction; - local_result.power = local_result.power* pppm_t; - - local_result.data_array2->power.readOp.dynamic *= sckRation; - local_result.data_array2->power.writeOp.dynamic *= sckRation; - local_result.data_array2->power.searchOp.dynamic *= sckRation; - local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; - local_result.data_array2->power.readOp.longer_channel_leakage = - local_result.data_array2->power.readOp.leakage*long_channel_device_reduction; - local_result.data_array2->power = local_result.data_array2->power* pppm_t; - - - if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) - 
{ - local_result.tag_array2->power.readOp.dynamic *= sckRation; - local_result.tag_array2->power.writeOp.dynamic *= sckRation; - local_result.tag_array2->power.searchOp.dynamic *= sckRation; - local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; - local_result.tag_array2->power.readOp.longer_channel_leakage = - local_result.tag_array2->power.readOp.leakage*long_channel_device_reduction; - local_result.tag_array2->power = local_result.tag_array2->power* pppm_t; } + candidate_solutions.clear(); + } + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + + double macro_layout_overhead = g_tp.macro_layout_overhead; + double chip_PR_overhead = g_tp.chip_layout_overhead; + double total_overhead = macro_layout_overhead * chip_PR_overhead; + local_result.area *= total_overhead; + + //maintain constant power density + double pppm_t[4] = {total_overhead, 1, 1, total_overhead}; + + double sckRation = g_tp.sckt_co_eff; + local_result.power.readOp.dynamic *= sckRation; + local_result.power.writeOp.dynamic *= sckRation; + local_result.power.searchOp.dynamic *= sckRation; + local_result.power.readOp.leakage *= l_ip.nbanks; + local_result.power.readOp.longer_channel_leakage = + local_result.power.readOp.leakage * long_channel_device_reduction; + local_result.power = local_result.power * pppm_t; + + local_result.data_array2->power.readOp.dynamic *= sckRation; + local_result.data_array2->power.writeOp.dynamic *= sckRation; + local_result.data_array2->power.searchOp.dynamic *= sckRation; + local_result.data_array2->power.readOp.leakage *= l_ip.nbanks; + local_result.data_array2->power.readOp.longer_channel_leakage = + local_result.data_array2->power.readOp.leakage * + long_channel_device_reduction; + local_result.data_array2->power = local_result.data_array2->power * pppm_t; + + + if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) { + local_result.tag_array2->power.readOp.dynamic *= sckRation; + 
local_result.tag_array2->power.writeOp.dynamic *= sckRation; + local_result.tag_array2->power.searchOp.dynamic *= sckRation; + local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks; + local_result.tag_array2->power.readOp.longer_channel_leakage = + local_result.tag_array2->power.readOp.leakage * + long_channel_device_reduction; + local_result.tag_array2->power = + local_result.tag_array2->power * pppm_t; + } + power = local_result.power; + + output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; } void ArrayST::leakage_feedback(double temperature) @@ -296,7 +307,6 @@ void ArrayST::leakage_feedback(double temperature) } } -ArrayST:: ~ArrayST() -{ - local_result.cleanup(); +ArrayST::~ArrayST() { + local_result.cleanup(); } diff --git a/ext/mcpat/array.h b/ext/mcpat/array.h index 8c6124d46..6a4c0b6cb 100644 --- a/ext/mcpat/array.h +++ b/ext/mcpat/array.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -43,59 +44,42 @@ using namespace std; -class ArrayST :public Component{ - public: - ArrayST(){}; - ArrayST(const InputParameter *configure_interface, string _name, enum Device_ty device_ty_, bool opt_local_=true, enum Core_type core_ty_=Inorder, bool _is_default=true); - - InputParameter l_ip; - string name; - enum Device_ty device_ty; - bool opt_local; - enum Core_type core_ty; - bool is_default; - uca_org_t local_result; +class ArrayST : public McPATComponent { +public: + static double area_efficiency_threshold; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; + // These are used for the CACTI interface. + static int ed; + static int delay_wt; + static int cycle_time_wt; + static int area_wt; + static int dynamic_power_wt; + static int leakage_power_wt; + static int delay_dev; + static int cycle_time_dev; + static int area_dev; + static int dynamic_power_dev; + static int leakage_power_dev; + static int cycle_time_dev_threshold; - virtual void optimize_array(); - virtual void compute_base_power(); - virtual ~ArrayST(); + InputParameter l_ip; + enum Device_ty device_ty; + bool opt_local; + enum Core_type core_ty; + bool is_default; + uca_org_t local_result; + statsDef stats_t; - void leakage_feedback(double temperature); -}; + ArrayST(XMLNode* _xml_data, const InputParameter *configure_interface, + string _name, enum Device_ty device_ty_, double _clockRate = 0.0f, + bool opt_local_ = true, + enum Core_type core_ty_ = Inorder, bool _is_default = true); + void computeArea(); + void computeEnergy(); + void compute_base_power(); + ~ArrayST(); -class InstCache :public Component{ -public: - ArrayST* caches; - ArrayST* missb; - ArrayST* ifb; - ArrayST* prefetchb; - powerDef power_t;//temp value holder for both (max) power and runtime power - InstCache(){caches=0;missb=0;ifb=0;prefetchb=0;}; - ~InstCache(){ - if (caches) {//caches->local_result.cleanup(); - 
delete caches; caches=0;} - if (missb) {//missb->local_result.cleanup(); - delete missb; missb=0;} - if (ifb) {//ifb->local_result.cleanup(); - delete ifb; ifb=0;} - if (prefetchb) {//prefetchb->local_result.cleanup(); - delete prefetchb; prefetchb=0;} - }; -}; - -class DataCache :public InstCache{ -public: - ArrayST* wbb; - DataCache(){wbb=0;}; - ~DataCache(){ - if (wbb) {//wbb->local_result.cleanup(); - delete wbb; wbb=0;} - }; + void leakage_feedback(double temperature); }; -#endif /* TLB_H_ */ +#endif /* ARRAY_H_ */ diff --git a/ext/mcpat/basic_components.cc b/ext/mcpat/basic_components.cc index f288d7479..3835460f3 100644 --- a/ext/mcpat/basic_components.cc +++ b/ext/mcpat/basic_components.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -34,94 +35,327 @@ #include #include "basic_components.h" +#include "cacheunit.h" +#include "common.h" -double longer_channel_device_reduction( - enum Device_ty device_ty, - enum Core_type core_ty) -{ +// Turn this to true to get debugging messages +bool McPATComponent::debug = false; - double longer_channel_device_percentage_core; - double longer_channel_device_percentage_uncore; - double longer_channel_device_percentage_llc; +bool McPATComponent::opt_for_clk = true; +int McPATComponent::longer_channel_device = 0; +// Number of cycles per second, 2GHz = 2e9 +double McPATComponent::target_core_clockrate = 2e9; +double McPATComponent::total_cycles = 0.0f; +double McPATComponent::execution_time = 0.0f; +int McPATComponent::physical_address_width = 0; +int McPATComponent::virtual_address_width = 0; +int McPATComponent::virtual_memory_page_size = 0; +int McPATComponent::data_path_width = 0; - double long_channel_device_reduction; +void McPATOutput::reset() { + storage = 0.0; + area = 0.0; + peak_dynamic_power = 0.0; + subthreshold_leakage_power = 0.0; + gate_leakage_power = 0.0; + runtime_dynamic_energy = 0.0; +} - longer_channel_device_percentage_llc = 1.0; - longer_channel_device_percentage_uncore = 0.82; - if (core_ty==OOO) - { - longer_channel_device_percentage_core = 0.56;//0.54 Xeon Tulsa //0.58 Nehelam - //longer_channel_device_percentage_uncore = 0.76;//0.85 Nehelam +McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs) { + McPATOutput to_return; + to_return.storage = lhs.storage + rhs.storage; + to_return.area = lhs.area + rhs.area; + to_return.peak_dynamic_power = lhs.peak_dynamic_power + + rhs.peak_dynamic_power; + to_return.subthreshold_leakage_power = lhs.subthreshold_leakage_power + + rhs.subthreshold_leakage_power; + to_return.gate_leakage_power = lhs.gate_leakage_power + + rhs.gate_leakage_power; + to_return.runtime_dynamic_energy = 
lhs.runtime_dynamic_energy + + rhs.runtime_dynamic_energy; + return to_return; +} + +void McPATOutput::operator+=(const McPATOutput &rhs) { + storage += rhs.storage; + area += rhs.area; + peak_dynamic_power += rhs.peak_dynamic_power; + subthreshold_leakage_power += rhs.subthreshold_leakage_power; + gate_leakage_power += rhs.gate_leakage_power; + runtime_dynamic_energy += rhs.runtime_dynamic_energy; +} + +McPATComponent::McPATComponent() + : xml_data(NULL), name("") { +} + +McPATComponent::McPATComponent(XMLNode* _xml_data) + : xml_data(_xml_data), name("") { +} + +McPATComponent::McPATComponent(XMLNode* _xml_data, + InputParameter* _interface_ip) + : xml_data(_xml_data), interface_ip(*_interface_ip), name("") { +} + +McPATComponent::~McPATComponent() { +} + +void McPATComponent::recursiveInstantiate() { + if (debug) { + fprintf(stderr, "WARNING: Called recursiveInstantiate from %s, with ", + "'type' %s\n", name.c_str(), xml_data->getAttribute("type")); + } + int i; + int numChildren = xml_data->nChildNode("component"); + for (i = 0; i < numChildren; i++ ) { + // For each child node of the system, + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "Core") + warnIncompleteComponentType(type); + STRCMP(type, "CacheUnit") + children.push_back(new CacheUnit(childXML, &interface_ip)); + STRCMP(type, "CacheController") + warnIncompleteComponentType(type); + STRCMP(type, "MemoryController") + warnIncompleteComponentType(type); + STRCMP(type, "Memory") + warnIncompleteComponentType(type); + STRCMP(type, "OnChipNetwork") + warnIncompleteComponentType(type); + STRCMP(type, "BusInterconnect") + warnIncompleteComponentType(type); + STRCMP(type, "Directory") + warnIncompleteComponentType(type); - } - else - { - longer_channel_device_percentage_core = 0.8;//0.8;//Niagara - //longer_channel_device_percentage_uncore = 
0.9;//Niagara - } - - if (device_ty==Core_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_core) - + longer_channel_device_percentage_core * g_tp.peri_global.long_channel_leakage_reduction; - } - else if (device_ty==Uncore_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_uncore) - + longer_channel_device_percentage_uncore * g_tp.peri_global.long_channel_leakage_reduction; - } - else if (device_ty==LLC_device) - { - long_channel_device_reduction = (1- longer_channel_device_percentage_llc) - + longer_channel_device_percentage_llc * g_tp.peri_global.long_channel_leakage_reduction; - } else - { - cout<<"unknown device category"<getAttribute("type")); + } + + // TODO: This calculation is incorrect and is overwritten by computeEnergy + // Fix it up so that the values are available at the correct times + int i; + int numChildren = children.size(); + area.set_area(0.0); + output_data.area = 0.0; + for (i = 0; i < numChildren; i++) { + children[i]->computeArea(); + output_data.area += area.get_area(); + } +} + +void McPATComponent::computeEnergy() { + if (debug) { + fprintf(stderr, "WARNING: Called computeEnergy from %s, with 'type' ", + "%s\n", name.c_str(), xml_data->getAttribute("type")); + } + + power.reset(); + rt_power.reset(); + memset(&output_data, 0, sizeof(McPATOutput)); + int i; + int numChildren = children.size(); + for (i = 0; i < numChildren; i++) { + children[i]->computeEnergy(); + output_data += children[i]->output_data; + } +} + +void McPATComponent::displayData(uint32_t indent, int plevel) { + if (debug) { + fprintf(stderr, "WARNING: Called displayData from %s, with 'type' ", + "%s\n", name.c_str(), xml_data->getAttribute("type")); + } + + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + + double leakage_power = output_data.subthreshold_leakage_power + + output_data.gate_leakage_power; + double total_runtime_energy = output_data.runtime_dynamic_energy + + 
leakage_power * execution_time; + cout << indent_str << name << ":" << endl; + cout << indent_str_next << "Area = " << output_data.area << " mm^2" + << endl; + cout << indent_str_next << "Peak Dynamic Power = " + << output_data.peak_dynamic_power << " W" << endl; + cout << indent_str_next << "Subthreshold Leakage Power = " + << output_data.subthreshold_leakage_power << " W" << endl; + cout << indent_str_next << "Gate Leakage Power = " + << output_data.gate_leakage_power << " W" << endl; + cout << indent_str_next << "Runtime Dynamic Power = " + << (output_data.runtime_dynamic_energy / execution_time) << " W" + << endl; + cout << indent_str_next << "Runtime Dynamic Energy = " + << output_data.runtime_dynamic_energy << " J" << endl; + cout << indent_str_next << "Total Runtime Energy = " + << total_runtime_energy << " J" << endl; + cout << endl; + + // Recursively print children + int i; + int numChildren = children.size(); + for (i = 0; i < numChildren; i++) { + children[i]->displayData(indent + 4, plevel); + } +} + +void McPATComponent::errorUnspecifiedParam(string param) { + fprintf(stderr, "ERROR: Parameter must be specified in %s: %s\n", + name.c_str(), param.c_str()); + exit(1); +} + +void McPATComponent::errorNonPositiveParam(string param) { + fprintf(stderr, "ERROR: Parameter must be positive in %s: %s\n", + name.c_str(), param.c_str()); + exit(1); +} + +void McPATComponent::warnUnrecognizedComponent(XMLCSTR component) { + fprintf(stderr, "WARNING: Component type not recognized in %s: %s\n", + name.c_str(), component); +} + +void McPATComponent::warnUnrecognizedParam(XMLCSTR param) { + fprintf(stderr, "WARNING: Parameter not recognized in %s: %s\n", + name.c_str(), param); +} + +void McPATComponent::warnUnrecognizedStat(XMLCSTR stat) { + fprintf(stderr, "WARNING: Statistic not recognized in %s: %s\n", + name.c_str(), stat); +} + +void McPATComponent::warnIncompleteComponentType(XMLCSTR type) { + fprintf(stderr, " WARNING: %s handling not yet complete\n", type); 
+} + +void McPATComponent::warnMissingComponentType(XMLCSTR id) { + if (id) { + fprintf(stderr, + "WARNING: Ignoring a component due to the missing type: %s\n", + id); + } else { + fprintf(stderr, + "WARNING: Ignoring a component in %s due to the missing type\n", + name.c_str()); + } +} + +void McPATComponent::warnMissingParamName(XMLCSTR id) { + if (id) { + fprintf(stderr, + "WARNING: Ignoring a parameter due to the missing name: %s\n", + id); + } else { + fprintf(stderr, + "WARNING: Ignoring a parameter in %s due to the missing name\n", + name.c_str()); + } +} + +void McPATComponent::warnMissingStatName(XMLCSTR id) { + if (id) { + fprintf(stderr, + "WARNING: Ignoring a statistic due to the missing name: %s\n", + id); + } else { + fprintf(stderr, + "WARNING: Ignoring a statistic in %s due to the missing name\n", + name.c_str()); + } +} + +double longer_channel_device_reduction( + enum Device_ty device_ty, + enum Core_type core_ty) { + + double longer_channel_device_percentage_core; + double longer_channel_device_percentage_uncore; + double longer_channel_device_percentage_llc; + + double long_channel_device_reduction; + + longer_channel_device_percentage_llc = 1.0; + longer_channel_device_percentage_uncore = 0.82; + if (core_ty == OOO) { + //0.54 Xeon Tulsa //0.58 Nehalem + longer_channel_device_percentage_core = 0.56; + } else { + //0.8;//Niagara + longer_channel_device_percentage_core = 0.8; + } + + if (device_ty == Core_device) { + long_channel_device_reduction = + (1 - longer_channel_device_percentage_core) + + longer_channel_device_percentage_core * + g_tp.peri_global.long_channel_leakage_reduction; + } else if (device_ty == Uncore_device) { + long_channel_device_reduction = + (1 - longer_channel_device_percentage_uncore) + + longer_channel_device_percentage_uncore * + g_tp.peri_global.long_channel_leakage_reduction; + } else if (device_ty == LLC_device) { + long_channel_device_reduction = + (1 - longer_channel_device_percentage_llc) + + 
longer_channel_device_percentage_llc * + g_tp.peri_global.long_channel_leakage_reduction; + } else { + cout << "ERROR: Unknown device category: " << device_ty << endl; + exit(0); + } - return long_channel_device_reduction; + return long_channel_device_reduction; } -statsComponents operator+(const statsComponents & x, const statsComponents & y) -{ - statsComponents z; +statsComponents operator+(const statsComponents & x, const statsComponents & y) { + statsComponents z; - z.access = x.access + y.access; - z.hit = x.hit + y.hit; - z.miss = x.miss + y.miss; + z.access = x.access + y.access; + z.hit = x.hit + y.hit; + z.miss = x.miss + y.miss; - return z; + return z; } -statsComponents operator*(const statsComponents & x, double const * const y) -{ - statsComponents z; +statsComponents operator*(const statsComponents & x, double const * const y) { + statsComponents z; - z.access = x.access*y[0]; - z.hit = x.hit*y[1]; - z.miss = x.miss*y[2]; + z.access = x.access * y[0]; + z.hit = x.hit * y[1]; + z.miss = x.miss * y[2]; - return z; + return z; } -statsDef operator+(const statsDef & x, const statsDef & y) -{ - statsDef z; +statsDef operator+(const statsDef & x, const statsDef & y) { + statsDef z; - z.readAc = x.readAc + y.readAc; - z.writeAc = x.writeAc + y.writeAc; - z.searchAc = x.searchAc + y.searchAc; - return z; + z.readAc = x.readAc + y.readAc; + z.writeAc = x.writeAc + y.writeAc; + z.searchAc = x.searchAc + y.searchAc; + return z; } -statsDef operator*(const statsDef & x, double const * const y) -{ - statsDef z; +statsDef operator*(const statsDef & x, double const * const y) { + statsDef z; - z.readAc = x.readAc*y; - z.writeAc = x.writeAc*y; - z.searchAc = x.searchAc*y; - return z; + z.readAc = x.readAc * y; + z.writeAc = x.writeAc * y; + z.searchAc = x.searchAc * y; + return z; } diff --git a/ext/mcpat/basic_components.h b/ext/mcpat/basic_components.h index ce3e639cd..ea07d2779 100644 --- a/ext/mcpat/basic_components.h +++ b/ext/mcpat/basic_components.h @@ -2,6 
+2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -34,9 +35,15 @@ #include -#include "XML_Parse.h" +#include "component.h" #include "parameter.h" +#include "xmlParser.h" +/** + * TODO: Since revisions to McPAT aim to make the component hierarchy more + * modular, many of the parameter and statistics classes/structs included in + * this file should be moved to the files for their respective components. 
+ */ const double cdb_overhead = 1.1; enum FU_type { @@ -46,21 +53,28 @@ enum FU_type { }; enum Core_type { - OOO, - Inorder + OOO, + Inorder }; enum Renaming_type { RAMbased, - CAMbased + CAMbased }; enum Scheduler_type { PhysicalRegFile, - ReservationStation + ReservationStation }; -enum cache_level { +enum Cache_type { + DATA_CACHE, + INSTRUCTION_CACHE, + MIXED +}; + +enum CacheLevel { + L1, L2, L3, L1Directory, @@ -68,198 +82,408 @@ enum cache_level { }; enum MemoryCtrl_type { - MC, //memory controller - FLASHC //flash controller + MC, //memory controller + FLASHC //flash controller }; enum Dir_type { - ST,//shadowed tag - DC,//directory cache - SBT,//static bank tag - NonDir + ST,//shadowed tag + DC,//directory cache + SBT,//static bank tag + NonDir }; enum Cache_policy { - Write_through, - Write_back + Write_through, + Write_back }; enum Device_ty { - Core_device, - Uncore_device, - LLC_device + Core_device, + Uncore_device, + LLC_device }; -class statsComponents -{ - public: +enum Access_mode { + Normal, + Sequential, + Fast +}; + +class statsComponents { +public: double access; double hit; double miss; statsComponents() : access(0), hit(0), miss(0) {} - statsComponents(const statsComponents & obj) { *this = obj; } - statsComponents & operator=(const statsComponents & rhs) - { - access = rhs.access; - hit = rhs.hit; - miss = rhs.miss; - return *this; + statsComponents(const statsComponents & obj) { + *this = obj; + } + statsComponents & operator=(const statsComponents & rhs) { + access = rhs.access; + hit = rhs.hit; + miss = rhs.miss; + return *this; + } + void reset() { + access = 0; + hit = 0; + miss = 0; } - void reset() { access = 0; hit = 0; miss = 0;} - friend statsComponents operator+(const statsComponents & x, const statsComponents & y); - friend statsComponents operator*(const statsComponents & x, double const * const y); + friend statsComponents operator+(const statsComponents & x, + const statsComponents & y); + friend statsComponents 
operator*(const statsComponents & x, + double const * const y); }; -class statsDef -{ - public: +class statsDef { +public: statsComponents readAc; statsComponents writeAc; statsComponents searchAc; - - statsDef() : readAc(), writeAc(),searchAc() { } - void reset() { readAc.reset(); writeAc.reset();searchAc.reset();} + statsComponents dataReadAc; + statsComponents dataWriteAc; + statsComponents tagReadAc; + statsComponents tagWriteAc; + + statsDef() : readAc(), writeAc(), searchAc() { } + void reset() { + readAc.reset(); + writeAc.reset(); + searchAc.reset(); + } friend statsDef operator+(const statsDef & x, const statsDef & y); friend statsDef operator*(const statsDef & x, double const * const y); }; +/** + * An object to store the computed data that will be output from McPAT on a + * per-component-instance basis. Currently, this includes the amount of storage + * that the component comprises, its chip area, and power and energy + * calculations. + */ +class McPATOutput { +public: + // Storage is in bytes (B) + double storage; + // Area is in mm^2 + double area; + // Peak Dynamic Power is in W + double peak_dynamic_power; + // Subthreshold Leakage Power is in W + double subthreshold_leakage_power; + // Gate Leakage Power is in W + double gate_leakage_power; + // Runtime Dynamic Energy is in J + double runtime_dynamic_energy; + + void reset(); + + friend McPATOutput operator+(const McPATOutput &lhs, const McPATOutput &rhs); + void operator+=(const McPATOutput &rhs); +}; + +/** + * A McPATComponent encompasses all the parts that are common to any component + * for which McPAT may compute and print power, area, and timing data. It + * includes a pointer to the XML data from which the component gathers its + * input parameters, it stores the variables that are commonly used in all + * components, and it maintains the hierarchical structure to recursively + * compute and print output. 
This is a base class from which all components + * should inherit this functionality (possibly through other descended + * classes). +*/ +class McPATComponent : public Component { +public: + static bool debug; + + // Variables shared across the system by all McPATComponents + static bool opt_for_clk; + static int longer_channel_device; + static double execution_time; + static int physical_address_width; + static int virtual_address_width; + static int virtual_memory_page_size; + static int data_path_width; + + // Although these two variables are static right now, they need to be + // modulated on a per-frequency-domain basis eventually. + static double target_core_clockrate; + static double total_cycles; + + XMLNode* xml_data; + InputParameter interface_ip; + string name; + // Number of cycles per second (consider changing name) + double clockRate; + vector children; + // The data structure that is printed in displayData + McPATOutput output_data; + // Set this to contain the stats to calculate peak dynamic power + statsDef tdp_stats; + // Set this to contain the stats to calculate runtime dynamic energy/power + statsDef rtp_stats; + // Holds the peak dynamic power calculation + powerDef power_t; + // Holds the runtime dynamic power calculation + powerDef rt_power; + + McPATComponent(); + // Which of these is a better way of doing things?! + McPATComponent(XMLNode* _xml_data); + McPATComponent(XMLNode* _xml_data, InputParameter* _interface_ip); + virtual void recursiveInstantiate(); + virtual void computeArea(); + // This function should probably be pure virtual, but it's too early in + // the modifying process to know for sure. 
Note that each component has + // to calculate its own power consumption + virtual void computeEnergy(); + virtual void displayData(uint32_t indent, int plevel); + ~McPATComponent(); + + protected: + void errorUnspecifiedParam(string param); + void errorNonPositiveParam(string param); + void warnUnrecognizedComponent(XMLCSTR component); + void warnUnrecognizedParam(XMLCSTR param); + void warnUnrecognizedStat(XMLCSTR stat); + void warnIncompleteComponentType(XMLCSTR type); + void warnMissingComponentType(XMLCSTR id); + void warnMissingParamName(XMLCSTR id); + void warnMissingStatName(XMLCSTR id); +}; + double longer_channel_device_reduction( - enum Device_ty device_ty=Core_device, - enum Core_type core_ty=Inorder); + enum Device_ty device_ty = Core_device, + enum Core_type core_ty = Inorder); -class CoreDynParam { +class CoreParameters { public: - CoreDynParam(){}; - CoreDynParam(ParseXML *XML_interface, int ithCore_); - // :XML(XML_interface), - // ithCore(ithCore_) - // core_ty(inorder), - // rm_ty(CAMbased), - // scheu_ty(PhysicalRegFile), - // clockRate(1e9),//1GHz - // arch_ireg_width(32), - // arch_freg_width(32), - // phy_ireg_width(128), - // phy_freg_width(128), - // perThreadState(8), - // globalCheckpoint(32), - // instructionLength(32){}; - //ParseXML * XML; - bool opt_local; - bool x86; - bool Embedded; - enum Core_type core_ty; - enum Renaming_type rm_ty; + bool opt_local; + bool x86; + bool Embedded; + enum Core_type core_ty; + enum Renaming_type rm_ty; enum Scheduler_type scheu_ty; - double clockRate,executionTime; - int arch_ireg_width, arch_freg_width, phy_ireg_width, phy_freg_width; - int num_IRF_entry, num_FRF_entry, num_ifreelist_entries, num_ffreelist_entries; - int fetchW, decodeW,issueW,peak_issueW, commitW,peak_commitW, predictionW, fp_issueW, fp_decodeW; - int perThreadState, globalCheckpoint, instruction_length, pc_width, opcode_length, micro_opcode_length; - int num_hthreads, pipeline_stages, fp_pipeline_stages, num_pipelines, 
num_fp_pipelines; - int num_alus, num_muls; + double clockRate; + int arch_ireg_width; + int arch_freg_width; + int archi_Regs_IRF_size; + int archi_Regs_FRF_size; + int phy_ireg_width; + int phy_freg_width; + int num_IRF_entry; + int num_FRF_entry; + int num_ifreelist_entries; + int num_ffreelist_entries; + int fetchW; + int decodeW; + int issueW; + int peak_issueW; + int commitW; + int peak_commitW; + int predictionW; + int fp_issueW; + int fp_decodeW; + int perThreadState; + int globalCheckpoint; + int instruction_length; + int pc_width; + int opcode_width; + int micro_opcode_length; + int num_hthreads; + int pipeline_stages; + int fp_pipeline_stages; + int num_pipelines; + int num_fp_pipelines; + int num_alus; + int num_muls; double num_fpus; - int int_data_width, fp_data_width,v_address_width, p_address_width; - double pipeline_duty_cycle, total_cycles, busy_cycles, idle_cycles; - bool regWindowing,multithreaded; + int int_data_width; + int fp_data_width; + int v_address_width; + int p_address_width; + bool regWindowing; + bool multithreaded; double pppm_lkg_multhread[4]; - double IFU_duty_cycle,BR_duty_cycle,LSU_duty_cycle,MemManU_I_duty_cycle, - MemManU_D_duty_cycle, ALU_duty_cycle,MUL_duty_cycle, - FPU_duty_cycle, ALU_cdb_duty_cycle,MUL_cdb_duty_cycle, - FPU_cdb_duty_cycle; - ~CoreDynParam(){}; + int ROB_size; + int ROB_assoc; + int ROB_nbanks; + int ROB_tag_width; + int scheduler_assoc; + int scheduler_nbanks; + int register_window_size; + double register_window_throughput; + double register_window_latency; + int register_window_assoc; + int register_window_nbanks; + int register_window_tag_width; + int register_window_rw_ports; + int phy_Regs_IRF_size; + int phy_Regs_IRF_assoc; + int phy_Regs_IRF_nbanks; + int phy_Regs_IRF_tag_width; + int phy_Regs_IRF_rd_ports; + int phy_Regs_IRF_wr_ports; + int phy_Regs_FRF_size; + int phy_Regs_FRF_assoc; + int phy_Regs_FRF_nbanks; + int phy_Regs_FRF_tag_width; + int phy_Regs_FRF_rd_ports; + int phy_Regs_FRF_wr_ports; + 
int front_rat_nbanks;
+    int front_rat_rw_ports;
+    int retire_rat_nbanks;
+    int retire_rat_rw_ports;
+    int freelist_nbanks;
+    int freelist_rw_ports;
+    int memory_ports;
+    int load_buffer_size;
+    int load_buffer_assoc;
+    int load_buffer_nbanks;
+    int store_buffer_size;
+    int store_buffer_assoc;
+    int store_buffer_nbanks;
+    int instruction_window_size;
+    int fp_instruction_window_size;
+    int instruction_buffer_size;
+    int instruction_buffer_assoc;
+    int instruction_buffer_nbanks;
+    int instruction_buffer_tag_width;
+    int number_instruction_fetch_ports;
+    int RAS_size;
+    int execu_int_bypass_ports;
+    int execu_mul_bypass_ports;
+    int execu_fp_bypass_ports;
+    Wire_type execu_bypass_wire_type;
+    Wire_type execu_broadcast_wt;
+    int execu_wire_mat_type;
+    double execu_bypass_base_width;
+    double execu_bypass_base_height;
+    int execu_bypass_start_wiring_level;
+    double execu_bypass_route_over_perc;
+    double broadcast_numerator;
 };
-class CacheDynParam {
+class CoreStatistics {  // Plain record of per-core figures: duty cycles, cycle counts and event counts (all double)
 public:
-    CacheDynParam(){};
-    CacheDynParam(ParseXML *XML_interface, int ithCache_);
-    string name;
-    enum Dir_type dir_ty;
-    double clockRate,executionTime;
-    double capacity, blockW, assoc, nbanks;
-    double throughput, latency;
-    double duty_cycle, dir_duty_cycle;
-    //double duty_cycle;
-    int missb_size, fu_size, prefetchb_size, wbb_size;
-    ~CacheDynParam(){};
+    double pipeline_duty_cycle;
+    double total_cycles;
+    double busy_cycles;
+    double idle_cycles;
+    double IFU_duty_cycle;
+    double BR_duty_cycle;
+    double LSU_duty_cycle;
+    double MemManU_I_duty_cycle;
+    double MemManU_D_duty_cycle;
+    double ALU_duty_cycle;
+    double MUL_duty_cycle;
+    double FPU_duty_cycle;
+    double ALU_cdb_duty_cycle;
+    double MUL_cdb_duty_cycle;
+    double FPU_cdb_duty_cycle;
+    double ROB_reads;
+    double ROB_writes;
+    double total_instructions;
+    double int_instructions;
+    double fp_instructions;
+    double branch_instructions;
+    double branch_mispredictions;
+    double load_instructions;
+    double store_instructions;
+    double committed_instructions;
+    double committed_int_instructions;
+    double committed_fp_instructions;
+    double rename_reads;
+    double rename_writes;
+    double fp_rename_reads;
+    double fp_rename_writes;
+    double inst_window_reads;
+    double inst_window_writes;
+    double inst_window_wakeup_accesses;
+    double fp_inst_window_reads;
+    double fp_inst_window_writes;
+    double fp_inst_window_wakeup_accesses;
+    double int_regfile_reads;
+    double float_regfile_reads;
+    double int_regfile_writes;
+    double float_regfile_writes;
+    double context_switches;
+    double ialu_accesses;
+    double fpu_accesses;
+    double mul_accesses;
+    double cdb_alu_accesses;
+    double cdb_fpu_accesses;
+    double cdb_mul_accesses;
+    double function_calls;
 };
-class MCParam {
+class MCParameters {  // Plain record of memory-controller configuration values; no constructor, so members start uninitialised unless the owner sets them
 public:
-    MCParam(){};
-    MCParam(ParseXML *XML_interface, int ithCache_);
-    string name;
-    double clockRate,num_mcs, peakDataTransferRate, num_channels;
-    // double mcTEPowerperGhz;
-    // double mcPHYperGbit;
-    // double area;
-    int llcBlockSize, dataBusWidth, addressBusWidth;
-    int opcodeW;
-    int memAccesses;
-    int memRank;
-    int type;
-    double frontend_duty_cycle, duty_cycle, perc_load;
-    double executionTime, reads, writes;
-    bool LVDS, withPHY;
-
-    ~MCParam(){};
+    double clockRate;
+    enum MemoryCtrl_type mc_type;
+    double num_mcs;
+    int num_channels;
+    int llcBlockSize;
+    int dataBusWidth;  // NOTE(review): databus_width is also declared on the next line - confirm which one consumers read
+    int databus_width;
+    int llc_line_length;
+    int req_window_size_per_channel;
+    int IO_buffer_size_per_channel;
+    int addressbus_width;
+    int opcodeW;
+    int type;  // NOTE(review): separate from mc_type above - confirm the two are not redundant
+    bool LVDS;
+    bool withPHY;
+    int peak_transfer_rate;
+    int number_ranks;
+    int reorder_buffer_assoc;
+    int reorder_buffer_nbanks;
+    int read_buffer_assoc;
+    int read_buffer_nbanks;
+    int read_buffer_tag_width;
+    int write_buffer_assoc;
+    int write_buffer_nbanks;
+    int write_buffer_tag_width;
 };
-class NoCParam {
+class MCStatistics {  // Plain record of memory-controller activity: duty_cycle, perc_load, reads, writes
 public:
-    NoCParam(){};
-    NoCParam(ParseXML *XML_interface, int ithCache_);
-    string name;
-    double clockRate;
-    int flit_size;
-    int input_ports, output_ports, min_ports, global_linked_ports;
-    int virtual_channel_per_port,input_buffer_entries_per_vc;
-    int horizontal_nodes,vertical_nodes, total_nodes;
-    double executionTime, total_access, link_throughput,link_latency,
-    duty_cycle, chip_coverage, route_over_perc;
-    bool has_global_link, type;
-
-    ~NoCParam(){};
+    double duty_cycle;
+    double perc_load;
+    double reads;
+    double writes;
 };
-class ProcParam {
-public:
-    ProcParam(){};
-    ProcParam(ParseXML *XML_interface, int ithCache_);
-    string name;
-    int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir,numMC, numMCChannel;
-    bool homoCore, homoL2, homoL3, homoNOC, homoL1Dir, homoL2Dir;
+class NIUParameters {  // Plain record of NIU configuration values
+ public:
+    double clockRate;
+    int num_units;
+    int type;
+};
-    ~ProcParam(){};
+class NIUStatistics {  // Plain record of NIU activity: duty_cycle and perc_load
+ public:
+    double duty_cycle;
+    double perc_load;
 };
-class NIUParam {
-public:
-    NIUParam(){};
-    NIUParam(ParseXML *XML_interface, int ithCache_);
-    string name;
-    double clockRate;
-    int num_units;
-    int type;
-    double duty_cycle, perc_load;
-    ~NIUParam(){};
+class PCIeParameters {  // Plain record of PCIe configuration values
+ public:
+    double clockRate;
+    int num_channels;
+    int num_units;
+    bool withPHY;
+    int type;
 };
-class PCIeParam {
-public:
-    PCIeParam(){};
-    PCIeParam(ParseXML *XML_interface, int ithCache_);
-    string name;
-    double clockRate;
-    int num_channels, num_units;
-    bool withPHY;
-    int type;
-    double duty_cycle, perc_load;
-    ~PCIeParam(){};
+class PCIeStatistics {  // Plain record of PCIe activity: duty_cycle and perc_load
+ public:
+    double duty_cycle;
+    double perc_load;
 };
 #endif /* BASIC_COMPONENTS_H_ */
diff --git a/ext/mcpat/bus_interconnect.cc b/ext/mcpat/bus_interconnect.cc
new file mode 100644
index 000000000..1dee2c338
--- /dev/null
+++ b/ext/mcpat/bus_interconnect.cc
@@ -0,0 +1,179 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Author: Joel Hestness
+ *
+ ***************************************************************************/
+
+#include <algorithm>  // NOTE(review): the five header names were missing from this listing; restored from usage (min, assert, memset, string) - confirm against the original commit
+#include <cassert>
+#include <cstring>
+#include <iostream>
+#include <string>
+
+#include "basic_circuit.h"
+#include "bus_interconnect.h"
+#include "common.h"
+#include "const.h"
+#include "io.h"
+#include "parameter.h"
+// Parses params/stats from the XML node, then builds the single child "Link" Interconnect.
+BusInterconnect::BusInterconnect(XMLNode* _xml_data,
+                                 InputParameter* interface_ip_)
+    : McPATComponent(_xml_data), link_bus(NULL), interface_ip(*interface_ip_) {
+    name = "Bus Interconnect";
+    set_param_stats();  // must run first: fills bus_params, link_len and parts of interface_ip
+    local_result = init_interface(&interface_ip, name);
+    scktRatio = g_tp.sckt_co_eff;
+
+    interface_ip.throughput = bus_params.link_throughput / bus_params.clockRate;
+    interface_ip.latency = bus_params.link_latency / bus_params.clockRate;
+
+    link_len /= bus_params.total_nodes;
+    if (bus_params.total_nodes > 1) {
+        //All links are shared by neighbors
+        link_len /= 2;
+    }
+
+    link_bus = new Interconnect(xml_data, "Link", Uncore_device,
+                                bus_params.link_base_width,
+                                bus_params.link_base_height,
+                                bus_params.flit_size, link_len, &interface_ip,
+                                bus_params.link_start_wiring_level,
+                                bus_params.clockRate,
+                                bus_params.pipelinable,
+                                bus_params.route_over_perc);
+    children.push_back(link_bus);  // NOTE(review): link_bus is also deleted in ~BusInterconnect - confirm McPATComponent does not delete its children too
+}
+// Loads TDP and runtime access stats into this node and its link, then recurses via the base class.
+void BusInterconnect::computeEnergy() {
+    // Initialize stats for TDP
+    tdp_stats.reset();
+    tdp_stats.readAc.access = bus_stats.duty_cycle;
+    link_bus->int_params.active_ports = bus_params.min_ports - 1;
+    link_bus->int_stats.duty_cycle =
+        bus_params.M_traffic_pattern * bus_stats.duty_cycle;
+
+    // Initialize stats for runtime energy and power
+    rtp_stats.reset();
+    rtp_stats.readAc.access = bus_stats.total_access;
+    link_bus->int_stats.accesses = bus_stats.total_access;
+
+    // Recursively compute energy
+    McPATComponent::computeEnergy();
+}
+// Reads the <param> and <stat> children of xml_data into bus_params, bus_stats, link_len and interface_ip.
+void BusInterconnect::set_param_stats() {
+    memset(&bus_params, 0, sizeof(BusInterconnectParameters));
+    memset(&bus_stats, 0, sizeof(BusInterconnectStatistics));  // stats absent from the XML now read as 0 instead of garbage
+    link_len = 0;  // stays 0, and reliably trips the assert below, when "link_len" is absent
+    int num_children = xml_data->nChildNode("param");
+    int mat_type = 0;  // deterministic default when "wire_mat_type" is absent (was read uninitialised)
+    for (int i = 0; i < num_children; i++) {
+        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+        XMLCSTR node_name = paramNode->getAttribute("name");
+        XMLCSTR value = paramNode->getAttribute("value");
+
+        if (!node_name)
+            warnMissingParamName(paramNode->getAttribute("id"));
+
+        ASSIGN_FP_IF("clockrate", bus_params.clockRate);
+        ASSIGN_INT_IF("flit_bits", bus_params.flit_size);
+        ASSIGN_FP_IF("link_throughput", bus_params.link_throughput);
+        ASSIGN_FP_IF("link_latency", bus_params.link_latency);
+        ASSIGN_INT_IF("total_nodes", bus_params.total_nodes);
+        ASSIGN_INT_IF("input_ports", bus_params.input_ports);
+        ASSIGN_INT_IF("output_ports", bus_params.output_ports);
+        ASSIGN_INT_IF("global_linked_ports", bus_params.global_linked_ports);
+        ASSIGN_FP_IF("chip_coverage", bus_params.chip_coverage);
+        ASSIGN_INT_IF("pipelinable", bus_params.pipelinable);
+        ASSIGN_FP_IF("link_routing_over_percentage",
+                     bus_params.route_over_perc);
+        ASSIGN_INT_IF("virtual_channel_per_port",
+                      bus_params.virtual_channel_per_port);
+        ASSIGN_FP_IF("M_traffic_pattern", bus_params.M_traffic_pattern);
+        ASSIGN_FP_IF("link_len", link_len);
+        ASSIGN_FP_IF("link_base_width", bus_params.link_base_width);
+        ASSIGN_FP_IF("link_base_height", bus_params.link_base_height);
+        ASSIGN_FP_IF("link_start_wiring_level",
+                     bus_params.link_start_wiring_level);
+        ASSIGN_INT_IF("wire_mat_type", mat_type);
+        ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type);
+
+        else {
+            warnUnrecognizedParam(node_name);
+        }
+    }
+
+    // Change from MHz to Hz
+    bus_params.clockRate *= 1e6;
+
+    interface_ip.wire_is_mat_type = mat_type;
+    interface_ip.wire_os_mat_type = mat_type;
+
+    num_children = xml_data->nChildNode("stat");
+    for (int i = 0; i < num_children; i++) {
+        XMLNode* statNode = xml_data->getChildNodePtr("stat", &i);
+        XMLCSTR node_name = statNode->getAttribute("name");
+        XMLCSTR value = statNode->getAttribute("value");
+
+        if (!node_name)
+            warnMissingStatName(statNode->getAttribute("id"));
+
+        ASSIGN_FP_IF("duty_cycle", bus_stats.duty_cycle);
+        ASSIGN_FP_IF("total_accesses", bus_stats.total_access);
+
+        else {
+            warnUnrecognizedStat(node_name);
+        }
+    }
+
+    clockRate = bus_params.clockRate;
+    bus_params.min_ports =
+        min(bus_params.input_ports, bus_params.output_ports);
+
+    assert(bus_params.chip_coverage <= 1);
+    assert(bus_params.route_over_perc <= 1);
+    assert(link_len > 0);
+}
+
+void
+BusInterconnect::set_duty_cycle(double duty_cycle) {  // overrides the duty cycle parsed from the XML stats
+    bus_stats.duty_cycle = duty_cycle;
+}
+
+void
+BusInterconnect::set_number_of_accesses(double total_accesses) {  // overrides the access count parsed from the XML stats
+    bus_stats.total_access = total_accesses;
+}
+
+BusInterconnect::~BusInterconnect() {
+    delete link_bus;
+    link_bus = NULL;
+}
diff --git a/ext/mcpat/bus_interconnect.h b/ext/mcpat/bus_interconnect.h
new file mode 100644
index 000000000..5c8b00420
--- /dev/null
+++ b/ext/mcpat/bus_interconnect.h
@@ -0,0 +1,95 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Joel Hestness
+ *
+ ***************************************************************************/
+
+#ifndef BUS_INTERCONNECT_H_
+#define BUS_INTERCONNECT_H_
+
+#include "array.h"
+#include "basic_components.h"
+#include "interconnect.h"
+#include "logic.h"
+#include "parameter.h"
+
+class BusInterconnectParameters {  // Configuration record; zeroed with memset and then filled from XML in BusInterconnect::set_param_stats()
+public:
+    double clockRate;  // held in Hz: the XML "clockrate" value is multiplied by 1e6 after parsing
+    int flit_size;  // parsed from the XML param named "flit_bits"
+    int input_ports;
+    int output_ports;
+    int min_ports;  // derived: min(input_ports, output_ports); not parsed
+    int global_linked_ports;
+    int virtual_channel_per_port;
+    int input_buffer_entries_per_vc;  // not parsed in bus_interconnect.cc; keeps the memset value 0
+    int total_nodes;
+    double link_throughput;
+    double link_latency;
+    double chip_coverage;  // asserted <= 1 after parsing
+    bool pipelinable;
+    double route_over_perc;  // parsed from "link_routing_over_percentage"; asserted <= 1
+    bool has_global_link;  // not parsed in bus_interconnect.cc; keeps the memset value
+    bool type;  // not parsed in bus_interconnect.cc; keeps the memset value
+    double M_traffic_pattern;
+    double link_base_width;
+    double link_base_height;
+    int link_start_wiring_level;
+};
+
+class BusInterconnectStatistics {  // Activity record read by BusInterconnect::computeEnergy()
+public:
+    double duty_cycle;
+    double total_access;  // parsed from the XML stat named "total_accesses"
+};
+
+class BusInterconnect : public McPATComponent {  // Bus modelled as one child Interconnect ("Link")
+public:
+    Interconnect* link_bus;  // allocated in the constructor, deleted in the destructor
+
+    int ithNoC;  // NOTE(review): not referenced in bus_interconnect.cc
+    InputParameter interface_ip;  // private copy of the InputParameter passed to the constructor
+    double link_len;  // parsed from XML "link_len", then divided by total_nodes (and by 2 when total_nodes > 1)
+    double scktRatio, chip_PR_overhead, macro_PR_overhead;  // only scktRatio is assigned in bus_interconnect.cc
+    BusInterconnectParameters bus_params;
+    BusInterconnectStatistics bus_stats;
+    uca_org_t local_result;
+    statsDef stats_t;  // NOTE(review): not referenced in bus_interconnect.cc
+    double M_traffic_pattern;  // NOTE(review): not referenced in bus_interconnect.cc; bus_params.M_traffic_pattern is what computeEnergy() reads
+
+    BusInterconnect(XMLNode* _xml_data, InputParameter* interface_ip_);
+    void set_param_stats();
+    void set_duty_cycle(double duty_cycle);
+    void set_number_of_accesses(double total_accesses);
+    void computeEnergy();
+    ~BusInterconnect();
+};
+
+#endif /* BUS_INTERCONNECT_H_ */
diff --git a/ext/mcpat/cachearray.cc b/ext/mcpat/cachearray.cc
new file mode 100644
index 000000000..cebea289e
--- /dev/null
+++ b/ext/mcpat/cachearray.cc
@@ -0,0 +1,321 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Joel Hestness
+ *          Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#include <iostream>  // NOTE(review): both header names were missing from this listing; restored from usage (cout/endl, list) - confirm against the original commit
+#include <list>
+
+#include "area.h"
+#include "cachearray.h"
+#include "common.h"
+#include "decoder.h"
+#include "parameter.h"
+
+using namespace std;
+
+double CacheArray::area_efficiency_threshold = 20.0;
+int CacheArray::ed = 0;
+//Fixed number, make sure timing can be satisfied.
+int CacheArray::delay_wt = 100;
+int CacheArray::cycle_time_wt = 1000;
+//Fixed number, This is used to exhaustive search for individual components.
+int CacheArray::area_wt = 10;
+//Fixed number, This is used to exhaustive search for individual components.
+int CacheArray::dynamic_power_wt = 10;
+int CacheArray::leakage_power_wt = 10;
+//Fixed number, make sure timing can be satisfied.
+int CacheArray::delay_dev = 1000000;
+int CacheArray::cycle_time_dev = 100;
+//Fixed number, This is used to exhaustive search for individual components.
+int CacheArray::area_dev = 1000000;
+//Fixed number, This is used to exhaustive search for individual components.
+int CacheArray::dynamic_power_dev = 1000000;
+int CacheArray::leakage_power_dev = 1000000;
+int CacheArray::cycle_time_dev_threshold = 10;
+// Runs CACTI on a copy of the given parameters, optionally re-searches with a shrinking cycle_time_dev, then scales area and power.
+CacheArray::CacheArray(XMLNode* _xml_data,
+                       const InputParameter *configure_interface, string _name,
+                       enum Device_ty device_ty_, double _clockRate,
+                       bool opt_local_, enum Core_type core_ty_, bool _is_default)
+    : McPATComponent(_xml_data), l_ip(*configure_interface),
+      device_ty(device_ty_), opt_local(opt_local_), core_ty(core_ty_),
+      is_default(_is_default), sbt_dir_overhead(0) {
+    name = _name;
+    clockRate = _clockRate;
+    if (l_ip.cache_sz < MIN_BUFFER_SIZE) {
+        l_ip.cache_sz = MIN_BUFFER_SIZE;
+    }
+
+    if (!l_ip.error_checking(name)) {
+        exit(1);
+    }
+
+    sbt_tdp_stats.reset();
+    sbt_rtp_stats.reset();
+
+    // Compute initial search point
+    local_result.valid = false;
+    compute_base_power();
+
+    // Set up the cache by searching design space with cacti
+    list<uca_org_t> candidate_solutions(0);  // element type restored from push_back(local_result) below
+    list<uca_org_t>::iterator candidate_iter, min_dynamic_energy_iter;
+    uca_org_t* temp_res = NULL;
+    double throughput = l_ip.throughput;
+    double latency = l_ip.latency;
+    bool throughput_overflow = true;
+    bool latency_overflow = true;
+
+    if ((local_result.cycle_time - throughput) <= 1e-10 )
+        throughput_overflow = false;
+    if ((local_result.access_time - latency) <= 1e-10)
+        latency_overflow = false;
+
+    if (opt_for_clk && opt_local) {
+        if (throughput_overflow || latency_overflow) {
+            l_ip.ed = ed;
+
+            l_ip.delay_wt = delay_wt;
+            l_ip.cycle_time_wt = cycle_time_wt;
+
+            l_ip.area_wt = area_wt;
+            l_ip.dynamic_power_wt = dynamic_power_wt;
+            l_ip.leakage_power_wt = leakage_power_wt;
+
+            l_ip.delay_dev = delay_dev;
+            l_ip.cycle_time_dev = cycle_time_dev;
+
+            l_ip.area_dev = area_dev;
+            l_ip.dynamic_power_dev = dynamic_power_dev;
+            l_ip.leakage_power_dev = leakage_power_dev;
+
+            //Reset overflow flag before start optimization iterations
+            throughput_overflow = true;
+            latency_overflow = true;
+
+            //Clean up the result for optimized for ED^2P
+            temp_res = &local_result;
+            temp_res->cleanup();
+        }
+
+
+        while ((throughput_overflow || latency_overflow) &&
+               l_ip.cycle_time_dev > cycle_time_dev_threshold) {
+            compute_base_power();
+
+            //This is the time_dev to be used for next iteration
+            l_ip.cycle_time_dev -= cycle_time_dev_threshold;
+
+            // from best area to worst area -->worst timing to best timing
+            if ((((local_result.cycle_time - throughput) <= 1e-10 ) &&
+                 (local_result.access_time - latency) <= 1e-10) ||
+                (local_result.data_array2->area_efficiency <
+                 area_efficiency_threshold && l_ip.assoc == 0)) {
+                //if no satisfiable solution is found,the most aggressive one
+                //is left
+                candidate_solutions.push_back(local_result);
+                if (((local_result.cycle_time - throughput) <= 1e-10) &&
+                    ((local_result.access_time - latency) <= 1e-10)) {
+                    //ensure stop opt not because of cam
+                    throughput_overflow = false;
+                    latency_overflow = false;
+                }
+
+            } else {
+                if ((local_result.cycle_time - throughput) <= 1e-10)
+                    throughput_overflow = false;
+                if ((local_result.access_time - latency) <= 1e-10)
+                    latency_overflow = false;
+
+                //if not >10 local_result is the last result, it cannot be
+                //cleaned up
+                if (l_ip.cycle_time_dev > cycle_time_dev_threshold) {
+                    //Only solutions not saved in the list need to be
+                    //cleaned up
+                    temp_res = &local_result;
+                    temp_res->cleanup();
+                }
+            }
+        }
+
+
+        if (l_ip.assoc > 0) {
+            //For array structures except CAM and FA, Give warning but still
+            //provide a result with best timing found
+            if (throughput_overflow == true)
+                cout << "Warning: " << name
+                     << " array structure cannot satisfy throughput constraint."
+                     << endl;
+            if (latency_overflow == true)
+                cout << "Warning: " << name
+                     << " array structure cannot satisfy latency constraint."
+                     << endl;
+        }
+
+        double min_dynamic_energy = BIGNUM;
+        if (candidate_solutions.empty() == false) {
+            local_result.valid = true;
+            for (candidate_iter = candidate_solutions.begin();
+                 candidate_iter != candidate_solutions.end();
+                 ++candidate_iter) {
+                if (min_dynamic_energy >
+                    (candidate_iter)->power.readOp.dynamic) {
+                    min_dynamic_energy =
+                        (candidate_iter)->power.readOp.dynamic;
+                    min_dynamic_energy_iter = candidate_iter;
+                    local_result = *(min_dynamic_energy_iter);  // NOTE(review): the candidate selected before this one is never cleanup()'d - confirm whether that leaks
+
+                } else {
+                    candidate_iter->cleanup() ;
+                }
+
+            }
+
+
+        }
+        candidate_solutions.clear();
+    }
+
+    double long_channel_device_reduction =
+        longer_channel_device_reduction(device_ty, core_ty);
+
+    double macro_layout_overhead = g_tp.macro_layout_overhead;
+    double chip_PR_overhead = g_tp.chip_layout_overhead;
+    double total_overhead = macro_layout_overhead * chip_PR_overhead;
+    local_result.area *= total_overhead;
+
+    //maintain constant power density
+    double pppm_t[4] = {total_overhead, 1, 1, total_overhead};
+
+    double sckRation = g_tp.sckt_co_eff;
+    local_result.power.readOp.dynamic *= sckRation;
+    local_result.power.writeOp.dynamic *= sckRation;
+    local_result.power.searchOp.dynamic *= sckRation;
+    local_result.power.readOp.leakage *= l_ip.nbanks;
+    local_result.power.readOp.longer_channel_leakage =
+        local_result.power.readOp.leakage * long_channel_device_reduction;
+    local_result.power = local_result.power * pppm_t;
+
+    local_result.data_array2->power.readOp.dynamic *= sckRation;
+    local_result.data_array2->power.writeOp.dynamic *= sckRation;
+    local_result.data_array2->power.searchOp.dynamic *= sckRation;
+    local_result.data_array2->power.readOp.leakage *= l_ip.nbanks;
+    local_result.data_array2->power.readOp.longer_channel_leakage =
+        local_result.data_array2->power.readOp.leakage *
+        long_channel_device_reduction;
+    local_result.data_array2->power = local_result.data_array2->power * pppm_t;
+
+
+    if (!(l_ip.pure_cam || l_ip.pure_ram || l_ip.fully_assoc) && l_ip.is_cache) {
+        local_result.tag_array2->power.readOp.dynamic *= sckRation;
+        local_result.tag_array2->power.writeOp.dynamic *= sckRation;
+        local_result.tag_array2->power.searchOp.dynamic *= sckRation;
+        local_result.tag_array2->power.readOp.leakage *= l_ip.nbanks;
+        local_result.tag_array2->power.readOp.longer_channel_leakage =
+            local_result.tag_array2->power.readOp.leakage *
+            long_channel_device_reduction;
+        local_result.tag_array2->power =
+            local_result.tag_array2->power * pppm_t;
+    }
+}
+
+void CacheArray::compute_base_power() {  // overwrites local_result with a fresh cacti_interface() run on l_ip
+    local_result = cacti_interface(&l_ip);
+}
+
+void CacheArray::computeArea() {
+    area.set_area(local_result.area);
+    output_data.area = local_result.area / 1e6;  // presumably um^2 -> mm^2 - confirm units
+}
+// Fills output_data with leakage, peak dynamic power and runtime dynamic energy from local_result and the tdp/rtp stats.
+void CacheArray::computeEnergy() {
+    // Set the leakage power numbers
+    output_data.subthreshold_leakage_power = local_result.power.readOp.leakage;
+    output_data.gate_leakage_power = local_result.power.readOp.gate_leakage;
+
+    if (l_ip.assoc && l_ip.is_cache) {
+        // This is a standard cache array with data and tags
+        // Calculate peak dynamic power
+        output_data.peak_dynamic_power =
+            (local_result.tag_array2->power.readOp.dynamic +
+             local_result.data_array2->power.readOp.dynamic) *
+            tdp_stats.readAc.hit +
+            (local_result.tag_array2->power.readOp.dynamic) *
+            tdp_stats.readAc.miss +
+            (local_result.tag_array2->power.readOp.dynamic +
+             local_result.data_array2->power.writeOp.dynamic) *
+            tdp_stats.writeAc.hit +
+            (local_result.tag_array2->power.readOp.dynamic) *
+            tdp_stats.writeAc.miss;
+        output_data.peak_dynamic_power *= clockRate;
+
+        // Calculate the runtime dynamic power
+        output_data.runtime_dynamic_energy =
+            local_result.data_array2->power.readOp.dynamic *
+            rtp_stats.dataReadAc.access +
+            local_result.data_array2->power.writeOp.dynamic *
+            rtp_stats.dataWriteAc.access +
+            (local_result.tag_array2->power.readOp.dynamic *
+             rtp_stats.tagReadAc.access +
+             local_result.tag_array2->power.writeOp.dynamic *
+             rtp_stats.tagWriteAc.access) * l_ip.assoc;
+    } else {
+        // Calculate peak dynamic power
+        output_data.peak_dynamic_power =
+            local_result.power.readOp.dynamic * tdp_stats.readAc.access +
+            local_result.power.writeOp.dynamic * tdp_stats.writeAc.access +
+            local_result.power.searchOp.dynamic * tdp_stats.searchAc.access;
+        output_data.peak_dynamic_power *= clockRate;
+
+        // Calculate the runtime dynamic power
+        output_data.runtime_dynamic_energy =
+            local_result.power.readOp.dynamic * rtp_stats.readAc.access +
+            local_result.power.writeOp.dynamic * rtp_stats.writeAc.access +
+            local_result.power.searchOp.dynamic * rtp_stats.searchAc.access;
+    }
+
+    // An SBT directory has more dynamic power
+    if (sbt_dir_overhead > 0) {
+        // Calculate peak dynamic power
+        output_data.peak_dynamic_power +=
+            (computeSBTDynEnergy(&sbt_tdp_stats) * clockRate);
+
+        // Calculate the runtime dynamic power
+        output_data.runtime_dynamic_energy +=
+            computeSBTDynEnergy(&sbt_rtp_stats);
+    }
+}
+
+CacheArray::~CacheArray() {
+    local_result.cleanup();
+}
diff --git a/ext/mcpat/cachearray.h b/ext/mcpat/cachearray.h
new file mode 100644
index 000000000..ba55ffcd1
--- /dev/null
+++ b/ext/mcpat/cachearray.h
@@ -0,0 +1,117 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Joel Hestness
+ *          Yasuko Eckert
+ *
+ ***************************************************************************/
+
+#ifndef CACHEARRAY_H_
+#define CACHEARRAY_H_
+
+#include <iostream>  // NOTE(review): both header names were missing from this listing; <string> follows from the constructor signature, <iostream> is a best guess - confirm against the original commit
+#include <string>
+
+#include "basic_components.h"
+#include "cacti_interface.h"
+#include "component.h"
+#include "const.h"
+#include "parameter.h"
+
+class CacheArray : public McPATComponent {  // One CACTI-modelled array (cache, buffer, CAM, ...) in the component tree
+public:
+    static double area_efficiency_threshold;
+
+    // These are used for the CACTI interface.
+    static int ed;
+    static int delay_wt;
+    static int cycle_time_wt;
+    static int area_wt;
+    static int dynamic_power_wt;
+    static int leakage_power_wt;
+    static int delay_dev;
+    static int cycle_time_dev;
+    static int area_dev;
+    static int dynamic_power_dev;
+    static int leakage_power_dev;
+    static int cycle_time_dev_threshold;
+
+    InputParameter l_ip;  // private copy of the InputParameter passed to the constructor
+    enum Device_ty device_ty;
+    bool opt_local;
+    enum Core_type core_ty;
+    bool is_default;
+    uca_org_t local_result;  // result of the most recent cacti_interface() run, after constructor scaling
+
+    // These are only used for static bank tag (SBT) directory type.
+    double sbt_dir_overhead;
+    // Set this to contain SBT peak power stats
+    statsDef sbt_tdp_stats;
+    // Set this to contain SBT runtime power stats
+    statsDef sbt_rtp_stats;
+
+    CacheArray(XMLNode* _xml_data, const InputParameter *configure_interface,
+               string _name, enum Device_ty device_ty_, double _clockRate = 0.0f,
+               bool opt_local_ = true,
+               enum Core_type core_ty_ = Inorder, bool _is_default = true);
+    void computeArea();
+    void computeEnergy();
+    void compute_base_power();
+    void setSBTDirOverhead(double overhead) { sbt_dir_overhead = overhead; }
+    ~CacheArray();
+
+ private:
+    double computeSBTDynEnergy(statsDef *sbt_stats_ptr);
+};
+// Extra dynamic energy of an SBT directory for the given hit/miss counts; 0 when sbt_dir_overhead is 0.
+inline
+double CacheArray::computeSBTDynEnergy(statsDef *sbt_stats_p) {
+    if (sbt_dir_overhead == 0) {
+        return 0;
+    }
+
+    // Write miss on dynamic home node will generate a replacement write on
+    // whole cache block
+    double dynamic =
+        sbt_stats_p->readAc.hit *
+        (local_result.data_array2->power.readOp.dynamic * sbt_dir_overhead +
+         local_result.tag_array2->power.readOp.dynamic) +
+        sbt_stats_p->readAc.miss *
+        local_result.tag_array2->power.readOp.dynamic +
+        sbt_stats_p->writeAc.miss *
+        local_result.tag_array2->power.readOp.dynamic +
+        sbt_stats_p->writeAc.hit *
+        (local_result.data_array2->power.writeOp.dynamic * sbt_dir_overhead +
+         local_result.tag_array2->power.readOp.dynamic) +  // replacement-write term below is additive; it was previously nested inside this writeAc.hit factor (hit count x miss count x energy)
+        sbt_stats_p->writeAc.miss *
+        local_result.power.writeOp.dynamic;
+    return dynamic;
+}
+
+#endif /* CACHEARRAY_H_ */
diff --git a/ext/mcpat/cachecontroller.cc b/ext/mcpat/cachecontroller.cc
new file mode 100644
index 000000000..6b505aac3
--- /dev/null
+++ b/ext/mcpat/cachecontroller.cc
@@ -0,0 +1,42 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Author: Joel Hestness
+ *
+ ***************************************************************************/
+
+#include "cachecontroller.h"
+
+// Construct the cache controller component: hand both arguments to the
+// McPATComponent base, adopt the core clock rate, label the component, and
+// then run the base-class recursive instantiation.
+CacheController::CacheController(XMLNode* _xml_data,
+                                 InputParameter* _interface_ip)
+        : McPATComponent(_xml_data, _interface_ip) {
+    this->clockRate = target_core_clockrate;
+    this->name = "Cache Controller";
+    McPATComponent::recursiveInstantiate();
+}
diff --git a/ext/mcpat/cachecontroller.h b/ext/mcpat/cachecontroller.h
new file mode 100644
index 000000000..26eccb6de
--- /dev/null
+++ b/ext/mcpat/cachecontroller.h
@@ -0,0 +1,45 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Author: Joel Hestness
+ *
+ ***************************************************************************/
+
+#ifndef CACHECONTROLLER_H_
+#define CACHECONTROLLER_H_
+
+#include "basic_components.h"
+
+/**
+ * McPATComponent subclass representing a cache controller. It adds no data
+ * members of its own; the constructor in cachecontroller.cc names the
+ * component "Cache Controller", clocks it at target_core_clockrate and calls
+ * McPATComponent::recursiveInstantiate().
+ */
+class CacheController : public McPATComponent {
+public:
+    // Both arguments are forwarded unchanged to the McPATComponent base.
+    CacheController(XMLNode* _xml_data, InputParameter* _interface_ip);
+    // Defined inline because cachecontroller.cc supplies no destructor body;
+    // a bare declaration would leave the symbol undefined as soon as it is
+    // needed.
+    // NOTE(review): assumes no other translation unit defines it - confirm.
+    ~CacheController() {}
+};
+
+#endif /* CACHECONTROLLER_H_ */
diff --git a/ext/mcpat/cacheunit.cc b/ext/mcpat/cacheunit.cc
new file mode 100644
index 000000000..3b9e84749
--- /dev/null
+++ b/ext/mcpat/cacheunit.cc
@@ -0,0 +1,647 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#include +#include +#include +#include + +#include "arbiter.h" +#include "array.h" +#include "basic_circuit.h" +#include "cachearray.h" +#include "cacheunit.h" +#include "common.h" +#include "const.h" +#include "io.h" +#include "logic.h" +#include "parameter.h" + +bool CacheUnit::is_cache = true; +bool CacheUnit::pure_cam = false; +bool CacheUnit::opt_local = true; +bool CacheUnit::force_cache_config = false; + +CacheUnit::CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip) + : dir_overhead(0), McPATComponent(_xml_data, _interface_ip) { + + int tag; + int data; + + name = "Cache Unit"; + CacheArray* arrayPtr = NULL; + + set_cache_param_from_xml_data(); + + //All lower level cache are physically indexed and tagged. + double size; + double line; + double assoc; + double banks; + size = cache_params.capacity; + line = cache_params.blockW; + assoc = cache_params.assoc; + banks = cache_params.nbanks; + if ((cache_params.dir_ty == ST && + cache_params.cache_level == L1Directory) || + (cache_params.dir_ty == ST && + cache_params.cache_level == L2Directory)) { + tag = physical_address_width + EXTRA_TAG_BITS; + } else { + tag = physical_address_width - int(ceil(log2(size / line / assoc))) - + int(ceil(log2(line))) + EXTRA_TAG_BITS; + + if (cache_params.dir_ty == SBT) { + dir_overhead = ceil(cache_params.num_cores / BITS_PER_BYTE) * + BITS_PER_BYTE / (line * BITS_PER_BYTE); + line *= (1 + dir_overhead); + size *= (1 + dir_overhead); + } + } + + interface_ip.cache_sz = (int)size; + interface_ip.line_sz = (int)line; + interface_ip.assoc = (int)assoc; + interface_ip.nbanks = (int)banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + } else { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2; + } + + 
interface_ip.access_mode = cache_params.cache_access_mode; + interface_ip.throughput= cache_params.throughput; + interface_ip.latency = cache_params.latency; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.is_cache = is_cache; + interface_ip.pure_ram = cache_params.pure_ram; + interface_ip.pure_cam = pure_cam; + interface_ip.num_rw_ports = cache_params.cache_rw_ports; + interface_ip.num_rd_ports = cache_params.cache_rd_ports; + interface_ip.num_wr_ports = cache_params.cache_wr_ports; + interface_ip.num_se_rd_ports = cache_params.cache_se_rd_ports; + interface_ip.num_search_ports = cache_params.cache_search_ports; + + arrayPtr = new CacheArray(xml_data, &interface_ip, "Data and Tag Arrays", + cache_params.device_ty, clockRate, opt_local, + cache_params.core_ty); + children.push_back(arrayPtr); + + // This is for calculating TDP, which depends on the number of + // available ports + int num_tdp_ports = arrayPtr->l_ip.num_rw_ports + + arrayPtr->l_ip.num_rd_ports + arrayPtr->l_ip.num_wr_ports; + + // Set new array stats for calculating TDP and runtime power + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = cache_stats.tdp_read_access_scalar * + num_tdp_ports * cache_stats.duty_cycle * + cache_stats.homenode_access_scalar; + arrayPtr->tdp_stats.readAc.miss = 0; + arrayPtr->tdp_stats.readAc.hit = arrayPtr->tdp_stats.readAc.access - + arrayPtr->tdp_stats.readAc.miss; + arrayPtr->tdp_stats.writeAc.access = cache_stats.tdp_write_access_scalar * + num_tdp_ports * cache_stats.duty_cycle * + cache_stats.homenode_access_scalar; + arrayPtr->tdp_stats.writeAc.miss = 0; + arrayPtr->tdp_stats.writeAc.hit = arrayPtr->tdp_stats.writeAc.access - + arrayPtr->tdp_stats.writeAc.miss; + arrayPtr->tdp_stats.searchAc.access = 0; + arrayPtr->tdp_stats.searchAc.miss = 0; + arrayPtr->tdp_stats.searchAc.hit = 0; + + arrayPtr->rtp_stats.reset(); + if 
(cache_stats.use_detailed_stats) { + arrayPtr->rtp_stats.dataReadAc.access = + cache_stats.num_data_array_reads; + arrayPtr->rtp_stats.dataWriteAc.access = + cache_stats.num_data_array_writes; + arrayPtr->rtp_stats.tagReadAc.access = + cache_stats.num_tag_array_reads; + arrayPtr->rtp_stats.tagWriteAc.access = + cache_stats.num_tag_array_writes; + } else { + // This code makes assumptions. For instance, it assumes that + // tag and data arrays are accessed in parallel on a read request and + // this is a write-allocate cache. It also ignores any coherence + // requests. Using detailed stats as above can avoid the ambiguity + // that is introduced here + arrayPtr->rtp_stats.dataReadAc.access = + cache_stats.read_accesses + cache_stats.write_misses; + arrayPtr->rtp_stats.dataWriteAc.access = + cache_stats.write_accesses + cache_stats.read_misses; + arrayPtr->rtp_stats.tagReadAc.access = + cache_stats.read_accesses + cache_stats.write_accesses; + arrayPtr->rtp_stats.tagWriteAc.access = + cache_stats.read_misses + cache_stats.write_misses; + } + + // Set SBT stats if this is an SBT directory type + if (dir_overhead > 0) { + arrayPtr->setSBTDirOverhead(dir_overhead); + + // TDP stats + arrayPtr->sbt_tdp_stats.readAc.access = + cache_stats.tdp_read_access_scalar * + num_tdp_ports * cache_stats.dir_duty_cycle * + (1 - cache_stats.homenode_access_scalar); + arrayPtr->sbt_tdp_stats.readAc.miss = 0; + arrayPtr->sbt_tdp_stats.readAc.hit = + arrayPtr->sbt_tdp_stats.readAc.access - + arrayPtr->sbt_tdp_stats.readAc.miss; + arrayPtr->sbt_tdp_stats.writeAc.access = + cache_stats.tdp_sbt_write_access_scalar * + num_tdp_ports * cache_stats.dir_duty_cycle * + (1 - cache_stats.homenode_access_scalar); + arrayPtr->sbt_tdp_stats.writeAc.miss = 0; + arrayPtr->sbt_tdp_stats.writeAc.hit = + arrayPtr->sbt_tdp_stats.writeAc.access - + arrayPtr->sbt_tdp_stats.writeAc.miss; + + // Runtime power stats + arrayPtr->sbt_rtp_stats.readAc.access = + cache_stats.homenode_read_accesses; + 
arrayPtr->sbt_rtp_stats.readAc.miss =
+            cache_stats.homenode_read_misses;
+        // Hits are accesses minus misses, stored in readAc.hit to mirror
+        // the write path below. This statement used to target
+        // readAc.access again, which overwrote the access count assigned
+        // just above and left readAc.hit (read by
+        // CacheArray::computeSBTDynEnergy) unset.
+        arrayPtr->sbt_rtp_stats.readAc.hit =
+            cache_stats.homenode_read_accesses -
+            cache_stats.homenode_read_misses;
+        arrayPtr->sbt_rtp_stats.writeAc.access =
+            cache_stats.homenode_write_accesses;
+        arrayPtr->sbt_rtp_stats.writeAc.miss =
+            cache_stats.homenode_write_misses;
+        arrayPtr->sbt_rtp_stats.writeAc.hit =
+            cache_stats.homenode_write_accesses -
+            cache_stats.homenode_write_misses;
+    }
+
+    interface_ip.force_cache_config = force_cache_config;
+    // The auxiliary buffers are built for everything except ST-type
+    // L1/L2 directories.
+    if (!((cache_params.dir_ty == ST &&
+           cache_params.cache_level == L1Directory) ||
+          (cache_params.dir_ty == ST &&
+           cache_params.cache_level== L2Directory))) {
+        // Miss Buffer
+        tag = physical_address_width + EXTRA_TAG_BITS;
+        // Entry width in bits: an address, an index derived from the
+        // current size / block width, and one block of data.
+        data = (physical_address_width) +
+            int(ceil(log2(size / cache_params.blockW))) +
+            (cache_params.blockW * BITS_PER_BYTE);
+        line = int(ceil(data / BITS_PER_BYTE));
+        size = cache_params.missb_size * line;
+
+        interface_ip.cache_sz = size;
+        interface_ip.line_sz = line;
+        interface_ip.assoc = cache_params.missb_assoc;
+        interface_ip.nbanks = cache_params.missb_banks;
+        interface_ip.specific_tag = tag > 0;
+        interface_ip.tag_w = tag;
+
+        // L1 outputs a full line; every other level outputs half a line.
+        if (cache_params.cache_level == L1) {
+            interface_ip.out_w = line * BITS_PER_BYTE;
+        } else {
+            interface_ip.out_w = line * BITS_PER_BYTE / 2;
+        }
+
+        interface_ip.access_mode = cache_params.miss_buff_access_mode;
+        interface_ip.obj_func_dyn_energy = 0;
+        interface_ip.obj_func_dyn_power = 0;
+        interface_ip.obj_func_leak_power = 0;
+        interface_ip.obj_func_cycle_t = 1;
+        interface_ip.is_cache = is_cache;
+        interface_ip.pure_ram = cache_params.pure_ram;
+        interface_ip.pure_cam = pure_cam;
+        interface_ip.throughput = cache_params.throughput;
+        interface_ip.latency = cache_params.latency;
+        interface_ip.num_rw_ports = cache_params.miss_buff_rw_ports;
+        interface_ip.num_rd_ports = cache_params.miss_buff_rd_ports;
+        interface_ip.num_wr_ports = cache_params.miss_buff_wr_ports; +
interface_ip.num_se_rd_ports = cache_params.miss_buff_se_rd_ports; + interface_ip.num_search_ports = cache_params.miss_buff_search_ports; + + arrayPtr = new CacheArray(xml_data, &interface_ip, "Miss Buffer", + cache_params.device_ty, clockRate, opt_local, + cache_params.core_ty); + children.push_back(arrayPtr); + + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = 0; + arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports; + arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports; + + arrayPtr->rtp_stats.reset(); + arrayPtr->rtp_stats.readAc.access = + cache_stats.read_misses + cache_stats.write_misses; + arrayPtr->rtp_stats.writeAc.access = + cache_stats.read_misses + cache_stats.write_misses; + arrayPtr->rtp_stats.searchAc.access = 0; + + if (cache_params.dir_ty == SBT) { + arrayPtr->rtp_stats.readAc.access += + cache_stats.homenode_write_misses; + arrayPtr->rtp_stats.writeAc.access += + cache_stats.homenode_write_misses; + } + + // Fill Buffer + tag = physical_address_width + EXTRA_TAG_BITS; + data = cache_params.blockW; + + interface_ip.cache_sz = data * cache_params.fu_size; + interface_ip.line_sz = data; + interface_ip.assoc = cache_params.fu_assoc; + interface_ip.nbanks = cache_params.fu_banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + } else { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2; + } + + interface_ip.access_mode = cache_params.fetch_buff_access_mode; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.is_cache = is_cache; + interface_ip.pure_cam = pure_cam; + interface_ip.throughput = cache_params.throughput; + interface_ip.latency = cache_params.latency; + interface_ip.num_rw_ports = cache_params.fetch_buff_rw_ports; + 
interface_ip.num_rd_ports = cache_params.fetch_buff_rd_ports; + interface_ip.num_wr_ports = cache_params.fetch_buff_wr_ports; + interface_ip.num_se_rd_ports = cache_params.fetch_buff_se_rd_ports; + interface_ip.num_search_ports = cache_params.fetch_buff_search_ports; + arrayPtr = new CacheArray(xml_data, &interface_ip, "Fill Buffer", + cache_params.device_ty, clockRate, opt_local, + cache_params.core_ty); + children.push_back(arrayPtr); + + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = 0; + arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports; + arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports; + + arrayPtr->rtp_stats.reset(); + arrayPtr->rtp_stats.readAc.access = + cache_stats.read_misses + cache_stats.write_misses; + arrayPtr->rtp_stats.writeAc.access = + cache_stats.read_misses + cache_stats.write_misses; + arrayPtr->rtp_stats.searchAc.access = 0; + + if (cache_params.dir_ty == SBT) { + arrayPtr->rtp_stats.readAc.access += + cache_stats.homenode_write_misses; + arrayPtr->rtp_stats.writeAc.access += + cache_stats.homenode_write_misses; + } + + // Prefetch Buffer + tag = physical_address_width + EXTRA_TAG_BITS; + line = cache_params.blockW; + + interface_ip.cache_sz = cache_params.prefetchb_size * line; + interface_ip.line_sz = line; + interface_ip.assoc = cache_params.prefetchb_assoc; + interface_ip.nbanks = cache_params.prefetchb_banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + } else { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2; + } + + interface_ip.access_mode = cache_params.prefetch_buff_access_mode; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.is_cache = is_cache; + interface_ip.pure_ram = cache_params.pure_ram; + 
interface_ip.pure_cam = pure_cam; + interface_ip.throughput = cache_params.throughput; + interface_ip.latency = cache_params.latency; + interface_ip.num_rw_ports = cache_params.pf_buff_rw_ports; + interface_ip.num_rd_ports = cache_params.pf_buff_rd_ports; + interface_ip.num_wr_ports = cache_params.pf_buff_wr_ports; + interface_ip.num_se_rd_ports = cache_params.pf_buff_se_rd_ports; + interface_ip.num_search_ports = cache_params.pf_buff_search_ports; + arrayPtr = new CacheArray(xml_data, &interface_ip, "Prefetch Buffer", + cache_params.device_ty, clockRate, opt_local, + cache_params.core_ty); + children.push_back(arrayPtr); + + arrayPtr->tdp_stats.reset(); + arrayPtr->tdp_stats.readAc.access = 0; + arrayPtr->tdp_stats.writeAc.access = arrayPtr->l_ip.num_search_ports; + arrayPtr->tdp_stats.searchAc.access = arrayPtr->l_ip.num_search_ports; + + arrayPtr->rtp_stats.reset(); + arrayPtr->rtp_stats.readAc.access = cache_stats.read_misses; + arrayPtr->rtp_stats.writeAc.access = cache_stats.read_misses; + arrayPtr->rtp_stats.searchAc.access = 0; + + if (cache_params.dir_ty == SBT) { + arrayPtr->rtp_stats.readAc.access += + cache_stats.homenode_write_misses; + arrayPtr->rtp_stats.writeAc.access += + cache_stats.homenode_write_misses; + } + + // Writeback Buffer + if (cache_params.wbb_size > 0) { + tag = physical_address_width + EXTRA_TAG_BITS; + line = cache_params.blockW; + + interface_ip.cache_sz = cache_params.wbb_size * line; + interface_ip.line_sz = line; + interface_ip.assoc = cache_params.wbb_assoc; + interface_ip.nbanks = cache_params.wbb_banks; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + + if (cache_params.cache_level == L1) { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + } else { + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE / 2; + } + + interface_ip.access_mode = cache_params.writeback_buff_access_mode; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + 
interface_ip.obj_func_leak_power = 0;
+            interface_ip.obj_func_cycle_t = 1;
+            interface_ip.is_cache = is_cache;
+            interface_ip.pure_ram = cache_params.pure_ram;
+            interface_ip.pure_cam = pure_cam;
+            interface_ip.throughput = cache_params.throughput;
+            interface_ip.latency = cache_params.latency;
+            interface_ip.num_rw_ports = cache_params.wb_buff_rw_ports;
+            interface_ip.num_rd_ports = cache_params.wb_buff_rd_ports;
+            interface_ip.num_wr_ports = cache_params.wb_buff_wr_ports;
+            interface_ip.num_se_rd_ports = cache_params.wb_buff_se_rd_ports;
+            interface_ip.num_search_ports = cache_params.wb_buff_search_ports;
+            arrayPtr = new CacheArray(xml_data, &interface_ip,
+                                      "Writeback Buffer",
+                                      cache_params.device_ty, clockRate,
+                                      opt_local, cache_params.core_ty);
+            children.push_back(arrayPtr);
+
+            arrayPtr->tdp_stats.reset();
+            arrayPtr->tdp_stats.readAc.access = 0;
+            arrayPtr->tdp_stats.writeAc.access =
+                arrayPtr->l_ip.num_search_ports;
+            arrayPtr->tdp_stats.searchAc.access =
+                arrayPtr->l_ip.num_search_ports;
+
+            arrayPtr->rtp_stats.reset();
+            arrayPtr->rtp_stats.readAc.access = cache_stats.write_misses;
+            arrayPtr->rtp_stats.writeAc.access = cache_stats.write_misses;
+            arrayPtr->rtp_stats.searchAc.access = 0;
+
+            if (cache_params.dir_ty == SBT) {
+                arrayPtr->rtp_stats.readAc.access +=
+                    cache_stats.homenode_write_misses;
+                arrayPtr->rtp_stats.writeAc.access +=
+                    cache_stats.homenode_write_misses;
+            }
+        }
+    }
+}
+
+// Energy computation is delegated entirely to the McPATComponent base.
+void CacheUnit::computeEnergy() {
+    McPATComponent::computeEnergy();
+}
+
+// Fill cache_params, cache_stats, clockRate, name and the technology/wire
+// fields of interface_ip from the "param" and "stat" children of xml_data.
+void CacheUnit::set_cache_param_from_xml_data() {
+    // level stays 0 when the XML carries no "level" param, so the switch on
+    // it further down reports an unrecognized level instead of reading an
+    // indeterminate value.
+    int level = 0, type;
+
+    // Initialization... move this?
+    memset(&cache_params, 0, sizeof(CacheParameters));
+    memset(&cache_stats, 0, sizeof(CacheStatistics));
+
+    // By default, use the core clock frequency. This can be changed by
+    // setting the clockrate param in the XML definition of the CacheUnit
+    clockRate = target_core_clockrate;
+    XMLCSTR comp_name = xml_data->getAttribute("name");
+    if (comp_name) {
+        name = comp_name;
+    }
+
+    int num_children = xml_data->nChildNode("param");
+    int i;
+    // Both are copied into interface_ip after the loop whether or not the
+    // XML set them, so they start from a defined value. 0 mirrors the zero
+    // fill of cache_params above - TODO confirm it is the intended default.
+    int tech_type = 0;
+    int mat_type = 0;
+    for (i = 0; i < num_children; i++) {
+        XMLNode* paramNode = xml_data->getChildNodePtr("param", &i);
+        XMLCSTR node_name = paramNode->getAttribute("name");
+        XMLCSTR value = paramNode->getAttribute("value");
+
+        // NOTE(review): execution continues into the ASSIGN_*_IF chain after
+        // this warning; confirm the macros tolerate a missing name.
+        if (!node_name)
+            warnMissingParamName(paramNode->getAttribute("id"));
+
+        ASSIGN_INT_IF("level", level);
+        ASSIGN_FP_IF("size", cache_params.capacity);
+        ASSIGN_FP_IF("block_size", cache_params.blockW);
+        ASSIGN_FP_IF("assoc", cache_params.assoc);
+        ASSIGN_FP_IF("num_banks", cache_params.nbanks);
+        ASSIGN_FP_IF("latency", cache_params.latency);
+        ASSIGN_FP_IF("throughput", cache_params.throughput);
+        ASSIGN_INT_IF("miss_buffer_size", cache_params.missb_size);
+        ASSIGN_INT_IF("fetch_buffer_size", cache_params.fu_size);
+        ASSIGN_INT_IF("prefetch_buffer_size", cache_params.prefetchb_size);
+        ASSIGN_INT_IF("writeback_buffer_size", cache_params.wbb_size);
+        ASSIGN_INT_IF("miss_buffer_assoc", cache_params.missb_assoc);
+        ASSIGN_INT_IF("fetch_buffer_assoc", cache_params.fu_assoc);
+        ASSIGN_INT_IF("prefetch_buffer_assoc", cache_params.prefetchb_assoc);
+        ASSIGN_INT_IF("writeback_buffer_assoc", cache_params.wbb_assoc);
+        ASSIGN_INT_IF("miss_buffer_banks", cache_params.missb_banks);
+        ASSIGN_INT_IF("fetch_buffer_banks", cache_params.fu_banks);
+        ASSIGN_INT_IF("prefetch_buffer_banks", cache_params.prefetchb_banks);
+        ASSIGN_INT_IF("writeback_buffer_banks", cache_params.wbb_banks);
+        ASSIGN_ENUM_IF("cache_access_mode",
+                       cache_params.cache_access_mode, Access_mode);
+        ASSIGN_ENUM_IF("miss_buff_access_mode",
+                       cache_params.miss_buff_access_mode, Access_mode);
+        ASSIGN_ENUM_IF("fetch_buff_access_mode",
+                       cache_params.fetch_buff_access_mode,
Access_mode); + ASSIGN_ENUM_IF("prefetch_buff_access_mode", + cache_params.prefetch_buff_access_mode, Access_mode); + ASSIGN_ENUM_IF("writeback_buff_access_mode", + cache_params.writeback_buff_access_mode, Access_mode); + ASSIGN_INT_IF("cache_rw_ports", cache_params.cache_rw_ports); + ASSIGN_INT_IF("cache_rd_ports", cache_params.cache_rd_ports); + ASSIGN_INT_IF("cache_wr_ports", cache_params.cache_wr_ports); + ASSIGN_INT_IF("cache_se_rd_ports", cache_params.cache_se_rd_ports); + ASSIGN_INT_IF("cache_search_ports", cache_params.cache_search_ports); + ASSIGN_INT_IF("miss_buff_rw_ports", cache_params.miss_buff_rw_ports); + ASSIGN_INT_IF("miss_buff_rd_ports", cache_params.miss_buff_rd_ports); + ASSIGN_INT_IF("miss_buff_wr_ports", cache_params.miss_buff_wr_ports); + ASSIGN_INT_IF("miss_buff_se_rd_ports" , + cache_params.miss_buff_se_rd_ports); + ASSIGN_INT_IF("miss_buff_search_ports", + cache_params.miss_buff_search_ports); + ASSIGN_INT_IF("fetch_buff_rw_ports", cache_params.fetch_buff_rw_ports); + ASSIGN_INT_IF("fetch_buff_rd_ports", cache_params.fetch_buff_rd_ports); + ASSIGN_INT_IF("fetch_buff_wr_ports", cache_params.fetch_buff_wr_ports); + ASSIGN_INT_IF("fetch_buff_se_rd_ports", + cache_params.fetch_buff_se_rd_ports); + ASSIGN_INT_IF("fetch_buff_search_ports", + cache_params.fetch_buff_search_ports); + ASSIGN_INT_IF("pf_buff_rw_ports", cache_params.pf_buff_rw_ports); + ASSIGN_INT_IF("pf_buff_rd_ports", cache_params.pf_buff_rd_ports); + ASSIGN_INT_IF("pf_buff_wr_ports", cache_params.pf_buff_wr_ports); + ASSIGN_INT_IF("pf_buff_se_rd_ports", cache_params.pf_buff_se_rd_ports); + ASSIGN_INT_IF("pf_buff_search_ports", + cache_params.pf_buff_search_ports); + ASSIGN_INT_IF("wb_buff_rw_ports", cache_params.wb_buff_rw_ports); + ASSIGN_INT_IF("wb_buff_rd_ports", cache_params.wb_buff_rd_ports); + ASSIGN_INT_IF("wb_buff_wr_ports", cache_params.wb_buff_wr_ports); + ASSIGN_INT_IF("wb_buff_se_rd_ports", cache_params.wb_buff_se_rd_ports); + ASSIGN_INT_IF("wb_buff_search_ports", + 
cache_params.wb_buff_search_ports); + ASSIGN_FP_IF("clockrate", cache_params.clockRate); + ASSIGN_INT_IF("pure_ram", cache_params.pure_ram); + ASSIGN_INT_IF("tech_type", tech_type); + ASSIGN_ENUM_IF("Directory_type", cache_params.dir_ty, Dir_type); + ASSIGN_ENUM_IF("device_type", cache_params.device_ty, Device_ty); + ASSIGN_ENUM_IF("core_type", cache_params.core_ty, Core_type); + ASSIGN_INT_IF("num_cores", cache_params.num_cores); + ASSIGN_INT_IF("wire_mat_type", mat_type); + ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); + + else { + warnUnrecognizedParam(node_name); + } + } + + // Change from MHz to Hz + cache_params.clockRate *= 1e6; + if (cache_params.clockRate > 0) { + clockRate = cache_params.clockRate; + } + + interface_ip.data_arr_ram_cell_tech_type = tech_type; + interface_ip.data_arr_peri_global_tech_type = tech_type; + interface_ip.tag_arr_ram_cell_tech_type = tech_type; + interface_ip.tag_arr_peri_global_tech_type = tech_type; + + interface_ip.wire_is_mat_type = mat_type; + interface_ip.wire_os_mat_type = mat_type; + + switch(level) { + case 1: + cache_params.cache_level = L1; + break; + case 2: + cache_params.cache_level = L2; + break; + case 3: + cache_params.cache_level = L3; + break; + case 4: + cache_params.cache_level = L1Directory; + break; + case 5: + cache_params.cache_level = L2Directory; + break; + + default: + fprintf(stderr, "ERROR: Unrecognized cache level in %s: %d\n", + name.c_str(), level); + exit(1); + } + + cache_stats.use_detailed_stats = false; + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("num_data_array_reads", cache_stats.num_data_array_reads); + ASSIGN_FP_IF("num_data_array_writes", + cache_stats.num_data_array_writes); + 
ASSIGN_FP_IF("num_tag_array_reads", cache_stats.num_tag_array_reads);
+        ASSIGN_FP_IF("num_tag_array_writes", cache_stats.num_tag_array_writes);
+        ASSIGN_FP_IF("duty_cycle", cache_stats.duty_cycle);
+        ASSIGN_FP_IF("read_accesses", cache_stats.read_accesses);
+        ASSIGN_FP_IF("write_accesses", cache_stats.write_accesses);
+        ASSIGN_FP_IF("read_misses", cache_stats.read_misses);
+        ASSIGN_FP_IF("write_misses", cache_stats.write_misses);
+        ASSIGN_FP_IF("conflicts", cache_stats.conflicts);
+        ASSIGN_INT_IF("homenode_read_accesses",
+                      cache_stats.homenode_read_accesses);
+        ASSIGN_INT_IF("homenode_write_accesses",
+                      cache_stats.homenode_write_accesses);
+        ASSIGN_INT_IF("homenode_read_misses",
+                      cache_stats.homenode_read_misses);
+        ASSIGN_INT_IF("homenode_write_misses",
+                      cache_stats.homenode_write_misses);
+        ASSIGN_FP_IF("homenode_access_scalar",
+                     cache_stats.homenode_access_scalar);
+        ASSIGN_FP_IF("tdp_read_access_scalar",
+                     cache_stats.tdp_read_access_scalar);
+        ASSIGN_FP_IF("tdp_write_access_scalar",
+                     cache_stats.tdp_write_access_scalar);
+        ASSIGN_FP_IF("tdp_sbt_write_access_scalar",
+                     cache_stats.tdp_sbt_write_access_scalar);
+        ASSIGN_FP_IF("dir_duty_cycle",
+                     cache_stats.dir_duty_cycle);
+
+        else {
+            warnUnrecognizedStat(node_name);
+        }
+    }
+
+    // Any non-zero per-array counter selects the detailed-stats path.
+    // Both flags are now written on every path:
+    // calculate_runtime_data_and_tag used to be assigned only when the
+    // condition held, and nothing else in this function or the constructor
+    // gives it a value.
+    const bool have_detailed_stats =
+        cache_stats.num_data_array_reads > 0 ||
+        cache_stats.num_data_array_writes > 0 ||
+        cache_stats.num_tag_array_reads > 0 ||
+        cache_stats.num_tag_array_writes > 0;
+    cache_stats.use_detailed_stats = have_detailed_stats;
+    calculate_runtime_data_and_tag = have_detailed_stats;
+}
diff --git a/ext/mcpat/cacheunit.h b/ext/mcpat/cacheunit.h
new file mode 100644
index 000000000..e4429e74b
--- /dev/null
+++ b/ext/mcpat/cacheunit.h
@@ -0,0 +1,167 @@
+/*****************************************************************************
+ * McPAT
+ * SOFTWARE LICENSE AGREEMENT
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#ifndef CACHEUNIT_H_ +#define CACHEUNIT_H_ + +#include "area.h" +#include "array.h" +#include "basic_components.h" +#include "logic.h" +#include "parameter.h" + +class CacheParameters { +public: + enum Dir_type dir_ty; + double clockRate; + double capacity; + double blockW; + double assoc; + double nbanks; + double throughput; + double latency; + int missb_size; + int fu_size; + int prefetchb_size; + int wbb_size; + int missb_assoc; + int fu_assoc; + int prefetchb_assoc; + int wbb_assoc; + int missb_banks; + int fu_banks; + int prefetchb_banks; + int wbb_banks; + enum Access_mode cache_access_mode; + enum Access_mode miss_buff_access_mode; + enum Access_mode fetch_buff_access_mode; + enum Access_mode prefetch_buff_access_mode; + enum Access_mode writeback_buff_access_mode; + int cache_rw_ports; + int cache_rd_ports; + int cache_wr_ports; + int cache_se_rd_ports; + int cache_search_ports; + int miss_buff_rw_ports; + int miss_buff_rd_ports; + int miss_buff_wr_ports; + int miss_buff_se_rd_ports; + int miss_buff_search_ports; + int fetch_buff_rw_ports; + int fetch_buff_rd_ports; + int fetch_buff_wr_ports; + int fetch_buff_se_rd_ports; + int fetch_buff_search_ports; + int pf_buff_rw_ports; + int pf_buff_rd_ports; + int pf_buff_wr_ports; + int pf_buff_se_rd_ports; + int pf_buff_search_ports; + int wb_buff_rw_ports; + int wb_buff_rd_ports; + int wb_buff_wr_ports; + int wb_buff_se_rd_ports; + int wb_buff_search_ports; + bool pure_ram; + enum CacheLevel cache_level; + enum Device_ty device_ty; + enum Core_type core_ty; + int num_cores; +}; + +class CacheStatistics { +public: + // Duty cycle is used for estimating TDP. 
It should reflect the highest + // sustainable rate of access to the cache unit in execution of a benchmark + // Default should be 1.0: one access per cycle + double duty_cycle; + // This duty cycle is only used for SBT directory types + double dir_duty_cycle; + // The following two stats are also used for estimating TDP. + double tdp_read_access_scalar; + double tdp_write_access_scalar; + // There are 2 ways to calculate dynamic power from activity statistics: + // Default is false + bool use_detailed_stats; + // 1) Count the number and type of accesses to each cache array + // splitting data and tag arrays (use_detailed_stats = true). + // These are extremely detailed statistics. + // read_misses and write_misses are still required for this method for + // various buffers associated with this cache. + double num_data_array_reads; + double num_data_array_writes; + double num_tag_array_reads; + double num_tag_array_writes; + // 2) Count the number and type of access to the cache unit and + // use them to extrapolate the number of accesses to the other + // subcomponents (cache arrays and buffers) + double read_accesses; + double write_accesses; + double read_misses; + double write_misses; + double conflicts; + // The following is only used for SBT directory types + int homenode_read_accesses; + int homenode_write_accesses; + int homenode_read_misses; + int homenode_write_misses; + double homenode_access_scalar; + double tdp_sbt_write_access_scalar; +}; + +class CacheUnit : public McPATComponent { +public: + static bool is_cache; + static bool pure_cam; + // This is used for CacheArray objects + static bool opt_local; + static bool force_cache_config; + + int ithCache; + CacheParameters cache_params; + CacheStatistics cache_stats; + Cache_type cacheType; + bool calculate_runtime_data_and_tag; + double dir_overhead; + + double scktRatio; + + // TODO: REMOVE _interface_ip... It promotes a mess. Find a better way... 
+ CacheUnit(XMLNode* _xml_data, InputParameter* _interface_ip); + void set_cache_param_from_xml_data(); + void computeEnergy(); + ~CacheUnit() {}; +}; + +#endif /* CACHEUNIT_H_ */ diff --git a/ext/mcpat/cacti/Ucache.cc b/ext/mcpat/cacti/Ucache.cc index f3e1227df..ada9c5aa1 100644 --- a/ext/mcpat/cacti/Ucache.cc +++ b/ext/mcpat/cacti/Ucache.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -54,176 +55,163 @@ using namespace std; const uint32_t nthreads = NTHREADS; -void min_values_t::update_min_values(const min_values_t * val) -{ - min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay; - min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn; - min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage; - min_area = (min_area > val->min_area) ? val->min_area : min_area; - min_cyc = (min_cyc > val->min_cyc) ? val->min_cyc : min_cyc; +void min_values_t::update_min_values(const min_values_t * val) { + min_delay = (min_delay > val->min_delay) ? val->min_delay : min_delay; + min_dyn = (min_dyn > val->min_dyn) ? val->min_dyn : min_dyn; + min_leakage = (min_leakage > val->min_leakage) ? val->min_leakage : min_leakage; + min_area = (min_area > val->min_area) ? val->min_area : min_area; + min_cyc = (min_cyc > val->min_cyc) ? 
val->min_cyc : min_cyc; } -void min_values_t::update_min_values(const uca_org_t & res) -{ - min_delay = (min_delay > res.access_time) ? res.access_time : min_delay; - min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage; - min_area = (min_area > res.area) ? res.area : min_area; - min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc; +void min_values_t::update_min_values(const uca_org_t & res) { + min_delay = (min_delay > res.access_time) ? res.access_time : min_delay; + min_dyn = (min_dyn > res.power.readOp.dynamic) ? res.power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res.power.readOp.leakage) ? res.power.readOp.leakage : min_leakage; + min_area = (min_area > res.area) ? res.area : min_area; + min_cyc = (min_cyc > res.cycle_time) ? res.cycle_time : min_cyc; } -void min_values_t::update_min_values(const nuca_org_t * res) -{ - min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay; - min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage; - min_area = (min_area > res->nuca_pda.area.get_area()) ? res->nuca_pda.area.get_area() : min_area; - min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc; +void min_values_t::update_min_values(const nuca_org_t * res) { + min_delay = (min_delay > res->nuca_pda.delay) ? res->nuca_pda.delay : min_delay; + min_dyn = (min_dyn > res->nuca_pda.power.readOp.dynamic) ? res->nuca_pda.power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res->nuca_pda.power.readOp.leakage) ? res->nuca_pda.power.readOp.leakage : min_leakage; + min_area = (min_area > res->nuca_pda.area.get_area()) ? 
res->nuca_pda.area.get_area() : min_area; + min_cyc = (min_cyc > res->nuca_pda.cycle_time) ? res->nuca_pda.cycle_time : min_cyc; } -void min_values_t::update_min_values(const mem_array * res) -{ - min_delay = (min_delay > res->access_time) ? res->access_time : min_delay; - min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn; - min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage; - min_area = (min_area > res->area) ? res->area : min_area; - min_cyc = (min_cyc > res->cycle_time) ? res->cycle_time : min_cyc; +void min_values_t::update_min_values(const mem_array * res) { + min_delay = (min_delay > res->access_time) ? res->access_time : min_delay; + min_dyn = (min_dyn > res->power.readOp.dynamic) ? res->power.readOp.dynamic : min_dyn; + min_leakage = (min_leakage > res->power.readOp.leakage) ? res->power.readOp.leakage : min_leakage; + min_area = (min_area > res->area) ? res->area : min_area; + min_cyc = (min_cyc > res->cycle_time) ? 
res->cycle_time : min_cyc; } -void * calc_time_mt_wrapper(void * void_obj) -{ - calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj; - uint32_t tid = calc_obj->tid; - list & data_arr = calc_obj->data_arr; - list & tag_arr = calc_obj->tag_arr; - bool is_tag = calc_obj->is_tag; - bool pure_ram = calc_obj->pure_ram; - bool pure_cam = calc_obj->pure_cam; - bool is_main_mem = calc_obj->is_main_mem; - double Nspd_min = calc_obj->Nspd_min; - min_values_t * data_res = calc_obj->data_res; - min_values_t * tag_res = calc_obj->tag_res; - - data_arr.clear(); - data_arr.push_back(new mem_array); - tag_arr.clear(); - tag_arr.push_back(new mem_array); - - uint32_t Ndwl_niter = _log2(MAXDATAN) + 1; - uint32_t Ndbl_niter = _log2(MAXDATAN) + 1; - uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1; - uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; - - - bool is_valid_partition; - int wt_min, wt_max; - - if (g_ip->force_wiretype) { - if (g_ip->wt == 0) { - wt_min = Low_swing; - wt_max = Low_swing; - } - else { - wt_min = Global; - wt_max = Low_swing-1; - } - } - else { - wt_min = Global; - wt_max = Low_swing; - } +void * calc_time_mt_wrapper(void * void_obj) { + calc_time_mt_wrapper_struct * calc_obj = (calc_time_mt_wrapper_struct *) void_obj; + uint32_t tid = calc_obj->tid; + list & data_arr = calc_obj->data_arr; + list & tag_arr = calc_obj->tag_arr; + bool is_tag = calc_obj->is_tag; + bool pure_ram = calc_obj->pure_ram; + bool pure_cam = calc_obj->pure_cam; + bool is_main_mem = calc_obj->is_main_mem; + double Nspd_min = calc_obj->Nspd_min; + min_values_t * data_res = calc_obj->data_res; + min_values_t * tag_res = calc_obj->tag_res; - for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) - { - for (int wr = wt_min; wr <= wt_max; wr++) - { - for (uint32_t iter = tid; iter < niter; iter += nthreads) - { - // reconstruct Ndwl, Ndbl, Ndcm - unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter)); - unsigned int Ndbl = 1 << ((iter / 
(Ndcm_niter))%Ndbl_niter); - unsigned int Ndcm = 1 << (iter % Ndcm_niter); - for(unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; Ndsam_lev_1 *= 2) - { - for(unsigned int Ndsam_lev_2 = 1; Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) - { - //for debuging - if (g_ip->force_cache_config && is_tag == false) - { - wr = g_ip->wt; - Ndwl = g_ip->ndwl; - Ndbl = g_ip->ndbl; - Ndcm = g_ip->ndcm; - if(g_ip->nspd != 0) { - Nspd = g_ip->nspd; - } - if(g_ip->ndsam1 != 0) { - Ndsam_lev_1 = g_ip->ndsam1; - Ndsam_lev_2 = g_ip->ndsam2; - } - } - - if (is_tag == true) - { - is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, - Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - tag_arr.back(), 0, NULL, NULL, - is_main_mem); - } - // If it's a fully-associative cache, the data array partition parameters are identical to that of - // the tag array, so compute data array partition properties also here. - if (is_tag == false || g_ip->fully_assoc) - { - is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl, - Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - data_arr.back(), 0, NULL, NULL, - is_main_mem); - } - - if (is_valid_partition) - { - if (is_tag == true) - { - tag_arr.back()->wt = (enum Wire_type) wr; - tag_res->update_min_values(tag_arr.back()); - tag_arr.push_back(new mem_array); - } - if (is_tag == false || g_ip->fully_assoc) - { - data_arr.back()->wt = (enum Wire_type) wr; - data_res->update_min_values(data_arr.back()); - data_arr.push_back(new mem_array); - } - } + data_arr.clear(); + data_arr.push_back(new mem_array); + tag_arr.clear(); + tag_arr.push_back(new mem_array); + + uint32_t Ndwl_niter = _log2(MAXDATAN) + 1; + uint32_t Ndbl_niter = _log2(MAXDATAN) + 1; + uint32_t Ndcm_niter = _log2(MAX_COL_MUX) + 1; + uint32_t niter = Ndwl_niter * Ndbl_niter * Ndcm_niter; + + + bool is_valid_partition; + int wt_min, wt_max; + + if (g_ip->force_wiretype) { + if (g_ip->wt == 0) { + wt_min = Low_swing; + wt_max = Low_swing; + } else { + wt_min 
= Global; + wt_max = Low_swing - 1; + } + } else { + wt_min = Global; + wt_max = Low_swing; + } - if (g_ip->force_cache_config && is_tag == false) - { - wr = wt_max; - iter = niter; - if(g_ip->nspd != 0) { - Nspd = MAXDATASPD; - } - if (g_ip->ndsam1 != 0) { - Ndsam_lev_1 = MAX_COL_MUX+1; - Ndsam_lev_2 = MAX_COL_MUX+1; + for (double Nspd = Nspd_min; Nspd <= MAXDATASPD; Nspd *= 2) { + for (int wr = wt_min; wr <= wt_max; wr++) { + for (uint32_t iter = tid; iter < niter; iter += nthreads) { + // reconstruct Ndwl, Ndbl, Ndcm + unsigned int Ndwl = 1 << (iter / (Ndbl_niter * Ndcm_niter)); + unsigned int Ndbl = 1 << ((iter / (Ndcm_niter)) % Ndbl_niter); + unsigned int Ndcm = 1 << (iter % Ndcm_niter); + for (unsigned int Ndsam_lev_1 = 1; Ndsam_lev_1 <= MAX_COL_MUX; + Ndsam_lev_1 *= 2) { + for (unsigned int Ndsam_lev_2 = 1; + Ndsam_lev_2 <= MAX_COL_MUX; Ndsam_lev_2 *= 2) { + //for debuging + if (g_ip->force_cache_config && is_tag == false) { + wr = g_ip->wt; + Ndwl = g_ip->ndwl; + Ndbl = g_ip->ndbl; + Ndcm = g_ip->ndcm; + if (g_ip->nspd != 0) { + Nspd = g_ip->nspd; + } + if (g_ip->ndsam1 != 0) { + Ndsam_lev_1 = g_ip->ndsam1; + Ndsam_lev_2 = g_ip->ndsam2; + } + } + + if (is_tag == true) { + is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, + Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, + tag_arr.back(), 0, NULL, NULL, + is_main_mem); + } + // If it's a fully-associative cache, the data array partition parameters are identical to that of + // the tag array, so compute data array partition properties also here. 
+ if (is_tag == false || g_ip->fully_assoc) { + is_valid_partition = calculate_time(is_tag/*false*/, pure_ram, pure_cam, Nspd, Ndwl, + Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, + data_arr.back(), 0, NULL, NULL, + is_main_mem); + } + + if (is_valid_partition) { + if (is_tag == true) { + tag_arr.back()->wt = (enum Wire_type) wr; + tag_res->update_min_values(tag_arr.back()); + tag_arr.push_back(new mem_array); + } + if (is_tag == false || g_ip->fully_assoc) { + data_arr.back()->wt = (enum Wire_type) wr; + data_res->update_min_values(data_arr.back()); + data_arr.push_back(new mem_array); + } + } + + if (g_ip->force_cache_config && is_tag == false) { + wr = wt_max; + iter = niter; + if (g_ip->nspd != 0) { + Nspd = MAXDATASPD; + } + if (g_ip->ndsam1 != 0) { + Ndsam_lev_1 = MAX_COL_MUX + 1; + Ndsam_lev_2 = MAX_COL_MUX + 1; + } + } + } } } - } } - } } - } - delete data_arr.back(); - delete tag_arr.back(); - data_arr.pop_back(); - tag_arr.pop_back(); + delete data_arr.back(); + delete tag_arr.back(); + data_arr.pop_back(); + tag_arr.pop_back(); - pthread_exit(NULL); +#ifndef DEBUG + pthread_exit(NULL); +#else + return NULL; +#endif } @@ -242,423 +230,448 @@ bool calculate_time( int flag_results_populate, results_mem_array *ptr_results, uca_org_t *ptr_fin_res, - bool is_main_mem) -{ - DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem); + bool is_main_mem) { + DynamicParameter dyn_p(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, is_main_mem); - if (dyn_p.is_valid == false) - { - return false; - } + if (dyn_p.is_valid == false) { + return false; + } - UCA * uca = new UCA(dyn_p); + UCA * uca = new UCA(dyn_p); - if (flag_results_populate) - { //For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables - } - else - { - int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir; - int num_mats = uca->bank.dp.num_mats; - bool is_fa = 
uca->bank.dp.fully_assoc; - bool pure_cam = uca->bank.dp.pure_cam; + //For the final solution, populate the ptr_results data structure + //-- TODO: copy only necessary variables + if (flag_results_populate) { + } else { + int num_act_mats_hor_dir = uca->bank.dp.num_act_mats_hor_dir; + int num_mats = uca->bank.dp.num_mats; + bool is_fa = uca->bank.dp.fully_assoc; + bool pure_cam = uca->bank.dp.pure_cam; ptr_array->Ndwl = Ndwl; - ptr_array->Ndbl = Ndbl; - ptr_array->Nspd = Nspd; - ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing; - ptr_array->Ndsam_lev_1 = Ndsam_lev_1; - ptr_array->Ndsam_lev_2 = Ndsam_lev_2; - ptr_array->access_time = uca->access_time; - ptr_array->cycle_time = uca->cycle_time; - ptr_array->multisubbank_interleave_cycle_time = uca->multisubbank_interleave_cycle_time; - ptr_array->area_ram_cells = uca->area_all_dataramcells; - ptr_array->area = uca->area.get_area(); - ptr_array->height = uca->area.h; - ptr_array->width = uca->area.w; - ptr_array->mat_height = uca->bank.mat.area.h; - ptr_array->mat_length = uca->bank.mat.area.w; - ptr_array->subarray_height = uca->bank.mat.subarray.area.h; - ptr_array->subarray_length = uca->bank.mat.subarray.area.w; - ptr_array->power = uca->power; - ptr_array->delay_senseamp_mux_decoder = - MAX(uca->delay_array_to_sa_mux_lev_1_decoder, - uca->delay_array_to_sa_mux_lev_2_decoder); - ptr_array->delay_before_subarray_output_driver = uca->delay_before_subarray_output_driver; - ptr_array->delay_from_subarray_output_driver_to_output = uca->delay_from_subarray_out_drv_to_out; - - ptr_array->delay_route_to_bank = uca->htree_in_add->delay; - ptr_array->delay_input_htree = uca->bank.htree_in_add->delay; - ptr_array->delay_row_predecode_driver_and_block = uca->bank.mat.r_predec->delay; - ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay; - ptr_array->delay_bitlines = uca->bank.mat.delay_bitline; - ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline; - ptr_array->delay_sense_amp = uca->bank.mat.delay_sa; - 
ptr_array->delay_subarray_output_driver = uca->bank.mat.delay_subarray_out_drv_htree; - ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay; - ptr_array->delay_comparator = uca->bank.mat.delay_comparator; - - ptr_array->all_banks_height = uca->area.h; - ptr_array->all_banks_width = uca->area.w; - ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / (uca->area.get_area()); - - ptr_array->power_routing_to_bank = uca->power_routing_to_bank; - ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power; - ptr_array->power_data_input_htree = uca->bank.htree_in_data->power; -// cout<<"power_data_input_htree"<bank.htree_in_data->power.readOp.leakage<power_data_output_htree = uca->bank.htree_out_data->power; -// cout<<"power_data_output_htree"<bank.htree_out_data->power.readOp.leakage<power_row_predecoder_drivers = uca->bank.mat.r_predec->driver_power; - ptr_array->power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_row_predecoder_blocks = uca->bank.mat.r_predec->block_power; - ptr_array->power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders; - ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_bit_mux_predecoder_drivers = uca->bank.mat.b_mux_predec->driver_power; - ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= 
num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_bit_mux_predecoder_blocks = uca->bank.mat.b_mux_predec->block_power; - ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders; - ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = uca->bank.mat.sa_mux_lev_1_predec->driver_power; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = uca->bank.mat.sa_mux_lev_1_predec->block_power; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_1_decoders = uca->bank.mat.power_sa_mux_lev_1_decoders; - ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = 
uca->bank.mat.sa_mux_lev_2_predec->driver_power; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = uca->bank.mat.sa_mux_lev_2_predec->block_power; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_senseamp_mux_lev_2_decoders = uca->bank.mat.power_sa_mux_lev_2_decoders; - ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_bitlines = uca->bank.mat.power_bitline; - ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_sense_amps = uca->bank.mat.power_sa; - ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_prechg_eq_drivers = uca->bank.mat.power_bl_precharge_eq_drv; - ptr_array->power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir; - - 
ptr_array->power_output_drivers_at_subarray = uca->bank.mat.power_subarray_out_drv; - ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir; - - ptr_array->power_comparators = uca->bank.mat.power_comparator; - ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir; - ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir; - -// cout << " num of mats: " << dyn_p.num_mats << endl; - if (is_fa || pure_cam) - { - ptr_array->power_htree_in_search = uca->bank.htree_in_search->power; -// cout<<"power_htree_in_search"<bank.htree_in_search->power.readOp.leakage<power_htree_out_search = uca->bank.htree_out_search->power; -// cout<<"power_htree_out_search"<bank.htree_out_search->power.readOp.leakage<power_searchline = uca->bank.mat.power_searchline; -// cout<<"power_searchlineh"<bank.mat.power_searchline.readOp.leakage<power_searchline.searchOp.dynamic *= num_mats; - ptr_array->power_searchline_precharge = uca->bank.mat.power_searchline_precharge; - ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats; - ptr_array->power_matchlines = uca->bank.mat.power_matchline; - ptr_array->power_matchlines.searchOp.dynamic *= num_mats; - ptr_array->power_matchline_precharge = uca->bank.mat.power_matchline_precharge; - ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats; - ptr_array->power_matchline_to_wordline_drv = uca->bank.mat.power_ml_to_ram_wl_drv; -// cout<<"power_matchline.searchOp.leakage"<bank.mat.power_matchline.searchOp.leakage<activate_energy = uca->activate_energy; - ptr_array->read_energy = uca->read_energy; - ptr_array->write_energy = uca->write_energy; - ptr_array->precharge_energy = uca->precharge_energy; - ptr_array->refresh_power = 
uca->refresh_power; - ptr_array->leak_power_subbank_closed_page = uca->leak_power_subbank_closed_page; - ptr_array->leak_power_subbank_open_page = uca->leak_power_subbank_open_page; - ptr_array->leak_power_request_and_reply_networks = uca->leak_power_request_and_reply_networks; - - ptr_array->precharge_delay = uca->precharge_delay; - - -// cout<<"power_matchline.searchOp.leakage"<bank.mat.<bank.mat.subarray.get_total_cell_area()<Ndbl = Ndbl; + ptr_array->Nspd = Nspd; + ptr_array->deg_bl_muxing = dyn_p.deg_bl_muxing; + ptr_array->Ndsam_lev_1 = Ndsam_lev_1; + ptr_array->Ndsam_lev_2 = Ndsam_lev_2; + ptr_array->access_time = uca->access_time; + ptr_array->cycle_time = uca->cycle_time; + ptr_array->multisubbank_interleave_cycle_time = + uca->multisubbank_interleave_cycle_time; + ptr_array->area_ram_cells = uca->area_all_dataramcells; + ptr_array->area = uca->area.get_area(); + ptr_array->height = uca->area.h; + ptr_array->width = uca->area.w; + ptr_array->mat_height = uca->bank.mat.area.h; + ptr_array->mat_length = uca->bank.mat.area.w; + ptr_array->subarray_height = uca->bank.mat.subarray.area.h; + ptr_array->subarray_length = uca->bank.mat.subarray.area.w; + ptr_array->power = uca->power; + ptr_array->delay_senseamp_mux_decoder = + MAX(uca->delay_array_to_sa_mux_lev_1_decoder, + uca->delay_array_to_sa_mux_lev_2_decoder); + ptr_array->delay_before_subarray_output_driver = + uca->delay_before_subarray_output_driver; + ptr_array->delay_from_subarray_output_driver_to_output = + uca->delay_from_subarray_out_drv_to_out; + + ptr_array->delay_route_to_bank = uca->htree_in_add->delay; + ptr_array->delay_input_htree = uca->bank.htree_in_add->delay; + ptr_array->delay_row_predecode_driver_and_block = + uca->bank.mat.r_predec->delay; + ptr_array->delay_row_decoder = uca->bank.mat.row_dec->delay; + ptr_array->delay_bitlines = uca->bank.mat.delay_bitline; + ptr_array->delay_matchlines = uca->bank.mat.delay_matchchline; + ptr_array->delay_sense_amp = uca->bank.mat.delay_sa; + 
ptr_array->delay_subarray_output_driver = + uca->bank.mat.delay_subarray_out_drv_htree; + ptr_array->delay_dout_htree = uca->bank.htree_out_data->delay; + ptr_array->delay_comparator = uca->bank.mat.delay_comparator; + + ptr_array->all_banks_height = uca->area.h; + ptr_array->all_banks_width = uca->area.w; + ptr_array->area_efficiency = uca->area_all_dataramcells * 100 / + (uca->area.get_area()); + + ptr_array->power_routing_to_bank = uca->power_routing_to_bank; + ptr_array->power_addr_input_htree = uca->bank.htree_in_add->power; + ptr_array->power_data_input_htree = uca->bank.htree_in_data->power; + ptr_array->power_data_output_htree = uca->bank.htree_out_data->power; + + ptr_array->power_row_predecoder_drivers = + uca->bank.mat.r_predec->driver_power; + ptr_array->power_row_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_row_predecoder_blocks = + uca->bank.mat.r_predec->block_power; + ptr_array->power_row_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_row_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_row_decoders = uca->bank.mat.power_row_decoders; + ptr_array->power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_bit_mux_predecoder_drivers = + uca->bank.mat.b_mux_predec->driver_power; + ptr_array->power_bit_mux_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_drivers.searchOp.dynamic *= + 
num_act_mats_hor_dir; + + ptr_array->power_bit_mux_predecoder_blocks = + uca->bank.mat.b_mux_predec->block_power; + ptr_array->power_bit_mux_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_bit_mux_decoders = uca->bank.mat.power_bit_mux_decoders; + ptr_array->power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_bit_mux_decoders.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers = + uca->bank.mat.sa_mux_lev_1_predec->driver_power; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_drivers .searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks = + uca->bank.mat.sa_mux_lev_1_predec->block_power; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_1_decoders = + uca->bank.mat.power_sa_mux_lev_1_decoders; + ptr_array->power_senseamp_mux_lev_1_decoders.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers = + uca->bank.mat.sa_mux_lev_2_predec->driver_power; + 
ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks = + uca->bank.mat.sa_mux_lev_2_predec->block_power; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_senseamp_mux_lev_2_decoders = + uca->bank.mat.power_sa_mux_lev_2_decoders; + ptr_array->power_senseamp_mux_lev_2_decoders .readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders .writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_senseamp_mux_lev_2_decoders .searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_bitlines = uca->bank.mat.power_bitline; + ptr_array->power_bitlines.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_sense_amps = uca->bank.mat.power_sa; + ptr_array->power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir; + + ptr_array->power_prechg_eq_drivers = + uca->bank.mat.power_bl_precharge_eq_drv; + ptr_array->power_prechg_eq_drivers.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_prechg_eq_drivers.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_output_drivers_at_subarray = + 
uca->bank.mat.power_subarray_out_drv; + ptr_array->power_output_drivers_at_subarray.readOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray.writeOp.dynamic *= + num_act_mats_hor_dir; + ptr_array->power_output_drivers_at_subarray.searchOp.dynamic *= + num_act_mats_hor_dir; + + ptr_array->power_comparators = uca->bank.mat.power_comparator; + ptr_array->power_comparators.readOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_comparators.writeOp.dynamic *= num_act_mats_hor_dir; + ptr_array->power_comparators.searchOp.dynamic *= num_act_mats_hor_dir; + + if (is_fa || pure_cam) { + ptr_array->power_htree_in_search = + uca->bank.htree_in_search->power; + ptr_array->power_htree_out_search = + uca->bank.htree_out_search->power; + ptr_array->power_searchline = uca->bank.mat.power_searchline; + ptr_array->power_searchline.searchOp.dynamic *= num_mats; + ptr_array->power_searchline_precharge = + uca->bank.mat.power_searchline_precharge; + ptr_array->power_searchline_precharge.searchOp.dynamic *= num_mats; + ptr_array->power_matchlines = uca->bank.mat.power_matchline; + ptr_array->power_matchlines.searchOp.dynamic *= num_mats; + ptr_array->power_matchline_precharge = + uca->bank.mat.power_matchline_precharge; + ptr_array->power_matchline_precharge.searchOp.dynamic *= num_mats; + ptr_array->power_matchline_to_wordline_drv = + uca->bank.mat.power_ml_to_ram_wl_drv; + } + + ptr_array->activate_energy = uca->activate_energy; + ptr_array->read_energy = uca->read_energy; + ptr_array->write_energy = uca->write_energy; + ptr_array->precharge_energy = uca->precharge_energy; + ptr_array->refresh_power = uca->refresh_power; + ptr_array->leak_power_subbank_closed_page = + uca->leak_power_subbank_closed_page; + ptr_array->leak_power_subbank_open_page = + uca->leak_power_subbank_open_page; + ptr_array->leak_power_request_and_reply_networks = + uca->leak_power_request_and_reply_networks; + + ptr_array->precharge_delay = uca->precharge_delay; + } - delete 
uca; - return true; + delete uca; + return true; } -bool check_uca_org(uca_org_t & u, min_values_t *minval) -{ - if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) { - return false; - } - if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > - g_ip->dynamic_power_dev) { - return false; - } - if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > - g_ip->leakage_power_dev) { - return false; - } - if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > - g_ip->cycle_time_dev) { - return false; - } - if (((u.area - minval->min_area)/minval->min_area)*100 > - g_ip->area_dev) { - return false; - } - return true; +bool check_uca_org(uca_org_t & u, min_values_t *minval) { + if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) > + g_ip->delay_dev) { + return false; + } + if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 > + g_ip->dynamic_power_dev) { + return false; + } + if (((u.power.readOp.leakage - minval->min_leakage) / + minval->min_leakage) * 100 > + g_ip->leakage_power_dev) { + return false; + } + if (((u.cycle_time - minval->min_cyc) / minval->min_cyc)*100 > + g_ip->cycle_time_dev) { + return false; + } + if (((u.area - minval->min_area) / minval->min_area)*100 > + g_ip->area_dev) { + return false; + } + return true; } -bool check_mem_org(mem_array & u, const min_values_t *minval) -{ - if (((u.access_time - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev) { - return false; - } - if (((u.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > - g_ip->dynamic_power_dev) { - return false; - } - if (((u.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > - g_ip->leakage_power_dev) { - return false; - } - if (((u.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > - g_ip->cycle_time_dev) { - return false; - } - if (((u.area - minval->min_area)/minval->min_area)*100 > - g_ip->area_dev) { - return false; - } - return 
true; +bool check_mem_org(mem_array & u, const min_values_t *minval) { + if (((u.access_time - minval->min_delay) * 100 / minval->min_delay) > + g_ip->delay_dev) { + return false; + } + if (((u.power.readOp.dynamic - minval->min_dyn) / minval->min_dyn)*100 > + g_ip->dynamic_power_dev) { + return false; + } + if (((u.power.readOp.leakage - minval->min_leakage) / + minval->min_leakage) * 100 > + g_ip->leakage_power_dev) { + return false; + } + if (((u.cycle_time - minval->min_cyc) / minval->min_cyc) * 100 > + g_ip->cycle_time_dev) { + return false; + } + if (((u.area - minval->min_area) / minval->min_area) * 100 > + g_ip->area_dev) { + return false; + } + return true; } -void find_optimal_uca(uca_org_t *res, min_values_t * minval, list & ulist) -{ - double cost = 0; - double min_cost = BIGNUM; - float d, a, dp, lp, c; - - dp = g_ip->dynamic_power_wt; - lp = g_ip->leakage_power_wt; - a = g_ip->area_wt; - d = g_ip->delay_wt; - c = g_ip->cycle_time_wt; +void find_optimal_uca(uca_org_t *res, min_values_t * minval, + list & ulist) { + double cost = 0; + double min_cost = BIGNUM; + float d, a, dp, lp, c; - if (ulist.empty() == true) - { - cout << "ERROR: no valid cache organizations found" << endl; - exit(0); - } + dp = g_ip->dynamic_power_wt; + lp = g_ip->leakage_power_wt; + a = g_ip->area_wt; + d = g_ip->delay_wt; + c = g_ip->cycle_time_wt; - for (list::iterator niter = ulist.begin(); niter != ulist.end(); niter++) - { - if (g_ip->ed == 1) - { - cost = ((niter)->access_time/minval->min_delay) * ((niter)->power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) - { - min_cost = cost; - *res = (*(niter)); - } - } - else if (g_ip->ed == 2) - { - cost = ((niter)->access_time/minval->min_delay)* - ((niter)->access_time/minval->min_delay)* - ((niter)->power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) - { - min_cost = cost; - *res = (*(niter)); - } + if (ulist.empty() == true) { + cout << "ERROR: no valid cache organizations found" << endl; + exit(0); } - else 
- { - /* - * check whether the current organization - * meets the input deviation constraints - */ - bool v = check_uca_org(*niter, minval); - //if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling - - if (v) - { - cost = (d * ((niter)->access_time/minval->min_delay) + - c * ((niter)->cycle_time/minval->min_cyc) + - dp * ((niter)->power.readOp.dynamic/minval->min_dyn) + - lp * ((niter)->power.readOp.leakage/minval->min_leakage) + - a * ((niter)->area/minval->min_area)); - //fprintf(stderr, "cost = %g\n", cost); - - if (min_cost > cost) { - min_cost = cost; - *res = (*(niter)); - niter = ulist.erase(niter); - if (niter!=ulist.begin()) - niter--; + + for (list::iterator niter = ulist.begin(); niter != ulist.end(); + niter++) { + if (g_ip->ed == 1) { + cost = ((niter)->access_time / minval->min_delay) * + ((niter)->power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + *res = (*(niter)); + } + } else if (g_ip->ed == 2) { + cost = ((niter)->access_time / minval->min_delay) * + ((niter)->access_time / minval->min_delay) * + ((niter)->power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + *res = (*(niter)); + } + } else { + /* + * check whether the current organization + * meets the input deviation constraints + */ + bool v = check_uca_org(*niter, minval); + + if (v) { + cost = (d * ((niter)->access_time / minval->min_delay) + + c * ((niter)->cycle_time / minval->min_cyc) + + dp * ((niter)->power.readOp.dynamic / minval->min_dyn) + + lp * + ((niter)->power.readOp.leakage / minval->min_leakage) + + a * ((niter)->area / minval->min_area)); + + if (min_cost > cost) { + min_cost = cost; + *res = (*(niter)); + niter = ulist.erase(niter); + if (niter != ulist.begin()) + niter--; + } + } else { + niter = ulist.erase(niter); + if (niter != ulist.begin()) + niter--; + } } - } - else { - niter = ulist.erase(niter); - if (niter!=ulist.begin()) - niter--; - } } - } - if 
(min_cost == BIGNUM) - { - cout << "ERROR: no cache organizations met optimization criteria" << endl; - exit(0); - } + if (min_cost == BIGNUM) { + cout << "ERROR: no cache organizations met optimization criteria" + << endl; + exit(0); + } } -void filter_tag_arr(const min_values_t * min, list & list) -{ - double cost = BIGNUM; - double cur_cost; - double wt_delay = g_ip->delay_wt, wt_dyn = g_ip->dynamic_power_wt, wt_leakage = g_ip->leakage_power_wt, wt_cyc = g_ip->cycle_time_wt, wt_area = g_ip->area_wt; - mem_array * res = NULL; +void filter_tag_arr(const min_values_t * min, list & list) { + double cost = BIGNUM; + double cur_cost; + double wt_delay = g_ip->delay_wt; + double wt_dyn = g_ip->dynamic_power_wt; + double wt_leakage = g_ip->leakage_power_wt; + double wt_cyc = g_ip->cycle_time_wt; + double wt_area = g_ip->area_wt; + mem_array * res = NULL; - if (list.empty() == true) - { - cout << "ERROR: no valid tag organizations found" << endl; - exit(1); - } + if (list.empty() == true) { + cout << "ERROR: no valid tag organizations found" << endl; + exit(1); + } - while (list.empty() != true) - { - bool v = check_mem_org(*list.back(), min); - if (v) - { - cur_cost = wt_delay * (list.back()->access_time/min->min_delay) + - wt_dyn * (list.back()->power.readOp.dynamic/min->min_dyn) + - wt_leakage * (list.back()->power.readOp.leakage/min->min_leakage) + - wt_area * (list.back()->area/min->min_area) + - wt_cyc * (list.back()->cycle_time/min->min_cyc); - } - else - { - cur_cost = BIGNUM; - } - if (cur_cost < cost) - { - if (res != NULL) - { - delete res; - } - cost = cur_cost; - res = list.back(); + while (list.empty() != true) { + bool v = check_mem_org(*list.back(), min); + if (v) { + cur_cost = wt_delay * (list.back()->access_time / min->min_delay) + + wt_dyn * (list.back()->power.readOp.dynamic / + min->min_dyn) + + wt_leakage * (list.back()->power.readOp.leakage / + min->min_leakage) + + wt_area * (list.back()->area / min->min_area) + + wt_cyc * 
(list.back()->cycle_time / min->min_cyc); + } else { + cur_cost = BIGNUM; + } + if (cur_cost < cost) { + if (res != NULL) { + delete res; + } + cost = cur_cost; + res = list.back(); + } else { + delete list.back(); + } + list.pop_back(); } - else - { - delete list.back(); + if (!res) { + cout << "ERROR: no valid tag organizations found" << endl; + exit(0); } - list.pop_back(); - } - if(!res) - { - cout << "ERROR: no valid tag organizations found" << endl; - exit(0); - } - list.push_back(res); + list.push_back(res); } -void filter_data_arr(list & curr_list) -{ - if (curr_list.empty() == true) - { - cout << "ERROR: no valid data array organizations found" << endl; - exit(1); - } +void filter_data_arr(list & curr_list) { + if (curr_list.empty() == true) { + cout << "ERROR: no valid data array organizations found" << endl; + exit(1); + } - list::iterator iter; + list::iterator iter; - for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) - { - mem_array * m = *iter; + for (iter = curr_list.begin(); iter != curr_list.end(); ++iter) { + mem_array * m = *iter; - if (m == NULL) exit(1); + if (m == NULL) exit(1); - if(((m->access_time - m->arr_min->min_delay)/m->arr_min->min_delay > 0.5) && - ((m->power.readOp.dynamic - m->arr_min->min_dyn)/m->arr_min->min_dyn > 0.5)) - { - delete m; - iter = curr_list.erase(iter); - iter --; + if (((m->access_time - m->arr_min->min_delay) / m->arr_min->min_delay > + 0.5) && + ((m->power.readOp.dynamic - m->arr_min->min_dyn) / + m->arr_min->min_dyn > 0.5)) { + delete m; + iter = curr_list.erase(iter); + iter --; + } } - } } @@ -675,210 +688,199 @@ void filter_data_arr(list & curr_list) * above results * 4. 
Cache model with least cost is picked from sol_list */ -void solve(uca_org_t *fin_res) -{ - bool is_dram = false; - int pure_ram = g_ip->pure_ram; - bool pure_cam = g_ip->pure_cam; - - init_tech_params(g_ip->F_sz_um, false); - - - list tag_arr (0); - list data_arr(0); - list::iterator miter; - list sol_list(1, uca_org_t()); - - fin_res->tag_array.access_time = 0; - fin_res->tag_array.Ndwl = 0; - fin_res->tag_array.Ndbl = 0; - fin_res->tag_array.Nspd = 0; - fin_res->tag_array.deg_bl_muxing = 0; - fin_res->tag_array.Ndsam_lev_1 = 0; - fin_res->tag_array.Ndsam_lev_2 = 0; - - - // distribute calculate_time() execution to multiple threads - calc_time_mt_wrapper_struct * calc_array = new calc_time_mt_wrapper_struct[nthreads]; - pthread_t threads[nthreads]; - - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].tid = t; - calc_array[t].pure_ram = pure_ram; - calc_array[t].pure_cam = pure_cam; - calc_array[t].data_res = new min_values_t(); - calc_array[t].tag_res = new min_values_t(); - } - - bool is_tag; - uint32_t ram_cell_tech_type; - - // If it's a cache, first calculate the area, delay and power for all tag array partitions. 
- if (!(pure_ram||pure_cam||g_ip->fully_assoc)) - { //cache - is_tag = true; - ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type; - is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); - init_tech_params(g_ip->F_sz_um, is_tag); - - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].is_tag = is_tag; - calc_array[t].is_main_mem = false; - calc_array[t].Nspd_min = 0.125; - pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); +void solve(uca_org_t *fin_res) { + bool is_dram = false; + int pure_ram = g_ip->pure_ram; + bool pure_cam = g_ip->pure_cam; + + init_tech_params(g_ip->F_sz_um, false); + + + list tag_arr (0); + list data_arr(0); + list::iterator miter; + list sol_list(1, uca_org_t()); + + fin_res->tag_array.access_time = 0; + fin_res->tag_array.Ndwl = 0; + fin_res->tag_array.Ndbl = 0; + fin_res->tag_array.Nspd = 0; + fin_res->tag_array.deg_bl_muxing = 0; + fin_res->tag_array.Ndsam_lev_1 = 0; + fin_res->tag_array.Ndsam_lev_2 = 0; + + + // distribute calculate_time() execution to multiple threads + calc_time_mt_wrapper_struct * calc_array = + new calc_time_mt_wrapper_struct[nthreads]; + pthread_t threads[nthreads]; + + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].tid = t; + calc_array[t].pure_ram = pure_ram; + calc_array[t].pure_cam = pure_cam; + calc_array[t].data_res = new min_values_t(); + calc_array[t].tag_res = new min_values_t(); } - for (uint32_t t = 0; t < nthreads; t++) - { - pthread_join(threads[t], NULL); - } + bool is_tag; + uint32_t ram_cell_tech_type; + + // If it's a cache, first calculate the area, delay and power for all tag array partitions. 
+ if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { //cache + is_tag = true; + ram_cell_tech_type = g_ip->tag_arr_ram_cell_tech_type; + is_dram = ((ram_cell_tech_type == lp_dram) || + (ram_cell_tech_type == comm_dram)); + init_tech_params(g_ip->F_sz_um, is_tag); + + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].is_tag = is_tag; + calc_array[t].is_main_mem = false; + calc_array[t].Nspd_min = 0.125; +#ifndef DEBUG + pthread_create(&threads[t], NULL, calc_time_mt_wrapper, + (void *)(&(calc_array[t]))); +#else + calc_time_mt_wrapper((void *)(&(calc_array[t]))); +#endif + } - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].data_arr.sort(mem_array::lt); - data_arr.merge(calc_array[t].data_arr, mem_array::lt); - calc_array[t].tag_arr.sort(mem_array::lt); - tag_arr.merge(calc_array[t].tag_arr, mem_array::lt); +#ifndef DEBUG + for (uint32_t t = 0; t < nthreads; t++) { + pthread_join(threads[t], NULL); + } +#endif + + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].data_arr.sort(mem_array::lt); + data_arr.merge(calc_array[t].data_arr, mem_array::lt); + calc_array[t].tag_arr.sort(mem_array::lt); + tag_arr.merge(calc_array[t].tag_arr, mem_array::lt); + } } - } - // calculate the area, delay and power for all data array partitions (for cache or plain RAM). -// if (!g_ip->fully_assoc) -// {//in the new cacti, cam, fully_associative cache are processed as single array in the data portion + // calculate the area, delay and power for all data array partitions (for cache or plain RAM). 
+ // in the new cacti, cam, fully_associative cache are processed as single array in the data portion is_tag = false; ram_cell_tech_type = g_ip->data_arr_ram_cell_tech_type; is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); init_tech_params(g_ip->F_sz_um, is_tag); - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].is_tag = is_tag; - calc_array[t].is_main_mem = g_ip->is_main_mem; - if (!(pure_cam||g_ip->fully_assoc)) - { - calc_array[t].Nspd_min = (double)(g_ip->out_w)/(double)(g_ip->block_sz*8); - } - else - { - calc_array[t].Nspd_min = 1; - } + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].is_tag = is_tag; + calc_array[t].is_main_mem = g_ip->is_main_mem; + if (!(pure_cam || g_ip->fully_assoc)) { + calc_array[t].Nspd_min = (double)(g_ip->out_w) / + (double)(g_ip->block_sz * 8); + } else { + calc_array[t].Nspd_min = 1; + } - pthread_create(&threads[t], NULL, calc_time_mt_wrapper, (void *)(&(calc_array[t]))); +#ifndef DEBUG + pthread_create(&threads[t], NULL, calc_time_mt_wrapper, + (void *)(&(calc_array[t]))); +#else + calc_time_mt_wrapper((void *)(&(calc_array[t]))); +#endif } - for (uint32_t t = 0; t < nthreads; t++) - { - pthread_join(threads[t], NULL); +#ifndef DEBUG + for (uint32_t t = 0; t < nthreads; t++) { + pthread_join(threads[t], NULL); } +#endif data_arr.clear(); - for (uint32_t t = 0; t < nthreads; t++) - { - calc_array[t].data_arr.sort(mem_array::lt); - data_arr.merge(calc_array[t].data_arr, mem_array::lt); - } -// } + for (uint32_t t = 0; t < nthreads; t++) { + calc_array[t].data_arr.sort(mem_array::lt); + data_arr.merge(calc_array[t].data_arr, mem_array::lt); - min_values_t * d_min = new min_values_t(); - min_values_t * t_min = new min_values_t(); - min_values_t * cache_min = new min_values_t(); + } - for (uint32_t t = 0; t < nthreads; t++) - { - d_min->update_min_values(calc_array[t].data_res); - t_min->update_min_values(calc_array[t].tag_res); - } - for (miter = data_arr.begin(); miter != 
data_arr.end(); miter++) - { - (*miter)->arr_min = d_min; - } + min_values_t * d_min = new min_values_t(); + min_values_t * t_min = new min_values_t(); + min_values_t * cache_min = new min_values_t(); - //cout << data_arr.size() << "\t" << tag_arr.size() <<" before\n"; - filter_data_arr(data_arr); - if(!(pure_ram||pure_cam||g_ip->fully_assoc)) - { - filter_tag_arr(t_min, tag_arr); - } - //cout << data_arr.size() << "\t" << tag_arr.size() <<" after\n"; + for (uint32_t t = 0; t < nthreads; t++) { + d_min->update_min_values(calc_array[t].data_res); + t_min->update_min_values(calc_array[t].tag_res); + } + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { + (*miter)->arr_min = d_min; + } - if (pure_ram||pure_cam||g_ip->fully_assoc) - { - for (miter = data_arr.begin(); miter != data_arr.end(); miter++) - { - uca_org_t & curr_org = sol_list.back(); - curr_org.tag_array2 = NULL; - curr_org.data_array2 = (*miter); + filter_data_arr(data_arr); + if (!(pure_ram || pure_cam || g_ip->fully_assoc)) { + filter_tag_arr(t_min, tag_arr); + } - curr_org.find_delay(); - curr_org.find_energy(); - curr_org.find_area(); - curr_org.find_cyc(); + if (pure_ram || pure_cam || g_ip->fully_assoc) { + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { + uca_org_t & curr_org = sol_list.back(); + curr_org.tag_array2 = NULL; + curr_org.data_array2 = (*miter); - //update min values for the entire cache - cache_min->update_min_values(curr_org); + curr_org.find_delay(); + curr_org.find_energy(); + curr_org.find_area(); + curr_org.find_cyc(); - sol_list.push_back(uca_org_t()); - } - } - else - { - while (tag_arr.empty() != true) - { - mem_array * arr_temp = (tag_arr.back()); - //delete tag_arr.back(); - tag_arr.pop_back(); + //update min values for the entire cache + cache_min->update_min_values(curr_org); - for (miter = data_arr.begin(); miter != data_arr.end(); miter++) - { - uca_org_t & curr_org = sol_list.back(); - curr_org.tag_array2 = arr_temp; - 
curr_org.data_array2 = (*miter); + sol_list.push_back(uca_org_t()); + } + } else { + while (tag_arr.empty() != true) { + mem_array * arr_temp = (tag_arr.back()); + tag_arr.pop_back(); - curr_org.find_delay(); - curr_org.find_energy(); - curr_org.find_area(); - curr_org.find_cyc(); + for (miter = data_arr.begin(); miter != data_arr.end(); miter++) { + uca_org_t & curr_org = sol_list.back(); + curr_org.tag_array2 = arr_temp; + curr_org.data_array2 = (*miter); - //update min values for the entire cache - cache_min->update_min_values(curr_org); + curr_org.find_delay(); + curr_org.find_energy(); + curr_org.find_area(); + curr_org.find_cyc(); - sol_list.push_back(uca_org_t()); - } + //update min values for the entire cache + cache_min->update_min_values(curr_org); + + sol_list.push_back(uca_org_t()); + } + } } - } - sol_list.pop_back(); + sol_list.pop_back(); - find_optimal_uca(fin_res, cache_min, sol_list); + find_optimal_uca(fin_res, cache_min, sol_list); - sol_list.clear(); + sol_list.clear(); - for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) - { - if (*miter != fin_res->data_array2) - { - delete *miter; + for (miter = data_arr.begin(); miter != data_arr.end(); ++miter) { + if (*miter != fin_res->data_array2) { + delete *miter; + } } - } - data_arr.clear(); + data_arr.clear(); - for (uint32_t t = 0; t < nthreads; t++) - { - delete calc_array[t].data_res; - delete calc_array[t].tag_res; - } + for (uint32_t t = 0; t < nthreads; t++) { + delete calc_array[t].data_res; + delete calc_array[t].tag_res; + } - delete [] calc_array; - delete cache_min; - delete d_min; - delete t_min; + delete [] calc_array; + delete cache_min; + delete d_min; + delete t_min; } void update(uca_org_t *fin_res) @@ -886,7 +888,14 @@ void update(uca_org_t *fin_res) if(fin_res->tag_array2) { init_tech_params(g_ip->F_sz_um,true); - DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, fin_res->tag_array2->Nspd, fin_res->tag_array2->Ndwl, fin_res->tag_array2->Ndbl, 
fin_res->tag_array2->Ndcm, fin_res->tag_array2->Ndsam_lev_1, fin_res->tag_array2->Ndsam_lev_2, g_ip->is_main_mem); + DynamicParameter tag_arr_dyn_p(true, g_ip->pure_ram, g_ip->pure_cam, + fin_res->tag_array2->Nspd, + fin_res->tag_array2->Ndwl, + fin_res->tag_array2->Ndbl, + fin_res->tag_array2->Ndcm, + fin_res->tag_array2->Ndsam_lev_1, + fin_res->tag_array2->Ndsam_lev_2, + g_ip->is_main_mem); if(tag_arr_dyn_p.is_valid) { UCA * tag_arr = new UCA(tag_arr_dyn_p); @@ -894,12 +903,20 @@ void update(uca_org_t *fin_res) } else { - cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; + cout << "ERROR: Cannot retrieve array structure for leakage feedback" + << endl; exit(1); } } init_tech_params(g_ip->F_sz_um,false); - DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, fin_res->data_array2->Nspd, fin_res->data_array2->Ndwl, fin_res->data_array2->Ndbl, fin_res->data_array2->Ndcm, fin_res->data_array2->Ndsam_lev_1, fin_res->data_array2->Ndsam_lev_2, g_ip->is_main_mem); + DynamicParameter data_arr_dyn_p(false, g_ip->pure_ram, g_ip->pure_cam, + fin_res->data_array2->Nspd, + fin_res->data_array2->Ndwl, + fin_res->data_array2->Ndbl, + fin_res->data_array2->Ndcm, + fin_res->data_array2->Ndsam_lev_1, + fin_res->data_array2->Ndsam_lev_2, + g_ip->is_main_mem); if(data_arr_dyn_p.is_valid) { UCA * data_arr = new UCA(data_arr_dyn_p); @@ -907,7 +924,8 @@ void update(uca_org_t *fin_res) } else { - cout << "ERROR: Cannot retrieve array structure for leakage feedback" << endl; + cout << "ERROR: Cannot retrieve array structure for leakage feedback" + << endl; exit(1); } diff --git a/ext/mcpat/cacti/Ucache.h b/ext/mcpat/cacti/Ucache.h index 20985fff1..87836adcd 100644 --- a/ext/mcpat/cacti/Ucache.h +++ b/ext/mcpat/cacti/Ucache.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
* All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,9 +40,8 @@ #include "nuca.h" #include "router.h" -class min_values_t -{ - public: +class min_values_t { +public: double min_delay; double min_dyn; double min_leakage; @@ -58,17 +58,16 @@ class min_values_t -struct solution -{ - int tag_array_index; - int data_array_index; - list::iterator tag_array_iter; - list::iterator data_array_iter; - double access_time; - double cycle_time; - double area; - double efficiency; - powerDef total_power; +struct solution { + int tag_array_index; + int data_array_index; + list::iterator tag_array_iter; + list::iterator data_array_iter; + double access_time; + double cycle_time; + double area; + double efficiency; + powerDef total_power; }; @@ -94,20 +93,19 @@ void solve(uca_org_t *fin_res); void init_tech_params(double tech, bool is_tag); -struct calc_time_mt_wrapper_struct -{ - uint32_t tid; - bool is_tag; - bool pure_ram; - bool pure_cam; - bool is_main_mem; - double Nspd_min; +struct calc_time_mt_wrapper_struct { + uint32_t tid; + bool is_tag; + bool pure_ram; + bool pure_cam; + bool is_main_mem; + double Nspd_min; - min_values_t * data_res; - min_values_t * tag_res; + min_values_t * data_res; + min_values_t * tag_res; - list data_arr; - list tag_arr; + list data_arr; + list tag_arr; }; void *calc_time_mt_wrapper(void * void_obj); diff --git a/ext/mcpat/cacti/arbiter.cc b/ext/mcpat/cacti/arbiter.cc index 6664abf13..8106d2025 100644 --- a/ext/mcpat/cacti/arbiter.cc +++ 
b/ext/mcpat/cacti/arbiter.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -36,95 +37,107 @@ Arbiter::Arbiter( double flit_size_, double output_len, TechnologyParameter::DeviceType *dt - ):R(n_req), flit_size(flit_size_), - o_len (output_len), deviceType(dt) -{ - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - Vdd = dt->Vdd; - double technology = g_ip->F_sz_um; - NTn1 = 13.5*technology/2; - PTn1 = 76*technology/2; - NTn2 = 13.5*technology/2; - PTn2 = 76*technology/2; - NTi = 12.5*technology/2; - PTi = 25*technology/2; - NTtr = 10*technology/2; /*Transmission gate's nmos tr. length*/ - PTtr = 20*technology/2; /* pmos tr. length*/ + ): R(n_req), flit_size(flit_size_), + o_len (output_len), deviceType(dt) { + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + Vdd = dt->Vdd; + double technology = g_ip->F_sz_um; + NTn1 = 13.5 * technology / 2; + PTn1 = 76 * technology / 2; + NTn2 = 13.5 * technology / 2; + PTn2 = 76 * technology / 2; + NTi = 12.5 * technology / 2; + PTi = 25 * technology / 2; + NTtr = 10 * technology / 2; /*Transmission gate's nmos tr. length*/ + PTtr = 20 * technology / 2; /* pmos tr. 
length*/ } -Arbiter::~Arbiter(){} +Arbiter::~Arbiter() {} double Arbiter::arb_req() { - double temp = ((R-1)*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)) + 2*gate_C(NTn2, 0) + - gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) + - drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def)); - return temp; + double temp = ((R - 1) * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)) + 2 * + gate_C(NTn2, 0) + + gate_C(PTn2, 0) + gate_C(NTi, 0) + gate_C(PTi, 0) + + drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def)); + return temp; } double Arbiter::arb_pri() { - double temp = 2*(2*gate_C(NTn1, 0)+gate_C(PTn1, 0)); /* switching capacitance - of flip-flop is ignored */ - return temp; + /* switching capacitance of flip-flop is ignored */ + double temp = 2 * (2 * gate_C(NTn1, 0) + gate_C(PTn1, 0)); + return temp; } double Arbiter::arb_grant() { - double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline(); - return temp; + double temp = drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + crossbar_ctrline(); + return temp; } double Arbiter::arb_int() { - double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def)*2 + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + - 2*gate_C(NTn2, 0) + gate_C(PTn2, 0)); - return temp; + double temp = (drain_C_(NTn1, 0, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(PTn1, 1, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(NTn2, 0) + gate_C(PTn2, 0)); + return temp; } void Arbiter::compute_power() { - power.readOp.dynamic = (R*arb_req()*Vdd*Vdd/2 + R*arb_pri()*Vdd*Vdd/2 + - arb_grant()*Vdd*Vdd + arb_int()*0.5*Vdd*Vdd); - double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); - double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); - double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); - double nor1_leak_gate = 
cmos_Ig_leakage(g_tp.min_w_nmos_*NTn1*2, min_w_pmos * PTn1*2, 2, nor); - double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTn2*R, min_w_pmos * PTn2*R, 2, nor); - double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_*NTi, min_w_pmos * PTi, 1, inv); - power.readOp.leakage = (nor1_leak + nor2_leak + not_leak)*Vdd; //FIXME include priority table leakage - power.readOp.gate_leakage = nor1_leak_gate*Vdd + nor2_leak_gate*Vdd + not_leak_gate*Vdd; + power.readOp.dynamic = (R * arb_req() * Vdd * Vdd / 2 + R * arb_pri() * + Vdd * Vdd / 2 + + arb_grant() * Vdd * Vdd + arb_int() * 0.5 * Vdd * + Vdd); + double nor1_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn1 * 2, + min_w_pmos * PTn1 * 2, 2, nor); + double nor2_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTn2 * R, + min_w_pmos * PTn2 * R, 2, nor); + double not_leak = cmos_Isub_leakage(g_tp.min_w_nmos_ * NTi, + min_w_pmos * PTi, 1, inv); + double nor1_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn1 * 2, + min_w_pmos * PTn1 * 2, 2, nor); + double nor2_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTn2 * R, + min_w_pmos * PTn2 * R, 2, nor); + double not_leak_gate = cmos_Ig_leakage(g_tp.min_w_nmos_ * NTi, + min_w_pmos * PTi, 1, inv); + //FIXME include priority table leakage + power.readOp.leakage = (nor1_leak + nor2_leak + not_leak) * Vdd; + power.readOp.gate_leakage = nor1_leak_gate * Vdd + nor2_leak_gate * Vdd + + not_leak_gate * Vdd; } double //wire cap with triple spacing Arbiter::Cw3(double length) { - Wire wc(g_ip->wt, length, 1, 3, 3); - double temp = (wc.wire_cap(length,true)); - return temp; + Wire wc(g_ip->wt, length, 1, 3, 3); + double temp = (wc.wire_cap(length, true)); + return temp; } double Arbiter::crossbar_ctrline() { - double temp = (Cw3(o_len * 1e-6 /* m */) + - drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 1, 1, 1, g_tp.cell_h_def) + - gate_C(NTi, 0) + gate_C(PTi, 0)); - return temp; + double temp = (Cw3(o_len * 1e-6 /* m */) + + drain_C_(NTi, 0, 1, 1, g_tp.cell_h_def) + drain_C_(PTi, 
1, 1, 1, g_tp.cell_h_def) + + gate_C(NTi, 0) + gate_C(PTi, 0)); + return temp; } double Arbiter::transmission_buf_ctrcap() { - double temp = gate_C(NTtr, 0)+gate_C(PTtr, 0); - return temp; + double temp = gate_C(NTtr, 0) + gate_C(PTtr, 0); + return temp; } -void Arbiter::print_arbiter() -{ - cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n"; - cout << "Flit size : " << flit_size << " bits" << endl; - cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl; - cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; +void Arbiter::print_arbiter() { + cout << "\nArbiter Stats (" << R << " input arbiter" << ")\n\n"; + cout << "Flit size : " << flit_size << " bits" << endl; + cout << "Dynamic Power : " << power.readOp.dynamic*1e9 << " (nJ)" << endl; + cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; } diff --git a/ext/mcpat/cacti/bank.cc b/ext/mcpat/cacti/bank.cc old mode 100755 new mode 100644 index a18c7f1ed..b4fd95090 --- a/ext/mcpat/cacti/bank.cc +++ b/ext/mcpat/cacti/bank.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -36,163 +37,174 @@ #include "bank.h" Bank::Bank(const DynamicParameter & dyn_p): - dp(dyn_p), mat(dp), - num_addr_b_mat(dyn_p.number_addr_bits_mat), - num_mats_hor_dir(dyn_p.num_mats_h_dir), num_mats_ver_dir(dyn_p.num_mats_v_dir) -{ - int RWP; - int ERP; - int EWP; - int SCHP; - - if (dp.use_inp_params) - { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else - { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; - SCHP = g_ip->num_search_ports; - } - - int total_addrbits = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); - int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); - int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); - int searchinbits; - int searchoutbits; - - if (dp.fully_assoc || dp.pure_cam) - { - datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); - dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); - searchinbits = dp.num_si_b_bank_per_port * SCHP; - searchoutbits = dp.num_so_b_bank_per_port * SCHP; - } - - if (!(dp.fully_assoc || dp.pure_cam)) - { - if (g_ip->fast_access && dp.is_tag == false) - { - dataoutbits *= g_ip->data_assoc; + dp(dyn_p), mat(dp), + num_addr_b_mat(dyn_p.number_addr_bits_mat), + num_mats_hor_dir(dyn_p.num_mats_h_dir), + num_mats_ver_dir(dyn_p.num_mats_v_dir) { + int RWP; + int ERP; + int EWP; + int SCHP; + + if (dp.use_inp_params) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; } - htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Add_htree); - htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, 
(double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree); - htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); + int total_addrbits = (dp.number_addr_bits_mat + + dp.number_subbanks_decode) * (RWP + ERP + EWP); + int datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); + int dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); + int searchinbits; + int searchoutbits; + + if (dp.fully_assoc || dp.pure_cam) { + datainbits = dp.num_di_b_bank_per_port * (RWP + EWP); + dataoutbits = dp.num_do_b_bank_per_port * (RWP + ERP); + searchinbits = dp.num_si_b_bank_per_port * SCHP; + searchoutbits = dp.num_so_b_bank_per_port * SCHP; + } + + if (!(dp.fully_assoc || dp.pure_cam)) { + if (g_ip->fast_access && dp.is_tag == false) { + dataoutbits *= g_ip->data_assoc; + } + + htree_in_add = new Htree2(g_ip->wt, (double) mat.area.w, + (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, + 0, num_mats_ver_dir * 2, num_mats_hor_dir * 2, + Add_htree); + htree_in_data = new Htree2(g_ip->wt, (double) mat.area.w, + (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, + 0, num_mats_ver_dir * 2, num_mats_hor_dir * 2, + Data_in_htree); + htree_out_data = new Htree2(g_ip->wt, (double) mat.area.w, + (double)mat.area.h, + total_addrbits, datainbits, 0, dataoutbits, + 0, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_out_htree); // htree_out_data = new Htree2 (g_ip->wt,(double) 100, (double)100, -// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); - - area.w = htree_in_data->area.w; - area.h = htree_in_data->area.h; - } - else - { - htree_in_add = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits, searchinbits,dataoutbits,searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, 
Add_htree); - htree_in_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree); - htree_out_data = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); - htree_in_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_in_htree,true, true); - htree_out_search = new Htree2 (g_ip->wt,(double) mat.area.w, (double)mat.area.h, - total_addrbits, datainbits,searchinbits, dataoutbits, searchoutbits,num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree,true); - - area.w = htree_in_data->area.w; - area.h = htree_in_data->area.h; - } - - num_addr_b_row_dec = _log2(mat.subarray.num_rows); - num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec; - num_addr_b_routed_to_mat_for_rd_or_wr = num_addr_b_mat - num_addr_b_row_dec; +// total_addrbits, datainbits, 0,dataoutbits,0, num_mats_ver_dir*2, num_mats_hor_dir*2, Data_out_htree); + + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + } else { + htree_in_add = + new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Add_htree); + htree_in_data = + new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_in_htree); + htree_out_data = + new Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_out_htree); + htree_in_search = + new 
Htree2(g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_in_htree, true, true); + htree_out_search = + new Htree2 (g_ip->wt, (double) mat.area.w, (double)mat.area.h, + total_addrbits, datainbits, searchinbits, dataoutbits, + searchoutbits, num_mats_ver_dir * 2, + num_mats_hor_dir * 2, Data_out_htree, true); + + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + } + + num_addr_b_row_dec = _log2(mat.subarray.num_rows); + num_addr_b_routed_to_mat_for_act = num_addr_b_row_dec; + num_addr_b_routed_to_mat_for_rd_or_wr = + num_addr_b_mat - num_addr_b_row_dec; } -Bank::~Bank() -{ - delete htree_in_add; - delete htree_out_data; - delete htree_in_data; - if (dp.fully_assoc || dp.pure_cam) - { - delete htree_in_search; - delete htree_out_search; - } +Bank::~Bank() { + delete htree_in_add; + delete htree_out_data; + delete htree_in_data; + if (dp.fully_assoc || dp.pure_cam) { + delete htree_in_search; + delete htree_out_search; + } } -double Bank::compute_delays(double inrisetime) -{ - return mat.compute_delays(inrisetime); +double Bank::compute_delays(double inrisetime) { + return mat.compute_delays(inrisetime); } -void Bank::compute_power_energy() -{ - mat.compute_power_energy(); +void Bank::compute_power_energy() { + mat.compute_power_energy(); - if (!(dp.fully_assoc || dp.pure_cam)) - { - power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir; - power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; - power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; + if (!(dp.fully_assoc || dp.pure_cam)) { + power.readOp.dynamic += mat.power.readOp.dynamic * dp.num_act_mats_hor_dir; + power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; + power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; - power.readOp.dynamic += htree_in_add->power.readOp.dynamic; - 
power.readOp.dynamic += htree_out_data->power.readOp.dynamic; + power.readOp.dynamic += htree_in_add->power.readOp.dynamic; + power.readOp.dynamic += htree_out_data->power.readOp.dynamic; - power.readOp.leakage += htree_in_add->power.readOp.leakage; - power.readOp.leakage += htree_in_data->power.readOp.leakage; - power.readOp.leakage += htree_out_data->power.readOp.leakage; - power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; - } - else - { + power.readOp.leakage += htree_in_add->power.readOp.leakage; + power.readOp.leakage += htree_in_data->power.readOp.leakage; + power.readOp.leakage += htree_out_data->power.readOp.leakage; + power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + } else { - power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w - power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; - power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; + power.readOp.dynamic += mat.power.readOp.dynamic ;//for fa and cam num_act_mats_hor_dir is 1 for plain r/w + power.readOp.leakage += mat.power.readOp.leakage * dp.num_mats; + power.readOp.gate_leakage += mat.power.readOp.gate_leakage * dp.num_mats; - power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats; - power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic + - mat.power_sa.searchOp.dynamic + - mat.power_bitline.searchOp.dynamic + - mat.power_subarray_out_drv.searchOp.dynamic+ - mat.ml_to_ram_wl_drv->power.readOp.dynamic; + power.searchOp.dynamic += mat.power.searchOp.dynamic * dp.num_mats; + power.searchOp.dynamic += mat.power_bl_precharge_eq_drv.searchOp.dynamic + + 
mat.power_sa.searchOp.dynamic + + mat.power_bitline.searchOp.dynamic + + mat.power_subarray_out_drv.searchOp.dynamic + + mat.ml_to_ram_wl_drv->power.readOp.dynamic; - power.readOp.dynamic += htree_in_add->power.readOp.dynamic; - power.readOp.dynamic += htree_out_data->power.readOp.dynamic; + power.readOp.dynamic += htree_in_add->power.readOp.dynamic; + power.readOp.dynamic += htree_out_data->power.readOp.dynamic; - power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic; - power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic; + power.searchOp.dynamic += htree_in_search->power.searchOp.dynamic; + power.searchOp.dynamic += htree_out_search->power.searchOp.dynamic; - power.readOp.leakage += htree_in_add->power.readOp.leakage; - power.readOp.leakage += htree_in_data->power.readOp.leakage; - power.readOp.leakage += htree_out_data->power.readOp.leakage; - power.readOp.leakage += htree_in_search->power.readOp.leakage; - power.readOp.leakage += htree_out_search->power.readOp.leakage; + power.readOp.leakage += htree_in_add->power.readOp.leakage; + power.readOp.leakage += htree_in_data->power.readOp.leakage; + power.readOp.leakage += htree_out_data->power.readOp.leakage; + power.readOp.leakage += htree_in_search->power.readOp.leakage; + power.readOp.leakage += htree_out_search->power.readOp.leakage; - power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage; - power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_out_data->power.readOp.gate_leakage; + power.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage; 
+ power.readOp.gate_leakage += htree_out_search->power.readOp.gate_leakage; - } + } } diff --git a/ext/mcpat/cacti/bank.h b/ext/mcpat/cacti/bank.h index 153609ab0..49151f050 100755 --- a/ext/mcpat/cacti/bank.h +++ b/ext/mcpat/cacti/bank.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -39,9 +40,8 @@ #include "htree2.h" #include "mat.h" -class Bank : public Component -{ - public: +class Bank : public Component { +public: Bank(const DynamicParameter & dyn_p); ~Bank(); double compute_delays(double inrisetime); // return outrisetime diff --git a/ext/mcpat/cacti/basic_circuit.cc b/ext/mcpat/cacti/basic_circuit.cc index 6efd5dd27..00ea3ce9d 100644 --- a/ext/mcpat/cacti/basic_circuit.cc +++ b/ext/mcpat/cacti/basic_circuit.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,59 +40,48 @@ #include "basic_circuit.h" #include "parameter.h" -uint32_t _log2(uint64_t num) -{ - uint32_t log2 = 0; +uint32_t _log2(uint64_t num) { + uint32_t log2 = 0; - if (num == 0) - { - std::cerr << "log0?" << std::endl; - exit(1); - } + if (num == 0) { + std::cerr << "log0?" 
<< std::endl; + exit(1); + } - while (num > 1) - { - num = (num >> 1); - log2++; - } + while (num > 1) { + num = (num >> 1); + log2++; + } - return log2; + return log2; } -bool is_pow2(int64_t val) -{ - if (val <= 0) - { - return false; - } - else if (val == 1) - { - return true; - } - else - { - return (_log2(val) != _log2(val-1)); - } +bool is_pow2(int64_t val) { + if (val <= 0) { + return false; + } else if (val == 1) { + return true; + } else { + return (_log2(val) != _log2(val - 1)); + } } -int powers (int base, int n) -{ - int i, p; +int powers (int base, int n) { + int i, p; - p = 1; - for (i = 1; i <= n; ++i) - p *= base; - return p; + p = 1; + for (i = 1; i <= n; ++i) + p *= base; + return p; } /*----------------------------------------------------------------------*/ -double logtwo (double x) -{ - assert(x > 0); - return ((double) (log (x) / log (2.0))); +double logtwo (double x) { + assert(x > 0); + return ((double) (log (x) / log (2.0))); } /*----------------------------------------------------------------------*/ @@ -102,28 +92,20 @@ double gate_C( double wirelength, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - const TechnologyParameter::DeviceType * dt; - - if (_is_dram && _is_cell) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if (_is_dram && _is_wl_tr) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if (!_is_dram && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } - - return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; + bool _is_wl_tr) { + const TechnologyParameter::DeviceType * dt; + + if (_is_dram && _is_cell) { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } else if (_is_dram && _is_wl_tr) { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } else if (!_is_dram && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } + + return 
(dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; } @@ -134,29 +116,21 @@ double gate_C_pass( double wirelength, // poly wire length going to gate in lambda bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - // v5.0 - const TechnologyParameter::DeviceType * dt; - - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } - - return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; + bool _is_wl_tr) { + // v5.0 + const TechnologyParameter::DeviceType * dt; + + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } + + return (dt->C_g_ideal + dt->C_overlap + 3*dt->C_fringe)*width + dt->l_phy*Cpolywire; } @@ -169,83 +143,67 @@ double drain_C_( double fold_dimension, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - double w_folded_tr; - const TechnologyParameter::DeviceType * dt; - - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; // DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; // DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } - - double c_junc_area = dt->C_junc; - double c_junc_sidewall = dt->C_junc_sidewall; - double c_fringe = 2*dt->C_fringe; - double c_overlap = 2*dt->C_overlap; - double drain_C_metal_connecting_folded_tr = 0; - - // determine the width of the transistor after folding (if it is getting folded) 
- if (next_arg_thresh_folding_width_or_height_cell == 0) - { // interpret fold_dimension as the the folding width threshold - // i.e. the value of transistor width above which the transistor gets folded - w_folded_tr = fold_dimension; - } - else - { // interpret fold_dimension as the height of the cell that this transistor is part of. - double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL; - // TODO : w_folded_tr must come from Component::compute_gate_area() - double ratio_p_to_n = 2.0 / (2.0 + 1.0); - if (nchannel) - { - w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + bool _is_wl_tr) { + double w_folded_tr; + const TechnologyParameter::DeviceType * dt; + + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; // DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; // DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } + + double c_junc_area = dt->C_junc; + double c_junc_sidewall = dt->C_junc_sidewall; + double c_fringe = 2 * dt->C_fringe; + double c_overlap = 2 * dt->C_overlap; + double drain_C_metal_connecting_folded_tr = 0; + + // determine the width of the transistor after folding (if it is getting folded) + if (next_arg_thresh_folding_width_or_height_cell == 0) { + // interpret fold_dimension as the the folding width threshold + // i.e. the value of transistor width above which the transistor gets folded + w_folded_tr = fold_dimension; + } else { // interpret fold_dimension as the height of the cell that this transistor is part of. 
+ double h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL; + // TODO : w_folded_tr must come from Component::compute_gate_area() + double ratio_p_to_n = 2.0 / (2.0 + 1.0); + if (nchannel) { + w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + } else { + w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + } } - else - { - w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS); + int num_folded_tr = (int) (ceil(width / w_folded_tr)); + + if (num_folded_tr < 2) { + w_folded_tr = width; } - } - int num_folded_tr = (int) (ceil(width / w_folded_tr)); - - if (num_folded_tr < 2) - { - w_folded_tr = width; - } - - double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain - (stack - 1) * g_tp.spacing_poly_to_poly; - double drain_h_for_sidewall = w_folded_tr; - double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1); - if (num_folded_tr > 1) - { - total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + - (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly); - - if (num_folded_tr%2 == 0) - { - drain_h_for_sidewall = 0; + + double total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + // only for drain + (stack - 1) * g_tp.spacing_poly_to_poly; + double drain_h_for_sidewall = w_folded_tr; + double total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1); + if (num_folded_tr > 1) { + total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + + (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly); + + if (num_folded_tr % 2 == 0) { + drain_h_for_sidewall = 0; + } + total_drain_height_for_cap_wrt_gate *= num_folded_tr; + drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w; } - total_drain_height_for_cap_wrt_gate *= num_folded_tr; - 
drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w; - } - double drain_C_area = c_junc_area * total_drain_w * w_folded_tr; - double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w); - double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate; + double drain_C_area = c_junc_area * total_drain_w * w_folded_tr; + double drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w); + double drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate; - return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr); + return (drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr); } @@ -255,29 +213,21 @@ double tr_R_on( int stack, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - const TechnologyParameter::DeviceType * dt; - - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && _is_cell) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } - - double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on; - return (stack * restrans / width); + bool _is_wl_tr) { + const TechnologyParameter::DeviceType * dt; + + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } else if ((!_is_dram) && _is_cell) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } + + double restrans = (nchannel) ? 
dt->R_nch_on : dt->R_pch_on; + return (stack * restrans / width); } @@ -291,46 +241,34 @@ double R_to_w( int nchannel, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - const TechnologyParameter::DeviceType * dt; - - if ((_is_dram) && (_is_cell)) - { - dt = &g_tp.dram_acc; //DRAM cell access transistor - } - else if ((_is_dram) && (_is_wl_tr)) - { - dt = &g_tp.dram_wl; //DRAM wordline transistor - } - else if ((!_is_dram) && (_is_cell)) - { - dt = &g_tp.sram_cell; // SRAM cell access transistor - } - else - { - dt = &g_tp.peri_global; - } - - double restrans = (nchannel) ? dt->R_nch_on : dt->R_pch_on; - return (restrans / res); + bool _is_wl_tr) { + const TechnologyParameter::DeviceType * dt; + + if ((_is_dram) && (_is_cell)) { + dt = &g_tp.dram_acc; //DRAM cell access transistor + } else if ((_is_dram) && (_is_wl_tr)) { + dt = &g_tp.dram_wl; //DRAM wordline transistor + } else if ((!_is_dram) && (_is_cell)) { + dt = &g_tp.sram_cell; // SRAM cell access transistor + } else { + dt = &g_tp.peri_global; + } + + double restrans = (nchannel) ? 
dt->R_nch_on : dt->R_pch_on; + return (restrans / res); } double pmos_to_nmos_sz_ratio( bool _is_dram, - bool _is_wl_tr) -{ - double p_to_n_sizing_ratio; - if ((_is_dram) && (_is_wl_tr)) - { //DRAM wordline transistor - p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio; - } - else - { //DRAM or SRAM all other transistors - p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio; - } - return p_to_n_sizing_ratio; + bool _is_wl_tr) { + double p_to_n_sizing_ratio; + if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + p_to_n_sizing_ratio = g_tp.dram_wl.n_to_p_eff_curr_drv_ratio; + } else { //DRAM or SRAM all other transistors + p_to_n_sizing_ratio = g_tp.peri_global.n_to_p_eff_curr_drv_ratio; + } + return p_to_n_sizing_ratio; } @@ -340,26 +278,23 @@ double horowitz( double tf, // time constant of gate double vs1, // threshold voltage double vs2, // threshold voltage - int rise) // whether input rises or fall -{ - if (inputramptime == 0 && vs1 == vs2) - { - return tf * (vs1 < 1 ? -log(vs1) : log(vs1)); - } - double a, b, td; - - a = inputramptime / tf; - if (rise == RISE) - { - b = 0.5; - td = tf * sqrt(log(vs1)*log(vs1) + 2*a*b*(1.0 - vs1)) + tf*(log(vs1) - log(vs2)); - } - else - { - b = 0.4; - td = tf * sqrt(log(1.0 - vs1)*log(1.0 - vs1) + 2*a*b*(vs1)) + tf*(log(1.0 - vs1) - log(1.0 - vs2)); - } - return (td); + int rise) { // whether input rises or fall + if (inputramptime == 0 && vs1 == vs2) { + return tf * (vs1 < 1 ? 
-log(vs1) : log(vs1)); + } + double a, b, td; + + a = inputramptime / tf; + if (rise == RISE) { + b = 0.5; + td = tf * sqrt(log(vs1) * log(vs1) + 2 * a * b * (1.0 - vs1)) + + tf * (log(vs1) - log(vs2)); + } else { + b = 0.4; + td = tf * sqrt(log(1.0 - vs1) * log(1.0 - vs1) + 2 * a * b * (vs1)) + + tf * (log(1.0 - vs1) - log(1.0 - vs2)); + } + return (td); } double cmos_Ileak( @@ -367,23 +302,17 @@ double cmos_Ileak( double pWidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; - - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return nWidth*dt->I_off_n + pWidth*dt->I_off_p; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; + + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return nWidth*dt->I_off_n + pWidth*dt->I_off_p; } @@ -391,107 +320,81 @@ double simplified_nmos_leakage( double nwidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; - - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return nwidth * dt->I_off_n; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; + + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = 
&(g_tp.peri_global); + } + return nwidth * dt->I_off_n; } -int factorial(int n, int m) -{ - int fa = m, i; - for (i=m+1; i<=n; i++) - fa *=i; - return fa; +int factorial(int n, int m) { + int fa = m, i; + for (i = m + 1; i <= n; i++) + fa *= i; + return fa; } -int combination(int n, int m) -{ - int ret; - ret = factorial(n, m+1) / factorial(n - m); - return ret; +int combination(int n, int m) { + int ret; + ret = factorial(n, m + 1) / factorial(n - m); + return ret; } double simplified_pmos_leakage( double pwidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; - - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return pwidth * dt->I_off_p; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; + + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return pwidth * dt->I_off_p; } double cmos_Ig_n( double nWidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; - - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return nWidth*dt->I_g_on_n; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; + + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = 
&(g_tp.peri_global); + } + return nWidth*dt->I_g_on_n; } double cmos_Ig_p( double pWidth, bool _is_dram, bool _is_cell, - bool _is_wl_tr) -{ - TechnologyParameter::DeviceType * dt; - - if ((!_is_dram)&&(_is_cell)) - { //SRAM cell access transistor - dt = &(g_tp.sram_cell); - } - else if ((_is_dram)&&(_is_wl_tr)) - { //DRAM wordline transistor - dt = &(g_tp.dram_wl); - } - else - { //DRAM or SRAM all other transistors - dt = &(g_tp.peri_global); - } - return pWidth*dt->I_g_on_p; + bool _is_wl_tr) { + TechnologyParameter::DeviceType * dt; + + if ((!_is_dram) && (_is_cell)) { //SRAM cell access transistor + dt = &(g_tp.sram_cell); + } else if ((_is_dram) && (_is_wl_tr)) { //DRAM wordline transistor + dt = &(g_tp.dram_wl); + } else { //DRAM or SRAM all other transistors + dt = &(g_tp.peri_global); + } + return pWidth*dt->I_g_on_p; } double cmos_Isub_leakage( @@ -502,98 +405,93 @@ double cmos_Isub_leakage( bool _is_dram, bool _is_cell, bool _is_wl_tr, - enum Half_net_topology topo) -{ - assert (fanin>=1); - double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr); - double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr); - double Isub=0; + enum Half_net_topology topo) { + assert (fanin >= 1); + double nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr); + double pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr); + double Isub = 0; int num_states; int num_off_tx; num_states = int(pow(2.0, fanin)); - switch (g_type) - { + switch (g_type) { case nmos: - if (fanin==1) - { - Isub = nmos_leak/num_states; - } - else - { - if (topo==parallel) - { - Isub=nmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. 
The possibility of this state is 1/num_states - } - else - { - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power - { - //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); - } - Isub /=num_states; + if (fanin == 1) { + Isub = nmos_leak / num_states; + } else { + if (topo == parallel) { + //only when all tx are off, leakage power is non-zero. + //The possibility of this state is 1/num_states + Isub = nmos_leak * fanin / num_states; + } else { + for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) { + //when num_off_tx ==0 there is no leakage power + Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR, + (num_off_tx - 1)) * + combination(fanin, num_off_tx); } + Isub /= num_states; + } } break; case pmos: - if (fanin==1) - { - Isub = pmos_leak/num_states; - } - else - { - if (topo==parallel) - { - Isub=pmos_leak*fanin/num_states; //only when all tx are off, leakage power is non-zero. The possibility of this state is 1/num_states - } - else - { - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) //when num_off_tx ==0 there is no leakage power - { - //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); - } - Isub /=num_states; + if (fanin == 1) { + Isub = pmos_leak / num_states; + } else { + if (topo == parallel) { + //only when all tx are off, leakage power is non-zero. 
+ //The possibility of this state is 1/num_states + Isub = pmos_leak * fanin / num_states; + } else { + for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) { + //when num_off_tx ==0 there is no leakage power + Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR, + (num_off_tx - 1)) * + combination(fanin, num_off_tx); } + Isub /= num_states; + } } break; case inv: - Isub = (nmos_leak + pmos_leak)/2; + Isub = (nmos_leak + pmos_leak) / 2; break; case nand: - Isub += fanin*pmos_leak;//the pullup network - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pulldown network - { - //Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += nmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); + Isub += fanin * pmos_leak;//the pullup network + for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) { + // the pulldown network + Isub += nmos_leak * pow(UNI_LEAK_STACK_FACTOR, + (num_off_tx - 1)) * + combination(fanin, num_off_tx); } - Isub /=num_states; + Isub /= num_states; break; case nor: - for (num_off_tx=1; num_off_tx<=fanin; num_off_tx++) // the pullup network - { - //Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*(factorial(fanin)/(factorial(fanin, num_off_tx)*factorial(num_off_tx))); - Isub += pmos_leak*pow(UNI_LEAK_STACK_FACTOR,(num_off_tx-1))*combination(fanin, num_off_tx); + for (num_off_tx = 1; num_off_tx <= fanin; num_off_tx++) { + // the pullup network + Isub += pmos_leak * pow(UNI_LEAK_STACK_FACTOR, + (num_off_tx - 1)) * + combination(fanin, num_off_tx); } - Isub += fanin*nmos_leak;//the pulldown network - Isub /=num_states; + Isub += fanin * nmos_leak;//the pulldown network + Isub /= num_states; break; case tri: - Isub += (nmos_leak + pmos_leak)/2;//enabled - Isub += nmos_leak*UNI_LEAK_STACK_FACTOR; //disabled upper bound of leakage power - Isub /=2; + Isub += (nmos_leak + pmos_leak) / 2;//enabled + //disabled upper bound of 
leakage power + Isub += nmos_leak * UNI_LEAK_STACK_FACTOR; + Isub /= 2; break; case tg: - Isub = (nmos_leak + pmos_leak)/2; + Isub = (nmos_leak + pmos_leak) / 2; break; default: assert(0); break; - } + } return Isub; } @@ -607,120 +505,116 @@ double cmos_Ig_leakage( bool _is_dram, bool _is_cell, bool _is_wl_tr, - enum Half_net_topology topo) -{ - assert (fanin>=1); - double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr); - double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr); - double Ig_on=0; - int num_states; - int num_on_tx; - - num_states = int(pow(2.0, fanin)); - - switch (g_type) - { - case nmos: - if (fanin==1) - { - Ig_on = nmos_leak/num_states; - } - else - { - if (topo==parallel) - { - for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++) - { - Ig_on += nmos_leak*combination(fanin, num_on_tx)*num_on_tx; - } - } - else - { - Ig_on += nmos_leak * fanin;//pull down network when all TXs are on. - //num_on_tx is the number of on tx - for (num_on_tx=1; num_on_tx= 1); + double nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr); + double pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr); + double Ig_on = 0; + int num_states; + int num_on_tx; + + num_states = int(pow(2.0, fanin)); + + switch (g_type) { + case nmos: + if (fanin == 1) { + Ig_on = nmos_leak / num_states; + } else { + if (topo == parallel) { + for (num_on_tx = 1; num_on_tx <= fanin; num_on_tx++) { + Ig_on += nmos_leak * combination(fanin, num_on_tx) * + num_on_tx; } - else - { - if (topo==parallel) - { - for (num_on_tx=1; num_on_tx<=fanin; num_on_tx++) - { - Ig_on += pmos_leak*combination(fanin, num_on_tx)*num_on_tx; - } - } - else - { - Ig_on += pmos_leak * fanin;//pull down network when all TXs are on. 
- //num_on_tx is the number of on tx - for (num_on_tx=1; num_on_txNspd < m2->Nspd) return true; - else if (m1->Nspd > m2->Nspd) return false; - else if (m1->Ndwl < m2->Ndwl) return true; - else if (m1->Ndwl > m2->Ndwl) return false; - else if (m1->Ndbl < m2->Ndbl) return true; - else if (m1->Ndbl > m2->Ndbl) return false; - else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true; - else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false; - else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true; - else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false; - else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true; - else return false; +bool mem_array::lt(const mem_array * m1, const mem_array * m2) { + if (m1->Nspd < m2->Nspd) return true; + else if (m1->Nspd > m2->Nspd) return false; + else if (m1->Ndwl < m2->Ndwl) return true; + else if (m1->Ndwl > m2->Ndwl) return false; + else if (m1->Ndbl < m2->Ndbl) return true; + else if (m1->Ndbl > m2->Ndbl) return false; + else if (m1->deg_bl_muxing < m2->deg_bl_muxing) return true; + else if (m1->deg_bl_muxing > m2->deg_bl_muxing) return false; + else if (m1->Ndsam_lev_1 < m2->Ndsam_lev_1) return true; + else if (m1->Ndsam_lev_1 > m2->Ndsam_lev_1) return false; + else if (m1->Ndsam_lev_2 < m2->Ndsam_lev_2) return true; + else return false; } -void uca_org_t::find_delay() -{ - mem_array * data_arr = data_array2; - mem_array * tag_arr = tag_array2; - - // check whether it is a regular cache or scratch ram - if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc) - { - access_time = data_arr->access_time; - } - // Both tag and data lookup happen in parallel - // and the entire set is sent over the data array h-tree without - // waiting for the way-select signal --TODO add the corresponding - // power overhead Nav - else if (g_ip->fast_access == true) - { - access_time = MAX(tag_arr->access_time, data_arr->access_time); - } - // Tag is accessed first. 
On a hit, way-select signal along with the - // address is sent to read/write the appropriate block in the data - // array - else if (g_ip->is_seq_acc == true) - { - access_time = tag_arr->access_time + data_arr->access_time; - } - // Normal access: tag array access and data array access happen in parallel. - // But, the data array will wait for the way-select and transfer only the - // appropriate block over the h-tree. - else - { - access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder, - data_arr->delay_before_subarray_output_driver) + - data_arr->delay_from_subarray_output_driver_to_output; - } +void uca_org_t::find_delay() { + mem_array * data_arr = data_array2; + mem_array * tag_arr = tag_array2; + + // check whether it is a regular cache or scratch ram + if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) { + access_time = data_arr->access_time; + } + // Both tag and data lookup happen in parallel + // and the entire set is sent over the data array h-tree without + // waiting for the way-select signal --TODO add the corresponding + // power overhead Nav + else if (g_ip->fast_access == true) { + access_time = MAX(tag_arr->access_time, data_arr->access_time); + } + // Tag is accessed first. On a hit, way-select signal along with the + // address is sent to read/write the appropriate block in the data + // array + else if (g_ip->is_seq_acc == true) { + access_time = tag_arr->access_time + data_arr->access_time; + } + // Normal access: tag array access and data array access happen in parallel. + // But, the data array will wait for the way-select and transfer only the + // appropriate block over the h-tree. 
+ else { + access_time = MAX(tag_arr->access_time + data_arr->delay_senseamp_mux_decoder, + data_arr->delay_before_subarray_output_driver) + + data_arr->delay_from_subarray_output_driver_to_output; + } } -void uca_org_t::find_energy() -{ - if (!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache) - power = data_array2->power + tag_array2->power; - else - power = data_array2->power; +void uca_org_t::find_energy() { + if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) + power = data_array2->power + tag_array2->power; + else + power = data_array2->power; } -void uca_org_t::find_area() -{ - if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)//(g_ip->is_cache == false) - { - cache_ht = data_array2->height; - cache_len = data_array2->width; - } - else - { - cache_ht = MAX(tag_array2->height, data_array2->height); - cache_len = tag_array2->width + data_array2->width; - } - area = cache_ht * cache_len; +void uca_org_t::find_area() { + if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) { + cache_ht = data_array2->height; + cache_len = data_array2->width; + } else { + cache_ht = MAX(tag_array2->height, data_array2->height); + cache_len = tag_array2->width + data_array2->width; + } + area = cache_ht * cache_len; } -void uca_org_t::adjust_area() -{ - double area_adjust; - if (g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc) - { - if (data_array2->area_efficiency/100.0<0.2) - { - //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2)); - area_adjust = sqrt(0.2/(data_array2->area_efficiency/100.0)); - cache_ht = cache_ht/area_adjust; - cache_len = cache_len/area_adjust; +void uca_org_t::adjust_area() { + double area_adjust; + if (g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc) { + if (data_array2->area_efficiency / 100.0 < 0.2) { + //area_adjust = sqrt(area/(area*(data_array2->area_efficiency/100.0)/0.2)); + area_adjust = sqrt(0.2 / (data_array2->area_efficiency / 100.0)); + cache_ht = cache_ht / 
area_adjust; + cache_len = cache_len / area_adjust; + } } - } - area = cache_ht * cache_len; + area = cache_ht * cache_len; } -void uca_org_t::find_cyc() -{ - if ((g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc))//(g_ip->is_cache == false) - { - cycle_time = data_array2->cycle_time; - } - else - { - cycle_time = MAX(tag_array2->cycle_time, - data_array2->cycle_time); - } +void uca_org_t::find_cyc() { + if ((g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) { + cycle_time = data_array2->cycle_time; + } else { + cycle_time = MAX(tag_array2->cycle_time, + data_array2->cycle_time); + } } uca_org_t :: uca_org_t() -:tag_array2(0), - data_array2(0) -{ + : tag_array2(0), + data_array2(0) { } -void uca_org_t :: cleanup() -{ - if (data_array2!=0) - delete data_array2; - if (tag_array2!=0) - delete tag_array2; +void uca_org_t :: cleanup() { + if (data_array2 != 0) + delete data_array2; + if (tag_array2 != 0) + delete tag_array2; } diff --git a/ext/mcpat/cacti/cacti_interface.h b/ext/mcpat/cacti/cacti_interface.h index f37596554..a2bddd819 100644 --- a/ext/mcpat/cacti/cacti_interface.h +++ b/ext/mcpat/cacti/cacti_interface.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -50,9 +51,8 @@ class mem_array; class uca_org_t; -class powerComponents -{ - public: +class powerComponents { +public: double dynamic; double leakage; double gate_leakage; @@ -60,17 +60,24 @@ class powerComponents double longer_channel_leakage; powerComponents() : dynamic(0), leakage(0), gate_leakage(0), short_circuit(0), longer_channel_leakage(0) { } - powerComponents(const powerComponents & obj) { *this = obj; } - powerComponents & operator=(const powerComponents & rhs) - { - dynamic = rhs.dynamic; - leakage = rhs.leakage; - gate_leakage = rhs.gate_leakage; - short_circuit = rhs.short_circuit; - longer_channel_leakage = rhs.longer_channel_leakage; - return *this; + powerComponents(const powerComponents & obj) { + *this = obj; + } + powerComponents & operator=(const powerComponents & rhs) { + dynamic = rhs.dynamic; + leakage = rhs.leakage; + gate_leakage = rhs.gate_leakage; + short_circuit = rhs.short_circuit; + longer_channel_leakage = rhs.longer_channel_leakage; + return *this; + } + void reset() { + dynamic = 0; + leakage = 0; + gate_leakage = 0; + short_circuit = 0; + longer_channel_leakage = 0; } - void reset() { dynamic = 0; leakage = 0; gate_leakage = 0; short_circuit = 0;longer_channel_leakage = 0;} friend powerComponents operator+(const powerComponents & x, const powerComponents & y); friend powerComponents operator*(const powerComponents & x, double const * const y); @@ -78,22 +85,24 @@ class powerComponents -class powerDef -{ - public: +class powerDef { +public: powerComponents readOp; powerComponents writeOp; powerComponents searchOp;//Sheng: for CAM and FA powerDef() : readOp(), writeOp(), searchOp() { } - void reset() { readOp.reset(); writeOp.reset(); searchOp.reset();} + void reset() { + readOp.reset(); + writeOp.reset(); + searchOp.reset(); + } friend powerDef operator+(const powerDef & x, const powerDef & y); friend powerDef operator*(const powerDef & x, double 
const * const y); }; -enum Wire_type -{ +enum Wire_type { Global /* gloabl wires with repeaters */, Global_5 /* 5% delay penalty */, Global_10 /* 10% delay penalty */, @@ -108,12 +117,12 @@ enum Wire_type -class InputParameter -{ - public: +class InputParameter { +public: void parse_cfg(const string & infile); - bool error_checking(); // return false if the input parameters are problematic + // return false if the input parameters are problematic + bool error_checking(string name = "CACTI"); void display_ip(); unsigned int cache_sz; // in bytes @@ -172,14 +181,14 @@ class InputParameter int force_nuca_bank; int delay_wt, dynamic_power_wt, leakage_power_wt, - cycle_time_wt, area_wt; + cycle_time_wt, area_wt; int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca, - cycle_time_wt_nuca, area_wt_nuca; + cycle_time_wt_nuca, area_wt_nuca; int delay_dev, dynamic_power_dev, leakage_power_dev, - cycle_time_dev, area_dev; + cycle_time_dev, area_dev; int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca, - cycle_time_dev_nuca, area_dev_nuca; + cycle_time_dev_nuca, area_dev_nuca; int ed; //ED or ED2 optimization int nuca; @@ -194,167 +203,113 @@ class InputParameter bool add_ecc_b_; - //parameters for design constraint - double throughput; - double latency; - bool pipelinable; - int pipeline_stages; - int per_stage_vector; - bool with_clock_grid; + //parameters for design constraint + double throughput; + double latency; + bool pipelinable; + int pipeline_stages; + int per_stage_vector; + bool with_clock_grid; }; -typedef struct{ - int Ndwl; - int Ndbl; - double Nspd; - int deg_bl_muxing; - int Ndsam_lev_1; - int Ndsam_lev_2; - int number_activated_mats_horizontal_direction; - int number_subbanks; - int page_size_in_bits; - double delay_route_to_bank; - double delay_crossbar; - double delay_addr_din_horizontal_htree; - double delay_addr_din_vertical_htree; - double delay_row_predecode_driver_and_block; - double delay_row_decoder; - double delay_bitlines; 
- double delay_sense_amp; - double delay_subarray_output_driver; - double delay_bit_mux_predecode_driver_and_block; - double delay_bit_mux_decoder; - double delay_senseamp_mux_lev_1_predecode_driver_and_block; - double delay_senseamp_mux_lev_1_decoder; - double delay_senseamp_mux_lev_2_predecode_driver_and_block; - double delay_senseamp_mux_lev_2_decoder; - double delay_input_htree; - double delay_output_htree; - double delay_dout_vertical_htree; - double delay_dout_horizontal_htree; - double delay_comparator; - double access_time; - double cycle_time; - double multisubbank_interleave_cycle_time; - double delay_request_network; - double delay_inside_mat; - double delay_reply_network; - double trcd; - double cas_latency; - double precharge_delay; - powerDef power_routing_to_bank; - powerDef power_addr_input_htree; - powerDef power_data_input_htree; - powerDef power_data_output_htree; - powerDef power_addr_horizontal_htree; - powerDef power_datain_horizontal_htree; - powerDef power_dataout_horizontal_htree; - powerDef power_addr_vertical_htree; - powerDef power_datain_vertical_htree; - powerDef power_row_predecoder_drivers; - powerDef power_row_predecoder_blocks; - powerDef power_row_decoders; - powerDef power_bit_mux_predecoder_drivers; - powerDef power_bit_mux_predecoder_blocks; - powerDef power_bit_mux_decoders; - powerDef power_senseamp_mux_lev_1_predecoder_drivers; - powerDef power_senseamp_mux_lev_1_predecoder_blocks; - powerDef power_senseamp_mux_lev_1_decoders; - powerDef power_senseamp_mux_lev_2_predecoder_drivers; - powerDef power_senseamp_mux_lev_2_predecoder_blocks; - powerDef power_senseamp_mux_lev_2_decoders; - powerDef power_bitlines; - powerDef power_sense_amps; - powerDef power_prechg_eq_drivers; - powerDef power_output_drivers_at_subarray; - powerDef power_dataout_vertical_htree; - powerDef power_comparators; - powerDef power_crossbar; - powerDef total_power; - double area; - double all_banks_height; - double all_banks_width; - double bank_height; - 
double bank_width; - double subarray_memory_cell_area_height; - double subarray_memory_cell_area_width; - double mat_height; - double mat_width; - double routing_area_height_within_bank; - double routing_area_width_within_bank; - double area_efficiency; -// double perc_power_dyn_routing_to_bank; -// double perc_power_dyn_addr_horizontal_htree; -// double perc_power_dyn_datain_horizontal_htree; -// double perc_power_dyn_dataout_horizontal_htree; -// double perc_power_dyn_addr_vertical_htree; -// double perc_power_dyn_datain_vertical_htree; -// double perc_power_dyn_row_predecoder_drivers; -// double perc_power_dyn_row_predecoder_blocks; -// double perc_power_dyn_row_decoders; -// double perc_power_dyn_bit_mux_predecoder_drivers; -// double perc_power_dyn_bit_mux_predecoder_blocks; -// double perc_power_dyn_bit_mux_decoders; -// double perc_power_dyn_senseamp_mux_lev_1_predecoder_drivers; -// double perc_power_dyn_senseamp_mux_lev_1_predecoder_blocks; -// double perc_power_dyn_senseamp_mux_lev_1_decoders; -// double perc_power_dyn_senseamp_mux_lev_2_predecoder_drivers; -// double perc_power_dyn_senseamp_mux_lev_2_predecoder_blocks; -// double perc_power_dyn_senseamp_mux_lev_2_decoders; -// double perc_power_dyn_bitlines; -// double perc_power_dyn_sense_amps; -// double perc_power_dyn_prechg_eq_drivers; -// double perc_power_dyn_subarray_output_drivers; -// double perc_power_dyn_dataout_vertical_htree; -// double perc_power_dyn_comparators; -// double perc_power_dyn_crossbar; -// double perc_power_dyn_spent_outside_mats; -// double perc_power_leak_routing_to_bank; -// double perc_power_leak_addr_horizontal_htree; -// double perc_power_leak_datain_horizontal_htree; -// double perc_power_leak_dataout_horizontal_htree; -// double perc_power_leak_addr_vertical_htree; -// double perc_power_leak_datain_vertical_htree; -// double perc_power_leak_row_predecoder_drivers; -// double perc_power_leak_row_predecoder_blocks; -// double perc_power_leak_row_decoders; -// double 
perc_power_leak_bit_mux_predecoder_drivers; -// double perc_power_leak_bit_mux_predecoder_blocks; -// double perc_power_leak_bit_mux_decoders; -// double perc_power_leak_senseamp_mux_lev_1_predecoder_drivers; -// double perc_power_leak_senseamp_mux_lev_1_predecoder_blocks; -// double perc_power_leak_senseamp_mux_lev_1_decoders; -// double perc_power_leak_senseamp_mux_lev_2_predecoder_drivers; -// double perc_power_leak_senseamp_mux_lev_2_predecoder_blocks; -// double perc_power_leak_senseamp_mux_lev_2_decoders; -// double perc_power_leak_bitlines; -// double perc_power_leak_sense_amps; -// double perc_power_leak_prechg_eq_drivers; -// double perc_power_leak_subarray_output_drivers; -// double perc_power_leak_dataout_vertical_htree; -// double perc_power_leak_comparators; -// double perc_power_leak_crossbar; -// double perc_leak_mats; -// double perc_active_mats; - double refresh_power; - double dram_refresh_period; - double dram_array_availability; - double dyn_read_energy_from_closed_page; - double dyn_read_energy_from_open_page; - double leak_power_subbank_closed_page; - double leak_power_subbank_open_page; - double leak_power_request_and_reply_networks; - double activate_energy; - double read_energy; - double write_energy; - double precharge_energy; +typedef struct { + int Ndwl; + int Ndbl; + double Nspd; + int deg_bl_muxing; + int Ndsam_lev_1; + int Ndsam_lev_2; + int number_activated_mats_horizontal_direction; + int number_subbanks; + int page_size_in_bits; + double delay_route_to_bank; + double delay_crossbar; + double delay_addr_din_horizontal_htree; + double delay_addr_din_vertical_htree; + double delay_row_predecode_driver_and_block; + double delay_row_decoder; + double delay_bitlines; + double delay_sense_amp; + double delay_subarray_output_driver; + double delay_bit_mux_predecode_driver_and_block; + double delay_bit_mux_decoder; + double delay_senseamp_mux_lev_1_predecode_driver_and_block; + double delay_senseamp_mux_lev_1_decoder; + double 
delay_senseamp_mux_lev_2_predecode_driver_and_block; + double delay_senseamp_mux_lev_2_decoder; + double delay_input_htree; + double delay_output_htree; + double delay_dout_vertical_htree; + double delay_dout_horizontal_htree; + double delay_comparator; + double access_time; + double cycle_time; + double multisubbank_interleave_cycle_time; + double delay_request_network; + double delay_inside_mat; + double delay_reply_network; + double trcd; + double cas_latency; + double precharge_delay; + powerDef power_routing_to_bank; + powerDef power_addr_input_htree; + powerDef power_data_input_htree; + powerDef power_data_output_htree; + powerDef power_addr_horizontal_htree; + powerDef power_datain_horizontal_htree; + powerDef power_dataout_horizontal_htree; + powerDef power_addr_vertical_htree; + powerDef power_datain_vertical_htree; + powerDef power_row_predecoder_drivers; + powerDef power_row_predecoder_blocks; + powerDef power_row_decoders; + powerDef power_bit_mux_predecoder_drivers; + powerDef power_bit_mux_predecoder_blocks; + powerDef power_bit_mux_decoders; + powerDef power_senseamp_mux_lev_1_predecoder_drivers; + powerDef power_senseamp_mux_lev_1_predecoder_blocks; + powerDef power_senseamp_mux_lev_1_decoders; + powerDef power_senseamp_mux_lev_2_predecoder_drivers; + powerDef power_senseamp_mux_lev_2_predecoder_blocks; + powerDef power_senseamp_mux_lev_2_decoders; + powerDef power_bitlines; + powerDef power_sense_amps; + powerDef power_prechg_eq_drivers; + powerDef power_output_drivers_at_subarray; + powerDef power_dataout_vertical_htree; + powerDef power_comparators; + powerDef power_crossbar; + powerDef total_power; + double area; + double all_banks_height; + double all_banks_width; + double bank_height; + double bank_width; + double subarray_memory_cell_area_height; + double subarray_memory_cell_area_width; + double mat_height; + double mat_width; + double routing_area_height_within_bank; + double routing_area_width_within_bank; + double area_efficiency; + 
double refresh_power; + double dram_refresh_period; + double dram_array_availability; + double dyn_read_energy_from_closed_page; + double dyn_read_energy_from_open_page; + double leak_power_subbank_closed_page; + double leak_power_subbank_open_page; + double leak_power_request_and_reply_networks; + double activate_energy; + double read_energy; + double write_energy; + double precharge_energy; } results_mem_array; -class uca_org_t -{ - public: +class uca_org_t { +public: mem_array * tag_array2; mem_array * data_array2; double access_time; @@ -378,7 +333,7 @@ class uca_org_t void find_cyc(); void adjust_area();//for McPAT only to adjust routing overhead void cleanup(); - ~uca_org_t(){}; + ~uca_org_t() {}; }; void reconfigure(InputParameter *local_interface, uca_org_t *fin_res); @@ -387,103 +342,62 @@ uca_org_t cacti_interface(const string & infile_name); //McPAT's plain interface, please keep !!! uca_org_t cacti_interface(InputParameter * const local_interface); //McPAT's plain interface, please keep !!! -uca_org_t init_interface(InputParameter * const local_interface); +uca_org_t init_interface(InputParameter * const local_interface, + const string &name); //McPAT's plain interface, please keep !!! 
uca_org_t cacti_interface( - int cache_size, - int line_size, - int associativity, - int rw_ports, - int excl_read_ports, - int excl_write_ports, - int single_ended_read_ports, - int search_ports, - int banks, - double tech_node, - int output_width, - int specific_tag, - int tag_width, - int access_mode, - int cache, - int main_mem, - int obj_func_delay, - int obj_func_dynamic_power, - int obj_func_leakage_power, - int obj_func_cycle_time, - int obj_func_area, - int dev_func_delay, - int dev_func_dynamic_power, - int dev_func_leakage_power, - int dev_func_area, - int dev_func_cycle_time, - int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate - int temp, - int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing - int data_arr_ram_cell_tech_flavor_in, - int data_arr_peri_global_tech_flavor_in, - int tag_arr_ram_cell_tech_flavor_in, - int tag_arr_peri_global_tech_flavor_in, - int interconnect_projection_type_in, - int wire_inside_mat_type_in, - int wire_outside_mat_type_in, - int REPEATERS_IN_HTREE_SEGMENTS_in, - int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, - int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, - int PAGE_SIZE_BITS_in, - int BURST_LENGTH_in, - int INTERNAL_PREFETCH_WIDTH_in, - int force_wiretype, - int wiretype, - int force_config, - int ndwl, - int ndbl, - int nspd, - int ndcm, - int ndsam1, - int ndsam2, - int ecc); -// int cache_size, -// int line_size, -// int associativity, -// int rw_ports, -// int excl_read_ports, -// int excl_write_ports, -// int single_ended_read_ports, -// int banks, -// double tech_node, -// int output_width, -// int specific_tag, -// int tag_width, -// int access_mode, -// int cache, -// int main_mem, -// int obj_func_delay, -// int obj_func_dynamic_power, -// int obj_func_leakage_power, -// int obj_func_area, -// int obj_func_cycle_time, -// int dev_func_delay, -// int dev_func_dynamic_power, -// int dev_func_leakage_power, -// int 
dev_func_area, -// int dev_func_cycle_time, -// int temp, -// int data_arr_ram_cell_tech_flavor_in, -// int data_arr_peri_global_tech_flavor_in, -// int tag_arr_ram_cell_tech_flavor_in, -// int tag_arr_peri_global_tech_flavor_in, -// int interconnect_projection_type_in, -// int wire_inside_mat_type_in, -// int wire_outside_mat_type_in, -// int REPEATERS_IN_HTREE_SEGMENTS_in, -// int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, -// int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, -//// double MAXAREACONSTRAINT_PERC_in, -//// double MAXACCTIMECONSTRAINT_PERC_in, -//// double MAX_PERC_DIFF_IN_DELAY_FROM_BEST_DELAY_REPEATER_SOLUTION_in, -// int PAGE_SIZE_BITS_in, -// int BURST_LENGTH_in, -// int INTERNAL_PREFETCH_WIDTH_in); + int cache_size, + int line_size, + int associativity, + int rw_ports, + int excl_read_ports, + int excl_write_ports, + int single_ended_read_ports, + int search_ports, + int banks, + double tech_node, + int output_width, + int specific_tag, + int tag_width, + int access_mode, + int cache, + int main_mem, + int obj_func_delay, + int obj_func_dynamic_power, + int obj_func_leakage_power, + int obj_func_cycle_time, + int obj_func_area, + int dev_func_delay, + int dev_func_dynamic_power, + int dev_func_leakage_power, + int dev_func_area, + int dev_func_cycle_time, + int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate + int temp, + int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing + int data_arr_ram_cell_tech_flavor_in, + int data_arr_peri_global_tech_flavor_in, + int tag_arr_ram_cell_tech_flavor_in, + int tag_arr_peri_global_tech_flavor_in, + int interconnect_projection_type_in, + int wire_inside_mat_type_in, + int wire_outside_mat_type_in, + int REPEATERS_IN_HTREE_SEGMENTS_in, + int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in, + int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in, + int PAGE_SIZE_BITS_in, + int BURST_LENGTH_in, + int INTERNAL_PREFETCH_WIDTH_in, + int 
force_wiretype, + int wiretype, + int force_config, + int ndwl, + int ndbl, + int nspd, + int ndcm, + int ndsam1, + int ndsam2, + int ecc); //Naveen's interface uca_org_t cacti_interface( @@ -542,91 +456,90 @@ uca_org_t cacti_interface( int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported int p_input); -class mem_array -{ - public: - int Ndcm; - int Ndwl; - int Ndbl; - double Nspd; - int deg_bl_muxing; - int Ndsam_lev_1; - int Ndsam_lev_2; - double access_time; - double cycle_time; - double multisubbank_interleave_cycle_time; - double area_ram_cells; - double area; - powerDef power; - double delay_senseamp_mux_decoder; - double delay_before_subarray_output_driver; - double delay_from_subarray_output_driver_to_output; - double height; - double width; - - double mat_height; - double mat_length; - double subarray_length; - double subarray_height; - - double delay_route_to_bank, - delay_input_htree, - delay_row_predecode_driver_and_block, - delay_row_decoder, - delay_bitlines, - delay_sense_amp, - delay_subarray_output_driver, - delay_dout_htree, - delay_comparator, - delay_matchlines; - - double all_banks_height, - all_banks_width, - area_efficiency; - - powerDef power_routing_to_bank; - powerDef power_addr_input_htree; - powerDef power_data_input_htree; - powerDef power_data_output_htree; - powerDef power_htree_in_search; - powerDef power_htree_out_search; - powerDef power_row_predecoder_drivers; - powerDef power_row_predecoder_blocks; - powerDef power_row_decoders; - powerDef power_bit_mux_predecoder_drivers; - powerDef power_bit_mux_predecoder_blocks; - powerDef power_bit_mux_decoders; - powerDef power_senseamp_mux_lev_1_predecoder_drivers; - powerDef power_senseamp_mux_lev_1_predecoder_blocks; - powerDef power_senseamp_mux_lev_1_decoders; - powerDef power_senseamp_mux_lev_2_predecoder_drivers; - powerDef power_senseamp_mux_lev_2_predecoder_blocks; - powerDef power_senseamp_mux_lev_2_decoders; - powerDef power_bitlines; - 
powerDef power_sense_amps; - powerDef power_prechg_eq_drivers; - powerDef power_output_drivers_at_subarray; - powerDef power_dataout_vertical_htree; - powerDef power_comparators; - - powerDef power_cam_bitline_precharge_eq_drv; - powerDef power_searchline; - powerDef power_searchline_precharge; - powerDef power_matchlines; - powerDef power_matchline_precharge; - powerDef power_matchline_to_wordline_drv; - - min_values_t *arr_min; - enum Wire_type wt; - - // dram stats - double activate_energy, read_energy, write_energy, precharge_energy, - refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page, - leak_power_request_and_reply_networks; - - double precharge_delay; - - static bool lt(const mem_array * m1, const mem_array * m2); +class mem_array { +public: + int Ndcm; + int Ndwl; + int Ndbl; + double Nspd; + int deg_bl_muxing; + int Ndsam_lev_1; + int Ndsam_lev_2; + double access_time; + double cycle_time; + double multisubbank_interleave_cycle_time; + double area_ram_cells; + double area; + powerDef power; + double delay_senseamp_mux_decoder; + double delay_before_subarray_output_driver; + double delay_from_subarray_output_driver_to_output; + double height; + double width; + + double mat_height; + double mat_length; + double subarray_length; + double subarray_height; + + double delay_route_to_bank, + delay_input_htree, + delay_row_predecode_driver_and_block, + delay_row_decoder, + delay_bitlines, + delay_sense_amp, + delay_subarray_output_driver, + delay_dout_htree, + delay_comparator, + delay_matchlines; + + double all_banks_height, + all_banks_width, + area_efficiency; + + powerDef power_routing_to_bank; + powerDef power_addr_input_htree; + powerDef power_data_input_htree; + powerDef power_data_output_htree; + powerDef power_htree_in_search; + powerDef power_htree_out_search; + powerDef power_row_predecoder_drivers; + powerDef power_row_predecoder_blocks; + powerDef power_row_decoders; + powerDef power_bit_mux_predecoder_drivers; + powerDef 
power_bit_mux_predecoder_blocks; + powerDef power_bit_mux_decoders; + powerDef power_senseamp_mux_lev_1_predecoder_drivers; + powerDef power_senseamp_mux_lev_1_predecoder_blocks; + powerDef power_senseamp_mux_lev_1_decoders; + powerDef power_senseamp_mux_lev_2_predecoder_drivers; + powerDef power_senseamp_mux_lev_2_predecoder_blocks; + powerDef power_senseamp_mux_lev_2_decoders; + powerDef power_bitlines; + powerDef power_sense_amps; + powerDef power_prechg_eq_drivers; + powerDef power_output_drivers_at_subarray; + powerDef power_dataout_vertical_htree; + powerDef power_comparators; + + powerDef power_cam_bitline_precharge_eq_drv; + powerDef power_searchline; + powerDef power_searchline_precharge; + powerDef power_matchlines; + powerDef power_matchline_precharge; + powerDef power_matchline_to_wordline_drv; + + min_values_t *arr_min; + enum Wire_type wt; + + // dram stats + double activate_energy, read_energy, write_energy, precharge_energy, + refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page, + leak_power_request_and_reply_networks; + + double precharge_delay; + + static bool lt(const mem_array * m1, const mem_array * m2); }; diff --git a/ext/mcpat/cacti/component.cc b/ext/mcpat/cacti/component.cc index 733108407..90e9baedf 100644 --- a/ext/mcpat/cacti/component.cc +++ b/ext/mcpat/cacti/component.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -45,34 +46,30 @@ using namespace std; Component::Component() - :area(), power(), rt_power(),delay(0) -{ + : area(), power(), rt_power(), delay(0) { } -Component::~Component() -{ +Component::~Component() { } -double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) -{ - double w_poly = g_ip->F_sz_um; - double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; - double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain - num_stacked_in * w_poly + - (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; +double Component::compute_diffusion_width(int num_stacked_in, int num_folded_tr) { + double w_poly = g_ip->F_sz_um; + double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; + double total_diff_w = 2 * spacing_poly_to_poly + // for both source and drain + num_stacked_in * w_poly + + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; - if (num_folded_tr > 1) - { - total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly + - (num_folded_tr - 1) * num_stacked_in * w_poly + - (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; - } + if (num_folded_tr > 1) { + total_diff_w += (num_folded_tr - 2) * 2 * spacing_poly_to_poly + + (num_folded_tr - 1) * num_stacked_in * w_poly + + (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly; + } - return total_diff_w; + return total_diff_w; } @@ -82,105 +79,96 @@ double Component::compute_gate_area( int num_inputs, double w_pmos, double w_nmos, - double h_gate) -{ - if (w_pmos <= 0.0 || w_nmos <= 0.0) - { - return 0.0; - } - - double w_folded_pmos, w_folded_nmos; - int num_folded_pmos, num_folded_nmos; - double total_ndiff_w, total_pdiff_w; - Area gate; - - double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL; - double ratio_p_to_n = w_pmos / (w_pmos + w_nmos); - - if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) - { - return 
0.0; - } - - w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n; - w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n); - assert(w_folded_pmos > 0); - - num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos)); - num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos)); - - switch (gate_type) - { + double h_gate) { + if (w_pmos <= 0.0 || w_nmos <= 0.0) { + return 0.0; + } + + double w_folded_pmos, w_folded_nmos; + int num_folded_pmos, num_folded_nmos; + double total_ndiff_w, total_pdiff_w; + Area gate; + + double h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL; + double ratio_p_to_n = w_pmos / (w_pmos + w_nmos); + + if (ratio_p_to_n >= 1 || ratio_p_to_n <= 0) { + return 0.0; + } + + w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n; + w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n); + assert(w_folded_pmos > 0); + + num_folded_pmos = (int) (ceil(w_pmos / w_folded_pmos)); + num_folded_nmos = (int) (ceil(w_nmos / w_folded_nmos)); + + switch (gate_type) { case INV: - total_ndiff_w = compute_diffusion_width(1, num_folded_nmos); - total_pdiff_w = compute_diffusion_width(1, num_folded_pmos); - break; + total_ndiff_w = compute_diffusion_width(1, num_folded_nmos); + total_pdiff_w = compute_diffusion_width(1, num_folded_pmos); + break; case NOR: - total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos); - total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos); - break; + total_ndiff_w = compute_diffusion_width(1, num_inputs * num_folded_nmos); + total_pdiff_w = compute_diffusion_width(num_inputs, num_folded_pmos); + break; case NAND: - total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos); - total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos); - break; + total_ndiff_w = compute_diffusion_width(num_inputs, num_folded_nmos); + total_pdiff_w = compute_diffusion_width(1, num_inputs * num_folded_pmos); 
+ break; default: - cout << "Unknown gate type: " << gate_type << endl; - exit(1); - } - - gate.w = MAX(total_ndiff_w, total_pdiff_w); - - if (w_folded_nmos > w_nmos) - { - //means that the height of the gate can - //be made smaller than the input height specified, so calculate the height of the gate. - gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL; - } - else - { - gate.h = h_gate; - } - return gate.get_area(); + cout << "Unknown gate type: " << gate_type << endl; + exit(1); + } + + gate.w = MAX(total_ndiff_w, total_pdiff_w); + + if (w_folded_nmos > w_nmos) { + //means that the height of the gate can + //be made smaller than the input height specified, so calculate the height of the gate. + gate.h = w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL; + } else { + gate.h = h_gate; + } + return gate.get_area(); } double Component::compute_tr_width_after_folding( double input_width, - double threshold_folding_width) -{//This is actually the width of the cell not the width of a device. -//The width of a cell and the width of a device is orthogonal. - if (input_width <= 0) - { - return 0; - } - - int num_folded_tr = (int) (ceil(input_width / threshold_folding_width)); - double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; - double width_poly = g_ip->F_sz_um; - double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly; - - return total_diff_width; + double threshold_folding_width) { + //This is actually the width of the cell not the width of a device. + //The width of a cell and the width of a device is orthogonal. 
+ if (input_width <= 0) { + return 0; + } + + int num_folded_tr = (int) (ceil(input_width / threshold_folding_width)); + double spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact; + double width_poly = g_ip->F_sz_um; + double total_diff_width = num_folded_tr * width_poly + (num_folded_tr + 1) * spacing_poly_to_poly; + + return total_diff_width; } -double Component::height_sense_amplifier(double pitch_sense_amp) -{ - // compute the height occupied by all PMOS transistors - double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 + - compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) + - 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; +double Component::height_sense_amplifier(double pitch_sense_amp) { + // compute the height occupied by all PMOS transistors + double h_pmos_tr = compute_tr_width_after_folding(g_tp.w_sense_p, pitch_sense_amp) * 2 + + compute_tr_width_after_folding(g_tp.w_iso, pitch_sense_amp) + + 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; - // compute the height occupied by all NMOS transistors - double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 + - compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) + - 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; + // compute the height occupied by all NMOS transistors + double h_nmos_tr = compute_tr_width_after_folding(g_tp.w_sense_n, pitch_sense_amp) * 2 + + compute_tr_width_after_folding(g_tp.w_sense_en, pitch_sense_amp) + + 2 * g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS; - // compute total height by considering gap between the p and n diffusion areas - return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS; + // compute total height by considering gap between the p and n diffusion areas + return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS; } @@ -195,42 +183,39 @@ int Component::logical_effort( double p_to_n_sz_ratio, bool is_dram_, bool is_wl_tr_, - double max_w_nmos) -{ - int num_gates = (int) (log(F) / 
log(fopt)); - - // check if num_gates is odd. if so, add 1 to make it even - num_gates+= (num_gates % 2) ? 1 : 0; - num_gates = MAX(num_gates, num_gates_min); - - // recalculate the effective fanout of each stage - double f = pow(F, 1.0 / num_gates); - int i = num_gates - 1; - double C_in = C_load / f; - w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_); - w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_); - w_p[i] = p_to_n_sz_ratio * w_n[i]; - - if (w_n[i] > max_w_nmos) - { - double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_); - F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_); - num_gates = (int) (log(F) / log(fopt)) + 1; - num_gates+= (num_gates % 2) ? 1 : 0; + double max_w_nmos) { + int num_gates = (int) (log(F) / log(fopt)); + + // check if num_gates is odd. if so, add 1 to make it even + num_gates += (num_gates % 2) ? 1 : 0; num_gates = MAX(num_gates, num_gates_min); - f = pow(F, 1.0 / (num_gates - 1)); - i = num_gates - 1; - w_n[i] = max_w_nmos; - w_p[i] = p_to_n_sz_ratio * w_n[i]; - } - for (i = num_gates - 2; i >= 1; i--) - { - w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_); - w_p[i] = p_to_n_sz_ratio * w_n[i]; - } + // recalculate the effective fanout of each stage + double f = pow(F, 1.0 / num_gates); + int i = num_gates - 1; + double C_in = C_load / f; + w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, false, is_wl_tr_); + w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_); + w_p[i] = p_to_n_sz_ratio * w_n[i]; - assert(num_gates <= MAX_NUMBER_GATES_STAGE); - return num_gates; + if (w_n[i] > max_w_nmos) { + double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_); + F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, false, is_wl_tr_); + num_gates = (int) (log(F) / log(fopt)) + 1; + num_gates += (num_gates % 2) ? 
1 : 0; + num_gates = MAX(num_gates, num_gates_min); + f = pow(F, 1.0 / (num_gates - 1)); + i = num_gates - 1; + w_n[i] = max_w_nmos; + w_p[i] = p_to_n_sz_ratio * w_n[i]; + } + + for (i = num_gates - 2; i >= 1; i--) { + w_n[i] = MAX(w_n[i+1] / f, g_tp.min_w_nmos_); + w_p[i] = p_to_n_sz_ratio * w_n[i]; + } + + assert(num_gates <= MAX_NUMBER_GATES_STAGE); + return num_gates; } diff --git a/ext/mcpat/cacti/component.h b/ext/mcpat/cacti/component.h index 75e2cb075..416e4e8e5 100644 --- a/ext/mcpat/cacti/component.h +++ b/ext/mcpat/cacti/component.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -42,41 +43,32 @@ using namespace std; class Crossbar; class Bank; -class Component -{ - public: +class Component { +public: Component(); ~Component(); Area area; - powerDef power,rt_power; + // TODO: THERE IS LITTLE AGREEMENT THROUGHOUT THIS CODE ABOUT HOW THESE + // VARIABLES SHOULD BE USED. PART OF THE PROBLEM IS NAMING. SO THAT THIS + // MAKES MORE SENSE, ENERGY CALCULATIONS SHOULD BE SPLIT FROM POWER + // CALCULATIONS. 
THIS IS THE WORST DESIGN PROBLEM THAT STILL EXISTS + powerDef power, rt_power; double delay; double cycle_time; - double compute_gate_area( - int gate_type, - int num_inputs, - double w_pmos, - double w_nmos, - double h_gate); - - double compute_tr_width_after_folding(double input_width, double threshold_folding_width); + double compute_gate_area(int gate_type, int num_inputs, double w_pmos, + double w_nmos, double h_gate); + double compute_tr_width_after_folding(double input_width, + double threshold_folding_width); double height_sense_amplifier(double pitch_sense_amp); - protected: - int logical_effort( - int num_gates_min, - double g, - double F, - double * w_n, - double * w_p, - double C_load, - double p_to_n_sz_ratio, - bool is_dram_, - bool is_wl_tr_, - double max_w_nmos); +protected: + int logical_effort(int num_gates_min, double g, double F, double * w_n, + double * w_p, double C_load, double p_to_n_sz_ratio, + bool is_dram_, bool is_wl_tr_, double max_w_nmos); - private: +private: double compute_diffusion_width(int num_stacked_in, int num_folded_tr); }; diff --git a/ext/mcpat/cacti/const.h b/ext/mcpat/cacti/const.h index aef7d019b..c9b3905bf 100644 --- a/ext/mcpat/cacti/const.h +++ b/ext/mcpat/cacti/const.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -249,21 +250,20 @@ const double bit_to_byte = 8.0; // v : vertical or velocity -enum ram_cell_tech_type_num -{ - itrs_hp = 0, - itrs_lstp = 1, - itrs_lop = 2, - lp_dram = 3, - comm_dram = 4 +enum ram_cell_tech_type_num { + itrs_hp = 0, + itrs_lstp = 1, + itrs_lop = 2, + lp_dram = 3, + comm_dram = 4 }; -const double pppm[4] = {1,1,1,1}; -const double pppm_lkg[4] = {0,1,1,0}; -const double pppm_dyn[4] = {1,0,0,0}; -const double pppm_Isub[4] = {0,1,0,0}; -const double pppm_Ig[4] = {0,0,1,0}; -const double pppm_sc[4] = {0,0,0,1}; +const double pppm[4] = {1, 1, 1, 1}; +const double pppm_lkg[4] = {0, 1, 1, 0}; +const double pppm_dyn[4] = {1, 0, 0, 0}; +const double pppm_Isub[4] = {0, 1, 0, 0}; +const double pppm_Ig[4] = {0, 0, 1, 0}; +const double pppm_sc[4] = {0, 0, 0, 1}; diff --git a/ext/mcpat/cacti/crossbar.cc b/ext/mcpat/cacti/crossbar.cc index a3d8532d5..ef2a373d6 100644 --- a/ext/mcpat/cacti/crossbar.cc +++ b/ext/mcpat/cacti/crossbar.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -39,123 +40,140 @@ Crossbar::Crossbar( double n_out_, double flit_size_, TechnologyParameter::DeviceType *dt - ):n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) -{ - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - Vdd = dt->Vdd; - CB_ADJ = 1; +): n_inp(n_inp_), n_out(n_out_), flit_size(flit_size_), deviceType(dt) { + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + Vdd = dt->Vdd; + CB_ADJ = 1; } -Crossbar::~Crossbar(){} +Crossbar::~Crossbar() {} -double Crossbar::output_buffer() -{ +double Crossbar::output_buffer() { - //Wire winit(4, 4); - double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; - Wire w1(g_ip->wt, l_eff); - //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; - double s1 = w1.repeater_size * (l_eff <w1.repeater_spacing? l_eff *ADJ/w1.repeater_spacing : ADJ); - double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; - // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor - TriS1 = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); - TriS2 = s1; //driver transistor + //Wire winit(4, 4); + double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; + Wire w1(g_ip->wt, l_eff); + //double s1 = w1.repeater_size *l_eff*ADJ/w1.repeater_spacing; + double s1 = w1.repeater_size * (l_eff < w1.repeater_spacing ?
+ l_eff * ADJ / w1.repeater_spacing : ADJ); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // the model assumes input capacitance of the wire driver = input capacitance of nand + nor = input cap of the driver transistor + TriS1 = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); + TriS2 = s1; //driver transistor - if (TriS1 < 1) - TriS1 = 1; + if (TriS1 < 1) + TriS1 = 1; - double input_cap = gate_C(TriS1*(2*min_w_pmos + g_tp.min_w_nmos_), 0) + - gate_C(TriS1*(min_w_pmos + 2*g_tp.min_w_nmos_), 0); + double input_cap = gate_C(TriS1 * (2 * min_w_pmos + g_tp.min_w_nmos_), 0) + + gate_C(TriS1 * (min_w_pmos + 2 * g_tp.min_w_nmos_), 0); // input_cap += drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + // drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + // gate_C(TriS2*g_tp.min_w_nmos_, 0)+ // drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + // drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + // gate_C(TriS2*min_w_pmos, 0); - tri_int_cap = drain_C_(TriS1*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + - gate_C(TriS2*g_tp.min_w_nmos_, 0)+ - drain_C_(TriS1*min_w_pmos, NCH, 1, 1, g_tp.cell_h_def)*2 + - drain_C_(TriS1*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(TriS2*min_w_pmos, 0); - double output_cap = drain_C_(TriS2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(TriS2*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); - double ctr_cap = gate_C(TriS2 *(min_w_pmos + g_tp.min_w_nmos_), 0); - - tri_inp_cap = input_cap; - tri_out_cap = output_cap; - tri_ctr_cap = ctr_cap; - return input_cap + output_cap + ctr_cap; + tri_int_cap = drain_C_(TriS1 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + + gate_C(TriS2 * g_tp.min_w_nmos_, 0) + + drain_C_(TriS1 * min_w_pmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + drain_C_(TriS1 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(TriS2 * 
min_w_pmos, 0); + double output_cap = drain_C_(TriS2 * g_tp.min_w_nmos_, NCH, 1, 1, + g_tp.cell_h_def) + + drain_C_(TriS2 * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def); + double ctr_cap = gate_C(TriS2 * (min_w_pmos + g_tp.min_w_nmos_), 0); + + tri_inp_cap = input_cap; + tri_out_cap = output_cap; + tri_ctr_cap = ctr_cap; + return input_cap + output_cap + ctr_cap; } -void Crossbar::compute_power() -{ - - Wire winit(4, 4); - double tri_cap = output_buffer(); - assert(tri_cap > 0); - //area of a tristate logic - double g_area = compute_gate_area(INV, 1, TriS2*g_tp.min_w_nmos_, TriS2*min_w_pmos, g_tp.cell_h_def); - g_area *= 2; // to model area of output transistors - g_area += compute_gate_area (NAND, 2, TriS1*2*g_tp.min_w_nmos_, TriS1*min_w_pmos, g_tp.cell_h_def); - g_area += compute_gate_area (NOR, 2, TriS1*g_tp.min_w_nmos_, TriS1*2*min_w_pmos, g_tp.cell_h_def); - double width /*per tristate*/ = g_area/(CB_ADJ * g_tp.cell_h_def); - // effective no. of tristate buffers that need to be laid side by side - int ntri = (int)ceil(g_tp.cell_h_def/(g_tp.wire_outside_mat.pitch)); - double wire_len = MAX(width*ntri*n_out, flit_size*g_tp.wire_outside_mat.pitch*n_out); - Wire w1(g_ip->wt, wire_len); - - area.w = wire_len; - area.h = g_tp.wire_outside_mat.pitch*n_inp*flit_size * CB_ADJ; - Wire w2(g_ip->wt, area.h); - - double aspect_ratio_cb = (area.h/area.w)*(n_out/n_inp); - if (aspect_ratio_cb > 1) aspect_ratio_cb = 1/aspect_ratio_cb; - - if (aspect_ratio_cb < ASPECT_THRESHOLD) { - if (n_out > 2 && n_inp > 2) { - CB_ADJ+=0.2; - //cout << "CB ADJ " << CB_ADJ << endl; - if (CB_ADJ < 4) { - this->compute_power(); - } +void Crossbar::compute_power() { + + Wire winit(4, 4); + double tri_cap = output_buffer(); + assert(tri_cap > 0); + //area of a tristate logic + double g_area = compute_gate_area(INV, 1, TriS2 * g_tp.min_w_nmos_, + TriS2 * min_w_pmos, g_tp.cell_h_def); + g_area *= 2; // to model area of output transistors + g_area += compute_gate_area (NAND, 2, TriS1 * 2 * 
g_tp.min_w_nmos_, + TriS1 * min_w_pmos, g_tp.cell_h_def); + g_area += compute_gate_area (NOR, 2, TriS1 * g_tp.min_w_nmos_, + TriS1 * 2 * min_w_pmos, g_tp.cell_h_def); + double width /*per tristate*/ = g_area / (CB_ADJ * g_tp.cell_h_def); + // effective no. of tristate buffers that need to be laid side by side + int ntri = (int)ceil(g_tp.cell_h_def / (g_tp.wire_outside_mat.pitch)); + double wire_len = MAX(width * ntri * n_out, + flit_size * g_tp.wire_outside_mat.pitch * n_out); + Wire w1(g_ip->wt, wire_len); + + area.w = wire_len; + area.h = g_tp.wire_outside_mat.pitch * n_inp * flit_size * CB_ADJ; + Wire w2(g_ip->wt, area.h); + + double aspect_ratio_cb = (area.h / area.w) * (n_out / n_inp); + if (aspect_ratio_cb > 1) aspect_ratio_cb = 1 / aspect_ratio_cb; + + if (aspect_ratio_cb < ASPECT_THRESHOLD) { + if (n_out > 2 && n_inp > 2) { + CB_ADJ += 0.2; + //cout << "CB ADJ " << CB_ADJ << endl; + if (CB_ADJ < 4) { + this->compute_power(); + } + } } - } - - - - power.readOp.dynamic = (w1.power.readOp.dynamic + w2.power.readOp.dynamic + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + tri_int_cap) * Vdd*Vdd)*flit_size; - power.readOp.leakage = n_inp * n_out * flit_size * ( - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ - cmos_Isub_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ - w1.power.readOp.leakage + w2.power.readOp.leakage); - power.readOp.gate_leakage = n_inp * n_out * flit_size * ( - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS2*2, min_w_pmos*TriS2*2, 1, inv) *Vdd+ - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nand)*Vdd+ - cmos_Ig_leakage(g_tp.min_w_nmos_*TriS1*3, min_w_pmos*TriS1*3, 2, nor) *Vdd+ - w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); - - // delay calculation - double l_eff = n_inp*flit_size*g_tp.wire_outside_mat.pitch; - Wire wdriver(g_ip->wt, l_eff); - double res = 
g_tp.wire_outside_mat.R_per_um * (area.w+area.h) + tr_R_on(g_tp.min_w_nmos_*wdriver.repeater_size, NCH, 1); - double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out*tri_inp_cap + n_inp*tri_out_cap; - delay = horowitz(w1.signal_rise_time(), res*cap, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); - - Wire wreset(); + + + + power.readOp.dynamic = + (w1.power.readOp.dynamic + w2.power.readOp.dynamic + + (tri_inp_cap * n_out + tri_out_cap * n_inp + tri_ctr_cap + + tri_int_cap) * Vdd * Vdd) * flit_size; + power.readOp.leakage = n_inp * n_out * flit_size * ( + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, + 1, inv) * Vdd + + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nand) * Vdd + + cmos_Isub_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nor) * Vdd + + w1.power.readOp.leakage + w2.power.readOp.leakage); + power.readOp.gate_leakage = n_inp * n_out * flit_size * ( + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS2 * 2, min_w_pmos * TriS2 * 2, + 1, inv) * Vdd + + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nand) * Vdd + + cmos_Ig_leakage(g_tp.min_w_nmos_ * TriS1 * 3, min_w_pmos * TriS1 * 3, + 2, nor) * Vdd + + w1.power.readOp.gate_leakage + w2.power.readOp.gate_leakage); + + // delay calculation + double l_eff = n_inp * flit_size * g_tp.wire_outside_mat.pitch; + Wire wdriver(g_ip->wt, l_eff); + double res = g_tp.wire_outside_mat.R_per_um * (area.w + area.h) + + tr_R_on(g_tp.min_w_nmos_ * wdriver.repeater_size, NCH, 1); + double cap = g_tp.wire_outside_mat.C_per_um * (area.w + area.h) + n_out * + tri_inp_cap + n_inp * tri_out_cap; + delay = horowitz(w1.signal_rise_time(), res * cap, deviceType->Vth / + deviceType->Vdd, deviceType->Vth / deviceType->Vdd, RISE); + + Wire wreset(); } -void Crossbar::print_crossbar() -{ - cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; - cout << "Flit size : " << flit_size << " bits" 
<< endl; - cout << "Width : " << area.w << " u" << endl; - cout << "Height : " << area.h << " u" << endl; - cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * MIN(n_inp, n_out) << " (nJ)" << endl; - cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" << endl; - cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 << " (mW)" << endl; - cout << "Crossbar Delay : " << delay*1e12 << " ps\n"; +void Crossbar::print_crossbar() { + cout << "\nCrossbar Stats (" << n_inp << "x" << n_out << ")\n\n"; + cout << "Flit size : " << flit_size << " bits" << endl; + cout << "Width : " << area.w << " u" << endl; + cout << "Height : " << area.h << " u" << endl; + cout << "Dynamic Power : " << power.readOp.dynamic*1e9 * + MIN(n_inp, n_out) << " (nJ)" << endl; + cout << "Leakage Power : " << power.readOp.leakage*1e3 << " (mW)" + << endl; + cout << "Gate Leakage Power : " << power.readOp.gate_leakage*1e3 + << " (mW)" << endl; + cout << "Crossbar Delay : " << delay*1e12 << " ps\n"; } diff --git a/ext/mcpat/cacti/crossbar.h b/ext/mcpat/cacti/crossbar.h index 3b926517c..b8de7547b 100644 --- a/ext/mcpat/cacti/crossbar.h +++ b/ext/mcpat/cacti/crossbar.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -44,14 +45,13 @@ #include "parameter.h" #include "wire.h" -class Crossbar : public Component -{ - public: +class Crossbar : public Component { +public: Crossbar( - double in, - double out, - double flit_sz, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); + double in, + double out, + double flit_sz, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); ~Crossbar(); void print_crossbar(); @@ -62,18 +62,18 @@ class Crossbar : public Component double flit_size; double tri_inp_cap, tri_out_cap, tri_ctr_cap, tri_int_cap; - private: - double CB_ADJ; - /* - * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar - * buffer is adjusted to get an aspect ratio of whole cross bar close to one; - * when adjust the ratio, the number of wires route over the tri-state buffers does not change, - * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase - * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch - * will increase. As a result, the height of the crossbar (area.h) will increase. - */ - - TechnologyParameter::DeviceType *deviceType; +private: + double CB_ADJ; + /* + * Adjust factor of the height of the cross-point (tri-state buffer) cell (layout) in crossbar + * buffer is adjusted to get an aspect ratio of whole cross bar close to one; + * when adjust the ratio, the number of wires route over the tri-state buffers does not change, + * however, the effective wiring pitch changes. Specifically, since CB_ADJ will increase + * during the adjust, the tri-state buffer will become taller and thiner, and the effective wiring pitch + * will increase. As a result, the height of the crossbar (area.h) will increase. 
+ */ + + TechnologyParameter::DeviceType *deviceType; double TriS1, TriS2; double min_w_pmos, Vdd; diff --git a/ext/mcpat/cacti/decoder.cc b/ext/mcpat/cacti/decoder.cc index 0de6f6157..7fa66b4ff 100644 --- a/ext/mcpat/cacti/decoder.cc +++ b/ext/mcpat/cacti/decoder.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -51,207 +52,184 @@ Decoder::Decoder( bool is_dram_, bool is_wl_tr_, const Area & cell_) -:exist(false), - C_ld_dec_out(_C_ld_dec_out), - R_wire_dec_out(_R_wire_dec_out), - num_gates(0), num_gates_min(2), - delay(0), - //power(), - fully_assoc(fully_assoc_), is_dram(is_dram_), - is_wl_tr(is_wl_tr_), cell(cell_) -{ - - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) - { - w_dec_n[i] = 0; - w_dec_p[i] = 0; - } - - /* - * _num_dec_signals is the number of decoded signal as output - * num_addr_bits_dec is the number of signal to be decoded - * as the decoders input. 
- */ - int num_addr_bits_dec = _log2(_num_dec_signals); - - if (num_addr_bits_dec < 4) - { - if (flag_way_select) - { - exist = true; - num_in_signals = 2; + : exist(false), + C_ld_dec_out(_C_ld_dec_out), + R_wire_dec_out(_R_wire_dec_out), + num_gates(0), num_gates_min(2), + delay(0), + //power(), + fully_assoc(fully_assoc_), is_dram(is_dram_), + is_wl_tr(is_wl_tr_), cell(cell_) { + + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + w_dec_n[i] = 0; + w_dec_p[i] = 0; } - else - { - num_in_signals = 0; - } - } - else - { - exist = true; - if (flag_way_select) - { - num_in_signals = 3; - } - else - { - num_in_signals = 2; + /* + * _num_dec_signals is the number of decoded signal as output + * num_addr_bits_dec is the number of signal to be decoded + * as the decoders input. + */ + int num_addr_bits_dec = _log2(_num_dec_signals); + + if (num_addr_bits_dec < 4) { + if (flag_way_select) { + exist = true; + num_in_signals = 2; + } else { + num_in_signals = 0; + } + } else { + exist = true; + + if (flag_way_select) { + num_in_signals = 3; + } else { + num_in_signals = 2; + } } - } - assert(cell.h>0); - assert(cell.w>0); - // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; - //area.h = 4 * cell.h; - area.h = g_tp.h_dec * cell.h; + assert(cell.h > 0); + assert(cell.w > 0); + // the height of a row-decoder-driver cell is fixed to be 4 * cell.h; + //area.h = 4 * cell.h; + area.h = g_tp.h_dec * cell.h; - compute_widths(); - compute_area(); + compute_widths(); + compute_area(); } -void Decoder::compute_widths() -{ - double F; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); - double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - - if (exist) - { - if (num_in_signals == 2 || fully_assoc) - { - w_dec_n[0] = 2 * g_tp.min_w_nmos_; - w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand2; +void Decoder::compute_widths() { + double F; + double p_to_n_sz_ratio = 
pmos_to_nmos_sz_ratio(is_dram, is_wl_tr); + double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + + if (exist) { + if (num_in_signals == 2 || fully_assoc) { + w_dec_n[0] = 2 * g_tp.min_w_nmos_; + w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2; + } else { + w_dec_n[0] = 3 * g_tp.min_w_nmos_; + w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3; + } + + F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + + gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); + num_gates = logical_effort( + num_gates_min, + num_in_signals == 2 ? gnand2 : gnand3, + F, + w_dec_n, + w_dec_p, + C_ld_dec_out, + p_to_n_sz_ratio, + is_dram, + is_wl_tr, + g_tp.max_w_nmos_dec); } - else - { - w_dec_n[0] = 3 * g_tp.min_w_nmos_; - w_dec_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand3; - } - - F *= C_ld_dec_out / (gate_C(w_dec_n[0], 0, is_dram, false, is_wl_tr) + - gate_C(w_dec_p[0], 0, is_dram, false, is_wl_tr)); - num_gates = logical_effort( - num_gates_min, - num_in_signals == 2 ? 
gnand2 : gnand3, - F, - w_dec_n, - w_dec_p, - C_ld_dec_out, - p_to_n_sz_ratio, - is_dram, - is_wl_tr, - g_tp.max_w_nmos_dec); - } } -void Decoder::compute_area() -{ - double cumulative_area = 0; - double cumulative_curr = 0; // cumulative leakage current - double cumulative_curr_Ig = 0; // cumulative leakage current - - if (exist) - { // First check if this decoder exists - if (num_in_signals == 2) - { - cumulative_area = compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); - cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand,is_dram); - } - else if (num_in_signals == 3) - { - cumulative_area = compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); - cumulative_curr = cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram); - } - - for (int i = 1; i < num_gates; i++) - { - cumulative_area += compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); - cumulative_curr += cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); - cumulative_curr_Ig = cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); +void Decoder::compute_area() { + double cumulative_area = 0; + double cumulative_curr = 0; // cumulative leakage current + double cumulative_curr_Ig = 0; // cumulative leakage current + + if (exist) { // First check if this decoder exists + if (num_in_signals == 2) { + cumulative_area = + compute_gate_area(NAND, 2, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = + cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[0], w_dec_p[0], 2, nand, is_dram); + } else if (num_in_signals == 3) { + cumulative_area = + compute_gate_area(NAND, 3, w_dec_p[0], w_dec_n[0], area.h); + cumulative_curr = + cmos_Isub_leakage(w_dec_n[0], w_dec_p[0], 3, nand, is_dram);; + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[0], 
w_dec_p[0], 3, nand, is_dram); + } + + for (int i = 1; i < num_gates; i++) { + cumulative_area += + compute_gate_area(INV, 1, w_dec_p[i], w_dec_n[i], area.h); + cumulative_curr += + cmos_Isub_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + cumulative_curr_Ig = + cmos_Ig_leakage(w_dec_n[i], w_dec_p[i], 1, inv, is_dram); + } + power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; + power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; + + area.w = (cumulative_area / area.h); } - power.readOp.leakage = cumulative_curr * g_tp.peri_global.Vdd; - power.readOp.gate_leakage = cumulative_curr_Ig * g_tp.peri_global.Vdd; - - area.w = (cumulative_area / area.h); - } } -double Decoder::compute_delays(double inrisetime) -{ - if (exist) - { - double ret_val = 0; // outrisetime - int i; - double rd, tf, this_delay, c_load, c_intrinsic, Vpp; - double Vdd = g_tp.peri_global.Vdd; +double Decoder::compute_delays(double inrisetime) { + if (exist) { + double ret_val = 0; // outrisetime + int i; + double rd, tf, this_delay, c_load, c_intrinsic, Vpp; + double Vdd = g_tp.peri_global.Vdd; - if ((is_wl_tr) && (is_dram)) - { - Vpp = g_tp.vpp; - } - else if (is_wl_tr) - { - Vpp = g_tp.sram_cell.Vdd; - } - else - { - Vpp = g_tp.peri_global.Vdd; - } + if ((is_wl_tr) && (is_dram)) { + Vpp = g_tp.vpp; + } else if (is_wl_tr) { + Vpp = g_tp.sram_cell.Vdd; + } else { + Vpp = g_tp.peri_global.Vdd; + } - // first check whether a decoder is required at all - rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); - c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); - c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + - drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += (c_load + 
c_intrinsic) * Vdd * Vdd; - - for (i = 1; i < num_gates - 1; ++i) - { - rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); - c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); - c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + - drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + // first check whether a decoder is required at all + rd = tr_R_on(w_dec_n[0], NCH, num_in_signals, is_dram, false, is_wl_tr); + c_load = gate_C(w_dec_n[1] + w_dec_p[1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[0], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) * num_in_signals + + drain_C_(w_dec_n[0], NCH, num_in_signals, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + + for (i = 1; i < num_gates - 1; ++i) { + rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); + c_load = gate_C(w_dec_p[i+1] + w_dec_n[i+1], 0.0, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + + drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + } + + // add delay of final inverter that drives the wordline + i = num_gates - 1; + c_load = C_ld_dec_out; + rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); + c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + 
+ drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); + tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + ret_val = this_delay / (1.0 - 0.5); + power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd; + + return ret_val; + } else { + return 0.0; } - - // add delay of final inverter that drives the wordline - i = num_gates - 1; - c_load = C_ld_dec_out; - rd = tr_R_on(w_dec_n[i], NCH, 1, is_dram, false, is_wl_tr); - c_intrinsic = drain_C_(w_dec_p[i], PCH, 1, 1, area.h, is_dram, false, is_wl_tr) + - drain_C_(w_dec_n[i], NCH, 1, 1, area.h, is_dram, false, is_wl_tr); - tf = rd * (c_intrinsic + c_load) + R_wire_dec_out * c_load / 2; - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - ret_val = this_delay / (1.0 - 0.5); - power.readOp.dynamic += c_load * Vpp * Vpp + c_intrinsic * Vdd * Vdd; - - return ret_val; - } - else - { - return 0.0; - } } void Decoder::leakage_feedback(double temperature) @@ -291,610 +269,568 @@ PredecBlk::PredecBlk( int num_dec_per_predec, bool is_dram, bool is_blk1) - :dec(dec_), - exist(false), - number_input_addr_bits(0), - C_ld_predec_blk_out(0), - R_wire_predec_blk_out(0), - branch_effort_nand2_gate_output(1), - branch_effort_nand3_gate_output(1), - flag_two_unique_paths(false), - flag_L2_gate(0), - number_inputs_L1_gate(0), - number_gates_L1_nand2_path(0), - number_gates_L1_nand3_path(0), - number_gates_L2(0), - min_number_gates_L1(2), - min_number_gates_L2(2), - num_L1_active_nand2_path(0), - num_L1_active_nand3_path(0), - delay_nand2_path(0), - delay_nand3_path(0), - power_nand2_path(), - power_nand3_path(), - power_L2(), - is_dram_(is_dram) -{ - int branch_effort_predec_out; - double C_ld_dec_gate; - int num_addr_bits_dec = _log2(num_dec_signals); - int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; - int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; - 
- w_L1_nand2_n[0] = 0; - w_L1_nand2_p[0] = 0; - w_L1_nand3_n[0] = 0; - w_L1_nand3_p[0] = 0; - - if (is_blk1 == true) - { - if (num_addr_bits_dec <= 0) - { - return; + : dec(dec_), + exist(false), + number_input_addr_bits(0), + C_ld_predec_blk_out(0), + R_wire_predec_blk_out(0), + branch_effort_nand2_gate_output(1), + branch_effort_nand3_gate_output(1), + flag_two_unique_paths(false), + flag_L2_gate(0), + number_inputs_L1_gate(0), + number_gates_L1_nand2_path(0), + number_gates_L1_nand3_path(0), + number_gates_L2(0), + min_number_gates_L1(2), + min_number_gates_L2(2), + num_L1_active_nand2_path(0), + num_L1_active_nand3_path(0), + delay_nand2_path(0), + delay_nand3_path(0), + power_nand2_path(), + power_nand3_path(), + power_L2(), + is_dram_(is_dram) { + int branch_effort_predec_out; + double C_ld_dec_gate; + int num_addr_bits_dec = _log2(num_dec_signals); + int blk1_num_input_addr_bits = (num_addr_bits_dec + 1) / 2; + int blk2_num_input_addr_bits = num_addr_bits_dec - blk1_num_input_addr_bits; + + w_L1_nand2_n[0] = 0; + w_L1_nand2_p[0] = 0; + w_L1_nand3_n[0] = 0; + w_L1_nand3_p[0] = 0; + + if (is_blk1 == true) { + if (num_addr_bits_dec <= 0) { + return; + } else if (num_addr_bits_dec < 4) { + // Just one predecoder block is required with NAND2 gates. No decoder required. 
+ // The first level of predecoding directly drives the decoder output load + exist = true; + number_input_addr_bits = num_addr_bits_dec; + R_wire_predec_blk_out = dec->R_wire_dec_out; + C_ld_predec_blk_out = dec->C_ld_dec_out; + } else { + exist = true; + number_input_addr_bits = blk1_num_input_addr_bits; + branch_effort_predec_out = (1 << blk2_num_input_addr_bits); + C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); + R_wire_predec_blk_out = R_wire_predec_blk_out_; + C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; + } + } else { + if (num_addr_bits_dec >= 4) { + exist = true; + number_input_addr_bits = blk2_num_input_addr_bits; + branch_effort_predec_out = (1 << blk1_num_input_addr_bits); + C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); + R_wire_predec_blk_out = R_wire_predec_blk_out_; + C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; + } } - else if (num_addr_bits_dec < 4) - { - // Just one predecoder block is required with NAND2 gates. No decoder required. 
- // The first level of predecoding directly drives the decoder output load - exist = true; - number_input_addr_bits = num_addr_bits_dec; - R_wire_predec_blk_out = dec->R_wire_dec_out; - C_ld_predec_blk_out = dec->C_ld_dec_out; - } - else - { - exist = true; - number_input_addr_bits = blk1_num_input_addr_bits; - branch_effort_predec_out = (1 << blk2_num_input_addr_bits); - C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); - R_wire_predec_blk_out = R_wire_predec_blk_out_; - C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; - } - } - else - { - if (num_addr_bits_dec >= 4) - { - exist = true; - number_input_addr_bits = blk2_num_input_addr_bits; - branch_effort_predec_out = (1 << blk1_num_input_addr_bits); - C_ld_dec_gate = num_dec_per_predec * gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_, false, false); - R_wire_predec_blk_out = R_wire_predec_blk_out_; - C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out; - } - } - compute_widths(); - compute_area(); + compute_widths(); + compute_area(); } -void PredecBlk::compute_widths() -{ - double F, c_load_nand3_path, c_load_nand2_path; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); - double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); +void PredecBlk::compute_widths() { + double F, c_load_nand3_path, c_load_nand2_path; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + double gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); + double gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio); - if (exist == false) return; + if (exist == false) return; - switch (number_input_addr_bits) - { + switch (number_input_addr_bits) { case 1: - flag_two_unique_paths = false; - number_inputs_L1_gate = 2; - flag_L2_gate = 0; - break; - case 2: - flag_two_unique_paths = false; - number_inputs_L1_gate = 2; 
- flag_L2_gate = 0; - break; - case 3: - flag_two_unique_paths = false; - number_inputs_L1_gate = 3; - flag_L2_gate = 0; - break; - case 4: - flag_two_unique_paths = false; - number_inputs_L1_gate = 2; - flag_L2_gate = 2; - branch_effort_nand2_gate_output = 4; - break; - case 5: - flag_two_unique_paths = true; - flag_L2_gate = 2; - branch_effort_nand2_gate_output = 8; - branch_effort_nand3_gate_output = 4; - break; - case 6: - flag_two_unique_paths = false; - number_inputs_L1_gate = 3; - flag_L2_gate = 2; - branch_effort_nand3_gate_output = 8; - break; - case 7: - flag_two_unique_paths = true; - flag_L2_gate = 3; - branch_effort_nand2_gate_output = 32; - branch_effort_nand3_gate_output = 16; - break; - case 8: - flag_two_unique_paths = true; - flag_L2_gate = 3; - branch_effort_nand2_gate_output = 64; - branch_effort_nand3_gate_output = 32; - break; - case 9: - flag_two_unique_paths = false; - number_inputs_L1_gate = 3; - flag_L2_gate = 3; - branch_effort_nand3_gate_output = 64; - break; - default: - assert(0); - break; - } - - // find the number of gates and sizing in second level of predecoder (if there is a second level) - if (flag_L2_gate) - { - if (flag_L2_gate == 2) - { // 2nd level is a NAND2 gate - w_L2_n[0] = 2 * g_tp.min_w_nmos_; - F = gnand2; - } - else - { // 2nd level is a NAND3 gate - w_L2_n[0] = 3 * g_tp.min_w_nmos_; - F = gnand3; - } - w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); - number_gates_L2 = logical_effort( - min_number_gates_L2, - flag_L2_gate == 2 ? gnand2 : gnand3, - F, - w_L2_n, - w_L2_p, - C_ld_predec_blk_out, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); - - // Now find the number of gates and widths in first level of predecoder - if ((flag_two_unique_paths)||(number_inputs_L1_gate == 2)) - { // Whenever flag_two_unique_paths is true, it means first level of decoder employs - // both NAND2 and NAND3 gates. 
Or when number_inputs_L1_gate is 2, it means - // a NAND2 gate is used in the first level of the predecoder - c_load_nand2_path = branch_effort_nand2_gate_output * - (gate_C(w_L2_n[0], 0, is_dram_) + - gate_C(w_L2_p[0], 0, is_dram_)); - w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; - w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand2 * c_load_nand2_path / - (gate_C(w_L1_nand2_n[0], 0, is_dram_) + - gate_C(w_L1_nand2_p[0], 0, is_dram_)); - number_gates_L1_nand2_path = logical_effort( - min_number_gates_L1, - gnand2, - F, - w_L1_nand2_n, - w_L1_nand2_p, - c_load_nand2_path, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); - } - - //Now find widths of gates along path in which first gate is a NAND3 - if ((flag_two_unique_paths)||(number_inputs_L1_gate == 3)) - { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs - // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means - // a NAND3 gate is used in the first level of the predecoder - c_load_nand3_path = branch_effort_nand3_gate_output * - (gate_C(w_L2_n[0], 0, is_dram_) + - gate_C(w_L2_p[0], 0, is_dram_)); - w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; - w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand3 * c_load_nand3_path / - (gate_C(w_L1_nand3_n[0], 0, is_dram_) + - gate_C(w_L1_nand3_p[0], 0, is_dram_)); - number_gates_L1_nand3_path = logical_effort( - min_number_gates_L1, - gnand3, - F, - w_L1_nand3_n, - w_L1_nand3_p, - c_load_nand3_path, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); - } - } - else - { // find number of gates and widths in first level of predecoder block when there is no second level - if (number_inputs_L1_gate == 2) - { - w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; - w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand2*C_ld_predec_blk_out / - (gate_C(w_L1_nand2_n[0], 0, is_dram_) + - gate_C(w_L1_nand2_p[0], 0, is_dram_)); - number_gates_L1_nand2_path = logical_effort( - min_number_gates_L1, - 
gnand2, - F, - w_L1_nand2_n, - w_L1_nand2_p, - C_ld_predec_blk_out, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); - } - else if (number_inputs_L1_gate == 3) - { - w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; - w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - F = gnand3*C_ld_predec_blk_out / - (gate_C(w_L1_nand3_n[0], 0, is_dram_) + - gate_C(w_L1_nand3_p[0], 0, is_dram_)); - number_gates_L1_nand3_path = logical_effort( - min_number_gates_L1, - gnand3, - F, - w_L1_nand3_n, - w_L1_nand3_p, - C_ld_predec_blk_out, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); - } - } -} - - - -void PredecBlk::compute_area() -{ - if (exist) - { // First check whether a predecoder block is needed - int num_L1_nand2 = 0; - int num_L1_nand3 = 0; - int num_L2 = 0; - double tot_area_L1_nand3 =0; - double leak_L1_nand3 =0; - double gate_leak_L1_nand3 =0; - - double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); - double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); - double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); - if (number_inputs_L1_gate != 3) { - tot_area_L1_nand3 = 0; - leak_L1_nand3 = 0; - gate_leak_L1_nand3 =0; - } - else { - tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); - leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); - gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); - } - - switch (number_input_addr_bits) - { - case 1: //2 NAND2 gates - num_L1_nand2 = 2; - num_L2 = 0; - num_L1_active_nand2_path =1; - num_L1_active_nand3_path =0; + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 0; break; - case 2: //4 NAND2 gates - num_L1_nand2 = 4; - num_L2 = 0; - num_L1_active_nand2_path =1; - num_L1_active_nand3_path =0; + case 2: + flag_two_unique_paths = false; + 
number_inputs_L1_gate = 2; + flag_L2_gate = 0; break; - case 3: //8 NAND3 gates - num_L1_nand3 = 8; - num_L2 = 0; - num_L1_active_nand2_path =0; - num_L1_active_nand3_path =1; + case 3: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 0; break; - case 4: //4 + 4 NAND2 gates - num_L1_nand2 = 8; - num_L2 = 16; - num_L1_active_nand2_path =2; - num_L1_active_nand3_path =0; + case 4: + flag_two_unique_paths = false; + number_inputs_L1_gate = 2; + flag_L2_gate = 2; + branch_effort_nand2_gate_output = 4; break; - case 5: //4 NAND2 gates, 8 NAND3 gates - num_L1_nand2 = 4; - num_L1_nand3 = 8; - num_L2 = 32; - num_L1_active_nand2_path =1; - num_L1_active_nand3_path =1; + case 5: + flag_two_unique_paths = true; + flag_L2_gate = 2; + branch_effort_nand2_gate_output = 8; + branch_effort_nand3_gate_output = 4; break; - case 6: //8 + 8 NAND3 gates - num_L1_nand3 = 16; - num_L2 = 64; - num_L1_active_nand2_path =0; - num_L1_active_nand3_path =2; + case 6: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 2; + branch_effort_nand3_gate_output = 8; break; - case 7: //4 + 4 NAND2 gates, 8 NAND3 gates - num_L1_nand2 = 8; - num_L1_nand3 = 8; - num_L2 = 128; - num_L1_active_nand2_path =2; - num_L1_active_nand3_path =1; + case 7: + flag_two_unique_paths = true; + flag_L2_gate = 3; + branch_effort_nand2_gate_output = 32; + branch_effort_nand3_gate_output = 16; break; - case 8: //4 NAND2 gates, 8 + 8 NAND3 gates - num_L1_nand2 = 4; - num_L1_nand3 = 16; - num_L2 = 256; - num_L1_active_nand2_path =2; - num_L1_active_nand3_path =2; + case 8: + flag_two_unique_paths = true; + flag_L2_gate = 3; + branch_effort_nand2_gate_output = 64; + branch_effort_nand3_gate_output = 32; break; - case 9: //8 + 8 + 8 NAND3 gates - num_L1_nand3 = 24; - num_L2 = 512; - num_L1_active_nand2_path =0; - num_L1_active_nand3_path =3; + case 9: + flag_two_unique_paths = false; + number_inputs_L1_gate = 3; + flag_L2_gate = 3; + branch_effort_nand3_gate_output = 
64; break; - default: + default: + assert(0); break; } - for (int i = 1; i < number_gates_L1_nand2_path; ++i) - { - tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); - leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); - gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + // find the number of gates and sizing in second level of predecoder (if there is a second level) + if (flag_L2_gate) { + if (flag_L2_gate == 2) { // 2nd level is a NAND2 gate + w_L2_n[0] = 2 * g_tp.min_w_nmos_; + F = gnand2; + } else { // 2nd level is a NAND3 gate + w_L2_n[0] = 3 * g_tp.min_w_nmos_; + F = gnand3; + } + w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F *= C_ld_predec_blk_out / (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); + number_gates_L2 = logical_effort( + min_number_gates_L2, + flag_L2_gate == 2 ? gnand2 : gnand3, + F, + w_L2_n, + w_L2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + + // Now find the number of gates and widths in first level of predecoder + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) { + // Whenever flag_two_unique_paths is true, it means first level of + // decoder employs + // both NAND2 and NAND3 gates. 
Or when number_inputs_L1_gate is 2, + // it means + // a NAND2 gate is used in the first level of the predecoder + c_load_nand2_path = branch_effort_nand2_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + + gate_C(w_L2_p[0], 0, is_dram_)); + w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; + w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2 * c_load_nand2_path / + (gate_C(w_L1_nand2_n[0], 0, is_dram_) + + gate_C(w_L1_nand2_p[0], 0, is_dram_)); + number_gates_L1_nand2_path = logical_effort( + min_number_gates_L1, + gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + c_load_nand2_path, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } + + //Now find widths of gates along path in which first gate is a NAND3 + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { // Whenever flag_two_unique_paths is TRUE, it means first level of decoder employs + // both NAND2 and NAND3 gates. Or when number_inputs_L1_gate is 3, it means + // a NAND3 gate is used in the first level of the predecoder + c_load_nand3_path = branch_effort_nand3_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + + gate_C(w_L2_p[0], 0, is_dram_)); + w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; + w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3 * c_load_nand3_path / + (gate_C(w_L1_nand3_n[0], 0, is_dram_) + + gate_C(w_L1_nand3_p[0], 0, is_dram_)); + number_gates_L1_nand3_path = logical_effort( + min_number_gates_L1, + gnand3, + F, + w_L1_nand3_n, + w_L1_nand3_p, + c_load_nand3_path, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } + } else { // find number of gates and widths in first level of predecoder block when there is no second level + if (number_inputs_L1_gate == 2) { + w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_; + w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand2 * C_ld_predec_blk_out / + (gate_C(w_L1_nand2_n[0], 0, is_dram_) + + gate_C(w_L1_nand2_p[0], 0, is_dram_)); + number_gates_L1_nand2_path = logical_effort( + min_number_gates_L1, + 
gnand2, + F, + w_L1_nand2_n, + w_L1_nand2_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } else if (number_inputs_L1_gate == 3) { + w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_; + w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + F = gnand3 * C_ld_predec_blk_out / + (gate_C(w_L1_nand3_n[0], 0, is_dram_) + + gate_C(w_L1_nand3_p[0], 0, is_dram_)); + number_gates_L1_nand3_path = logical_effort( + min_number_gates_L1, + gnand3, + F, + w_L1_nand3_n, + w_L1_nand3_p, + C_ld_predec_blk_out, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); + } } - tot_area_L1_nand2 *= num_L1_nand2; - leak_L1_nand2 *= num_L1_nand2; - gate_leak_L1_nand2 *= num_L1_nand2; - - for (int i = 1; i < number_gates_L1_nand3_path; ++i) - { - tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); - leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); - gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); - } - tot_area_L1_nand3 *= num_L1_nand3; - leak_L1_nand3 *= num_L1_nand3; - gate_leak_L1_nand3 *= num_L1_nand3; +} - double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; - double cumulative_area_L2 = 0.0; - double leakage_L2 = 0.0; - double gate_leakage_L2 = 0.0; - if (flag_L2_gate == 2) - { - cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); - leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); - gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); - } - else if (flag_L2_gate == 3) - { - cumulative_area_L2 = compute_gate_area(NAND, 3, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); - leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); - gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); - } - for (int i = 1; i < number_gates_L2; ++i) - { - cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], 
w_L2_n[i], g_tp.cell_h_def); - leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); - gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); +void PredecBlk::compute_area() { + if (exist) { // First check whether a predecoder block is needed + int num_L1_nand2 = 0; + int num_L1_nand3 = 0; + int num_L2 = 0; + double tot_area_L1_nand3 = 0; + double leak_L1_nand3 = 0; + double gate_leak_L1_nand3 = 0; + + double tot_area_L1_nand2 = compute_gate_area(NAND, 2, w_L1_nand2_p[0], w_L1_nand2_n[0], g_tp.cell_h_def); + double leak_L1_nand2 = cmos_Isub_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); + double gate_leak_L1_nand2 = cmos_Ig_leakage(w_L1_nand2_n[0], w_L1_nand2_p[0], 2, nand, is_dram_); + if (number_inputs_L1_gate != 3) { + tot_area_L1_nand3 = 0; + leak_L1_nand3 = 0; + gate_leak_L1_nand3 = 0; + } else { + tot_area_L1_nand3 = compute_gate_area(NAND, 3, w_L1_nand3_p[0], w_L1_nand3_n[0], g_tp.cell_h_def); + leak_L1_nand3 = cmos_Isub_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); + gate_leak_L1_nand3 = cmos_Ig_leakage(w_L1_nand3_n[0], w_L1_nand3_p[0], 3, nand); + } + + switch (number_input_addr_bits) { + case 1: //2 NAND2 gates + num_L1_nand2 = 2; + num_L2 = 0; + num_L1_active_nand2_path = 1; + num_L1_active_nand3_path = 0; + break; + case 2: //4 NAND2 gates + num_L1_nand2 = 4; + num_L2 = 0; + num_L1_active_nand2_path = 1; + num_L1_active_nand3_path = 0; + break; + case 3: //8 NAND3 gates + num_L1_nand3 = 8; + num_L2 = 0; + num_L1_active_nand2_path = 0; + num_L1_active_nand3_path = 1; + break; + case 4: //4 + 4 NAND2 gates + num_L1_nand2 = 8; + num_L2 = 16; + num_L1_active_nand2_path = 2; + num_L1_active_nand3_path = 0; + break; + case 5: //4 NAND2 gates, 8 NAND3 gates + num_L1_nand2 = 4; + num_L1_nand3 = 8; + num_L2 = 32; + num_L1_active_nand2_path = 1; + num_L1_active_nand3_path = 1; + break; + case 6: //8 + 8 NAND3 gates + num_L1_nand3 = 16; + num_L2 = 64; + num_L1_active_nand2_path = 0; + 
num_L1_active_nand3_path = 2; + break; + case 7: //4 + 4 NAND2 gates, 8 NAND3 gates + num_L1_nand2 = 8; + num_L1_nand3 = 8; + num_L2 = 128; + num_L1_active_nand2_path = 2; + num_L1_active_nand3_path = 1; + break; + case 8: //4 NAND2 gates, 8 + 8 NAND3 gates + num_L1_nand2 = 4; + num_L1_nand3 = 16; + num_L2 = 256; + num_L1_active_nand2_path = 2; + num_L1_active_nand3_path = 2; + break; + case 9: //8 + 8 + 8 NAND3 gates + num_L1_nand3 = 24; + num_L2 = 512; + num_L1_active_nand2_path = 0; + num_L1_active_nand3_path = 3; + break; + default: + break; + } + + for (int i = 1; i < number_gates_L1_nand2_path; ++i) { + tot_area_L1_nand2 += compute_gate_area(INV, 1, w_L1_nand2_p[i], w_L1_nand2_n[i], g_tp.cell_h_def); + leak_L1_nand2 += cmos_Isub_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + gate_leak_L1_nand2 += cmos_Ig_leakage(w_L1_nand2_n[i], w_L1_nand2_p[i], 2, nand, is_dram_); + } + tot_area_L1_nand2 *= num_L1_nand2; + leak_L1_nand2 *= num_L1_nand2; + gate_leak_L1_nand2 *= num_L1_nand2; + + for (int i = 1; i < number_gates_L1_nand3_path; ++i) { + tot_area_L1_nand3 += compute_gate_area(INV, 1, w_L1_nand3_p[i], w_L1_nand3_n[i], g_tp.cell_h_def); + leak_L1_nand3 += cmos_Isub_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); + gate_leak_L1_nand3 += cmos_Ig_leakage(w_L1_nand3_n[i], w_L1_nand3_p[i], 3, nand, is_dram_); + } + tot_area_L1_nand3 *= num_L1_nand3; + leak_L1_nand3 *= num_L1_nand3; + gate_leak_L1_nand3 *= num_L1_nand3; + + double cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3; + double cumulative_area_L2 = 0.0; + double leakage_L2 = 0.0; + double gate_leakage_L2 = 0.0; + + if (flag_L2_gate == 2) { + cumulative_area_L2 = compute_gate_area(NAND, 2, w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); + leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); + gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 2, nand, is_dram_); + } else if (flag_L2_gate == 3) { + cumulative_area_L2 = compute_gate_area(NAND, 3, 
w_L2_p[0], w_L2_n[0], g_tp.cell_h_def); + leakage_L2 = cmos_Isub_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); + gate_leakage_L2 = cmos_Ig_leakage(w_L2_n[0], w_L2_p[0], 3, nand, is_dram_); + } + + for (int i = 1; i < number_gates_L2; ++i) { + cumulative_area_L2 += compute_gate_area(INV, 1, w_L2_p[i], w_L2_n[i], g_tp.cell_h_def); + leakage_L2 += cmos_Isub_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); + gate_leakage_L2 += cmos_Ig_leakage(w_L2_n[i], w_L2_p[i], 2, inv, is_dram_); + } + cumulative_area_L2 *= num_L2; + leakage_L2 *= num_L2; + gate_leakage_L2 *= num_L2; + + power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; + power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; + area.set_area(cumulative_area_L1 + cumulative_area_L2); + power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; + power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; } - cumulative_area_L2 *= num_L2; - leakage_L2 *= num_L2; - gate_leakage_L2 *= num_L2; - - power_nand2_path.readOp.leakage = leak_L1_nand2 * g_tp.peri_global.Vdd; - power_nand3_path.readOp.leakage = leak_L1_nand3 * g_tp.peri_global.Vdd; - power_L2.readOp.leakage = leakage_L2 * g_tp.peri_global.Vdd; - area.set_area(cumulative_area_L1 + cumulative_area_L2); - power_nand2_path.readOp.gate_leakage = gate_leak_L1_nand2 * g_tp.peri_global.Vdd; - power_nand3_path.readOp.gate_leakage = gate_leak_L1_nand3 * g_tp.peri_global.Vdd; - power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd; - } } pair PredecBlk::compute_delays( - pair inrisetime) // -{ - pair ret_val; - ret_val.first = 0; // outrisetime_nand2_path - ret_val.second = 0; // outrisetime_nand3_path - - double inrisetime_nand2_path = inrisetime.first; - double inrisetime_nand3_path = inrisetime.second; - int i; - double rd, c_load, 
c_intrinsic, tf, this_delay; - double Vdd = g_tp.peri_global.Vdd; - - // TODO: following delay calculation part can be greatly simplified. - // first check whether a predecoder block is required - if (exist) - { - //Find delay in first level of predecoder block - //First find delay in path - if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) - { - //First gate is a NAND2 gate - rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); - c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); - c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; - - //Add delays of all but the last inverter in the chain - for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) - { - rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); - c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); - c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - - //Add delay of the last inverter - i = number_gates_L1_nand2_path - 1; - rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); - if (flag_L2_gate) - { - c_load = branch_effort_nand2_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); - c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = 
rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - else - { //First level directly drives decoder output load - c_load = C_ld_predec_blk_out; - c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - ret_val.first = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - } + pair inrisetime) { // + pair ret_val; + ret_val.first = 0; // outrisetime_nand2_path + ret_val.second = 0; // outrisetime_nand3_path - if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) - { //Check if the number of gates in the first level is more than 1. 
- //First gate is a NAND3 gate - rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); - c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); - c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - - //Add delays of all but the last inverter in the chain - for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) - { - rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); - c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); - c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - - //Add delay of the last inverter - i = number_gates_L1_nand3_path - 1; - rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); - if (flag_L2_gate) - { - c_load = branch_effort_nand3_gate_output*(gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, is_dram_)); - c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - else - { //First level directly drives decoder output load - c_load = C_ld_predec_blk_out; - 
c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - ret_val.second = this_delay / (1.0 - 0.5); - power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - } + double inrisetime_nand2_path = inrisetime.first; + double inrisetime_nand3_path = inrisetime.second; + int i; + double rd, c_load, c_intrinsic, tf, this_delay; + double Vdd = g_tp.peri_global.Vdd; - // Find delay through second level - if (flag_L2_gate) - { - if (flag_L2_gate == 2) - { - rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); - c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); - c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - else - { // flag_L2_gate = 3 - rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); - c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); - c_intrinsic = 3 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - - for (i = 1; i < number_gates_L2 - 1; ++i) - { - rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); - c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); - c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, 
is_dram_) + - drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - inrisetime_nand3_path = this_delay / (1.0 - 0.5); - power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; - } - - //Add delay of final inverter that drives the wordline decoders - i = number_gates_L2 - 1; - c_load = C_ld_predec_blk_out; - rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); - c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - ret_val.first = this_delay / (1.0 - 0.5); - this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); - delay_nand3_path += this_delay; - ret_val.second = this_delay / (1.0 - 0.5); - power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + // TODO: following delay calculation part can be greatly simplified. 
+ // first check whether a predecoder block is required + if (exist) { + //Find delay in first level of predecoder block + //First find delay in path + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 2)) { + //First gate is a NAND2 gate + rd = tr_R_on(w_L1_nand2_n[0], NCH, 2, is_dram_); + c_load = gate_C(w_L1_nand2_n[1] + w_L1_nand2_p[1], 0.0, is_dram_); + c_intrinsic = 2 * drain_C_(w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_load + c_intrinsic) * Vdd * Vdd; + + //Add delays of all but the last inverter in the chain + for (i = 1; i < number_gates_L1_nand2_path - 1; ++i) { + rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L1_nand2_n[i+1] + w_L1_nand2_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + //Add delay of the last inverter + i = number_gates_L1_nand2_path - 1; + rd = tr_R_on(w_L1_nand2_n[i], NCH, 1, is_dram_); + if (flag_L2_gate) { + c_load = branch_effort_nand2_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + + gate_C(w_L2_p[0], 0, is_dram_)); + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + 
inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { //First level directly drives decoder output load + c_load = C_ld_predec_blk_out; + c_intrinsic = drain_C_(w_L1_nand2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + if ((flag_two_unique_paths) || (number_inputs_L1_gate == 3)) { + //Check if the number of gates in the first level is more than 1. + //First gate is a NAND3 gate + rd = tr_R_on(w_L1_nand3_n[0], NCH, 3, is_dram_); + c_load = gate_C(w_L1_nand3_n[1] + w_L1_nand3_p[1], 0.0, is_dram_); + c_intrinsic = 3 * drain_C_(w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + + //Add delays of all but the last inverter in the chain + for (i = 1; i < number_gates_L1_nand3_path - 1; ++i) { + rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L1_nand3_n[i+1] + w_L1_nand3_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + 
c_load) * Vdd * Vdd; + } + + //Add delay of the last inverter + i = number_gates_L1_nand3_path - 1; + rd = tr_R_on(w_L1_nand3_n[i], NCH, 1, is_dram_); + if (flag_L2_gate) { + c_load = branch_effort_nand3_gate_output * + (gate_C(w_L2_n[0], 0, is_dram_) + gate_C(w_L2_p[0], 0, + is_dram_)); + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { //First level directly drives decoder output load + c_load = C_ld_predec_blk_out; + c_intrinsic = drain_C_(w_L1_nand3_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L1_nand3_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + ret_val.second = this_delay / (1.0 - 0.5); + power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + } + + // Find delay through second level + if (flag_L2_gate) { + if (flag_L2_gate == 2) { + rd = tr_R_on(w_L2_n[0], NCH, 2, is_dram_); + c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); + c_intrinsic = 2 * drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[0], NCH, 2, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } else { // flag_L2_gate = 3 + rd = tr_R_on(w_L2_n[0], NCH, 3, is_dram_); + c_load = gate_C(w_L2_n[1] + w_L2_p[1], 0.0, is_dram_); + c_intrinsic = 3 * 
drain_C_(w_L2_p[0], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[0], NCH, 3, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + for (i = 1; i < number_gates_L2 - 1; ++i) { + rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); + c_load = gate_C(w_L2_n[i+1] + w_L2_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + inrisetime_nand3_path = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } + + //Add delay of final inverter that drives the wordline decoders + i = number_gates_L2 - 1; + c_load = C_ld_predec_blk_out; + rd = tr_R_on(w_L2_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(w_L2_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(w_L2_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + R_wire_predec_blk_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + this_delay = horowitz(inrisetime_nand3_path, tf, 0.5, 0.5, RISE); + delay_nand3_path += this_delay; + ret_val.second = this_delay / (1.0 - 0.5); + power_L2.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd; + } } - } - delay = (ret_val.first > ret_val.second) ? ret_val.first : ret_val.second; - return ret_val; + delay = (ret_val.first > ret_val.second) ? 
ret_val.first : ret_val.second; + return ret_val; } void PredecBlk::leakage_feedback(double temperature) @@ -1033,302 +969,287 @@ PredecBlkDrv::PredecBlkDrv( int way_select_, PredecBlk * blk_, bool is_dram) - :flag_driver_exists(0), - number_gates_nand2_path(0), - number_gates_nand3_path(0), - min_number_gates(2), - num_buffers_driving_1_nand2_load(0), - num_buffers_driving_2_nand2_load(0), - num_buffers_driving_4_nand2_load(0), - num_buffers_driving_2_nand3_load(0), - num_buffers_driving_8_nand3_load(0), - num_buffers_nand3_path(0), - c_load_nand2_path_out(0), - c_load_nand3_path_out(0), - r_load_nand2_path_out(0), - r_load_nand3_path_out(0), - delay_nand2_path(0), - delay_nand3_path(0), - power_nand2_path(), - power_nand3_path(), - blk(blk_), dec(blk->dec), - is_dram_(is_dram), - way_select(way_select_) -{ - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) - { - width_nand2_path_n[i] = 0; - width_nand2_path_p[i] = 0; - width_nand3_path_n[i] = 0; - width_nand3_path_p[i] = 0; - } - - number_input_addr_bits = blk->number_input_addr_bits; - - if (way_select > 1) - { - flag_driver_exists = 1; - number_input_addr_bits = way_select; - if (dec->num_in_signals == 2) - { - c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); - num_buffers_driving_2_nand2_load = number_input_addr_bits; - } - else if (dec->num_in_signals == 3) - { - c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); - num_buffers_driving_2_nand3_load = number_input_addr_bits; + : flag_driver_exists(0), + number_gates_nand2_path(0), + number_gates_nand3_path(0), + min_number_gates(2), + num_buffers_driving_1_nand2_load(0), + num_buffers_driving_2_nand2_load(0), + num_buffers_driving_4_nand2_load(0), + num_buffers_driving_2_nand3_load(0), + num_buffers_driving_8_nand3_load(0), + num_buffers_nand3_path(0), + c_load_nand2_path_out(0), + c_load_nand3_path_out(0), + r_load_nand2_path_out(0), + r_load_nand3_path_out(0), + delay_nand2_path(0), + 
delay_nand3_path(0), + power_nand2_path(), + power_nand3_path(), + blk(blk_), dec(blk->dec), + is_dram_(is_dram), + way_select(way_select_) { + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + width_nand2_path_n[i] = 0; + width_nand2_path_p[i] = 0; + width_nand3_path_n[i] = 0; + width_nand3_path_p[i] = 0; } - } - else if (way_select == 0) - { - if (blk->exist) - { - flag_driver_exists = 1; + + number_input_addr_bits = blk->number_input_addr_bits; + + if (way_select > 1) { + flag_driver_exists = 1; + number_input_addr_bits = way_select; + if (dec->num_in_signals == 2) { + c_load_nand2_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand2_load = number_input_addr_bits; + } else if (dec->num_in_signals == 3) { + c_load_nand3_path_out = gate_C(dec->w_dec_n[0] + dec->w_dec_p[0], 0, is_dram_); + num_buffers_driving_2_nand3_load = number_input_addr_bits; + } + } else if (way_select == 0) { + if (blk->exist) { + flag_driver_exists = 1; + } } - } - compute_widths(); - compute_area(); + compute_widths(); + compute_area(); } -void PredecBlkDrv::compute_widths() -{ - // The predecode block driver accepts as input the address bits from the h-tree network. For - // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of - // inversion to generate addrbar and simply treat addrbar as addr. 
- - double F; - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); - - if (flag_driver_exists) - { - double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); - double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); - - if (way_select == 0) - { - if (blk->number_input_addr_bits == 1) - { //2 NAND2 gates - num_buffers_driving_2_nand2_load = 1; - c_load_nand2_path_out = 2 * C_nand2_gate_blk; - } - else if (blk->number_input_addr_bits == 2) - { //4 NAND2 gates one 2-4 decoder - num_buffers_driving_4_nand2_load = 2; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - } - else if (blk->number_input_addr_bits == 3) - { //8 NAND3 gates one 3-8 decoder - num_buffers_driving_8_nand3_load = 3; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 4) - { //4 + 4 NAND2 gates two 2-4 decoder - num_buffers_driving_4_nand2_load = 4; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - } - else if (blk->number_input_addr_bits == 5) - { //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 decoder - num_buffers_driving_4_nand2_load = 2; - num_buffers_driving_8_nand3_load = 3; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 6) - { //8 + 8 NAND3 gates two 3-8 decoder - num_buffers_driving_8_nand3_load = 6; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 7) - { //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 decoder - num_buffers_driving_4_nand2_load = 4; - num_buffers_driving_8_nand3_load = 3; - c_load_nand2_path_out = 4 * C_nand2_gate_blk; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 8) - { //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 decoder - num_buffers_driving_4_nand2_load = 2; - num_buffers_driving_8_nand3_load = 6; - c_load_nand2_path_out = 4 
* C_nand2_gate_blk; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - else if (blk->number_input_addr_bits == 9) - { //8 + 8 + 8 NAND3 gates three 3-8 decoder - num_buffers_driving_8_nand3_load = 9; - c_load_nand3_path_out = 8 * C_nand3_gate_blk; - } - } - - if ((blk->flag_two_unique_paths) || - (blk->number_inputs_L1_gate == 2) || - (number_input_addr_bits == 0) || - ((way_select)&&(dec->num_in_signals == 2))) - { //this means that way_select is driving NAND2 in decoder. - width_nand2_path_n[0] = g_tp.min_w_nmos_; - width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; - F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); - number_gates_nand2_path = logical_effort( - min_number_gates, - 1, - F, - width_nand2_path_n, - width_nand2_path_p, - c_load_nand2_path_out, - p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); - } - - if ((blk->flag_two_unique_paths) || - (blk->number_inputs_L1_gate == 3) || - ((way_select)&&(dec->num_in_signals == 3))) - { //this means that way_select is driving NAND3 in decoder. - width_nand3_path_n[0] = g_tp.min_w_nmos_; - width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; - F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); - number_gates_nand3_path = logical_effort( - min_number_gates, - 1, - F, - width_nand3_path_n, - width_nand3_path_p, - c_load_nand3_path_out, - p_to_n_sz_ratio, - is_dram_, false, g_tp.max_w_nmos_); +void PredecBlkDrv::compute_widths() { + // The predecode block driver accepts as input the address bits from the h-tree network. For + // each addr bit it then generates addr and addrbar as outputs. For now ignore the effect of + // inversion to generate addrbar and simply treat addrbar as addr. 
+ + double F; + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + + if (flag_driver_exists) { + double C_nand2_gate_blk = gate_C(blk->w_L1_nand2_n[0] + blk->w_L1_nand2_p[0], 0, is_dram_); + double C_nand3_gate_blk = gate_C(blk->w_L1_nand3_n[0] + blk->w_L1_nand3_p[0], 0, is_dram_); + + if (way_select == 0) { + if (blk->number_input_addr_bits == 1) { + //2 NAND2 gates + num_buffers_driving_2_nand2_load = 1; + c_load_nand2_path_out = 2 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == 2) { + //4 NAND2 gates one 2-4 decoder + num_buffers_driving_4_nand2_load = 2; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == 3) { + //8 NAND3 gates one 3-8 decoder + num_buffers_driving_8_nand3_load = 3; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 4) { + //4 + 4 NAND2 gates two 2-4 decoder + num_buffers_driving_4_nand2_load = 4; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + } else if (blk->number_input_addr_bits == 5) { + //4 NAND2 gates, 8 NAND3 gates one 2-4 decoder and one 3-8 + //decoder + num_buffers_driving_4_nand2_load = 2; + num_buffers_driving_8_nand3_load = 3; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 6) { + //8 + 8 NAND3 gates two 3-8 decoder + num_buffers_driving_8_nand3_load = 6; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 7) { + //4 + 4 NAND2 gates, 8 NAND3 gates two 2-4 decoder and one 3-8 + //decoder + num_buffers_driving_4_nand2_load = 4; + num_buffers_driving_8_nand3_load = 3; + c_load_nand2_path_out = 4 * C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 8) { + //4 NAND2 gates, 8 + 8 NAND3 gates one 2-4 decoder and two 3-8 + //decoder + num_buffers_driving_4_nand2_load = 2; + num_buffers_driving_8_nand3_load = 6; + c_load_nand2_path_out = 4 * 
C_nand2_gate_blk; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } else if (blk->number_input_addr_bits == 9) { + //8 + 8 + 8 NAND3 gates three 3-8 decoder + num_buffers_driving_8_nand3_load = 9; + c_load_nand3_path_out = 8 * C_nand3_gate_blk; + } + } + + if ((blk->flag_two_unique_paths) || + (blk->number_inputs_L1_gate == 2) || + (number_input_addr_bits == 0) || + ((way_select) && (dec->num_in_signals == 2))) { + //this means that way_select is driving NAND2 in decoder. + width_nand2_path_n[0] = g_tp.min_w_nmos_; + width_nand2_path_p[0] = p_to_n_sz_ratio * width_nand2_path_n[0]; + F = c_load_nand2_path_out / gate_C(width_nand2_path_n[0] + width_nand2_path_p[0], 0, is_dram_); + number_gates_nand2_path = logical_effort( + min_number_gates, + 1, + F, + width_nand2_path_n, + width_nand2_path_p, + c_load_nand2_path_out, + p_to_n_sz_ratio, + is_dram_, false, g_tp.max_w_nmos_); + } + + if ((blk->flag_two_unique_paths) || + (blk->number_inputs_L1_gate == 3) || + ((way_select) && (dec->num_in_signals == 3))) { + //this means that way_select is driving NAND3 in decoder. 
+ width_nand3_path_n[0] = g_tp.min_w_nmos_; + width_nand3_path_p[0] = p_to_n_sz_ratio * width_nand3_path_n[0]; + F = c_load_nand3_path_out / gate_C(width_nand3_path_n[0] + width_nand3_path_p[0], 0, is_dram_); + number_gates_nand3_path = logical_effort( + min_number_gates, + 1, + F, + width_nand3_path_n, + width_nand3_path_p, + c_load_nand3_path_out, + p_to_n_sz_ratio, + is_dram_, false, g_tp.max_w_nmos_); + } } - } } -void PredecBlkDrv::compute_area() -{ - double area_nand2_path = 0; - double area_nand3_path = 0; - double leak_nand2_path = 0; - double leak_nand3_path = 0; - double gate_leak_nand2_path = 0; - double gate_leak_nand3_path = 0; - - if (flag_driver_exists) - { // first check whether a predecoder block driver is needed - for (int i = 0; i < number_gates_nand2_path; ++i) - { - area_nand2_path += compute_gate_area(INV, 1, width_nand2_path_p[i], width_nand2_path_n[i], g_tp.cell_h_def); - leak_nand2_path += cmos_Isub_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); - gate_leak_nand2_path += cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], 1, inv,is_dram_); - } - area_nand2_path *= (num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load); - leak_nand2_path *= (num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load); - gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + +void PredecBlkDrv::compute_area() { + double area_nand2_path = 0; + double area_nand3_path = 0; + double leak_nand2_path = 0; + double leak_nand3_path = 0; + double gate_leak_nand2_path = 0; + double gate_leak_nand3_path = 0; + + if (flag_driver_exists) { + // first check whether a predecoder block driver is needed + for (int i = 0; i < number_gates_nand2_path; ++i) { + area_nand2_path += + compute_gate_area(INV, 1, width_nand2_path_p[i], + width_nand2_path_n[i], g_tp.cell_h_def); + leak_nand2_path += + cmos_Isub_leakage(width_nand2_path_n[i], 
width_nand2_path_p[i], + 1, inv, is_dram_); + gate_leak_nand2_path += + cmos_Ig_leakage(width_nand2_path_n[i], width_nand2_path_p[i], + 1, inv, is_dram_); + } + area_nand2_path *= (num_buffers_driving_1_nand2_load + num_buffers_driving_2_nand2_load + num_buffers_driving_4_nand2_load); - - for (int i = 0; i < number_gates_nand3_path; ++i) - { - area_nand3_path += compute_gate_area(INV, 1, width_nand3_path_p[i], width_nand3_path_n[i], g_tp.cell_h_def); - leak_nand3_path += cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); - gate_leak_nand3_path += cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], 1, inv,is_dram_); + leak_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + gate_leak_nand2_path *= (num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load); + + for (int i = 0; i < number_gates_nand3_path; ++i) { + area_nand3_path += + compute_gate_area(INV, 1, width_nand3_path_p[i], + width_nand3_path_n[i], g_tp.cell_h_def); + leak_nand3_path += + cmos_Isub_leakage(width_nand3_path_n[i], width_nand3_path_p[i], + 1, inv, is_dram_); + gate_leak_nand3_path += + cmos_Ig_leakage(width_nand3_path_n[i], width_nand3_path_p[i], + 1, inv, is_dram_); + } + area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); + + power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; + power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; + power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; + area.set_area(area_nand2_path + area_nand3_path); } - 
area_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - gate_leak_nand3_path *= (num_buffers_driving_2_nand3_load + num_buffers_driving_8_nand3_load); - - power_nand2_path.readOp.leakage = leak_nand2_path * g_tp.peri_global.Vdd; - power_nand3_path.readOp.leakage = leak_nand3_path * g_tp.peri_global.Vdd; - power_nand2_path.readOp.gate_leakage = gate_leak_nand2_path * g_tp.peri_global.Vdd; - power_nand3_path.readOp.gate_leakage = gate_leak_nand3_path * g_tp.peri_global.Vdd; - area.set_area(area_nand2_path + area_nand3_path); - } } pair PredecBlkDrv::compute_delays( double inrisetime_nand2_path, - double inrisetime_nand3_path) -{ - pair ret_val; - ret_val.first = 0; // outrisetime_nand2_path - ret_val.second = 0; // outrisetime_nand3_path - int i; - double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; - double Vdd = g_tp.peri_global.Vdd; - - if (flag_driver_exists) - { - for (i = 0; i < number_gates_nand2_path - 1; ++i) - { - rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); - c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_); - c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_gate_load); - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - inrisetime_nand2_path = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; - } + double inrisetime_nand3_path) { + pair ret_val; + ret_val.first = 0; // outrisetime_nand2_path + ret_val.second = 0; // outrisetime_nand3_path + int i; + double rd, c_gate_load, c_load, c_intrinsic, tf, this_delay; + double Vdd = g_tp.peri_global.Vdd; - // Final inverter drives the predecoder block or the decoder output load - if 
(number_gates_nand2_path != 0) - { - i = number_gates_nand2_path - 1; - rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); - c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - c_load = c_load_nand2_path_out; - tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out*c_load/ 2; - this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); - delay_nand2_path += this_delay; - ret_val.first = this_delay / (1.0 - 0.5); - power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; + if (flag_driver_exists) { + for (i = 0; i < number_gates_nand2_path - 1; ++i) { + rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); + c_gate_load = gate_C(width_nand2_path_p[i+1] + width_nand2_path_n[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + inrisetime_nand2_path = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd; + } + + // Final inverter drives the predecoder block or the decoder output load + if (number_gates_nand2_path != 0) { + i = number_gates_nand2_path - 1; + rd = tr_R_on(width_nand2_path_n[i], NCH, 1, is_dram_); + c_intrinsic = drain_C_(width_nand2_path_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_nand2_path_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + c_load = c_load_nand2_path_out; + tf = rd * (c_intrinsic + c_load) + r_load_nand2_path_out * c_load / 2; + this_delay = horowitz(inrisetime_nand2_path, tf, 0.5, 0.5, RISE); + delay_nand2_path += this_delay; + ret_val.first = this_delay / (1.0 - 0.5); + power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd; // cout<< 
"c_intrinsic = " << c_intrinsic << "c_load" << c_load <blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) -{ - driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + - drv1->power_nand3_path.readOp.leakage + - drv2->power_nand2_path.readOp.leakage + - drv2->power_nand3_path.readOp.leakage; - block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + - blk1->power_nand3_path.readOp.leakage + - blk1->power_L2.readOp.leakage + - blk2->power_nand2_path.readOp.leakage + - blk2->power_nand3_path.readOp.leakage + - blk2->power_L2.readOp.leakage; - power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; - - driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + - drv1->power_nand3_path.readOp.gate_leakage + - drv2->power_nand2_path.readOp.gate_leakage + - drv2->power_nand3_path.readOp.gate_leakage; - block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + - blk1->power_nand3_path.readOp.gate_leakage + - blk1->power_L2.readOp.gate_leakage + - blk2->power_nand2_path.readOp.gate_leakage + - blk2->power_nand3_path.readOp.gate_leakage + - blk2->power_L2.readOp.gate_leakage; - power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; + : blk1(drv1_->blk), blk2(drv2_->blk), drv1(drv1_), drv2(drv2_) { + driver_power.readOp.leakage = drv1->power_nand2_path.readOp.leakage + + drv1->power_nand3_path.readOp.leakage + + drv2->power_nand2_path.readOp.leakage + + drv2->power_nand3_path.readOp.leakage; + block_power.readOp.leakage = blk1->power_nand2_path.readOp.leakage + + blk1->power_nand3_path.readOp.leakage + + blk1->power_L2.readOp.leakage + + blk2->power_nand2_path.readOp.leakage + + blk2->power_nand3_path.readOp.leakage + + blk2->power_L2.readOp.leakage; + power.readOp.leakage = driver_power.readOp.leakage + block_power.readOp.leakage; + + driver_power.readOp.gate_leakage = drv1->power_nand2_path.readOp.gate_leakage + + 
drv1->power_nand3_path.readOp.gate_leakage + + drv2->power_nand2_path.readOp.gate_leakage + + drv2->power_nand3_path.readOp.gate_leakage; + block_power.readOp.gate_leakage = blk1->power_nand2_path.readOp.gate_leakage + + blk1->power_nand3_path.readOp.gate_leakage + + blk1->power_L2.readOp.gate_leakage + + blk2->power_nand2_path.readOp.gate_leakage + + blk2->power_nand3_path.readOp.gate_leakage + + blk2->power_L2.readOp.gate_leakage; + power.readOp.gate_leakage = driver_power.readOp.gate_leakage + block_power.readOp.gate_leakage; } void PredecBlkDrv::leakage_feedback(double temperature) @@ -1399,37 +1319,35 @@ void PredecBlkDrv::leakage_feedback(double temperature) } } -double Predec::compute_delays(double inrisetime) -{ - // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. - pair tmp_pair1, tmp_pair2; - tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); - tmp_pair1 = blk1->compute_delays(tmp_pair1); - tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); - tmp_pair2 = blk2->compute_delays(tmp_pair2); - tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); - - driver_power.readOp.dynamic = - drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + - drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + - drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + - drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; - - block_power.readOp.dynamic = - blk1->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + - blk1->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + - blk1->power_L2.readOp.dynamic + - blk2->power_nand2_path.readOp.dynamic*blk1->num_L1_active_nand2_path + - blk2->power_nand3_path.readOp.dynamic*blk1->num_L1_active_nand3_path + - blk2->power_L2.readOp.dynamic; - - power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; - - delay = tmp_pair1.first; - return 
tmp_pair1.second; +double Predec::compute_delays(double inrisetime) { + // TODO: Jung Ho thinks that predecoder block driver locates between decoder and predecoder block. + pair tmp_pair1, tmp_pair2; + tmp_pair1 = drv1->compute_delays(inrisetime, inrisetime); + tmp_pair1 = blk1->compute_delays(tmp_pair1); + tmp_pair2 = drv2->compute_delays(inrisetime, inrisetime); + tmp_pair2 = blk2->compute_delays(tmp_pair2); + tmp_pair1 = get_max_delay_before_decoder(tmp_pair1, tmp_pair2); + + driver_power.readOp.dynamic = + drv1->num_addr_bits_nand2_path() * drv1->power_nand2_path.readOp.dynamic + + drv1->num_addr_bits_nand3_path() * drv1->power_nand3_path.readOp.dynamic + + drv2->num_addr_bits_nand2_path() * drv2->power_nand2_path.readOp.dynamic + + drv2->num_addr_bits_nand3_path() * drv2->power_nand3_path.readOp.dynamic; + + block_power.readOp.dynamic = + blk1->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + + blk1->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + + blk1->power_L2.readOp.dynamic + + blk2->power_nand2_path.readOp.dynamic * blk1->num_L1_active_nand2_path + + blk2->power_nand3_path.readOp.dynamic * blk1->num_L1_active_nand3_path + + blk2->power_L2.readOp.dynamic; + + power.readOp.dynamic = driver_power.readOp.dynamic + block_power.readOp.dynamic; + + delay = tmp_pair1.first; + return tmp_pair1.second; } - void Predec::leakage_feedback(double temperature) { drv1->leakage_feedback(temperature); @@ -1465,113 +1383,116 @@ void Predec::leakage_feedback(double temperature) // returns pair Predec::get_max_delay_before_decoder( pair input_pair1, - pair input_pair2) -{ - pair ret_val; - double delay; - - delay = drv1->delay_nand2_path + blk1->delay_nand2_path; - ret_val.first = delay; - ret_val.second = input_pair1.first; - delay = drv1->delay_nand3_path + blk1->delay_nand3_path; - if (ret_val.first < delay) - { - ret_val.first = delay; - ret_val.second = input_pair1.second; - } - delay = drv2->delay_nand2_path + 
blk2->delay_nand2_path; - if (ret_val.first < delay) - { - ret_val.first = delay; - ret_val.second = input_pair2.first; - } - delay = drv2->delay_nand3_path + blk2->delay_nand3_path; - if (ret_val.first < delay) - { + pair input_pair2) { + pair ret_val; + double delay; + + delay = drv1->delay_nand2_path + blk1->delay_nand2_path; ret_val.first = delay; - ret_val.second = input_pair2.second; - } + ret_val.second = input_pair1.first; + delay = drv1->delay_nand3_path + blk1->delay_nand3_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second = input_pair1.second; + } + delay = drv2->delay_nand2_path + blk2->delay_nand2_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second = input_pair2.first; + } + delay = drv2->delay_nand3_path + blk2->delay_nand3_path; + if (ret_val.first < delay) { + ret_val.first = delay; + ret_val.second = input_pair2.second; + } - return ret_val; + return ret_val; } -Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram) -:number_gates(0), - min_number_gates(2), - c_gate_load(c_gate_load_), - c_wire_load(c_wire_load_), - r_wire_load(r_wire_load_), - delay(0), - power(), - is_dram_(is_dram) -{ - for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) - { - width_n[i] = 0; - width_p[i] = 0; - } +Driver::Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, + bool is_dram) + : number_gates(0), + min_number_gates(2), + c_gate_load(c_gate_load_), + c_wire_load(c_wire_load_), + r_wire_load(r_wire_load_), + delay(0), + power(), + is_dram_(is_dram) { + for (int i = 0; i < MAX_NUMBER_GATES_STAGE; i++) { + width_n[i] = 0; + width_p[i] = 0; + } - compute_widths(); + compute_widths(); } -void Driver::compute_widths() -{ - double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); - double c_load = c_gate_load + c_wire_load; - width_n[0] = g_tp.min_w_nmos_; - width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; - - double F = c_load / gate_C(width_n[0] + width_p[0], 0, 
is_dram_); - number_gates = logical_effort( - min_number_gates, - 1, - F, - width_n, - width_p, - c_load, - p_to_n_sz_ratio, - is_dram_, false, - g_tp.max_w_nmos_); +void Driver::compute_widths() { + double p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(is_dram_); + double c_load = c_gate_load + c_wire_load; + width_n[0] = g_tp.min_w_nmos_; + width_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_; + + double F = c_load / gate_C(width_n[0] + width_p[0], 0, is_dram_); + number_gates = logical_effort( + min_number_gates, + 1, + F, + width_n, + width_p, + c_load, + p_to_n_sz_ratio, + is_dram_, false, + g_tp.max_w_nmos_); } -double Driver::compute_delay(double inrisetime) -{ - int i; - double rd, c_load, c_intrinsic, tf; - double this_delay = 0; +double Driver::compute_delay(double inrisetime) { + int i; + double rd, c_load, c_intrinsic, tf; + double this_delay = 0; + + for (i = 0; i < number_gates - 1; ++i) { + rd = tr_R_on(width_n[i], NCH, 1, is_dram_); + c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); + c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load); + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd; + power.readOp.leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + power.readOp.gate_leakage += + cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + } - for (i = 0; i < number_gates - 1; ++i) - { + i = number_gates - 1; + c_load = c_gate_load + c_wire_load; rd = tr_R_on(width_n[i], NCH, 1, is_dram_); - c_load = gate_C(width_n[i+1] + width_p[i+1], 0.0, is_dram_); c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf 
= rd * (c_intrinsic + c_load); + drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); + tf = rd * (c_intrinsic + c_load) + r_wire_load * + (c_wire_load / 2 + c_gate_load); this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); delay += this_delay; - inrisetime = this_delay / (1.0 - 0.5); - power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) *g_tp.peri_global.Vdd; - power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; - } - - i = number_gates - 1; - c_load = c_gate_load + c_wire_load; - rd = tr_R_on(width_n[i], NCH, 1, is_dram_); - c_intrinsic = drain_C_(width_p[i], PCH, 1, 1, g_tp.cell_h_def, is_dram_) + - drain_C_(width_n[i], NCH, 1, 1, g_tp.cell_h_def, is_dram_); - tf = rd * (c_intrinsic + c_load) + r_wire_load * (c_wire_load / 2 + c_gate_load); - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay += this_delay; - power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power.readOp.leakage += cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * g_tp.peri_global.Vdd; - power.readOp.gate_leakage += cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_)* g_tp.peri_global.Vdd; - - return this_delay / (1.0 - 0.5); + power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd; + power.readOp.leakage += + cmos_Isub_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + power.readOp.gate_leakage += + cmos_Ig_leakage(width_n[i], width_p[i], 1, inv, is_dram_) * + g_tp.peri_global.Vdd; + + return this_delay / (1.0 - 0.5); } diff --git a/ext/mcpat/cacti/decoder.h b/ext/mcpat/cacti/decoder.h index 35631e84b..a2ddf722c 100644 --- a/ext/mcpat/cacti/decoder.h +++ b/ext/mcpat/cacti/decoder.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 
Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -42,9 +43,8 @@ using namespace std; -class Decoder : public Component -{ - public: +class Decoder : public Component { +public: Decoder( int _num_dec_signals, bool flag_way_select, @@ -80,125 +80,120 @@ class Decoder : public Component -class PredecBlk : public Component -{ - public: - PredecBlk( - int num_dec_signals, - Decoder * dec, - double C_wire_predec_blk_out, - double R_wire_predec_blk_out, - int num_dec_per_predec, - bool is_dram_, - bool is_blk1); - - Decoder * dec; - bool exist; - int number_input_addr_bits; - double C_ld_predec_blk_out; - double R_wire_predec_blk_out; - int branch_effort_nand2_gate_output; - int branch_effort_nand3_gate_output; - bool flag_two_unique_paths; - int flag_L2_gate; - int number_inputs_L1_gate; - int number_gates_L1_nand2_path; - int number_gates_L1_nand3_path; - int number_gates_L2; - int min_number_gates_L1; - int min_number_gates_L2; - int num_L1_active_nand2_path; - int num_L1_active_nand3_path; - double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; - double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; - double w_L2_n[MAX_NUMBER_GATES_STAGE]; - double w_L2_p[MAX_NUMBER_GATES_STAGE]; - double delay_nand2_path; - double delay_nand3_path; - powerDef power_nand2_path; - powerDef power_nand3_path; - powerDef 
power_L2; - - bool is_dram_; - - void compute_widths(); - void compute_area(); - - void leakage_feedback(double temperature); - - pair compute_delays(pair inrisetime); // - // return +class PredecBlk : public Component { +public: + PredecBlk( + int num_dec_signals, + Decoder * dec, + double C_wire_predec_blk_out, + double R_wire_predec_blk_out, + int num_dec_per_predec, + bool is_dram_, + bool is_blk1); + + Decoder * dec; + bool exist; + int number_input_addr_bits; + double C_ld_predec_blk_out; + double R_wire_predec_blk_out; + int branch_effort_nand2_gate_output; + int branch_effort_nand3_gate_output; + bool flag_two_unique_paths; + int flag_L2_gate; + int number_inputs_L1_gate; + int number_gates_L1_nand2_path; + int number_gates_L1_nand3_path; + int number_gates_L2; + int min_number_gates_L1; + int min_number_gates_L2; + int num_L1_active_nand2_path; + int num_L1_active_nand3_path; + double w_L1_nand2_n[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand2_p[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand3_n[MAX_NUMBER_GATES_STAGE]; + double w_L1_nand3_p[MAX_NUMBER_GATES_STAGE]; + double w_L2_n[MAX_NUMBER_GATES_STAGE]; + double w_L2_p[MAX_NUMBER_GATES_STAGE]; + double delay_nand2_path; + double delay_nand3_path; + powerDef power_nand2_path; + powerDef power_nand3_path; + powerDef power_L2; + + bool is_dram_; + + void compute_widths(); + void compute_area(); + + void leakage_feedback(double temperature); + + pair compute_delays(pair inrisetime); // + // return }; -class PredecBlkDrv : public Component -{ - public: - PredecBlkDrv( - int way_select, - PredecBlk * blk_, - bool is_dram); - - int flag_driver_exists; - int number_input_addr_bits; - int number_gates_nand2_path; - int number_gates_nand3_path; - int min_number_gates; - int num_buffers_driving_1_nand2_load; - int num_buffers_driving_2_nand2_load; - int num_buffers_driving_4_nand2_load; - int num_buffers_driving_2_nand3_load; - int num_buffers_driving_8_nand3_load; - int num_buffers_nand3_path; - double 
c_load_nand2_path_out; - double c_load_nand3_path_out; - double r_load_nand2_path_out; - double r_load_nand3_path_out; - double width_nand2_path_n[MAX_NUMBER_GATES_STAGE]; - double width_nand2_path_p[MAX_NUMBER_GATES_STAGE]; - double width_nand3_path_n[MAX_NUMBER_GATES_STAGE]; - double width_nand3_path_p[MAX_NUMBER_GATES_STAGE]; - double delay_nand2_path; - double delay_nand3_path; - powerDef power_nand2_path; - powerDef power_nand3_path; - - PredecBlk * blk; - Decoder * dec; - bool is_dram_; - int way_select; - - void compute_widths(); - void compute_area(); - - void leakage_feedback(double temperature); - - - pair compute_delays( - double inrisetime_nand2_path, - double inrisetime_nand3_path); // return - - inline int num_addr_bits_nand2_path() - { - return num_buffers_driving_1_nand2_load + - num_buffers_driving_2_nand2_load + - num_buffers_driving_4_nand2_load; - } - inline int num_addr_bits_nand3_path() - { - return num_buffers_driving_2_nand3_load + - num_buffers_driving_8_nand3_load; - } - double get_rdOp_dynamic_E(int num_act_mats_hor_dir); +class PredecBlkDrv : public Component { +public: + PredecBlkDrv( + int way_select, + PredecBlk * blk_, + bool is_dram); + + int flag_driver_exists; + int number_input_addr_bits; + int number_gates_nand2_path; + int number_gates_nand3_path; + int min_number_gates; + int num_buffers_driving_1_nand2_load; + int num_buffers_driving_2_nand2_load; + int num_buffers_driving_4_nand2_load; + int num_buffers_driving_2_nand3_load; + int num_buffers_driving_8_nand3_load; + int num_buffers_nand3_path; + double c_load_nand2_path_out; + double c_load_nand3_path_out; + double r_load_nand2_path_out; + double r_load_nand3_path_out; + double width_nand2_path_n[MAX_NUMBER_GATES_STAGE]; + double width_nand2_path_p[MAX_NUMBER_GATES_STAGE]; + double width_nand3_path_n[MAX_NUMBER_GATES_STAGE]; + double width_nand3_path_p[MAX_NUMBER_GATES_STAGE]; + double delay_nand2_path; + double delay_nand3_path; + powerDef power_nand2_path; + powerDef 
power_nand3_path; + + PredecBlk * blk; + Decoder * dec; + bool is_dram_; + int way_select; + + void compute_widths(); + void compute_area(); + + void leakage_feedback(double temperature); + + + pair compute_delays( + double inrisetime_nand2_path, + double inrisetime_nand3_path); // return + + inline int num_addr_bits_nand2_path() { + return num_buffers_driving_1_nand2_load + + num_buffers_driving_2_nand2_load + + num_buffers_driving_4_nand2_load; + } + inline int num_addr_bits_nand3_path() { + return num_buffers_driving_2_nand3_load + + num_buffers_driving_8_nand3_load; + } + double get_rdOp_dynamic_E(int num_act_mats_hor_dir); }; -class Predec : public Component -{ - public: +class Predec : public Component { +public: Predec( PredecBlkDrv * drv1, PredecBlkDrv * drv2); @@ -214,7 +209,7 @@ class Predec : public Component powerDef block_power; powerDef driver_power; - private: +private: // returns pair get_max_delay_before_decoder( pair input_pair1, @@ -223,24 +218,23 @@ class Predec : public Component -class Driver : public Component -{ - public: - Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram); +class Driver : public Component { +public: + Driver(double c_gate_load_, double c_wire_load_, double r_wire_load_, bool is_dram); - int number_gates; - int min_number_gates; - double width_n[MAX_NUMBER_GATES_STAGE]; - double width_p[MAX_NUMBER_GATES_STAGE]; - double c_gate_load; - double c_wire_load; - double r_wire_load; - double delay; - powerDef power; - bool is_dram_; + int number_gates; + int min_number_gates; + double width_n[MAX_NUMBER_GATES_STAGE]; + double width_p[MAX_NUMBER_GATES_STAGE]; + double c_gate_load; + double c_wire_load; + double r_wire_load; + double delay; + powerDef power; + bool is_dram_; - void compute_widths(); - double compute_delay(double inrisetime); + void compute_widths(); + double compute_delay(double inrisetime); }; diff --git a/ext/mcpat/cacti/htree2.cc b/ext/mcpat/cacti/htree2.cc index 
817ea6a7c..55724c397 100644 --- a/ext/mcpat/cacti/htree2.cc +++ b/ext/mcpat/cacti/htree2.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -39,15 +40,17 @@ Htree2::Htree2( enum Wire_type wire_model, double mat_w, double mat_h, - int a_bits, int d_inbits, int search_data_in, int d_outbits, int search_data_out, int bl, int wl, enum Htree_type htree_type, + int a_bits, int d_inbits, int search_data_in, int d_outbits, + int search_data_out, int bl, int wl, enum Htree_type htree_type, bool uca_tree_, bool search_tree_, TechnologyParameter::DeviceType *dt) - :in_rise_time(0), out_rise_time(0), - tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), - add_bits(a_bits), data_in_bits(d_inbits), search_data_in_bits(search_data_in),data_out_bits(d_outbits), - search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl), - uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), deviceType(dt) -{ - assert(ndbl >= 2 && ndwl >= 2); + : in_rise_time(0), out_rise_time(0), + tree_type(htree_type), mat_width(mat_w), mat_height(mat_h), + add_bits(a_bits), data_in_bits(d_inbits), + search_data_in_bits(search_data_in), data_out_bits(d_outbits), + search_data_out_bits(search_data_out), ndbl(bl), ndwl(wl), + uca_tree(uca_tree_), search_tree(search_tree_), wt(wire_model), + deviceType(dt) { + assert(ndbl >= 2 
&& ndwl >= 2); // if (ndbl == 1 && ndwl == 1) // { @@ -61,177 +64,211 @@ Htree2::Htree2( // if (ndwl == 1) ndwl++; // if (ndbl == 1) ndbl++; - max_unpipelined_link_delay = 0; //TODO - min_w_nmos = g_tp.min_w_nmos_; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; + max_unpipelined_link_delay = 0; //TODO + min_w_nmos = g_tp.min_w_nmos_; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; - switch (htree_type) - { + switch (htree_type) { case Add_htree: - wire_bw = init_wire_bw = add_bits; - in_htree(); - break; + wire_bw = init_wire_bw = add_bits; + in_htree(); + break; case Data_in_htree: - wire_bw = init_wire_bw = data_in_bits; - in_htree(); - break; + wire_bw = init_wire_bw = data_in_bits; + in_htree(); + break; case Data_out_htree: - wire_bw = init_wire_bw = data_out_bits; - out_htree(); - break; + wire_bw = init_wire_bw = data_out_bits; + out_htree(); + break; case Search_in_htree: - wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not. - in_htree(); - break; + wire_bw = init_wire_bw = search_data_in_bits;//in_search_tree is broad cast, out_htree is not. + in_htree(); + break; case Search_out_htree: - wire_bw = init_wire_bw = search_data_out_bits; - out_htree(); - break; + wire_bw = init_wire_bw = search_data_out_bits; + out_htree(); + break; default: - assert(0); - break; - } + assert(0); + break; + } - power_bit = power; - power.readOp.dynamic *= init_wire_bw; + power_bit = power; + power.readOp.dynamic *= init_wire_bw; - assert(power.readOp.dynamic >= 0); - assert(power.readOp.leakage >= 0); + assert(power.readOp.dynamic >= 0); + assert(power.readOp.leakage >= 0); } // nand gate sizing calculation -void Htree2::input_nand(double s1, double s2, double l_eff) -{ - Wire w1(wt, l_eff); - double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; - // input capacitance of a repeater = input capacitance of nand. - double nsize = s1*(1 + pton_size)/(2 + pton_size); - nsize = (nsize < 1) ? 
1 : nsize; - - double tc = 2*tr_R_on(nsize*min_w_nmos, NCH, 1) * - (drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)*2 + - 2 * gate_C(s2*(min_w_nmos + min_w_pmos), 0)); - delay+= horowitz (w1.out_rise_time, tc, - deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); - power.readOp.dynamic += 0.5 * - (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; +void Htree2::input_nand(double s1, double s2, double l_eff) { + Wire w1(wt, l_eff); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // input capacitance of a repeater = input capacitance of nand. + double nsize = s1 * (1 + pton_size) / (2 + pton_size); + nsize = (nsize < 1) ? 1 : nsize; + + double tc = 2 * tr_R_on(nsize * min_w_nmos, NCH, 1) * + (drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)); + delay += horowitz(w1.out_rise_time, tc, + deviceType->Vth / deviceType->Vdd, deviceType->Vth / + deviceType->Vdd, RISE); + power.readOp.dynamic += 0.5 * + (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (2*drain_C_(pton_size * nsize*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + drain_C_(nsize*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - + 2*gate_C(s2*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd * wire_bw ; - power.readOp.leakage += (wire_bw*cmos_Isub_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; - power.readOp.gate_leakage += (wire_bw*cmos_Ig_leakage(min_w_nmos*(nsize*2), min_w_pmos * nsize * 2, 2, nand))*deviceType->Vdd; + (2 * drain_C_(pton_size * nsize * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 
drain_C_(nsize * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2 * gate_C(s2 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * wire_bw ; + power.readOp.leakage += (wire_bw * + cmos_Isub_leakage(min_w_nmos * (nsize * 2), + min_w_pmos * nsize * 2, 2, + nand)) * deviceType->Vdd; + power.readOp.gate_leakage += (wire_bw * + cmos_Ig_leakage(min_w_nmos * (nsize * 2), + min_w_pmos * nsize * 2, 2, + nand)) * deviceType->Vdd; } // tristate buffer model consisting of not, nand, nor, and driver transistors -void Htree2::output_buffer(double s1, double s2, double l_eff) -{ - Wire w1(wt, l_eff); - double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; - // input capacitance of repeater = input capacitance of nand + nor. - double size = s1*(1 + pton_size)/(2 + pton_size + 1 + 2*pton_size); - double s_eff = //stage eff of a repeater in a wire - (gate_C(s2*(min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff*1e-6,true))/ - gate_C(s2*(min_w_nmos + min_w_pmos), 0); - double tr_size = gate_C(s1*(min_w_nmos + min_w_pmos), 0) * 1/2/(s_eff*gate_C(min_w_pmos, 0)); - size = (size < 1) ? 
1 : size; - - double res_nor = 2*tr_R_on(size*min_w_pmos, PCH, 1); - double res_ptrans = tr_R_on(tr_size*min_w_nmos, NCH, 1); - double cap_nand_out = drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def)*2 + - gate_C(tr_size*min_w_pmos, 0); - double cap_ptrans_out = 2 *(drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + - gate_C(s1*(min_w_nmos + min_w_pmos), 0); - - double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; - - - delay += horowitz (w1.out_rise_time, tc, - deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE); - - //nand - power.readOp.dynamic += 0.5 * - (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(tr_size*(min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; +void Htree2::output_buffer(double s1, double s2, double l_eff) { + Wire w1(wt, l_eff); + double pton_size = deviceType->n_to_p_eff_curr_drv_ratio; + // input capacitance of repeater = input capacitance of nand + nor. + double size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size); + double s_eff = //stage eff of a repeater in a wire + (gate_C(s2 * (min_w_nmos + min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6, + true)) / + gate_C(s2 * (min_w_nmos + min_w_pmos), 0); + double tr_size = gate_C(s1 * (min_w_nmos + min_w_pmos), 0) * 1 / 2 / + (s_eff * gate_C(min_w_pmos, 0)); + size = (size < 1) ? 
1 : size; + + double res_nor = 2 * tr_R_on(size * min_w_pmos, PCH, 1); + double res_ptrans = tr_R_on(tr_size * min_w_nmos, NCH, 1); + double cap_nand_out = + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + + gate_C(tr_size * min_w_pmos, 0); + double cap_ptrans_out = 2 * + (drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0); + + double tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out; + + + delay += horowitz(w1.out_rise_time, tc, + deviceType->Vth / deviceType->Vdd, deviceType->Vth / + deviceType->Vdd, RISE); + + //nand + power.readOp.dynamic += 0.5 * + (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (2*drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(tr_size*(min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; - - //not - power.readOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; + (2 * drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; + + //not + power.readOp.dynamic += 0.5 * + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, 
g_tp.cell_h_def) - +drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; - - //nor - power.readOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; + + //nor + power.readOp.dynamic += 0.5 * + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - (drain_C_(size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - + 2*drain_C_(size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) - +gate_C(tr_size*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; - - //output transistor - power.readOp.dynamic += 0.5 * - ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) - +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 - + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd; + (drain_C_(size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; + + //output transistor + power.readOp.dynamic += 0.5 * + ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) * 2 + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd; power.searchOp.dynamic += 0.5 * - ((drain_C_(tr_size*min_w_pmos, PCH, 1, 1, 
g_tp.cell_h_def) - +drain_C_(tr_size*min_w_nmos, NCH, 1, 1, g_tp.cell_h_def))*2 - + gate_C(s1*(min_w_nmos + min_w_pmos), 0)) * - deviceType->Vdd * deviceType->Vdd*init_wire_bw; - - if(uca_tree) { - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - //power.readOp.gate_leakage *=; - } - else { - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.leakage += cmos_Isub_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*tr_size*2, min_w_pmos*tr_size*2, 1, inv)*deviceType->Vdd*wire_bw;/*inverter + output tr*/ - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nand)*deviceType->Vdd*wire_bw;//nand - power.readOp.gate_leakage += cmos_Ig_leakage(min_w_nmos*size*3, min_w_pmos*size*3, 2, nor)*deviceType->Vdd*wire_bw;//nor - //power.readOp.gate_leakage *=deviceType->Vdd*wire_bw; - } + ((drain_C_(tr_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(tr_size * min_w_nmos, NCH, 1, 1, 
g_tp.cell_h_def)) * 2 + + gate_C(s1 * (min_w_nmos + min_w_pmos), 0)) * + deviceType->Vdd * deviceType->Vdd * init_wire_bw; + + if (uca_tree) { + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * + 2, 1, inv) * + deviceType->Vdd * wire_bw;/*inverter + output tr*/ + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * deviceType->Vdd * wire_bw;//nand + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * deviceType->Vdd * wire_bw;//nor + + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, + 1, inv) * + deviceType->Vdd * wire_bw;/*inverter + output tr*/ + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * deviceType->Vdd * wire_bw;//nand + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * deviceType->Vdd * wire_bw;//nor + } else { + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * + 2, 1, inv) * + deviceType->Vdd * wire_bw;/*inverter + output tr*/ + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * deviceType->Vdd * wire_bw;//nand + power.readOp.leakage += + cmos_Isub_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * deviceType->Vdd * wire_bw;//nor + + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * tr_size * 2, min_w_pmos * tr_size * 2, + 1, inv) * + deviceType->Vdd * wire_bw;/*inverter + output tr*/ + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nand) * deviceType->Vdd * wire_bw;//nand + power.readOp.gate_leakage += + cmos_Ig_leakage(min_w_nmos * size * 3, min_w_pmos * size * 3, 2, + nor) * deviceType->Vdd * wire_bw;//nor + } } @@ -250,192 +287,200 @@ void Htree2::output_buffer(double s1, double 
s2, double l_eff) * hor. links left. After this it goes through the remaining vertical * links. */ - void -Htree2::in_htree() -{ - //temp var - double s1 = 0, s2 = 0, s3 = 0; - double l_eff = 0; - Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; - double len = 0, ht = 0; - int option = 0; - - int h = (int) _log2(ndwl/2); // horizontal nodes - int v = (int) _log2(ndbl/2); // vertical nodes - double len_temp; - double ht_temp; - if (uca_tree) - {//Sheng: this computation do not consider the wires that route from edge to middle. - ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,h))))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,v))))/2; - } - else - { - if (ndwl == ndbl) { - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - } - else if (ndwl > ndbl) { - double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * - (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * 
v))/2; +void +Htree2::in_htree() { + //temp var + double s1 = 0, s2 = 0, s3 = 0; + double l_eff = 0; + Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; + double len = 0, ht = 0; + int option = 0; + + int h = (int) _log2(ndwl / 2); // horizontal nodes + int v = (int) _log2(ndbl / 2); // vertical nodes + double len_temp; + double ht_temp; + if (uca_tree) { + //Sheng: this computation do not consider the wires that route from + //edge to middle. + ht_temp = (mat_height * ndbl / 2 + + /* since uca_tree models interbank tree, + mat_height => bank height */ + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * + 2 * (1 - pow(0.5, h)))) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * + 2 * (1 - pow(0.5, v)))) / 2; + } else { + if (ndwl == ndbl) { + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * (ndbl / 2 - 1) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * h) + ) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * (ndwl / 2 - 1) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * v)) / 2; + } else if (ndwl > ndbl) { + double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2)); + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + + (search_data_in_bits + + search_data_out_bits)) * + ((ndbl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * + (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + 
g_tp.wire_outside_mat.pitch * v)) / 2; + } else { + double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2)); + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * h) + ) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * + (h + 2 * (1 - pow(0.5, v - h)))) / 2; + } } - else { - double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; + + area.h = ht_temp * 2; + area.w = len_temp * 2; + delay = 0; + power.readOp.dynamic = 0; + power.readOp.leakage = 0; + power.searchOp.dynamic = 0; + len = len_temp; + ht = ht_temp / 2; + + while (v > 0 || h > 0) { + if (wtemp1) delete wtemp1; + if (wtemp2) delete wtemp2; + if (wtemp3) delete wtemp3; + + if (h > v) { + //the iteration considers only one horizontal link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, len / 2); // ver + len_temp = len; + len /= 2; + wtemp3 = 0; + h--; + option = 0; + } else if (v > 0 && h > 0) { + //considers one horizontal link and one vertical link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, ht); // ver + wtemp3 = new Wire(wt, len / 2); // next hor + len_temp = len; + ht_temp = ht; + len /= 2; + ht /= 2; + v--; + h--; + option = 
1; + } else { + // considers only one vertical link + assert(h == 0); + wtemp1 = new Wire(wt, ht); // ver + wtemp2 = new Wire(wt, ht / 2); // hor + ht_temp = ht; + ht /= 2; + wtemp3 = 0; + v--; + option = 2; + } + + delay += wtemp1->delay; + power.readOp.dynamic += wtemp1->power.readOp.dynamic; + power.searchOp.dynamic += wtemp1->power.readOp.dynamic * wire_bw; + power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw; + power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw; + if ((uca_tree == false && option == 2) || search_tree == true) { + wire_bw *= 2; // wire bandwidth doubles only for vertical branches + } + + if (uca_tree == false) { + if (len_temp > wtemp1->repeater_spacing) { + s1 = wtemp1->repeater_size; + l_eff = wtemp1->repeater_spacing; + } else { + s1 = (len_temp / wtemp1->repeater_spacing) * + wtemp1->repeater_size; + l_eff = len_temp; + } + + if (ht_temp > wtemp2->repeater_spacing) { + s2 = wtemp2->repeater_size; + } else { + s2 = (len_temp / wtemp2->repeater_spacing) * + wtemp2->repeater_size; + } + // first level + input_nand(s1, s2, l_eff); + } + + + if (option != 1) { + continue; + } + + // second level + delay += wtemp2->delay; + power.readOp.dynamic += wtemp2->power.readOp.dynamic; + power.searchOp.dynamic += wtemp2->power.readOp.dynamic * wire_bw; + power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw; + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + + if (uca_tree) { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.gate_leakage += + wtemp2->power.readOp.gate_leakage * wire_bw; + } else { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.gate_leakage += + wtemp2->power.readOp.gate_leakage * wire_bw; + wire_bw *= 2; + + if (ht_temp > wtemp3->repeater_spacing) { + s3 = wtemp3->repeater_size; + l_eff = wtemp3->repeater_spacing; + } else { + s3 = (len_temp / wtemp3->repeater_spacing) * + wtemp3->repeater_size; + 
l_eff = ht_temp; + } + + input_nand(s2, s3, l_eff); + } } - } - - area.h = ht_temp * 2; - area.w = len_temp * 2; - delay = 0; - power.readOp.dynamic = 0; - power.readOp.leakage = 0; - power.searchOp.dynamic =0; - len = len_temp; - ht = ht_temp/2; - - while (v > 0 || h > 0) - { + if (wtemp1) delete wtemp1; if (wtemp2) delete wtemp2; if (wtemp3) delete wtemp3; - - if (h > v) - { - //the iteration considers only one horizontal link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, len/2); // ver - len_temp = len; - len /= 2; - wtemp3 = 0; - h--; - option = 0; - } - else if (v>0 && h>0) - { - //considers one horizontal link and one vertical link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, ht); // ver - wtemp3 = new Wire(wt, len/2); // next hor - len_temp = len; - ht_temp = ht; - len /= 2; - ht /= 2; - v--; - h--; - option = 1; - } - else - { - // considers only one vertical link - assert(h == 0); - wtemp1 = new Wire(wt, ht); // ver - wtemp2 = new Wire(wt, ht/2); // hor - ht_temp = ht; - ht /= 2; - wtemp3 = 0; - v--; - option = 2; - } - - delay += wtemp1->delay; - power.readOp.dynamic += wtemp1->power.readOp.dynamic; - power.searchOp.dynamic += wtemp1->power.readOp.dynamic*wire_bw; - power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; - power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; - if ((uca_tree == false && option == 2) || search_tree==true) - { - wire_bw*=2; // wire bandwidth doubles only for vertical branches - } - - if (uca_tree == false) - { - if (len_temp > wtemp1->repeater_spacing) - { - s1 = wtemp1->repeater_size; - l_eff = wtemp1->repeater_spacing; - } - else - { - s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; - l_eff = len_temp; - } - - if (ht_temp > wtemp2->repeater_spacing) - { - s2 = wtemp2->repeater_size; - } - else - { - s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; - } - // first level - input_nand(s1, s2, l_eff); - } - - - if (option != 1) - { - 
continue; - } - - // second level - delay += wtemp2->delay; - power.readOp.dynamic += wtemp2->power.readOp.dynamic; - power.searchOp.dynamic += wtemp2->power.readOp.dynamic*wire_bw; - power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - - if (uca_tree) - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - } - else - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - wire_bw*=2; - - if (ht_temp > wtemp3->repeater_spacing) - { - s3 = wtemp3->repeater_size; - l_eff = wtemp3->repeater_spacing; - } - else - { - s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; - l_eff = ht_temp; - } - - input_nand(s2, s3, l_eff); - } - } - - if (wtemp1) delete wtemp1; - if (wtemp2) delete wtemp2; - if (wtemp3) delete wtemp3; } @@ -452,190 +497,198 @@ Htree2::in_htree() * hor. links left. After this it goes through the remaining vertical * links. 
*/ -void Htree2::out_htree() -{ - //temp var - double s1 = 0, s2 = 0, s3 = 0; - double l_eff = 0; - Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; - double len = 0, ht = 0; - int option = 0; - - int h = (int) _log2(ndwl/2); - int v = (int) _log2(ndbl/2); - double len_temp; - double ht_temp; - if (uca_tree) - { - ht_temp = (mat_height*ndbl/2 +/* since uca_tree models interbank tree, mat_height => bank height */ - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,h))))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + data_in_bits + data_out_bits + (search_data_in_bits + search_data_out_bits)) * g_tp.wire_outside_mat.pitch * - 2 * (1-pow(0.5,v))))/2; - } - else - { - if (ndwl == ndbl) { - ht_temp = ((mat_height*ndbl/2) + - ((add_bits+ (search_data_in_bits + search_data_out_bits)) * (ndbl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - +void Htree2::out_htree() { + //temp var + double s1 = 0, s2 = 0, s3 = 0; + double l_eff = 0; + Wire *wtemp1 = 0, *wtemp2 = 0, *wtemp3 = 0; + double len = 0, ht = 0; + int option = 0; + + int h = (int) _log2(ndwl / 2); + int v = (int) _log2(ndbl / 2); + double len_temp; + double ht_temp; + if (uca_tree) { + ht_temp = (mat_height * ndbl / 2 + + /* since uca_tree models interbank tree, + mat_height => bank height */ + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * + 2 * (1 - pow(0.5, h)))) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + data_in_bits + data_out_bits + + (search_data_in_bits + search_data_out_bits)) * + g_tp.wire_outside_mat.pitch * + 2 * (1 - pow(0.5, v)))) / 2; + } else { + 
if (ndwl == ndbl) { + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * + (ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * h) + ) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * (ndwl / 2 - 1) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * v)) / 2; + + } else if (ndwl > ndbl) { + double excess_part = (_log2(ndwl / 2) - _log2(ndbl / 2)); + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + ((ndbl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * + (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + (ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * v)) / 2; + } else { + double excess_part = (_log2(ndbl / 2) - _log2(ndwl / 2)); + ht_temp = ((mat_height * ndbl / 2) + + ((add_bits + + (search_data_in_bits + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + ((data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * h) + ) / 2; + len_temp = (mat_width * ndwl / 2 + + ((add_bits + (search_data_in_bits + + search_data_out_bits)) * + ((ndwl / 2 - 1) + excess_part) * + g_tp.wire_outside_mat.pitch) + + (data_in_bits + data_out_bits) * + g_tp.wire_outside_mat.pitch * + (h + 2 * (1 - pow(0.5, v - h)))) / 2; + } } - else if (ndwl > ndbl) { - double excess_part = (_log2(ndwl/2) - _log2(ndbl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits)) * ((ndbl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * 
g_tp.wire_outside_mat.pitch * - (2*(1 - pow(0.5, h-v)) + pow(0.5, v-h) * v))/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* (ndwl/2-1) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * v))/2; - } - else { - double excess_part = (_log2(ndbl/2) - _log2(ndwl/2)); - ht_temp = ((mat_height*ndbl/2) + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - ((data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * h) - )/2; - len_temp = (mat_width*ndwl/2 + - ((add_bits + (search_data_in_bits + search_data_out_bits))* ((ndwl/2-1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (data_in_bits + data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2*(1-pow(0.5, v-h))))/2; + area.h = ht_temp * 2; + area.w = len_temp * 2; + delay = 0; + power.readOp.dynamic = 0; + power.readOp.leakage = 0; + power.readOp.gate_leakage = 0; + //cout<<"power.readOp.gate_leakage"< 0 || h > 0) { //finds delay/power of each link in the tree + if (wtemp1) delete wtemp1; + if (wtemp2) delete wtemp2; + if (wtemp3) delete wtemp3; + + if (h > v) { + //the iteration considers only one horizontal link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, len / 2); // ver + len_temp = len; + len /= 2; + wtemp3 = 0; + h--; + option = 0; + } else if (v > 0 && h > 0) { + //considers one horizontal link and one vertical link + wtemp1 = new Wire(wt, len); // hor + wtemp2 = new Wire(wt, ht); // ver + wtemp3 = new Wire(wt, len / 2); // next hor + len_temp = len; + ht_temp = ht; + len /= 2; + ht /= 2; + v--; + h--; + option = 1; + } else { + // considers only one vertical link + assert(h == 0); + wtemp1 = new Wire(wt, ht); // hor + wtemp2 = new Wire(wt, ht / 2); // ver + ht_temp = ht; + ht /= 2; + wtemp3 = 0; + v--; + option = 2; + } + delay += wtemp1->delay; + power.readOp.dynamic += wtemp1->power.readOp.dynamic; + power.searchOp.dynamic 
+= wtemp1->power.readOp.dynamic * init_wire_bw; + power.readOp.leakage += wtemp1->power.readOp.leakage * wire_bw; + power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage * wire_bw; + if ((uca_tree == false && option == 2) || search_tree == true) { + wire_bw *= 2; + } + + if (uca_tree == false) { + if (len_temp > wtemp1->repeater_spacing) { + s1 = wtemp1->repeater_size; + l_eff = wtemp1->repeater_spacing; + } else { + s1 = (len_temp / wtemp1->repeater_spacing) * + wtemp1->repeater_size; + l_eff = len_temp; + } + if (ht_temp > wtemp2->repeater_spacing) { + s2 = wtemp2->repeater_size; + } else { + s2 = (len_temp / wtemp2->repeater_spacing) * + wtemp2->repeater_size; + } + // first level + output_buffer(s1, s2, l_eff); + } + + + if (option != 1) { + continue; + } + + // second level + delay += wtemp2->delay; + power.readOp.dynamic += wtemp2->power.readOp.dynamic; + power.searchOp.dynamic += wtemp2->power.readOp.dynamic * init_wire_bw; + power.readOp.leakage += wtemp2->power.readOp.leakage * wire_bw; + power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage * wire_bw; + //cout<<"power.readOp.gate_leakage"<power.readOp.leakage * wire_bw); + power.readOp.gate_leakage += + wtemp2->power.readOp.gate_leakage * wire_bw; + } else { + power.readOp.leakage += (wtemp2->power.readOp.leakage * wire_bw); + power.readOp.gate_leakage += + wtemp2->power.readOp.gate_leakage * wire_bw; + wire_bw *= 2; + + if (ht_temp > wtemp3->repeater_spacing) { + s3 = wtemp3->repeater_size; + l_eff = wtemp3->repeater_spacing; + } else { + s3 = (len_temp / wtemp3->repeater_spacing) * + wtemp3->repeater_size; + l_eff = ht_temp; + } + + output_buffer(s2, s3, l_eff); + } + //cout<<"power.readOp.leakage"<power.readOp.gate_leakage"<power.readOp.gate_leakage< 0 || h > 0) - { //finds delay/power of each link in the tree + if (wtemp1) delete wtemp1; if (wtemp2) delete wtemp2; if (wtemp3) delete wtemp3; - - if(h > v) { - //the iteration considers only one horizontal link - wtemp1 = new Wire(wt, 
len); // hor - wtemp2 = new Wire(wt, len/2); // ver - len_temp = len; - len /= 2; - wtemp3 = 0; - h--; - option = 0; - } - else if (v>0 && h>0) { - //considers one horizontal link and one vertical link - wtemp1 = new Wire(wt, len); // hor - wtemp2 = new Wire(wt, ht); // ver - wtemp3 = new Wire(wt, len/2); // next hor - len_temp = len; - ht_temp = ht; - len /= 2; - ht /= 2; - v--; - h--; - option = 1; - } - else { - // considers only one vertical link - assert(h == 0); - wtemp1 = new Wire(wt, ht); // hor - wtemp2 = new Wire(wt, ht/2); // ver - ht_temp = ht; - ht /= 2; - wtemp3 = 0; - v--; - option = 2; - } - delay += wtemp1->delay; - power.readOp.dynamic += wtemp1->power.readOp.dynamic; - power.searchOp.dynamic += wtemp1->power.readOp.dynamic*init_wire_bw; - power.readOp.leakage += wtemp1->power.readOp.leakage*wire_bw; - power.readOp.gate_leakage += wtemp1->power.readOp.gate_leakage*wire_bw; - //cout<<"power.readOp.gate_leakage"< wtemp1->repeater_spacing) - { - s1 = wtemp1->repeater_size; - l_eff = wtemp1->repeater_spacing; - } - else - { - s1 = (len_temp/wtemp1->repeater_spacing) * wtemp1->repeater_size; - l_eff = len_temp; - } - if (ht_temp > wtemp2->repeater_spacing) - { - s2 = wtemp2->repeater_size; - } - else - { - s2 = (len_temp/wtemp2->repeater_spacing) * wtemp2->repeater_size; - } - // first level - output_buffer(s1, s2, l_eff); - } - - - if (option != 1) - { - continue; - } - - // second level - delay += wtemp2->delay; - power.readOp.dynamic += wtemp2->power.readOp.dynamic; - power.searchOp.dynamic += wtemp2->power.readOp.dynamic*init_wire_bw; - power.readOp.leakage += wtemp2->power.readOp.leakage*wire_bw; - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - //cout<<"power.readOp.gate_leakage"<power.readOp.leakage*wire_bw); - power.readOp.gate_leakage += wtemp2->power.readOp.gate_leakage*wire_bw; - } - else - { - power.readOp.leakage += (wtemp2->power.readOp.leakage*wire_bw); - power.readOp.gate_leakage += 
wtemp2->power.readOp.gate_leakage*wire_bw; - wire_bw*=2; - - if (ht_temp > wtemp3->repeater_spacing) - { - s3 = wtemp3->repeater_size; - l_eff = wtemp3->repeater_spacing; - } - else - { - s3 = (len_temp/wtemp3->repeater_spacing) * wtemp3->repeater_size; - l_eff = ht_temp; - } - - output_buffer(s2, s3, l_eff); - } - //cout<<"power.readOp.leakage"<power.readOp.gate_leakage"<power.readOp.gate_leakage< 16) { - printf("No. of cores should be less than 16!\n"); - } - continue; - } - if(!strncmp("-Cache level", line, strlen("-Cache level"))) { - sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("L2", temp_var, strlen("L2"))) { - cache_level = 0; - } - else { - cache_level = 1; - } - } + if (!strncmp("-Core", line, strlen("-Core"))) { + sscanf(line, "-Core count %d\n", &(cores)); + if (cores > 16) { + printf("No. of cores should be less than 16!\n"); + } + continue; + } - if(!strncmp("-Print level", line, strlen("-Print level"))) { - sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) { - print_detail = 1; - } - else { - print_detail = 0; - } + if (!strncmp("-Cache level", line, strlen("-Cache level"))) { + sscanf(line, "-Cache l%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("L2", temp_var, strlen("L2"))) { + cache_level = 0; + } else { + cache_level = 1; + } + } - } - if(!strncmp("-Add ECC", line, strlen("-Add ECC"))) { - sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - add_ecc_b_ = true; - } - else { - add_ecc_b_ = false; - } - } + if (!strncmp("-Print level", line, strlen("-Print level"))) { + sscanf(line, "-Print l%[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("DETAILED", temp_var, strlen("DETAILED"))) { + print_detail = 1; + } else { + print_detail = 0; + } - if(!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) { - sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var); - if 
(!strncmp("true", temp_var, strlen("true"))) { - print_input_args = true; - } - else { - print_input_args = false; - } - } + } + if (!strncmp("-Add ECC", line, strlen("-Add ECC"))) { + sscanf(line, "-Add ECC %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + add_ecc_b_ = true; + } else { + add_ecc_b_ = false; + } + } - if(!strncmp("-Force cache config", line, strlen("-Force cache config"))) { - sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var); - if (!strncmp("true", temp_var, strlen("true"))) { - force_cache_config = true; - } - else { - force_cache_config = false; - } - } + if (!strncmp("-Print input parameters", line, strlen("-Print input parameters"))) { + sscanf(line, "-Print input %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + print_input_args = true; + } else { + print_input_args = false; + } + } - if(!strncmp("-Ndbl", line, strlen("-Ndbl"))) { - sscanf(line, "-Ndbl %d\n", &(ndbl)); - continue; - } - if(!strncmp("-Ndwl", line, strlen("-Ndwl"))) { - sscanf(line, "-Ndwl %d\n", &(ndwl)); - continue; - } - if(!strncmp("-Nspd", line, strlen("-Nspd"))) { - sscanf(line, "-Nspd %d\n", &(nspd)); - continue; - } - if(!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) { - sscanf(line, "-Ndsam1 %d\n", &(ndsam1)); - continue; - } - if(!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) { - sscanf(line, "-Ndsam2 %d\n", &(ndsam2)); - continue; - } - if(!strncmp("-Ndcm", line, strlen("-Ndcm"))) { - sscanf(line, "-Ndcm %d\n", &(ndcm)); - continue; - } + if (!strncmp("-Force cache config", line, strlen("-Force cache config"))) { + sscanf(line, "-Force cache %[^\"]\"%[^\"]\"", jk, temp_var); + if (!strncmp("true", temp_var, strlen("true"))) { + force_cache_config = true; + } else { + force_cache_config = false; + } + } - } - rpters_in_htree = true; - fclose(fp); + if (!strncmp("-Ndbl", line, strlen("-Ndbl"))) { + sscanf(line, "-Ndbl %d\n", &(ndbl)); + continue; + } + if (!strncmp("-Ndwl", line, 
strlen("-Ndwl"))) { + sscanf(line, "-Ndwl %d\n", &(ndwl)); + continue; + } + if (!strncmp("-Nspd", line, strlen("-Nspd"))) { + sscanf(line, "-Nspd %d\n", &(nspd)); + continue; + } + if (!strncmp("-Ndsam1", line, strlen("-Ndsam1"))) { + sscanf(line, "-Ndsam1 %d\n", &(ndsam1)); + continue; + } + if (!strncmp("-Ndsam2", line, strlen("-Ndsam2"))) { + sscanf(line, "-Ndsam2 %d\n", &(ndsam2)); + continue; + } + if (!strncmp("-Ndcm", line, strlen("-Ndcm"))) { + sscanf(line, "-Ndcm %d\n", &(ndcm)); + continue; + } + + } + rpters_in_htree = true; + fclose(fp); } - void -InputParameter::display_ip() -{ - cout << "Cache size : " << cache_sz << endl; - cout << "Block size : " << line_sz << endl; - cout << "Associativity : " << assoc << endl; - cout << "Read only ports : " << num_rd_ports << endl; - cout << "Write only ports : " << num_wr_ports << endl; - cout << "Read write ports : " << num_rw_ports << endl; - cout << "Single ended read ports : " << num_se_rd_ports << endl; - if (fully_assoc||pure_cam) - { - cout << "Search ports : " << num_search_ports << endl; - } - cout << "Cache banks (UCA) : " << nbanks << endl; - cout << "Technology : " << F_sz_um << endl; - cout << "Temperature : " << temp << endl; - cout << "Tag size : " << tag_w << endl; - if (is_cache) { - cout << "array type : " << "Cache" << endl; - } - if (pure_ram) { - cout << "array type : " << "Scratch RAM" << endl; - } - if (pure_cam) - { - cout << "array type : " << "CAM" << endl; - } - cout << "Model as memory : " << is_main_mem << endl; - cout << "Access mode : " << access_mode << endl; - cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl; - cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl; - cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl; - cout << "Tag array peripheral type : " << tag_arr_peri_global_tech_type << endl; - cout << "Optimization target : " << ed << endl; - cout << "Design objective (UCA wt) : " << delay_wt << " " - 
<< dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt - << " " << area_wt << endl; - cout << "Design objective (UCA dev) : " << delay_dev << " " - << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev - << " " << area_dev << endl; - if (nuca) - { - cout << "Cores : " << cores << endl; - - - cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " " - << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca - << " " << area_wt_nuca << endl; - cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " " - << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca - << " " << area_dev_nuca << endl; +void +InputParameter::display_ip() { + cout << "Cache size : " << cache_sz << endl; + cout << "Block size : " << line_sz << endl; + cout << "Associativity : " << assoc << endl; + cout << "Read only ports : " << num_rd_ports << endl; + cout << "Write only ports : " << num_wr_ports << endl; + cout << "Read write ports : " << num_rw_ports << endl; + cout << "Single ended read ports : " << num_se_rd_ports << endl; + if (fully_assoc || pure_cam) { + cout << "Search ports : " << num_search_ports << endl; + } + cout << "Cache banks (UCA) : " << nbanks << endl; + cout << "Technology : " << F_sz_um << endl; + cout << "Temperature : " << temp << endl; + cout << "Tag size : " << tag_w << endl; + if (is_cache) { + cout << "array type : " << "Cache" << endl; + } + if (pure_ram) { + cout << "array type : " << "Scratch RAM" << endl; + } + if (pure_cam) { + cout << "array type : " << "CAM" << endl; + } + cout << "Model as memory : " << is_main_mem << endl; + cout << "Access mode : " << access_mode << endl; + cout << "Data array cell type : " << data_arr_ram_cell_tech_type << endl; + cout << "Data array peripheral type : " << data_arr_peri_global_tech_type << endl; + cout << "Tag array cell type : " << tag_arr_ram_cell_tech_type << endl; + cout << "Tag array peripheral type : " << 
tag_arr_peri_global_tech_type << endl; + cout << "Optimization target : " << ed << endl; + cout << "Design objective (UCA wt) : " << delay_wt << " " + << dynamic_power_wt << " " << leakage_power_wt << " " << cycle_time_wt + << " " << area_wt << endl; + cout << "Design objective (UCA dev) : " << delay_dev << " " + << dynamic_power_dev << " " << leakage_power_dev << " " << cycle_time_dev + << " " << area_dev << endl; + if (nuca) { + cout << "Cores : " << cores << endl; + + + cout << "Design objective (NUCA wt) : " << delay_wt_nuca << " " + << dynamic_power_wt_nuca << " " << leakage_power_wt_nuca << " " << cycle_time_wt_nuca + << " " << area_wt_nuca << endl; + cout << "Design objective (NUCA dev) : " << delay_dev_nuca << " " + << dynamic_power_dev_nuca << " " << leakage_power_dev_nuca << " " << cycle_time_dev_nuca + << " " << area_dev_nuca << endl; + } + cout << "Cache model : " << nuca << endl; + cout << "Nuca bank : " << nuca_bank_count << endl; + cout << "Wire inside mat : " << wire_is_mat_type << endl; + cout << "Wire outside mat : " << wire_os_mat_type << endl; + cout << "Interconnect projection : " << ic_proj_type << endl; + cout << "Wire signalling : " << force_wiretype << endl; + cout << "Print level : " << print_detail << endl; + cout << "ECC overhead : " << add_ecc_b_ << endl; + cout << "Page size : " << page_sz_bits << endl; + cout << "Burst length : " << burst_len << endl; + cout << "Internal prefetch width : " << int_prefetch_w << endl; + cout << "Force cache config : " << g_ip->force_cache_config << endl; + if (g_ip->force_cache_config) { + cout << "Ndwl : " << g_ip->ndwl << endl; + cout << "Ndbl : " << g_ip->ndbl << endl; + cout << "Nspd : " << g_ip->nspd << endl; + cout << "Ndcm : " << g_ip->ndcm << endl; + cout << "Ndsam1 : " << g_ip->ndsam1 << endl; + cout << "Ndsam2 : " << g_ip->ndsam2 << endl; } - cout << "Cache model : " << nuca << endl; - cout << "Nuca bank : " << nuca_bank_count << endl; - cout << "Wire inside mat : " << wire_is_mat_type << 
endl; - cout << "Wire outside mat : " << wire_os_mat_type << endl; - cout << "Interconnect projection : " << ic_proj_type << endl; - cout << "Wire signalling : " << force_wiretype << endl; - cout << "Print level : " << print_detail << endl; - cout << "ECC overhead : " << add_ecc_b_ << endl; - cout << "Page size : " << page_sz_bits << endl; - cout << "Burst length : " << burst_len << endl; - cout << "Internal prefetch width : " << int_prefetch_w << endl; - cout << "Force cache config : " << g_ip->force_cache_config << endl; - if (g_ip->force_cache_config) { - cout << "Ndwl : " << g_ip->ndwl << endl; - cout << "Ndbl : " << g_ip->ndbl << endl; - cout << "Nspd : " << g_ip->nspd << endl; - cout << "Ndcm : " << g_ip->ndcm << endl; - cout << "Ndsam1 : " << g_ip->ndsam1 << endl; - cout << "Ndsam2 : " << g_ip->ndsam2 << endl; - } } -powerComponents operator+(const powerComponents & x, const powerComponents & y) -{ - powerComponents z; +powerComponents operator+(const powerComponents & x, const powerComponents & y) { + powerComponents z; - z.dynamic = x.dynamic + y.dynamic; - z.leakage = x.leakage + y.leakage; - z.gate_leakage = x.gate_leakage + y.gate_leakage; - z.short_circuit = x.short_circuit + y.short_circuit; - z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage; + z.dynamic = x.dynamic + y.dynamic; + z.leakage = x.leakage + y.leakage; + z.gate_leakage = x.gate_leakage + y.gate_leakage; + z.short_circuit = x.short_circuit + y.short_circuit; + z.longer_channel_leakage = x.longer_channel_leakage + y.longer_channel_leakage; - return z; + return z; } -powerComponents operator*(const powerComponents & x, double const * const y) -{ - powerComponents z; +powerComponents operator*(const powerComponents & x, double const * const y) { + powerComponents z; - z.dynamic = x.dynamic*y[0]; - z.leakage = x.leakage*y[1]; - z.gate_leakage = x.gate_leakage*y[2]; - z.short_circuit = x.short_circuit*y[3]; - z.longer_channel_leakage = 
x.longer_channel_leakage*y[1];//longer channel leakage has the same behavior as normal leakage + z.dynamic = x.dynamic * y[0]; + z.leakage = x.leakage * y[1]; + z.gate_leakage = x.gate_leakage * y[2]; + z.short_circuit = x.short_circuit * y[3]; + //longer channel leakage has the same behavior as normal leakage + z.longer_channel_leakage = x.longer_channel_leakage * y[1]; - return z; + return z; } -powerDef operator+(const powerDef & x, const powerDef & y) -{ - powerDef z; +powerDef operator+(const powerDef & x, const powerDef & y) { + powerDef z; - z.readOp = x.readOp + y.readOp; - z.writeOp = x.writeOp + y.writeOp; - z.searchOp = x.searchOp + y.searchOp; - return z; + z.readOp = x.readOp + y.readOp; + z.writeOp = x.writeOp + y.writeOp; + z.searchOp = x.searchOp + y.searchOp; + return z; } -powerDef operator*(const powerDef & x, double const * const y) -{ - powerDef z; +powerDef operator*(const powerDef & x, double const * const y) { + powerDef z; - z.readOp = x.readOp*y; - z.writeOp = x.writeOp*y; - z.searchOp = x.searchOp*y; - return z; + z.readOp = x.readOp * y; + z.writeOp = x.writeOp * y; + z.searchOp = x.searchOp * y; + return z; } -uca_org_t cacti_interface(const string & infile_name) -{ +uca_org_t cacti_interface(const string & infile_name) { - uca_org_t fin_res; - //uca_org_t result; - fin_res.valid = false; + uca_org_t fin_res; + //uca_org_t result; + fin_res.valid = false; - g_ip = new InputParameter(); - g_ip->parse_cfg(infile_name); - if(!g_ip->error_checking()) - exit(0); - if (g_ip->print_input_args) - g_ip->display_ip(); + g_ip = new InputParameter(); + g_ip->parse_cfg(infile_name); + if (!g_ip->error_checking(infile_name)) + exit(0); + if (g_ip->print_input_args) + g_ip->display_ip(); - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. 
// For HighRadix Only @@ -703,19 +653,18 @@ uca_org_t cacti_interface(const string & infile_name) // exit(0); // For HighRadix Only End - if (g_ip->nuca == 1) - { - Nuca n(&g_tp.peri_global); - n.sim_nuca(); - } - g_ip->display_ip(); - solve(&fin_res); + if (g_ip->nuca == 1) { + Nuca n(&g_tp.peri_global); + n.sim_nuca(); + } + g_ip->display_ip(); + solve(&fin_res); - output_UCA(&fin_res); - output_data_csv(fin_res); + output_UCA(&fin_res); + output_data_csv(fin_res); - delete (g_ip); - return fin_res; + delete (g_ip); + return fin_res; } //cacti6.5's plain interface, please keep !!! @@ -773,142 +722,139 @@ uca_org_t cacti_interface( int nuca_dev_func_area, int nuca_dev_func_cycle_time, int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported - int p_input) -{ - g_ip = new InputParameter(); - g_ip->add_ecc_b_ = true; - - g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; - g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; - g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; - g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; - - g_ip->ic_proj_type = interconnect_projection_type_in; - g_ip->wire_is_mat_type = wire_inside_mat_type_in; - g_ip->wire_os_mat_type = wire_outside_mat_type_in; - g_ip->burst_len = burst_length; - g_ip->int_prefetch_w = pre_width; - g_ip->page_sz_bits = page_sz; - - g_ip->cache_sz = cache_size; - g_ip->line_sz = line_size; - g_ip->assoc = associativity; - g_ip->nbanks = banks; - g_ip->out_w = output_width; - g_ip->specific_tag = specific_tag; - if (tag_width == 0) { - g_ip->tag_w = 42; - } - else { - g_ip->tag_w = tag_width; - } - - g_ip->access_mode = access_mode; - g_ip->delay_wt = obj_func_delay; - g_ip->dynamic_power_wt = obj_func_dynamic_power; - g_ip->leakage_power_wt = obj_func_leakage_power; - g_ip->area_wt = obj_func_area; - g_ip->cycle_time_wt = obj_func_cycle_time; - g_ip->delay_dev = dev_func_delay; - 
g_ip->dynamic_power_dev = dev_func_dynamic_power; - g_ip->leakage_power_dev = dev_func_leakage_power; - g_ip->area_dev = dev_func_area; - g_ip->cycle_time_dev = dev_func_cycle_time; - g_ip->ed = ed_ed2_none; - - switch(wt) { + int p_input) { + g_ip = new InputParameter(); + g_ip->add_ecc_b_ = true; + + g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; + + g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->wire_is_mat_type = wire_inside_mat_type_in; + g_ip->wire_os_mat_type = wire_outside_mat_type_in; + g_ip->burst_len = burst_length; + g_ip->int_prefetch_w = pre_width; + g_ip->page_sz_bits = page_sz; + + g_ip->cache_sz = cache_size; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = output_width; + g_ip->specific_tag = specific_tag; + if (tag_width == 0) { + g_ip->tag_w = 42; + } else { + g_ip->tag_w = tag_width; + } + + g_ip->access_mode = access_mode; + g_ip->delay_wt = obj_func_delay; + g_ip->dynamic_power_wt = obj_func_dynamic_power; + g_ip->leakage_power_wt = obj_func_leakage_power; + g_ip->area_wt = obj_func_area; + g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->delay_dev = dev_func_delay; + g_ip->dynamic_power_dev = dev_func_dynamic_power; + g_ip->leakage_power_dev = dev_func_leakage_power; + g_ip->area_dev = dev_func_area; + g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->ed = ed_ed2_none; + + switch (wt) { case (0): - g_ip->force_wiretype = 0; - g_ip->wt = Global; - break; + g_ip->force_wiretype = 0; + g_ip->wt = Global; + break; case (1): - g_ip->force_wiretype = 1; - g_ip->wt = Global; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global; + break; case (2): - g_ip->force_wiretype = 1; - g_ip->wt = Global_5; - break; + g_ip->force_wiretype = 1; 
+ g_ip->wt = Global_5; + break; case (3): - g_ip->force_wiretype = 1; - g_ip->wt = Global_10; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global_10; + break; case (4): - g_ip->force_wiretype = 1; - g_ip->wt = Global_20; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global_20; + break; case (5): - g_ip->force_wiretype = 1; - g_ip->wt = Global_30; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Global_30; + break; case (6): - g_ip->force_wiretype = 1; - g_ip->wt = Low_swing; - break; + g_ip->force_wiretype = 1; + g_ip->wt = Low_swing; + break; default: - cout << "Unknown wire type!\n"; - exit(0); - } - - g_ip->delay_wt_nuca = nuca_obj_func_delay; - g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power; - g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power; - g_ip->area_wt_nuca = nuca_obj_func_area; - g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time; - g_ip->delay_dev_nuca = dev_func_delay; - g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power; - g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power; - g_ip->area_dev_nuca = nuca_dev_func_area; - g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time; - g_ip->nuca = is_nuca; - g_ip->nuca_bank_count = nuca_bank_count; - if(nuca_bank_count > 0) { - g_ip->force_nuca_bank = 1; - } - g_ip->cores = core_count; - g_ip->cache_level = cache_level; - - g_ip->temp = temp; - - g_ip->F_sz_nm = tech_node; - g_ip->F_sz_um = tech_node / 1000; - g_ip->is_main_mem = (main_mem != 0) ? true : false; - g_ip->is_cache = (cache != 0) ? true : false; - g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : false; - - g_ip->num_rw_ports = rw_ports; - g_ip->num_rd_ports = excl_read_ports; - g_ip->num_wr_ports = excl_write_ports; - g_ip->num_se_rd_ports = single_ended_read_ports; - g_ip->print_detail = 1; - g_ip->nuca = 0; - - g_ip->wt = Global_5; - g_ip->force_cache_config = false; - g_ip->force_wiretype = false; - g_ip->print_input_args = p_input; - - - uca_org_t fin_res; - fin_res.valid = false; - - if (g_ip->error_checking() == false) exit(0); - if (g_ip->print_input_args) - g_ip->display_ip(); - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. - - if (g_ip->nuca == 1) - { - Nuca n(&g_tp.peri_global); - n.sim_nuca(); - } - solve(&fin_res); - - output_UCA(&fin_res); + cout << "Unknown wire type!\n"; + exit(0); + } - delete (g_ip); - return fin_res; + g_ip->delay_wt_nuca = nuca_obj_func_delay; + g_ip->dynamic_power_wt_nuca = nuca_obj_func_dynamic_power; + g_ip->leakage_power_wt_nuca = nuca_obj_func_leakage_power; + g_ip->area_wt_nuca = nuca_obj_func_area; + g_ip->cycle_time_wt_nuca = nuca_obj_func_cycle_time; + g_ip->delay_dev_nuca = dev_func_delay; + g_ip->dynamic_power_dev_nuca = nuca_dev_func_dynamic_power; + g_ip->leakage_power_dev_nuca = nuca_dev_func_leakage_power; + g_ip->area_dev_nuca = nuca_dev_func_area; + g_ip->cycle_time_dev_nuca = nuca_dev_func_cycle_time; + g_ip->nuca = is_nuca; + g_ip->nuca_bank_count = nuca_bank_count; + if (nuca_bank_count > 0) { + g_ip->force_nuca_bank = 1; + } + g_ip->cores = core_count; + g_ip->cache_level = cache_level; + + g_ip->temp = temp; + + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache != 0) ? true : false; + g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : false; + + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; + g_ip->num_se_rd_ports = single_ended_read_ports; + g_ip->print_detail = 1; + g_ip->nuca = 0; + + g_ip->wt = Global_5; + g_ip->force_cache_config = false; + g_ip->force_wiretype = false; + g_ip->print_input_args = p_input; + + + uca_org_t fin_res; + fin_res.valid = false; + + if (g_ip->error_checking() == false) exit(0); + if (g_ip->print_input_args) + g_ip->display_ip(); + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. + + if (g_ip->nuca == 1) { + Nuca n(&g_tp.peri_global); + n.sim_nuca(); + } + solve(&fin_res); + + output_UCA(&fin_res); + + delete (g_ip); + return fin_res; } //McPAT's plain interface, please keep !!! @@ -964,200 +910,187 @@ uca_org_t cacti_interface( int ndcm, int ndsam1,//para50 int ndsam2, - int ecc) -{ - g_ip = new InputParameter(); - - uca_org_t fin_res; - fin_res.valid = false; - - g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; - g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; - g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; - g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; - - g_ip->ic_proj_type = interconnect_projection_type_in; - g_ip->wire_is_mat_type = wire_inside_mat_type_in; - g_ip->wire_os_mat_type = wire_outside_mat_type_in; - g_ip->burst_len = BURST_LENGTH_in; - g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; - g_ip->page_sz_bits = PAGE_SIZE_BITS_in; - - g_ip->cache_sz = cache_size; - g_ip->line_sz = line_size; - g_ip->assoc = associativity; - g_ip->nbanks = banks; - g_ip->out_w = output_width; - g_ip->specific_tag = specific_tag; - if (specific_tag == 0) { - g_ip->tag_w = 42; - } - else { - g_ip->tag_w = tag_width; - } - - g_ip->access_mode = access_mode; - g_ip->delay_wt = obj_func_delay; - g_ip->dynamic_power_wt = obj_func_dynamic_power; - 
g_ip->leakage_power_wt = obj_func_leakage_power; - g_ip->area_wt = obj_func_area; - g_ip->cycle_time_wt = obj_func_cycle_time; - g_ip->delay_dev = dev_func_delay; - g_ip->dynamic_power_dev = dev_func_dynamic_power; - g_ip->leakage_power_dev = dev_func_leakage_power; - g_ip->area_dev = dev_func_area; - g_ip->cycle_time_dev = dev_func_cycle_time; - g_ip->temp = temp; - g_ip->ed = ed_ed2_none; - - g_ip->F_sz_nm = tech_node; - g_ip->F_sz_um = tech_node / 1000; - g_ip->is_main_mem = (main_mem != 0) ? true : false; - g_ip->is_cache = (cache ==1) ? true : false; - g_ip->pure_ram = (cache ==0) ? true : false; - g_ip->pure_cam = (cache ==2) ? true : false; - g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; - g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; - g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; - - g_ip->num_rw_ports = rw_ports; - g_ip->num_rd_ports = excl_read_ports; - g_ip->num_wr_ports = excl_write_ports; - g_ip->num_se_rd_ports = single_ended_read_ports; - g_ip->num_search_ports = search_ports; - - g_ip->print_detail = 1; - g_ip->nuca = 0; - - if (force_wiretype == 0) - { - g_ip->wt = Global; - g_ip->force_wiretype = false; - } - else - { g_ip->force_wiretype = true; - if (wiretype==10) { - g_ip->wt = Global_10; - } - if (wiretype==20) { - g_ip->wt = Global_20; - } - if (wiretype==30) { - g_ip->wt = Global_30; - } - if (wiretype==5) { - g_ip->wt = Global_5; - } - if (wiretype==0) { - g_ip->wt = Low_swing; - } - } - //g_ip->wt = Global_5; - if (force_config == 0) - { - g_ip->force_cache_config = false; + int ecc) { + g_ip = new InputParameter(); + + uca_org_t fin_res; + fin_res.valid = false; + + g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; + g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; + g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; + g_ip->tag_arr_peri_global_tech_type = 
tag_arr_peri_global_tech_flavor_in; + + g_ip->ic_proj_type = interconnect_projection_type_in; + g_ip->wire_is_mat_type = wire_inside_mat_type_in; + g_ip->wire_os_mat_type = wire_outside_mat_type_in; + g_ip->burst_len = BURST_LENGTH_in; + g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; + g_ip->page_sz_bits = PAGE_SIZE_BITS_in; + + g_ip->cache_sz = cache_size; + g_ip->line_sz = line_size; + g_ip->assoc = associativity; + g_ip->nbanks = banks; + g_ip->out_w = output_width; + g_ip->specific_tag = specific_tag; + if (specific_tag == 0) { + g_ip->tag_w = 42; + } else { + g_ip->tag_w = tag_width; + } + + g_ip->access_mode = access_mode; + g_ip->delay_wt = obj_func_delay; + g_ip->dynamic_power_wt = obj_func_dynamic_power; + g_ip->leakage_power_wt = obj_func_leakage_power; + g_ip->area_wt = obj_func_area; + g_ip->cycle_time_wt = obj_func_cycle_time; + g_ip->delay_dev = dev_func_delay; + g_ip->dynamic_power_dev = dev_func_dynamic_power; + g_ip->leakage_power_dev = dev_func_leakage_power; + g_ip->area_dev = dev_func_area; + g_ip->cycle_time_dev = dev_func_cycle_time; + g_ip->temp = temp; + g_ip->ed = ed_ed2_none; + + g_ip->F_sz_nm = tech_node; + g_ip->F_sz_um = tech_node / 1000; + g_ip->is_main_mem = (main_mem != 0) ? true : false; + g_ip->is_cache = (cache == 1) ? true : false; + g_ip->pure_ram = (cache == 0) ? true : false; + g_ip->pure_cam = (cache == 2) ? true : false; + g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? 
true : false; + g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; + g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; + + g_ip->num_rw_ports = rw_ports; + g_ip->num_rd_ports = excl_read_ports; + g_ip->num_wr_ports = excl_write_ports; + g_ip->num_se_rd_ports = single_ended_read_ports; + g_ip->num_search_ports = search_ports; + + g_ip->print_detail = 1; + g_ip->nuca = 0; + + if (force_wiretype == 0) { + g_ip->wt = Global; + g_ip->force_wiretype = false; + } else { + g_ip->force_wiretype = true; + if (wiretype == 10) { + g_ip->wt = Global_10; + } + if (wiretype == 20) { + g_ip->wt = Global_20; + } + if (wiretype == 30) { + g_ip->wt = Global_30; + } + if (wiretype == 5) { + g_ip->wt = Global_5; + } + if (wiretype == 0) { + g_ip->wt = Low_swing; + } } - else - { + //g_ip->wt = Global_5; + if (force_config == 0) { + g_ip->force_cache_config = false; + } else { g_ip->force_cache_config = true; - g_ip->ndbl=ndbl; - g_ip->ndwl=ndwl; - g_ip->nspd=nspd; - g_ip->ndcm=ndcm; - g_ip->ndsam1=ndsam1; - g_ip->ndsam2=ndsam2; + g_ip->ndbl = ndbl; + g_ip->ndwl = ndwl; + g_ip->nspd = nspd; + g_ip->ndcm = ndcm; + g_ip->ndsam1 = ndsam1; + g_ip->ndsam2 = ndsam2; } - if (ecc==0){ - g_ip->add_ecc_b_=false; - } - else - { - g_ip->add_ecc_b_=true; - } + if (ecc == 0) { + g_ip->add_ecc_b_ = false; + } else { + g_ip->add_ecc_b_ = true; + } - if(!g_ip->error_checking()) - exit(0); + if (!g_ip->error_checking()) + exit(0); - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. 
- g_ip->display_ip(); - solve(&fin_res); - output_UCA(&fin_res); - output_data_csv(fin_res); - delete (g_ip); + g_ip->display_ip(); + solve(&fin_res); + output_UCA(&fin_res); + output_data_csv(fin_res); + delete (g_ip); - return fin_res; + return fin_res; } -bool InputParameter::error_checking() -{ - int A; - bool seq_access = false; - fast_access = true; +bool InputParameter::error_checking(string name) { + int A; + bool seq_access = false; + fast_access = true; - switch (access_mode) - { + switch (access_mode) { case 0: - seq_access = false; - fast_access = false; - break; + seq_access = false; + fast_access = false; + break; case 1: - seq_access = true; - fast_access = false; - break; + seq_access = true; + fast_access = false; + break; case 2: - seq_access = false; - fast_access = true; - break; - } - - if(is_main_mem) - { - if(ic_proj_type == 0) - { - cerr << "DRAM model supports only conservative interconnect projection!\n\n"; - return false; + seq_access = false; + fast_access = true; + break; } - } - - - uint32_t B = line_sz; - - if (B < 1) - { - cerr << "Block size must >= 1" << endl; - return false; - } - else if (B*8 < out_w) - { - cerr << "Block size must be at least " << out_w/8 << endl; - return false; - } - - if (F_sz_um <= 0) - { - cerr << "Feature size must be > 0" << endl; - return false; - } - else if (F_sz_um > 0.091) - { - cerr << "Feature size must be <= 90 nm" << endl; - return false; - } - - - uint32_t RWP = num_rw_ports; - uint32_t ERP = num_rd_ports; - uint32_t EWP = num_wr_ports; - uint32_t NSER = num_se_rd_ports; - uint32_t SCHP = num_search_ports; + + if (is_main_mem) { + if (ic_proj_type == 0) { + cerr << name + << ": DRAM model supports only conservative interconnect " + << "projection but is set to aggressive!\n\n"; + return false; + } + } + + + uint32_t B = line_sz; + + if (B < 1) { + cerr << name << ": Block size must be >= 1, but is set to " << B + << endl; + return false; + } else if (B*8 < out_w) { + cerr << name << ": Block 
size must be at least " << out_w / 8 + << ", but is set to " << B << endl; + return false; + } + + if (F_sz_um <= 0) { + cerr << name << ": Feature size must be > 0, but is set to " + << F_sz_um << endl; + return false; + } else if (F_sz_um > 0.091) { + cerr << name << ": Feature size must be <= 90 nm, but is set to " + << F_sz_um << endl; + return false; + } + + + uint32_t RWP = num_rw_ports; + uint32_t ERP = num_rd_ports; + uint32_t EWP = num_wr_ports; + uint32_t NSER = num_se_rd_ports; + uint32_t SCHP = num_search_ports; //TODO: revisit this. This is an important feature. Sheng thought this should be used // // If multiple banks and multiple ports are specified, then if number of ports is less than or equal to @@ -1181,26 +1114,26 @@ bool InputParameter::error_checking() // return false; // } // else if ((RWP+ERP+EWP) < 1) - // Changed to new implementation: - // The number of ports specified at input is per bank - if ((RWP+ERP+EWP) < 1) - { - cerr << "Must have at least one port" << endl; - return false; - } - - if (is_pow2(nbanks) == false) - { - cerr << "Number of subbanks should be greater than or equal to 1 and should be a power of 2" << endl; - return false; - } - - int C = cache_sz/nbanks; - if (C < 64) - { - cerr << "Cache size must >=64" << endl; - return false; - } + // Changed to new implementation: + // The number of ports specified at input is per bank + if ((RWP + ERP + EWP) < 1) { + cerr << name << ": Must have at least one port" << endl; + return false; + } + + if (is_pow2(nbanks) == false) { + cerr << name << ": Number of subbanks should be greater than or " + << "equal to 1 and should be a power of 2, but is set to " + << nbanks << endl; + return false; + } + + int C = cache_sz / nbanks; + if (C < 64) { + cerr << name << ": Cache size must be >=64, but is set to " << C + << endl; + return false; + } //TODO: revisit this // if (pure_ram==true && assoc!=1) @@ -1210,54 +1143,64 @@ bool InputParameter::error_checking() // } //fully assoc and cam 
check - if (is_cache && assoc==0) - fully_assoc =true; + if (is_cache && assoc == 0) + fully_assoc = true; else fully_assoc = false; - if (pure_cam==true && assoc!=0) - { - cerr << "Pure CAM must have associativity as 0" << endl; - return false; + if (pure_cam == true && assoc != 0) { + cerr << name + << ": Pure CAM must have associativity as 0, but is set to" + << assoc << endl; + return false; } - if (assoc==0 && (pure_cam==false && is_cache ==false)) - { - cerr << "Only CAM or Fully associative cache can have associativity as 0" << endl; - return false; + if (assoc == 0 && (pure_cam == false && is_cache == false)) { + cerr << name + << ": Only CAM or Fully associative cache can have associativity " + << "as 0" << endl; + return false; } - if ((fully_assoc==true || pure_cam==true) - && (data_arr_ram_cell_tech_type!= tag_arr_ram_cell_tech_type - || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type )) - { - cerr << "CAM and fully associative cache must have same device type for both data and tag array" << endl; - return false; + if ((fully_assoc == true || pure_cam == true) + && (data_arr_ram_cell_tech_type != tag_arr_ram_cell_tech_type + || data_arr_peri_global_tech_type != tag_arr_peri_global_tech_type)) { + cerr << name + << ": CAM and fully associative cache must have same device type " + << "for both data and tag array" << endl; + cerr << "\tData array RAM cell = " << data_arr_ram_cell_tech_type + << ", Tag array RAM cell = " << tag_arr_ram_cell_tech_type << endl + << "\tData array peripheral = " << data_arr_peri_global_tech_type + << ", Tag array peripheral = " << tag_arr_peri_global_tech_type + << endl; + return false; } - if ((fully_assoc==true || pure_cam==true) - && (data_arr_ram_cell_tech_type== lp_dram || data_arr_ram_cell_tech_type== comm_dram)) - { - cerr << "DRAM based CAM and fully associative cache are not supported" << endl; - return false; + if ((fully_assoc == true || pure_cam == true) + && (data_arr_ram_cell_tech_type == lp_dram 
|| + data_arr_ram_cell_tech_type == comm_dram)) { + cerr << name << ": DRAM based CAM and fully associative cache are not " + << "supported" << endl; + return false; } - if ((fully_assoc==true || pure_cam==true) - && (is_main_mem==true)) - { - cerr << "CAM and fully associative cache cannot be as main memory" << endl; - return false; + if ((fully_assoc == true || pure_cam == true) + && (is_main_mem == true)) { + cerr << name + << ": CAM and fully associative cache cannot be as main memory" + << endl; + return false; } - if ((fully_assoc || pure_cam) && SCHP<1) - { - cerr << "CAM and fully associative must have at least 1 search port" << endl; - return false; + if ((fully_assoc || pure_cam) && SCHP < 1) { + cerr << name + << ": CAM and fully associative must have at least 1 search port," + << " but are set to " << SCHP << endl; + return false; } - if (RWP==0 && ERP==0 && SCHP>0 && ((fully_assoc || pure_cam))) - { - ERP=SCHP; + if (RWP == 0 && ERP == 0 && SCHP > 0 && ((fully_assoc || pure_cam))) { + ERP = SCHP; } // if ((!(fully_assoc || pure_cam)) && SCHP>=1) @@ -1266,140 +1209,112 @@ bool InputParameter::error_checking() // return false; // } - if (assoc == 0) - { - A = C/B; - //fully_assoc = true; - } - else - { - if (assoc == 1) - { - A = 1; - //fully_assoc = false; + if (assoc == 0) { + A = C / B; + //fully_assoc = true; + } else { + if (assoc == 1) { + A = 1; + //fully_assoc = false; + } else { + //fully_assoc = false; + A = assoc; + if (is_pow2(A) == false) { + cerr << name + << ": Associativity must be a power of 2, but is set to " + << A << endl; + return false; + } + } } - else - { - //fully_assoc = false; - A = assoc; - if (is_pow2(A) == false) - { - cerr << "Associativity must be a power of 2" << endl; + + if (C / (B*A) <= 1 && assoc != 0) { + cerr << name << ": Number of sets (" << (C / (B * A)) + << ") is too small: " << endl; + cerr << " Need to either increase cache size, or decrease " + << "associativity or block size" << endl; + cerr << " (or use 
fully associative cache)" << endl; return false; - } } - } - - if (C/(B*A) <= 1 && assoc!=0) - { - cerr << "Number of sets is too small: " << endl; - cerr << " Need to either increase cache size, or decrease associativity or block size" << endl; - cerr << " (or use fully associative cache)" << endl; - return false; - } - - block_sz = B; - - /*dt: testing sequential access mode*/ - if(seq_access) - { - tag_assoc = A; - data_assoc = 1; - is_seq_acc = true; - } - else - { - tag_assoc = A; - data_assoc = A; - is_seq_acc = false; - } - - if (assoc==0) - { - data_assoc = 1; - } - num_rw_ports = RWP; - num_rd_ports = ERP; - num_wr_ports = EWP; - num_se_rd_ports = NSER; - if (!(fully_assoc || pure_cam)) - num_search_ports = 0; - nsets = C/(B*A); - - if (temp < 300 || temp > 400 || temp%10 != 0) - { - cerr << temp << " Temperature must be between 300 and 400 Kelvin and multiple of 10." << endl; - return false; - } - - if (nsets < 1) - { - cerr << "Less than one set..." << endl; - return false; - } - - return true; + + block_sz = B; + + /*dt: testing sequential access mode*/ + if (seq_access) { + tag_assoc = A; + data_assoc = 1; + is_seq_acc = true; + } else { + tag_assoc = A; + data_assoc = A; + is_seq_acc = false; + } + + if (assoc == 0) { + data_assoc = 1; + } + num_rw_ports = RWP; + num_rd_ports = ERP; + num_wr_ports = EWP; + num_se_rd_ports = NSER; + if (!(fully_assoc || pure_cam)) + num_search_ports = 0; + nsets = C / (B * A); + + if (temp < 300 || temp > 400 || temp % 10 != 0) { + cerr << name << ": " << temp + << " Temperature must be between 300 and 400 Kelvin and multiple " + << "of 10." << endl; + return false; + } + + if (nsets < 1) { + cerr << name << ": Less than one set..." 
<< endl; + return false; + } + + return true; } -void output_data_csv(const uca_org_t & fin_res) -{ - //TODO: the csv output should remain - fstream file("out.csv", ios::in); - bool print_index = file.fail(); - file.close(); - - file.open("out.csv", ios::out|ios::app); - if (file.fail() == true) - { - cerr << "File out.csv could not be opened successfully" << endl; - } - else - { - if (print_index == true) - { - file << "Tech node (nm), "; - file << "Capacity (bytes), "; - file << "Number of banks, "; - file << "Associativity, "; - file << "Output width (bits), "; - file << "Access time (ns), "; - file << "Random cycle time (ns), "; -// file << "Multisubbank interleave cycle time (ns), "; - -// file << "Delay request network (ns), "; -// file << "Delay inside mat (ns), "; -// file << "Delay reply network (ns), "; -// file << "Tag array access time (ns), "; -// file << "Data array access time (ns), "; -// file << "Refresh period (microsec), "; -// file << "DRAM array availability (%), "; - file << "Dynamic search energy (nJ), "; - file << "Dynamic read energy (nJ), "; - file << "Dynamic write energy (nJ), "; -// file << "Tag Dynamic read energy (nJ), "; -// file << "Data Dynamic read energy (nJ), "; -// file << "Dynamic read power (mW), "; - file << "Standby leakage per bank(mW), "; -// file << "Leakage per bank with leak power management (mW), "; -// file << "Leakage per bank with leak power management (mW), "; -// file << "Refresh power as percentage of standby leakage, "; - file << "Area (mm2), "; - file << "Ndwl, "; - file << "Ndbl, "; - file << "Nspd, "; - file << "Ndcm, "; - file << "Ndsam_level_1, "; - file << "Ndsam_level_2, "; - file << "Data arrary area efficiency %, "; - file << "Ntwl, "; - file << "Ntbl, "; - file << "Ntspd, "; - file << "Ntcm, "; - file << "Ntsam_level_1, "; - file << "Ntsam_level_2, "; - file << "Tag arrary area efficiency %, "; +void output_data_csv(const uca_org_t & fin_res) { + //TODO: the csv output should remain + fstream 
file("out.csv", ios::in); + bool print_index = file.fail(); + file.close(); + + file.open("out.csv", ios::out | ios::app); + if (file.fail() == true) { + cerr << "File out.csv could not be opened successfully" << endl; + } else { + if (print_index == true) { + file << "Tech node (nm), "; + file << "Capacity (bytes), "; + file << "Number of banks, "; + file << "Associativity, "; + file << "Output width (bits), "; + file << "Access time (ns), "; + file << "Random cycle time (ns), "; + file << "Dynamic search energy (nJ), "; + file << "Dynamic read energy (nJ), "; + file << "Dynamic write energy (nJ), "; + file << "Standby leakage per bank(mW), "; + file << "Area (mm2), "; + file << "Ndwl, "; + file << "Ndbl, "; + file << "Nspd, "; + file << "Ndcm, "; + file << "Ndsam_level_1, "; + file << "Ndsam_level_2, "; + file << "Data arrary area efficiency %, "; + file << "Ntwl, "; + file << "Ntbl, "; + file << "Ntspd, "; + file << "Ntcm, "; + file << "Ntsam_level_1, "; + file << "Ntsam_level_2, "; + file << "Tag arrary area efficiency %, "; // file << "Resistance per unit micron (ohm-micron), "; // file << "Capacitance per unit micron (fF per micron), "; @@ -1428,15 +1343,15 @@ void output_data_csv(const uca_org_t & fin_res) // file << "Delay opt (perc), "; // file << "Repeater opt (perc), "; // file << "Aspect ratio"; - file << endl; - } - file << g_ip->F_sz_nm << ", "; - file << g_ip->cache_sz << ", "; - file << g_ip->nbanks << ", "; - file << g_ip->tag_assoc << ", "; - file << g_ip->out_w << ", "; - file << fin_res.access_time*1e+9 << ", "; - file << fin_res.cycle_time*1e+9 << ", "; + file << endl; + } + file << g_ip->F_sz_nm << ", "; + file << g_ip->cache_sz << ", "; + file << g_ip->nbanks << ", "; + file << g_ip->tag_assoc << ", "; + file << g_ip->out_w << ", "; + file << fin_res.access_time*1e+9 << ", "; + file << fin_res.cycle_time*1e+9 << ", "; // file << fin_res.data_array2->multisubbank_interleave_cycle_time*1e+9 << ", "; // file << 
fin_res.data_array2->delay_request_network*1e+9 << ", "; // file << fin_res.data_array2->delay_inside_mat*1e+9 << ", "; @@ -1453,16 +1368,13 @@ void output_data_csv(const uca_org_t & fin_res) // file << fin_res.data_array2->access_time*1e+9 << ", "; // file << fin_res.data_array2->dram_refresh_period*1e+6 << ", "; // file << fin_res.data_array2->dram_array_availability << ", "; - if (g_ip->fully_assoc || g_ip->pure_cam) - { - file << fin_res.power.searchOp.dynamic*1e+9 << ", "; - } - else - { - file << "N/A" << ", "; - } - file << fin_res.power.readOp.dynamic*1e+9 << ", "; - file << fin_res.power.writeOp.dynamic*1e+9 << ", "; + if (g_ip->fully_assoc || g_ip->pure_cam) { + file << fin_res.power.searchOp.dynamic*1e+9 << ", "; + } else { + file << "N/A" << ", "; + } + file << fin_res.power.readOp.dynamic*1e+9 << ", "; + file << fin_res.power.writeOp.dynamic*1e+9 << ", "; // if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) // { // file << fin_res.tag_array2->power.readOp.dynamic*1e+9 << ", "; @@ -1484,27 +1396,24 @@ void output_data_csv(const uca_org_t & fin_res) file <<( fin_res.power.readOp.leakage + fin_res.power.readOp.gate_leakage )*1000 << ", "; // file << fin_res.leak_power_with_sleep_transistors_in_mats*1000 << ", "; // file << fin_res.data_array.refresh_power / fin_res.data_array.total_power.readOp.leakage << ", "; - file << fin_res.area*1e-6 << ", "; - - file << fin_res.data_array2->Ndwl << ", "; - file << fin_res.data_array2->Ndbl << ", "; - file << fin_res.data_array2->Nspd << ", "; - file << fin_res.data_array2->deg_bl_muxing << ", "; - file << fin_res.data_array2->Ndsam_lev_1 << ", "; - file << fin_res.data_array2->Ndsam_lev_2 << ", "; - file << fin_res.data_array2->area_efficiency << ", "; - if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) - { - file << fin_res.tag_array2->Ndwl << ", "; - file << fin_res.tag_array2->Ndbl << ", "; - file << fin_res.tag_array2->Nspd << ", "; - file << fin_res.tag_array2->deg_bl_muxing << ", "; - 
file << fin_res.tag_array2->Ndsam_lev_1 << ", "; - file << fin_res.tag_array2->Ndsam_lev_2 << ", "; - file << fin_res.tag_array2->area_efficiency << ", "; - } - else - { + file << fin_res.area*1e-6 << ", "; + + file << fin_res.data_array2->Ndwl << ", "; + file << fin_res.data_array2->Ndbl << ", "; + file << fin_res.data_array2->Nspd << ", "; + file << fin_res.data_array2->deg_bl_muxing << ", "; + file << fin_res.data_array2->Ndsam_lev_1 << ", "; + file << fin_res.data_array2->Ndsam_lev_2 << ", "; + file << fin_res.data_array2->area_efficiency << ", "; + if (!(g_ip->fully_assoc || g_ip->pure_cam || g_ip->pure_ram)) { + file << fin_res.tag_array2->Ndwl << ", "; + file << fin_res.tag_array2->Ndbl << ", "; + file << fin_res.tag_array2->Nspd << ", "; + file << fin_res.tag_array2->deg_bl_muxing << ", "; + file << fin_res.tag_array2->Ndsam_lev_1 << ", "; + file << fin_res.tag_array2->Ndsam_lev_2 << ", "; + file << fin_res.tag_array2->area_efficiency << ", "; + } else { file << "N/A" << ", "; file << "N/A"<< ", "; file << "N/A" << ", "; @@ -1535,803 +1444,552 @@ void output_data_csv(const uca_org_t & fin_res) // file << fin_res.data_array.cas_latency * 1e9 << ", " ; // file << fin_res.data_array.precharge_delay * 1e9 << ", " ; // file << fin_res.data_array.all_banks_height / fin_res.data_array.all_banks_width; - file<cache_sz); - } - else { - if (g_ip->data_arr_ram_cell_tech_type == 3) { - cout << "\n---------- CACTI version 6.5, Uniform Cache Access " << - "Logic Process Based DRAM Model ----------\n"; - } - else if (g_ip->data_arr_ram_cell_tech_type == 4) { - cout << "\n---------- CACTI version 6.5, Uniform" << - "Cache Access Commodity DRAM Model ----------\n"; +void output_UCA(uca_org_t *fr) { + // if (NUCA) + if (0) { + cout << "\n\n Detailed Bank Stats:\n"; + cout << " Bank Size (bytes): %d\n" << + (int) (g_ip->cache_sz); + } else { + if (g_ip->data_arr_ram_cell_tech_type == 3) { + cout << "\n---------- CACTI version 6.5, Uniform Cache Access " << + "Logic Process 
Based DRAM Model ----------\n"; + } else if (g_ip->data_arr_ram_cell_tech_type == 4) { + cout << "\n---------- CACTI version 6.5, Uniform" << + "Cache Access Commodity DRAM Model ----------\n"; + } else { + cout << "\n---------- CACTI version 6.5, Uniform Cache Access " + "SRAM Model ----------\n"; + } + cout << "\nCache Parameters:\n"; + cout << " Total cache size (bytes): " << + (int) (g_ip->cache_sz) << endl; } + + cout << " Number of banks: " << (int) g_ip->nbanks << endl; + if (g_ip->fully_assoc || g_ip->pure_cam) + cout << " Associativity: fully associative\n"; else { - cout << "\n---------- CACTI version 6.5, Uniform Cache Access " - "SRAM Model ----------\n"; + if (g_ip->tag_assoc == 1) + cout << " Associativity: direct mapped\n"; + else + cout << " Associativity: " << + g_ip->tag_assoc << endl; } - cout << "\nCache Parameters:\n"; - cout << " Total cache size (bytes): " << - (int) (g_ip->cache_sz) << endl; - } - - cout << " Number of banks: " << (int) g_ip->nbanks << endl; - if (g_ip->fully_assoc|| g_ip->pure_cam) - cout << " Associativity: fully associative\n"; - else { - if (g_ip->tag_assoc == 1) - cout << " Associativity: direct mapped\n"; - else - cout << " Associativity: " << - g_ip->tag_assoc << endl; - } - - - cout << " Block size (bytes): " << g_ip->line_sz << endl; - cout << " Read/write Ports: " << - g_ip->num_rw_ports << endl; - cout << " Read ports: " << - g_ip->num_rd_ports << endl; - cout << " Write ports: " << - g_ip->num_wr_ports << endl; - if (g_ip->fully_assoc|| g_ip->pure_cam) - cout << " search ports: " << - g_ip->num_search_ports << endl; - cout << " Technology size (nm): " << - g_ip->F_sz_nm << endl << endl; - - cout << " Access time (ns): " << fr->access_time*1e9 << endl; - cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl; - if (g_ip->data_arr_ram_cell_tech_type >= 4) { - cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl; - cout << " Activate Energy (nJ): " << 
fr->data_array2->activate_energy*1e9 << endl; - cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl; - cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl; - cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl; - cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << endl; - cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl; - cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl; - cout << " Refresh power (mW): " << - fr->data_array2->refresh_power*1e3 << endl; - } - else { - if ((g_ip->fully_assoc|| g_ip->pure_cam)) - { - cout << " Total dynamic associative search energy per access (nJ): " << - fr->power.searchOp.dynamic*1e9 << endl; + + + cout << " Block size (bytes): " << g_ip->line_sz << endl; + cout << " Read/write Ports: " << + g_ip->num_rw_ports << endl; + cout << " Read ports: " << + g_ip->num_rd_ports << endl; + cout << " Write ports: " << + g_ip->num_wr_ports << endl; + if (g_ip->fully_assoc || g_ip->pure_cam) + cout << " search ports: " << + g_ip->num_search_ports << endl; + cout << " Technology size (nm): " << + g_ip->F_sz_nm << endl << endl; + + cout << " Access time (ns): " << fr->access_time*1e9 << endl; + cout << " Cycle time (ns): " << fr->cycle_time*1e9 << endl; + if (g_ip->data_arr_ram_cell_tech_type >= 4) { + cout << " Precharge Delay (ns): " << fr->data_array2->precharge_delay*1e9 << endl; + cout << " Activate Energy (nJ): " << fr->data_array2->activate_energy*1e9 << endl; + cout << " Read Energy (nJ): " << fr->data_array2->read_energy*1e9 << endl; + cout << " Write Energy (nJ): " << fr->data_array2->write_energy*1e9 << endl; + cout << " Precharge Energy (nJ): " << fr->data_array2->precharge_energy*1e9 << endl; + cout << " Leakage Power Closed Page (mW): " << fr->data_array2->leak_power_subbank_closed_page*1e3 << 
endl; + cout << " Leakage Power Open Page (mW): " << fr->data_array2->leak_power_subbank_open_page*1e3 << endl; + cout << " Leakage Power I/O (mW): " << fr->data_array2->leak_power_request_and_reply_networks*1e3 << endl; + cout << " Refresh power (mW): " << + fr->data_array2->refresh_power*1e3 << endl; + } else { + if ((g_ip->fully_assoc || g_ip->pure_cam)) { + cout << " Total dynamic associative search energy per access (nJ): " << + fr->power.searchOp.dynamic*1e9 << endl; // cout << " Total dynamic read energy per access (nJ): " << // fr->power.readOp.dynamic*1e9 << endl; // cout << " Total dynamic write energy per access (nJ): " << // fr->power.writeOp.dynamic*1e9 << endl; - } + } // else // { - cout << " Total dynamic read energy per access (nJ): " << - fr->power.readOp.dynamic*1e9 << endl; - cout << " Total dynamic write energy per access (nJ): " << - fr->power.writeOp.dynamic*1e9 << endl; + cout << " Total dynamic read energy per access (nJ): " << + fr->power.readOp.dynamic*1e9 << endl; + cout << " Total dynamic write energy per access (nJ): " << + fr->power.writeOp.dynamic*1e9 << endl; // } - cout << " Total leakage power of a bank" - " (mW): " << fr->power.readOp.leakage*1e3 << endl; - cout << " Total gate leakage power of a bank" - " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl; - } - - if (g_ip->data_arr_ram_cell_tech_type ==3 || g_ip->data_arr_ram_cell_tech_type ==4) - { - } - cout << " Cache height x width (mm): " << - fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl; - - - cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl; - cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl; - cout << " Best Nspd : " << fr->data_array2->Nspd << endl; - cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl; - cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl; - cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl; - - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && 
!g_ip->is_main_mem) - { - cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl; - cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl; - cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl; - cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl; - cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl; - cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl; - } - - switch (fr->data_array2->wt) { + cout << " Total leakage power of a bank" + " (mW): " << fr->power.readOp.leakage*1e3 << endl; + cout << " Total gate leakage power of a bank" + " (mW): " << fr->power.readOp.gate_leakage*1e3 << endl; + } + + if (g_ip->data_arr_ram_cell_tech_type == 3 || g_ip->data_arr_ram_cell_tech_type == 4) { + } + cout << " Cache height x width (mm): " << + fr->cache_ht*1e-3 << " x " << fr->cache_len*1e-3 << endl << endl; + + + cout << " Best Ndwl : " << fr->data_array2->Ndwl << endl; + cout << " Best Ndbl : " << fr->data_array2->Ndbl << endl; + cout << " Best Nspd : " << fr->data_array2->Nspd << endl; + cout << " Best Ndcm : " << fr->data_array2->deg_bl_muxing << endl; + cout << " Best Ndsam L1 : " << fr->data_array2->Ndsam_lev_1 << endl; + cout << " Best Ndsam L2 : " << fr->data_array2->Ndsam_lev_2 << endl << endl; + + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << " Best Ntwl : " << fr->tag_array2->Ndwl << endl; + cout << " Best Ntbl : " << fr->tag_array2->Ndbl << endl; + cout << " Best Ntspd : " << fr->tag_array2->Nspd << endl; + cout << " Best Ntcm : " << fr->tag_array2->deg_bl_muxing << endl; + cout << " Best Ntsam L1 : " << fr->tag_array2->Ndsam_lev_1 << endl; + cout << " Best Ntsam L2 : " << fr->tag_array2->Ndsam_lev_2 << endl; + } + + switch (fr->data_array2->wt) { case (0): - cout << " Data array, H-tree wire type: Delay optimized global wires\n"; - break; - case (1): - cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n"; - break; - case (2): - 
cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n"; - break; - case (3): - cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n"; - break; - case (4): - cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n"; - break; - case (5): - cout << " Data array, wire type: Low swing wires\n"; - break; - default: - cout << "ERROR - Unknown wire type " << (int) fr->data_array2->wt <pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) { - switch (fr->tag_array2->wt) { - case (0): - cout << " Tag array, H-tree wire type: Delay optimized global wires\n"; + cout << " Data array, H-tree wire type: Delay optimized global wires\n"; break; - case (1): - cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n"; + case (1): + cout << " Data array, H-tree wire type: Global wires with 5\% delay penalty\n"; break; - case (2): - cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n"; + case (2): + cout << " Data array, H-tree wire type: Global wires with 10\% delay penalty\n"; break; - case (3): - cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n"; + case (3): + cout << " Data array, H-tree wire type: Global wires with 20\% delay penalty\n"; break; - case (4): - cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n"; + case (4): + cout << " Data array, H-tree wire type: Global wires with 30\% delay penalty\n"; break; - case (5): - cout << " Tag array, wire type: Low swing wires\n"; + case (5): + cout << " Data array, wire type: Low swing wires\n"; break; - default: - cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt <data_array2->wt << endl; + exit(0); } - } - - if (g_ip->print_detail) - { - //if(g_ip->fully_assoc) return; - - /* Delay stats */ - /* data array stats */ - cout << endl << "Time Components:" << endl << endl; - - cout << " Data side (with Output driver) (ns): " << - 
fr->data_array2->access_time/1e-9 << endl; - cout << "\tH-tree input delay (ns): " << - fr->data_array2->delay_route_to_bank * 1e9 + - fr->data_array2->delay_input_htree * 1e9 << endl; - - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - { - cout << "\tDecoder + wordline delay (ns): " << - fr->data_array2->delay_row_predecode_driver_and_block * 1e9 + - fr->data_array2->delay_row_decoder * 1e9 << endl; - } - else - { - cout << "\tCAM search delay (ns): " << - fr->data_array2->delay_matchlines * 1e9 << endl; + if (!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) { + switch (fr->tag_array2->wt) { + case (0): + cout << " Tag array, H-tree wire type: Delay optimized global wires\n"; + break; + case (1): + cout << " Tag array, H-tree wire type: Global wires with 5\% delay penalty\n"; + break; + case (2): + cout << " Tag array, H-tree wire type: Global wires with 10\% delay penalty\n"; + break; + case (3): + cout << " Tag array, H-tree wire type: Global wires with 20\% delay penalty\n"; + break; + case (4): + cout << " Tag array, H-tree wire type: Global wires with 30\% delay penalty\n"; + break; + case (5): + cout << " Tag array, wire type: Low swing wires\n"; + break; + default: + cout << "ERROR - Unknown wire type " << (int) fr->tag_array2->wt << endl; + exit(-1); + } } - cout << "\tBitline delay (ns): " << - fr->data_array2->delay_bitlines/1e-9 << endl; + if (g_ip->print_detail) { + /* Delay stats */ + /* data array stats */ + cout << endl << "Time Components:" << endl << endl; + + cout << " Data side (with Output driver) (ns): " << + fr->data_array2->access_time / 1e-9 << endl; + + cout << "\tH-tree input delay (ns): " << + fr->data_array2->delay_route_to_bank * 1e9 + + fr->data_array2->delay_input_htree * 1e9 << endl; + + if (!(g_ip->pure_cam || g_ip->fully_assoc)) { + cout << "\tDecoder + wordline delay (ns): " << + fr->data_array2->delay_row_predecode_driver_and_block * 1e9 + + fr->data_array2->delay_row_decoder * 1e9 << endl; + } else { + cout << "\tCAM 
search delay (ns): " << + fr->data_array2->delay_matchlines * 1e9 << endl; + } + + cout << "\tBitline delay (ns): " << + fr->data_array2->delay_bitlines / 1e-9 << endl; - cout << "\tSense Amplifier delay (ns): " << - fr->data_array2->delay_sense_amp * 1e9 << endl; + cout << "\tSense Amplifier delay (ns): " << + fr->data_array2->delay_sense_amp * 1e9 << endl; - cout << "\tH-tree output delay (ns): " << - fr->data_array2->delay_subarray_output_driver * 1e9 + - fr->data_array2->delay_dout_htree * 1e9 << endl; + cout << "\tH-tree output delay (ns): " << + fr->data_array2->delay_subarray_output_driver * 1e9 + + fr->data_array2->delay_dout_htree * 1e9 << endl; - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - /* tag array stats */ - cout << endl << " Tag side (with Output driver) (ns): " << - fr->tag_array2->access_time/1e-9 << endl; + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + /* tag array stats */ + cout << endl << " Tag side (with Output driver) (ns): " << + fr->tag_array2->access_time / 1e-9 << endl; - cout << "\tH-tree input delay (ns): " << - fr->tag_array2->delay_route_to_bank * 1e9 + - fr->tag_array2->delay_input_htree * 1e9 << endl; + cout << "\tH-tree input delay (ns): " << + fr->tag_array2->delay_route_to_bank * 1e9 + + fr->tag_array2->delay_input_htree * 1e9 << endl; - cout << "\tDecoder + wordline delay (ns): " << - fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 + - fr->tag_array2->delay_row_decoder * 1e9 << endl; + cout << "\tDecoder + wordline delay (ns): " << + fr->tag_array2->delay_row_predecode_driver_and_block * 1e9 + + fr->tag_array2->delay_row_decoder * 1e9 << endl; - cout << "\tBitline delay (ns): " << - fr->tag_array2->delay_bitlines/1e-9 << endl; + cout << "\tBitline delay (ns): " << + fr->tag_array2->delay_bitlines / 1e-9 << endl; - cout << "\tSense Amplifier delay (ns): " << - fr->tag_array2->delay_sense_amp * 1e9 << endl; + cout << "\tSense 
Amplifier delay (ns): " << + fr->tag_array2->delay_sense_amp * 1e9 << endl; - cout << "\tComparator delay (ns): " << - fr->tag_array2->delay_comparator * 1e9 << endl; + cout << "\tComparator delay (ns): " << + fr->tag_array2->delay_comparator * 1e9 << endl; - cout << "\tH-tree output delay (ns): " << - fr->tag_array2->delay_subarray_output_driver * 1e9 + - fr->tag_array2->delay_dout_htree * 1e9 << endl; - } + cout << "\tH-tree output delay (ns): " << + fr->tag_array2->delay_subarray_output_driver * 1e9 + + fr->tag_array2->delay_dout_htree * 1e9 << endl; + } - /* Energy/Power stats */ - cout << endl << endl << "Power Components:" << endl << endl; + /* Energy/Power stats */ + cout << endl << endl << "Power Components:" << endl << endl; - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - { - cout << " Data array: Total dynamic read energy/access (nJ): " << - fr->data_array2->power.readOp.dynamic * 1e9 << endl; - cout << "\tTotal leakage read/write power of a bank (mW): " << - fr->data_array2->power.readOp.leakage * 1e3 << endl; + if (!(g_ip->pure_cam || g_ip->fully_assoc)) { + cout << " Data array: Total dynamic read energy/access (nJ): " << + fr->data_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal leakage read/write power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "address and data transfer) (nJ): " << - (fr->data_array2->power_addr_input_htree.readOp.dynamic + - fr->data_array2->power_data_output_htree.readOp.dynamic + - fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; - cout << "\tTotal leakage power in H-tree (that includes both " - "address and data 
network) ((mW)): " << + cout << "\tTotal leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << (fr->data_array2->power_addr_input_htree.readOp.leakage + fr->data_array2->power_data_output_htree.readOp.leakage + - fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl; + fr->data_array2->power_routing_to_bank.readOp.leakage) * 1e3 + << endl; - cout << "\tTotal gate leakage power in H-tree (that includes both " - "address and data network) ((mW)): " << + cout << "\tTotal gate leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << (fr->data_array2->power_addr_input_htree.readOp.gate_leakage + fr->data_array2->power_data_output_htree.readOp.gate_leakage + - fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl; - - cout << "\tOutput Htree inside bank Energy (nJ): " << - fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; - cout << "\tDecoder (nJ): " << - fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - cout << "\tWordline (nJ): " << - fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitline mux & associated drivers (nJ): " << - fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - 
fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - - cout << "\tBitlines precharge and equalization circuit (nJ): " << - fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines (nJ): " << - fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl; - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - } + fr->data_array2->power_routing_to_bank.readOp.gate_leakage) * + 1e3 << endl; + + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + + cout << "\tBitlines precharge and equalization circuit (nJ): " << + 
fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + } - else if (g_ip->pure_cam) - { - - cout << " CAM array:"<data_array2->power.searchOp.dynamic * 1e9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "match key and data transfer) (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic + - fr->data_array2->power_htree_out_search.searchOp.dynamic + - fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; - cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic + - fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; - cout << "\tSearchlines (nJ): " << - fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + - fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; - cout << "\tMatchlines (nJ): " << - fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + - fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; - - - cout <data_array2->power.readOp.dynamic * 1e9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "address and data transfer) (nJ): " << - (fr->data_array2->power_addr_input_htree.readOp.dynamic + - fr->data_array2->power_data_output_htree.readOp.dynamic + - fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; - cout << "\tOutput Htree inside bank Energy (nJ): " << - fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; 
- cout << "\tDecoder (nJ): " << - fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - cout << "\tWordline (nJ): " << - fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitline mux & associated drivers (nJ): " << - fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines (nJ): " << - fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + - fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl; - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - - cout << endl <<" Total leakage power of a bank (mW): " << - fr->data_array2->power.readOp.leakage * 1e3 << endl; + else if (g_ip->pure_cam) { + + cout << " CAM array:" << endl; + cout << " Total dynamic associative search energy/access (nJ): " << + fr->data_array2->power.searchOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "match key and data transfer) (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + 
fr->data_array2->power_htree_out_search.searchOp.dynamic + + fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; + cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; + cout << "\tSearchlines (nJ): " << + fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + + fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tMatchlines (nJ): " << + fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; + + + cout << endl << " Total dynamic read energy/access (nJ): " << + fr->data_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + 
fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + + cout << endl << " Total leakage power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; + } else { + cout << " Fully associative array:" << endl; + cout << " Total dynamic associative search energy/access (nJ): " << + fr->data_array2->power.searchOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "match key and data transfer) (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic + + fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; + cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << + (fr->data_array2->power_htree_in_search.searchOp.dynamic + + fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; + cout << "\tSearchlines (nJ): " << + fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + + fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tMatchlines (nJ): " << + fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + + 
fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; + cout << "\tData portion wordline (nJ): " << + fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl; + cout << "\tData Bitlines (nJ): " << + fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; + + + cout << endl << " Total dynamic read energy/access (nJ): " << + fr->data_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->data_array2->power_addr_input_htree.readOp.dynamic + + fr->data_array2->power_data_output_htree.readOp.dynamic + + fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + cout << "\tOutput Htree inside bank Energy (nJ): " << + fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + 
fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + + fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy (nJ): " << + fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + + cout << endl << " Total leakage power of a bank (mW): " << + fr->data_array2->power.readOp.leakage * 1e3 << endl; } - else - { - cout << " Fully associative array:"<data_array2->power.searchOp.dynamic * 1e9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "match key and data transfer) (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic + - fr->data_array2->power_htree_out_search.searchOp.dynamic + - fr->data_array2->power_routing_to_bank.searchOp.dynamic) * 1e9 << endl; - cout << "\tKeyword input and result output Htrees inside bank Energy (nJ): " << - (fr->data_array2->power_htree_in_search.searchOp.dynamic + - fr->data_array2->power_htree_out_search.searchOp.dynamic) * 1e9 << endl; - cout << "\tSearchlines (nJ): " << - fr->data_array2->power_searchline.searchOp.dynamic * 1e9 + - fr->data_array2->power_searchline_precharge.searchOp.dynamic * 1e9 << endl; - cout << "\tMatchlines (nJ): " << - fr->data_array2->power_matchlines.searchOp.dynamic * 1e9 + - fr->data_array2->power_matchline_precharge.searchOp.dynamic * 1e9 << endl; - cout << "\tData portion wordline (nJ): " << - fr->data_array2->power_matchline_to_wordline_drv.searchOp.dynamic * 1e9 << endl; - cout << "\tData Bitlines (nJ): " << - 
fr->data_array2->power_bitlines.searchOp.dynamic * 1e9 + - fr->data_array2->power_prechg_eq_drivers.searchOp.dynamic * 1e9 << endl; - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.searchOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.searchOp.dynamic * 1e9 << endl; - - - cout <data_array2->power.readOp.dynamic * 1e9 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "address and data transfer) (nJ): " << - (fr->data_array2->power_addr_input_htree.readOp.dynamic + - fr->data_array2->power_data_output_htree.readOp.dynamic + - fr->data_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; - cout << "\tOutput Htree inside bank Energy (nJ): " << - fr->data_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; - cout << "\tDecoder (nJ): " << - fr->data_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - cout << "\tWordline (nJ): " << - fr->data_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitline mux & associated drivers (nJ): " << - fr->data_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->data_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->data_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - cout << 
"\tBitlines (nJ): " << - fr->data_array2->power_bitlines.readOp.dynamic * 1e9 + - fr->data_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9<< endl; - cout << "\tSense amplifier energy (nJ): " << - fr->data_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->data_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - - cout << endl <<" Total leakage power of a bank (mW): " << - fr->data_array2->power.readOp.leakage * 1e3 << endl; - } - - - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - cout << endl << " Tag array: Total dynamic read energy/access (nJ): " << - fr->tag_array2->power.readOp.dynamic * 1e9 << endl; - cout << "\tTotal leakage read/write power of a bank (mW): " << - fr->tag_array2->power.readOp.leakage * 1e3 << endl; - cout << "\tTotal energy in H-tree (that includes both " - "address and data transfer) (nJ): " << - (fr->tag_array2->power_addr_input_htree.readOp.dynamic + - fr->tag_array2->power_data_output_htree.readOp.dynamic + - fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; - - cout << "\tTotal leakage power in H-tree (that includes both " - "address and data network) ((mW)): " << + + + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << endl << " Tag array: Total dynamic read energy/access (nJ): " << + fr->tag_array2->power.readOp.dynamic * 1e9 << endl; + cout << "\tTotal leakage read/write power of a bank (mW): " << + fr->tag_array2->power.readOp.leakage * 1e3 << endl; + cout << "\tTotal energy in H-tree (that includes both " + "address and data transfer) (nJ): " << + (fr->tag_array2->power_addr_input_htree.readOp.dynamic + + fr->tag_array2->power_data_output_htree.readOp.dynamic + + fr->tag_array2->power_routing_to_bank.readOp.dynamic) * 1e9 << endl; + + cout << "\tTotal leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << 
(fr->tag_array2->power_addr_input_htree.readOp.leakage + fr->tag_array2->power_data_output_htree.readOp.leakage + - fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 << endl; + fr->tag_array2->power_routing_to_bank.readOp.leakage) * 1e3 + << endl; - cout << "\tTotal gate leakage power in H-tree (that includes both " - "address and data network) ((mW)): " << + cout << "\tTotal gate leakage power in H-tree (that includes both " + "address and data network) ((mW)): " << (fr->tag_array2->power_addr_input_htree.readOp.gate_leakage + fr->tag_array2->power_data_output_htree.readOp.gate_leakage + - fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * 1e3 << endl; - - cout << "\tOutput Htree inside a bank Energy (nJ): " << - fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; - cout << "\tDecoder (nJ): " << - fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; - cout << "\tWordline (nJ): " << - fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitline mux & associated drivers (nJ): " << - fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tSense amp mux & associated drivers (nJ): " << - fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + - fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines precharge and equalization circuit (nJ): " << - 
fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; - cout << "\tBitlines (nJ): " << - fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl; - cout << "\tSense amplifier energy (nJ): " << - fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; - cout << "\tSub-array output driver (nJ): " << - fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; - } + fr->tag_array2->power_routing_to_bank.readOp.gate_leakage) * + 1e3 << endl; + + cout << "\tOutput Htree inside a bank Energy (nJ): " << + fr->tag_array2->power_data_output_htree.readOp.dynamic * 1e9 << endl; + cout << "\tDecoder (nJ): " << + fr->tag_array2->power_row_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_row_predecoder_blocks.readOp.dynamic * 1e9 << endl; + cout << "\tWordline (nJ): " << + fr->tag_array2->power_row_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitline mux & associated drivers (nJ): " << + fr->tag_array2->power_bit_mux_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_bit_mux_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_bit_mux_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tSense amp mux & associated drivers (nJ): " << + fr->tag_array2->power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_1_decoders.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic * 1e9 + + fr->tag_array2->power_senseamp_mux_lev_2_decoders.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines precharge and equalization circuit (nJ): " << + fr->tag_array2->power_prechg_eq_drivers.readOp.dynamic * 1e9 << endl; + cout << "\tBitlines (nJ): " << + fr->tag_array2->power_bitlines.readOp.dynamic * 1e9 << endl; + cout << "\tSense amplifier energy 
(nJ): " << + fr->tag_array2->power_sense_amps.readOp.dynamic * 1e9 << endl; + cout << "\tSub-array output driver (nJ): " << + fr->tag_array2->power_output_drivers_at_subarray.readOp.dynamic * 1e9 << endl; + } - cout << endl << endl << "Area Components:" << endl << endl; - /* Data array area stats */ - if (!(g_ip->pure_cam || g_ip->fully_assoc)) - cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; - else if (g_ip->pure_cam) - cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; - else - cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; - cout << "\tHeight (mm): " << - fr->data_array2->all_banks_height*1e-3 << endl; - cout << "\tWidth (mm): " << - fr->data_array2->all_banks_width*1e-3 << endl; - if (g_ip->print_detail) { - cout << "\tArea efficiency (Memory cell area/Total area) - " << - fr->data_array2->area_efficiency << " %" << endl; - cout << "\t\tMAT Height (mm): " << - fr->data_array2->mat_height*1e-3 << endl; - cout << "\t\tMAT Length (mm): " << - fr->data_array2->mat_length*1e-3 << endl; - cout << "\t\tSubarray Height (mm): " << - fr->data_array2->subarray_height*1e-3 << endl; - cout << "\t\tSubarray Length (mm): " << - fr->data_array2->subarray_length*1e-3 << endl; - } + cout << endl << endl << "Area Components:" << endl << endl; + /* Data array area stats */ + if (!(g_ip->pure_cam || g_ip->fully_assoc)) + cout << " Data array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + else if (g_ip->pure_cam) + cout << " CAM array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + else + cout << " Fully associative cache array: Area (mm2): " << fr->data_array2->area * 1e-6 << endl; + cout << "\tHeight (mm): " << + fr->data_array2->all_banks_height*1e-3 << endl; + cout << "\tWidth (mm): " << + fr->data_array2->all_banks_width*1e-3 << endl; + if (g_ip->print_detail) { + cout << "\tArea efficiency (Memory cell area/Total area) - " << + 
fr->data_array2->area_efficiency << " %" << endl; + cout << "\t\tMAT Height (mm): " << + fr->data_array2->mat_height*1e-3 << endl; + cout << "\t\tMAT Length (mm): " << + fr->data_array2->mat_length*1e-3 << endl; + cout << "\t\tSubarray Height (mm): " << + fr->data_array2->subarray_height*1e-3 << endl; + cout << "\t\tSubarray Length (mm): " << + fr->data_array2->subarray_length*1e-3 << endl; + } - /* Tag array area stats */ - if ((!(g_ip->pure_ram|| g_ip->pure_cam || g_ip->fully_assoc)) && !g_ip->is_main_mem) - { - cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl; - cout << "\tHeight (mm): " << - fr->tag_array2->all_banks_height*1e-3 << endl; - cout << "\tWidth (mm): " << - fr->tag_array2->all_banks_width*1e-3 << endl; - if (g_ip->print_detail) - { - cout << "\tArea efficiency (Memory cell area/Total area) - " << - fr->tag_array2->area_efficiency << " %" << endl; - cout << "\t\tMAT Height (mm): " << - fr->tag_array2->mat_height*1e-3 << endl; - cout << "\t\tMAT Length (mm): " << - fr->tag_array2->mat_length*1e-3 << endl; - cout << "\t\tSubarray Height (mm): " << - fr->tag_array2->subarray_height*1e-3 << endl; - cout << "\t\tSubarray Length (mm): " << - fr->tag_array2->subarray_length*1e-3 << endl; - } + /* Tag array area stats */ + if ((!(g_ip->pure_ram || g_ip->pure_cam || g_ip->fully_assoc)) && + !g_ip->is_main_mem) { + cout << endl << " Tag array: Area (mm2): " << fr->tag_array2->area * 1e-6 << endl; + cout << "\tHeight (mm): " << + fr->tag_array2->all_banks_height*1e-3 << endl; + cout << "\tWidth (mm): " << + fr->tag_array2->all_banks_width*1e-3 << endl; + if (g_ip->print_detail) { + cout << "\tArea efficiency (Memory cell area/Total area) - " << + fr->tag_array2->area_efficiency << " %" << endl; + cout << "\t\tMAT Height (mm): " << + fr->tag_array2->mat_height*1e-3 << endl; + cout << "\t\tMAT Length (mm): " << + fr->tag_array2->mat_length*1e-3 << endl; + cout << "\t\tSubarray Height (mm): " << + 
fr->tag_array2->subarray_height*1e-3 << endl; + cout << "\t\tSubarray Length (mm): " << + fr->tag_array2->subarray_length*1e-3 << endl; + } + } + Wire wpr; + wpr.print_wire(); } - Wire wpr; - wpr.print_wire(); - - //cout << "FO4 = " << g_tp.FO4 << endl; - } } //McPAT's plain interface, please keep !!! -uca_org_t cacti_interface(InputParameter * const local_interface) -{ -// g_ip = new InputParameter(); - //g_ip->add_ecc_b_ = true; - - uca_org_t fin_res; - fin_res.valid = false; - - g_ip = local_interface; - - -// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; -// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; -// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; -// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; -// -// g_ip->ic_proj_type = interconnect_projection_type_in; -// g_ip->wire_is_mat_type = wire_inside_mat_type_in; -// g_ip->wire_os_mat_type = wire_outside_mat_type_in; -// g_ip->burst_len = BURST_LENGTH_in; -// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; -// g_ip->page_sz_bits = PAGE_SIZE_BITS_in; -// -// g_ip->cache_sz = cache_size; -// g_ip->line_sz = line_size; -// g_ip->assoc = associativity; -// g_ip->nbanks = banks; -// g_ip->out_w = output_width; -// g_ip->specific_tag = specific_tag; -// if (tag_width == 0) { -// g_ip->tag_w = 42; -// } -// else { -// g_ip->tag_w = tag_width; -// } -// -// g_ip->access_mode = access_mode; -// g_ip->delay_wt = obj_func_delay; -// g_ip->dynamic_power_wt = obj_func_dynamic_power; -// g_ip->leakage_power_wt = obj_func_leakage_power; -// g_ip->area_wt = obj_func_area; -// g_ip->cycle_time_wt = obj_func_cycle_time; -// g_ip->delay_dev = dev_func_delay; -// g_ip->dynamic_power_dev = dev_func_dynamic_power; -// g_ip->leakage_power_dev = dev_func_leakage_power; -// g_ip->area_dev = dev_func_area; -// g_ip->cycle_time_dev = dev_func_cycle_time; -// g_ip->temp = temp; -// -// g_ip->F_sz_nm = tech_node; -// g_ip->F_sz_um = 
tech_node / 1000; -// g_ip->is_main_mem = (main_mem != 0) ? true : false; -// g_ip->is_cache = (cache ==1) ? true : false; -// g_ip->pure_ram = (cache ==0) ? true : false; -// g_ip->pure_cam = (cache ==2) ? true : false; -// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; -// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; -// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; -// -// g_ip->num_rw_ports = rw_ports; -// g_ip->num_rd_ports = excl_read_ports; -// g_ip->num_wr_ports = excl_write_ports; -// g_ip->num_se_rd_ports = single_ended_read_ports; -// g_ip->num_search_ports = search_ports; -// -// g_ip->print_detail = 1; -// g_ip->nuca = 0; -// g_ip->is_cache=true; -// -// if (force_wiretype == 0) -// { -// g_ip->wt = Global; -// g_ip->force_wiretype = false; -// } -// else -// { g_ip->force_wiretype = true; -// if (wiretype==10) { -// g_ip->wt = Global_10; -// } -// if (wiretype==20) { -// g_ip->wt = Global_20; -// } -// if (wiretype==30) { -// g_ip->wt = Global_30; -// } -// if (wiretype==5) { -// g_ip->wt = Global_5; -// } -// if (wiretype==0) { -// g_ip->wt = Low_swing; -// } -// } -// //g_ip->wt = Global_5; -// if (force_config == 0) -// { -// g_ip->force_cache_config = false; -// } -// else -// { -// g_ip->force_cache_config = true; -// g_ip->ndbl=ndbl; -// g_ip->ndwl=ndwl; -// g_ip->nspd=nspd; -// g_ip->ndcm=ndcm; -// g_ip->ndsam1=ndsam1; -// g_ip->ndsam2=ndsam2; -// -// -// } -// -// if (ecc==0){ -// g_ip->add_ecc_b_=false; -// } -// else -// { -// g_ip->add_ecc_b_=true; -// } - +uca_org_t cacti_interface(InputParameter * const local_interface) { + uca_org_t fin_res; + fin_res.valid = false; - g_ip->error_checking(); - - - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. 
+ g_ip = local_interface; - solve(&fin_res); + if (!g_ip->error_checking()) { + exit(0); + } -// g_ip->display_ip(); -// output_UCA(&fin_res); -// output_data_csv(fin_res); + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. - // delete (g_ip); + solve(&fin_res); - return fin_res; + return fin_res; } //McPAT's plain interface, please keep !!! -uca_org_t init_interface(InputParameter* const local_interface) -{ - // g_ip = new InputParameter(); - //g_ip->add_ecc_b_ = true; - - uca_org_t fin_res; - fin_res.valid = false; - - g_ip = local_interface; +uca_org_t init_interface(InputParameter* const local_interface, + const string &name) { + uca_org_t fin_res; + fin_res.valid = false; + g_ip = local_interface; -// g_ip->data_arr_ram_cell_tech_type = data_arr_ram_cell_tech_flavor_in; -// g_ip->data_arr_peri_global_tech_type = data_arr_peri_global_tech_flavor_in; -// g_ip->tag_arr_ram_cell_tech_type = tag_arr_ram_cell_tech_flavor_in; -// g_ip->tag_arr_peri_global_tech_type = tag_arr_peri_global_tech_flavor_in; -// -// g_ip->ic_proj_type = interconnect_projection_type_in; -// g_ip->wire_is_mat_type = wire_inside_mat_type_in; -// g_ip->wire_os_mat_type = wire_outside_mat_type_in; -// g_ip->burst_len = BURST_LENGTH_in; -// g_ip->int_prefetch_w = INTERNAL_PREFETCH_WIDTH_in; -// g_ip->page_sz_bits = PAGE_SIZE_BITS_in; -// -// g_ip->cache_sz = cache_size; -// g_ip->line_sz = line_size; -// g_ip->assoc = associativity; -// g_ip->nbanks = banks; -// g_ip->out_w = output_width; -// g_ip->specific_tag = specific_tag; -// if (tag_width == 0) { -// g_ip->tag_w = 42; -// } -// else { -// g_ip->tag_w = tag_width; -// } -// -// g_ip->access_mode = access_mode; -// g_ip->delay_wt = obj_func_delay; -// g_ip->dynamic_power_wt = obj_func_dynamic_power; -// g_ip->leakage_power_wt = obj_func_leakage_power; -// g_ip->area_wt = obj_func_area; -// g_ip->cycle_time_wt = obj_func_cycle_time; -// g_ip->delay_dev = dev_func_delay; -// 
g_ip->dynamic_power_dev = dev_func_dynamic_power; -// g_ip->leakage_power_dev = dev_func_leakage_power; -// g_ip->area_dev = dev_func_area; -// g_ip->cycle_time_dev = dev_func_cycle_time; -// g_ip->temp = temp; -// -// g_ip->F_sz_nm = tech_node; -// g_ip->F_sz_um = tech_node / 1000; -// g_ip->is_main_mem = (main_mem != 0) ? true : false; -// g_ip->is_cache = (cache ==1) ? true : false; -// g_ip->pure_ram = (cache ==0) ? true : false; -// g_ip->pure_cam = (cache ==2) ? true : false; -// g_ip->rpters_in_htree = (REPEATERS_IN_HTREE_SEGMENTS_in != 0) ? true : false; -// g_ip->ver_htree_wires_over_array = VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in; -// g_ip->broadcast_addr_din_over_ver_htrees = BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in; -// -// g_ip->num_rw_ports = rw_ports; -// g_ip->num_rd_ports = excl_read_ports; -// g_ip->num_wr_ports = excl_write_ports; -// g_ip->num_se_rd_ports = single_ended_read_ports; -// g_ip->num_search_ports = search_ports; -// -// g_ip->print_detail = 1; -// g_ip->nuca = 0; -// -// if (force_wiretype == 0) -// { -// g_ip->wt = Global; -// g_ip->force_wiretype = false; -// } -// else -// { g_ip->force_wiretype = true; -// if (wiretype==10) { -// g_ip->wt = Global_10; -// } -// if (wiretype==20) { -// g_ip->wt = Global_20; -// } -// if (wiretype==30) { -// g_ip->wt = Global_30; -// } -// if (wiretype==5) { -// g_ip->wt = Global_5; -// } -// if (wiretype==0) { -// g_ip->wt = Low_swing; -// } -// } -// //g_ip->wt = Global_5; -// if (force_config == 0) -// { -// g_ip->force_cache_config = false; -// } -// else -// { -// g_ip->force_cache_config = true; -// g_ip->ndbl=ndbl; -// g_ip->ndwl=ndwl; -// g_ip->nspd=nspd; -// g_ip->ndcm=ndcm; -// g_ip->ndsam1=ndsam1; -// g_ip->ndsam2=ndsam2; -// -// -// } -// -// if (ecc==0){ -// g_ip->add_ecc_b_=false; -// } -// else -// { -// g_ip->add_ecc_b_=true; -// } - - - g_ip->error_checking(); - - init_tech_params(g_ip->F_sz_um, false); - Wire winit; // Do not delete this line. It initializes wires. 
- //solve(&fin_res); - //g_ip->display_ip(); - - //solve(&fin_res); - //output_UCA(&fin_res); - //output_data_csv(fin_res); - // delete (g_ip); + if (!g_ip->error_checking(name)) { + exit(0); + } - return fin_res; + init_tech_params(g_ip->F_sz_um, false); + Wire winit; // Do not delete this line. It initializes wires. + return fin_res; } void reconfigure(InputParameter *local_interface, uca_org_t *fin_res) diff --git a/ext/mcpat/cacti/mat.cc b/ext/mcpat/cacti/mat.cc old mode 100755 new mode 100644 index ef98107c7..447996053 --- a/ext/mcpat/cacti/mat.cc +++ b/ext/mcpat/cacti/mat.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -36,371 +37,369 @@ #include "mat.h" Mat::Mat(const DynamicParameter & dyn_p) - :dp(dyn_p), - power_subarray_out_drv(), - delay_fa_tag(0), delay_cam(0), - delay_before_decoder(0), delay_bitline(0), - delay_wl_reset(0), delay_bl_restore(0), - delay_searchline(0), delay_matchchline(0), - delay_cam_sl_restore(0), delay_cam_ml_reset(0), - delay_fa_ram_wl(0),delay_hit_miss_reset(0), - delay_hit_miss(0), - subarray(dp, dp.fully_assoc), - power_bitline(), per_bitline_read_energy(0), - deg_bl_muxing(dp.deg_bl_muxing), - num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), - delay_writeback(0), - cell(subarray.cell), cam_cell(subarray.cam_cell), - is_dram(dyn_p.is_dram), - pure_cam(dyn_p.pure_cam), - num_mats(dp.num_mats), - power_sa(), delay_sa(0), - leak_power_sense_amps_closed_page_state(0), - leak_power_sense_amps_open_page_state(0), - delay_subarray_out_drv(0), - delay_comparator(0), power_comparator(), - num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), - num_subarrays_per_mat(dp.num_subarrays/dp.num_mats), - num_subarrays_per_row(dp.Ndwl/dp.num_mats_h_dir) -{ - assert(num_subarrays_per_mat <= 4); - assert(num_subarrays_per_row <= 2); - is_fa = (dp.fully_assoc) ? true : false; - camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. 
- - if (is_fa || pure_cam) - num_subarrays_per_row = num_subarrays_per_mat>2?num_subarrays_per_mat/2:num_subarrays_per_mat; - - if (dp.use_inp_params == 1) { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; - SCHP = g_ip->num_search_ports; - - } - - double number_sa_subarray; - - if (!is_fa && !pure_cam) - { - number_sa_subarray = subarray.num_cols / deg_bl_muxing; - } - else if (is_fa && !pure_cam) - { - number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; - } - - else - { - number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; - } - - int num_dec_signals = subarray.num_rows; - double C_ld_bit_mux_dec_out = 0; - double C_ld_sa_mux_lev_1_dec_out = 0; - double C_ld_sa_mux_lev_2_dec_out = 0; - double R_wire_wl_drv_out; - - if (!is_fa && !pure_cam) - { - R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; + : dp(dyn_p), + power_subarray_out_drv(), + delay_fa_tag(0), delay_cam(0), + delay_before_decoder(0), delay_bitline(0), + delay_wl_reset(0), delay_bl_restore(0), + delay_searchline(0), delay_matchchline(0), + delay_cam_sl_restore(0), delay_cam_ml_reset(0), + delay_fa_ram_wl(0), delay_hit_miss_reset(0), + delay_hit_miss(0), + subarray(dp, dp.fully_assoc), + power_bitline(), per_bitline_read_energy(0), + deg_bl_muxing(dp.deg_bl_muxing), + num_act_mats_hor_dir(dyn_p.num_act_mats_hor_dir), + delay_writeback(0), + cell(subarray.cell), cam_cell(subarray.cam_cell), + is_dram(dyn_p.is_dram), + pure_cam(dyn_p.pure_cam), + num_mats(dp.num_mats), + power_sa(), delay_sa(0), + leak_power_sense_amps_closed_page_state(0), + leak_power_sense_amps_open_page_state(0), + delay_subarray_out_drv(0), + delay_comparator(0), power_comparator(), + num_do_b_mat(dyn_p.num_do_b_mat), num_so_b_mat(dyn_p.num_so_b_mat), + num_subarrays_per_mat(dp.num_subarrays / 
dp.num_mats), + num_subarrays_per_row(dp.Ndwl / dp.num_mats_h_dir) { + assert(num_subarrays_per_mat <= 4); + assert(num_subarrays_per_row <= 2); + is_fa = (dp.fully_assoc) ? true : false; + camFlag = (is_fa || pure_cam);//although cam_cell.w = cell.w for fa, we still differentiate them. + + if (is_fa || pure_cam) { + num_subarrays_per_row = num_subarrays_per_mat > 2 ? + num_subarrays_per_mat / 2 : num_subarrays_per_mat; } - else if (is_fa && !pure_cam) - { - R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; + + if (dp.use_inp_params == 1) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; + + } + + double number_sa_subarray; + + if (!is_fa && !pure_cam) { + number_sa_subarray = subarray.num_cols / deg_bl_muxing; + } else if (is_fa && !pure_cam) { + number_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram) / deg_bl_muxing; + } + + else { + number_sa_subarray = (subarray.num_cols_fa_cam) / deg_bl_muxing; } - else - { + + int num_dec_signals = subarray.num_rows; + double C_ld_bit_mux_dec_out = 0; + double C_ld_sa_mux_lev_1_dec_out = 0; + double C_ld_sa_mux_lev_2_dec_out = 0; + double R_wire_wl_drv_out; + + if (!is_fa && !pure_cam) { + R_wire_wl_drv_out = subarray.num_cols * cell.w * g_tp.wire_local.R_per_um; + } else if (is_fa && !pure_cam) { + R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w + subarray.num_cols_fa_ram * cell.w) * g_tp.wire_local.R_per_um ; + } else { R_wire_wl_drv_out = (subarray.num_cols_fa_cam * cam_cell.w ) * g_tp.wire_local.R_per_um; } - double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA - double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * 
g_tp.wire_inside_mat.R_per_um * cell.w; - - if (deg_bl_muxing > 1) - { - C_ld_bit_mux_dec_out = - (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing)*gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } - - if (dp.Ndsam_lev_1 > 1) - { - C_ld_sa_mux_lev_1_dec_out = - (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1)*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } - if (dp.Ndsam_lev_2 > 1) - { - C_ld_sa_mux_lev_2_dec_out = - (num_subarrays_per_mat * number_sa_subarray / (dp.Ndsam_lev_1*dp.Ndsam_lev_2))*gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + - num_subarrays_per_row * subarray.num_cols*g_tp.wire_inside_mat.C_per_um*cell.get_w(); - } - - if (num_subarrays_per_row >= 2) - { - // wire heads for both right and left side of a mat, so half the resistance - R_wire_bit_mux_dec_out /= 2.0; - R_wire_sa_mux_dec_out /= 2.0; - } - - - row_dec = new Decoder( - num_dec_signals, - false, - subarray.C_wl, - R_wire_wl_drv_out, - false/*is_fa*/, - is_dram, - true, - camFlag? 
cam_cell:cell); + double R_wire_bit_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w;//TODO:revisit for FA + double R_wire_sa_mux_dec_out = num_subarrays_per_row * subarray.num_cols * g_tp.wire_inside_mat.R_per_um * cell.w; + + if (deg_bl_muxing > 1) { + C_ld_bit_mux_dec_out = + (2 * num_subarrays_per_mat * subarray.num_cols / deg_bl_muxing) * + gate_C(g_tp.w_nmos_b_mux, 0, is_dram) + // 2 transistor per cell + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } + + if (dp.Ndsam_lev_1 > 1) { + C_ld_sa_mux_lev_1_dec_out = + (num_subarrays_per_mat * number_sa_subarray / dp.Ndsam_lev_1) * + gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } + if (dp.Ndsam_lev_2 > 1) { + C_ld_sa_mux_lev_2_dec_out = + (num_subarrays_per_mat * number_sa_subarray / + (dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) * + gate_C(g_tp.w_nmos_sa_mux, 0, is_dram) + + num_subarrays_per_row * subarray.num_cols * + g_tp.wire_inside_mat.C_per_um * cell.get_w(); + } + + if (num_subarrays_per_row >= 2) { + // wire heads for both right and left side of a mat, so half the resistance + R_wire_bit_mux_dec_out /= 2.0; + R_wire_sa_mux_dec_out /= 2.0; + } + + + row_dec = new Decoder( + num_dec_signals, + false, + subarray.C_wl, + R_wire_wl_drv_out, + false/*is_fa*/, + is_dram, + true, + camFlag ? cam_cell : cell); // if (is_fa && (!dp.is_tag)) // { // row_dec->exist = true; // } - bit_mux_dec = new Decoder( - deg_bl_muxing,// This number is 1 for FA or CAM - false, - C_ld_bit_mux_dec_out, - R_wire_bit_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); - sa_mux_lev_1_dec = new Decoder( - dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM - dp.number_way_select_signals_mat ? 
true : false,//only sa_mux_lev_1_dec needs way select signal - C_ld_sa_mux_lev_1_dec_out, - R_wire_sa_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); - sa_mux_lev_2_dec = new Decoder( - dp.Ndsam_lev_2, // This number is 1 for FA or CAM - false, - C_ld_sa_mux_lev_2_dec_out, - R_wire_sa_mux_dec_out, - false/*is_fa*/, - is_dram, - false, - camFlag? cam_cell:cell); - - double C_wire_predec_blk_out; - double R_wire_predec_blk_out; - - if (!is_fa && !pure_cam) - { + bit_mux_dec = new Decoder( + deg_bl_muxing,// This number is 1 for FA or CAM + false, + C_ld_bit_mux_dec_out, + R_wire_bit_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell); + sa_mux_lev_1_dec = new Decoder( + dp.deg_senseamp_muxing_non_associativity, // This number is 1 for FA or CAM + dp.number_way_select_signals_mat ? true : false,//only sa_mux_lev_1_dec needs way select signal + C_ld_sa_mux_lev_1_dec_out, + R_wire_sa_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? cam_cell : cell); + sa_mux_lev_2_dec = new Decoder( + dp.Ndsam_lev_2, // This number is 1 for FA or CAM + false, + C_ld_sa_mux_lev_2_dec_out, + R_wire_sa_mux_dec_out, + false/*is_fa*/, + is_dram, + false, + camFlag ? 
cam_cell : cell); + + double C_wire_predec_blk_out; + double R_wire_predec_blk_out; + + if (!is_fa && !pure_cam) { + + C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; + R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; + + } else { //for pre-decode block's load is same for both FA and CAM + C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; + R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; + } - C_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cell.h; - R_wire_predec_blk_out = num_subarrays_per_row * subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cell.h; - } - else //for pre-decode block's load is same for both FA and CAM - { - C_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.C_per_um * cam_cell.h; - R_wire_predec_blk_out = subarray.num_rows * g_tp.wire_inside_mat.R_per_um * cam_cell.h; - } - - - if (is_fa||pure_cam) - num_dec_signals += _log2(num_subarrays_per_mat); - - PredecBlk * r_predec_blk1 = new PredecBlk( - num_dec_signals, - row_dec, - C_wire_predec_blk_out, - R_wire_predec_blk_out, - num_subarrays_per_mat, - is_dram, - true); - PredecBlk * r_predec_blk2 = new PredecBlk( - num_dec_signals, - row_dec, - C_wire_predec_blk_out, - R_wire_predec_blk_out, - num_subarrays_per_mat, - is_dram, - false); - PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); - PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); - PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); - PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); - PredecBlk * 
sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); - PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); - dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); - dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); - - PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); - PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); - PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); - PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, b_mux_predec_blk2, is_dram); - PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); - PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); - PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); - PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); - way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); - dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); - - r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); - b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); - sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); - sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); - - subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng - - double driver_c_gate_load; - double driver_c_wire_load; - double driver_r_wire_load; - - if (is_fa || pure_cam) - - { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge 
p size is the same - driver_c_gate_load = (subarray.num_cols_fa_cam )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - cam_bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - if (!pure_cam) - { - //This is only used for fully asso not pure CAM - driver_c_gate_load = (subarray.num_cols_fa_ram )* gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - } - } - - else - { - driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; - bl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - } - double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); - double w_row_decoder = area_row_decoder / subarray.area.get_h(); - - double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = - compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); - - double h_subarray_out_drv = subarray_out_wire->area.get_area() * - (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); - - - h_subarray_out_drv *= (RWP + ERP + SCHP); - - double h_comparators = 0.0; - double w_row_predecode_output_wires = 0.0; - double 
h_bit_mux_dec_out_wires = 0.0; - double h_senseamp_mux_dec_out_wires = 0.0; - - if ((!is_fa)&&(dp.is_tag)) - { - //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; - h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); - h_comparators *= (RWP + ERP); - } + if (is_fa || pure_cam) + num_dec_signals += _log2(num_subarrays_per_mat); + + PredecBlk * r_predec_blk1 = new PredecBlk( + num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + true); + PredecBlk * r_predec_blk2 = new PredecBlk( + num_dec_signals, + row_dec, + C_wire_predec_blk_out, + R_wire_predec_blk_out, + num_subarrays_per_mat, + is_dram, + false); + PredecBlk * b_mux_predec_blk1 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, true); + PredecBlk * b_mux_predec_blk2 = new PredecBlk(deg_bl_muxing, bit_mux_dec, 0, 0, 1, is_dram, false); + PredecBlk * sa_mux_lev_1_predec_blk1 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, true); + PredecBlk * sa_mux_lev_1_predec_blk2 = new PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, sa_mux_lev_1_dec, 0, 0, 1, is_dram, false); + PredecBlk * sa_mux_lev_2_predec_blk1 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, true); + PredecBlk * sa_mux_lev_2_predec_blk2 = new PredecBlk(dp.Ndsam_lev_2, sa_mux_lev_2_dec, 0, 0, 1, is_dram, false); + dummy_way_sel_predec_blk1 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, true); + dummy_way_sel_predec_blk2 = new PredecBlk(1, sa_mux_lev_1_dec, 0, 0, 0, is_dram, false); + + PredecBlkDrv * r_predec_blk_drv1 = new PredecBlkDrv(0, r_predec_blk1, is_dram); + PredecBlkDrv * r_predec_blk_drv2 = new PredecBlkDrv(0, r_predec_blk2, is_dram); + PredecBlkDrv * b_mux_predec_blk_drv1 = new PredecBlkDrv(0, b_mux_predec_blk1, is_dram); + PredecBlkDrv * b_mux_predec_blk_drv2 = new PredecBlkDrv(0, 
b_mux_predec_blk2, is_dram); + PredecBlkDrv * sa_mux_lev_1_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk1, is_dram); + PredecBlkDrv * sa_mux_lev_1_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_1_predec_blk2, is_dram); + PredecBlkDrv * sa_mux_lev_2_predec_blk_drv1 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk1, is_dram); + PredecBlkDrv * sa_mux_lev_2_predec_blk_drv2 = new PredecBlkDrv(0, sa_mux_lev_2_predec_blk2, is_dram); + way_sel_drv1 = new PredecBlkDrv(dyn_p.number_way_select_signals_mat, dummy_way_sel_predec_blk1, is_dram); + dummy_way_sel_predec_blk_drv2 = new PredecBlkDrv(1, dummy_way_sel_predec_blk2, is_dram); + + r_predec = new Predec(r_predec_blk_drv1, r_predec_blk_drv2); + b_mux_predec = new Predec(b_mux_predec_blk_drv1, b_mux_predec_blk_drv2); + sa_mux_lev_1_predec = new Predec(sa_mux_lev_1_predec_blk_drv1, sa_mux_lev_1_predec_blk_drv2); + sa_mux_lev_2_predec = new Predec(sa_mux_lev_2_predec_blk_drv1, sa_mux_lev_2_predec_blk_drv2); + + subarray_out_wire = new Wire(g_ip->wt, subarray.area.h);//Bug should be subarray.area.w Owen and Sheng + + double driver_c_gate_load; + double driver_c_wire_load; + double driver_r_wire_load; + + if (is_fa || pure_cam) + + { //Although CAM and RAM use different bl pre-charge driver, assuming the precharge p size is the same + driver_c_gate_load = (subarray.num_cols_fa_cam ) * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, + is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * + g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * + g_tp.wire_outside_mat.R_per_um; + cam_bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + if (!pure_cam) { + //This is only used for fully asso not pure CAM + driver_c_gate_load = (subarray.num_cols_fa_ram ) * + gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, + is_dram, false, false); + driver_c_wire_load = 
subarray.num_cols_fa_ram * cell.w * + g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_ram * cell.w * + g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + } + } + + else { + driver_c_gate_load = subarray.num_cols * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols * cell.w * g_tp.wire_outside_mat.R_per_um; + bl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + } + double area_row_decoder = row_dec->area.get_area() * subarray.num_rows * (RWP + ERP + EWP); + double w_row_decoder = area_row_decoder / subarray.area.get_h(); + + double h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux = + compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); + + double h_subarray_out_drv = subarray_out_wire->area.get_area() * + (subarray.num_cols / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / subarray.area.get_w(); + + + h_subarray_out_drv *= (RWP + ERP + SCHP); + + double h_comparators = 0.0; + double w_row_predecode_output_wires = 0.0; + double h_bit_mux_dec_out_wires = 0.0; + double h_senseamp_mux_dec_out_wires = 0.0; + + if ((!is_fa) && (dp.is_tag)) { + //tagbits = (4 * num_cols_subarray / (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2)) / num_do_b_mat; + h_comparators = compute_comparators_height(dp.tagbits, dyn_p.num_do_b_mat, subarray.area.get_w()); + h_comparators *= (RWP + ERP); + } int branch_effort_predec_blk1_out = (1 << r_predec_blk2->number_input_addr_bits); int branch_effort_predec_blk2_out = (1 << r_predec_blk1->number_input_addr_bits); w_row_predecode_output_wires = (branch_effort_predec_blk1_out + branch_effort_predec_blk2_out) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); - - - double h_non_cell_area = 
(num_subarrays_per_mat / num_subarrays_per_row) * - (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + - h_subarray_out_drv + h_comparators); - - double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); - - if (deg_bl_muxing > 1) - { - h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - if (dp.Ndsam_lev_1 > 1) - { - h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - if (dp.Ndsam_lev_2 > 1) - { - h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); - } - - double h_addr_datain_wires; - if (!g_ip->ver_htree_wires_over_array) - { - h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + - (dp.num_di_b_mat + dp.num_do_b_mat)/num_subarrays_per_row) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + + + double h_non_cell_area = (num_subarrays_per_mat / num_subarrays_per_row) * + (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + + h_subarray_out_drv + h_comparators); + + double w_non_cell_area = MAX(w_row_predecode_output_wires, num_subarrays_per_row * w_row_decoder); + + if (deg_bl_muxing > 1) { + h_bit_mux_dec_out_wires = deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + if (dp.Ndsam_lev_1 > 1) { + h_senseamp_mux_dec_out_wires = dp.Ndsam_lev_1 * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + if (dp.Ndsam_lev_2 > 1) { + h_senseamp_mux_dec_out_wires += dp.Ndsam_lev_2 * g_tp.wire_inside_mat.pitch * (RWP + ERP); + } + + double h_addr_datain_wires; + if (!g_ip->ver_htree_wires_over_array) { + h_addr_datain_wires = (dp.number_addr_bits_mat + + dp.number_way_select_signals_mat + + (dp.num_di_b_mat + dp.num_do_b_mat) / + num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP); + + if (is_fa || pure_cam) { + h_addr_datain_wires = + (dp.number_addr_bits_mat + + 
dp.number_way_select_signals_mat + //TODO: revisit + (dp.num_di_b_mat + dp.num_do_b_mat ) / num_subarrays_per_row) * + g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + + (dp.num_si_b_mat + dp.num_so_b_mat ) / num_subarrays_per_row * + g_tp.wire_inside_mat.pitch * SCHP; + } + //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + + //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); + h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + + h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + + h_addr_datain_wires + + h_bit_mux_dec_out_wires + + h_senseamp_mux_dec_out_wires; - if (is_fa || pure_cam) - { - h_addr_datain_wires = (dp.number_addr_bits_mat + dp.number_way_select_signals_mat + //TODO: revisit - (dp.num_di_b_mat+ dp.num_do_b_mat )/num_subarrays_per_row) * - g_tp.wire_inside_mat.pitch * (RWP + ERP + EWP) + - (dp.num_si_b_mat + dp.num_so_b_mat )/num_subarrays_per_row * g_tp.wire_inside_mat.pitch * SCHP; } - //h_non_cell_area = 2 * h_bit_mux_sense_amp_precharge_sa_mux + - //MAX(h_addr_datain_wires, 2 * h_subarray_out_drv); - h_non_cell_area = (h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux + h_comparators + - h_subarray_out_drv) * (num_subarrays_per_mat / num_subarrays_per_row) + - h_addr_datain_wires + - h_bit_mux_dec_out_wires + - h_senseamp_mux_dec_out_wires; - - } - - // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; - double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + - b_mux_predec_blk_drv1->area.get_area() + - sa_mux_lev_1_predec_blk_drv1->area.get_area() + - sa_mux_lev_2_predec_blk_drv1->area.get_area() + - way_sel_drv1->area.get_area() + - r_predec_blk_drv2->area.get_area() + - b_mux_predec_blk_drv2->area.get_area() + - sa_mux_lev_1_predec_blk_drv2->area.get_area() + - sa_mux_lev_2_predec_blk_drv2->area.get_area() + - r_predec_blk1->area.get_area() + - b_mux_predec_blk1->area.get_area() + - 
sa_mux_lev_1_predec_blk1->area.get_area() + - sa_mux_lev_2_predec_blk1->area.get_area() + - r_predec_blk2->area.get_area() + - b_mux_predec_blk2->area.get_area() + - sa_mux_lev_1_predec_blk2->area.get_area() + - sa_mux_lev_2_predec_blk2->area.get_area() + - bit_mux_dec->area.get_area() + - sa_mux_lev_1_dec->area.get_area() + - sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); - - double area_efficiency_mat; + + // double area_rectangle_center_mat = h_non_cell_area * w_non_cell_area; + double area_mat_center_circuitry = (r_predec_blk_drv1->area.get_area() + + b_mux_predec_blk_drv1->area.get_area() + + sa_mux_lev_1_predec_blk_drv1->area.get_area() + + sa_mux_lev_2_predec_blk_drv1->area.get_area() + + way_sel_drv1->area.get_area() + + r_predec_blk_drv2->area.get_area() + + b_mux_predec_blk_drv2->area.get_area() + + sa_mux_lev_1_predec_blk_drv2->area.get_area() + + sa_mux_lev_2_predec_blk_drv2->area.get_area() + + r_predec_blk1->area.get_area() + + b_mux_predec_blk1->area.get_area() + + sa_mux_lev_1_predec_blk1->area.get_area() + + sa_mux_lev_2_predec_blk1->area.get_area() + + r_predec_blk2->area.get_area() + + b_mux_predec_blk2->area.get_area() + + sa_mux_lev_1_predec_blk2->area.get_area() + + sa_mux_lev_2_predec_blk2->area.get_area() + + bit_mux_dec->area.get_area() + + sa_mux_lev_1_dec->area.get_area() + + sa_mux_lev_2_dec->area.get_area()) * (RWP + ERP + EWP); + + double area_efficiency_mat; // if (!is_fa) // { - assert(num_subarrays_per_mat/num_subarrays_per_row>0); - area.h = (num_subarrays_per_mat/num_subarrays_per_row)* subarray.area.h + h_non_cell_area; + assert(num_subarrays_per_mat / num_subarrays_per_row > 0); + area.h = (num_subarrays_per_mat / num_subarrays_per_row) * + subarray.area.h + h_non_cell_area; area.w = num_subarrays_per_row * subarray.area.get_w() + w_non_cell_area; - area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; - area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * 100.0 / area.get_area(); + area.w = 
(area.h * area.w + area_mat_center_circuitry) / area.h; + area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_mat * + 100.0 / area.get_area(); // cout<<"h_bit_mux_sense_amp_precharge_sa_mux_write_driver_write_mux"<0); - assert(area.w>0); + assert(area.h > 0); + assert(area.w > 0); // } // else // { @@ -423,583 +422,609 @@ Mat::Mat(const DynamicParameter & dyn_p) // area.w = (area.h*area.w + area_mat_center_circuitry) / area.h; // area_efficiency_mat = subarray.area.get_area() * num_subarrays_per_row * 100.0 / area.get_area(); // } - } - - - -Mat::~Mat() -{ - delete row_dec; - delete bit_mux_dec; - delete sa_mux_lev_1_dec; - delete sa_mux_lev_2_dec; - - delete r_predec->blk1; - delete r_predec->blk2; - delete b_mux_predec->blk1; - delete b_mux_predec->blk2; - delete sa_mux_lev_1_predec->blk1; - delete sa_mux_lev_1_predec->blk2; - delete sa_mux_lev_2_predec->blk1; - delete sa_mux_lev_2_predec->blk2; - delete dummy_way_sel_predec_blk1; - delete dummy_way_sel_predec_blk2; - - delete r_predec->drv1; - delete r_predec->drv2; - delete b_mux_predec->drv1; - delete b_mux_predec->drv2; - delete sa_mux_lev_1_predec->drv1; - delete sa_mux_lev_1_predec->drv2; - delete sa_mux_lev_2_predec->drv1; - delete sa_mux_lev_2_predec->drv2; - delete way_sel_drv1; - delete dummy_way_sel_predec_blk_drv2; - - delete r_predec; - delete b_mux_predec; - delete sa_mux_lev_1_predec; - delete sa_mux_lev_2_predec; - - delete subarray_out_wire; - if (!pure_cam) - delete bl_precharge_eq_drv; - - if (is_fa || pure_cam) - { - delete sl_precharge_eq_drv ; - delete sl_data_drv ; - delete cam_bl_precharge_eq_drv; - delete ml_precharge_drv; - delete ml_to_ram_wl_drv; - } } -double Mat::compute_delays(double inrisetime) -{ - int k; - double rd, C_intrinsic, C_ld, tf, R_bl_precharge,r_b_metal, R_bl, C_bl; - double outrisetime_search, outrisetime, row_dec_outrisetime; - // delay calculation for tags of fully associative cache - if (is_fa || pure_cam) - { - //Compute search access time - 
outrisetime_search = compute_cam_delay(inrisetime); - if (is_fa) - { - bl_precharge_eq_drv->compute_delay(0); - k = ml_to_ram_wl_drv->number_gates - 1; - rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); - C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + - drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); - C_ld = ml_to_ram_wl_drv->c_gate_load+ ml_to_ram_wl_drv->c_wire_load; - tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; - delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); - - R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); - r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in - R_bl = subarray.num_rows * r_b_metal; - C_bl = subarray.C_bl; - delay_bl_restore = bl_precharge_eq_drv->delay + - log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* - (R_bl_precharge * C_bl + R_bl * C_bl / 2); - - - outrisetime_search = compute_bitline_delay(outrisetime_search); - outrisetime_search = compute_sa_delay(outrisetime_search); - } - outrisetime_search = compute_subarray_out_drv(outrisetime_search); - subarray_out_wire->set_in_rise_time(outrisetime_search); - outrisetime_search = subarray_out_wire->signal_rise_time(); - delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; - - - //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. 
- outrisetime = r_predec->compute_delays(inrisetime); - row_dec_outrisetime = row_dec->compute_delays(outrisetime); - - outrisetime = b_mux_predec->compute_delays(inrisetime); - bit_mux_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); - sa_mux_lev_1_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); - sa_mux_lev_2_dec->compute_delays(outrisetime); - - if (pure_cam) - { - outrisetime = compute_bitline_delay(row_dec_outrisetime); - outrisetime = compute_sa_delay(outrisetime); - } - return outrisetime_search; +Mat::~Mat() { + delete row_dec; + delete bit_mux_dec; + delete sa_mux_lev_1_dec; + delete sa_mux_lev_2_dec; + + delete r_predec->blk1; + delete r_predec->blk2; + delete b_mux_predec->blk1; + delete b_mux_predec->blk2; + delete sa_mux_lev_1_predec->blk1; + delete sa_mux_lev_1_predec->blk2; + delete sa_mux_lev_2_predec->blk1; + delete sa_mux_lev_2_predec->blk2; + delete dummy_way_sel_predec_blk1; + delete dummy_way_sel_predec_blk2; + + delete r_predec->drv1; + delete r_predec->drv2; + delete b_mux_predec->drv1; + delete b_mux_predec->drv2; + delete sa_mux_lev_1_predec->drv1; + delete sa_mux_lev_1_predec->drv2; + delete sa_mux_lev_2_predec->drv1; + delete sa_mux_lev_2_predec->drv2; + delete way_sel_drv1; + delete dummy_way_sel_predec_blk_drv2; + + delete r_predec; + delete b_mux_predec; + delete sa_mux_lev_1_predec; + delete sa_mux_lev_2_predec; + + delete subarray_out_wire; + if (!pure_cam) + delete bl_precharge_eq_drv; + + if (is_fa || pure_cam) { + delete sl_precharge_eq_drv ; + delete sl_data_drv ; + delete cam_bl_precharge_eq_drv; + delete ml_precharge_drv; + delete ml_to_ram_wl_drv; + } +} + + + +double Mat::compute_delays(double inrisetime) { + int k; + double rd, C_intrinsic, C_ld, tf, R_bl_precharge, r_b_metal, R_bl, C_bl; + double outrisetime_search, outrisetime, row_dec_outrisetime; + // delay calculation for tags of fully associative cache + if (is_fa 
|| pure_cam) { + //Compute search access time + outrisetime_search = compute_cam_delay(inrisetime); + if (is_fa) { + bl_precharge_eq_drv->compute_delay(0); + k = ml_to_ram_wl_drv->number_gates - 1; + rd = tr_R_on(ml_to_ram_wl_drv->width_n[k], NCH, 1, is_dram, false, true); + C_intrinsic = drain_C_(ml_to_ram_wl_drv->width_n[k], PCH, 1, 1, 4 * + cell.h, is_dram, false, true) + + drain_C_(ml_to_ram_wl_drv->width_n[k], NCH, 1, 1, 4 * cell.h, + is_dram, false, true); + C_ld = ml_to_ram_wl_drv->c_gate_load + + ml_to_ram_wl_drv->c_wire_load; + tf = rd * (C_intrinsic + C_ld) + ml_to_ram_wl_drv->r_wire_load * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + + R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um;//dummy rows in sram are filled in + R_bl = subarray.num_rows * r_b_metal; + C_bl = subarray.C_bl; + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / + (g_tp.sram.Vbitpre - dp.V_b_sense)) * + (R_bl_precharge * C_bl + R_bl * C_bl / 2); + + + outrisetime_search = compute_bitline_delay(outrisetime_search); + outrisetime_search = compute_sa_delay(outrisetime_search); + } + outrisetime_search = compute_subarray_out_drv(outrisetime_search); + subarray_out_wire->set_in_rise_time(outrisetime_search); + outrisetime_search = subarray_out_wire->signal_rise_time(); + delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + + + //TODO: this is just for compute plain read/write energy for fa and cam, plain read/write access timing need to be revisited. 
+ outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); + + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + if (pure_cam) { + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + } + return outrisetime_search; + } else { + bl_precharge_eq_drv->compute_delay(0); + if (row_dec->exist == true) { + int k = row_dec->num_gates - 1; + double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); + // TODO: this 4*cell.h number must be revisited + double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4 * + cell.h, is_dram, false, true) + + drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4 * cell.h, is_dram, + false, true); + double C_ld = row_dec->C_ld_dec_out; + double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; + delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); + } + double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); + double r_b_metal = cell.h * g_tp.wire_local.R_per_um; + double R_bl = subarray.num_rows * r_b_metal; + double C_bl = subarray.C_bl; + + if (is_dram) { + delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } else { + delay_bl_restore = bl_precharge_eq_drv->delay + + log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / + (g_tp.sram.Vbitpre - dp.V_b_sense)) * + (R_bl_precharge * C_bl + R_bl * C_bl / 2); + } + } + + + + outrisetime = r_predec->compute_delays(inrisetime); + row_dec_outrisetime = row_dec->compute_delays(outrisetime); + + outrisetime = b_mux_predec->compute_delays(inrisetime); + bit_mux_dec->compute_delays(outrisetime); + + outrisetime = 
sa_mux_lev_1_predec->compute_delays(inrisetime); + sa_mux_lev_1_dec->compute_delays(outrisetime); + + outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); + sa_mux_lev_2_dec->compute_delays(outrisetime); + + outrisetime = compute_bitline_delay(row_dec_outrisetime); + outrisetime = compute_sa_delay(outrisetime); + outrisetime = compute_subarray_out_drv(outrisetime); + subarray_out_wire->set_in_rise_time(outrisetime); + outrisetime = subarray_out_wire->signal_rise_time(); + + delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; + + if (dp.is_tag == true && dp.fully_assoc == false) { + compute_comparator_delay(0); } - else - { - bl_precharge_eq_drv->compute_delay(0); - if (row_dec->exist == true) - { - int k = row_dec->num_gates - 1; - double rd = tr_R_on(row_dec->w_dec_n[k], NCH, 1, is_dram, false, true); - // TODO: this 4*cell.h number must be revisited - double C_intrinsic = drain_C_(row_dec->w_dec_p[k], PCH, 1, 1, 4*cell.h, is_dram, false, true) + - drain_C_(row_dec->w_dec_n[k], NCH, 1, 1, 4*cell.h, is_dram, false, true); - double C_ld = row_dec->C_ld_dec_out; - double tf = rd * (C_intrinsic + C_ld) + row_dec->R_wire_dec_out * C_ld / 2; - delay_wl_reset = horowitz(0, tf, 0.5, 0.5, RISE); - } - double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false); - double r_b_metal = cell.h * g_tp.wire_local.R_per_um; - double R_bl = subarray.num_rows * r_b_metal; - double C_bl = subarray.C_bl; - - if (is_dram) - { - delay_bl_restore = bl_precharge_eq_drv->delay + 2.3 * (R_bl_precharge * C_bl + R_bl * C_bl / 2); - } - else - { - delay_bl_restore = bl_precharge_eq_drv->delay + - log((g_tp.sram.Vbitpre - 0.1 * dp.V_b_sense) / (g_tp.sram.Vbitpre - dp.V_b_sense))* - (R_bl_precharge * C_bl + R_bl * C_bl / 2); - } - } - - - - outrisetime = r_predec->compute_delays(inrisetime); - row_dec_outrisetime = row_dec->compute_delays(outrisetime); - - outrisetime = b_mux_predec->compute_delays(inrisetime); - 
bit_mux_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_1_predec->compute_delays(inrisetime); - sa_mux_lev_1_dec->compute_delays(outrisetime); - - outrisetime = sa_mux_lev_2_predec->compute_delays(inrisetime); - sa_mux_lev_2_dec->compute_delays(outrisetime); - - outrisetime = compute_bitline_delay(row_dec_outrisetime); - outrisetime = compute_sa_delay(outrisetime); - outrisetime = compute_subarray_out_drv(outrisetime); - subarray_out_wire->set_in_rise_time(outrisetime); - outrisetime = subarray_out_wire->signal_rise_time(); - - delay_subarray_out_drv_htree = delay_subarray_out_drv + subarray_out_wire->delay; - - if (dp.is_tag == true && dp.fully_assoc == false) - { - compute_comparator_delay(0); - } - - if (row_dec->exist == false) - { - delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); + + if (row_dec->exist == false) { + delay_wl_reset = MAX(r_predec->blk1->delay, r_predec->blk2->delay); } - return outrisetime; + return outrisetime; } -double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() -{ - - double height = compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, camFlag? cam_cell.w:cell.w / (2 *(RWP + ERP + SCHP))) + - compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, camFlag? cam_cell.w:cell.w / (RWP + ERP + SCHP)); // precharge circuitry - - if (deg_bl_muxing > 1) - { - height += compute_tr_width_after_folding(g_tp.w_nmos_b_mux, cell.w / (2 *(RWP + ERP))); // col mux tr height - // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height - } - - height += height_sense_amplifier(/*camFlag? 
sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height - - if (dp.Ndsam_lev_1 > 1) - { - height += compute_tr_width_after_folding( - g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height - //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); - } - - if (dp.Ndsam_lev_2 > 1) - { - height += compute_tr_width_after_folding( - g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height - //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); - - // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux - height += 2 * compute_tr_width_after_folding( - pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); - height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); - } - - // TODO: this should be uncommented... - /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) - { - //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); - double width_write_driver_write_mux = width_write_driver_or_write_mux(); - double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, - cell.w * - // deg_bl_muxing * - dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); - height += height_write_driver_write_mux; - }*/ - - return height; +double Mat::compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h() { + + double height = + compute_tr_width_after_folding(g_tp.w_pmos_bl_precharge, + camFlag ? cam_cell.w : + cell.w / (2 * (RWP + ERP + SCHP))) + + // precharge circuitry + compute_tr_width_after_folding(g_tp.w_pmos_bl_eq, + camFlag ? 
cam_cell.w : + cell.w / (RWP + ERP + SCHP)); + + if (deg_bl_muxing > 1) { + // col mux tr height + height += + compute_tr_width_after_folding(g_tp.w_nmos_b_mux, + cell.w / (2 * (RWP + ERP))); + // height += deg_bl_muxing * g_tp.wire_inside_mat.pitch * (RWP + ERP); // bit mux dec out wires height + } + + height += height_sense_amplifier(/*camFlag? sram_cell.w:*/cell.w * deg_bl_muxing / (RWP + ERP)); // sense_amp_height + + if (dp.Ndsam_lev_1 > 1) { + height += compute_tr_width_after_folding( + g_tp.w_nmos_sa_mux, cell.w * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height + //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); + } + + if (dp.Ndsam_lev_2 > 1) { + height += compute_tr_width_after_folding( + g_tp.w_nmos_sa_mux, cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP)); // sense_amp_mux_height + //height_senseamp_mux_decode_output_wires = Ndsam * wire_inside_mat_pitch * (RWP + ERP); + + // add height of inverter-buffers between the two levels (pass-transistors) of sense-amp mux + height += 2 * compute_tr_width_after_folding( + pmos_to_nmos_sz_ratio(is_dram) * g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); + height += 2 * compute_tr_width_after_folding(g_tp.min_w_nmos_, cell.w * dp.Ndsam_lev_2 / (RWP + ERP)); + } + + // TODO: this should be uncommented... 
+ /*if (deg_bl_muxing * dp.Ndsam_lev_1 * dp.Ndsam_lev_2 > 1) + { + //height_write_mux_decode_output_wires = deg_bl_muxing * Ndsam * g_tp.wire_inside_mat.pitch * (RWP + EWP); + double width_write_driver_write_mux = width_write_driver_or_write_mux(); + double height_write_driver_write_mux = compute_tr_width_after_folding(2 * width_write_driver_write_mux, + cell.w * + // deg_bl_muxing * + dp.Ndsam_lev_1 * dp.Ndsam_lev_2 / (RWP + EWP)); + height += height_write_driver_write_mux; + }*/ + + return height; } -double Mat::compute_cam_delay(double inrisetime) -{ +double Mat::compute_cam_delay(double inrisetime) { - double out_time_ramp, this_delay; - double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; + double out_time_ramp, this_delay; + double Rwire, tf, c_intrinsic, rd, Cwire, c_gate_load; - double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, + double Wdecdrivep, Wdecdriven, Wfadriven, Wfadrivep, Wfadrive2n, Wfadrive2p, Wfadecdrive1n, Wfadecdrive1p, Wfadecdrive2n, Wfadecdrive2p, Wfadecdriven, Wfadecdrivep, Wfaprechn, Wfaprechp, Wdummyn, Wdummyinvn, Wdummyinvp, Wfainvn, Wfainvp, Waddrnandn, Waddrnandp, Wfanandn, Wfanandp, Wfanorn, Wfanorp, Wdecnandn, Wdecnandp, W_hit_miss_n, W_hit_miss_p; - double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; - int Htagbits; - - double driver_c_gate_load; - double driver_c_wire_load; - double driver_r_wire_load; - //double searchline_precharge_time; - - double leak_power_cc_inverters_sram_cell = 0; - double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; - double leak_power_RD_port_sram_cell = 0; - double leak_power_SCHP_port_sram_cell = 0; - double leak_comparator_cam_cell =0; - - double gate_leak_comparator_cam_cell = 0; - double gate_leak_power_cc_inverters_sram_cell = 0; - double gate_leak_power_RD_port_sram_cell = 0; - double gate_leak_power_SCHP_port_sram_cell = 0; - - c_matchline_metal = cam_cell.get_w() * 
g_tp.wire_local.C_per_um; - c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; - r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; - r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; - - dynSearchEng = 0.0; - delay_matchchline = 0.0; - double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); - bool linear_scaling = false; - - if (linear_scaling) - { - Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process - Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process - Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process - Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process - Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - - Wfaprechp = 
12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process - Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - W_hit_miss_n = Wdummyn; - W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; - //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort - } - else - { - Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process - Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process - Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process - Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process - Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process - Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - Wfainvn = 12.5 * g_ip->F_sz_um;//this 
was 10 micron for the 0.8 micron process - Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - - Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process - Wdummyn = g_tp.cam.cell_nmos_w; - Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process - Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process - Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process - Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - W_hit_miss_n = Wdummyn; - W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; - } - - Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); - - /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. - search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. - From the driver(am and an) to the comparators in all the rows including the dummy row, - Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ - - //Searchline precharge circuitry is same as that of bitline. 
However, no sharing between search ports and r/w ports - //Searchline precharge routes horizontally - driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); - driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; - driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; - - sl_precharge_eq_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - //searchline data driver ; subarray.num_rows + 1 is because of the dummy row - //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines - driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); - driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; - driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; - sl_data_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - sl_precharge_eq_drv->compute_delay(0); - double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr - double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; - double R_bl = (subarray.num_rows + 1) * r_b_metal; - double C_bl = subarray.C_bl_cam; - delay_cam_sl_restore = sl_precharge_eq_drv->delay - + log(g_tp.cam.Vbitpre)* (R_bl_precharge * C_bl + R_bl * C_bl / 2); - - out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) - - //matchline ops delay - delay_matchchline += sl_data_drv->delay; - - /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ - //matchline delay, matchline power, matchline_reset for cycle time computation, - - ////matchline precharge circuitry routes 
vertically - //There are two matchline precharge driver chains per subarray. - driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); - driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; - driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; - - ml_precharge_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - ml_precharge_drv->compute_delay(0); - - - rd = tr_R_on(Wdummyn, NCH, 2, is_dram); - c_intrinsic = Htagbits*(2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram)//TODO: the cell_h_def should be revisit - + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram)/Htagbits);//since each halve only has one precharge tx per matchline - - Cwire = c_matchline_metal * Htagbits; - Rwire = r_matchline_metal * Htagbits; - c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); - - double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); - //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; - double R_ml = Rwire; - double C_ml = Cwire + c_intrinsic; - delay_cam_ml_reset = ml_precharge_drv->delay - + log(g_tp.cam.Vbitpre)* (R_ml_precharge * C_ml + R_ml * C_ml / 2);//TODO: latest CAM has sense amps on matchlines too - - //matchline ops delay - tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); - delay_matchchline += this_delay; - out_time_ramp = this_delay / VTHFA3; - - dynSearchEng += ((c_intrinsic + Cwire + c_gate_load)*(subarray.num_rows +1)) //+ 2*drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, is_dram))//TODO: need to be precise - * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd *2;//* Ntbl;//each subarry has two halves - - /* third stage, from the NAND2 gates to the drivers in the dummy row */ - rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); - c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + - drain_C_(Waddrnandp, PCH, 1, 1, 
g_tp.cell_h_def, is_dram)*2; - c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); - tf = rd * (c_intrinsic + c_gate_load); - this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); - out_time_ramp = this_delay / (1 - VTHFA4); - delay_matchchline += this_delay; - - //only the dummy row has the extra inverter between NAND and NOR gates - dynSearchEng += (c_intrinsic* (subarray.num_rows+1)+ c_gate_load*2) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; - - /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ - rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); - c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Cwire = c_matchline_metal * Htagbits + c_searchline_metal * (subarray.num_rows+1)/2; - Rwire = r_matchline_metal * Htagbits + r_searchline_metal * (subarray.num_rows+1)/2; - c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); - tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); - out_time_ramp = this_delay / VTHFA5; - delay_matchchline += this_delay; - - dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows*c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; - - /*final statge from the NOR gate to drive the wordline of the data portion */ - - //searchline data driver There are two matchline precharge driver chains per subarray. 
- driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic - driver_c_wire_load = subarray.C_wl_ram; - driver_r_wire_load = subarray.R_wl_ram; - - ml_to_ram_wl_drv = new Driver( - driver_c_gate_load, - driver_c_wire_load, - driver_r_wire_load, - is_dram); - - - - rd = tr_R_on(Wfanorn, NCH, 1, is_dram); - c_intrinsic = 2* drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); - c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); - tf = rd * (c_intrinsic + c_gate_load); - this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); - out_time_ramp = this_delay / (1-0.5); - delay_matchchline += this_delay; - - out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); - - //c_gate_load energy is computed in ml_to_ram_wl_drv - dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; - - - /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ - /*Precharge the hitting logic */ - c_intrinsic = 2*drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); - Cwire = c_searchline_metal * subarray.num_rows; - Rwire = r_searchline_metal * subarray.num_rows; - c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram)* subarray.num_rows; - - rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); - //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; - double R_hit_miss = Rwire; - double C_hit_miss = Cwire + c_intrinsic; - delay_hit_miss_reset = log(g_tp.cam.Vbitpre)* (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); - dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - - /*hitting logic evaluation */ - c_intrinsic = 2*drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); - Cwire = c_searchline_metal * subarray.num_rows; - Rwire = r_searchline_metal * subarray.num_rows; - c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, 
g_tp.cell_h_def, is_dram)* subarray.num_rows; - - rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); - tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); - - delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); - - if (is_fa) - delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); - - dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - - /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ - - power_matchline.searchOp.dynamic = dynSearchEng; - - //leakage in one subarray - double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? - double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); - double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; - double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;//approx XOR with Inv - - leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; - leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; - leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; - leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; - leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports - - power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + - leak_comparator_cam_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + - leak_power_RD_port_sram_cell * ERP + - leak_power_SCHP_port_sram_cell*SCHP; + double c_matchline_metal, r_matchline_metal, c_searchline_metal, r_searchline_metal, dynSearchEng; + int Htagbits; + + double driver_c_gate_load; + double driver_c_wire_load; + double driver_r_wire_load; + //double searchline_precharge_time; + + double 
leak_power_cc_inverters_sram_cell = 0; + double leak_power_acc_tr_RW_or_WR_port_sram_cell = 0; + double leak_power_RD_port_sram_cell = 0; + double leak_power_SCHP_port_sram_cell = 0; + double leak_comparator_cam_cell =0; + + double gate_leak_comparator_cam_cell = 0; + double gate_leak_power_cc_inverters_sram_cell = 0; + double gate_leak_power_RD_port_sram_cell = 0; + double gate_leak_power_SCHP_port_sram_cell = 0; + + c_matchline_metal = cam_cell.get_w() * g_tp.wire_local.C_per_um; + c_searchline_metal = cam_cell.get_h() * g_tp.wire_local.C_per_um; + r_matchline_metal = cam_cell.get_w() * g_tp.wire_local.R_per_um; + r_searchline_metal = cam_cell.get_h() * g_tp.wire_local.R_per_um; + + dynSearchEng = 0.0; + delay_matchchline = 0.0; + double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(is_dram); + bool linear_scaling = false; + + if (linear_scaling) { + Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process + Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process + Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process + Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process + Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + Wfainvn = 12.5 * g_ip->F_sz_um;//this was 
10 micron for the 0.8 micron process + Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + Wfaprechp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdummyn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process + Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + W_hit_miss_n = Wdummyn; + W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; + //TODO: this number should updated using new layout; from the NAND to output NOR should be computed using logical effort + } else { + Wdecdrivep = 450 * g_ip->F_sz_um;//this was 360 micron for the 0.8 micron process + Wdecdriven = 300 * g_ip->F_sz_um;//this was 240 micron for the 0.8 micron process + Wfadriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfadrive2n = 250 * g_ip->F_sz_um;//this was 200 micron for the 0.8 micron process + Wfadrive2p = 500 * g_ip->F_sz_um;//this was 400 micron for the 0.8 micron process + Wfadecdrive1n = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfadecdrive1p = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + 
Wfadecdrive2n = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfadecdrive2p = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + Wfadecdriven = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfadecdrivep = 125 * g_ip->F_sz_um;//this was 100 micron for the 0.8 micron process + Wfaprechn = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + Wfainvn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wfainvp = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandn = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + Wfanandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + Wdecnandn = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + Wdecnandp = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + Wfaprechp = g_tp.w_pmos_bl_precharge;//this was 10 micron for the 0.8 micron process + Wdummyn = g_tp.cam.cell_nmos_w; + Wdummyinvn = 75 * g_ip->F_sz_um;//this was 60 micron for the 0.8 micron process + Wdummyinvp = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + Waddrnandn = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Waddrnandp = 62.5 * g_ip->F_sz_um;//this was 50 micron for the 0.8 micron process + Wfanorn = 6.25 * g_ip->F_sz_um;//this was 5 micron for the 0.8 micron process + Wfanorp = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + W_hit_miss_n = Wdummyn; + W_hit_miss_p = g_tp.min_w_nmos_*p_to_n_sizing_r; + } + + Htagbits = (int)(ceil ((double) (subarray.num_cols_fa_cam) / 2.0)); + + /* First stage, searchline is precharged. searchline data driver drives the searchline to open (if miss) the comparators. + search_line_delay, search_line_power, search_line_restore_delay for cycle time computation. 
+ From the driver(am and an) to the comparators in all the rows including the dummy row, + Assuming that comparators in both the normal matching line and the dummy matching line have the same sizing */ + + //Searchline precharge circuitry is same as that of bitline. However, no sharing between search ports and r/w ports + //Searchline precharge routes horizontally + driver_c_gate_load = subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, is_dram, false, false); + driver_c_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.C_per_um; + driver_r_wire_load = subarray.num_cols_fa_cam * cam_cell.w * g_tp.wire_outside_mat.R_per_um; + + sl_precharge_eq_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + //searchline data driver ; subarray.num_rows + 1 is because of the dummy row + //data drv should only have gate_C not 2*gate_C since the two searchlines are differential--same as bitlines + driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wdummyn, 0, is_dram, false, false); + driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; + driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; + sl_data_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + sl_precharge_eq_drv->compute_delay(0); + double R_bl_precharge = tr_R_on(g_tp.w_pmos_bl_precharge, PCH, 1, is_dram, false, false);//Assuming CAM and SRAM have same Pre_eq_dr + double r_b_metal = cam_cell.h * g_tp.wire_local.R_per_um; + double R_bl = (subarray.num_rows + 1) * r_b_metal; + double C_bl = subarray.C_bl_cam; + delay_cam_sl_restore = sl_precharge_eq_drv->delay + + log(g_tp.cam.Vbitpre) * (R_bl_precharge * C_bl + R_bl * C_bl / 2); + + out_time_ramp = sl_data_drv->compute_delay(inrisetime);//After entering one mat, start to consider the inrisetime from 0(0 is passed from outside) + + //matchline ops delay + delay_matchchline += 
sl_data_drv->delay; + + /* second stage, from the trasistors in the comparators(both normal row and dummy row) to the NAND gates that combins both half*/ + //matchline delay, matchline power, matchline_reset for cycle time computation, + + ////matchline precharge circuitry routes vertically + //There are two matchline precharge driver chains per subarray. + driver_c_gate_load = (subarray.num_rows + 1) * gate_C(Wfaprechp, 0, is_dram); + driver_c_wire_load = (subarray.num_rows + 1) * c_searchline_metal; + driver_r_wire_load = (subarray.num_rows + 1) * r_searchline_metal; + + ml_precharge_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + ml_precharge_drv->compute_delay(0); + + + rd = tr_R_on(Wdummyn, NCH, 2, is_dram); + c_intrinsic = Htagbits * + (2 * drain_C_(Wdummyn, NCH, 2, 1, g_tp.cell_h_def, + is_dram)//TODO: the cell_h_def should be revisit + + drain_C_(Wfaprechp, PCH, 1, 1, g_tp.cell_h_def, is_dram) / + Htagbits);//since each halve only has one precharge tx per matchline + + Cwire = c_matchline_metal * Htagbits; + Rwire = r_matchline_metal * Htagbits; + c_gate_load = gate_C(Waddrnandn + Waddrnandp, 0, is_dram); + + double R_ml_precharge = tr_R_on(Wfaprechp, PCH, 1, is_dram); + //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; + double R_ml = Rwire; + double C_ml = Cwire + c_intrinsic; + //TODO: latest CAM has sense amps on matchlines too + delay_cam_ml_reset = ml_precharge_drv->delay + + log(g_tp.cam.Vbitpre) * (R_ml_precharge * C_ml + R_ml * C_ml / 2); + + //matchline ops delay + tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + this_delay = horowitz(out_time_ramp, tf, VTHFA2, VTHFA3, FALL); + delay_matchchline += this_delay; + out_time_ramp = this_delay / VTHFA3; + + dynSearchEng += ((c_intrinsic + Cwire + c_gate_load) * + (subarray.num_rows + 1)) //TODO: need to be precise + * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * + 2;//each subarry has two halves + + 
/* third stage, from the NAND2 gates to the drivers in the dummy row */ + rd = tr_R_on(Waddrnandn, NCH, 2, is_dram); + c_intrinsic = drain_C_(Waddrnandn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + + drain_C_(Waddrnandp, PCH, 1, 1, g_tp.cell_h_def, is_dram) * 2; + c_gate_load = gate_C(Wdummyinvn + Wdummyinvp, 0, is_dram); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz(out_time_ramp, tf, VTHFA3, VTHFA4, RISE); + out_time_ramp = this_delay / (1 - VTHFA4); + delay_matchchline += this_delay; + + //only the dummy row has the extra inverter between NAND and NOR gates + dynSearchEng += (c_intrinsic * (subarray.num_rows + 1) + c_gate_load * 2) * + g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;// * Ntbl; + + /* fourth stage, from the driver in dummy matchline to the NOR2 gate which drives the wordline of the data portion */ + rd = tr_R_on(Wdummyinvn, NCH, 1, is_dram); + c_intrinsic = drain_C_(Wdummyinvn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + drain_C_(Wdummyinvp, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Cwire = c_matchline_metal * Htagbits + c_searchline_metal * + (subarray.num_rows + 1) / 2; + Rwire = r_matchline_metal * Htagbits + r_searchline_metal * + (subarray.num_rows + 1) / 2; + c_gate_load = gate_C(Wfanorn + Wfanorp, 0, is_dram); + tf = rd * (c_intrinsic + Cwire + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + this_delay = horowitz (out_time_ramp, tf, VTHFA4, VTHFA5, FALL); + out_time_ramp = this_delay / VTHFA5; + delay_matchchline += this_delay; + + dynSearchEng += (c_intrinsic + Cwire + subarray.num_rows * c_gate_load) * + g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; + + /*final statge from the NOR gate to drive the wordline of the data portion */ + + //searchline data driver There are two matchline precharge driver chains per subarray. 
+ driver_c_gate_load = gate_C(W_hit_miss_n, 0, is_dram, false, false);//nmos of the pull down logic + driver_c_wire_load = subarray.C_wl_ram; + driver_r_wire_load = subarray.R_wl_ram; + + ml_to_ram_wl_drv = new Driver( + driver_c_gate_load, + driver_c_wire_load, + driver_r_wire_load, + is_dram); + + + + rd = tr_R_on(Wfanorn, NCH, 1, is_dram); + c_intrinsic = 2 * drain_C_(Wfanorn, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(Wfanorp, NCH, 1, 1, g_tp.cell_h_def, is_dram); + c_gate_load = gate_C(ml_to_ram_wl_drv->width_n[0] + ml_to_ram_wl_drv->width_p[0], 0, is_dram); + tf = rd * (c_intrinsic + c_gate_load); + this_delay = horowitz (out_time_ramp, tf, 0.5, 0.5, RISE); + out_time_ramp = this_delay / (1 - 0.5); + delay_matchchline += this_delay; + + out_time_ramp = ml_to_ram_wl_drv->compute_delay(out_time_ramp); + + //c_gate_load energy is computed in ml_to_ram_wl_drv + dynSearchEng += (c_intrinsic) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;//* Ntbl; + + + /* peripheral-- hitting logic "CMOS VLSI Design Fig11.51*/ + /*Precharge the hitting logic */ + c_intrinsic = 2 * + drain_C_(W_hit_miss_p, NCH, 2, 1, g_tp.cell_h_def, is_dram); + Cwire = c_searchline_metal * subarray.num_rows; + Rwire = r_searchline_metal * subarray.num_rows; + c_gate_load = drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) * + subarray.num_rows; + + rd = tr_R_on(W_hit_miss_p, PCH, 1, is_dram, false, false); + //double r_ml_metal = cam_cell.w * g_tp.wire_local.R_per_um; + double R_hit_miss = Rwire; + double C_hit_miss = Cwire + c_intrinsic; + delay_hit_miss_reset = log(g_tp.cam.Vbitpre) * + (rd * C_hit_miss + R_hit_miss * C_hit_miss / 2); + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + + /*hitting logic evaluation */ + c_intrinsic = 2 * + drain_C_(W_hit_miss_n, NCH, 2, 1, g_tp.cell_h_def, is_dram); + Cwire = c_searchline_metal * subarray.num_rows; + Rwire = r_searchline_metal * subarray.num_rows; + c_gate_load = 
drain_C_(W_hit_miss_n, NCH, 1, 1, g_tp.cell_h_def, is_dram) * + subarray.num_rows; + + rd = tr_R_on(W_hit_miss_n, PCH, 1, is_dram, false, false); + tf = rd * (c_intrinsic + Cwire / 2 + c_gate_load) + Rwire * (Cwire / 2 + c_gate_load); + + delay_hit_miss = horowitz(0, tf, 0.5, 0.5, FALL); + + if (is_fa) + delay_matchchline += MAX(ml_to_ram_wl_drv->delay, delay_hit_miss); + + dynSearchEng += (c_intrinsic + Cwire + c_gate_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + + /* TODO: peripheral-- Priority Encoder, usually this is not necessary in processor components*/ + + power_matchline.searchOp.dynamic = dynSearchEng; + + //leakage in one subarray + double Iport = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true);//TODO: how much is the idle time? just by *2? + double Iport_erp = cmos_Isub_leakage(g_tp.cam.cell_a_w, 0, 2, nmos, false, true); + double Icell = cmos_Isub_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, + 1, inv, false, true) * 2; + //approx XOR with Inv + double Icell_comparator = cmos_Isub_leakage(Wdummyn, Wdummyn, 1, inv, + false, true) * 2; + + leak_power_cc_inverters_sram_cell = Icell * g_tp.cam_cell.Vdd; + leak_comparator_cam_cell = Icell_comparator * g_tp.cam_cell.Vdd; + leak_power_acc_tr_RW_or_WR_port_sram_cell = Iport * g_tp.cam_cell.Vdd; + leak_power_RD_port_sram_cell = Iport_erp * g_tp.cam_cell.Vdd; + leak_power_SCHP_port_sram_cell = 0;//search port and r/w port are sperate, therefore no access txs in search ports + + power_matchline.searchOp.leakage += leak_power_cc_inverters_sram_cell + + leak_comparator_cam_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + + leak_power_RD_port_sram_cell * ERP + + leak_power_SCHP_port_sram_cell * SCHP; // power_matchline.searchOp.leakage += leak_comparator_cam_cell; - power_matchline.searchOp.leakage *= (subarray.num_rows+1) * subarray.num_cols_fa_cam;//TODO:dumy line precise - power_matchline.searchOp.leakage += 
(subarray.num_rows+1) * cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; - power_matchline.searchOp.leakage += (subarray.num_rows+1) * cmos_Isub_leakage(Wfanorn, Wfanorp,2, nor) * g_tp.cam_cell.Vdd; - //In idle states, the hit/miss txs are closed (on) therefore no Isub - power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ + power_matchline.searchOp.leakage *= (subarray.num_rows + 1) * + subarray.num_cols_fa_cam;//TODO:dumy line precise + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(0, Wfaprechp, 1, pmos) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(Waddrnandn, Waddrnandp, 2, nand) * g_tp.cam_cell.Vdd; + power_matchline.searchOp.leakage += (subarray.num_rows + 1) * + cmos_Isub_leakage(Wfanorn, Wfanorp, 2, nor) * g_tp.cam_cell.Vdd; + //In idle states, the hit/miss txs are closed (on) therefore no Isub + power_matchline.searchOp.leakage += 0;// subarray.num_rows * cmos_Isub_leakage(W_hit_miss_n, 0,1, nmos) * g_tp.cam_cell.Vdd+ // + cmos_Isub_leakage(0, W_hit_miss_p,1, pmos) * g_tp.cam_cell.Vdd; - //in idle state, Ig_on only possibly exist in access transistors of read only ports - double Ig_port_erp = cmos_Ig_leakage(g_tp.cam.cell_a_w, 0, 1, nmos, false, true); - double Ig_cell = cmos_Ig_leakage(g_tp.cam.cell_nmos_w, g_tp.cam.cell_pmos_w, 1, inv, false, true)*2; - double Ig_cell_comparator = cmos_Ig_leakage(Wdummyn, Wdummyn, 1, inv, false, true)*2;// cmos_Ig_leakage(Wdummyn, 0, 2, nmos)*2; - - gate_leak_comparator_cam_cell = Ig_cell_comparator* g_tp.cam_cell.Vdd; - gate_leak_power_cc_inverters_sram_cell = Ig_cell*g_tp.cam_cell.Vdd; - gate_leak_power_RD_port_sram_cell = Ig_port_erp*g_tp.sram_cell.Vdd; - gate_leak_power_SCHP_port_sram_cell = 0; - - 
//cout<<"power_matchline.searchOp.leakage"< 1) - { - tau = (R_cell_pull_down + R_cell_acc) * - (C_bl + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_bl * (C_bl/2 + 2*C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_bit_mux * (C_drain_bit_mux + 2*C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + - R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); - dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* - subarray.num_cols * num_subarrays_per_mat*/; - dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing); - dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * - num_act_mats_hor_dir * (C_bl + 2*C_drain_bit_mux) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; - //Write Ops are differential for SRAM - } - else - { - tau = (R_cell_pull_down + R_cell_acc) * - (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + - R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); - dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * - 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; - dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / deg_bl_muxing) / deg_senseamp_muxing) * - num_act_mats_hor_dir * C_bl) * g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd*2; + double C_drain_bit_mux = drain_C_(g_tp.w_nmos_b_mux, NCH, 1, 0, + camFlag ? cam_cell.w : cell.w / + (2 * (RWP + ERP + SCHP)), is_dram); + double R_bit_mux = tr_R_on(g_tp.w_nmos_b_mux, NCH, 1, is_dram); + double C_drain_sense_amp_iso = drain_C_(g_tp.w_iso, PCH, 1, 0, + camFlag ? 
cam_cell.w : + cell.w * deg_bl_muxing / + (RWP + ERP + SCHP), is_dram); + double R_sense_amp_iso = tr_R_on(g_tp.w_iso, PCH, 1, is_dram); + double C_sense_amp_latch = gate_C(g_tp.w_sense_p + g_tp.w_sense_n, 0, + is_dram) + + drain_C_(g_tp.w_sense_n, NCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram) + + drain_C_(g_tp.w_sense_p, PCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / (RWP + ERP + SCHP), is_dram); + double C_drain_sense_amp_mux = drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + camFlag ? cam_cell.w : + cell.w * deg_bl_muxing / + (RWP + ERP + SCHP), is_dram); + + if (is_dram) { + double fraction = dp.V_b_sense / ((g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl)); + tstep = 2.3 * fraction * r_dev * + (g_tp.dram_cell_C * (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux)) / + (g_tp.dram_cell_C + (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux)); + delay_writeback = tstep; + dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + dynWriteEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch) * + (g_tp.dram_cell_Vdd / 2) * + g_tp.dram_cell_Vdd /* subarray.num_cols * num_subarrays_per_mat*/ * + num_act_mats_hor_dir * 100; + per_bitline_read_energy = (C_bl + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) * + (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_Vdd; + } else { + double tau; + + if (deg_bl_muxing > 1) { + tau = (R_cell_pull_down + R_cell_acc) * + (C_bl + 2 * C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + C_drain_sense_amp_mux) + + R_bl * (C_bl / 2 + 2 * C_drain_bit_mux + 2 * + C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) + + R_bit_mux * (C_drain_bit_mux + 2 * C_drain_sense_amp_iso + + C_sense_amp_latch + 
C_drain_sense_amp_mux) + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_bit_mux) * 2 * dp.V_b_sense * + g_tp.sram_cell.Vdd; + dynRdEnergy += (2 * C_drain_sense_amp_iso + C_sense_amp_latch + + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd * + (1.0/*subarray.num_cols * num_subarrays_per_mat*/ / + deg_bl_muxing); + dynWriteEnergy += ((1.0/*subarray.num_cols *num_subarrays_per_mat*/ / + deg_bl_muxing) / deg_senseamp_muxing) * + num_act_mats_hor_dir * (C_bl + 2 * C_drain_bit_mux) * + g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; + //Write Ops are differential for SRAM + } else { + tau = (R_cell_pull_down + R_cell_acc) * + (C_bl + C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) + R_bl * C_bl / 2 + + R_sense_amp_iso * (C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux); + dynRdEnergy += (C_bl + 2 * C_drain_sense_amp_iso + C_sense_amp_latch + C_drain_sense_amp_mux) * + 2 * dp.V_b_sense * g_tp.sram_cell.Vdd /* subarray.num_cols * num_subarrays_per_mat*/; + dynWriteEnergy += (((1.0/*subarray.num_cols * num_subarrays_per_mat*/ / + deg_bl_muxing) / deg_senseamp_muxing) * + num_act_mats_hor_dir * C_bl) * + g_tp.sram_cell.Vdd * g_tp.sram_cell.Vdd * 2; + + } + tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); + power_bitline.readOp.leakage = + leak_power_cc_inverters_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell + + leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + + leak_power_RD_port_sram_cell * ERP; + power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + + gate_leak_power_RD_port_sram_cell * ERP; } - tstep = tau * log(V_b_pre / (V_b_pre - dp.V_b_sense)); - power_bitline.readOp.leakage = - leak_power_cc_inverters_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell + - leak_power_acc_tr_RW_or_WR_port_sram_cell * (RWP + EWP - 1) + - leak_power_RD_port_sram_cell * ERP; - 
power_bitline.readOp.gate_leakage = gate_leak_power_cc_inverters_sram_cell + - gate_leak_power_RD_port_sram_cell * ERP; - - } // cout<<"leak_power_cc_inverters_sram_cell"<repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); - gate_C(subarray_out_wire->repeater_size *(subarray_out_wire->wire_length/subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); - tf = rd * C_ld; - this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); - delay_subarray_out_drv += this_delay; - inrisetime = this_delay/(1.0 - 0.5); - power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; - power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 - power_subarray_out_drv.readOp.gate_leakage += cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos)* g_tp.peri_global.Vdd; - - - return inrisetime; +double Mat::compute_subarray_out_drv(double inrisetime) { + double C_ld, rd, tf, this_delay; + double p_to_n_sz_r = pmos_to_nmos_sz_ratio(is_dram); + + // delay of signal through pass-transistor of first level of sense-amp mux to input of inverter-buffer. + rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); + C_ld = dp.Ndsam_lev_1 * drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, + camFlag ? cam_cell.w : cell.w * + deg_bl_muxing / (RWP + ERP + SCHP), + is_dram) + + gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd; + // delay of signal through inverter-buffer to second level of sense-amp mux. 
+ // internal delay of buffer + rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); + C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + gate_C(g_tp.min_w_nmos_ + p_to_n_sz_r * g_tp.min_w_nmos_, 0.0, is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += + cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, + inv, is_dram) * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, + inv) * g_tp.peri_global.Vdd; + + // inverter driving drain of pass transistor of second level of sense-amp mux. + rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1, is_dram); + C_ld = drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(p_to_n_sz_r * g_tp.min_w_nmos_, PCH, 1, 1, g_tp.cell_h_def, + is_dram) + + drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? + cam_cell.w : cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / + (RWP + ERP + SCHP), is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += + cmos_Isub_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, + inv) * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.min_w_nmos_, p_to_n_sz_r * g_tp.min_w_nmos_, 1, + inv) * g_tp.peri_global.Vdd; + + + // delay of signal through pass-transistor to input of subarray output driver. 
+ rd = tr_R_on(g_tp.w_nmos_sa_mux, NCH, 1, is_dram); + C_ld = dp.Ndsam_lev_2 * + drain_C_(g_tp.w_nmos_sa_mux, NCH, 1, 0, camFlag ? cam_cell.w : + cell.w * deg_bl_muxing * dp.Ndsam_lev_1 / (RWP + ERP + SCHP), + is_dram) + + //gate_C(subarray_out_wire->repeater_size * g_tp.min_w_nmos_ * (1 + p_to_n_sz_r), 0.0, is_dram); + gate_C(subarray_out_wire->repeater_size * + (subarray_out_wire->wire_length / + subarray_out_wire->repeater_spacing) * g_tp.min_w_nmos_ * + (1 + p_to_n_sz_r), 0.0, is_dram); + tf = rd * C_ld; + this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE); + delay_subarray_out_drv += this_delay; + inrisetime = this_delay / (1.0 - 0.5); + power_subarray_out_drv.readOp.dynamic += C_ld * 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd; + power_subarray_out_drv.readOp.leakage += 0; // for now, let leakage of the pass transistor be 0 + power_subarray_out_drv.readOp.gate_leakage += + cmos_Ig_leakage(g_tp.w_nmos_sa_mux, 0, 1, nmos) * g_tp.peri_global.Vdd; + + + return inrisetime; } -double Mat::compute_comparator_delay(double inrisetime) -{ - int A = g_ip->tag_assoc; - - int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already - // a multiple of 4. 
- - /* First Inverter */ - double Ceq = gate_C(g_tp.w_comp_inv_n2+g_tp.w_comp_inv_p2, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); - double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); - double tf = Req*Ceq; - double st1del = horowitz(inrisetime,tf,VTHCOMPINV,VTHCOMPINV,FALL); - double nextinputtime = st1del/VTHCOMPINV; - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - - //For each degree of associativity - //there are 4 such quarter comparators - double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; - double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, g_tp.w_comp_inv_p1, 1, inv, is_dram)* 4 * A; - /* Second Inverter */ - Ceq = gate_C(g_tp.w_comp_inv_n3+g_tp.w_comp_inv_p3, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Req = tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); - tf = Req*Ceq; - double st2del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHCOMPINV,RISE); - nextinputtime = st2del/(1.0-VTHCOMPINV); - power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, inv, is_dram)* 4 * A; - - /* Third Inverter */ - Ceq = gate_C(g_tp.w_eval_inv_n+g_tp.w_eval_inv_p, 0, is_dram) + - drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); - Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); - tf = Req*Ceq; - double st3del = horowitz(nextinputtime,tf,VTHCOMPINV,VTHEVALINV,FALL); - nextinputtime = st3del/(VTHEVALINV); - power_comparator.readOp.dynamic += 
0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, inv, is_dram)* 4 * A; - - /* Final Inverter (virtual ground driver) discharging compare part */ - double r1 = tr_R_on(g_tp.w_comp_n,NCH,2, is_dram); - double r2 = tr_R_on(g_tp.w_eval_inv_n,NCH,1, is_dram); /* was switch */ - double c2 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + - drain_C_(g_tp.w_eval_inv_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_eval_inv_n,NCH,1, 1, g_tp.cell_h_def, is_dram); - double c1 = (tagbits_)*(drain_C_(g_tp.w_comp_n,NCH,1, 1, g_tp.cell_h_def, is_dram) + - drain_C_(g_tp.w_comp_n,NCH,2, 1, g_tp.cell_h_def, is_dram)) + - drain_C_(g_tp.w_comp_p,PCH,1, 1, g_tp.cell_h_def, is_dram) + - gate_C(WmuxdrvNANDn+WmuxdrvNANDp,0, is_dram); - power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; - power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); - lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; - lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A; // stack factor of 0.2 - - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, inv, is_dram)* 4 * A; - gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, is_dram)* 4 * A;//for gate leakage this equals to a inverter - - /* time to go to threshold of mux driver */ - double tstep = (r2*c2+(r1+r2)*c1)*log(1.0/VTHMUXNAND); - /* take into account non-zero input rise time */ - double m = g_tp.peri_global.Vdd/nextinputtime; - double Tcomparatorni; - - if((tstep) <= (0.5*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)/m)) - { - double a = m; - double b = 
2*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); - double c = -2*(tstep)*(g_tp.peri_global.Vdd-g_tp.peri_global.Vth)+1/m*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth)*((g_tp.peri_global.Vdd*VTHEVALINV)-g_tp.peri_global.Vth); - Tcomparatorni = (-b+sqrt(b*b-4*a*c))/(2*a); - } - else - { - Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd+g_tp.peri_global.Vth)/(2*m) - (g_tp.peri_global.Vdd*VTHEVALINV)/m; - } - delay_comparator = Tcomparatorni+st1del+st2del+st3del; - power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; - power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; - - return Tcomparatorni / (1.0 - VTHMUXNAND);; +double Mat::compute_comparator_delay(double inrisetime) { + int A = g_ip->tag_assoc; + + int tagbits_ = dp.tagbits / 4; // Assuming there are 4 quarter comparators. input tagbits is already + // a multiple of 4. + + /* First Inverter */ + double Ceq = gate_C(g_tp.w_comp_inv_n2 + g_tp.w_comp_inv_p2, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p1, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n1, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double Req = tr_R_on(g_tp.w_comp_inv_p1, PCH, 1, is_dram); + double tf = Req * Ceq; + double st1del = horowitz(inrisetime, tf, VTHCOMPINV, VTHCOMPINV, FALL); + double nextinputtime = st1del / VTHCOMPINV; + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + + //For each degree of associativity + //there are 4 such quarter comparators + double lkgCurrent = cmos_Isub_leakage(g_tp.w_comp_inv_n1, + g_tp.w_comp_inv_p1, 1, inv, + is_dram) * 4 * A; + double gatelkgCurrent = cmos_Ig_leakage(g_tp.w_comp_inv_n1, + g_tp.w_comp_inv_p1, 1, inv, + is_dram) * 4 * A; + /* Second Inverter */ + Ceq = gate_C(g_tp.w_comp_inv_n3 + g_tp.w_comp_inv_p3, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p2, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n2, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Req = 
tr_R_on(g_tp.w_comp_inv_n2, NCH, 1, is_dram); + tf = Req * Ceq; + double st2del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHCOMPINV, RISE); + nextinputtime = st2del / (1.0 - VTHCOMPINV); + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, + inv, is_dram) * 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n2, g_tp.w_comp_inv_p2, 1, + inv, is_dram) * 4 * A; + + /* Third Inverter */ + Ceq = gate_C(g_tp.w_eval_inv_n + g_tp.w_eval_inv_p, 0, is_dram) + + drain_C_(g_tp.w_comp_inv_p3, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_inv_n3, NCH, 1, 1, g_tp.cell_h_def, is_dram); + Req = tr_R_on(g_tp.w_comp_inv_p3, PCH, 1, is_dram); + tf = Req * Ceq; + double st3del = horowitz(nextinputtime, tf, VTHCOMPINV, VTHEVALINV, FALL); + nextinputtime = st3del / (VTHEVALINV); + power_comparator.readOp.dynamic += 0.5 * Ceq * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, 1, + inv, is_dram) * 4 * A; + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_inv_n3, g_tp.w_comp_inv_p3, + 1, inv, is_dram) * 4 * A; + + /* Final Inverter (virtual ground driver) discharging compare part */ + double r1 = tr_R_on(g_tp.w_comp_n, NCH, 2, is_dram); + double r2 = tr_R_on(g_tp.w_eval_inv_n, NCH, 1, is_dram); /* was switch */ + double c2 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1, + g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n, NCH, 2, 1, + g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_eval_inv_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_eval_inv_n, NCH, 1, 1, g_tp.cell_h_def, is_dram); + double c1 = (tagbits_) * (drain_C_(g_tp.w_comp_n, NCH, 1, 1, + g_tp.cell_h_def, is_dram) + + drain_C_(g_tp.w_comp_n, NCH, 2, 1, + g_tp.cell_h_def, is_dram)) + + drain_C_(g_tp.w_comp_p, PCH, 1, 1, g_tp.cell_h_def, is_dram) + + gate_C(WmuxdrvNANDn + WmuxdrvNANDp, 0, 
is_dram); + power_comparator.readOp.dynamic += 0.5 * c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 4 * A; + power_comparator.readOp.dynamic += c1 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * (A - 1); + lkgCurrent += cmos_Isub_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, + inv, is_dram) * 4 * A; + lkgCurrent += cmos_Isub_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, + is_dram) * 4 * A; // stack factor of 0.2 + + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_eval_inv_n, g_tp.w_eval_inv_p, 1, + inv, is_dram) * 4 * A; + //for gate leakage this equals to a inverter + gatelkgCurrent += cmos_Ig_leakage(g_tp.w_comp_n, g_tp.w_comp_n, 1, inv, + is_dram) * 4 * A; + + /* time to go to threshold of mux driver */ + double tstep = (r2 * c2 + (r1 + r2) * c1) * log(1.0 / VTHMUXNAND); + /* take into account non-zero input rise time */ + double m = g_tp.peri_global.Vdd / nextinputtime; + double Tcomparatorni; + + if ((tstep) <= (0.5*(g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m)) { + double a = m; + double b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) - + g_tp.peri_global.Vth); + double c = -2 * (tstep) * (g_tp.peri_global.Vdd - + g_tp.peri_global.Vth) + 1 / m * + ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) * + ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth); + Tcomparatorni = (-b + sqrt(b * b - 4 * a * c)) / (2 * a); + } else { + Tcomparatorni = (tstep) + (g_tp.peri_global.Vdd + + g_tp.peri_global.Vth) / (2 * m) - + (g_tp.peri_global.Vdd * VTHEVALINV) / m; + } + delay_comparator = Tcomparatorni + st1del + st2del + st3del; + power_comparator.readOp.leakage = lkgCurrent * g_tp.peri_global.Vdd; + power_comparator.readOp.gate_leakage = gatelkgCurrent * g_tp.peri_global.Vdd; + + return Tcomparatorni / (1.0 - VTHMUXNAND);; } -void Mat::compute_power_energy() -{ - //for cam and FA, power.readOp is the plain read power, power.searchOp is the associative search related power +void Mat::compute_power_energy() { + //for cam and FA, power.readOp is the 
plain read power, power.searchOp is the associative search related power //when search all subarrays and all mats are fully active - //when plain read/write only one subarray in a single mat is active. + //when plain read/write only one subarray in a single mat is active. // add energy consumed in predecoder drivers. This unit is shared by all subarrays in a mat. - power.readOp.dynamic += r_predec->power.readOp.dynamic + - b_mux_predec->power.readOp.dynamic + - sa_mux_lev_1_predec->power.readOp.dynamic + - sa_mux_lev_2_predec->power.readOp.dynamic; - - // add energy consumed in decoders - power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; - if (!(is_fa||pure_cam)) - power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; - - // add energy consumed in bitline prechagers, SAs, and bitlines - if (!(is_fa||pure_cam)) - { - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; - power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; - - //Add sense amps energy - num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_sa.readOp.dynamic *= num_sa_subarray*num_subarrays_per_mat ; - - // add energy consumed in bitlines - //cout<<"bitline power"<power.readOp.dynamic) * num_do_b_mat; - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - } - - else if (is_fa) - { - //for plain read/write only one subarray in a mat is active - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic - + cam_bl_precharge_eq_drv->power.readOp.dynamic; - 
power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; - - //Add sense amps energy - num_sa_subarray = (subarray.num_cols_fa_cam + subarray.num_cols_fa_ram)/ deg_bl_muxing; - num_sa_subarray_search = subarray.num_cols_fa_ram/ deg_bl_muxing; - power_sa.searchOp.dynamic = power_sa.readOp.dynamic*num_sa_subarray_search; - power_sa.readOp.dynamic *= num_sa_subarray; - - - // add energy consumed in bitlines - power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; - power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); - power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam+subarray.num_cols_fa_ram); - power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; - - //Add subarray output energy - power_subarray_out_drv.searchOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - - //add energy consumed inside cam - power_matchline.searchOp.dynamic *= num_subarrays_per_mat; - power_searchline_precharge = sl_precharge_eq_drv->power; - power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; - power_searchline = sl_data_drv->power; - power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; - power_matchline_precharge = ml_precharge_drv->power; - power_matchline_precharge.searchOp.dynamic = 
power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; - power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; - power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; - - power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; - - power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; - //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - - } - else - { - // add energy consumed in bitline prechagers - power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; - //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; - //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; - //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; - - //Add sense amps energy - num_sa_subarray = subarray.num_cols_fa_cam/ deg_bl_muxing; - power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; - power_sa.searchOp.dynamic = 0; - - power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; - power_bitline.searchOp.dynamic = 0; - power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; - - power_subarray_out_drv.searchOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; - power_subarray_out_drv.readOp.dynamic = - (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; - - power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + - power_sa.readOp.dynamic + - power_bitline.readOp.dynamic + - power_subarray_out_drv.readOp.dynamic; - - power.readOp.dynamic += power_row_decoders.readOp.dynamic + - bit_mux_dec->power.readOp.dynamic + - 
sa_mux_lev_1_dec->power.readOp.dynamic + - sa_mux_lev_2_dec->power.readOp.dynamic + - power_comparator.readOp.dynamic; - - - ////add energy consumed inside cam - power_matchline.searchOp.dynamic *= num_subarrays_per_mat; - power_searchline_precharge = sl_precharge_eq_drv->power; - power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; - power_searchline = sl_data_drv->power; - power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic*subarray.num_cols_fa_cam* num_subarrays_per_mat;; - power_matchline_precharge = ml_precharge_drv->power; - power_matchline_precharge.searchOp.dynamic = power_matchline_precharge.readOp.dynamic* num_subarrays_per_mat; - power_ml_to_ram_wl_drv= ml_to_ram_wl_drv->power; - power_ml_to_ram_wl_drv.searchOp.dynamic= ml_to_ram_wl_drv->power.readOp.dynamic; - - power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline_precharge.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_searchline.searchOp.dynamic; - power_cam_all_active.searchOp.dynamic +=power_matchline_precharge.searchOp.dynamic; - - power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; - //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - - } - - - - // calculate leakage power - if (!(is_fa || pure_cam)) - { - int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - - power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); - - //num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray 
* num_subarrays_per_mat * (RWP + ERP); - - power.readOp.leakage += power_bitline.readOp.leakage + - power_bl_precharge_eq_drv.readOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; - //cout<<"leakage"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; - - power.readOp.leakage += r_predec->power.readOp.leakage + - b_mux_predec->power.readOp.leakage + - sa_mux_lev_1_predec->power.readOp.leakage + - sa_mux_lev_2_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage + - power_bit_mux_decoders.readOp.leakage + - power_sa_mux_lev_1_decoders.readOp.leakage + - power_sa_mux_lev_2_decoders.readOp.leakage; - //cout<<"leakage2"<power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP); - - //num_sa_subarray = subarray.num_cols / deg_bl_muxing; - power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); - - power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + - power_bl_precharge_eq_drv.readOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; - //cout<<"leakage"<power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; - power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; - power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; - power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * 
dp.Ndsam_lev_2; - - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - b_mux_predec->power.readOp.gate_leakage + - sa_mux_lev_1_predec->power.readOp.gate_leakage + - sa_mux_lev_2_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage + - power_bit_mux_decoders.readOp.gate_leakage + - power_sa_mux_lev_1_decoders.readOp.gate_leakage + - power_sa_mux_lev_2_decoders.readOp.gate_leakage; - } - else if (is_fa) - { - int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - - power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); - - //cout<<"leakage3"<power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - - power.readOp.leakage += power_bitline.readOp.leakage + - power_bl_precharge_eq_drv.readOp.leakage + - power_bl_precharge_eq_drv.searchOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; - - //cout<<"leakage4"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; - power.readOp.leakage += r_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage; - - //cout<<"leakage5"<power.readOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; - - power.readOp.leakage += power_cam_all_active.searchOp.leakage; - -// cout<<"leakage6"<power.readOp.gate_leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.gate_leakage = 
cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); - - //cout<<"leakage3"<power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - - power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + - power_bl_precharge_eq_drv.readOp.gate_leakage + - power_bl_precharge_eq_drv.searchOp.gate_leakage + - power_sa.readOp.gate_leakage + - power_subarray_out_drv.readOp.gate_leakage; - - //cout<<"leakage4"<power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage; + power.readOp.dynamic += r_predec->power.readOp.dynamic + + b_mux_predec->power.readOp.dynamic + + sa_mux_lev_1_predec->power.readOp.dynamic + + sa_mux_lev_2_predec->power.readOp.dynamic; + + // add energy consumed in decoders + power_row_decoders.readOp.dynamic = row_dec->power.readOp.dynamic; + if (!(is_fa || pure_cam)) + power_row_decoders.readOp.dynamic *= num_subarrays_per_mat; + + // add energy consumed in bitline prechagers, SAs, and bitlines + if (!(is_fa || pure_cam)) { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + + //Add sense amps energy + num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray * num_subarrays_per_mat ; + + // add energy consumed in bitlines + //cout<<"bitline power"<power.readOp.dynamic) * num_do_b_mat; + + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + 
sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + } - //cout<<"leakage5"<power.readOp.dynamic + + cam_bl_precharge_eq_drv->power.readOp.dynamic; + power_bl_precharge_eq_drv.searchOp.dynamic = bl_precharge_eq_drv->power.readOp.dynamic; + + //Add sense amps energy + num_sa_subarray = (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram) / deg_bl_muxing; + num_sa_subarray_search = subarray.num_cols_fa_ram / deg_bl_muxing; + power_sa.searchOp.dynamic = power_sa.readOp.dynamic * + num_sa_subarray_search; + power_sa.readOp.dynamic *= num_sa_subarray; + + + // add energy consumed in bitlines + power_bitline.searchOp.dynamic = power_bitline.readOp.dynamic; + power_bitline.readOp.dynamic *= (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram); + power_bitline.writeOp.dynamic *= (subarray.num_cols_fa_cam + + subarray.num_cols_fa_ram); + power_bitline.searchOp.dynamic *= subarray.num_cols_fa_ram; + + //Add subarray output energy + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + + + power.readOp.dynamic += power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + + //add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = 
sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * + subarray.num_cols_fa_cam * num_subarrays_per_mat;; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = + power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic = + ml_to_ram_wl_drv->power.readOp.dynamic; + + power_cam_all_active.searchOp.dynamic = power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_matchline_precharge.searchOp.dynamic; + + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; + + } else { + // add energy consumed in bitline prechagers + power_bl_precharge_eq_drv.readOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv.readOp.dynamic *= num_subarrays_per_mat; + //power_bl_precharge_eq_drv.searchOp.dynamic = cam_bl_precharge_eq_drv->power.readOp.dynamic; + //power_bl_precharge_eq_drv.searchOp.dynamic *= num_subarrays_per_mat; + + //Add sense amps energy + num_sa_subarray = subarray.num_cols_fa_cam / deg_bl_muxing; + power_sa.readOp.dynamic *= num_sa_subarray;//*num_subarrays_per_mat; + power_sa.searchOp.dynamic = 0; + + power_bitline.readOp.dynamic *= subarray.num_cols_fa_cam; + power_bitline.searchOp.dynamic = 0; + power_bitline.writeOp.dynamic *= subarray.num_cols_fa_cam; + + power_subarray_out_drv.searchOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_so_b_mat; + power_subarray_out_drv.readOp.dynamic = + (power_subarray_out_drv.readOp.dynamic + subarray_out_wire->power.readOp.dynamic) * num_do_b_mat; + + power.readOp.dynamic += 
power_bl_precharge_eq_drv.readOp.dynamic + + power_sa.readOp.dynamic + + power_bitline.readOp.dynamic + + power_subarray_out_drv.readOp.dynamic; + + power.readOp.dynamic += power_row_decoders.readOp.dynamic + + bit_mux_dec->power.readOp.dynamic + + sa_mux_lev_1_dec->power.readOp.dynamic + + sa_mux_lev_2_dec->power.readOp.dynamic + + power_comparator.readOp.dynamic; + + + ////add energy consumed inside cam + power_matchline.searchOp.dynamic *= num_subarrays_per_mat; + power_searchline_precharge = sl_precharge_eq_drv->power; + power_searchline_precharge.searchOp.dynamic = power_searchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_searchline = sl_data_drv->power; + power_searchline.searchOp.dynamic = power_searchline.readOp.dynamic * + subarray.num_cols_fa_cam * num_subarrays_per_mat;; + power_matchline_precharge = ml_precharge_drv->power; + power_matchline_precharge.searchOp.dynamic = + power_matchline_precharge.readOp.dynamic * num_subarrays_per_mat; + power_ml_to_ram_wl_drv = ml_to_ram_wl_drv->power; + power_ml_to_ram_wl_drv.searchOp.dynamic = + ml_to_ram_wl_drv->power.readOp.dynamic; + + power_cam_all_active.searchOp.dynamic = + power_matchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline_precharge.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_searchline.searchOp.dynamic; + power_cam_all_active.searchOp.dynamic += + power_matchline_precharge.searchOp.dynamic; + + power.searchOp.dynamic += power_cam_all_active.searchOp.dynamic; + //power.searchOp.dynamic += ml_to_ram_wl_drv->power.readOp.dynamic; - //inside cam - power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; - 
power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + } - power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; - } - else - { - int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; - //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; - power_sa.readOp.leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + // calculate leakage power + if (!(is_fa || pure_cam)) { + int number_output_drivers_subarray = num_sa_subarray / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP); + + //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + + power.readOp.leakage += power_bitline.readOp.leakage + + power_bl_precharge_eq_drv.readOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; + //cout<<"leakage"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; + power_bit_mux_decoders.readOp.leakage = bit_mux_dec->power.readOp.leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.leakage = sa_mux_lev_1_dec->power.readOp.leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.leakage = sa_mux_lev_2_dec->power.readOp.leakage * dp.Ndsam_lev_2; + + power.readOp.leakage += r_predec->power.readOp.leakage 
+ + b_mux_predec->power.readOp.leakage + + sa_mux_lev_1_predec->power.readOp.leakage + + sa_mux_lev_2_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage + + power_bit_mux_decoders.readOp.leakage + + power_sa_mux_lev_1_decoders.readOp.leakage + + power_sa_mux_lev_2_decoders.readOp.leakage; + //cout<<"leakage2"<power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP); + + //num_sa_subarray = subarray.num_cols / deg_bl_muxing; + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP); + + power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + + power_bl_precharge_eq_drv.readOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + //cout<<"leakage"<power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; + power_bit_mux_decoders.readOp.gate_leakage = bit_mux_dec->power.readOp.gate_leakage * deg_bl_muxing; + power_sa_mux_lev_1_decoders.readOp.gate_leakage = sa_mux_lev_1_dec->power.readOp.gate_leakage * dp.Ndsam_lev_1; + power_sa_mux_lev_2_decoders.readOp.gate_leakage = sa_mux_lev_2_dec->power.readOp.gate_leakage * dp.Ndsam_lev_2; + + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + b_mux_predec->power.readOp.gate_leakage + + sa_mux_lev_1_predec->power.readOp.gate_leakage + + sa_mux_lev_2_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage + + power_bit_mux_decoders.readOp.gate_leakage + + power_sa_mux_lev_1_decoders.readOp.gate_leakage + + power_sa_mux_lev_2_decoders.readOp.gate_leakage; + } else if (is_fa) { + int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); - power_subarray_out_drv.readOp.leakage = - (power_subarray_out_drv.readOp.leakage + 
subarray_out_wire->power.readOp.leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); - power.readOp.leakage += //power_bitline.readOp.leakage + - //power_bl_precharge_eq_drv.readOp.leakage + - power_bl_precharge_eq_drv.searchOp.leakage + - power_sa.readOp.leakage + - power_subarray_out_drv.readOp.leakage; + //cout<<"leakage3"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); - power.readOp.leakage += r_predec->power.readOp.leakage + - power_row_decoders.readOp.leakage; - //inside cam - power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_precharge_eq_drv->power.readOp.leakage; - power_cam_all_active.searchOp.leakage +=sl_data_drv->power.readOp.leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); - power.readOp.leakage += power_cam_all_active.searchOp.leakage; + power.readOp.leakage += power_bitline.readOp.leakage + + power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; - //+++Below is gate leakage - power_bl_precharge_eq_drv.searchOp.gate_leakage = 
cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; - power_sa.readOp.gate_leakage *= num_sa_subarray*num_subarrays_per_mat*(RWP + ERP + SCHP); + //cout<<"leakage4"<power.readOp.leakage * subarray.num_rows * num_subarrays_per_mat; + power.readOp.leakage += r_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage; - power_subarray_out_drv.readOp.gate_leakage = - (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * - number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + //cout<<"leakage5"<power.readOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= + num_subarrays_per_mat; - // gate_leakage power - power_row_decoders.readOp.gate_leakage = row_dec->power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat*(RWP + ERP + EWP); - power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + - power_row_decoders.readOp.gate_leakage; + power.readOp.leakage += power_cam_all_active.searchOp.leakage; - //inside cam - power_cam_all_active.searchOp.gate_leakage = power_matchline.searchOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_precharge_eq_drv->power.readOp.gate_leakage; - power_cam_all_active.searchOp.gate_leakage +=sl_data_drv->power.readOp.gate_leakage*subarray.num_cols_fa_cam; - power_cam_all_active.searchOp.gate_leakage +=ml_precharge_drv->power.readOp.dynamic; - power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; +// cout<<"leakage6"<power.readOp.gate_leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP + SCHP); + + 
//cout<<"leakage3"<power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.gate_leakage += power_bitline.readOp.gate_leakage + + power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + + //cout<<"leakage4"<power.readOp.gate_leakage * subarray.num_rows * num_subarrays_per_mat; + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; + + //cout<<"leakage5"<power.readOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= num_subarrays_per_mat; + + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + + } else { + int number_output_drivers_subarray = num_sa_subarray;// / (dp.Ndsam_lev_1 * dp.Ndsam_lev_2); + + //power_bitline.readOp.leakage *= subarray.num_rows * subarray.num_cols * num_subarrays_per_mat; + //power_bl_precharge_eq_drv.readOp.leakage = bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_bl_precharge_eq_drv.searchOp.leakage = cam_bl_precharge_eq_drv->power.readOp.leakage * num_subarrays_per_mat; + power_sa.readOp.leakage *= num_sa_subarray * num_subarrays_per_mat * + (RWP + ERP + SCHP); + + + power_subarray_out_drv.readOp.leakage = + (power_subarray_out_drv.readOp.leakage + subarray_out_wire->power.readOp.leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.leakage += //power_bitline.readOp.leakage + + //power_bl_precharge_eq_drv.readOp.leakage + + power_bl_precharge_eq_drv.searchOp.leakage + + power_sa.readOp.leakage + + power_subarray_out_drv.readOp.leakage; + + // leakage power + 
power_row_decoders.readOp.leakage = row_dec->power.readOp.leakage * + subarray.num_rows * num_subarrays_per_mat * (RWP + ERP + EWP); + power.readOp.leakage += r_predec->power.readOp.leakage + + power_row_decoders.readOp.leakage; + + //inside cam + power_cam_all_active.searchOp.leakage = power_matchline.searchOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_precharge_eq_drv->power.readOp.leakage; + power_cam_all_active.searchOp.leakage += + sl_data_drv->power.readOp.leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.leakage *= num_subarrays_per_mat; + + power.readOp.leakage += power_cam_all_active.searchOp.leakage; + + //+++Below is gate leakage + power_bl_precharge_eq_drv.searchOp.gate_leakage = cam_bl_precharge_eq_drv->power.readOp.gate_leakage * num_subarrays_per_mat; + power_sa.readOp.gate_leakage *= num_sa_subarray * + num_subarrays_per_mat * (RWP + ERP + SCHP); + + + power_subarray_out_drv.readOp.gate_leakage = + (power_subarray_out_drv.readOp.gate_leakage + subarray_out_wire->power.readOp.gate_leakage) * + number_output_drivers_subarray * num_subarrays_per_mat * (RWP + ERP + SCHP); + + power.readOp.gate_leakage += //power_bitline.readOp.gate_leakage + + //power_bl_precharge_eq_drv.readOp.gate_leakage + + power_bl_precharge_eq_drv.searchOp.gate_leakage + + power_sa.readOp.gate_leakage + + power_subarray_out_drv.readOp.gate_leakage; + + // gate_leakage power + power_row_decoders.readOp.gate_leakage = + row_dec->power.readOp.gate_leakage * subarray.num_rows * + num_subarrays_per_mat * (RWP + ERP + EWP); + power.readOp.gate_leakage += r_predec->power.readOp.gate_leakage + + power_row_decoders.readOp.gate_leakage; + + //inside cam + power_cam_all_active.searchOp.gate_leakage = + power_matchline.searchOp.gate_leakage; + power_cam_all_active.searchOp.gate_leakage += + sl_precharge_eq_drv->power.readOp.gate_leakage; + 
power_cam_all_active.searchOp.gate_leakage += + sl_data_drv->power.readOp.gate_leakage * subarray.num_cols_fa_cam; + power_cam_all_active.searchOp.gate_leakage += + ml_precharge_drv->power.readOp.dynamic; + power_cam_all_active.searchOp.gate_leakage *= + num_subarrays_per_mat; + + power.readOp.gate_leakage += power_cam_all_active.searchOp.gate_leakage; + } } diff --git a/ext/mcpat/cacti/mat.h b/ext/mcpat/cacti/mat.h index 8d038be8b..38200107c 100755 --- a/ext/mcpat/cacti/mat.h +++ b/ext/mcpat/cacti/mat.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -39,9 +40,8 @@ #include "subarray.h" #include "wire.h" -class Mat : public Component -{ - public: +class Mat : public Component { +public: Mat(const DynamicParameter & dyn_p); ~Mat(); double compute_delays(double inrisetime); // return outrisetime @@ -106,8 +106,8 @@ class Mat : public Component int deg_bl_muxing; int num_act_mats_hor_dir; double delay_writeback; - Area cell,cam_cell; - bool is_dram,is_fa, pure_cam, camFlag; + Area cell, cam_cell; + bool is_dram, is_fa, pure_cam, camFlag; int num_mats; powerDef power_sa; double delay_sa; @@ -127,7 +127,7 @@ class Mat : public Component uint32_t num_subarrays_per_row; // the number of subarrays in a row of a mat - private: +private: double compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(); double width_write_driver_or_write_mux(); double compute_comparators_height(int tagbits, int number_ways_in_mat, double subarray_mem_cell_area_w); diff --git a/ext/mcpat/cacti/nuca.cc b/ext/mcpat/cacti/nuca.cc index 2aabe843f..e0b4dcdaf 100644 --- a/ext/mcpat/cacti/nuca.cc +++ b/ext/mcpat/cacti/nuca.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -36,89 +37,86 @@ #include "Ucache.h" #include "nuca.h" -unsigned int MIN_BANKSIZE=65536; +unsigned int MIN_BANKSIZE = 65536; #define FIXED_OVERHEAD 55e-12 /* clock skew and jitter in s. Ref: Hrishikesh et al ISCA 01 */ #define LATCH_DELAY 28e-12 /* latch delay in s (later should use FO4 TODO) */ #define CONTR_2_BANK_LAT 0 int cont_stats[2 /*l2 or l3*/][5/* cores */][ROUTER_TYPES][7 /*banks*/][8 /* cycle time */]; - Nuca::Nuca( - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) - ):deviceType(dt) -{ - init_cont(); +Nuca::Nuca( + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) +): deviceType(dt) { + init_cont(); } void -Nuca::init_cont() -{ - FILE *cont; - char line[5000]; - char jk[5000]; - cont = fopen("contention.dat", "r"); - if (!cont) { - cout << "contention.dat file is missing!\n"; - exit(0); - } - - for(int i=0; i<2; i++) { - for(int j=2; j<5; j++) { - for(int k=0; k nuca_list; - Router *router_s[ROUTER_TYPES]; - router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global)); - router_s[0]->print_router(); - router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global)); - router_s[1]->print_router(); - router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global)); - router_s[2]->print_router(); - - int core_in; // to store no. of cores - - /* to search diff grid organizations */ - double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat, - curr_acclat; - double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power, - avg_leakage_power; - - double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF; - int opt_rows = 0; - int opt_columns = 0; - double opt_totno_hops = 0; - double opt_avg_hop = 0; - double opt_dyn_power = 0, opt_leakage_power = 0; - min_values_t minval; - - int bank_start = 0; - - int flit_width = 0; - - /* vertical and horizontal hop latency values */ - int ver_hop_lat, hor_hop_lat; /* in cycles */ - - - /* no. 
of different bank sizes to consider */ - int iterations; - - - g_ip->nuca_cache_sz = g_ip->cache_sz; - nuca_list.push_back(new nuca_org_t()); - - if (g_ip->cache_level == 0) l2_c = 1; - else l2_c = 0; - - if (g_ip->cores <= 4) core_in = 2; - else if (g_ip->cores <= 8) core_in = 3; - else if (g_ip->cores <= 16) core_in = 4; - else {cout << "Number of cores should be <= 16!\n"; exit(0);} - - - // set the lower bound to an appropriate value. this depends on cache associativity - if (g_ip->assoc > 2) { - i = 2; - while (i != g_ip->assoc) { - MIN_BANKSIZE *= 2; - i *= 2; - } - } - - iterations = (int)logtwo((int)g_ip->cache_sz/MIN_BANKSIZE); - - if (g_ip->force_wiretype) - { - if (g_ip->wt == Low_swing) { - wt_min = Low_swing; - wt_max = Low_swing; - } +void +Nuca::sim_nuca() { + /* temp variables */ + int it, ro, wr; + int num_cyc; + unsigned int i, j, k; + unsigned int r, c; + int l2_c; + int bank_count = 0; + uca_org_t ures; + nuca_org_t *opt_n; + mem_array tag, data; + list nuca_list; + Router *router_s[ROUTER_TYPES]; + router_s[0] = new Router(64.0, 8, 4, &(g_tp.peri_global)); + router_s[0]->print_router(); + router_s[1] = new Router(128.0, 8, 4, &(g_tp.peri_global)); + router_s[1]->print_router(); + router_s[2] = new Router(256.0, 8, 4, &(g_tp.peri_global)); + router_s[2]->print_router(); + + int core_in; // to store no. of cores + + /* to search diff grid organizations */ + double curr_hop, totno_hops, totno_hhops, totno_vhops, tot_lat, + curr_acclat; + double avg_lat, avg_hop, avg_hhop, avg_vhop, avg_dyn_power, + avg_leakage_power; + + double opt_acclat = INF, opt_avg_lat = INF, opt_tot_lat = INF; + int opt_rows = 0; + int opt_columns = 0; + double opt_totno_hops = 0; + double opt_avg_hop = 0; + double opt_dyn_power = 0, opt_leakage_power = 0; + min_values_t minval; + + int bank_start = 0; + + int flit_width = 0; + + /* vertical and horizontal hop latency values */ + int ver_hop_lat, hor_hop_lat; /* in cycles */ + + + /* no. 
of different bank sizes to consider */ + int iterations; + + + g_ip->nuca_cache_sz = g_ip->cache_sz; + nuca_list.push_back(new nuca_org_t()); + + if (g_ip->cache_level == 0) l2_c = 1; + else l2_c = 0; + + if (g_ip->cores <= 4) core_in = 2; + else if (g_ip->cores <= 8) core_in = 3; + else if (g_ip->cores <= 16) core_in = 4; else { - wt_min = Global; - wt_max = Low_swing-1; + cout << "Number of cores should be <= 16!\n"; + exit(0); } - } - else { - wt_min = Global; - wt_max = Low_swing; - } - if (g_ip->nuca_bank_count != 0) { // simulate just one bank - if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 && - g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 && - g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) { - fprintf(stderr,"Incorrect bank count value! Please fix the value in cache.cfg\n"); - } - bank_start = (int)logtwo((double)g_ip->nuca_bank_count); - iterations = bank_start+1; - g_ip->cache_sz = g_ip->cache_sz/g_ip->nuca_bank_count; - } - cout << "Simulating various NUCA configurations\n"; - for (it=bank_start; itnuca_cache_sz/g_ip->cache_sz; - cout << "====" << g_ip->cache_sz << "\n"; - - for (wr=wt_min; wr<=wt_max; wr++) { - - for (ro=0; roflit_size; //initialize router - nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time; - - /* calculate router and wire parameters */ - - double vlength = ures.cache_ht; /* length of the wire (u)*/ - double hlength = ures.cache_len; // u - /* find delay, area, and power for wires */ - wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength); - wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength); + // set the lower bound to an appropriate value. 
this depends on cache associativity + if (g_ip->assoc > 2) { + i = 2; + while (i != g_ip->assoc) { + MIN_BANKSIZE *= 2; + i *= 2; + } + } - hor_hop_lat = calc_cycles(wire_horizontal[wr]->delay, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); - ver_hop_lat = calc_cycles(wire_vertical[wr]->delay, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); + iterations = (int)logtwo((int)g_ip->cache_sz / MIN_BANKSIZE); + if (g_ip->force_wiretype) { + if (g_ip->wt == Low_swing) { + wt_min = Low_swing; + wt_max = Low_swing; + } else { + wt_min = Global; + wt_max = Low_swing - 1; + } + } else { + wt_min = Global; + wt_max = Low_swing; + } + if (g_ip->nuca_bank_count != 0) { // simulate just one bank + if (g_ip->nuca_bank_count != 2 && g_ip->nuca_bank_count != 4 && + g_ip->nuca_bank_count != 8 && g_ip->nuca_bank_count != 16 && + g_ip->nuca_bank_count != 32 && g_ip->nuca_bank_count != 64) { + fprintf(stderr, "Incorrect bank count value! Please fix the ", + "value in cache.cfg\n"); + } + bank_start = (int)logtwo((double)g_ip->nuca_bank_count); + iterations = bank_start + 1; + g_ip->cache_sz = g_ip->cache_sz / g_ip->nuca_bank_count; + } + cout << "Simulating various NUCA configurations\n"; + for (it = bank_start; it < iterations; it++) { + /* different bank count values */ + ures.tag_array2 = &tag; + ures.data_array2 = &data; /* - * assume a grid like topology and explore for optimal network - * configuration using different row and column count values. + * find the optimal bank organization */ - for (c=1; c<=(unsigned int)bank_count; c++) { - while (bank_count%c != 0) c++; - r = bank_count/c; - - /* - * to find the avg access latency of a NUCA cache, uncontended - * access time to each bank from the - * cache controller is calculated. - * avg latency = - * sum of the access latencies to individual banks)/bank - * count value. 
- */ - totno_hops = totno_hhops = totno_vhops = tot_lat = 0; - k = 1; - for (i=0; inuca_cache_sz / g_ip->cache_sz; + cout << "====" << g_ip->cache_sz << "\n"; + + for (wr = wt_min; wr <= wt_max; wr++) { + + for (ro = 0; ro < ROUTER_TYPES; ro++) { + flit_width = (int) router_s[ro]->flit_size; //initialize router + nuca_list.back()->nuca_pda.cycle_time = router_s[ro]->cycle_time; + + /* calculate router and wire parameters */ + + double vlength = ures.cache_ht; /* length of the wire (u)*/ + double hlength = ures.cache_len; // u + + /* find delay, area, and power for wires */ + wire_vertical[wr] = new Wire((enum Wire_type) wr, vlength); + wire_horizontal[wr] = new Wire((enum Wire_type) wr, hlength); + + + hor_hop_lat = + calc_cycles(wire_horizontal[wr]->delay, + 1 /(nuca_list.back()->nuca_pda.cycle_time * + .001)); + ver_hop_lat = + calc_cycles(wire_vertical[wr]->delay, + 1 / (nuca_list.back()->nuca_pda.cycle_time * + .001)); + + /* + * assume a grid like topology and explore for optimal network + * configuration using different row and column count values. + */ + for (c = 1; c <= (unsigned int)bank_count; c++) { + while (bank_count % c != 0) c++; + r = bank_count / c; + + /* + * to find the avg access latency of a NUCA cache, uncontended + * access time to each bank from the + * cache controller is calculated. + * avg latency = + * sum of the access latencies to individual banks)/bank + * count value. 
+ */ + totno_hops = totno_hhops = totno_vhops = tot_lat = 0; + k = 1; + for (i = 0; i < r; i++) { + for (j = 0; j < c; j++) { + /* + * vertical hops including the + * first hop from the cache controller + */ + curr_hop = i + 1; + curr_hop += j; /* horizontal hops */ + totno_hhops += j; + totno_vhops += (i + 1); + curr_acclat = (i * ver_hop_lat + CONTR_2_BANK_LAT + + j * hor_hop_lat); + + tot_lat += curr_acclat; + totno_hops += curr_hop; + } + } + avg_lat = tot_lat / bank_count; + avg_hop = totno_hops / bank_count; + avg_hhop = totno_hhops / bank_count; + avg_vhop = totno_vhops / bank_count; + + /* net access latency */ + curr_acclat = 2 * avg_lat + 2 * (router_s[ro]->delay * + avg_hop) + + calc_cycles(ures.access_time, + 1 / + (nuca_list.back()->nuca_pda.cycle_time * + .001)); + + /* avg access lat of nuca */ + avg_dyn_power = + avg_hop * + (router_s[ro]->power.readOp.dynamic) + avg_hhop * + (wire_horizontal[wr]->power.readOp.dynamic) * + (g_ip->block_sz * 8 + 64) + avg_vhop * + (wire_vertical[wr]->power.readOp.dynamic) * + (g_ip->block_sz * 8 + 64) + ures.power.readOp.dynamic; + + avg_leakage_power = + bank_count * router_s[ro]->power.readOp.leakage + + avg_hhop * (wire_horizontal[wr]->power.readOp.leakage * + wire_horizontal[wr]->delay) * flit_width + + avg_vhop * (wire_vertical[wr]->power.readOp.leakage * + wire_horizontal[wr]->delay); + + if (curr_acclat < opt_acclat) { + opt_acclat = curr_acclat; + opt_tot_lat = tot_lat; + opt_avg_lat = avg_lat; + opt_totno_hops = totno_hops; + opt_avg_hop = avg_hop; + opt_rows = r; + opt_columns = c; + opt_dyn_power = avg_dyn_power; + opt_leakage_power = avg_leakage_power; + } + totno_hops = 0; + tot_lat = 0; + totno_hhops = 0; + totno_vhops = 0; + } + nuca_list.back()->wire_pda.power.readOp.dynamic = + opt_avg_hop * flit_width * + (wire_horizontal[wr]->power.readOp.dynamic + + wire_vertical[wr]->power.readOp.dynamic); + nuca_list.back()->avg_hops = opt_avg_hop; + /* network delay/power */ + nuca_list.back()->h_wire = 
wire_horizontal[wr]; + nuca_list.back()->v_wire = wire_vertical[wr]; + nuca_list.back()->router = router_s[ro]; + /* bank delay/power */ + + nuca_list.back()->bank_pda.delay = ures.access_time; + nuca_list.back()->bank_pda.power = ures.power; + nuca_list.back()->bank_pda.area.h = ures.cache_ht; + nuca_list.back()->bank_pda.area.w = ures.cache_len; + nuca_list.back()->bank_pda.cycle_time = ures.cycle_time; + + num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/, + 1 / + (nuca_list.back()->nuca_pda.cycle_time * + .001/*GHz*/)); + if (num_cyc % 2 != 0) num_cyc++; + if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles + + if (it < 7) { + nuca_list.back()->nuca_pda.delay = opt_acclat + + cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; + nuca_list.back()->contention = + cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; + } else { + nuca_list.back()->nuca_pda.delay = opt_acclat + + cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; + nuca_list.back()->contention = + cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; + } + nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power; + nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power; + + /* array organization */ + nuca_list.back()->bank_count = bank_count; + nuca_list.back()->rows = opt_rows; + nuca_list.back()->columns = opt_columns; + calculate_nuca_area (nuca_list.back()); + + minval.update_min_values(nuca_list.back()); + nuca_list.push_back(new nuca_org_t()); + opt_acclat = BIGNUM; + } - } - avg_lat = tot_lat/bank_count; - avg_hop = totno_hops/bank_count; - avg_hhop = totno_hhops/bank_count; - avg_vhop = totno_vhops/bank_count; - - /* net access latency */ - curr_acclat = 2*avg_lat + 2*(router_s[ro]->delay*avg_hop) + - calc_cycles(ures.access_time, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001)); - - /* avg access lat of nuca */ - avg_dyn_power = - avg_hop * - (router_s[ro]->power.readOp.dynamic) + avg_hhop * - (wire_horizontal[wr]->power.readOp.dynamic) * - (g_ip->block_sz*8 + 
64) + avg_vhop * - (wire_vertical[wr]->power.readOp.dynamic) * - (g_ip->block_sz*8 + 64) + ures.power.readOp.dynamic; - - avg_leakage_power = - bank_count * router_s[ro]->power.readOp.leakage + - avg_hhop * (wire_horizontal[wr]->power.readOp.leakage* - wire_horizontal[wr]->delay) * flit_width + - avg_vhop * (wire_vertical[wr]->power.readOp.leakage * - wire_horizontal[wr]->delay); - - if (curr_acclat < opt_acclat) { - opt_acclat = curr_acclat; - opt_tot_lat = tot_lat; - opt_avg_lat = avg_lat; - opt_totno_hops = totno_hops; - opt_avg_hop = avg_hop; - opt_rows = r; - opt_columns = c; - opt_dyn_power = avg_dyn_power; - opt_leakage_power = avg_leakage_power; - } - totno_hops = 0; - tot_lat = 0; - totno_hhops = 0; - totno_vhops = 0; } - nuca_list.back()->wire_pda.power.readOp.dynamic = - opt_avg_hop * flit_width * - (wire_horizontal[wr]->power.readOp.dynamic + - wire_vertical[wr]->power.readOp.dynamic); - nuca_list.back()->avg_hops = opt_avg_hop; - /* network delay/power */ - nuca_list.back()->h_wire = wire_horizontal[wr]; - nuca_list.back()->v_wire = wire_vertical[wr]; - nuca_list.back()->router = router_s[ro]; - /* bank delay/power */ - - nuca_list.back()->bank_pda.delay = ures.access_time; - nuca_list.back()->bank_pda.power = ures.power; - nuca_list.back()->bank_pda.area.h = ures.cache_ht; - nuca_list.back()->bank_pda.area.w = ures.cache_len; - nuca_list.back()->bank_pda.cycle_time = ures.cycle_time; - - num_cyc = calc_cycles(nuca_list.back()->bank_pda.delay /*s*/, - 1/(nuca_list.back()->nuca_pda.cycle_time*.001/*GHz*/)); - if(num_cyc%2 != 0) num_cyc++; - if (num_cyc > 16) num_cyc = 16; // we have data only up to 16 cycles - - if (it < 7) { - nuca_list.back()->nuca_pda.delay = opt_acclat + - cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; - nuca_list.back()->contention = - cont_stats[l2_c][core_in][ro][it][num_cyc/2-1]; - } - else { - nuca_list.back()->nuca_pda.delay = opt_acclat + - cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; - nuca_list.back()->contention = - 
cont_stats[l2_c][core_in][ro][7][num_cyc/2-1]; - } - nuca_list.back()->nuca_pda.power.readOp.dynamic = opt_dyn_power; - nuca_list.back()->nuca_pda.power.readOp.leakage = opt_leakage_power; + g_ip->cache_sz /= 2; + } - /* array organization */ - nuca_list.back()->bank_count = bank_count; - nuca_list.back()->rows = opt_rows; - nuca_list.back()->columns = opt_columns; - calculate_nuca_area (nuca_list.back()); + delete(nuca_list.back()); + nuca_list.pop_back(); + opt_n = find_optimal_nuca(&nuca_list, &minval); + print_nuca(opt_n); + g_ip->cache_sz = g_ip->nuca_cache_sz / opt_n->bank_count; - minval.update_min_values(nuca_list.back()); - nuca_list.push_back(new nuca_org_t()); - opt_acclat = BIGNUM; + list::iterator niter; + for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) { + delete *niter; + } + nuca_list.clear(); - } + for (int i = 0; i < ROUTER_TYPES; i++) { + delete router_s[i]; } - g_ip->cache_sz /= 2; - } - - delete(nuca_list.back()); - nuca_list.pop_back(); - opt_n = find_optimal_nuca(&nuca_list, &minval); - print_nuca(opt_n); - g_ip->cache_sz = g_ip->nuca_cache_sz/opt_n->bank_count; - - list::iterator niter; - for (niter = nuca_list.begin(); niter != nuca_list.end(); ++niter) - { - delete *niter; - } - nuca_list.clear(); - - for(int i=0; i < ROUTER_TYPES; i++) - { - delete router_s[i]; - } - g_ip->display_ip(); - // g_ip->force_cache_config = true; - // g_ip->ndwl = 8; - // g_ip->ndbl = 16; - // g_ip->nspd = 4; - // g_ip->ndcm = 1; - // g_ip->ndsam1 = 8; - // g_ip->ndsam2 = 32; + g_ip->display_ip(); + // g_ip->force_cache_config = true; + // g_ip->ndwl = 8; + // g_ip->ndbl = 16; + // g_ip->nspd = 4; + // g_ip->ndcm = 1; + // g_ip->ndsam1 = 8; + // g_ip->ndsam2 = 32; } - void -Nuca::print_nuca (nuca_org_t *fr) -{ - printf("\n---------- CACTI version 6.5, Non-uniform Cache Access " - "----------\n\n"); - printf("Optimal number of banks - %d\n", fr->bank_count); - printf("Grid organization rows x columns - %d x %d\n", - fr->rows, fr->columns); - 
printf("Network frequency - %g GHz\n", - (1/fr->nuca_pda.cycle_time)*1e3); - printf("Cache dimension (mm x mm) - %g x %g\n", - fr->nuca_pda.area.h, - fr->nuca_pda.area.w); - - fr->router->print_router(); - - printf("\n\nWire stats:\n"); - if (fr->h_wire->wt == Global) { - printf("\tWire type - Full swing global wires with least " - "possible delay\n"); - } - else if (fr->h_wire->wt == Global_5) { - printf("\tWire type - Full swing global wires with " - "5%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_10) { - printf("\tWire type - Full swing global wires with " - "10%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_20) { - printf("\tWire type - Full swing global wires with " - "20%% delay penalty\n"); - } - else if (fr->h_wire->wt == Global_30) { - printf("\tWire type - Full swing global wires with " - "30%% delay penalty\n"); - } - else if(fr->h_wire->wt == Low_swing) { - printf("\tWire type - Low swing wires\n"); - } - - printf("\tHorizontal link delay - %g (ns)\n", - fr->h_wire->delay*1e9); - printf("\tVertical link delay - %g (ns)\n", - fr->v_wire->delay*1e9); - printf("\tDelay/length - %g (ns/mm)\n", - fr->h_wire->delay*1e9/fr->bank_pda.area.w); - printf("\tHorizontal link energy -dynamic/access %g (nJ)\n" - "\t -leakage %g (nW)\n\n", - fr->h_wire->power.readOp.dynamic*1e9, - fr->h_wire->power.readOp.leakage*1e9); - printf("\tVertical link energy -dynamic/access %g (nJ)\n" - "\t -leakage %g (nW)\n\n", - fr->v_wire->power.readOp.dynamic*1e9, - fr->v_wire->power.readOp.leakage*1e9); - printf("\n\n"); - fr->v_wire->print_wire(); - printf("\n\nBank stats:\n"); +void +Nuca::print_nuca (nuca_org_t *fr) { + printf("\n---------- CACTI version 6.5, Non-uniform Cache Access " + "----------\n\n"); + printf("Optimal number of banks - %d\n", fr->bank_count); + printf("Grid organization rows x columns - %d x %d\n", + fr->rows, fr->columns); + printf("Network frequency - %g GHz\n", + (1 / fr->nuca_pda.cycle_time)*1e3); + printf("Cache dimension (mm 
x mm) - %g x %g\n", + fr->nuca_pda.area.h, + fr->nuca_pda.area.w); + + fr->router->print_router(); + + printf("\n\nWire stats:\n"); + if (fr->h_wire->wt == Global) { + printf("\tWire type - Full swing global wires with least " + "possible delay\n"); + } else if (fr->h_wire->wt == Global_5) { + printf("\tWire type - Full swing global wires with " + "5%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_10) { + printf("\tWire type - Full swing global wires with " + "10%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_20) { + printf("\tWire type - Full swing global wires with " + "20%% delay penalty\n"); + } else if (fr->h_wire->wt == Global_30) { + printf("\tWire type - Full swing global wires with " + "30%% delay penalty\n"); + } else if (fr->h_wire->wt == Low_swing) { + printf("\tWire type - Low swing wires\n"); + } + + printf("\tHorizontal link delay - %g (ns)\n", + fr->h_wire->delay*1e9); + printf("\tVertical link delay - %g (ns)\n", + fr->v_wire->delay*1e9); + printf("\tDelay/length - %g (ns/mm)\n", + fr->h_wire->delay*1e9 / fr->bank_pda.area.w); + printf("\tHorizontal link energy -dynamic/access %g (nJ)\n" + "\t -leakage %g (nW)\n\n", + fr->h_wire->power.readOp.dynamic*1e9, + fr->h_wire->power.readOp.leakage*1e9); + printf("\tVertical link energy -dynamic/access %g (nJ)\n" + "\t -leakage %g (nW)\n\n", + fr->v_wire->power.readOp.dynamic*1e9, + fr->v_wire->power.readOp.leakage*1e9); + printf("\n\n"); + fr->v_wire->print_wire(); + printf("\n\nBank stats:\n"); } - nuca_org_t * -Nuca::find_optimal_nuca (list *n, min_values_t *minval) -{ - double cost = 0; - double min_cost = BIGNUM; - nuca_org_t *res = NULL; - float d, a, dp, lp, c; - int v; - dp = g_ip->dynamic_power_wt_nuca; - lp = g_ip->leakage_power_wt_nuca; - a = g_ip->area_wt_nuca; - d = g_ip->delay_wt_nuca; - c = g_ip->cycle_time_wt_nuca; - - list::iterator niter; - - - for (niter = n->begin(); niter != n->end(); niter++) { - fprintf(stderr, "\n-----------------------------" - 
"---------------\n"); - - - printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t " - "bank_dpower = %g \tleak = %g \tcycle = %g\n", - (*niter)->bank_count, - (*niter)->nuca_pda.delay, - (*niter)->nuca_pda.power.readOp.dynamic, - (*niter)->h_wire->wt, - (*niter)->bank_pda.power.readOp.dynamic, - (*niter)->nuca_pda.power.readOp.leakage, - (*niter)->nuca_pda.cycle_time); - - - if (g_ip->ed == 1) { - cost = ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) { - min_cost = cost; - res = ((*niter)); - } - } - else if (g_ip->ed == 2) { - cost = ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.delay/minval->min_delay)* - ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn); - if (min_cost > cost) { - min_cost = cost; - res = ((*niter)); - } - } - else { - /* - * check whether the current organization - * meets the input deviation constraints - */ - v = check_nuca_org((*niter), minval); - if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling - - if (v) { - cost = (d * ((*niter)->nuca_pda.delay/minval->min_delay) + - c * ((*niter)->nuca_pda.cycle_time/minval->min_cyc) + - dp * ((*niter)->nuca_pda.power.readOp.dynamic/minval->min_dyn) + - lp * ((*niter)->nuca_pda.power.readOp.leakage/minval->min_leakage) + - a * ((*niter)->nuca_pda.area.get_area()/minval->min_area)); - fprintf(stderr, "cost = %g\n", cost); - - if (min_cost > cost) { - min_cost = cost; - res = ((*niter)); +nuca_org_t * +Nuca::find_optimal_nuca (list *n, min_values_t *minval) { + double cost = 0; + double min_cost = BIGNUM; + nuca_org_t *res = NULL; + float d, a, dp, lp, c; + int v; + dp = g_ip->dynamic_power_wt_nuca; + lp = g_ip->leakage_power_wt_nuca; + a = g_ip->area_wt_nuca; + d = g_ip->delay_wt_nuca; + c = g_ip->cycle_time_wt_nuca; + + list::iterator niter; + + + for (niter = n->begin(); niter != n->end(); niter++) { + fprintf(stderr, 
"\n-----------------------------" + "---------------\n"); + + + printf("NUCA___stats %d \tbankcount: lat = %g \tdynP = %g \twt = %d\t " + "bank_dpower = %g \tleak = %g \tcycle = %g\n", + (*niter)->bank_count, + (*niter)->nuca_pda.delay, + (*niter)->nuca_pda.power.readOp.dynamic, + (*niter)->h_wire->wt, + (*niter)->bank_pda.power.readOp.dynamic, + (*niter)->nuca_pda.power.readOp.leakage, + (*niter)->nuca_pda.cycle_time); + + + if (g_ip->ed == 1) { + cost = ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } else if (g_ip->ed == 2) { + cost = ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.delay / minval->min_delay) * + ((*niter)->nuca_pda.power.readOp.dynamic / minval->min_dyn); + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } else { + /* + * check whether the current organization + * meets the input deviation constraints + */ + v = check_nuca_org((*niter), minval); + if (minval->min_leakage == 0) minval->min_leakage = 0.1; //FIXME remove this after leakage modeling + + if (v) { + cost = (d * ((*niter)->nuca_pda.delay / minval->min_delay) + + c * ((*niter)->nuca_pda.cycle_time / minval->min_cyc) + + dp * ((*niter)->nuca_pda.power.readOp.dynamic / + minval->min_dyn) + + lp * ((*niter)->nuca_pda.power.readOp.leakage / + minval->min_leakage) + + a * ((*niter)->nuca_pda.area.get_area() / + minval->min_area)); + fprintf(stderr, "cost = %g\n", cost); + + if (min_cost > cost) { + min_cost = cost; + res = ((*niter)); + } + } else { + niter = n->erase(niter); + if (niter != n->begin()) + niter --; + } } - } - else { - niter = n->erase(niter); - if (niter !=n->begin()) - niter --; - } } - } - return res; + return res; } - int -Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) -{ - if (((n->nuca_pda.delay - minval->min_delay)*100/minval->min_delay) > g_ip->delay_dev_nuca) { - return 0; - } - 
if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn)/minval->min_dyn)*100 > - g_ip->dynamic_power_dev_nuca) { - return 0; - } - if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage)/minval->min_leakage)*100 > - g_ip->leakage_power_dev_nuca) { - return 0; - } - if (((n->nuca_pda.cycle_time - minval->min_cyc)/minval->min_cyc)*100 > - g_ip->cycle_time_dev_nuca) { - return 0; - } - if (((n->nuca_pda.area.get_area() - minval->min_area)/minval->min_area)*100 > - g_ip->area_dev_nuca) { - return 0; - } - return 1; +int +Nuca::check_nuca_org (nuca_org_t *n, min_values_t *minval) { + if (((n->nuca_pda.delay - minval->min_delay)*100 / minval->min_delay) > + g_ip->delay_dev_nuca) { + return 0; + } + if (((n->nuca_pda.power.readOp.dynamic - minval->min_dyn) / + minval->min_dyn)*100 > + g_ip->dynamic_power_dev_nuca) { + return 0; + } + if (((n->nuca_pda.power.readOp.leakage - minval->min_leakage) / + minval->min_leakage)*100 > + g_ip->leakage_power_dev_nuca) { + return 0; + } + if (((n->nuca_pda.cycle_time - minval->min_cyc) / minval->min_cyc)*100 > + g_ip->cycle_time_dev_nuca) { + return 0; + } + if (((n->nuca_pda.area.get_area() - minval->min_area) / minval->min_area) * + 100 > + g_ip->area_dev_nuca) { + return 0; + } + return 1; } - void -Nuca::calculate_nuca_area (nuca_org_t *nuca) -{ - nuca->nuca_pda.area.h= - nuca->rows * ((nuca->h_wire->wire_width + - nuca->h_wire->wire_spacing) - * nuca->router->flit_size + - nuca->bank_pda.area.h); - - nuca->nuca_pda.area.w = - nuca->columns * ((nuca->v_wire->wire_width + - nuca->v_wire->wire_spacing) - * nuca->router->flit_size + - nuca->bank_pda.area.w); +void +Nuca::calculate_nuca_area (nuca_org_t *nuca) { + nuca->nuca_pda.area.h = + nuca->rows * ((nuca->h_wire->wire_width + + nuca->h_wire->wire_spacing) + * nuca->router->flit_size + + nuca->bank_pda.area.h); + + nuca->nuca_pda.area.w = + nuca->columns * ((nuca->v_wire->wire_width + + nuca->v_wire->wire_spacing) + * nuca->router->flit_size + + nuca->bank_pda.area.w); } 
diff --git a/ext/mcpat/cacti/nuca.h b/ext/mcpat/cacti/nuca.h index adfe32564..38cca6f70 100644 --- a/ext/mcpat/cacti/nuca.h +++ b/ext/mcpat/cacti/nuca.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -46,8 +47,8 @@ #include "wire.h" class nuca_org_t { - public: - ~nuca_org_t(); +public: + ~nuca_org_t(); // int size; /* area, power, access time, and cycle time stats */ Component nuca_pda; @@ -71,9 +72,8 @@ class nuca_org_t { -class Nuca : public Component -{ - public: +class Nuca : public Component { +public: Nuca( TechnologyParameter::DeviceType *dt); void print_router(); @@ -87,12 +87,12 @@ class Nuca : public Component void print_nuca(nuca_org_t *n); void print_cont_stats(); - private: +private: TechnologyParameter::DeviceType *deviceType; int wt_min, wt_max; Wire *wire_vertical[WIRE_TYPES], - *wire_horizontal[WIRE_TYPES]; + *wire_horizontal[WIRE_TYPES]; }; diff --git a/ext/mcpat/cacti/parameter.cc b/ext/mcpat/cacti/parameter.cc index b71640c19..f7184d8a9 100644 --- a/ext/mcpat/cacti/parameter.cc +++ b/ext/mcpat/cacti/parameter.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
* All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -46,147 +47,141 @@ TechnologyParameter g_tp; -void TechnologyParameter::DeviceType::display(uint32_t indent) -{ - string indent_str(indent, ' '); - - cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl; - cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl; - cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl; - cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl; - cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; - cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; - cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl; - cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl; - cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; - cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; - cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; - cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl; - cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl; - cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl; - cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; - cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; - cout << indent_str << 
"n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; +void TechnologyParameter::DeviceType::display(uint32_t indent) { + string indent_str(indent, ' '); + + cout << indent_str << "C_g_ideal = " << setw(12) << C_g_ideal << " F/um" << endl; + cout << indent_str << "C_fringe = " << setw(12) << C_fringe << " F/um" << endl; + cout << indent_str << "C_overlap = " << setw(12) << C_overlap << " F/um" << endl; + cout << indent_str << "C_junc = " << setw(12) << C_junc << " F/um^2" << endl; + cout << indent_str << "l_phy = " << setw(12) << l_phy << " um" << endl; + cout << indent_str << "l_elec = " << setw(12) << l_elec << " um" << endl; + cout << indent_str << "R_nch_on = " << setw(12) << R_nch_on << " ohm-um" << endl; + cout << indent_str << "R_pch_on = " << setw(12) << R_pch_on << " ohm-um" << endl; + cout << indent_str << "Vdd = " << setw(12) << Vdd << " V" << endl; + cout << indent_str << "Vth = " << setw(12) << Vth << " V" << endl; + cout << indent_str << "I_on_n = " << setw(12) << I_on_n << " A/um" << endl; + cout << indent_str << "I_on_p = " << setw(12) << I_on_p << " A/um" << endl; + cout << indent_str << "I_off_n = " << setw(12) << I_off_n << " A/um" << endl; + cout << indent_str << "I_off_p = " << setw(12) << I_off_p << " A/um" << endl; + cout << indent_str << "C_ox = " << setw(12) << C_ox << " F/um^2" << endl; + cout << indent_str << "t_ox = " << setw(12) << t_ox << " um" << endl; + cout << indent_str << "n_to_p_eff_curr_drv_ratio = " << n_to_p_eff_curr_drv_ratio << endl; } -void TechnologyParameter::InterconnectType::display(uint32_t indent) -{ - string indent_str(indent, ' '); +void TechnologyParameter::InterconnectType::display(uint32_t indent) { + string indent_str(indent, ' '); - cout << indent_str << "pitch = " << setw(12) << pitch << " um" << endl; - cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl; - cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl; + cout << indent_str << "pitch 
= " << setw(12) << pitch << " um" << endl; + cout << indent_str << "R_per_um = " << setw(12) << R_per_um << " ohm/um" << endl; + cout << indent_str << "C_per_um = " << setw(12) << C_per_um << " F/um" << endl; } -void TechnologyParameter::ScalingFactor::display(uint32_t indent) -{ - string indent_str(indent, ' '); +void TechnologyParameter::ScalingFactor::display(uint32_t indent) { + string indent_str(indent, ' '); - cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl; - cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl; + cout << indent_str << "logic_scaling_co_eff = " << setw(12) << logic_scaling_co_eff << endl; + cout << indent_str << "curr_core_tx_density = " << setw(12) << core_tx_density << " # of tx/um^2" << endl; } -void TechnologyParameter::MemoryType::display(uint32_t indent) -{ - string indent_str(indent, ' '); +void TechnologyParameter::MemoryType::display(uint32_t indent) { + string indent_str(indent, ' '); - cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl; - cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl; - cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl; - cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl; - cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl; - cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl; + cout << indent_str << "b_w = " << setw(12) << b_w << " um" << endl; + cout << indent_str << "b_h = " << setw(12) << b_h << " um" << endl; + cout << indent_str << "cell_a_w = " << setw(12) << cell_a_w << " um" << endl; + cout << indent_str << "cell_pmos_w = " << setw(12) << cell_pmos_w << " um" << endl; + cout << indent_str << "cell_nmos_w = " << setw(12) << cell_nmos_w << " um" << endl; + cout << indent_str << "Vbitpre = " << setw(12) << Vbitpre << " V" << endl; } -void 
TechnologyParameter::display(uint32_t indent) -{ - string indent_str(indent, ' '); - - cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl; - cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl; - cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl; - cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl; - cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl; - cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl; - cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl; - cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl; - cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl; - cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl; - cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl; - cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl; - cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl; - cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl; - cout << endl; - cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl; - cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl; - cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl; - cout << indent_str << "w_comp_inv_n1 = " << setw(12) << w_comp_inv_n1 << " um" << endl; - cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl; - cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl; - cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl; - cout << indent_str << "w_eval_inv_n = " 
<< setw(12) << w_eval_inv_n << " um" << endl; - cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl; - cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl; - cout << endl; - cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl; - cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl; - cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl; - cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl; - cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl; - cout << endl; - cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl; - cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl; - cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl; - cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl; - cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl; - cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl; - cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl; - - cout << endl; - cout << indent_str << "SRAM cell transistor: " << endl; - sram_cell.display(indent + 2); - - cout << endl; - cout << indent_str << "DRAM access transistor: " << endl; - dram_acc.display(indent + 2); - - cout << endl; - cout << indent_str << "DRAM wordline transistor: " << endl; - dram_wl.display(indent + 2); - - cout << endl; - cout << indent_str << "peripheral global transistor: " << endl; - peri_global.display(indent + 2); - - cout << endl; - cout << indent_str << "wire local" << endl; - wire_local.display(indent + 2); - - cout << endl; - cout << indent_str << "wire inside mat" 
<< endl; - wire_inside_mat.display(indent + 2); - - cout << endl; - cout << indent_str << "wire outside mat" << endl; - wire_outside_mat.display(indent + 2); - - cout << endl; - cout << indent_str << "SRAM" << endl; - sram.display(indent + 2); - - cout << endl; - cout << indent_str << "DRAM" << endl; - dram.display(indent + 2); +void TechnologyParameter::display(uint32_t indent) { + string indent_str(indent, ' '); + + cout << indent_str << "ram_wl_stitching_overhead_ = " << setw(12) << ram_wl_stitching_overhead_ << " um" << endl; + cout << indent_str << "min_w_nmos_ = " << setw(12) << min_w_nmos_ << " um" << endl; + cout << indent_str << "max_w_nmos_ = " << setw(12) << max_w_nmos_ << " um" << endl; + cout << indent_str << "unit_len_wire_del = " << setw(12) << unit_len_wire_del << " s/um^2" << endl; + cout << indent_str << "FO4 = " << setw(12) << FO4 << " s" << endl; + cout << indent_str << "kinv = " << setw(12) << kinv << " s" << endl; + cout << indent_str << "vpp = " << setw(12) << vpp << " V" << endl; + cout << indent_str << "w_sense_en = " << setw(12) << w_sense_en << " um" << endl; + cout << indent_str << "w_sense_n = " << setw(12) << w_sense_n << " um" << endl; + cout << indent_str << "w_sense_p = " << setw(12) << w_sense_p << " um" << endl; + cout << indent_str << "w_iso = " << setw(12) << w_iso << " um" << endl; + cout << indent_str << "w_poly_contact = " << setw(12) << w_poly_contact << " um" << endl; + cout << indent_str << "spacing_poly_to_poly = " << setw(12) << spacing_poly_to_poly << " um" << endl; + cout << indent_str << "spacing_poly_to_contact = " << setw(12) << spacing_poly_to_contact << " um" << endl; + cout << endl; + cout << indent_str << "w_comp_inv_p1 = " << setw(12) << w_comp_inv_p1 << " um" << endl; + cout << indent_str << "w_comp_inv_p2 = " << setw(12) << w_comp_inv_p2 << " um" << endl; + cout << indent_str << "w_comp_inv_p3 = " << setw(12) << w_comp_inv_p3 << " um" << endl; + cout << indent_str << "w_comp_inv_n1 = " << setw(12) << 
w_comp_inv_n1 << " um" << endl; + cout << indent_str << "w_comp_inv_n2 = " << setw(12) << w_comp_inv_n2 << " um" << endl; + cout << indent_str << "w_comp_inv_n3 = " << setw(12) << w_comp_inv_n3 << " um" << endl; + cout << indent_str << "w_eval_inv_p = " << setw(12) << w_eval_inv_p << " um" << endl; + cout << indent_str << "w_eval_inv_n = " << setw(12) << w_eval_inv_n << " um" << endl; + cout << indent_str << "w_comp_n = " << setw(12) << w_comp_n << " um" << endl; + cout << indent_str << "w_comp_p = " << setw(12) << w_comp_p << " um" << endl; + cout << endl; + cout << indent_str << "dram_cell_I_on = " << setw(12) << dram_cell_I_on << " A/um" << endl; + cout << indent_str << "dram_cell_Vdd = " << setw(12) << dram_cell_Vdd << " V" << endl; + cout << indent_str << "dram_cell_I_off_worst_case_len_temp = " << setw(12) << dram_cell_I_off_worst_case_len_temp << " A/um" << endl; + cout << indent_str << "dram_cell_C = " << setw(12) << dram_cell_C << " F" << endl; + cout << indent_str << "gm_sense_amp_latch = " << setw(12) << gm_sense_amp_latch << " F/s" << endl; + cout << endl; + cout << indent_str << "w_nmos_b_mux = " << setw(12) << w_nmos_b_mux << " um" << endl; + cout << indent_str << "w_nmos_sa_mux = " << setw(12) << w_nmos_sa_mux << " um" << endl; + cout << indent_str << "w_pmos_bl_precharge = " << setw(12) << w_pmos_bl_precharge << " um" << endl; + cout << indent_str << "w_pmos_bl_eq = " << setw(12) << w_pmos_bl_eq << " um" << endl; + cout << indent_str << "MIN_GAP_BET_P_AND_N_DIFFS = " << setw(12) << MIN_GAP_BET_P_AND_N_DIFFS << " um" << endl; + cout << indent_str << "HPOWERRAIL = " << setw(12) << HPOWERRAIL << " um" << endl; + cout << indent_str << "cell_h_def = " << setw(12) << cell_h_def << " um" << endl; + + cout << endl; + cout << indent_str << "SRAM cell transistor: " << endl; + sram_cell.display(indent + 2); + + cout << endl; + cout << indent_str << "DRAM access transistor: " << endl; + dram_acc.display(indent + 2); + + cout << endl; + cout << indent_str << 
"DRAM wordline transistor: " << endl; + dram_wl.display(indent + 2); + + cout << endl; + cout << indent_str << "peripheral global transistor: " << endl; + peri_global.display(indent + 2); + + cout << endl; + cout << indent_str << "wire local" << endl; + wire_local.display(indent + 2); + + cout << endl; + cout << indent_str << "wire inside mat" << endl; + wire_inside_mat.display(indent + 2); + + cout << endl; + cout << indent_str << "wire outside mat" << endl; + wire_outside_mat.display(indent + 2); + + cout << endl; + cout << indent_str << "SRAM" << endl; + sram.display(indent + 2); + + cout << endl; + cout << indent_str << "DRAM" << endl; + dram.display(indent + 2); } DynamicParameter::DynamicParameter(): - use_inp_params(0), cell(), is_valid(true) -{ + use_inp_params(0), cell(), is_valid(true) { } @@ -202,512 +197,433 @@ DynamicParameter::DynamicParameter( unsigned int Ndsam_lev_1_, unsigned int Ndsam_lev_2_, bool is_main_mem_): - is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_),Ndcm(Ndcm_), - Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), - number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), - is_main_mem(is_main_mem_), cell(), is_valid(false) -{ - ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; - is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); - - unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer - const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local; - fully_assoc = (g_ip->fully_assoc) ? 
true : false; - - if (fully_assoc || pure_cam) - { // fully-assocative cache -- ref: CACTi 2.0 report - if (Ndwl != 1 || //Ndwl is fixed to 1 for FA - Ndcm != 1 || //Ndcm is fixed to 1 for FA - Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA - Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one - Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one - Ndbl < 2) - { - return; - } - } - - if ((is_dram) && (!is_tag) && (Ndcm > 1)) - { - return; // For a DRAM array, each bitline has its own sense-amp - } - - // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be - // at least two because an array is assumed to have at least one mat. And a mat - // is formed out of two horizontal subarrays and two vertical subarrays - if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) - { - return; - } - - //***********compute row, col of an subarray - if (!(fully_assoc || pure_cam))//Not fully_asso nor cam - { - // if data array, let tagbits = 0 - if (is_tag) - { - if (g_ip->specific_tag) - { - tagbits = g_ip->tag_w; - } - else - { - tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + - _log2(g_ip->tag_assoc*2 - 1) - _log2(g_ip->nbanks); - - } - tagbits = (((tagbits + 3) >> 2) << 2); - - num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * - g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON); - num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON); - //burst_length = 1; - } - else - { - num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * - g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON); - num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON); - // burst_length = g_ip->block_sz * 8 / g_ip->out_w; - } - - if (num_r_subarray < MINSUBARRAYROWS) return; - if (num_r_subarray == 0) return; - if (num_r_subarray > MAXSUBARRAYROWS) return; - if (num_c_subarray < MINSUBARRAYCOLS) return; - if (num_c_subarray > 
MAXSUBARRAYCOLS) return; - - } - - else - {//either fully-asso or cam - if (pure_cam) - { - if (g_ip->specific_tag) - { - tagbits = int(ceil(g_ip->tag_w/8.0)*8); - } - else - { - tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS)/8.0)*8); + is_tag(is_tag_), pure_ram(pure_ram_), pure_cam(pure_cam_), tagbits(0), + Nspd(Nspd_), Ndwl(Ndwl_), Ndbl(Ndbl_), Ndcm(Ndcm_), + Ndsam_lev_1(Ndsam_lev_1_), Ndsam_lev_2(Ndsam_lev_2_), + number_way_select_signals_mat(0), V_b_sense(0), use_inp_params(0), + is_main_mem(is_main_mem_), cell(), is_valid(false) { + ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; + is_dram = ((ram_cell_tech_type == lp_dram) || (ram_cell_tech_type == comm_dram)); + + unsigned int capacity_per_die = g_ip->cache_sz / NUMBER_STACKED_DIE_LAYERS; // capacity per stacked die layer + const TechnologyParameter::InterconnectType & wire_local = g_tp.wire_local; + fully_assoc = (g_ip->fully_assoc) ? true : false; + + // fully-assocative cache -- ref: CACTi 2.0 report + if (fully_assoc || pure_cam) { + if (Ndwl != 1 || //Ndwl is fixed to 1 for FA + Ndcm != 1 || //Ndcm is fixed to 1 for FA + Nspd < 1 || Nspd > 1 || //Nspd is fixed to 1 for FA + Ndsam_lev_1 != 1 || //Ndsam_lev_1 is fixed to one + Ndsam_lev_2 != 1 || //Ndsam_lev_2 is fixed to one + Ndbl < 2) { + return; + } + } + + if ((is_dram) && (!is_tag) && (Ndcm > 1)) { + return; // For a DRAM array, each bitline has its own sense-amp + } + + // If it's not an FA tag/data array, Ndwl should be at least two and Ndbl should be + // at least two because an array is assumed to have at least one mat. 
And a mat + // is formed out of two horizontal subarrays and two vertical subarrays + if (fully_assoc == false && (Ndwl < 1 || Ndbl < 1)) { + return; + } + + //***********compute row, col of an subarray + if (!(fully_assoc || pure_cam)) { + //Not fully_asso nor cam + // if data array, let tagbits = 0 + if (is_tag) { + if (g_ip->specific_tag) { + tagbits = g_ip->tag_w; + } else { + tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + + _log2(g_ip->tag_assoc * 2 - 1) - _log2(g_ip->nbanks); + + } + tagbits = (((tagbits + 3) >> 2) << 2); + + num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * + g_ip->block_sz * g_ip->tag_assoc * Ndbl * Nspd));// + EPSILON); + num_c_subarray = (int)ceil((tagbits * g_ip->tag_assoc * Nspd / Ndwl));// + EPSILON); + //burst_length = 1; + } else { + num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks * + g_ip->block_sz * g_ip->data_assoc * Ndbl * Nspd));// + EPSILON); + num_c_subarray = (int)ceil((8 * g_ip->block_sz * g_ip->data_assoc * Nspd / Ndwl));// + EPSILON); + EPSILON); + // burst_length = g_ip->block_sz * 8 / g_ip->out_w; + } + + if (num_r_subarray < MINSUBARRAYROWS) return; + if (num_r_subarray == 0) return; + if (num_r_subarray > MAXSUBARRAYROWS) return; + if (num_c_subarray < MINSUBARRAYCOLS) return; + if (num_c_subarray > MAXSUBARRAYCOLS) return; + + } + + else {//either fully-asso or cam + if (pure_cam) { + if (g_ip->specific_tag) { + tagbits = int(ceil(g_ip->tag_w / 8.0) * 8); + } else { + tagbits = int(ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8); // cout<<"Pure CAM needs tag width to be specified"<> 2) << 2); - - tag_num_r_subarray = (int)ceil(capacity_per_die / (g_ip->nbanks*tagbits/8.0 * Ndbl));//TODO: error check input of tagbits and blocksize //TODO: for pure CAM, g_ip->block should be number of entries. 
- //tag_num_c_subarray = (int)(tagbits + EPSILON); - tag_num_c_subarray = tagbits; - if (tag_num_r_subarray == 0) return; - if (tag_num_r_subarray > MAXSUBARRAYROWS) return; - if (tag_num_c_subarray < MINSUBARRAYCOLS) return; - if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; - num_r_subarray = tag_num_r_subarray; - } - else //fully associative - { - if (g_ip->specific_tag) - { - tagbits = g_ip->tag_w; - } - else - { - tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem. - } - tagbits = (((tagbits + 3) >> 2) << 2); - - tag_num_r_subarray = (int)(capacity_per_die / (g_ip->nbanks*g_ip->block_sz * Ndbl)); - tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); - if (tag_num_r_subarray == 0) return; - if (tag_num_r_subarray > MAXSUBARRAYROWS) return; - if (tag_num_c_subarray < MINSUBARRAYCOLS) return; - if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; - - data_num_r_subarray = tag_num_r_subarray; - data_num_c_subarray = 8 * g_ip->block_sz; - if (data_num_r_subarray == 0) return; - if (data_num_r_subarray > MAXSUBARRAYROWS) return; - if (data_num_c_subarray < MINSUBARRAYCOLS) return; - if (data_num_c_subarray > MAXSUBARRAYCOLS) return; - num_r_subarray = tag_num_r_subarray; - } - } - - num_subarrays = Ndwl * Ndbl; - //****************end of computation of row, col of an subarray - - // calculate wire parameters - if (fully_assoc || pure_cam) - { - cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; - cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports-1 + g_ip->num_rd_ports + g_ip->num_wr_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1) + wire_local.pitch * g_ip->num_se_rd_ports; - - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * 
(g_ip->num_wr_ports +g_ip->num_rw_ports-1 + g_ip->num_rd_ports) - + 2 * wire_local.pitch*(g_ip->num_search_ports-1); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports -1 + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) - + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports + 2 * wire_local.pitch*(g_ip->num_search_ports-1); - } - else - { - if(is_tag) - { - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + - g_ip->num_wr_ports); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + - (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + - wire_local.pitch * g_ip->num_se_rd_ports; - } - else - { - if (is_dram) - { - cell.h = g_tp.dram.b_h; - cell.w = g_tp.dram.b_w; - } - else - { - cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + - g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); - cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + - (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + - g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; - } - } - } - - double c_b_metal = cell.h * wire_local.C_per_um; - double C_bl; - - if (!(fully_assoc || pure_cam)) - { - if (is_dram) - { - deg_bl_muxing = 1; - if (ram_cell_tech_type == comm_dram) - { - C_bl = num_r_subarray * c_b_metal; - V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl); - if (V_b_sense < VBITSENSEMIN) - { - return; - } - V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value - dram_refresh_period = 64e-3; - } - else - { - double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - V_b_sense = (g_tp.dram_cell_Vdd/2) * g_tp.dram_cell_C /(g_tp.dram_cell_C + C_bl); - - if (V_b_sense < VBITSENSEMIN) - { - return; //Sense amp input signal is smaller that minimum allowable sense amp input 
signal - } - V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value - //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; - //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; - dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp; - } - } - else - { //SRAM - V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; - deg_bl_muxing = Ndcm; - // "/ 2.0" below is due to the fact that two adjacent access transistors share drain - // contacts in a physical layout - double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - dram_refresh_period = 0; - } - } - else - { - c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM - V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; - deg_bl_muxing = 1;//FA fix as 1 - // "/ 2.0" below is due to the fact that two adjacent access transistors share drain - // contacts in a physical layout - double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines - C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); - dram_refresh_period = 0; - } - - - // do/di: data in/out, for fully associative they are the data width for normal read and write - // so/si: search data in/out, for fully associative they are the data width for the search ops - // for CAM, si=di, but so = matching address. 
do = data out = di (for normal read/write) - // so/si needs broadcase while do/di do not - - if (fully_assoc || pure_cam) - { - switch (Ndbl) { - case (0): - cout << " Invalid Ndbl \n"<> 2) << 2); + + //TODO: error check input of tagbits and blocksize + //TODO: for pure CAM, g_ip->block should be number of entries. + tag_num_r_subarray = (int)ceil(capacity_per_die / + (g_ip->nbanks * tagbits / 8.0 * Ndbl)); + //tag_num_c_subarray = (int)(tagbits + EPSILON); + tag_num_c_subarray = tagbits; + if (tag_num_r_subarray == 0) return; + if (tag_num_r_subarray > MAXSUBARRAYROWS) return; + if (tag_num_c_subarray < MINSUBARRAYCOLS) return; + if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; + num_r_subarray = tag_num_r_subarray; + } else { //fully associative + if (g_ip->specific_tag) { + tagbits = g_ip->tag_w; + } else { + tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip->block_sz);//TODO: should be the page_offset=log2(page size), but this info is not avail with CACTI, for McPAT this is no problem. 
+ } + tagbits = (((tagbits + 3) >> 2) << 2); + + tag_num_r_subarray = (int)(capacity_per_die / + (g_ip->nbanks * g_ip->block_sz * Ndbl)); + tag_num_c_subarray = (int)ceil((tagbits * Nspd / Ndwl));// + EPSILON); + if (tag_num_r_subarray == 0) return; + if (tag_num_r_subarray > MAXSUBARRAYROWS) return; + if (tag_num_c_subarray < MINSUBARRAYCOLS) return; + if (tag_num_c_subarray > MAXSUBARRAYCOLS) return; + + data_num_r_subarray = tag_num_r_subarray; + data_num_c_subarray = 8 * g_ip->block_sz; + if (data_num_r_subarray == 0) return; + if (data_num_r_subarray > MAXSUBARRAYROWS) return; + if (data_num_c_subarray < MINSUBARRAYCOLS) return; + if (data_num_c_subarray > MAXSUBARRAYCOLS) return; + num_r_subarray = tag_num_r_subarray; + } + } + + num_subarrays = Ndwl * Ndbl; + //****************end of computation of row, col of an subarray + + // calculate wire parameters + if (fully_assoc || pure_cam) { + cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) + + wire_local.pitch * g_ip->num_se_rd_ports; + cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + g_ip->num_wr_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1) + + wire_local.pitch * g_ip->num_se_rd_ports; + + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * + (g_ip->num_wr_ports + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports) + + 2 * wire_local.pitch * (g_ip->num_search_ports - 1); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * + (g_ip->num_rw_ports - 1 + (g_ip->num_rd_ports - + g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + g_tp.wire_local.pitch * + g_ip->num_se_rd_ports + 2 * wire_local.pitch * + (g_ip->num_search_ports - 1); + } else { + if (is_tag) { + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + g_ip->num_rd_ports + + g_ip->num_wr_ports); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * 
(g_ip->num_rw_ports - 1 + g_ip->num_wr_ports + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports)) + + wire_local.pitch * g_ip->num_se_rd_ports; + } else { + if (is_dram) { + cell.h = g_tp.dram.b_h; + cell.w = g_tp.dram.b_w; + } else { + cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip->num_wr_ports + + g_ip->num_rw_ports - 1 + g_ip->num_rd_ports); + cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip->num_rw_ports - 1 + + (g_ip->num_rd_ports - g_ip->num_se_rd_ports) + + g_ip->num_wr_ports) + g_tp.wire_local.pitch * g_ip->num_se_rd_ports; } - else - { - num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data - num_do_b_mat = tagbits; + } + } + + double c_b_metal = cell.h * wire_local.C_per_um; + double C_bl; + + if (!(fully_assoc || pure_cam)) { + if (is_dram) { + deg_bl_muxing = 1; + if (ram_cell_tech_type == comm_dram) { + C_bl = num_r_subarray * c_b_metal; + V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl); + if (V_b_sense < VBITSENSEMIN) { + return; + } + V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value + dram_refresh_period = 64e-3; + } else { + double Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, cell.w, true, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / + (g_tp.dram_cell_C + C_bl); + + if (V_b_sense < VBITSENSEMIN) { + return; //Sense amp input signal is smaller that minimum allowable sense amp input signal + } + V_b_sense = VBITSENSEMIN; // in any case, we fix sense amp input signal to a constant value + //v_storage_worst = g_tp.dram_cell_Vdd / 2 - VBITSENSEMIN * (g_tp.dram_cell_C + C_bl) / g_tp.dram_cell_C; + //dram_refresh_period = 1.1 * g_tp.dram_cell_C * v_storage_worst / g_tp.dram_cell_I_off_worst_case_len_temp; + dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / 
g_tp.dram_cell_I_off_worst_case_len_temp; } - } - else - { - num_mats_h_dir = MAX(Ndwl / 2, 1); - num_mats_v_dir = MAX(Ndbl / 2, 1); - num_mats = num_mats_h_dir * num_mats_v_dir; - num_do_b_mat = MAX((num_subarrays/num_mats) * num_c_subarray / (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1); - } - - if (!(fully_assoc|| pure_cam) && (num_do_b_mat < (num_subarrays/num_mats))) - { - return; - } - - - int deg_sa_mux_l1_non_assoc; - //TODO:the i/o for subbank is not necessary and should be removed. - if (!(fully_assoc || pure_cam)) - { - if (!is_tag) - { - if (is_main_mem == true) - { - num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - } - else - { - if (g_ip->fast_access == true) - { - num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - } - else - { - - num_do_b_subbank = g_ip->out_w; - deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; - if (deg_sa_mux_l1_non_assoc < 1) - { - return; - } - - } - } - } - else - { - num_do_b_subbank = tagbits * g_ip->tag_assoc; - if (num_do_b_mat < tagbits) - { - return; - } - deg_sa_mux_l1_non_assoc = Ndsam_lev_1; - //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; - } - } - else - { - if (fully_assoc) - { - num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa - num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; - } - else - { - num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data - num_do_b_subbank = tag_num_c_subarray; - } - - deg_sa_mux_l1_non_assoc = 1; - } - - deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; - - if (fully_assoc || pure_cam) - { - num_act_mats_hor_dir = 1; - num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used - } - else - { - num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; - if (num_act_mats_hor_dir == 0) - { - return; - } - } - - 
//compute num_do_mat for tag - if (is_tag) - { - if (!(fully_assoc || pure_cam)) - { - num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; - num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; - } - } - - if ((g_ip->is_cache == false && is_main_mem == true) || (PAGE_MODE == 1 && is_dram)) - { - if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != (int)g_ip->page_sz_bits) - { - return; - } - } + } else { //SRAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; + deg_bl_muxing = Ndcm; + // "/ 2.0" below is due to the fact that two adjacent access transistors share drain + // contacts in a physical layout + double Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, cell.w, false, true) / 2.0; + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; + } + } else { + c_b_metal = cam_cell.h * wire_local.C_per_um;//IBM and SUN design, SRAM array uses dummy cells to fill the blank space due to mismatch on CAM-RAM + V_b_sense = (0.05 * g_tp.sram_cell.Vdd > VBITSENSEMIN) ? 0.05 * g_tp.sram_cell.Vdd : VBITSENSEMIN; + deg_bl_muxing = 1;//FA fix as 1 + // "/ 2.0" below is due to the fact that two adjacent access transistors share drain + // contacts in a physical layout + double Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, cam_cell.w, false, true) / 2.0;//TODO: comment out these two lines + C_bl = num_r_subarray * (Cbitrow_drain_cap + c_b_metal); + dram_refresh_period = 0; + } + + + // do/di: data in/out, for fully associative they are the data width for normal read and write + // so/si: search data in/out, for fully associative they are the data width for the search ops + // for CAM, si=di, but so = matching address. 
do = data out = di (for normal read/write) + // so/si needs broadcase while do/di do not + + if (fully_assoc || pure_cam) { + switch (Ndbl) { + case (0): + cout << " Invalid Ndbl \n" << endl; + exit(0); + break; + case (1): + num_mats_h_dir = 1;//one subarray per mat + num_mats_v_dir = 1; + break; + case (2): + num_mats_h_dir = 1;//two subarrays per mat + num_mats_v_dir = 1; + break; + default: + num_mats_h_dir = int(floor(sqrt(Ndbl / 4.0)));//4 subbarrys per mat + num_mats_v_dir = int(Ndbl / 4.0 / num_mats_h_dir); + } + num_mats = num_mats_h_dir * num_mats_v_dir; + + if (fully_assoc) { + num_so_b_mat = data_num_c_subarray; + num_do_b_mat = data_num_c_subarray + tagbits; + } else { + num_so_b_mat = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data + num_do_b_mat = tagbits; + } + } else { + num_mats_h_dir = MAX(Ndwl / 2, 1); + num_mats_v_dir = MAX(Ndbl / 2, 1); + num_mats = num_mats_h_dir * num_mats_v_dir; + num_do_b_mat = MAX((num_subarrays / num_mats) * num_c_subarray / + (deg_bl_muxing * Ndsam_lev_1 * Ndsam_lev_2), 1); + } + + if (!(fully_assoc || pure_cam) && (num_do_b_mat < + (num_subarrays / num_mats))) { + return; + } + + + int deg_sa_mux_l1_non_assoc; + //TODO:the i/o for subbank is not necessary and should be removed. 
+ if (!(fully_assoc || pure_cam)) { + if (!is_tag) { + if (is_main_mem == true) { + num_do_b_subbank = g_ip->int_prefetch_w * g_ip->out_w; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + } else { + if (g_ip->fast_access == true) { + num_do_b_subbank = g_ip->out_w * g_ip->data_assoc; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + } else { + + num_do_b_subbank = g_ip->out_w; + deg_sa_mux_l1_non_assoc = Ndsam_lev_1 / g_ip->data_assoc; + if (deg_sa_mux_l1_non_assoc < 1) { + return; + } + + } + } + } else { + num_do_b_subbank = tagbits * g_ip->tag_assoc; + if (num_do_b_mat < tagbits) { + return; + } + deg_sa_mux_l1_non_assoc = Ndsam_lev_1; + //num_do_b_mat = g_ip->tag_assoc / num_mats_h_dir; + } + } else { + if (fully_assoc) { + num_so_b_subbank = 8 * g_ip->block_sz;//TODO:internal perfetch should be considered also for fa + num_do_b_subbank = num_so_b_subbank + tag_num_c_subarray; + } else { + num_so_b_subbank = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays)));//the address contains the matched data + num_do_b_subbank = tag_num_c_subarray; + } + + deg_sa_mux_l1_non_assoc = 1; + } + + deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc; + + if (fully_assoc || pure_cam) { + num_act_mats_hor_dir = 1; + num_act_mats_hor_dir_sl = num_mats_h_dir;//TODO: this is unnecessary, since search op, num_mats is used + } else { + num_act_mats_hor_dir = num_do_b_subbank / num_do_b_mat; + if (num_act_mats_hor_dir == 0) { + return; + } + } + + //compute num_do_mat for tag + if (is_tag) { + if (!(fully_assoc || pure_cam)) { + num_do_b_mat = g_ip->tag_assoc / num_act_mats_hor_dir; + num_do_b_subbank = num_act_mats_hor_dir * num_do_b_mat; + } + } + + if ((g_ip->is_cache == false && is_main_mem == true) || + (PAGE_MODE == 1 && is_dram)) { + if (num_act_mats_hor_dir * num_do_b_mat * Ndsam_lev_1 * Ndsam_lev_2 != + (int)g_ip->page_sz_bits) { + return; + } + } // if (is_tag == false && g_ip->is_cache == true && !fully_assoc && !pure_cam && //TODO: TODO burst transfer should also 
apply to RAM arrays - if (is_tag == false && g_ip->is_main_mem == true && - num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < ((int) g_ip->out_w * (int) g_ip->burst_len * (int) g_ip->data_assoc)) - { - return; - } - - if (num_act_mats_hor_dir > num_mats_h_dir) - { - return; - } - - - //compute di for mat subbank and bank - if (!(fully_assoc ||pure_cam)) - { - if(!is_tag) - { - if(g_ip->fast_access == true) - { - num_di_b_mat = num_do_b_mat / g_ip->data_assoc; - } - else - { - num_di_b_mat = num_do_b_mat; - } - } - else - { - num_di_b_mat = tagbits; - } - } - else - { - if (fully_assoc) - { - num_di_b_mat = num_do_b_mat; - //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, - //but inside the mat wire tracks need to be reserved for search data bus - num_si_b_mat = tagbits; - } - else - { - num_di_b_mat = tagbits; - num_si_b_mat = tagbits;//*num_subarrays/num_mats; - } - - } - - num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA - num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast - - int num_addr_b_row_dec = _log2(num_r_subarray); - if ((fully_assoc ||pure_cam)) - num_addr_b_row_dec +=_log2(num_subarrays/num_mats); - int number_subbanks = num_mats / num_act_mats_hor_dir; - number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM - - num_rw_ports = g_ip->num_rw_ports; - num_rd_ports = g_ip->num_rd_ports; - num_wr_ports = g_ip->num_wr_ports; - num_se_rd_ports = g_ip->num_se_rd_ports; - num_search_ports = g_ip->num_search_ports; - - if (is_dram && is_main_mem) - { - number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, - _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); - } - else - { - number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + - _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); - } - - if (!(fully_assoc ||pure_cam)) - { - if (is_tag) - { - 
num_di_b_bank_per_port = tagbits; - num_do_b_bank_per_port = g_ip->data_assoc; - } - else - { - num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; - num_do_b_bank_per_port = g_ip->out_w; - } - } - else - { - if (fully_assoc) - { - num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? - num_si_b_bank_per_port = tagbits; - num_do_b_bank_per_port = g_ip->out_w + tagbits; - num_so_b_bank_per_port = g_ip->out_w; - } - else - { - num_di_b_bank_per_port = tagbits; - num_si_b_bank_per_port = tagbits; - num_do_b_bank_per_port = tagbits; - num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); - } - } - - if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) - { - number_way_select_signals_mat = g_ip->data_assoc; - } - - // add ECC adjustment to all data signals that traverse on H-trees. - if (g_ip->add_ecc_b_ == true) - { - num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); - num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); - num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); - num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); - num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); - num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); - - num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); - num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); - num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); - num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); - num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); - num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); - } - - is_valid = true; + if (is_tag == false && g_ip->is_main_mem == true && + num_act_mats_hor_dir*num_do_b_mat*Ndsam_lev_1*Ndsam_lev_2 < + ((int) g_ip->out_w * (int) 
g_ip->burst_len * (int) g_ip->data_assoc)) { + return; + } + + if (num_act_mats_hor_dir > num_mats_h_dir) { + return; + } + + + //compute di for mat subbank and bank + if (!(fully_assoc || pure_cam)) { + if (!is_tag) { + if (g_ip->fast_access == true) { + num_di_b_mat = num_do_b_mat / g_ip->data_assoc; + } else { + num_di_b_mat = num_do_b_mat; + } + } else { + num_di_b_mat = tagbits; + } + } else { + if (fully_assoc) { + num_di_b_mat = num_do_b_mat; + //*num_subarrays/num_mats; bits per mat of CAM/FA is as same as cache, + //but inside the mat wire tracks need to be reserved for search data bus + num_si_b_mat = tagbits; + } else { + num_di_b_mat = tagbits; + num_si_b_mat = tagbits;//*num_subarrays/num_mats; + } + + } + + num_di_b_subbank = num_di_b_mat * num_act_mats_hor_dir;//normal cache or normal r/w for FA + num_si_b_subbank = num_si_b_mat; //* num_act_mats_hor_dir_sl; inside the data is broadcast + + int num_addr_b_row_dec = _log2(num_r_subarray); + if ((fully_assoc || pure_cam)) + num_addr_b_row_dec += _log2(num_subarrays / num_mats); + int number_subbanks = num_mats / num_act_mats_hor_dir; + number_subbanks_decode = _log2(number_subbanks);//TODO: add log2(num_subarray_per_bank) to FA/CAM + + num_rw_ports = g_ip->num_rw_ports; + num_rd_ports = g_ip->num_rd_ports; + num_wr_ports = g_ip->num_wr_ports; + num_se_rd_ports = g_ip->num_se_rd_ports; + num_search_ports = g_ip->num_search_ports; + + if (is_dram && is_main_mem) { + number_addr_bits_mat = MAX((unsigned int) num_addr_b_row_dec, + _log2(deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2)); + } else { + number_addr_bits_mat = num_addr_b_row_dec + _log2(deg_bl_muxing) + + _log2(deg_sa_mux_l1_non_assoc) + _log2(Ndsam_lev_2); + } + + if (!(fully_assoc || pure_cam)) { + if (is_tag) { + num_di_b_bank_per_port = tagbits; + num_do_b_bank_per_port = g_ip->data_assoc; + } else { + num_di_b_bank_per_port = g_ip->out_w + g_ip->data_assoc; + num_do_b_bank_per_port = g_ip->out_w; + } + } else { + if 
(fully_assoc) { + num_di_b_bank_per_port = g_ip->out_w + tagbits;//TODO: out_w or block_sz? + num_si_b_bank_per_port = tagbits; + num_do_b_bank_per_port = g_ip->out_w + tagbits; + num_so_b_bank_per_port = g_ip->out_w; + } else { + num_di_b_bank_per_port = tagbits; + num_si_b_bank_per_port = tagbits; + num_do_b_bank_per_port = tagbits; + num_so_b_bank_per_port = int(ceil(log2(num_r_subarray)) + ceil(log2(num_subarrays))); + } + } + + if ((!is_tag) && (g_ip->data_assoc > 1) && (!g_ip->fast_access)) { + number_way_select_signals_mat = g_ip->data_assoc; + } + + // add ECC adjustment to all data signals that traverse on H-trees. + if (g_ip->add_ecc_b_ == true) { + num_do_b_mat += (int) (ceil(num_do_b_mat / num_bits_per_ecc_b_)); + num_di_b_mat += (int) (ceil(num_di_b_mat / num_bits_per_ecc_b_)); + num_di_b_subbank += (int) (ceil(num_di_b_subbank / num_bits_per_ecc_b_)); + num_do_b_subbank += (int) (ceil(num_do_b_subbank / num_bits_per_ecc_b_)); + num_di_b_bank_per_port += (int) (ceil(num_di_b_bank_per_port / num_bits_per_ecc_b_)); + num_do_b_bank_per_port += (int) (ceil(num_do_b_bank_per_port / num_bits_per_ecc_b_)); + + num_so_b_mat += (int) (ceil(num_so_b_mat / num_bits_per_ecc_b_)); + num_si_b_mat += (int) (ceil(num_si_b_mat / num_bits_per_ecc_b_)); + num_si_b_subbank += (int) (ceil(num_si_b_subbank / num_bits_per_ecc_b_)); + num_so_b_subbank += (int) (ceil(num_so_b_subbank / num_bits_per_ecc_b_)); + num_si_b_bank_per_port += (int) (ceil(num_si_b_bank_per_port / num_bits_per_ecc_b_)); + num_so_b_bank_per_port += (int) (ceil(num_so_b_bank_per_port / num_bits_per_ecc_b_)); + } + + is_valid = true; } diff --git a/ext/mcpat/cacti/parameter.h b/ext/mcpat/cacti/parameter.h index 9c827bbc8..573b726a6 100644 --- a/ext/mcpat/cacti/parameter.h +++ b/ext/mcpat/cacti/parameter.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
* All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -40,251 +41,242 @@ #include "io.h" // parameters which are functions of certain device technology -class TechnologyParameter -{ - public: - class DeviceType - { - public: - double C_g_ideal; - double C_fringe; - double C_overlap; - double C_junc; // C_junc_area - double C_junc_sidewall; - double l_phy; - double l_elec; - double R_nch_on; - double R_pch_on; - double Vdd; - double Vth; - double I_on_n; - double I_on_p; - double I_off_n; - double I_off_p; - double I_g_on_n; - double I_g_on_p; - double C_ox; - double t_ox; - double n_to_p_eff_curr_drv_ratio; - double long_channel_leakage_reduction; - - DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0), - C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0), - Vdd(0), Vth(0), - I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0),I_g_on_n(0),I_g_on_p(0), - C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), long_channel_leakage_reduction(0) { }; - void reset() - { - C_g_ideal = 0; - C_fringe = 0; - C_overlap = 0; - C_junc = 0; - l_phy = 0; - l_elec = 0; - R_nch_on = 0; - R_pch_on = 0; - Vdd = 0; - Vth = 0; - I_on_n = 0; - I_on_p = 0; - I_off_n = 0; - I_off_p = 0; - I_g_on_n = 0; - I_g_on_p = 0; - C_ox = 0; - t_ox = 0; - n_to_p_eff_curr_drv_ratio = 0; - long_channel_leakage_reduction = 0; - } +class TechnologyParameter { +public: + class DeviceType { + public: + double C_g_ideal; + double C_fringe; + double C_overlap; + double C_junc; // C_junc_area + double 
C_junc_sidewall; + double l_phy; + double l_elec; + double R_nch_on; + double R_pch_on; + double Vdd; + double Vth; + double I_on_n; + double I_on_p; + double I_off_n; + double I_off_p; + double I_g_on_n; + double I_g_on_p; + double C_ox; + double t_ox; + double n_to_p_eff_curr_drv_ratio; + double long_channel_leakage_reduction; + + DeviceType(): C_g_ideal(0), C_fringe(0), C_overlap(0), C_junc(0), + C_junc_sidewall(0), l_phy(0), l_elec(0), R_nch_on(0), R_pch_on(0), + Vdd(0), Vth(0), + I_on_n(0), I_on_p(0), I_off_n(0), I_off_p(0), I_g_on_n(0), + I_g_on_p(0), + C_ox(0), t_ox(0), n_to_p_eff_curr_drv_ratio(0), + long_channel_leakage_reduction(0) { }; + void reset() { + C_g_ideal = 0; + C_fringe = 0; + C_overlap = 0; + C_junc = 0; + l_phy = 0; + l_elec = 0; + R_nch_on = 0; + R_pch_on = 0; + Vdd = 0; + Vth = 0; + I_on_n = 0; + I_on_p = 0; + I_off_n = 0; + I_off_p = 0; + I_g_on_n = 0; + I_g_on_p = 0; + C_ox = 0; + t_ox = 0; + n_to_p_eff_curr_drv_ratio = 0; + long_channel_leakage_reduction = 0; + } + + void display(uint32_t indent = 0); + }; + class InterconnectType { + public: + double pitch; + double R_per_um; + double C_per_um; + double horiz_dielectric_constant; + double vert_dielectric_constant; + double aspect_ratio; + double miller_value; + double ild_thickness; + + InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { }; + + void reset() { + pitch = 0; + R_per_um = 0; + C_per_um = 0; + horiz_dielectric_constant = 0; + vert_dielectric_constant = 0; + aspect_ratio = 0; + miller_value = 0; + ild_thickness = 0; + } + + void display(uint32_t indent = 0); + }; + class MemoryType { + public: + double b_w; + double b_h; + double cell_a_w; + double cell_pmos_w; + double cell_nmos_w; + double Vbitpre; + + void reset() { + b_w = 0; + b_h = 0; + cell_a_w = 0; + cell_pmos_w = 0; + cell_nmos_w = 0; + Vbitpre = 0; + } + + void display(uint32_t indent = 0); + }; + + class ScalingFactor { + public: + double logic_scaling_co_eff; + double core_tx_density; + double 
long_channel_leakage_reduction; + + ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0), + long_channel_leakage_reduction(0) { }; + + void reset() { + logic_scaling_co_eff = 0; + core_tx_density = 0; + long_channel_leakage_reduction = 0; + } + + void display(uint32_t indent = 0); + }; + + double ram_wl_stitching_overhead_; + double min_w_nmos_; + double max_w_nmos_; + double max_w_nmos_dec; + double unit_len_wire_del; + double FO4; + double kinv; + double vpp; + double w_sense_en; + double w_sense_n; + double w_sense_p; + double sense_delay; + double sense_dy_power; + double w_iso; + double w_poly_contact; + double spacing_poly_to_poly; + double spacing_poly_to_contact; + + double w_comp_inv_p1; + double w_comp_inv_p2; + double w_comp_inv_p3; + double w_comp_inv_n1; + double w_comp_inv_n2; + double w_comp_inv_n3; + double w_eval_inv_p; + double w_eval_inv_n; + double w_comp_n; + double w_comp_p; + + double dram_cell_I_on; + double dram_cell_Vdd; + double dram_cell_I_off_worst_case_len_temp; + double dram_cell_C; + double gm_sense_amp_latch; + + double w_nmos_b_mux; + double w_nmos_sa_mux; + double w_pmos_bl_precharge; + double w_pmos_bl_eq; + double MIN_GAP_BET_P_AND_N_DIFFS; + double MIN_GAP_BET_SAME_TYPE_DIFFS; + double HPOWERRAIL; + double cell_h_def; + + double chip_layout_overhead; + double macro_layout_overhead; + double sckt_co_eff; + + double fringe_cap; + + uint64_t h_dec; + + DeviceType sram_cell; // SRAM cell transistor + DeviceType dram_acc; // DRAM access transistor + DeviceType dram_wl; // DRAM wordline transistor + DeviceType peri_global; // peripheral global + DeviceType cam_cell; // SRAM cell transistor + + InterconnectType wire_local; + InterconnectType wire_inside_mat; + InterconnectType wire_outside_mat; + + ScalingFactor scaling_factor; + + MemoryType sram; + MemoryType dram; + MemoryType cam; void display(uint32_t indent = 0); - }; - class InterconnectType - { - public: - double pitch; - double R_per_um; - double C_per_um; - double 
horiz_dielectric_constant; - double vert_dielectric_constant; - double aspect_ratio; - double miller_value; - double ild_thickness; - - InterconnectType(): pitch(0), R_per_um(0), C_per_um(0) { }; - - void reset() - { - pitch = 0; - R_per_um = 0; - C_per_um = 0; - horiz_dielectric_constant = 0; - vert_dielectric_constant = 0; - aspect_ratio = 0; - miller_value = 0; - ild_thickness = 0; - } - void display(uint32_t indent = 0); - }; - class MemoryType - { - public: - double b_w; - double b_h; - double cell_a_w; - double cell_pmos_w; - double cell_nmos_w; - double Vbitpre; - - void reset() - { - b_w = 0; - b_h = 0; - cell_a_w = 0; - cell_pmos_w = 0; - cell_nmos_w = 0; - Vbitpre = 0; - } + void reset() { + dram_cell_Vdd = 0; + dram_cell_I_on = 0; + dram_cell_C = 0; + vpp = 0; - void display(uint32_t indent = 0); - }; - - class ScalingFactor - { - public: - double logic_scaling_co_eff; - double core_tx_density; - double long_channel_leakage_reduction; - - ScalingFactor(): logic_scaling_co_eff(0), core_tx_density(0), - long_channel_leakage_reduction(0) { }; - - void reset() - { - logic_scaling_co_eff= 0; - core_tx_density = 0; - long_channel_leakage_reduction= 0; - } - - void display(uint32_t indent = 0); - }; - - double ram_wl_stitching_overhead_; - double min_w_nmos_; - double max_w_nmos_; - double max_w_nmos_dec; - double unit_len_wire_del; - double FO4; - double kinv; - double vpp; - double w_sense_en; - double w_sense_n; - double w_sense_p; - double sense_delay; - double sense_dy_power; - double w_iso; - double w_poly_contact; - double spacing_poly_to_poly; - double spacing_poly_to_contact; - - double w_comp_inv_p1; - double w_comp_inv_p2; - double w_comp_inv_p3; - double w_comp_inv_n1; - double w_comp_inv_n2; - double w_comp_inv_n3; - double w_eval_inv_p; - double w_eval_inv_n; - double w_comp_n; - double w_comp_p; - - double dram_cell_I_on; - double dram_cell_Vdd; - double dram_cell_I_off_worst_case_len_temp; - double dram_cell_C; - double gm_sense_amp_latch; - - 
double w_nmos_b_mux; - double w_nmos_sa_mux; - double w_pmos_bl_precharge; - double w_pmos_bl_eq; - double MIN_GAP_BET_P_AND_N_DIFFS; - double MIN_GAP_BET_SAME_TYPE_DIFFS; - double HPOWERRAIL; - double cell_h_def; - - double chip_layout_overhead; - double macro_layout_overhead; - double sckt_co_eff; - - double fringe_cap; - - uint64_t h_dec; - - DeviceType sram_cell; // SRAM cell transistor - DeviceType dram_acc; // DRAM access transistor - DeviceType dram_wl; // DRAM wordline transistor - DeviceType peri_global; // peripheral global - DeviceType cam_cell; // SRAM cell transistor - - InterconnectType wire_local; - InterconnectType wire_inside_mat; - InterconnectType wire_outside_mat; - - ScalingFactor scaling_factor; - - MemoryType sram; - MemoryType dram; - MemoryType cam; - - void display(uint32_t indent = 0); - - void reset() - { - dram_cell_Vdd = 0; - dram_cell_I_on = 0; - dram_cell_C = 0; - vpp = 0; - - sense_delay = 0; - sense_dy_power = 0; - fringe_cap = 0; + sense_delay = 0; + sense_dy_power = 0; + fringe_cap = 0; // horiz_dielectric_constant = 0; // vert_dielectric_constant = 0; // aspect_ratio = 0; // miller_value = 0; // ild_thickness = 0; - dram_cell_I_off_worst_case_len_temp = 0; + dram_cell_I_off_worst_case_len_temp = 0; - sram_cell.reset(); - dram_acc.reset(); - dram_wl.reset(); - peri_global.reset(); - cam_cell.reset(); + sram_cell.reset(); + dram_acc.reset(); + dram_wl.reset(); + peri_global.reset(); + cam_cell.reset(); - scaling_factor.reset(); + scaling_factor.reset(); - wire_local.reset(); - wire_inside_mat.reset(); - wire_outside_mat.reset(); + wire_local.reset(); + wire_inside_mat.reset(); + wire_outside_mat.reset(); - sram.reset(); - dram.reset(); - cam.reset(); + sram.reset(); + dram.reset(); + cam.reset(); - chip_layout_overhead = 0; - macro_layout_overhead = 0; - sckt_co_eff = 0; - } + chip_layout_overhead = 0; + macro_layout_overhead = 0; + sckt_co_eff = 0; + } }; -class DynamicParameter -{ - public: +class DynamicParameter { +public: 
bool is_tag; bool pure_ram; bool pure_cam; @@ -313,8 +305,8 @@ class DynamicParameter int num_so_b_mat; int num_si_b_subbank; int num_so_b_subbank; - int num_si_b_bank_per_port; - int num_so_b_bank_per_port; + int num_si_b_bank_per_port; + int num_so_b_bank_per_port; int number_way_select_signals_mat; int num_act_mats_hor_dir; diff --git a/ext/mcpat/cacti/router.cc b/ext/mcpat/cacti/router.cc index 06f170691..d3368d946 100644 --- a/ext/mcpat/cacti/router.cc +++ b/ext/mcpat/cacti/router.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -41,57 +42,56 @@ Router::Router( double I_, double O_, double M_ - ):flit_size(flit_size_), - deviceType(dt), - I(I_), - O(O_), - M(M_) -{ - vc_buffer_size = vc_buf; - vc_count = vc_c; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - double technology = g_ip->F_sz_um; - - Vdd = dt->Vdd; - - /*Crossbar parameters. Transmisson gate is employed for connector*/ - NTtr = 10*technology*1e-6/2; /*Transmission gate's nmos tr. length*/ - PTtr = 20*technology*1e-6/2; /* pmos tr. 
length*/ - wt = 15*technology*1e-6/2; /*track width*/ - ht = 15*technology*1e-6/2; /*track height*/ + ): flit_size(flit_size_), + deviceType(dt), + I(I_), + O(O_), + M(M_) { + vc_buffer_size = vc_buf; + vc_count = vc_c; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + double technology = g_ip->F_sz_um; + + Vdd = dt->Vdd; + + /*Crossbar parameters. Transmisson gate is employed for connector*/ + NTtr = 10 * technology * 1e-6 / 2; /*Transmission gate's nmos tr. length*/ + PTtr = 20 * technology * 1e-6 / 2; /* pmos tr. length*/ + wt = 15 * technology * 1e-6 / 2; /*track width*/ + ht = 15 * technology * 1e-6 / 2; /*track height*/ // I = 5; /*Number of crossbar input ports*/ // O = 5; /*Number of crossbar output ports*/ - NTi = 12.5*technology*1e-6/2; - PTi = 25*technology*1e-6/2; + NTi = 12.5 * technology * 1e-6 / 2; + PTi = 25 * technology * 1e-6 / 2; - NTid = 60*technology*1e-6/2; //m - PTid = 120*technology*1e-6/2; // m - NTod = 60*technology*1e-6/2; // m - PTod = 120*technology*1e-6/2; // m + NTid = 60 * technology * 1e-6 / 2; //m + PTid = 120 * technology * 1e-6 / 2; // m + NTod = 60 * technology * 1e-6 / 2; // m + PTod = 120 * technology * 1e-6 / 2; // m - calc_router_parameters(); + calc_router_parameters(); } -Router::~Router(){} +Router::~Router() {} double //wire cap with triple spacing Router::Cw3(double length) { - Wire wc(g_ip->wt, length, 1, 3, 3); - return (wc.wire_cap(length)); + Wire wc(g_ip->wt, length, 1, 3, 3); + return (wc.wire_cap(length)); } /*Function to calculate the gate capacitance*/ double Router::gate_cap(double w) { - return (double) gate_C (w*1e6 /*u*/, 0); + return (double) gate_C (w*1e6 /*u*/, 0); } /*Function to calculate the diffusion capacitance*/ double Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, - double s /*number of stacking transistors*/) { - return (double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def); + double s /*number of stacking transistors*/) { + return 
(double) drain_C_(w*1e6 /*u*/, type, (int) s, 1, g_tp.cell_h_def); } @@ -100,212 +100,216 @@ Router::diff_cap(double w, int type /*0 for n-mos and 1 for p-mos*/, // Model for simple transmission gate double Router::transmission_buf_inpcap() { - return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1); + return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1); } double Router::transmission_buf_outcap() { - return diff_cap(NTtr, 0, 1)+diff_cap(PTtr, 1, 1); + return diff_cap(NTtr, 0, 1) + diff_cap(PTtr, 1, 1); } double Router::transmission_buf_ctrcap() { - return gate_cap(NTtr)+gate_cap(PTtr); + return gate_cap(NTtr) + gate_cap(PTtr); } double Router::crossbar_inpline() { - return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) + - gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1)); + return (Cw3(O*flit_size*wt) + O*transmission_buf_inpcap() + gate_cap(NTid) + + gate_cap(PTid) + diff_cap(NTid, 0, 1) + diff_cap(PTid, 1, 1)); } double Router::crossbar_outline() { - return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) + - gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1)); + return (Cw3(I*flit_size*ht) + I*transmission_buf_outcap() + gate_cap(NTod) + + gate_cap(PTod) + diff_cap(NTod, 0, 1) + diff_cap(PTod, 1, 1)); } double Router::crossbar_ctrline() { - return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() + - diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) + - gate_cap(NTi) + gate_cap(PTi)); + return (Cw3(0.5*O*flit_size*wt) + flit_size*transmission_buf_ctrcap() + + diff_cap(NTi, 0, 1) + diff_cap(PTi, 1, 1) + + gate_cap(NTi) + gate_cap(PTi)); } double Router::tr_crossbar_power() { - return (crossbar_inpline()*Vdd*Vdd*flit_size/2 + - crossbar_outline()*Vdd*Vdd*flit_size/2)*2; + return (crossbar_inpline()*Vdd*Vdd*flit_size / 2 + + crossbar_outline()*Vdd*Vdd*flit_size / 2) * 2; } -void Router::buffer_stats() -{ - DynamicParameter dyn_p; - dyn_p.is_tag = false; - dyn_p.pure_cam = false; - dyn_p.fully_assoc = 
false; - dyn_p.pure_ram = true; - dyn_p.is_dram = false; - dyn_p.is_main_mem = false; - dyn_p.num_subarrays = 1; - dyn_p.num_mats = 1; - dyn_p.Ndbl = 1; - dyn_p.Ndwl = 1; - dyn_p.Nspd = 1; - dyn_p.deg_bl_muxing = 1; - dyn_p.deg_senseamp_muxing_non_associativity = 1; - dyn_p.Ndsam_lev_1 = 1; - dyn_p.Ndsam_lev_2 = 1; - dyn_p.Ndcm = 1; - dyn_p.number_addr_bits_mat = 8; - dyn_p.number_way_select_signals_mat = 1; - dyn_p.number_subbanks_decode = 0; - dyn_p.num_act_mats_hor_dir = 1; - dyn_p.V_b_sense = Vdd; // FIXME check power calc. - dyn_p.ram_cell_tech_type = 0; - dyn_p.num_r_subarray = (int) vc_buffer_size; - dyn_p.num_c_subarray = (int) flit_size * (int) vc_count; - dyn_p.num_mats_h_dir = 1; - dyn_p.num_mats_v_dir = 1; - dyn_p.num_do_b_subbank = (int)flit_size; - dyn_p.num_di_b_subbank = (int)flit_size; - dyn_p.num_do_b_mat = (int) flit_size; - dyn_p.num_di_b_mat = (int) flit_size; - dyn_p.num_do_b_mat = (int) flit_size; - dyn_p.num_di_b_mat = (int) flit_size; - dyn_p.num_do_b_bank_per_port = (int) flit_size; - dyn_p.num_di_b_bank_per_port = (int) flit_size; - dyn_p.out_w = (int) flit_size; - - dyn_p.use_inp_params = 1; - dyn_p.num_wr_ports = (unsigned int) vc_count; - dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book - dyn_p.num_rw_ports = 0; - dyn_p.num_se_rd_ports =0; - dyn_p.num_search_ports =0; - - - - dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports + - dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports); - dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 + - (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) + - dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports; - - Mat buff(dyn_p); - buff.compute_delays(0); - buff.compute_power_energy(); - buffer.power.readOp = buff.power.readOp; - buffer.power.writeOp = buffer.power.readOp; //FIXME - buffer.area = buff.area; +void Router::buffer_stats() { + DynamicParameter dyn_p; + dyn_p.is_tag = false; + 
dyn_p.pure_cam = false; + dyn_p.fully_assoc = false; + dyn_p.pure_ram = true; + dyn_p.is_dram = false; + dyn_p.is_main_mem = false; + dyn_p.num_subarrays = 1; + dyn_p.num_mats = 1; + dyn_p.Ndbl = 1; + dyn_p.Ndwl = 1; + dyn_p.Nspd = 1; + dyn_p.deg_bl_muxing = 1; + dyn_p.deg_senseamp_muxing_non_associativity = 1; + dyn_p.Ndsam_lev_1 = 1; + dyn_p.Ndsam_lev_2 = 1; + dyn_p.Ndcm = 1; + dyn_p.number_addr_bits_mat = 8; + dyn_p.number_way_select_signals_mat = 1; + dyn_p.number_subbanks_decode = 0; + dyn_p.num_act_mats_hor_dir = 1; + dyn_p.V_b_sense = Vdd; // FIXME check power calc. + dyn_p.ram_cell_tech_type = 0; + dyn_p.num_r_subarray = (int) vc_buffer_size; + dyn_p.num_c_subarray = (int) flit_size * (int) vc_count; + dyn_p.num_mats_h_dir = 1; + dyn_p.num_mats_v_dir = 1; + dyn_p.num_do_b_subbank = (int)flit_size; + dyn_p.num_di_b_subbank = (int)flit_size; + dyn_p.num_do_b_mat = (int) flit_size; + dyn_p.num_di_b_mat = (int) flit_size; + dyn_p.num_do_b_mat = (int) flit_size; + dyn_p.num_di_b_mat = (int) flit_size; + dyn_p.num_do_b_bank_per_port = (int) flit_size; + dyn_p.num_di_b_bank_per_port = (int) flit_size; + dyn_p.out_w = (int) flit_size; + + dyn_p.use_inp_params = 1; + dyn_p.num_wr_ports = (unsigned int) vc_count; + dyn_p.num_rd_ports = 1;//(unsigned int) vc_count;//based on Bill Dally's book + dyn_p.num_rw_ports = 0; + dyn_p.num_se_rd_ports = 0; + dyn_p.num_search_ports = 0; + + + + dyn_p.cell.h = g_tp.sram.b_h + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_wr_ports + + dyn_p.num_rw_ports - 1 + dyn_p.num_rd_ports); + dyn_p.cell.w = g_tp.sram.b_w + 2 * g_tp.wire_outside_mat.pitch * (dyn_p.num_rw_ports - 1 + + (dyn_p.num_rd_ports - dyn_p.num_se_rd_ports) + + dyn_p.num_wr_ports) + g_tp.wire_outside_mat.pitch * dyn_p.num_se_rd_ports; + + Mat buff(dyn_p); + buff.compute_delays(0); + buff.compute_power_energy(); + buffer.power.readOp = buff.power.readOp; + buffer.power.writeOp = buffer.power.readOp; //FIXME + buffer.area = buff.area; } - void -Router::cb_stats () -{ - if 
(1) { - Crossbar c_b(I, O, flit_size); - c_b.compute_power(); - crossbar.delay = c_b.delay; - crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic; - crossbar.power.readOp.leakage = c_b.power.readOp.leakage; - crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage; - crossbar.area = c_b.area; +void +Router::cb_stats () { + if (1) { + Crossbar c_b(I, O, flit_size); + c_b.compute_power(); + crossbar.delay = c_b.delay; + crossbar.power.readOp.dynamic = c_b.power.readOp.dynamic; + crossbar.power.readOp.leakage = c_b.power.readOp.leakage; + crossbar.power.readOp.gate_leakage = c_b.power.readOp.gate_leakage; + crossbar.area = c_b.area; // c_b.print_crossbar(); - } - else { - crossbar.power.readOp.dynamic = tr_crossbar_power(); - crossbar.power.readOp.leakage = flit_size * I * O * - cmos_Isub_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg); - crossbar.power.readOp.gate_leakage = flit_size * I * O * - cmos_Ig_leakage(NTtr*g_tp.min_w_nmos_, PTtr*min_w_pmos, 1, tg); - } + } else { + crossbar.power.readOp.dynamic = tr_crossbar_power(); + crossbar.power.readOp.leakage = flit_size * I * O * + cmos_Isub_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg); + crossbar.power.readOp.gate_leakage = flit_size * I * O * + cmos_Ig_leakage(NTtr * g_tp.min_w_nmos_, PTtr * min_w_pmos, 1, tg); + } } void -Router::get_router_power() -{ - /* calculate buffer stats */ - buffer_stats(); - - /* calculate cross-bar stats */ - cb_stats(); - - /* calculate arbiter stats */ - Arbiter vcarb(vc_count, flit_size, buffer.area.w); - Arbiter cbarb(I, flit_size, crossbar.area.w); - vcarb.compute_power(); - cbarb.compute_power(); - arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I + - cbarb.power.readOp.dynamic * O; - arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I + - cbarb.power.readOp.leakage * O; - arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I + - cbarb.power.readOp.gate_leakage * O; +Router::get_router_power() { + /* 
calculate buffer stats */ + buffer_stats(); + + /* calculate cross-bar stats */ + cb_stats(); + + /* calculate arbiter stats */ + Arbiter vcarb(vc_count, flit_size, buffer.area.w); + Arbiter cbarb(I, flit_size, crossbar.area.w); + vcarb.compute_power(); + cbarb.compute_power(); + arbiter.power.readOp.dynamic = vcarb.power.readOp.dynamic * I + + cbarb.power.readOp.dynamic * O; + arbiter.power.readOp.leakage = vcarb.power.readOp.leakage * I + + cbarb.power.readOp.leakage * O; + arbiter.power.readOp.gate_leakage = vcarb.power.readOp.gate_leakage * I + + cbarb.power.readOp.gate_leakage * O; // arb_stats(); - power.readOp.dynamic = ((buffer.power.readOp.dynamic+buffer.power.writeOp.dynamic) + - crossbar.power.readOp.dynamic + - arbiter.power.readOp.dynamic)*MIN(I, O)*M; - double pppm_t[4] = {1,I,I,1}; - power = power + (buffer.power*pppm_t + crossbar.power + arbiter.power)*pppm_lkg; + power.readOp.dynamic = ((buffer.power.readOp.dynamic + + buffer.power.writeOp.dynamic) + + crossbar.power.readOp.dynamic + + arbiter.power.readOp.dynamic) * MIN(I, O) * M; + double pppm_t[4] = {1, I, I, 1}; + power = power + (buffer.power * pppm_t + crossbar.power + arbiter.power) * + pppm_lkg; } - void -Router::get_router_delay () -{ - FREQUENCY=5; // move this to config file --TODO - cycle_time = (1/(double)FREQUENCY)*1e3; //ps - delay = 4; - max_cyc = 17 * g_tp.FO4; //s - max_cyc *= 1e12; //ps - if (cycle_time < max_cyc) { - FREQUENCY = (1/max_cyc)*1e3; //GHz - } +void +Router::get_router_delay () { + FREQUENCY = 5; // move this to config file --TODO + cycle_time = (1 / (double)FREQUENCY) * 1e3; //ps + delay = 4; + max_cyc = 17 * g_tp.FO4; //s + max_cyc *= 1e12; //ps + if (cycle_time < max_cyc) { + FREQUENCY = (1 / max_cyc) * 1e3; //GHz + } } - void -Router::get_router_area() -{ - area.h = I*buffer.area.h; - area.w = buffer.area.w+crossbar.area.w; +void +Router::get_router_area() { + area.h = I * buffer.area.h; + area.w = buffer.area.w + crossbar.area.w; } - void 
-Router::calc_router_parameters() -{ - /* calculate router frequency and pipeline cycles */ - get_router_delay(); +void +Router::calc_router_parameters() { + /* calculate router frequency and pipeline cycles */ + get_router_delay(); - /* router power stats */ - get_router_power(); + /* router power stats */ + get_router_power(); - /* area stats */ - get_router_area(); + /* area stats */ + get_router_area(); } - void -Router::print_router() -{ - cout << "\n\nRouter stats:\n"; - cout << "\tRouter Area - "<< area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tMaximum possible network frequency - " << (1/max_cyc)*1e3 << "GHz\n"; - cout << "\tNetwork frequency - " << FREQUENCY <<" GHz\n"; - cout << "\tNo. of Virtual channels - " << vc_count << "\n"; - cout << "\tNo. of pipeline stages - " << delay << endl; - cout << "\tLink bandwidth - " << flit_size << " (bits)\n"; - cout << "\tNo. of buffer entries per virtual channel - "<< vc_buffer_size << "\n"; - cout << "\tSimple buffer Area - "<< buffer.area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tSimple buffer access (Read) - " << buffer.power.readOp.dynamic * 1e9 <<" (nJ)\n"; - cout << "\tSimple buffer leakage - " << buffer.power.readOp.leakage * 1e3 <<" (mW)\n"; - cout << "\tCrossbar Area - "<< crossbar.area.get_area()*1e-6<<"(mm^2)\n"; - cout << "\tCross bar access energy - " << crossbar.power.readOp.dynamic * 1e9<<" (nJ)\n"; - cout << "\tCross bar leakage power - " << crossbar.power.readOp.leakage * 1e3<<" (mW)\n"; - cout << "\tArbiter access energy (VC arb + Crossbar arb) - "<tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; - uint32_t peri_global_tech_type = (is_tag) ? 
g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type; - - technology = technology * 1000.0; // in the unit of nm - - // initialize parameters - g_tp.reset(); - double gmp_to_gmn_multiplier_periph_global = 0; - - double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram, - curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram, - curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram, - curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp; - double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data - curr_asp_ratio_cell_cam; - double SENSE_AMP_D, SENSE_AMP_P; // J - double area_cell_dram = 0; - double asp_ratio_cell_dram = 0; - double area_cell_sram = 0; - double asp_ratio_cell_sram = 0; - double area_cell_cam = 0; - double asp_ratio_cell_cam = 0; - double mobility_eff_periph_global = 0; - double Vdsat_periph_global = 0; - double nmos_effective_resistance_multiplier; - double width_dram_access_transistor; - - double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date - double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn - double curr_chip_layout_overhead = 0; - double curr_macro_layout_overhead = 0; - double curr_sckt_co_eff = 0; - - if (technology < 181 && technology > 179) - { + double barrier_thickness, dishing_thickness, alpha_scatter; + double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell; + + uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; + uint32_t peri_global_tech_type = (is_tag) ? 
g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type; + + technology = technology * 1000.0; // in the unit of nm + + // initialize parameters + g_tp.reset(); + double gmp_to_gmn_multiplier_periph_global = 0; + + double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram, + curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram, + curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram, + curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp; + double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data + curr_asp_ratio_cell_cam; + double SENSE_AMP_D, SENSE_AMP_P; // J + double area_cell_dram = 0; + double asp_ratio_cell_dram = 0; + double area_cell_sram = 0; + double asp_ratio_cell_sram = 0; + double area_cell_cam = 0; + double asp_ratio_cell_cam = 0; + double mobility_eff_periph_global = 0; + double Vdsat_periph_global = 0; + double nmos_effective_resistance_multiplier; + double width_dram_access_transistor; + + double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date + double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn + double curr_chip_layout_overhead = 0; + double curr_macro_layout_overhead = 0; + double curr_sckt_co_eff = 0; + + if (technology < 181 && technology > 179) { tech_lo = 180; tech_hi = 180; - } - else if (technology < 91 && technology > 89) - { - tech_lo = 90; - tech_hi = 90; - } - else if (technology < 66 && technology > 64) - { - tech_lo = 65; - tech_hi = 65; - } - else if (technology < 46 && technology > 44) - { - tech_lo = 45; - tech_hi = 45; - } - else if (technology < 33 && technology > 31) - { - tech_lo = 32; - tech_hi = 32; - } - else if (technology < 23 && technology > 21) - { - tech_lo = 22; - tech_hi = 22; - if (ram_cell_tech_type == 3 ) - { - cout<<"current version does not support eDRAM 
technologies at 22nm"< 15) -// { -// tech_lo = 16; -// tech_hi = 16; -// } - else if (technology < 180 && technology > 90) - { - tech_lo = 180; - tech_hi = 90; - } - else if (technology < 90 && technology > 65) - { - tech_lo = 90; - tech_hi = 65; - } - else if (technology < 65 && technology > 45) - { - tech_lo = 65; - tech_hi = 45; - } - else if (technology < 45 && technology > 32) - { - tech_lo = 45; - tech_hi = 32; - } - else if (technology < 32 && technology > 22) - { - tech_lo = 32; - tech_hi = 22; + } else if (technology < 91 && technology > 89) { + tech_lo = 90; + tech_hi = 90; + } else if (technology < 66 && technology > 64) { + tech_lo = 65; + tech_hi = 65; + } else if (technology < 46 && technology > 44) { + tech_lo = 45; + tech_hi = 45; + } else if (technology < 33 && technology > 31) { + tech_lo = 32; + tech_hi = 32; + } else if (technology < 23 && technology > 21) { + tech_lo = 22; + tech_hi = 22; + if (ram_cell_tech_type == 3 ) { + cout << "current version does not support eDRAM technologies at " + << "22nm" << endl; + exit(0); + } + } else if (technology < 180 && technology > 90) { + tech_lo = 180; + tech_hi = 90; + } else if (technology < 90 && technology > 65) { + tech_lo = 90; + tech_hi = 65; + } else if (technology < 65 && technology > 45) { + tech_lo = 65; + tech_hi = 45; + } else if (technology < 45 && technology > 32) { + tech_lo = 45; + tech_hi = 32; + } else if (technology < 32 && technology > 22) { + tech_lo = 32; + tech_hi = 22; } // else if (technology < 22 && technology > 16) // { // tech_lo = 22; // tech_hi = 16; // } - else - { - cout<<"Invalid technology nodes"<F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 
2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 - curr_asp_ratio_cell_cam = 2.92;//2.5 - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm - curr_core_tx_density = 1.25*0.7*0.7*0.4; - curr_sckt_co_eff = 1.11; - curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb + if (tech == 180) { + //180nm technology-node. Corresponds to year 1999 in ITRS + //Only HP transistor was of interest that 180nm since leakage power was not a big issue. Performance was the king + //MASTAR does not contain data for 0.18um process. The following parameters are projected based on ITRS 2000 update and IBM 0.18 Cu Spice input + bool Aggre_proj = false; + SENSE_AMP_D = .28e-9; // s + SENSE_AMP_P = 14.7e-15; // J + vdd[0] = 1.5; + Lphy[0] = 0.12;//Lphy is the physical gate-length. micron + Lelec[0] = 0.10;//Lelec is the electrical gate-length. micron + t_ox[0] = 1.2e-3 * (Aggre_proj ? 1.9 / 1.2 : 2);//micron + v_th[0] = Aggre_proj ? 0.36 : 0.4407;//V + c_ox[0] = 1.79e-14 * (Aggre_proj ? 1.9 / 1.2 : 2);//F/micron2 + mobility_eff[0] = 302.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[0] = 0.128 * 2; //V + c_g_ideal[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 6.64e-16;//F/micron + c_fringe[0] = (Aggre_proj ? 1.9 / 1.2 : 2) * 0.08e-15;//F/micron + c_junc[0] = (Aggre_proj ? 
1.9 / 1.2 : 2) * 1e-15;//F/micron2 + I_on_n[0] = 750e-6;//A/micron + I_on_p[0] = 350e-6;//A/micron + //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline + nmos_effective_resistance_multiplier = 1.54; + n_to_p_eff_curr_drv_ratio[0] = 2.45; + gmp_to_gmn_multiplier[0] = 1.22; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1; + I_off_n[0][0] = 7e-10;//A/micron + I_off_n[0][10] = 8.26e-10; + I_off_n[0][20] = 9.74e-10; + I_off_n[0][30] = 1.15e-9; + I_off_n[0][40] = 1.35e-9; + I_off_n[0][50] = 1.60e-9; + I_off_n[0][60] = 1.88e-9; + I_off_n[0][70] = 2.29e-9; + I_off_n[0][80] = 2.70e-9; + I_off_n[0][90] = 3.19e-9; + I_off_n[0][100] = 3.76e-9; + + I_g_on_n[0][0] = 1.65e-10;//A/micron + I_g_on_n[0][10] = 1.65e-10; + I_g_on_n[0][20] = 1.65e-10; + I_g_on_n[0][30] = 1.65e-10; + I_g_on_n[0][40] = 1.65e-10; + I_g_on_n[0][50] = 1.65e-10; + I_g_on_n[0][60] = 1.65e-10; + I_g_on_n[0][70] = 1.65e-10; + I_g_on_n[0][80] = 1.65e-10; + I_g_on_n[0][90] = 1.65e-10; + I_g_on_n[0][100] = 1.65e-10; + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 + curr_asp_ratio_cell_cam = 2.92;//2.5 + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 1.5;//linear scaling from 90nm + curr_core_tx_density = 1.25 * 0.7 * 0.7 * 0.4; + curr_sckt_co_eff = 1.11; + 
curr_chip_layout_overhead = 1.0;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.0;//EDA placement and routing tool rule of thumb - } + } - if (tech == 90) - { - SENSE_AMP_D = .28e-9; // s - SENSE_AMP_P = 14.7e-15; // J - //90nm technology-node. Corresponds to year 2004 in ITRS - //ITRS HP device type - vdd[0] = 1.2; - Lphy[0] = 0.037;//Lphy is the physical gate-length. micron - Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron - t_ox[0] = 1.2e-3;//micron - v_th[0] = 0.23707;//V - c_ox[0] = 1.79e-14;//F/micron2 - mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 0.128; //V - c_g_ideal[0] = 6.64e-16;//F/micron - c_fringe[0] = 0.08e-15;//F/micron - c_junc[0] = 1e-15;//F/micron2 - I_on_n[0] = 1076.9e-6;//A/micron - I_on_p[0] = 712.6e-6;//A/micron - //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline - nmos_effective_resistance_multiplier = 1.54; - n_to_p_eff_curr_drv_ratio[0] = 2.45; - gmp_to_gmn_multiplier[0] = 1.22; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1; - I_off_n[0][0] = 3.24e-8;//A/micron - I_off_n[0][10] = 4.01e-8; - I_off_n[0][20] = 4.90e-8; - I_off_n[0][30] = 5.92e-8; - I_off_n[0][40] = 7.08e-8; - I_off_n[0][50] = 8.38e-8; - I_off_n[0][60] = 9.82e-8; - I_off_n[0][70] = 1.14e-7; - I_off_n[0][80] = 1.29e-7; - I_off_n[0][90] = 1.43e-7; - I_off_n[0][100] = 1.54e-7; - - I_g_on_n[0][0] = 1.65e-8;//A/micron - I_g_on_n[0][10] = 1.65e-8; - I_g_on_n[0][20] = 1.65e-8; - I_g_on_n[0][30] = 1.65e-8; - I_g_on_n[0][40] = 1.65e-8; - I_g_on_n[0][50] = 1.65e-8; - I_g_on_n[0][60] = 1.65e-8; - I_g_on_n[0][70] = 1.65e-8; - I_g_on_n[0][80] = 1.65e-8; - I_g_on_n[0][90] = 1.65e-8; - I_g_on_n[0][100] = 1.65e-8; - - //ITRS LSTP device type - vdd[1] = 1.3; - 
Lphy[1] = 0.075; - Lelec[1] = 0.0486; - t_ox[1] = 2.2e-3; - v_th[1] = 0.48203; - c_ox[1] = 1.22e-14; - mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 0.373; - c_g_ideal[1] = 9.15e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 503.6e-6; - I_on_p[1] = 235.1e-6; - nmos_effective_resistance_multiplier = 1.92; - n_to_p_eff_curr_drv_ratio[1] = 2.44; - gmp_to_gmn_multiplier[1] =0.88; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1; - I_off_n[1][0] = 2.81e-12; - I_off_n[1][10] = 4.76e-12; - I_off_n[1][20] = 7.82e-12; - I_off_n[1][30] = 1.25e-11; - I_off_n[1][40] = 1.94e-11; - I_off_n[1][50] = 2.94e-11; - I_off_n[1][60] = 4.36e-11; - I_off_n[1][70] = 6.32e-11; - I_off_n[1][80] = 8.95e-11; - I_off_n[1][90] = 1.25e-10; - I_off_n[1][100] = 1.7e-10; - - I_g_on_n[1][0] = 3.87e-11;//A/micron - I_g_on_n[1][10] = 3.87e-11; - I_g_on_n[1][20] = 3.87e-11; - I_g_on_n[1][30] = 3.87e-11; - I_g_on_n[1][40] = 3.87e-11; - I_g_on_n[1][50] = 3.87e-11; - I_g_on_n[1][60] = 3.87e-11; - I_g_on_n[1][70] = 3.87e-11; - I_g_on_n[1][80] = 3.87e-11; - I_g_on_n[1][90] = 3.87e-11; - I_g_on_n[1][100] = 3.87e-11; - - //ITRS LOP device type - vdd[2] = 0.9; - Lphy[2] = 0.053; - Lelec[2] = 0.0354; - t_ox[2] = 1.5e-3; - v_th[2] = 0.30764; - c_ox[2] = 1.59e-14; - mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 0.113; - c_g_ideal[2] = 8.45e-16; - c_fringe[2] = 0.08e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 386.6e-6; - I_on_p[2] = 209.7e-6; - nmos_effective_resistance_multiplier = 1.77; - n_to_p_eff_curr_drv_ratio[2] = 2.54; - gmp_to_gmn_multiplier[2] = 0.98; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1; - I_off_n[2][0] = 2.14e-9; - I_off_n[2][10] = 2.9e-9; - I_off_n[2][20] = 3.87e-9; - 
I_off_n[2][30] = 5.07e-9; - I_off_n[2][40] = 6.54e-9; - I_off_n[2][50] = 8.27e-8; - I_off_n[2][60] = 1.02e-7; - I_off_n[2][70] = 1.20e-7; - I_off_n[2][80] = 1.36e-8; - I_off_n[2][90] = 1.52e-8; - I_off_n[2][100] = 1.73e-8; - - I_g_on_n[2][0] = 4.31e-8;//A/micron - I_g_on_n[2][10] = 4.31e-8; - I_g_on_n[2][20] = 4.31e-8; - I_g_on_n[2][30] = 4.31e-8; - I_g_on_n[2][40] = 4.31e-8; - I_g_on_n[2][50] = 4.31e-8; - I_g_on_n[2][60] = 4.31e-8; - I_g_on_n[2][70] = 4.31e-8; - I_g_on_n[2][80] = 4.31e-8; - I_g_on_n[2][90] = 4.31e-8; - I_g_on_n[2][100] = 4.31e-8; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.2; - Lphy[3] = 0.12; - Lelec[3] = 0.0756; - curr_v_th_dram_access_transistor = 0.4545; - width_dram_access_transistor = 0.14; - curr_I_on_dram_cell = 45e-6; - curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 0.168; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.6; - t_ox[3] = 2.2e-3; - v_th[3] = 0.4545; - c_ox[3] = 1.22e-14; - mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.3; - c_g_ideal[3] = 1.47e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 321.6e-6; - I_on_p[3] = 203.3e-6; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.42e-11; - I_off_n[3][10] = 2.25e-11; - I_off_n[3][20] = 3.46e-11; - I_off_n[3][30] = 5.18e-11; - I_off_n[3][40] = 7.58e-11; - I_off_n[3][50] = 1.08e-10; - I_off_n[3][60] = 1.51e-10; - I_off_n[3][70] = 2.02e-10; - I_off_n[3][80] = 2.57e-10; - 
I_off_n[3][90] = 3.14e-10; - I_off_n[3][100] = 3.85e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.6; - Lphy[3] = 0.09; - Lelec[3] = 0.0576; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.09; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.09*0.09; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 3.7; - t_ox[3] = 5.5e-3; - v_th[3] = 1.0; - c_ox[3] = 5.65e-15; - mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.32; - c_g_ideal[3] = 5.08e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1094.3e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.62; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 5.80e-15; - I_off_n[3][10] = 1.21e-14; - I_off_n[3][20] = 2.42e-14; - I_off_n[3][30] = 4.65e-14; - I_off_n[3][40] = 8.60e-14; - I_off_n[3][50] = 1.54e-13; - I_off_n[3][60] = 2.66e-13; - I_off_n[3][70] = 4.45e-13; - I_off_n[3][80] = 7.17e-13; - I_off_n[3][90] = 1.11e-12; - I_off_n[3][100] = 1.67e-12; - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - 
curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 - curr_asp_ratio_cell_cam = 2.92;//2.5 - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 1; - curr_core_tx_density = 1.25*0.7*0.7; - curr_sckt_co_eff = 1.1539; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + if (tech == 90) { + SENSE_AMP_D = .28e-9; // s + SENSE_AMP_P = 14.7e-15; // J + //90nm technology-node. Corresponds to year 2004 in ITRS + //ITRS HP device type + vdd[0] = 1.2; + Lphy[0] = 0.037;//Lphy is the physical gate-length. micron + Lelec[0] = 0.0266;//Lelec is the electrical gate-length. micron + t_ox[0] = 1.2e-3;//micron + v_th[0] = 0.23707;//V + c_ox[0] = 1.79e-14;//F/micron2 + mobility_eff[0] = 342.16 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[0] = 0.128; //V + c_g_ideal[0] = 6.64e-16;//F/micron + c_fringe[0] = 0.08e-15;//F/micron + c_junc[0] = 1e-15;//F/micron2 + I_on_n[0] = 1076.9e-6;//A/micron + I_on_p[0] = 712.6e-6;//A/micron + //Note that nmos_effective_resistance_multiplier, n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier values are calculated offline + nmos_effective_resistance_multiplier = 1.54; + n_to_p_eff_curr_drv_ratio[0] = 2.45; + gmp_to_gmn_multiplier[0] = 1.22; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1; + I_off_n[0][0] = 3.24e-8;//A/micron + I_off_n[0][10] = 4.01e-8; + I_off_n[0][20] = 4.90e-8; + I_off_n[0][30] = 5.92e-8; + I_off_n[0][40] = 7.08e-8; + I_off_n[0][50] = 8.38e-8; + I_off_n[0][60] = 9.82e-8; + I_off_n[0][70] = 1.14e-7; + I_off_n[0][80] = 1.29e-7; + I_off_n[0][90] = 1.43e-7; + I_off_n[0][100] = 1.54e-7; + + I_g_on_n[0][0] = 1.65e-8;//A/micron + I_g_on_n[0][10] = 1.65e-8; + I_g_on_n[0][20] = 
1.65e-8; + I_g_on_n[0][30] = 1.65e-8; + I_g_on_n[0][40] = 1.65e-8; + I_g_on_n[0][50] = 1.65e-8; + I_g_on_n[0][60] = 1.65e-8; + I_g_on_n[0][70] = 1.65e-8; + I_g_on_n[0][80] = 1.65e-8; + I_g_on_n[0][90] = 1.65e-8; + I_g_on_n[0][100] = 1.65e-8; + + //ITRS LSTP device type + vdd[1] = 1.3; + Lphy[1] = 0.075; + Lelec[1] = 0.0486; + t_ox[1] = 2.2e-3; + v_th[1] = 0.48203; + c_ox[1] = 1.22e-14; + mobility_eff[1] = 356.76 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[1] = 0.373; + c_g_ideal[1] = 9.15e-16; + c_fringe[1] = 0.08e-15; + c_junc[1] = 1e-15; + I_on_n[1] = 503.6e-6; + I_on_p[1] = 235.1e-6; + nmos_effective_resistance_multiplier = 1.92; + n_to_p_eff_curr_drv_ratio[1] = 2.44; + gmp_to_gmn_multiplier[1] = 0.88; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; + long_channel_leakage_reduction[1] = 1; + I_off_n[1][0] = 2.81e-12; + I_off_n[1][10] = 4.76e-12; + I_off_n[1][20] = 7.82e-12; + I_off_n[1][30] = 1.25e-11; + I_off_n[1][40] = 1.94e-11; + I_off_n[1][50] = 2.94e-11; + I_off_n[1][60] = 4.36e-11; + I_off_n[1][70] = 6.32e-11; + I_off_n[1][80] = 8.95e-11; + I_off_n[1][90] = 1.25e-10; + I_off_n[1][100] = 1.7e-10; + + I_g_on_n[1][0] = 3.87e-11;//A/micron + I_g_on_n[1][10] = 3.87e-11; + I_g_on_n[1][20] = 3.87e-11; + I_g_on_n[1][30] = 3.87e-11; + I_g_on_n[1][40] = 3.87e-11; + I_g_on_n[1][50] = 3.87e-11; + I_g_on_n[1][60] = 3.87e-11; + I_g_on_n[1][70] = 3.87e-11; + I_g_on_n[1][80] = 3.87e-11; + I_g_on_n[1][90] = 3.87e-11; + I_g_on_n[1][100] = 3.87e-11; + + //ITRS LOP device type + vdd[2] = 0.9; + Lphy[2] = 0.053; + Lelec[2] = 0.0354; + t_ox[2] = 1.5e-3; + v_th[2] = 0.30764; + c_ox[2] = 1.59e-14; + mobility_eff[2] = 460.39 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[2] = 0.113; + c_g_ideal[2] = 8.45e-16; + c_fringe[2] = 0.08e-15; + c_junc[2] = 1e-15; + I_on_n[2] = 386.6e-6; + I_on_p[2] = 209.7e-6; + nmos_effective_resistance_multiplier = 1.77; + n_to_p_eff_curr_drv_ratio[2] = 2.54; + 
gmp_to_gmn_multiplier[2] = 0.98; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; + long_channel_leakage_reduction[2] = 1; + I_off_n[2][0] = 2.14e-9; + I_off_n[2][10] = 2.9e-9; + I_off_n[2][20] = 3.87e-9; + I_off_n[2][30] = 5.07e-9; + I_off_n[2][40] = 6.54e-9; + I_off_n[2][50] = 8.27e-8; + I_off_n[2][60] = 1.02e-7; + I_off_n[2][70] = 1.20e-7; + I_off_n[2][80] = 1.36e-8; + I_off_n[2][90] = 1.52e-8; + I_off_n[2][100] = 1.73e-8; + + I_g_on_n[2][0] = 4.31e-8;//A/micron + I_g_on_n[2][10] = 4.31e-8; + I_g_on_n[2][20] = 4.31e-8; + I_g_on_n[2][30] = 4.31e-8; + I_g_on_n[2][40] = 4.31e-8; + I_g_on_n[2][50] = 4.31e-8; + I_g_on_n[2][60] = 4.31e-8; + I_g_on_n[2][70] = 4.31e-8; + I_g_on_n[2][80] = 4.31e-8; + I_g_on_n[2][90] = 4.31e-8; + I_g_on_n[2][100] = 4.31e-8; + + if (ram_cell_tech_type == lp_dram) { + //LP-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.2; + Lphy[3] = 0.12; + Lelec[3] = 0.0756; + curr_v_th_dram_access_transistor = 0.4545; + width_dram_access_transistor = 0.14; + curr_I_on_dram_cell = 45e-6; + curr_I_off_dram_cell_worst_case_length_temp = 21.1e-12; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 0.168; + curr_asp_ratio_cell_dram = 1.46; + curr_c_dram_cell = 20e-15; + + //LP-DRAM wordline transistor parameters + curr_vpp = 1.6; + t_ox[3] = 2.2e-3; + v_th[3] = 0.4545; + c_ox[3] = 1.22e-14; + mobility_eff[3] = 323.95 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.3; + c_g_ideal[3] = 1.47e-15; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 321.6e-6; + I_on_p[3] = 203.3e-6; + nmos_effective_resistance_multiplier = 1.65; + n_to_p_eff_curr_drv_ratio[3] = 1.95; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + 
long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 1.42e-11; + I_off_n[3][10] = 2.25e-11; + I_off_n[3][20] = 3.46e-11; + I_off_n[3][30] = 5.18e-11; + I_off_n[3][40] = 7.58e-11; + I_off_n[3][50] = 1.08e-10; + I_off_n[3][60] = 1.51e-10; + I_off_n[3][70] = 2.02e-10; + I_off_n[3][80] = 2.57e-10; + I_off_n[3][90] = 3.14e-10; + I_off_n[3][100] = 3.85e-10; + } else if (ram_cell_tech_type == comm_dram) { + //COMM-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.6; + Lphy[3] = 0.09; + Lelec[3] = 0.0576; + curr_v_th_dram_access_transistor = 1; + width_dram_access_transistor = 0.09; + curr_I_on_dram_cell = 20e-6; + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.09 * 0.09; + curr_asp_ratio_cell_dram = 1.5; + curr_c_dram_cell = 30e-15; + + //COMM-DRAM wordline transistor parameters + curr_vpp = 3.7; + t_ox[3] = 5.5e-3; + v_th[3] = 1.0; + c_ox[3] = 5.65e-15; + mobility_eff[3] = 302.2 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.32; + c_g_ideal[3] = 5.08e-16; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 1094.3e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.62; + n_to_p_eff_curr_drv_ratio[3] = 2.05; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 5.80e-15; + I_off_n[3][10] = 1.21e-14; + I_off_n[3][20] = 2.42e-14; + I_off_n[3][30] = 4.65e-14; + I_off_n[3][40] = 8.60e-14; + I_off_n[3][50] = 1.54e-13; + I_off_n[3][60] = 2.66e-13; + I_off_n[3][70] = 4.45e-13; + I_off_n[3][80] = 7.17e-13; + I_off_n[3][90] = 1.11e-12; + I_off_n[3][100] = 1.67e-12; + } + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + 
curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 + curr_asp_ratio_cell_cam = 2.92;//2.5 + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 1; + curr_core_tx_density = 1.25 * 0.7 * 0.7; + curr_sckt_co_eff = 1.1539; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + } - if (tech == 65) - { //65nm technology-node. Corresponds to year 2007 in ITRS - //ITRS HP device type - SENSE_AMP_D = .2e-9; // s - SENSE_AMP_P = 5.7e-15; // J - vdd[0] = 1.1; - Lphy[0] = 0.025; - Lelec[0] = 0.019; - t_ox[0] = 1.1e-3; - v_th[0] = .19491; - c_ox[0] = 1.88e-14; - mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 7.71e-2; - c_g_ideal[0] = 4.69e-16; - c_fringe[0] = 0.077e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 1197.2e-6; - I_on_p[0] = 870.8e-6; - nmos_effective_resistance_multiplier = 1.50; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1/3.74; - //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first - //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74. 
- I_off_n[0][0] = 1.96e-7; - I_off_n[0][10] = 2.29e-7; - I_off_n[0][20] = 2.66e-7; - I_off_n[0][30] = 3.05e-7; - I_off_n[0][40] = 3.49e-7; - I_off_n[0][50] = 3.95e-7; - I_off_n[0][60] = 4.45e-7; - I_off_n[0][70] = 4.97e-7; - I_off_n[0][80] = 5.48e-7; - I_off_n[0][90] = 5.94e-7; - I_off_n[0][100] = 6.3e-7; - I_g_on_n[0][0] = 4.09e-8;//A/micron - I_g_on_n[0][10] = 4.09e-8; - I_g_on_n[0][20] = 4.09e-8; - I_g_on_n[0][30] = 4.09e-8; - I_g_on_n[0][40] = 4.09e-8; - I_g_on_n[0][50] = 4.09e-8; - I_g_on_n[0][60] = 4.09e-8; - I_g_on_n[0][70] = 4.09e-8; - I_g_on_n[0][80] = 4.09e-8; - I_g_on_n[0][90] = 4.09e-8; - I_g_on_n[0][100] = 4.09e-8; - - //ITRS LSTP device type - vdd[1] = 1.2; - Lphy[1] = 0.045; - Lelec[1] = 0.0298; - t_ox[1] = 1.9e-3; - v_th[1] = 0.52354; - c_ox[1] = 1.36e-14; - mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 0.128; - c_g_ideal[1] = 6.14e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 519.2e-6; - I_on_p[1] = 266e-6; - nmos_effective_resistance_multiplier = 1.96; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.82; - I_off_n[1][0] = 9.12e-12; - I_off_n[1][10] = 1.49e-11; - I_off_n[1][20] = 2.36e-11; - I_off_n[1][30] = 3.64e-11; - I_off_n[1][40] = 5.48e-11; - I_off_n[1][50] = 8.05e-11; - I_off_n[1][60] = 1.15e-10; - I_off_n[1][70] = 1.59e-10; - I_off_n[1][80] = 2.1e-10; - I_off_n[1][90] = 2.62e-10; - I_off_n[1][100] = 3.21e-10; - - I_g_on_n[1][0] = 1.09e-10;//A/micron - I_g_on_n[1][10] = 1.09e-10; - I_g_on_n[1][20] = 1.09e-10; - I_g_on_n[1][30] = 1.09e-10; - I_g_on_n[1][40] = 1.09e-10; - I_g_on_n[1][50] = 1.09e-10; - I_g_on_n[1][60] = 1.09e-10; - I_g_on_n[1][70] = 1.09e-10; - I_g_on_n[1][80] = 1.09e-10; - I_g_on_n[1][90] = 1.09e-10; - I_g_on_n[1][100] = 1.09e-10; - - //ITRS LOP device type - vdd[2] = 0.8; - 
Lphy[2] = 0.032; - Lelec[2] = 0.0216; - t_ox[2] = 1.2e-3; - v_th[2] = 0.28512; - c_ox[2] = 1.87e-14; - mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 0.292; - c_g_ideal[2] = 6e-16; - c_fringe[2] = 0.08e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 573.1e-6; - I_on_p[2] = 340.6e-6; - nmos_effective_resistance_multiplier = 1.82; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/2.05; - I_off_n[2][0] = 4.9e-9; - I_off_n[2][10] = 6.49e-9; - I_off_n[2][20] = 8.45e-9; - I_off_n[2][30] = 1.08e-8; - I_off_n[2][40] = 1.37e-8; - I_off_n[2][50] = 1.71e-8; - I_off_n[2][60] = 2.09e-8; - I_off_n[2][70] = 2.48e-8; - I_off_n[2][80] = 2.84e-8; - I_off_n[2][90] = 3.13e-8; - I_off_n[2][100] = 3.42e-8; - - I_g_on_n[2][0] = 9.61e-9;//A/micron - I_g_on_n[2][10] = 9.61e-9; - I_g_on_n[2][20] = 9.61e-9; - I_g_on_n[2][30] = 9.61e-9; - I_g_on_n[2][40] = 9.61e-9; - I_g_on_n[2][50] = 9.61e-9; - I_g_on_n[2][60] = 9.61e-9; - I_g_on_n[2][70] = 9.61e-9; - I_g_on_n[2][80] = 9.61e-9; - I_g_on_n[2][90] = 9.61e-9; - I_g_on_n[2][100] = 9.61e-9; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.2; - Lphy[3] = 0.12; - Lelec[3] = 0.0756; - curr_v_th_dram_access_transistor = 0.43806; - width_dram_access_transistor = 0.09; - curr_I_on_dram_cell = 36e-6; - curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 0.11; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.6; - t_ox[3] = 2.2e-3; - v_th[3] = 0.43806; - c_ox[3] = 1.22e-14; - mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.43806; - 
c_g_ideal[3] = 1.46e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; - I_on_n[3] = 399.8e-6; - I_on_p[3] = 243.4e-6; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.23e-11; - I_off_n[3][10] = 3.46e-11; - I_off_n[3][20] = 5.24e-11; - I_off_n[3][30] = 7.75e-11; - I_off_n[3][40] = 1.12e-10; - I_off_n[3][50] = 1.58e-10; - I_off_n[3][60] = 2.18e-10; - I_off_n[3][70] = 2.88e-10; - I_off_n[3][80] = 3.63e-10; - I_off_n[3][90] = 4.41e-10; - I_off_n[3][100] = 5.36e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.3; - Lphy[3] = 0.065; - Lelec[3] = 0.0426; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.065; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.065*0.065; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 3.3; - t_ox[3] = 5e-3; - v_th[3] = 1.0; - c_ox[3] = 6.16e-15; - mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.385; - c_g_ideal[3] = 4e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; - I_on_n[3] = 1031e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 2.39; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.80e-14; - I_off_n[3][10] = 3.64e-14; - 
I_off_n[3][20] = 7.03e-14; - I_off_n[3][30] = 1.31e-13; - I_off_n[3][40] = 2.35e-13; - I_off_n[3][50] = 4.09e-13; - I_off_n[3][60] = 6.89e-13; - I_off_n[3][70] = 1.13e-12; - I_off_n[3][80] = 1.78e-12; - I_off_n[3][90] = 2.71e-12; - I_off_n[3][100] = 3.99e-12; - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor - curr_core_tx_density = 1.25*0.7; - curr_sckt_co_eff = 1.1359; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + if (tech == 65) { + //65nm technology-node. 
Corresponds to year 2007 in ITRS + //ITRS HP device type + SENSE_AMP_D = .2e-9; // s + SENSE_AMP_P = 5.7e-15; // J + vdd[0] = 1.1; + Lphy[0] = 0.025; + Lelec[0] = 0.019; + t_ox[0] = 1.1e-3; + v_th[0] = .19491; + c_ox[0] = 1.88e-14; + mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[0] = 7.71e-2; + c_g_ideal[0] = 4.69e-16; + c_fringe[0] = 0.077e-15; + c_junc[0] = 1e-15; + I_on_n[0] = 1197.2e-6; + I_on_p[0] = 870.8e-6; + nmos_effective_resistance_multiplier = 1.50; + n_to_p_eff_curr_drv_ratio[0] = 2.41; + gmp_to_gmn_multiplier[0] = 1.38; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; + long_channel_leakage_reduction[0] = 1 / 3.74; + //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first + //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74. + I_off_n[0][0] = 1.96e-7; + I_off_n[0][10] = 2.29e-7; + I_off_n[0][20] = 2.66e-7; + I_off_n[0][30] = 3.05e-7; + I_off_n[0][40] = 3.49e-7; + I_off_n[0][50] = 3.95e-7; + I_off_n[0][60] = 4.45e-7; + I_off_n[0][70] = 4.97e-7; + I_off_n[0][80] = 5.48e-7; + I_off_n[0][90] = 5.94e-7; + I_off_n[0][100] = 6.3e-7; + I_g_on_n[0][0] = 4.09e-8;//A/micron + I_g_on_n[0][10] = 4.09e-8; + I_g_on_n[0][20] = 4.09e-8; + I_g_on_n[0][30] = 4.09e-8; + I_g_on_n[0][40] = 4.09e-8; + I_g_on_n[0][50] = 4.09e-8; + I_g_on_n[0][60] = 4.09e-8; + I_g_on_n[0][70] = 4.09e-8; + I_g_on_n[0][80] = 4.09e-8; + I_g_on_n[0][90] = 4.09e-8; + I_g_on_n[0][100] = 4.09e-8; + + //ITRS LSTP device type + vdd[1] = 1.2; + Lphy[1] = 0.045; + Lelec[1] = 0.0298; + t_ox[1] = 1.9e-3; + v_th[1] = 0.52354; + c_ox[1] = 1.36e-14; + mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[1] = 0.128; + c_g_ideal[1] = 6.14e-16; + c_fringe[1] = 0.08e-15; + c_junc[1] = 1e-15; + I_on_n[1] = 519.2e-6; + I_on_p[1] = 266e-6; + nmos_effective_resistance_multiplier = 1.96; + n_to_p_eff_curr_drv_ratio[1] = 2.23; + gmp_to_gmn_multiplier[1] 
= 0.99; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; + long_channel_leakage_reduction[1] = 1 / 2.82; + I_off_n[1][0] = 9.12e-12; + I_off_n[1][10] = 1.49e-11; + I_off_n[1][20] = 2.36e-11; + I_off_n[1][30] = 3.64e-11; + I_off_n[1][40] = 5.48e-11; + I_off_n[1][50] = 8.05e-11; + I_off_n[1][60] = 1.15e-10; + I_off_n[1][70] = 1.59e-10; + I_off_n[1][80] = 2.1e-10; + I_off_n[1][90] = 2.62e-10; + I_off_n[1][100] = 3.21e-10; + + I_g_on_n[1][0] = 1.09e-10;//A/micron + I_g_on_n[1][10] = 1.09e-10; + I_g_on_n[1][20] = 1.09e-10; + I_g_on_n[1][30] = 1.09e-10; + I_g_on_n[1][40] = 1.09e-10; + I_g_on_n[1][50] = 1.09e-10; + I_g_on_n[1][60] = 1.09e-10; + I_g_on_n[1][70] = 1.09e-10; + I_g_on_n[1][80] = 1.09e-10; + I_g_on_n[1][90] = 1.09e-10; + I_g_on_n[1][100] = 1.09e-10; + + //ITRS LOP device type + vdd[2] = 0.8; + Lphy[2] = 0.032; + Lelec[2] = 0.0216; + t_ox[2] = 1.2e-3; + v_th[2] = 0.28512; + c_ox[2] = 1.87e-14; + mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[2] = 0.292; + c_g_ideal[2] = 6e-16; + c_fringe[2] = 0.08e-15; + c_junc[2] = 1e-15; + I_on_n[2] = 573.1e-6; + I_on_p[2] = 340.6e-6; + nmos_effective_resistance_multiplier = 1.82; + n_to_p_eff_curr_drv_ratio[2] = 2.28; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; + long_channel_leakage_reduction[2] = 1 / 2.05; + I_off_n[2][0] = 4.9e-9; + I_off_n[2][10] = 6.49e-9; + I_off_n[2][20] = 8.45e-9; + I_off_n[2][30] = 1.08e-8; + I_off_n[2][40] = 1.37e-8; + I_off_n[2][50] = 1.71e-8; + I_off_n[2][60] = 2.09e-8; + I_off_n[2][70] = 2.48e-8; + I_off_n[2][80] = 2.84e-8; + I_off_n[2][90] = 3.13e-8; + I_off_n[2][100] = 3.42e-8; + + I_g_on_n[2][0] = 9.61e-9;//A/micron + I_g_on_n[2][10] = 9.61e-9; + I_g_on_n[2][20] = 9.61e-9; + I_g_on_n[2][30] = 9.61e-9; + I_g_on_n[2][40] = 9.61e-9; + I_g_on_n[2][50] = 
9.61e-9; + I_g_on_n[2][60] = 9.61e-9; + I_g_on_n[2][70] = 9.61e-9; + I_g_on_n[2][80] = 9.61e-9; + I_g_on_n[2][90] = 9.61e-9; + I_g_on_n[2][100] = 9.61e-9; + + if (ram_cell_tech_type == lp_dram) { + //LP-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.2; + Lphy[3] = 0.12; + Lelec[3] = 0.0756; + curr_v_th_dram_access_transistor = 0.43806; + width_dram_access_transistor = 0.09; + curr_I_on_dram_cell = 36e-6; + curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 0.11; + curr_asp_ratio_cell_dram = 1.46; + curr_c_dram_cell = 20e-15; + + //LP-DRAM wordline transistor parameters + curr_vpp = 1.6; + t_ox[3] = 2.2e-3; + v_th[3] = 0.43806; + c_ox[3] = 1.22e-14; + mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.43806; + c_g_ideal[3] = 1.46e-15; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15 ; + I_on_n[3] = 399.8e-6; + I_on_p[3] = 243.4e-6; + nmos_effective_resistance_multiplier = 1.65; + n_to_p_eff_curr_drv_ratio[3] = 2.05; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 2.23e-11; + I_off_n[3][10] = 3.46e-11; + I_off_n[3][20] = 5.24e-11; + I_off_n[3][30] = 7.75e-11; + I_off_n[3][40] = 1.12e-10; + I_off_n[3][50] = 1.58e-10; + I_off_n[3][60] = 2.18e-10; + I_off_n[3][70] = 2.88e-10; + I_off_n[3][80] = 3.63e-10; + I_off_n[3][90] = 4.41e-10; + I_off_n[3][100] = 5.36e-10; + } else if (ram_cell_tech_type == comm_dram) { + //COMM-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.3; + Lphy[3] = 0.065; + Lelec[3] = 0.0426; + curr_v_th_dram_access_transistor = 1; + width_dram_access_transistor = 0.065; + curr_I_on_dram_cell = 20e-6; + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; + 
curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.065 * 0.065; + curr_asp_ratio_cell_dram = 1.5; + curr_c_dram_cell = 30e-15; + + //COMM-DRAM wordline transistor parameters + curr_vpp = 3.3; + t_ox[3] = 5e-3; + v_th[3] = 1.0; + c_ox[3] = 6.16e-15; + mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.385; + c_g_ideal[3] = 4e-16; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15 ; + I_on_n[3] = 1031e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.69; + n_to_p_eff_curr_drv_ratio[3] = 2.39; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 1.80e-14; + I_off_n[3][10] = 3.64e-14; + I_off_n[3][20] = 7.03e-14; + I_off_n[3][30] = 1.31e-13; + I_off_n[3][40] = 2.35e-13; + I_off_n[3][50] = 4.09e-13; + I_off_n[3][60] = 6.89e-13; + I_off_n[3][70] = 1.13e-12; + I_off_n[3][80] = 1.78e-12; + I_off_n[3][90] = 2.71e-12; + I_off_n[3][100] = 3.99e-12; + } + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7; //Rather than scale proportionally to square of feature size, only scale linearly according to IBM cell processor + curr_core_tx_density = 1.25 * 0.7; + curr_sckt_co_eff = 1.1359; + 
curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + } - if (tech == 45) - { //45nm technology-node. Corresponds to year 2010 in ITRS - //ITRS HP device type - SENSE_AMP_D = .04e-9; // s - SENSE_AMP_P = 2.7e-15; // J - vdd[0] = 1.0; - Lphy[0] = 0.018; - Lelec[0] = 0.01345; - t_ox[0] = 0.65e-3; - v_th[0] = .18035; - c_ox[0] = 3.77e-14; - mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 9.38E-2; - c_g_ideal[0] = 6.78e-16; - c_fringe[0] = 0.05e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 2046.6e-6; - //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of - //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm - I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI - nmos_effective_resistance_multiplier = 1.51; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74 - I_off_n[0][0] = 2.8e-7; - I_off_n[0][10] = 3.28e-7; - I_off_n[0][20] = 3.81e-7; - I_off_n[0][30] = 4.39e-7; - I_off_n[0][40] = 5.02e-7; - I_off_n[0][50] = 5.69e-7; - I_off_n[0][60] = 6.42e-7; - I_off_n[0][70] = 7.2e-7; - I_off_n[0][80] = 8.03e-7; - I_off_n[0][90] = 8.91e-7; - I_off_n[0][100] = 9.84e-7; - - I_g_on_n[0][0] = 3.59e-8;//A/micron - I_g_on_n[0][10] = 3.59e-8; - I_g_on_n[0][20] = 3.59e-8; - I_g_on_n[0][30] = 3.59e-8; - I_g_on_n[0][40] = 3.59e-8; - I_g_on_n[0][50] = 3.59e-8; - I_g_on_n[0][60] = 3.59e-8; - I_g_on_n[0][70] = 3.59e-8; - I_g_on_n[0][80] = 3.59e-8; - I_g_on_n[0][90] = 3.59e-8; - I_g_on_n[0][100] = 3.59e-8; - - //ITRS LSTP device type - vdd[1] = 1.1; 
- Lphy[1] = 0.028; - Lelec[1] = 0.0212; - t_ox[1] = 1.4e-3; - v_th[1] = 0.50245; - c_ox[1] = 2.01e-14; - mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 9.12e-2; - c_g_ideal[1] = 5.18e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 666.2e-6; - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.08; - I_off_n[1][0] = 1.01e-11; - I_off_n[1][10] = 1.65e-11; - I_off_n[1][20] = 2.62e-11; - I_off_n[1][30] = 4.06e-11; - I_off_n[1][40] = 6.12e-11; - I_off_n[1][50] = 9.02e-11; - I_off_n[1][60] = 1.3e-10; - I_off_n[1][70] = 1.83e-10; - I_off_n[1][80] = 2.51e-10; - I_off_n[1][90] = 3.29e-10; - I_off_n[1][100] = 4.1e-10; - - I_g_on_n[1][0] = 9.47e-12;//A/micron - I_g_on_n[1][10] = 9.47e-12; - I_g_on_n[1][20] = 9.47e-12; - I_g_on_n[1][30] = 9.47e-12; - I_g_on_n[1][40] = 9.47e-12; - I_g_on_n[1][50] = 9.47e-12; - I_g_on_n[1][60] = 9.47e-12; - I_g_on_n[1][70] = 9.47e-12; - I_g_on_n[1][80] = 9.47e-12; - I_g_on_n[1][90] = 9.47e-12; - I_g_on_n[1][100] = 9.47e-12; - - //ITRS LOP device type - vdd[2] = 0.7; - Lphy[2] = 0.022; - Lelec[2] = 0.016; - t_ox[2] = 0.9e-3; - v_th[2] = 0.22599; - c_ox[2] = 2.82e-14;//F/micron2 - mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 5.71e-2; - c_g_ideal[2] = 6.2e-16; - c_fringe[2] = 0.073e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 748.9e-6; - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.76; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.92; - I_off_n[2][0] = 4.03e-9; - I_off_n[2][10] = 
5.02e-9; - I_off_n[2][20] = 6.18e-9; - I_off_n[2][30] = 7.51e-9; - I_off_n[2][40] = 9.04e-9; - I_off_n[2][50] = 1.08e-8; - I_off_n[2][60] = 1.27e-8; - I_off_n[2][70] = 1.47e-8; - I_off_n[2][80] = 1.66e-8; - I_off_n[2][90] = 1.84e-8; - I_off_n[2][100] = 2.03e-8; - - I_g_on_n[2][0] = 3.24e-8;//A/micron - I_g_on_n[2][10] = 4.01e-8; - I_g_on_n[2][20] = 4.90e-8; - I_g_on_n[2][30] = 5.92e-8; - I_g_on_n[2][40] = 7.08e-8; - I_g_on_n[2][50] = 8.38e-8; - I_g_on_n[2][60] = 9.82e-8; - I_g_on_n[2][70] = 1.14e-7; - I_g_on_n[2][80] = 1.29e-7; - I_g_on_n[2][90] = 1.43e-7; - I_g_on_n[2][100] = 1.54e-7; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.1; - Lphy[3] = 0.078; - Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 0.44559; - width_dram_access_transistor = 0.079; - curr_I_on_dram_cell = 36e-6;//A - curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.5; - t_ox[3] = 2.1e-3; - v_th[3] = 0.44559; - c_ox[3] = 1.41e-14; - mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.181; - c_g_ideal[3] = 1.10e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 456e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.54e-11; - I_off_n[3][10] = 3.94e-11; - I_off_n[3][20] = 5.95e-11; - 
I_off_n[3][30] = 8.79e-11; - I_off_n[3][40] = 1.27e-10; - I_off_n[3][50] = 1.79e-10; - I_off_n[3][60] = 2.47e-10; - I_off_n[3][70] = 3.31e-10; - I_off_n[3][80] = 4.26e-10; - I_off_n[3][90] = 5.27e-10; - I_off_n[3][100] = 6.46e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.1; - Lphy[3] = 0.045; - Lelec[3] = 0.0298; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.045; - curr_I_on_dram_cell = 20e-6;//A - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.045*0.045; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 2.7; - t_ox[3] = 4e-3; - v_th[3] = 1.0; - c_ox[3] = 7.98e-15; - mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.147; - c_g_ideal[3] = 3.59e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 999.4e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.31e-14; - I_off_n[3][10] = 2.68e-14; - I_off_n[3][20] = 5.25e-14; - I_off_n[3][30] = 9.88e-14; - I_off_n[3][40] = 1.79e-13; - I_off_n[3][50] = 3.15e-13; - I_off_n[3][60] = 5.36e-13; - I_off_n[3][70] = 8.86e-13; - I_off_n[3][80] = 1.42e-12; - I_off_n[3][90] = 2.20e-12; - I_off_n[3][100] = 3.29e-12; - } - - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - 
curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7; - curr_core_tx_density = 1.25; - curr_sckt_co_eff = 1.1387; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + if (tech == 45) { + //45nm technology-node. Corresponds to year 2010 in ITRS + //ITRS HP device type + SENSE_AMP_D = .04e-9; // s + SENSE_AMP_P = 2.7e-15; // J + vdd[0] = 1.0; + Lphy[0] = 0.018; + Lelec[0] = 0.01345; + t_ox[0] = 0.65e-3; + v_th[0] = .18035; + c_ox[0] = 3.77e-14; + mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[0] = 9.38E-2; + c_g_ideal[0] = 6.78e-16; + c_fringe[0] = 0.05e-15; + c_junc[0] = 1e-15; + I_on_n[0] = 2046.6e-6; + //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. 
So we are using 65nm values of + //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm + I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI + nmos_effective_resistance_multiplier = 1.51; + n_to_p_eff_curr_drv_ratio[0] = 2.41; + gmp_to_gmn_multiplier[0] = 1.38; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; + //Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, + //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74 + long_channel_leakage_reduction[0] = 1 / 3.546; + I_off_n[0][0] = 2.8e-7; + I_off_n[0][10] = 3.28e-7; + I_off_n[0][20] = 3.81e-7; + I_off_n[0][30] = 4.39e-7; + I_off_n[0][40] = 5.02e-7; + I_off_n[0][50] = 5.69e-7; + I_off_n[0][60] = 6.42e-7; + I_off_n[0][70] = 7.2e-7; + I_off_n[0][80] = 8.03e-7; + I_off_n[0][90] = 8.91e-7; + I_off_n[0][100] = 9.84e-7; + + I_g_on_n[0][0] = 3.59e-8;//A/micron + I_g_on_n[0][10] = 3.59e-8; + I_g_on_n[0][20] = 3.59e-8; + I_g_on_n[0][30] = 3.59e-8; + I_g_on_n[0][40] = 3.59e-8; + I_g_on_n[0][50] = 3.59e-8; + I_g_on_n[0][60] = 3.59e-8; + I_g_on_n[0][70] = 3.59e-8; + I_g_on_n[0][80] = 3.59e-8; + I_g_on_n[0][90] = 3.59e-8; + I_g_on_n[0][100] = 3.59e-8; + + //ITRS LSTP device type + vdd[1] = 1.1; + Lphy[1] = 0.028; + Lelec[1] = 0.0212; + t_ox[1] = 1.4e-3; + v_th[1] = 0.50245; + c_ox[1] = 2.01e-14; + mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[1] = 9.12e-2; + c_g_ideal[1] = 5.18e-16; + c_fringe[1] = 0.08e-15; + c_junc[1] = 1e-15; + I_on_n[1] = 666.2e-6; + I_on_p[1] = I_on_n[1] / 2; + nmos_effective_resistance_multiplier = 1.99; + n_to_p_eff_curr_drv_ratio[1] = 2.23; + gmp_to_gmn_multiplier[1] = 0.99; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; + long_channel_leakage_reduction[1] = 1 / 2.08; + I_off_n[1][0] = 1.01e-11; + I_off_n[1][10] = 1.65e-11; + I_off_n[1][20] = 
2.62e-11; + I_off_n[1][30] = 4.06e-11; + I_off_n[1][40] = 6.12e-11; + I_off_n[1][50] = 9.02e-11; + I_off_n[1][60] = 1.3e-10; + I_off_n[1][70] = 1.83e-10; + I_off_n[1][80] = 2.51e-10; + I_off_n[1][90] = 3.29e-10; + I_off_n[1][100] = 4.1e-10; + + I_g_on_n[1][0] = 9.47e-12;//A/micron + I_g_on_n[1][10] = 9.47e-12; + I_g_on_n[1][20] = 9.47e-12; + I_g_on_n[1][30] = 9.47e-12; + I_g_on_n[1][40] = 9.47e-12; + I_g_on_n[1][50] = 9.47e-12; + I_g_on_n[1][60] = 9.47e-12; + I_g_on_n[1][70] = 9.47e-12; + I_g_on_n[1][80] = 9.47e-12; + I_g_on_n[1][90] = 9.47e-12; + I_g_on_n[1][100] = 9.47e-12; + + //ITRS LOP device type + vdd[2] = 0.7; + Lphy[2] = 0.022; + Lelec[2] = 0.016; + t_ox[2] = 0.9e-3; + v_th[2] = 0.22599; + c_ox[2] = 2.82e-14;//F/micron2 + mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[2] = 5.71e-2; + c_g_ideal[2] = 6.2e-16; + c_fringe[2] = 0.073e-15; + c_junc[2] = 1e-15; + I_on_n[2] = 748.9e-6; + I_on_p[2] = I_on_n[2] / 2; + nmos_effective_resistance_multiplier = 1.76; + n_to_p_eff_curr_drv_ratio[2] = 2.28; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; + long_channel_leakage_reduction[2] = 1 / 1.92; + I_off_n[2][0] = 4.03e-9; + I_off_n[2][10] = 5.02e-9; + I_off_n[2][20] = 6.18e-9; + I_off_n[2][30] = 7.51e-9; + I_off_n[2][40] = 9.04e-9; + I_off_n[2][50] = 1.08e-8; + I_off_n[2][60] = 1.27e-8; + I_off_n[2][70] = 1.47e-8; + I_off_n[2][80] = 1.66e-8; + I_off_n[2][90] = 1.84e-8; + I_off_n[2][100] = 2.03e-8; + + I_g_on_n[2][0] = 3.24e-8;//A/micron + I_g_on_n[2][10] = 4.01e-8; + I_g_on_n[2][20] = 4.90e-8; + I_g_on_n[2][30] = 5.92e-8; + I_g_on_n[2][40] = 7.08e-8; + I_g_on_n[2][50] = 8.38e-8; + I_g_on_n[2][60] = 9.82e-8; + I_g_on_n[2][70] = 1.14e-7; + I_g_on_n[2][80] = 1.29e-7; + I_g_on_n[2][90] = 1.43e-7; + I_g_on_n[2][100] = 1.54e-7; + + if (ram_cell_tech_type == lp_dram) { + //LP-DRAM cell access transistor technology parameters + 
curr_vdd_dram_cell = 1.1; + Lphy[3] = 0.078; + Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. + curr_v_th_dram_access_transistor = 0.44559; + width_dram_access_transistor = 0.079; + curr_I_on_dram_cell = 36e-6;//A + curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; + curr_asp_ratio_cell_dram = 1.46; + curr_c_dram_cell = 20e-15; + + //LP-DRAM wordline transistor parameters + curr_vpp = 1.5; + t_ox[3] = 2.1e-3; + v_th[3] = 0.44559; + c_ox[3] = 1.41e-14; + mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.181; + c_g_ideal[3] = 1.10e-15; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 456e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.65; + n_to_p_eff_curr_drv_ratio[3] = 2.05; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 2.54e-11; + I_off_n[3][10] = 3.94e-11; + I_off_n[3][20] = 5.95e-11; + I_off_n[3][30] = 8.79e-11; + I_off_n[3][40] = 1.27e-10; + I_off_n[3][50] = 1.79e-10; + I_off_n[3][60] = 2.47e-10; + I_off_n[3][70] = 3.31e-10; + I_off_n[3][80] = 4.26e-10; + I_off_n[3][90] = 5.27e-10; + I_off_n[3][100] = 6.46e-10; + } else if (ram_cell_tech_type == comm_dram) { + //COMM-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.1; + Lphy[3] = 0.045; + Lelec[3] = 0.0298; + curr_v_th_dram_access_transistor = 1; + width_dram_access_transistor = 0.045; + curr_I_on_dram_cell = 20e-6;//A + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + 
curr_area_cell_dram = 6 * 0.045 * 0.045; + curr_asp_ratio_cell_dram = 1.5; + curr_c_dram_cell = 30e-15; + + //COMM-DRAM wordline transistor parameters + curr_vpp = 2.7; + t_ox[3] = 4e-3; + v_th[3] = 1.0; + c_ox[3] = 7.98e-15; + mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.147; + c_g_ideal[3] = 3.59e-16; + c_fringe[3] = 0.08e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 999.4e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.69; + n_to_p_eff_curr_drv_ratio[3] = 1.95; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 1.31e-14; + I_off_n[3][10] = 2.68e-14; + I_off_n[3][20] = 5.25e-14; + I_off_n[3][30] = 9.88e-14; + I_off_n[3][40] = 1.79e-13; + I_off_n[3][50] = 3.15e-13; + I_off_n[3][60] = 5.36e-13; + I_off_n[3][70] = 8.86e-13; + I_off_n[3][80] = 1.42e-12; + I_off_n[3][90] = 2.20e-12; + I_off_n[3][100] = 3.29e-12; + } + + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7; + curr_core_tx_density = 1.25; + curr_sckt_co_eff = 1.1387; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + } - if (tech == 32) - { - SENSE_AMP_D = .03e-9; // s - 
SENSE_AMP_P = 2.16e-15; // J - //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm - //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for - //HP and LSTP. - vdd[0] = 0.9; - Lphy[0] = 0.013; - Lelec[0] = 0.01013; - t_ox[0] = 0.5e-3; - v_th[0] = 0.21835; - c_ox[0] = 4.11e-14; - mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 5.09E-2; - c_g_ideal[0] = 5.34e-16; - c_fringe[0] = 0.04e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 2211.7e-6; - I_on_p[0] = I_on_n[0] / 2; - nmos_effective_resistance_multiplier = 1.49; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.706; - //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%), - //whichever comes first - I_off_n[0][0] = 1.52e-7; - I_off_n[0][10] = 1.55e-7; - I_off_n[0][20] = 1.59e-7; - I_off_n[0][30] = 1.68e-7; - I_off_n[0][40] = 1.90e-7; - I_off_n[0][50] = 2.69e-7; - I_off_n[0][60] = 5.32e-7; - I_off_n[0][70] = 1.02e-6; - I_off_n[0][80] = 1.62e-6; - I_off_n[0][90] = 2.73e-6; - I_off_n[0][100] = 6.1e-6; - - I_g_on_n[0][0] = 6.55e-8;//A/micron - I_g_on_n[0][10] = 6.55e-8; - I_g_on_n[0][20] = 6.55e-8; - I_g_on_n[0][30] = 6.55e-8; - I_g_on_n[0][40] = 6.55e-8; - I_g_on_n[0][50] = 6.55e-8; - I_g_on_n[0][60] = 6.55e-8; - I_g_on_n[0][70] = 6.55e-8; - I_g_on_n[0][80] = 6.55e-8; - I_g_on_n[0][90] = 6.55e-8; - I_g_on_n[0][100] = 6.55e-8; - -// 32 DG -// I_g_on_n[0][0] = 2.71e-9;//A/micron -// I_g_on_n[0][10] = 2.71e-9; -// I_g_on_n[0][20] = 2.71e-9; -// I_g_on_n[0][30] = 2.71e-9; -// I_g_on_n[0][40] = 2.71e-9; -// I_g_on_n[0][50] = 2.71e-9; -// I_g_on_n[0][60] = 2.71e-9; -// I_g_on_n[0][70] = 2.71e-9; -// I_g_on_n[0][80] = 2.71e-9; -// 
I_g_on_n[0][90] = 2.71e-9; -// I_g_on_n[0][100] = 2.71e-9; - - //LSTP device type - vdd[1] = 1; - Lphy[1] = 0.020; - Lelec[1] = 0.0173; - t_ox[1] = 1.2e-3; - v_th[1] = 0.513; - c_ox[1] = 2.29e-14; - mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 8.64e-2; - c_g_ideal[1] = 4.58e-16; - c_fringe[1] = 0.053e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 683.6e-6; - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/1.93; - I_off_n[1][0] = 2.06e-11; - I_off_n[1][10] = 3.30e-11; - I_off_n[1][20] = 5.15e-11; - I_off_n[1][30] = 7.83e-11; - I_off_n[1][40] = 1.16e-10; - I_off_n[1][50] = 1.69e-10; - I_off_n[1][60] = 2.40e-10; - I_off_n[1][70] = 3.34e-10; - I_off_n[1][80] = 4.54e-10; - I_off_n[1][90] = 5.96e-10; - I_off_n[1][100] = 7.44e-10; - - I_g_on_n[1][0] = 3.73e-11;//A/micron - I_g_on_n[1][10] = 3.73e-11; - I_g_on_n[1][20] = 3.73e-11; - I_g_on_n[1][30] = 3.73e-11; - I_g_on_n[1][40] = 3.73e-11; - I_g_on_n[1][50] = 3.73e-11; - I_g_on_n[1][60] = 3.73e-11; - I_g_on_n[1][70] = 3.73e-11; - I_g_on_n[1][80] = 3.73e-11; - I_g_on_n[1][90] = 3.73e-11; - I_g_on_n[1][100] = 3.73e-11; - - - //LOP device type - vdd[2] = 0.6; - Lphy[2] = 0.016; - Lelec[2] = 0.01232; - t_ox[2] = 0.9e-3; - v_th[2] = 0.24227; - c_ox[2] = 2.84e-14; - mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 4.64e-2; - c_g_ideal[2] = 4.54e-16; - c_fringe[2] = 0.057e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 827.8e-6; - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - 
long_channel_leakage_reduction[2] = 1/1.89; - I_off_n[2][0] = 5.94e-8; - I_off_n[2][10] = 7.23e-8; - I_off_n[2][20] = 8.7e-8; - I_off_n[2][30] = 1.04e-7; - I_off_n[2][40] = 1.22e-7; - I_off_n[2][50] = 1.43e-7; - I_off_n[2][60] = 1.65e-7; - I_off_n[2][70] = 1.90e-7; - I_off_n[2][80] = 2.15e-7; - I_off_n[2][90] = 2.39e-7; - I_off_n[2][100] = 2.63e-7; - - I_g_on_n[2][0] = 2.93e-9;//A/micron - I_g_on_n[2][10] = 2.93e-9; - I_g_on_n[2][20] = 2.93e-9; - I_g_on_n[2][30] = 2.93e-9; - I_g_on_n[2][40] = 2.93e-9; - I_g_on_n[2][50] = 2.93e-9; - I_g_on_n[2][60] = 2.93e-9; - I_g_on_n[2][70] = 2.93e-9; - I_g_on_n[2][80] = 2.93e-9; - I_g_on_n[2][90] = 2.93e-9; - I_g_on_n[2][100] = 2.93e-9; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.0; - Lphy[3] = 0.056; - Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 0.44129; - width_dram_access_transistor = 0.056; - curr_I_on_dram_cell = 36e-6; - curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.5; - t_ox[3] = 2e-3; - v_th[3] = 0.44467; - c_ox[3] = 1.48e-14; - mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.174; - c_g_ideal[3] = 7.45e-16; - c_fringe[3] = 0.053e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1055.4e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - 
I_off_n[3][0] = 3.57e-11; - I_off_n[3][10] = 5.51e-11; - I_off_n[3][20] = 8.27e-11; - I_off_n[3][30] = 1.21e-10; - I_off_n[3][40] = 1.74e-10; - I_off_n[3][50] = 2.45e-10; - I_off_n[3][60] = 3.38e-10; - I_off_n[3][70] = 4.53e-10; - I_off_n[3][80] = 5.87e-10; - I_off_n[3][90] = 7.29e-10; - I_off_n[3][100] = 8.87e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.0; - Lphy[3] = 0.032; - Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.032; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.032*0.032; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 2.6; - t_ox[3] = 4e-3; - v_th[3] = 1.0; - c_ox[3] = 7.99e-15; - mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.129; - c_g_ideal[3] = 2.56e-16; - c_fringe[3] = 0.053e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1024.5e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.63e-14; - I_off_n[3][10] = 7.18e-14; - I_off_n[3][20] = 1.36e-13; - I_off_n[3][30] = 2.49e-13; - I_off_n[3][40] = 4.41e-13; - I_off_n[3][50] = 7.55e-13; - I_off_n[3][60] = 1.26e-12; - I_off_n[3][70] = 2.03e-12; - I_off_n[3][80] = 3.19e-12; - I_off_n[3][90] = 4.87e-12; - I_off_n[3][100] = 7.16e-12; - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - 
curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7; - curr_sckt_co_eff = 1.1111; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + if (tech == 32) { + SENSE_AMP_D = .03e-9; // s + SENSE_AMP_P = 2.16e-15; // J + //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm + //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for + //HP and LSTP. 
+ vdd[0] = 0.9; + Lphy[0] = 0.013; + Lelec[0] = 0.01013; + t_ox[0] = 0.5e-3; + v_th[0] = 0.21835; + c_ox[0] = 4.11e-14; + mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[0] = 5.09E-2; + c_g_ideal[0] = 5.34e-16; + c_fringe[0] = 0.04e-15; + c_junc[0] = 1e-15; + I_on_n[0] = 2211.7e-6; + I_on_p[0] = I_on_n[0] / 2; + nmos_effective_resistance_multiplier = 1.49; + n_to_p_eff_curr_drv_ratio[0] = 2.41; + gmp_to_gmn_multiplier[0] = 1.38; + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1 / 3.706; + //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%), + //whichever comes first + I_off_n[0][0] = 1.52e-7; + I_off_n[0][10] = 1.55e-7; + I_off_n[0][20] = 1.59e-7; + I_off_n[0][30] = 1.68e-7; + I_off_n[0][40] = 1.90e-7; + I_off_n[0][50] = 2.69e-7; + I_off_n[0][60] = 5.32e-7; + I_off_n[0][70] = 1.02e-6; + I_off_n[0][80] = 1.62e-6; + I_off_n[0][90] = 2.73e-6; + I_off_n[0][100] = 6.1e-6; + + I_g_on_n[0][0] = 6.55e-8;//A/micron + I_g_on_n[0][10] = 6.55e-8; + I_g_on_n[0][20] = 6.55e-8; + I_g_on_n[0][30] = 6.55e-8; + I_g_on_n[0][40] = 6.55e-8; + I_g_on_n[0][50] = 6.55e-8; + I_g_on_n[0][60] = 6.55e-8; + I_g_on_n[0][70] = 6.55e-8; + I_g_on_n[0][80] = 6.55e-8; + I_g_on_n[0][90] = 6.55e-8; + I_g_on_n[0][100] = 6.55e-8; + + //LSTP device type + vdd[1] = 1; + Lphy[1] = 0.020; + Lelec[1] = 0.0173; + t_ox[1] = 1.2e-3; + v_th[1] = 0.513; + c_ox[1] = 2.29e-14; + mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[1] = 8.64e-2; + c_g_ideal[1] = 4.58e-16; + c_fringe[1] = 0.053e-15; + c_junc[1] = 1e-15; + I_on_n[1] = 683.6e-6; + I_on_p[1] = I_on_n[1] / 2; + nmos_effective_resistance_multiplier = 1.99; + n_to_p_eff_curr_drv_ratio[1] = 2.23; + gmp_to_gmn_multiplier[1] = 0.99; + Rnchannelon[1] = 
nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; + long_channel_leakage_reduction[1] = 1 / 1.93; + I_off_n[1][0] = 2.06e-11; + I_off_n[1][10] = 3.30e-11; + I_off_n[1][20] = 5.15e-11; + I_off_n[1][30] = 7.83e-11; + I_off_n[1][40] = 1.16e-10; + I_off_n[1][50] = 1.69e-10; + I_off_n[1][60] = 2.40e-10; + I_off_n[1][70] = 3.34e-10; + I_off_n[1][80] = 4.54e-10; + I_off_n[1][90] = 5.96e-10; + I_off_n[1][100] = 7.44e-10; + + I_g_on_n[1][0] = 3.73e-11;//A/micron + I_g_on_n[1][10] = 3.73e-11; + I_g_on_n[1][20] = 3.73e-11; + I_g_on_n[1][30] = 3.73e-11; + I_g_on_n[1][40] = 3.73e-11; + I_g_on_n[1][50] = 3.73e-11; + I_g_on_n[1][60] = 3.73e-11; + I_g_on_n[1][70] = 3.73e-11; + I_g_on_n[1][80] = 3.73e-11; + I_g_on_n[1][90] = 3.73e-11; + I_g_on_n[1][100] = 3.73e-11; + + //LOP device type + vdd[2] = 0.6; + Lphy[2] = 0.016; + Lelec[2] = 0.01232; + t_ox[2] = 0.9e-3; + v_th[2] = 0.24227; + c_ox[2] = 2.84e-14; + mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[2] = 4.64e-2; + c_g_ideal[2] = 4.54e-16; + c_fringe[2] = 0.057e-15; + c_junc[2] = 1e-15; + I_on_n[2] = 827.8e-6; + I_on_p[2] = I_on_n[2] / 2; + nmos_effective_resistance_multiplier = 1.73; + n_to_p_eff_curr_drv_ratio[2] = 2.28; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; + long_channel_leakage_reduction[2] = 1 / 1.89; + I_off_n[2][0] = 5.94e-8; + I_off_n[2][10] = 7.23e-8; + I_off_n[2][20] = 8.7e-8; + I_off_n[2][30] = 1.04e-7; + I_off_n[2][40] = 1.22e-7; + I_off_n[2][50] = 1.43e-7; + I_off_n[2][60] = 1.65e-7; + I_off_n[2][70] = 1.90e-7; + I_off_n[2][80] = 2.15e-7; + I_off_n[2][90] = 2.39e-7; + I_off_n[2][100] = 2.63e-7; + + I_g_on_n[2][0] = 2.93e-9;//A/micron + I_g_on_n[2][10] = 2.93e-9; + I_g_on_n[2][20] = 2.93e-9; + I_g_on_n[2][30] = 2.93e-9; + I_g_on_n[2][40] = 2.93e-9; + I_g_on_n[2][50] = 2.93e-9; + 
I_g_on_n[2][60] = 2.93e-9; + I_g_on_n[2][70] = 2.93e-9; + I_g_on_n[2][80] = 2.93e-9; + I_g_on_n[2][90] = 2.93e-9; + I_g_on_n[2][100] = 2.93e-9; + + if (ram_cell_tech_type == lp_dram) { + //LP-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.0; + Lphy[3] = 0.056; + Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. + curr_v_th_dram_access_transistor = 0.44129; + width_dram_access_transistor = 0.056; + curr_I_on_dram_cell = 36e-6; + curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; + curr_asp_ratio_cell_dram = 1.46; + curr_c_dram_cell = 20e-15; + + //LP-DRAM wordline transistor parameters + curr_vpp = 1.5; + t_ox[3] = 2e-3; + v_th[3] = 0.44467; + c_ox[3] = 1.48e-14; + mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.174; + c_g_ideal[3] = 7.45e-16; + c_fringe[3] = 0.053e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 1055.4e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.65; + n_to_p_eff_curr_drv_ratio[3] = 2.05; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 3.57e-11; + I_off_n[3][10] = 5.51e-11; + I_off_n[3][20] = 8.27e-11; + I_off_n[3][30] = 1.21e-10; + I_off_n[3][40] = 1.74e-10; + I_off_n[3][50] = 2.45e-10; + I_off_n[3][60] = 3.38e-10; + I_off_n[3][70] = 4.53e-10; + I_off_n[3][80] = 5.87e-10; + I_off_n[3][90] = 7.29e-10; + I_off_n[3][100] = 8.87e-10; + } else if (ram_cell_tech_type == comm_dram) { + //COMM-DRAM cell access transistor technology parameters + curr_vdd_dram_cell = 1.0; + Lphy[3] = 0.032; + Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access 
and wordline transistors. + curr_v_th_dram_access_transistor = 1; + width_dram_access_transistor = 0.032; + curr_I_on_dram_cell = 20e-6; + curr_I_off_dram_cell_worst_case_length_temp = 1e-15; + curr_Wmemcella_dram = width_dram_access_transistor; + curr_Wmemcellpmos_dram = 0; + curr_Wmemcellnmos_dram = 0; + curr_area_cell_dram = 6 * 0.032 * 0.032; + curr_asp_ratio_cell_dram = 1.5; + curr_c_dram_cell = 30e-15; + + //COMM-DRAM wordline transistor parameters + curr_vpp = 2.6; + t_ox[3] = 4e-3; + v_th[3] = 1.0; + c_ox[3] = 7.99e-15; + mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6); + Vdsat[3] = 0.129; + c_g_ideal[3] = 2.56e-16; + c_fringe[3] = 0.053e-15; + c_junc[3] = 1e-15; + I_on_n[3] = 1024.5e-6; + I_on_p[3] = I_on_n[3] / 2; + nmos_effective_resistance_multiplier = 1.69; + n_to_p_eff_curr_drv_ratio[3] = 1.95; + gmp_to_gmn_multiplier[3] = 0.90; + Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; + Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; + long_channel_leakage_reduction[3] = 1; + I_off_n[3][0] = 3.63e-14; + I_off_n[3][10] = 7.18e-14; + I_off_n[3][20] = 1.36e-13; + I_off_n[3][30] = 2.49e-13; + I_off_n[3][40] = 4.41e-13; + I_off_n[3][50] = 7.55e-13; + I_off_n[3][60] = 1.26e-12; + I_off_n[3][70] = 2.03e-12; + I_off_n[3][80] = 3.19e-12; + I_off_n[3][90] = 4.87e-12; + I_off_n[3][100] = 7.16e-12; + } + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + 
curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7; + curr_sckt_co_eff = 1.1111; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb + } - if(tech == 22){ - SENSE_AMP_D = .03e-9; // s - SENSE_AMP_P = 2.16e-15; // J - //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm - //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP. - //22 nm HP - vdd[0] = 0.8; - Lphy[0] = 0.009;//Lphy is the physical gate-length. - Lelec[0] = 0.00468;//Lelec is the electrical gate-length. - t_ox[0] = 0.55e-3;//micron - v_th[0] = 0.1395;//V - c_ox[0] = 3.63e-14;//F/micron2 - mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 2.33e-2; //V/micron - c_g_ideal[0] = 3.27e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron - c_junc[0] = 0;//F/micron2 - I_on_n[0] = 2626.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.45; - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.274; - I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there. 
- I_off_n[0][10] = 1.55e-7/1.5*1.2; - I_off_n[0][20] = 1.59e-7/1.5*1.2; - I_off_n[0][30] = 1.68e-7/1.5*1.2; - I_off_n[0][40] = 1.90e-7/1.5*1.2; - I_off_n[0][50] = 2.69e-7/1.5*1.2; - I_off_n[0][60] = 5.32e-7/1.5*1.2; - I_off_n[0][70] = 1.02e-6/1.5*1.2; - I_off_n[0][80] = 1.62e-6/1.5*1.2; - I_off_n[0][90] = 2.73e-6/1.5*1.2; - I_off_n[0][100] = 6.1e-6/1.5*1.2; - //for 22nm DG HP - I_g_on_n[0][0] = 1.81e-9;//A/micron - I_g_on_n[0][10] = 1.81e-9; - I_g_on_n[0][20] = 1.81e-9; - I_g_on_n[0][30] = 1.81e-9; - I_g_on_n[0][40] = 1.81e-9; - I_g_on_n[0][50] = 1.81e-9; - I_g_on_n[0][60] = 1.81e-9; - I_g_on_n[0][70] = 1.81e-9; - I_g_on_n[0][80] = 1.81e-9; - I_g_on_n[0][90] = 1.81e-9; - I_g_on_n[0][100] = 1.81e-9; - - //22 nm LSTP DG - vdd[1] = 0.8; - Lphy[1] = 0.014; - Lelec[1] = 0.008;//Lelec is the electrical gate-length. - t_ox[1] = 1.1e-3;//micron - v_th[1] = 0.40126;//V - c_ox[1] = 2.30e-14;//F/micron2 - mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[1] = 6.64e-2; //V/micron - c_g_ideal[1] = 3.22e-16;//F/micron - c_fringe[1] = 0.08e-15; - c_junc[1] = 0;//F/micron2 - I_on_n[1] = 727.6e-6;//A/micron - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron - long_channel_leakage_reduction[1] = 1/1.89; - I_off_n[1][0] = 2.43e-11; - I_off_n[1][10] = 4.85e-11; - I_off_n[1][20] = 9.68e-11; - I_off_n[1][30] = 1.94e-10; - I_off_n[1][40] = 3.87e-10; - I_off_n[1][50] = 7.73e-10; - I_off_n[1][60] = 3.55e-10; - I_off_n[1][70] = 3.09e-9; - I_off_n[1][80] = 6.19e-9; - I_off_n[1][90] = 1.24e-8; - I_off_n[1][100]= 2.48e-8; - - I_g_on_n[1][0] = 4.51e-10;//A/micron - I_g_on_n[1][10] = 4.51e-10; - I_g_on_n[1][20] = 4.51e-10; - I_g_on_n[1][30] = 4.51e-10; - I_g_on_n[1][40] = 4.51e-10; - I_g_on_n[1][50] = 
4.51e-10; - I_g_on_n[1][60] = 4.51e-10; - I_g_on_n[1][70] = 4.51e-10; - I_g_on_n[1][80] = 4.51e-10; - I_g_on_n[1][90] = 4.51e-10; - I_g_on_n[1][100] = 4.51e-10; - - //22 nm LOP - vdd[2] = 0.6; - Lphy[2] = 0.011; - Lelec[2] = 0.00604;//Lelec is the electrical gate-length. - t_ox[2] = 0.8e-3;//micron - v_th[2] = 0.2315;//V - c_ox[2] = 2.87e-14;//F/micron2 - mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[2] = 1.81e-2; //V/micron - c_g_ideal[2] = 3.16e-16;//F/micron - c_fringe[2] = 0.08e-15; - c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab - I_on_n[2] = 916.1e-6;//A/micron - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron - long_channel_leakage_reduction[2] = 1/2.38; - - I_off_n[2][0] = 1.31e-8; - I_off_n[2][10] = 2.60e-8; - I_off_n[2][20] = 5.14e-8; - I_off_n[2][30] = 1.02e-7; - I_off_n[2][40] = 2.02e-7; - I_off_n[2][50] = 3.99e-7; - I_off_n[2][60] = 7.91e-7; - I_off_n[2][70] = 1.09e-6; - I_off_n[2][80] = 2.09e-6; - I_off_n[2][90] = 4.04e-6; - I_off_n[2][100]= 4.48e-6; - - I_g_on_n[2][0] = 2.74e-9;//A/micron - I_g_on_n[2][10] = 2.74e-9; - I_g_on_n[2][20] = 2.74e-9; - I_g_on_n[2][30] = 2.74e-9; - I_g_on_n[2][40] = 2.74e-9; - I_g_on_n[2][50] = 2.74e-9; - I_g_on_n[2][60] = 2.74e-9; - I_g_on_n[2][70] = 2.74e-9; - I_g_on_n[2][80] = 2.74e-9; - I_g_on_n[2][90] = 2.74e-9; - I_g_on_n[2][100] = 2.74e-9; - - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. + if (tech == 22) { + SENSE_AMP_D = .03e-9; // s + SENSE_AMP_P = 2.16e-15; // J + //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm + //technology i.e. FEATURESIZE = 0.022). 
Using the DG process numbers for HP. + //22 nm HP + vdd[0] = 0.8; + Lphy[0] = 0.009;//Lphy is the physical gate-length. + Lelec[0] = 0.00468;//Lelec is the electrical gate-length. + t_ox[0] = 0.55e-3;//micron + v_th[0] = 0.1395;//V + c_ox[0] = 3.63e-14;//F/micron2 + mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[0] = 2.33e-2; //V/micron + c_g_ideal[0] = 3.27e-16;//F/micron + c_fringe[0] = 0.06e-15;//F/micron + c_junc[0] = 0;//F/micron2 + I_on_n[0] = 2626.4e-6;//A/micron + I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. + nmos_effective_resistance_multiplier = 1.45; + n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in + //"Dynamic" tab of Device workspace. + gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1 / 3.274; + //From 22nm, leakage current are directly from ITRS report rather + //than MASTAR, since MASTAR has serious bugs there. 
+ I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2; + I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2; + I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2; + I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2; + I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2; + I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2; + I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2; + I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2; + I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2; + I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2; + I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2; + //for 22nm DG HP + I_g_on_n[0][0] = 1.81e-9;//A/micron + I_g_on_n[0][10] = 1.81e-9; + I_g_on_n[0][20] = 1.81e-9; + I_g_on_n[0][30] = 1.81e-9; + I_g_on_n[0][40] = 1.81e-9; + I_g_on_n[0][50] = 1.81e-9; + I_g_on_n[0][60] = 1.81e-9; + I_g_on_n[0][70] = 1.81e-9; + I_g_on_n[0][80] = 1.81e-9; + I_g_on_n[0][90] = 1.81e-9; + I_g_on_n[0][100] = 1.81e-9; + + //22 nm LSTP DG + vdd[1] = 0.8; + Lphy[1] = 0.014; + Lelec[1] = 0.008;//Lelec is the electrical gate-length. + t_ox[1] = 1.1e-3;//micron + v_th[1] = 0.40126;//V + c_ox[1] = 2.30e-14;//F/micron2 + mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[1] = 6.64e-2; //V/micron + c_g_ideal[1] = 3.22e-16;//F/micron + c_fringe[1] = 0.08e-15; + c_junc[1] = 0;//F/micron2 + I_on_n[1] = 727.6e-6;//A/micron + I_on_p[1] = I_on_n[1] / 2; + nmos_effective_resistance_multiplier = 1.99; + n_to_p_eff_curr_drv_ratio[1] = 2; + gmp_to_gmn_multiplier[1] = 0.99; + Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron + Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron + long_channel_leakage_reduction[1] = 1 / 1.89; + I_off_n[1][0] = 2.43e-11; + I_off_n[1][10] = 4.85e-11; + I_off_n[1][20] = 9.68e-11; + I_off_n[1][30] = 1.94e-10; + I_off_n[1][40] = 3.87e-10; + I_off_n[1][50] = 7.73e-10; + I_off_n[1][60] = 3.55e-10; + I_off_n[1][70] = 3.09e-9; + I_off_n[1][80] = 6.19e-9; + I_off_n[1][90] = 1.24e-8; + I_off_n[1][100] = 2.48e-8; + + I_g_on_n[1][0] = 4.51e-10;//A/micron + I_g_on_n[1][10] = 4.51e-10; + I_g_on_n[1][20] = 4.51e-10; + 
I_g_on_n[1][30] = 4.51e-10; + I_g_on_n[1][40] = 4.51e-10; + I_g_on_n[1][50] = 4.51e-10; + I_g_on_n[1][60] = 4.51e-10; + I_g_on_n[1][70] = 4.51e-10; + I_g_on_n[1][80] = 4.51e-10; + I_g_on_n[1][90] = 4.51e-10; + I_g_on_n[1][100] = 4.51e-10; + + //22 nm LOP + vdd[2] = 0.6; + Lphy[2] = 0.011; + Lelec[2] = 0.00604;//Lelec is the electrical gate-length. + t_ox[2] = 0.8e-3;//micron + v_th[2] = 0.2315;//V + c_ox[2] = 2.87e-14;//F/micron2 + mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[2] = 1.81e-2; //V/micron + c_g_ideal[2] = 3.16e-16;//F/micron + c_fringe[2] = 0.08e-15; + c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab + I_on_n[2] = 916.1e-6;//A/micron + I_on_p[2] = I_on_n[2] / 2; + nmos_effective_resistance_multiplier = 1.73; + n_to_p_eff_curr_drv_ratio[2] = 2; + gmp_to_gmn_multiplier[2] = 1.11; + Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron + Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron + long_channel_leakage_reduction[2] = 1 / 2.38; + + I_off_n[2][0] = 1.31e-8; + I_off_n[2][10] = 2.60e-8; + I_off_n[2][20] = 5.14e-8; + I_off_n[2][30] = 1.02e-7; + I_off_n[2][40] = 2.02e-7; + I_off_n[2][50] = 3.99e-7; + I_off_n[2][60] = 7.91e-7; + I_off_n[2][70] = 1.09e-6; + I_off_n[2][80] = 2.09e-6; + I_off_n[2][90] = 4.04e-6; + I_off_n[2][100] = 4.48e-6; + + I_g_on_n[2][0] = 2.74e-9;//A/micron + I_g_on_n[2][10] = 2.74e-9; + I_g_on_n[2][20] = 2.74e-9; + I_g_on_n[2][30] = 2.74e-9; + I_g_on_n[2][40] = 2.74e-9; + I_g_on_n[2][50] = 2.74e-9; + I_g_on_n[2][60] = 2.74e-9; + I_g_on_n[2][70] = 2.74e-9; + I_g_on_n[2][80] = 2.74e-9; + I_g_on_n[2][90] = 2.74e-9; + I_g_on_n[2][100] = 2.74e-9; + + + + if (ram_cell_tech_type == 3) {} else if (ram_cell_tech_type == 4) { + //22 nm commodity DRAM cell access transistor technology parameters. //parameters curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. 
In //2005 ITRS, the value was about twice the value in 2007 ITRS @@ -1486,12 +1423,12 @@ void init_tech_params(double technology, bool is_tag) curr_Wmemcella_dram = width_dram_access_transistor; curr_Wmemcellpmos_dram = 0; curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. + curr_area_cell_dram = 6 * 0.022 * 0.022;//micron2. curr_asp_ratio_cell_dram = 0.667; curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus //kept constant. - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. + //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. curr_vpp = 2.3;//vpp. V t_ox[3] = 3.5e-3;//micron v_th[3] = 1.0;//V @@ -1522,130 +1459,80 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][90] = 1.18e-11; I_off_n[3][100] = 1.72e-11; - } - else - { - //some error handler + } else { + //some error handler + } + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7 / 0.7; + curr_sckt_co_eff = 1.1296; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb } - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - 
curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if(tech == 16){ - //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm - //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP. - //16 nm HP - vdd[0] = 0.7; - Lphy[0] = 0.006;//Lphy is the physical gate-length. - Lelec[0] = 0.00315;//Lelec is the electrical gate-length. - t_ox[0] = 0.5e-3;//micron - v_th[0] = 0.1489;//V - c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR - mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet - c_g_ideal[0] = 2.30e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3 - c_junc[0] = 0;//F/micron2 MASTAR result dynamic - I_on_n[0] = 2768.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current. - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. 
- Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/2.655; - I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07; - I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07; - I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07; - I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07; - I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07; - I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07; - I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07; - I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07; - I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07; - I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07; - I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07; - //for 16nm DG HP - I_g_on_n[0][0] = 1.07e-9;//A/micron - I_g_on_n[0][10] = 1.07e-9; - I_g_on_n[0][20] = 1.07e-9; - I_g_on_n[0][30] = 1.07e-9; - I_g_on_n[0][40] = 1.07e-9; - I_g_on_n[0][50] = 1.07e-9; - I_g_on_n[0][60] = 1.07e-9; - I_g_on_n[0][70] = 1.07e-9; - I_g_on_n[0][80] = 1.07e-9; - I_g_on_n[0][90] = 1.07e-9; - I_g_on_n[0][100] = 1.07e-9; - -// //16 nm LSTP DG -// vdd[1] = 0.8; -// Lphy[1] = 0.014; -// Lelec[1] = 0.008;//Lelec is the electrical gate-length. 
-// t_ox[1] = 1.1e-3;//micron -// v_th[1] = 0.40126;//V -// c_ox[1] = 2.30e-14;//F/micron2 -// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs -// Vdsat[1] = 6.64e-2; //V/micron -// c_g_ideal[1] = 3.22e-16;//F/micron -// c_fringe[1] = 0.008e-15; -// c_junc[1] = 0;//F/micron2 -// I_on_n[1] = 727.6e-6;//A/micron -// I_on_p[1] = I_on_n[1] / 2; -// nmos_effective_resistance_multiplier = 1.99; -// n_to_p_eff_curr_drv_ratio[1] = 2; -// gmp_to_gmn_multiplier[1] = 0.99; -// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron -// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron -// I_off_n[1][0] = 2.43e-11; -// I_off_n[1][10] = 4.85e-11; -// I_off_n[1][20] = 9.68e-11; -// I_off_n[1][30] = 1.94e-10; -// I_off_n[1][40] = 3.87e-10; -// I_off_n[1][50] = 7.73e-10; -// I_off_n[1][60] = 3.55e-10; -// I_off_n[1][70] = 3.09e-9; -// I_off_n[1][80] = 6.19e-9; -// I_off_n[1][90] = 1.24e-8; -// I_off_n[1][100]= 2.48e-8; -// -// // for 22nm LSTP HP -// I_g_on_n[1][0] = 4.51e-10;//A/micron -// I_g_on_n[1][10] = 4.51e-10; -// I_g_on_n[1][20] = 4.51e-10; -// I_g_on_n[1][30] = 4.51e-10; -// I_g_on_n[1][40] = 4.51e-10; -// I_g_on_n[1][50] = 4.51e-10; -// I_g_on_n[1][60] = 4.51e-10; -// I_g_on_n[1][70] = 4.51e-10; -// I_g_on_n[1][80] = 4.51e-10; -// I_g_on_n[1][90] = 4.51e-10; -// I_g_on_n[1][100] = 4.51e-10; - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. + if (tech == 16) { + //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm + //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP. + //16 nm HP + vdd[0] = 0.7; + Lphy[0] = 0.006;//Lphy is the physical gate-length. + Lelec[0] = 0.00315;//Lelec is the electrical gate-length. 
+ t_ox[0] = 0.5e-3;//micron + v_th[0] = 0.1489;//V + c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR + mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs + Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet + c_g_ideal[0] = 2.30e-16;//F/micron + c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3 + c_junc[0] = 0;//F/micron2 MASTAR result dynamic + I_on_n[0] = 2768.4e-6;//A/micron + I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. + nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current. + n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in + //"Dynamic" tab of Device workspace. + gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. + Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron + Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron + long_channel_leakage_reduction[0] = 1 / 2.655; + I_off_n[0][0] = 1.52e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][10] = 1.55e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][20] = 1.59e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][30] = 1.68e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][40] = 1.90e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][50] = 2.69e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][60] = 5.32e-7 / 1.5 * 1.2 * 1.07; + I_off_n[0][70] = 1.02e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][80] = 1.62e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][90] = 2.73e-6 / 1.5 * 1.2 * 1.07; + I_off_n[0][100] = 6.1e-6 / 1.5 * 1.2 * 1.07; + //for 16nm DG HP + I_g_on_n[0][0] = 1.07e-9;//A/micron + I_g_on_n[0][10] = 1.07e-9; + I_g_on_n[0][20] = 1.07e-9; + I_g_on_n[0][30] = 1.07e-9; + I_g_on_n[0][40] = 1.07e-9; + I_g_on_n[0][50] = 1.07e-9; + I_g_on_n[0][60] = 1.07e-9; + I_g_on_n[0][70] = 1.07e-9; + I_g_on_n[0][80] = 1.07e-9; + I_g_on_n[0][90] = 1.07e-9; + I_g_on_n[0][100] = 1.07e-9; + + if (ram_cell_tech_type == 3) {} else if 
(ram_cell_tech_type == 4) { + //22 nm commodity DRAM cell access transistor technology parameters. //parameters curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In //2005 ITRS, the value was about twice the value in 2007 ITRS @@ -1659,12 +1546,12 @@ void init_tech_params(double technology, bool is_tag) curr_Wmemcella_dram = width_dram_access_transistor; curr_Wmemcellpmos_dram = 0; curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. + curr_area_cell_dram = 6 * 0.022 * 0.022;//micron2. curr_asp_ratio_cell_dram = 0.667; curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus //kept constant. - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. + //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. curr_vpp = 2.3;//vpp. V t_ox[3] = 3.5e-3;//micron v_th[3] = 1.0;//V @@ -1695,930 +1582,766 @@ void init_tech_params(double technology, bool is_tag) I_off_n[3][90] = 1.18e-11; I_off_n[3][100] = 1.72e-11; - } - else - { - //some error handler + } else { + //some error handler + } + + //SRAM cell properties + curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; + curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_sram = 1.46; + //CAM cell properties //TODO: data need to be revisited + curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; + curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; + curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; + curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; + curr_asp_ratio_cell_cam = 2.92; + //Empirical undifferetiated core/FU coefficient + curr_logic_scaling_co_eff = 0.7 * 0.7 * 0.7 * 0.7 * 0.7; + curr_core_tx_density = 1.25 / 0.7 / 0.7 / 0.7; + curr_sckt_co_eff = 1.1296; + curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 + curr_macro_layout_overhead = 
1.1;//EDA placement and routing tool rule of thumb } - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } + g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type]; + g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type]; + g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type]; + g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type]; + g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type]; + g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type]; + g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type]; + g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type]; + g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type]; + g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type]; + g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type]; + g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type]; + g_tp.peri_global.n_to_p_eff_curr_drv_ratio + += curr_alpha * 
n_to_p_eff_curr_drv_ratio[peri_global_tech_type]; + g_tp.peri_global.long_channel_leakage_reduction + += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type]; + g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; + g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; + g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; + g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; + gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type]; + + g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; + g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; + g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; + g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; + g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; + g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; + g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; + g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; + g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; + g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; + g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; + g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; + g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; + g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.sram_cell.I_g_on_p += curr_alpha * 
I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + + g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell; + g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor; + g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; + g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; + g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; + g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; + g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; + g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell; + g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp; + g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; + g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell; + g_tp.vpp += curr_alpha * curr_vpp; + g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; + g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; + g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; + g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; + g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; + g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; + g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor]; + g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor]; + g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor]; + g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor]; + g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; + g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; + + g_tp.cam_cell.Vdd += curr_alpha * 
vdd[ram_cell_tech_type]; + g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; + g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; + g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; + g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; + g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; + g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; + g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; + g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron + g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; + g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; + g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; + g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; + g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; + g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; + + g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram; + g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram; + g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram; + area_cell_dram += curr_alpha * curr_area_cell_dram; + asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram; + + g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram; + g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram; + g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram; + area_cell_sram += curr_alpha * curr_area_cell_sram; + asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram; + + g_tp.cam.cell_a_w += curr_alpha * 
curr_Wmemcella_cam;//sheng + g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam; + g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam; + area_cell_cam += curr_alpha * curr_area_cell_cam; + asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam; + + //Sense amplifier latch Gm calculation + mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type]; + Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type]; - g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type]; - g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type]; - g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type]; - g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type]; - g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type]; - g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type]; - g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type]; - g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type]; - g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type]; - g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type]; - g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type]; - g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type]; - g_tp.peri_global.n_to_p_eff_curr_drv_ratio - += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type]; - g_tp.peri_global.long_channel_leakage_reduction - += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type]; - g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_n += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 
300]; - gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type]; - - g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell; - g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor; - g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - 
g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell; - g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp; - g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell; - g_tp.vpp += curr_alpha * curr_vpp; - g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor]; - g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor]; - g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - - g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.cam_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron - 
g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram; - g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram; - g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram; - area_cell_dram += curr_alpha * curr_area_cell_dram; - asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram; - - g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram; - g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram; - g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram; - area_cell_sram += curr_alpha * curr_area_cell_sram; - asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram; - - g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng - g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam; - g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam; - area_cell_cam += curr_alpha * curr_area_cell_cam; - asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam; - - //Sense amplifier latch Gm calculation - mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type]; - Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type]; - - //Empirical undifferetiated core/FU coefficient - g_tp.scaling_factor.logic_scaling_co_eff += 
curr_alpha * curr_logic_scaling_co_eff; - g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density; - g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead; - g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead; - g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff; - } - - - //Currently we are not modeling the resistance/capacitance of poly anywhere. - //Continuous function (or date have been processed) does not need linear interpolation - g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process - g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process - g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - - g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um; - g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um; - g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um; - g_tp.cell_h_def = 50 * g_ip->F_sz_um; - g_tp.w_poly_contact = g_ip->F_sz_um; - g_tp.spacing_poly_to_contact = g_ip->F_sz_um; - g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um; - g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um; - - g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2; - g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um; - g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process - g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier 
N-trans; was 3 micron for the 0.8 micron process - g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process - g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process - g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_; - g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_; - - if (ram_cell_tech_type == comm_dram) - { - g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um; - g_tp.h_dec = 8; // in the unit of memory cell height - } - else - { - g_tp.max_w_nmos_dec = g_tp.max_w_nmos_; - g_tp.h_dec = 4; // in the unit of memory cell height - } - - g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal; - g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal; - g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal; - - g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal; - g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n; - //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p; - - g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal; - - double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global; - double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch; - g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch; - - g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram)); - g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w; - g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram)); - g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w; - g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng - g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w; - - g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd; - g_tp.sram.Vbitpre = vdd[ram_cell_tech_type]; - g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng - pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - g_tp.w_pmos_bl_precharge = 6 * 
pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - - - double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES]; - - for (iter=0; iter<=1; ++iter) - { - // linear interpolation - if (iter == 0) - { - tech = tech_lo; - if (tech_lo == tech_hi) - { - curr_alpha = 1; - } - else - { - curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi); - } - } - else - { - tech = tech_hi; - if (tech_lo == tech_hi) - { - break; - } - else - { - curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi); - } + //Empirical undifferetiated core/FU coefficient + g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff; + g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density; + g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead; + g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead; + g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff; } - if (tech == 180) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron - aspect_ratio[0][0] = 2.0; - wire_width = wire_pitch[0][0] / 2; //micron - wire_thickness = aspect_ratio[0][0] * wire_width;//micron - wire_spacing = wire_pitch[0][0] - wire_width;//micron - barrier_thickness = 0.017;//micron - dishing_thickness = 0;//micron - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, 
barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron - ild_thickness[0][0] = 0.75;//micron - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.709; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], - fringe_cap);//F/micron. - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.4; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.75;//micron - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.709; - vert_dielectric_constant[0][1] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.2; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 1.5; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.709; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - 
aspect_ratio[1][0]= 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.017; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.75; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 3.038; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.75; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 3.038; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 1.98; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 3.038; - vert_dielectric_constant[1][2] = 3.9; 
- wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.18; - wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18); - wire_r_per_micron[1][3] = 12 / 0.18; - } - else if (tech == 90) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron - aspect_ratio[0][0] = 2.4; - wire_width = wire_pitch[0][0] / 2; //micron - wire_thickness = aspect_ratio[0][0] * wire_width;//micron - wire_spacing = wire_pitch[0][0] - wire_width;//micron - barrier_thickness = 0.01;//micron - dishing_thickness = 0;//micron - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron - ild_thickness[0][0] = 0.48;//micron - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.709; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; //F/micron - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], - fringe_cap);//F/micron. 
- - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.4; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.48;//micron - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.709; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.7; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.96; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.709; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.008; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.48; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 3.038; - 
vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.48; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 3.038; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 1.1; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 3.038; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.09; - wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09); - wire_r_per_micron[1][3] = 12 / 0.09; - } - else if (tech == 65) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * 
g_ip->F_sz_um; - aspect_ratio[0][0] = 2.7; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.405; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.303; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.7; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.405; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.303; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.8; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.81; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.303; - vert_dielectric_constant[0][2] = 3.9; - 
wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.006; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.405; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.734; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.405; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.734; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - 
wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.77; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.734; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.065; - wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065); - wire_r_per_micron[1][3] = 12 / 0.065; - } - else if (tech == 45) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.315; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.958; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.315; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] 
= 1.958; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.63; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.958; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.004; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.315; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.46; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - 
wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.315; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.46; - vert_dielectric_constant[1][1] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.55; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.46; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.045; - wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045); - wire_r_per_micron[1][3] = 12 / 0.045; - } - else if (tech == 32) - { - //Aggressive projections. 
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.21; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.664; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.21; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.664; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.42; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.664; - 
vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.21; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.214; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - aspect_ratio[1][1] = 2.0; - wire_width = wire_pitch[1][1] / 2; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.21; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.214; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - 
dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.385; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.214; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.032;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron - wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron + + //Currently we are not modeling the resistance/capacitance of poly anywhere. + //Continuous function (or date have been processed) does not need linear interpolation + g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process + g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process + g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process + g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process + g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process + g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process + g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process + g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process + + g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um; + g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um; + g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um; + g_tp.cell_h_def = 50 * 
g_ip->F_sz_um; + g_tp.w_poly_contact = g_ip->F_sz_um; + g_tp.spacing_poly_to_contact = g_ip->F_sz_um; + g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um; + g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um; + + g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2; + g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um; + //was 10 micron for the 0.8 micron process + g_tp.w_iso = 12.5 * g_ip->F_sz_um; + // sense amplifier N-trans; was 3 micron for the 0.8 micron process + g_tp.w_sense_n = 3.75 * g_ip->F_sz_um; + // sense amplifier P-trans; was 6 micron for the 0.8 micron process + g_tp.w_sense_p = 7.5 * g_ip->F_sz_um; + // Sense enable transistor of the sense amplifier; was 4 micron for the + //0.8 micron process + g_tp.w_sense_en = 5 * g_ip->F_sz_um; + g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_; + g_tp.w_nmos_sa_mux= 6 * g_tp.min_w_nmos_; + + if (ram_cell_tech_type == comm_dram) { + g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um; + g_tp.h_dec = 8; // in the unit of memory cell height + } else { + g_tp.max_w_nmos_dec = g_tp.max_w_nmos_; + g_tp.h_dec = 4; // in the unit of memory cell height } - else if (tech == 22) - { - //Aggressive projections. 
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.15; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.414; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.15; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.414; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.3; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.414; - 
vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, 
vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.15; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.104; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.15; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.104; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); + + g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal; + g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal; + g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal; + + g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal; + g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n; + //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / 
g_tp.dram_acc.I_on_p; + + g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal; + + double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global; + double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch; + g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch; + + g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram)); + g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w; + g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram)); + g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w; + g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng + g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w; + + g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd; + g_tp.sram.Vbitpre = vdd[ram_cell_tech_type]; + g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng + pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; + g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; + + + double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], + ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES]; + + for (iter = 0; iter <= 1; ++iter) { + // linear interpolation + if (iter == 0) { + tech = tech_lo; + if (tech_lo == tech_hi) { + curr_alpha = 1; + } else { + curr_alpha = (technology - tech_hi) / (tech_lo - tech_hi); + } + } else { + tech = tech_hi; + if (tech_lo == 
tech_hi) { + break; + } else { + curr_alpha = (tech_lo - technology) / (tech_lo - tech_hi); + } + } + + if (tech == 180) { + //Aggressive projections + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron + aspect_ratio[0][0] = 2.0; + wire_width = wire_pitch[0][0] / 2; //micron + wire_thickness = aspect_ratio[0][0] * wire_width;//micron + wire_spacing = wire_pitch[0][0] - wire_width;//micron + barrier_thickness = 0.017;//micron + dishing_thickness = 0;//micron + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron + ild_thickness[0][0] = 0.75;//micron + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 2.709; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; //F/micron + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap);//F/micron. 
+ + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 2.4; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.75;//micron + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 2.709; + vert_dielectric_constant[0][1] = 3.9; + fringe_cap = 0.115e-15; //F/micron + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 2.2; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 1.5; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 2.709; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.017; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.75; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] 
= 3.038; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.75; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 3.038; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 1.98; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 3.038; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.18; + wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.18); + wire_r_per_micron[1][3] = 12 / 0.18; + } else if (tech == 90) { + //Aggressive projections + wire_pitch[0][0] = 
2.5 * g_ip->F_sz_um;//micron + aspect_ratio[0][0] = 2.4; + wire_width = wire_pitch[0][0] / 2; //micron + wire_thickness = aspect_ratio[0][0] * wire_width;//micron + wire_spacing = wire_pitch[0][0] - wire_width;//micron + barrier_thickness = 0.01;//micron + dishing_thickness = 0;//micron + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron + ild_thickness[0][0] = 0.48;//micron + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 2.709; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; //F/micron + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], + vert_dielectric_constant[0][0], + fringe_cap);//F/micron. + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 2.4; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.48;//micron + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 2.709; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 2.7; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.96; + miller_value[0][2] = 1.5; + 
horiz_dielectric_constant[0][2] = 2.709; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.008; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.48; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 3.038; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], + vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.48; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 3.038; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], + vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + 
wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 1.1; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 3.038; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.09; + wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09); + wire_r_per_micron[1][3] = 12 / 0.09; + } else if (tech == 65) { + //Aggressive projections + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[0][0] = 2.7; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.405; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 2.303; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 2.7; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + 
ild_thickness[0][1] = 0.405; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 2.303; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], + vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 2.8; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.81; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 2.303; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.006; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.405; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.734; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 
2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.405; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.734; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 0.77; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 2.734; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.065; + wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065); + wire_r_per_micron[1][3] = 12 / 0.065; + } else if (tech == 45) { + //Aggressive projections. 
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.315; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.958; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 3.0; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.315; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.958; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.63; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.958; + 
vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.004; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.315; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.46; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.315; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.46; + vert_dielectric_constant[1][1] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = 
wire_pitch[1][2] - wire_width; + dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 0.55; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 2.46; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.045; + wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045); + wire_r_per_micron[1][3] = 12 / 0.045; + } else if (tech == 32) { + //Aggressive projections. + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.21; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.664; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 3.0; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 
0.21; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.664; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um; + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.42; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.664; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.003; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.21; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.214; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + aspect_ratio[1][1] = 2.0; + wire_width = wire_pitch[1][1] / 2; + wire_thickness = 
aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.21; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.214; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2627,184 +2350,210 @@ void init_tech_params(double technology, bool is_tag) wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][2] = 0.385; + miller_value[1][2] = 1.5; + horiz_dielectric_constant[1][2] = 2.214; + vert_dielectric_constant[1][2] = 3.9; + wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); + //Nominal projections for commodity DRAM wordline/bitline + wire_pitch[1][3] = 2 * 0.032;//micron + wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron + wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron + } else if (tech == 22) { + //Aggressive projections. 
+ wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.15; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.414; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global + wire_width = wire_pitch[0][1] / 2; + aspect_ratio[0][1] = 3.0; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.15; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.414; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.3; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.414; + 
vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.003; + dishing_thickness = 0; + alpha_scatter = 1.05; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.15; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 2.104; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.15; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 2.104; + vert_dielectric_constant[1][1] = 3.9; + wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); + + wire_pitch[1][2] = 8 * g_ip->F_sz_um; + aspect_ratio[1][2] = 2.2; + wire_width = wire_pitch[1][2] / 2; + wire_thickness = aspect_ratio[1][2] * wire_width; + wire_spacing = wire_pitch[1][2] - wire_width; + 
dishing_thickness = 0.1 * wire_thickness; + wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][2] = 0.275; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 2.104; vert_dielectric_constant[1][2] = 3.9; wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); //Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.022;//micron wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, 
wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); } - else if (tech == 16) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.108; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.202; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - aspect_ratio[0][1] = 3.0; - wire_width = wire_pitch[0][1] / 2; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - 
ild_thickness[0][1] = 0.108; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.202; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.216; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.202; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, 
barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.002; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.108; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 1.998; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, 
wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.108; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 1.998; - vert_dielectric_constant[1][1] = 3.9; + else if (tech == 16) { + //Aggressive projections. + wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local + aspect_ratio[0][0] = 3.0; + wire_width = wire_pitch[0][0] / 2; + wire_thickness = aspect_ratio[0][0] * wire_width; + wire_spacing = wire_pitch[0][0] - wire_width; + barrier_thickness = 0; + dishing_thickness = 0; + alpha_scatter = 1; + wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][0] = 0.108; + miller_value[0][0] = 1.5; + horiz_dielectric_constant[0][0] = 1.202; + vert_dielectric_constant[0][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], + fringe_cap); + + wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global + aspect_ratio[0][1] = 3.0; + wire_width = wire_pitch[0][1] / 2; + wire_thickness = aspect_ratio[0][1] * wire_width; + wire_spacing = wire_pitch[0][1] - wire_width; + wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][1] = 0.108; + miller_value[0][1] = 1.5; + horiz_dielectric_constant[0][1] = 1.202; + vert_dielectric_constant[0][1] = 3.9; + wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], + fringe_cap); + + wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global + aspect_ratio[0][2] = 3.0; + wire_width = wire_pitch[0][2] / 2; + wire_thickness = aspect_ratio[0][2] * wire_width; + wire_spacing = 
wire_pitch[0][2] - wire_width; + wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[0][2] = 0.216; + miller_value[0][2] = 1.5; + horiz_dielectric_constant[0][2] = 1.202; + vert_dielectric_constant[0][2] = 3.9; + wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], + fringe_cap); + + //Conservative projections + wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; + aspect_ratio[1][0] = 2.0; + wire_width = wire_pitch[1][0] / 2; + wire_thickness = aspect_ratio[1][0] * wire_width; + wire_spacing = wire_pitch[1][0] - wire_width; + barrier_thickness = 0.002; + dishing_thickness = 0; + alpha_scatter = 1.05; + wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][0] = 0.108; + miller_value[1][0] = 1.5; + horiz_dielectric_constant[1][0] = 1.998; + vert_dielectric_constant[1][0] = 3.9; + fringe_cap = 0.115e-15; + wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, + ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], + fringe_cap); + + wire_pitch[1][1] = 4 * g_ip->F_sz_um; + wire_width = wire_pitch[1][1] / 2; + aspect_ratio[1][1] = 2.0; + wire_thickness = aspect_ratio[1][1] * wire_width; + wire_spacing = wire_pitch[1][1] - wire_width; + wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + ild_thickness[1][1] = 0.108; + miller_value[1][1] = 1.5; + horiz_dielectric_constant[1][1] = 1.998; + vert_dielectric_constant[1][1] = 3.9; wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], 
horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); + ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], + fringe_cap); wire_pitch[1][2] = 8 * g_ip->F_sz_um; aspect_ratio[1][2] = 2.2; @@ -2813,109 +2562,101 @@ void init_tech_params(double technology, bool is_tag) wire_spacing = wire_pitch[1][2] - wire_width; dishing_thickness = 0.1 * wire_thickness; wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); + wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); ild_thickness[1][2] = 0.198; miller_value[1][2] = 1.5; horiz_dielectric_constant[1][2] = 1.998; vert_dielectric_constant[1][2] = 3.9; wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); + ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], + fringe_cap); //Nominal projections for commodity DRAM wordline/bitline wire_pitch[1][3] = 2 * 0.016;//micron wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * 
g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); } - g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.horiz_dielectric_constant += curr_alpha* 
horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - - g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - - g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.vert_dielectric_constant 
+= curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - - g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2; - - g_tp.sense_delay += curr_alpha *SENSE_AMP_D; - g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P; -// g_tp.horiz_dielectric_constant += horiz_dielectric_constant; -// g_tp.vert_dielectric_constant += vert_dielectric_constant; -// g_tp.aspect_ratio += aspect_ratio; -// g_tp.miller_value += miller_value; -// g_tp.ild_thickness += ild_thickness; - - } - g_tp.fringe_cap = fringe_cap; - - double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1); - double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(); - double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0); - double tf = rd * c_load; - g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE); - double KLOAD = 1; - c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); - tf = rd * c_load; - g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE); + g_tp.wire_local.pitch += curr_alpha * + wire_pitch[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.R_per_um += curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.C_per_um += curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.aspect_ratio += curr_alpha * + aspect_ratio[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.ild_thickness += curr_alpha * + ild_thickness[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.miller_value += curr_alpha * + miller_value[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 
3 : 0]; + g_tp.wire_local.horiz_dielectric_constant += curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + g_tp.wire_local.vert_dielectric_constant += curr_alpha * + vert_dielectric_constant[g_ip->ic_proj_type] + [(ram_cell_tech_type == comm_dram) ? 3 : 0]; + + g_tp.wire_inside_mat.pitch += curr_alpha * + wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.R_per_um += curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.C_per_um += curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.aspect_ratio += curr_alpha * + aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.ild_thickness += curr_alpha * + ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.miller_value += curr_alpha * + miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type] + [g_ip->wire_is_mat_type]; + g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha * + vert_dielectric_constant [g_ip->ic_proj_type] + [g_ip->wire_is_mat_type]; + + g_tp.wire_outside_mat.pitch += curr_alpha * + wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.R_per_um += curr_alpha * + wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.C_per_um += curr_alpha * + wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.aspect_ratio += curr_alpha * + aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.ild_thickness += curr_alpha * + ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.miller_value += curr_alpha * + miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.horiz_dielectric_constant += 
curr_alpha * + horiz_dielectric_constant[g_ip->ic_proj_type] + [g_ip->wire_os_mat_type]; + g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha * + vert_dielectric_constant [g_ip->ic_proj_type] + [g_ip->wire_os_mat_type]; + + g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * + g_tp.wire_inside_mat.C_per_um / 2; + + g_tp.sense_delay += curr_alpha * SENSE_AMP_D; + g_tp.sense_dy_power += curr_alpha * SENSE_AMP_P; + + } + g_tp.fringe_cap = fringe_cap; + + double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1); + double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(); + double c_load = gate_C(g_tp.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0); + double tf = rd * c_load; + g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE); + double KLOAD = 1; + c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); + tf = rd * c_load; + g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE); } diff --git a/ext/mcpat/cacti/uca.cc b/ext/mcpat/cacti/uca.cc index 568cd9e44..703ad470f 100755 --- a/ext/mcpat/cacti/uca.cc +++ b/ext/mcpat/cacti/uca.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -37,390 +38,390 @@ #include "uca.h" UCA::UCA(const DynamicParameter & dyn_p) - :dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) -{ - int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? _log2(nbanks)/2 : (_log2(nbanks) - _log2(nbanks)/2)); - int num_banks_hor_dir = nbanks/num_banks_ver_dir; - - if (dp.use_inp_params) - { - RWP = dp.num_rw_ports; - ERP = dp.num_rd_ports; - EWP = dp.num_wr_ports; - SCHP = dp.num_search_ports; - } - else - { - RWP = g_ip->num_rw_ports; - ERP = g_ip->num_rd_ports; - EWP = g_ip->num_wr_ports; - SCHP = g_ip->num_search_ports; - } - - num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode)*(RWP+ERP+EWP); - num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); - num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); - num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; - num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; - - if (!dp.fully_assoc && !dp.pure_cam) - { - - if (g_ip->fast_access && dp.is_tag == false) - { - num_do_b_bank *= g_ip->data_assoc; - } - - htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,0, num_do_b_bank,0,num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); - htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, 0, num_do_b_bank, 0, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); - } - - else - { - - htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank, num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Add_htree, true); - htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, 
num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); - htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_in_htree, true); - htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, - num_addr_b_bank, num_di_b_bank,num_si_b_bank, num_do_b_bank, num_so_b_bank, num_banks_ver_dir*2, num_banks_hor_dir*2, Data_out_htree, true); - } - - area.w = htree_in_data->area.w; - area.h = htree_in_data->area.h; - - area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; + : dp(dyn_p), bank(dp), nbanks(g_ip->nbanks), refresh_power(0) { + int num_banks_ver_dir = 1 << ((bank.area.h > bank.area.w) ? 
_log2(nbanks) + / 2 : (_log2(nbanks) - _log2(nbanks) / 2)); + int num_banks_hor_dir = nbanks / num_banks_ver_dir; + + if (dp.use_inp_params) { + RWP = dp.num_rw_ports; + ERP = dp.num_rd_ports; + EWP = dp.num_wr_ports; + SCHP = dp.num_search_ports; + } else { + RWP = g_ip->num_rw_ports; + ERP = g_ip->num_rd_ports; + EWP = g_ip->num_wr_ports; + SCHP = g_ip->num_search_ports; + } + + num_addr_b_bank = (dp.number_addr_bits_mat + dp.number_subbanks_decode) * + (RWP + ERP + EWP); + num_di_b_bank = dp.num_di_b_bank_per_port * (RWP + EWP); + num_do_b_bank = dp.num_do_b_bank_per_port * (RWP + ERP); + num_si_b_bank = dp.num_si_b_bank_per_port * SCHP; + num_so_b_bank = dp.num_so_b_bank_per_port * SCHP; + + if (!dp.fully_assoc && !dp.pure_cam) { + + if (g_ip->fast_access && dp.is_tag == false) { + num_do_b_bank *= g_ip->data_assoc; + } + + htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, 0, + num_do_b_bank, 0, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Add_htree, true); + htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, 0, + num_do_b_bank, 0, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_in_htree, true); + htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, 0, + num_do_b_bank, 0, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, true); + } + + else { + + htree_in_add = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, + num_banks_ver_dir * 2, num_banks_hor_dir * 2, + Add_htree, true); + htree_in_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, num_so_b_bank, + num_banks_ver_dir * 2, num_banks_hor_dir * 2, + Data_in_htree, true); + htree_out_data = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, + 
num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, true); + htree_in_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, + num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_in_htree, true); + htree_out_search = new Htree2(g_ip->wt, bank.area.w, bank.area.h, + num_addr_b_bank, num_di_b_bank, + num_si_b_bank, num_do_b_bank, + num_so_b_bank, num_banks_ver_dir * 2, + num_banks_hor_dir * 2, Data_out_htree, + true); + } + + area.w = htree_in_data->area.w; + area.h = htree_in_data->area.h; + + area_all_dataramcells = bank.mat.subarray.get_total_cell_area() * dp.num_subarrays * g_ip->nbanks; // cout<<"area cell"<delay + bank.htree_in_add->delay; - double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; - delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + - bank.mat.sa_mux_lev_1_predec->delay + - bank.mat.sa_mux_lev_1_dec->delay; - delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + - bank.mat.sa_mux_lev_2_predec->delay + - bank.mat.sa_mux_lev_2_dec->delay; - double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; - - delay_before_subarray_output_driver = - MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path - delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path - MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path - delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path - delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + - bank.htree_out_data->delay + htree_out_data->delay; - access_time = bank.mat.delay_comparator; - - double ram_delay_inside_mat; - if (dp.fully_assoc) - { - //delay of FA contains both CAM tag and RAM data - { //delay of CAM - ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; - access_time = htree_in_add->delay + 
bank.htree_in_add->delay; - //delay of fully-associative data array - access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; +double UCA::compute_delays(double inrisetime) { + double outrisetime = bank.compute_delays(inrisetime); + + double delay_array_to_mat = htree_in_add->delay + bank.htree_in_add->delay; + double max_delay_before_row_decoder = delay_array_to_mat + bank.mat.r_predec->delay; + delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + + bank.mat.sa_mux_lev_1_predec->delay + + bank.mat.sa_mux_lev_1_dec->delay; + delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + + bank.mat.sa_mux_lev_2_predec->delay + + bank.mat.sa_mux_lev_2_dec->delay; + double delay_inside_mat = bank.mat.row_dec->delay + bank.mat.delay_bitline + bank.mat.delay_sa; + + delay_before_subarray_output_driver = + MAX(MAX(max_delay_before_row_decoder + delay_inside_mat, // row_path + delay_array_to_mat + bank.mat.b_mux_predec->delay + bank.mat.bit_mux_dec->delay + bank.mat.delay_sa), // col_path + MAX(delay_array_to_sa_mux_lev_1_decoder, // sa_mux_lev_1_path + delay_array_to_sa_mux_lev_2_decoder)); // sa_mux_lev_2_path + delay_from_subarray_out_drv_to_out = bank.mat.delay_subarray_out_drv_htree + + bank.htree_out_data->delay + htree_out_data->delay; + access_time = bank.mat.delay_comparator; + + double ram_delay_inside_mat; + if (dp.fully_assoc) { + //delay of FA contains both CAM tag and RAM data + { //delay of CAM + ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; + access_time = htree_in_add->delay + bank.htree_in_add->delay; + //delay of fully-associative data array + access_time += ram_delay_inside_mat + delay_from_subarray_out_drv_to_out; + } + } else { + access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path } - } - else - { - access_time = delay_before_subarray_output_driver + delay_from_subarray_out_drv_to_out; //data_acc_path - } - - if (dp.is_main_mem) - { - double t_rcd = 
max_delay_before_row_decoder + delay_inside_mat; - double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + - delay_from_subarray_out_drv_to_out; - access_time = t_rcd + cas_latency; - } - - double temp; - - if (!dp.fully_assoc) - { - temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit - if (dp.is_dram) - { - temp += bank.mat.delay_writeback; // temp stores random cycle time + + if (dp.is_main_mem) { + double t_rcd = max_delay_before_row_decoder + delay_inside_mat; + double cas_latency = MAX(delay_array_to_sa_mux_lev_1_decoder, delay_array_to_sa_mux_lev_2_decoder) + + delay_from_subarray_out_drv_to_out; + access_time = t_rcd + cas_latency; + } + + double temp; + + if (!dp.fully_assoc) { + temp = delay_inside_mat + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore;//TODO: Sheng: revisit + if (dp.is_dram) { + temp += bank.mat.delay_writeback; // temp stores random cycle time + } + + + temp = MAX(temp, bank.mat.r_predec->delay); + temp = MAX(temp, bank.mat.b_mux_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); + } else { + ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; + temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore + + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; + + temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. 
+ temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); + temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); + } + + // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav + if (g_ip->rpters_in_htree == false) { + temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); + } + cycle_time = temp; + + double delay_req_network = max_delay_before_row_decoder; + double delay_rep_network = delay_from_subarray_out_drv_to_out; + multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); + + if (dp.is_main_mem) { + multisubbank_interleave_cycle_time = htree_in_add->delay; + precharge_delay = htree_in_add->delay + + bank.htree_in_add->delay + bank.mat.delay_writeback + + bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; + cycle_time = access_time + precharge_delay; + } else { + precharge_delay = 0; } + double dram_array_availability = 0; + if (dp.is_dram) { + dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; + } - temp = MAX(temp, bank.mat.r_predec->delay); - temp = MAX(temp, bank.mat.b_mux_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); - } - else - { - ram_delay_inside_mat = bank.mat.delay_bitline + bank.mat.delay_matchchline; - temp = ram_delay_inside_mat + bank.mat.delay_cam_sl_restore + bank.mat.delay_cam_ml_reset + bank.mat.delay_bl_restore - + bank.mat.delay_hit_miss_reset + bank.mat.delay_wl_reset; - - temp = MAX(temp, bank.mat.b_mux_predec->delay);//TODO: Sheng revisit whether distinguish cam and ram bitline etc. 
- temp = MAX(temp, bank.mat.sa_mux_lev_1_predec->delay); - temp = MAX(temp, bank.mat.sa_mux_lev_2_predec->delay); - } - - // The following is true only if the input parameter "repeaters_in_htree" is set to false --Nav - if (g_ip->rpters_in_htree == false) - { - temp = MAX(temp, bank.htree_in_add->max_unpipelined_link_delay); - } - cycle_time = temp; - - double delay_req_network = max_delay_before_row_decoder; - double delay_rep_network = delay_from_subarray_out_drv_to_out; - multisubbank_interleave_cycle_time = MAX(delay_req_network, delay_rep_network); - - if (dp.is_main_mem) - { - multisubbank_interleave_cycle_time = htree_in_add->delay; - precharge_delay = htree_in_add->delay + - bank.htree_in_add->delay + bank.mat.delay_writeback + - bank.mat.delay_wl_reset + bank.mat.delay_bl_restore; - cycle_time = access_time + precharge_delay; - } - else - { - precharge_delay = 0; - } - - double dram_array_availability = 0; - if (dp.is_dram) - { - dram_array_availability = (1 - dp.num_r_subarray * cycle_time / dp.dram_refresh_period) * 100; - } - - return outrisetime; + return outrisetime; } // note: currently, power numbers are for a bank of an array -void UCA::compute_power_energy() -{ - bank.compute_power_energy(); - power = bank.power; - - power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; - power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; - if (dp.fully_assoc || dp.pure_cam) - power_routing_to_bank.searchOp.dynamic= htree_in_search->power.searchOp.dynamic + htree_out_search->power.searchOp.dynamic; - - power_routing_to_bank.readOp.leakage += htree_in_add->power.readOp.leakage + - htree_in_data->power.readOp.leakage + - htree_out_data->power.readOp.leakage; - - power_routing_to_bank.readOp.gate_leakage += htree_in_add->power.readOp.gate_leakage + - htree_in_data->power.readOp.gate_leakage + - htree_out_data->power.readOp.gate_leakage; - if 
(dp.fully_assoc || dp.pure_cam) - { +void UCA::compute_power_energy() { + bank.compute_power_energy(); + power = bank.power; + + power_routing_to_bank.readOp.dynamic = htree_in_add->power.readOp.dynamic + htree_out_data->power.readOp.dynamic; + power_routing_to_bank.writeOp.dynamic = htree_in_add->power.readOp.dynamic + htree_in_data->power.readOp.dynamic; + if (dp.fully_assoc || dp.pure_cam) + power_routing_to_bank.searchOp.dynamic = + htree_in_search->power.searchOp.dynamic + + htree_out_search->power.searchOp.dynamic; + + power_routing_to_bank.readOp.leakage += + htree_in_add->power.readOp.leakage + + htree_in_data->power.readOp.leakage + + htree_out_data->power.readOp.leakage; + + power_routing_to_bank.readOp.gate_leakage += + htree_in_add->power.readOp.gate_leakage + + htree_in_data->power.readOp.gate_leakage + + htree_out_data->power.readOp.gate_leakage; + if (dp.fully_assoc || dp.pure_cam) { power_routing_to_bank.readOp.leakage += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; power_routing_to_bank.readOp.gate_leakage += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; - } - - power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; - power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; - power.readOp.leakage += power_routing_to_bank.readOp.leakage; - power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; - - // calculate total write energy per access - power.writeOp.dynamic = power.readOp.dynamic - - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir - + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir - - power_routing_to_bank.readOp.dynamic - + power_routing_to_bank.writeOp.dynamic - + bank.htree_in_data->power.readOp.dynamic - - bank.htree_out_data->power.readOp.dynamic; - - if (dp.is_dram == false) - { - power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; - } - - 
dyn_read_energy_from_closed_page = power.readOp.dynamic; - dyn_read_energy_from_open_page = power.readOp.dynamic - - (bank.mat.r_predec->power.readOp.dynamic + - bank.mat.power_row_decoders.readOp.dynamic + - bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + - bank.mat.power_sa.readOp.dynamic + - bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; - - dyn_read_energy_remaining_words_in_burst = - (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * - ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + - bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + - bank.htree_out_data->power.readOp.dynamic + - power_routing_to_bank.readOp.dynamic); - dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; - dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; - - activate_energy = htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + - (bank.mat.r_predec->power.readOp.dynamic + - bank.mat.power_row_decoders.readOp.dynamic + - bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; - read_energy = (htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + - (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + - bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + - bank.htree_out_data->power.readOp.dynamic + - htree_in_data->power.readOp.dynamic) * g_ip->burst_len; - write_energy = (htree_in_add->power.readOp.dynamic + - bank.htree_in_add->power_bit.readOp.dynamic * 
bank.num_addr_b_routed_to_mat_for_rd_or_wr + - htree_in_data->power.readOp.dynamic + - bank.htree_in_data->power.readOp.dynamic + - (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + - bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + - bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + - bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; - precharge_energy = (bank.mat.power_bitline.readOp.dynamic + - bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; - - leak_power_subbank_closed_page = - (bank.mat.r_predec->power.readOp.leakage + - bank.mat.b_mux_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + - bank.mat.power_row_decoders.readOp.leakage + - bank.mat.power_bit_mux_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + - bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; - - leak_power_subbank_closed_page += - (bank.mat.r_predec->power.readOp.gate_leakage + - bank.mat.b_mux_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + - bank.mat.power_row_decoders.readOp.gate_leakage + - bank.mat.power_bit_mux_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ - //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; - - leak_power_subbank_open_page = - (bank.mat.r_predec->power.readOp.leakage + - bank.mat.b_mux_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + - bank.mat.power_row_decoders.readOp.leakage + - 
bank.mat.power_bit_mux_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + - bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; - - leak_power_subbank_open_page += - (bank.mat.r_predec->power.readOp.gate_leakage + - bank.mat.b_mux_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + - bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + - bank.mat.power_row_decoders.readOp.gate_leakage + - bank.mat.power_bit_mux_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + - bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; - //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; - - leak_power_request_and_reply_networks = - power_routing_to_bank.readOp.leakage + - bank.htree_in_add->power.readOp.leakage + - bank.htree_in_data->power.readOp.leakage + - bank.htree_out_data->power.readOp.leakage; - - leak_power_request_and_reply_networks += - power_routing_to_bank.readOp.gate_leakage + - bank.htree_in_add->power.readOp.gate_leakage + - bank.htree_in_data->power.readOp.gate_leakage + - bank.htree_out_data->power.readOp.gate_leakage; - - if (dp.fully_assoc || dp.pure_cam) - { + } + + power.searchOp.dynamic += power_routing_to_bank.searchOp.dynamic; + power.readOp.dynamic += power_routing_to_bank.readOp.dynamic; + power.readOp.leakage += power_routing_to_bank.readOp.leakage; + power.readOp.gate_leakage += power_routing_to_bank.readOp.gate_leakage; + + // calculate total write energy per access + power.writeOp.dynamic = power.readOp.dynamic + - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir + - power_routing_to_bank.readOp.dynamic + + power_routing_to_bank.writeOp.dynamic + + bank.htree_in_data->power.readOp.dynamic + - 
bank.htree_out_data->power.readOp.dynamic; + + if (dp.is_dram == false) { + power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + } + + dyn_read_energy_from_closed_page = power.readOp.dynamic; + dyn_read_energy_from_open_page = power.readOp.dynamic - + (bank.mat.r_predec->power.readOp.dynamic + + bank.mat.power_row_decoders.readOp.dynamic + + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic + + bank.mat.power_sa.readOp.dynamic + + bank.mat.power_bitline.readOp.dynamic) * dp.num_act_mats_hor_dir; + + dyn_read_energy_remaining_words_in_burst = + (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1) * + ((bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + + bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + + bank.htree_out_data->power.readOp.dynamic + + power_routing_to_bank.readOp.dynamic); + dyn_read_energy_from_closed_page += dyn_read_energy_remaining_words_in_burst; + dyn_read_energy_from_open_page += dyn_read_energy_remaining_words_in_burst; + + activate_energy = htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_act + + (bank.mat.r_predec->power.readOp.dynamic + + bank.mat.power_row_decoders.readOp.dynamic + + bank.mat.power_sa.readOp.dynamic) * dp.num_act_mats_hor_dir; + read_energy = (htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + + (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic + + bank.mat.power_subarray_out_drv.readOp.dynamic) * dp.num_act_mats_hor_dir + + bank.htree_out_data->power.readOp.dynamic + + 
htree_in_data->power.readOp.dynamic) * g_ip->burst_len; + write_energy = (htree_in_add->power.readOp.dynamic + + bank.htree_in_add->power_bit.readOp.dynamic * bank.num_addr_b_routed_to_mat_for_rd_or_wr + + htree_in_data->power.readOp.dynamic + + bank.htree_in_data->power.readOp.dynamic + + (bank.mat.sa_mux_lev_1_predec->power.readOp.dynamic + + bank.mat.sa_mux_lev_2_predec->power.readOp.dynamic + + bank.mat.power_sa_mux_lev_1_decoders.readOp.dynamic + + bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic) * dp.num_act_mats_hor_dir) * g_ip->burst_len; + precharge_energy = (bank.mat.power_bitline.readOp.dynamic + + bank.mat.power_bl_precharge_eq_drv.readOp.dynamic) * dp.num_act_mats_hor_dir; + + leak_power_subbank_closed_page = + (bank.mat.r_predec->power.readOp.leakage + + bank.mat.b_mux_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + + bank.mat.power_row_decoders.readOp.leakage + + bank.mat.power_bit_mux_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + + bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; + + leak_power_subbank_closed_page += + (bank.mat.r_predec->power.readOp.gate_leakage + + bank.mat.b_mux_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + + bank.mat.power_row_decoders.readOp.gate_leakage + + bank.mat.power_bit_mux_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage) * dp.num_act_mats_hor_dir; //+ + //bank.mat.leak_power_sense_amps_closed_page_state) * dp.num_act_mats_hor_dir; + + leak_power_subbank_open_page = + (bank.mat.r_predec->power.readOp.leakage + + bank.mat.b_mux_predec->power.readOp.leakage + + 
bank.mat.sa_mux_lev_1_predec->power.readOp.leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.leakage + + bank.mat.power_row_decoders.readOp.leakage + + bank.mat.power_bit_mux_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.leakage + + bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; + + leak_power_subbank_open_page += + (bank.mat.r_predec->power.readOp.gate_leakage + + bank.mat.b_mux_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_1_predec->power.readOp.gate_leakage + + bank.mat.sa_mux_lev_2_predec->power.readOp.gate_leakage + + bank.mat.power_row_decoders.readOp.gate_leakage + + bank.mat.power_bit_mux_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_1_decoders.readOp.gate_leakage + + bank.mat.power_sa_mux_lev_2_decoders.readOp.gate_leakage ) * dp.num_act_mats_hor_dir; + //bank.mat.leak_power_sense_amps_open_page_state) * dp.num_act_mats_hor_dir; + + leak_power_request_and_reply_networks = + power_routing_to_bank.readOp.leakage + + bank.htree_in_add->power.readOp.leakage + + bank.htree_in_data->power.readOp.leakage + + bank.htree_out_data->power.readOp.leakage; + + leak_power_request_and_reply_networks += + power_routing_to_bank.readOp.gate_leakage + + bank.htree_in_add->power.readOp.gate_leakage + + bank.htree_in_data->power.readOp.gate_leakage + + bank.htree_out_data->power.readOp.gate_leakage; + + if (dp.fully_assoc || dp.pure_cam) { leak_power_request_and_reply_networks += htree_in_search->power.readOp.leakage + htree_out_search->power.readOp.leakage; leak_power_request_and_reply_networks += htree_in_search->power.readOp.gate_leakage + htree_out_search->power.readOp.gate_leakage; - } - - - if (dp.is_dram) - { // if DRAM, add contribution of power spent in row predecoder drivers, blocks and decoders to refresh power - refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + - 
bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; - refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; - refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; - refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; - refresh_power /= dp.dram_refresh_period; - } - - - if (dp.is_tag == false) - { - power.readOp.dynamic = dyn_read_energy_from_closed_page; - power.writeOp.dynamic = dyn_read_energy_from_closed_page - - dyn_read_energy_remaining_words_in_burst - - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir - + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir - + (power_routing_to_bank.writeOp.dynamic - - power_routing_to_bank.readOp.dynamic - - bank.htree_out_data->power.readOp.dynamic + - bank.htree_in_data->power.readOp.dynamic) * - (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME + } - if (dp.is_dram == false) - { - power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + + // if DRAM, add contribution of power spent in row predecoder drivers, + // blocks and decoders to refresh power + if (dp.is_dram) { + refresh_power = (bank.mat.r_predec->power.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.row_dec->power.readOp.dynamic) * dp.num_r_subarray * dp.num_subarrays; + refresh_power += bank.mat.per_bitline_read_energy * dp.num_c_subarray * dp.num_r_subarray * dp.num_subarrays; + refresh_power += bank.mat.power_bl_precharge_eq_drv.readOp.dynamic * dp.num_act_mats_hor_dir; + refresh_power += bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + refresh_power /= dp.dram_refresh_period; } - } - - // if DRAM, add refresh power to total leakage - if (dp.is_dram) - { - power.readOp.leakage += refresh_power; - } - - // TODO: below should be avoided. 
- /*if (dp.is_main_mem) - { - power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; - }*/ - - assert(power.readOp.dynamic > 0); - assert(power.writeOp.dynamic > 0); - assert(power.readOp.leakage > 0); + + + if (dp.is_tag == false) { + power.readOp.dynamic = dyn_read_energy_from_closed_page; + power.writeOp.dynamic = dyn_read_energy_from_closed_page + - dyn_read_energy_remaining_words_in_burst + - bank.mat.power_bitline.readOp.dynamic * dp.num_act_mats_hor_dir + + bank.mat.power_bitline.writeOp.dynamic * dp.num_act_mats_hor_dir + + (power_routing_to_bank.writeOp.dynamic - + power_routing_to_bank.readOp.dynamic - + bank.htree_out_data->power.readOp.dynamic + + bank.htree_in_data->power.readOp.dynamic) * + (MAX((g_ip->burst_len / g_ip->int_prefetch_w), 1) - 1); //FIXME + + if (dp.is_dram == false) { + power.writeOp.dynamic -= bank.mat.power_sa.readOp.dynamic * dp.num_act_mats_hor_dir; + } + } + + // if DRAM, add refresh power to total leakage + if (dp.is_dram) { + power.readOp.leakage += refresh_power; + } + + // TODO: below should be avoided. + /*if (dp.is_main_mem) + { + power.readOp.leakage += MAIN_MEM_PER_CHIP_STANDBY_CURRENT_mA * 1e-3 * g_tp.peri_global.Vdd / g_ip->nbanks; + }*/ + + assert(power.readOp.dynamic > 0); + assert(power.writeOp.dynamic > 0); + assert(power.readOp.leakage > 0); } diff --git a/ext/mcpat/cacti/uca.h b/ext/mcpat/cacti/uca.h index fdab14fc7..402035f9a 100755 --- a/ext/mcpat/cacti/uca.h +++ b/ext/mcpat/cacti/uca.h @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
* All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -40,9 +41,8 @@ #include "htree2.h" #include "parameter.h" -class UCA : public Component -{ - public: +class UCA : public Component { +public: UCA(const DynamicParameter & dyn_p); ~UCA(); double compute_delays(double inrisetime); // returns outrisetime @@ -66,7 +66,10 @@ class UCA : public Component int num_do_b_bank; int num_si_b_bank; int num_so_b_bank; - int RWP, ERP, EWP,SCHP; + int RWP; + int ERP; + int EWP; + int SCHP; double area_all_dataramcells; double dyn_read_energy_from_closed_page; diff --git a/ext/mcpat/cacti/wire.cc b/ext/mcpat/cacti/wire.cc index 742000c85..b7d9e34ce 100644 --- a/ext/mcpat/cacti/wire.cc +++ b/ext/mcpat/cacti/wire.cc @@ -2,6 +2,7 @@ * McPAT/CACTI * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -41,173 +42,173 @@ Wire::Wire( enum Wire_placement wp, double resistivity, TechnologyParameter::DeviceType *dt - ):wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), s_scale(s_s), - resistivity(resistivity), deviceType(dt) -{ - wire_placement = wp; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio*g_tp.min_w_nmos_; - in_rise_time = 0; - out_rise_time = 0; - if (initialized != 1) { - cout << "Wire not initialized. Initializing it with default values\n"; - Wire winit; - } - calculate_wire_stats(); - // change everything back to seconds, microns, and Joules - repeater_spacing *= 1e6; - wire_length *= 1e6; - wire_width *= 1e6; - wire_spacing *= 1e6; - assert(wire_length > 0); - assert(power.readOp.dynamic > 0); - assert(power.readOp.leakage > 0); - assert(power.readOp.gate_leakage > 0); + ): wt(wire_model), wire_length(wl*1e-6), nsense(n), w_scale(w_s), + s_scale(s_s), + resistivity(resistivity), deviceType(dt) { + wire_placement = wp; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + in_rise_time = 0; + out_rise_time = 0; + if (initialized != 1) { + cout << "Wire not initialized. 
Initializing it with default values\n"; + Wire winit; + } + calculate_wire_stats(); + // change everything back to seconds, microns, and Joules + repeater_spacing *= 1e6; + wire_length *= 1e6; + wire_width *= 1e6; + wire_spacing *= 1e6; + assert(wire_length > 0); + assert(power.readOp.dynamic > 0); + assert(power.readOp.leakage > 0); + assert(power.readOp.gate_leakage > 0); } - // the following values are for peripheral global technology - // specified in the input config file - Component Wire::global; - Component Wire::global_5; - Component Wire::global_10; - Component Wire::global_20; - Component Wire::global_30; - Component Wire::low_swing; - - int Wire::initialized; - double Wire::wire_width_init; - double Wire::wire_spacing_init; - - -Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, TechnologyParameter::DeviceType *dt) -{ - w_scale = w_s; - s_scale = s_s; - deviceType = dt; - wire_placement = wp; - resistivity = resis; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; - in_rise_time = 0; - out_rise_time = 0; - - switch (wire_placement) - { - case outside_mat: wire_width = g_tp.wire_outside_mat.pitch; break; - case inside_mat : wire_width = g_tp.wire_inside_mat.pitch; break; - default: wire_width = g_tp.wire_local.pitch; break; - } - - wire_spacing = wire_width; - - wire_width *= (w_scale * 1e-6/2) /* (m) */; - wire_spacing *= (s_scale * 1e-6/2) /* (m) */; - - initialized = 1; - init_wire(); - wire_width_init = wire_width; - wire_spacing_init = wire_spacing; - - assert(power.readOp.dynamic > 0); - assert(power.readOp.leakage > 0); - assert(power.readOp.gate_leakage > 0); +// the following values are for peripheral global technology +// specified in the input config file +Component Wire::global; +Component Wire::global_5; +Component Wire::global_10; +Component Wire::global_20; +Component Wire::global_30; +Component Wire::low_swing; + +int Wire::initialized; +double Wire::wire_width_init; +double 
Wire::wire_spacing_init; + + +Wire::Wire(double w_s, double s_s, enum Wire_placement wp, double resis, + TechnologyParameter::DeviceType *dt) { + w_scale = w_s; + s_scale = s_s; + deviceType = dt; + wire_placement = wp; + resistivity = resis; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_; + in_rise_time = 0; + out_rise_time = 0; + + switch (wire_placement) { + case outside_mat: + wire_width = g_tp.wire_outside_mat.pitch; + break; + case inside_mat : + wire_width = g_tp.wire_inside_mat.pitch; + break; + default: + wire_width = g_tp.wire_local.pitch; + break; + } + + wire_spacing = wire_width; + + wire_width *= (w_scale * 1e-6 / 2) /* (m) */; + wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */; + + initialized = 1; + init_wire(); + wire_width_init = wire_width; + wire_spacing_init = wire_spacing; + + assert(power.readOp.dynamic > 0); + assert(power.readOp.leakage > 0); + assert(power.readOp.gate_leakage > 0); } -Wire::~Wire() -{ +Wire::~Wire() { } void -Wire::calculate_wire_stats() -{ - - if (wire_placement == outside_mat) { - wire_width = g_tp.wire_outside_mat.pitch; - } - else if (wire_placement == inside_mat) { - wire_width = g_tp.wire_inside_mat.pitch; - } - else { - wire_width = g_tp.wire_local.pitch; - } - - wire_spacing = wire_width; - - wire_width *= (w_scale * 1e-6/2) /* (m) */; - wire_spacing *= (s_scale * 1e-6/2) /* (m) */; - - - if (wt != Low_swing) { - - // delay_optimal_wire(); - - if (wt == Global) { - delay = global.delay * wire_length; - power.readOp.dynamic = global.power.readOp.dynamic * wire_length; - power.readOp.leakage = global.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length; - repeater_spacing = global.area.w; - repeater_size = global.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_5) { - delay = global_5.delay 
* wire_length; - power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_5.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_5.area.w; - repeater_size = global_5.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_10) { - delay = global_10.delay * wire_length; - power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_10.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_10.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_10.area.w; - repeater_size = global_10.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_20) { - delay = global_20.delay * wire_length; - power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_20.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_20.area.w; - repeater_size = global_20.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - else if (wt == Global_30) { - delay = global_30.delay * wire_length; - power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length; - power.readOp.leakage = global_30.power.readOp.leakage * wire_length; - power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length; - repeater_spacing = global_30.area.w; - repeater_size = global_30.area.h; - area.set_area((wire_length/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * 
repeater_size, - g_tp.min_w_nmos_ * repeater_size, g_tp.cell_h_def)); - } - out_rise_time = delay*repeater_spacing/deviceType->Vth; - } - else if (wt == Low_swing) { - low_swing_model (); - repeater_spacing = wire_length; - repeater_size = 1; - } - else { - assert(0); - } +Wire::calculate_wire_stats() { + + if (wire_placement == outside_mat) { + wire_width = g_tp.wire_outside_mat.pitch; + } else if (wire_placement == inside_mat) { + wire_width = g_tp.wire_inside_mat.pitch; + } else { + wire_width = g_tp.wire_local.pitch; + } + + wire_spacing = wire_width; + + wire_width *= (w_scale * 1e-6 / 2) /* (m) */; + wire_spacing *= (s_scale * 1e-6 / 2) /* (m) */; + + + if (wt != Low_swing) { + + // delay_optimal_wire(); + + if (wt == Global) { + delay = global.delay * wire_length; + power.readOp.dynamic = global.power.readOp.dynamic * wire_length; + power.readOp.leakage = global.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global.power.readOp.gate_leakage * wire_length; + repeater_spacing = global.area.w; + repeater_size = global.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_5) { + delay = global_5.delay * wire_length; + power.readOp.dynamic = global_5.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_5.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_5.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_5.area.w; + repeater_size = global_5.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_10) { + delay = global_10.delay * wire_length; + power.readOp.dynamic = global_10.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_10.power.readOp.leakage * wire_length; + power.readOp.gate_leakage 
= global_10.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_10.area.w; + repeater_size = global_10.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_20) { + delay = global_20.delay * wire_length; + power.readOp.dynamic = global_20.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_20.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_20.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_20.area.w; + repeater_size = global_20.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } else if (wt == Global_30) { + delay = global_30.delay * wire_length; + power.readOp.dynamic = global_30.power.readOp.dynamic * wire_length; + power.readOp.leakage = global_30.power.readOp.leakage * wire_length; + power.readOp.gate_leakage = global_30.power.readOp.gate_leakage * wire_length; + repeater_spacing = global_30.area.w; + repeater_size = global_30.area.h; + area.set_area((wire_length / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_size, + g_tp.min_w_nmos_ * repeater_size, + g_tp.cell_h_def)); + } + out_rise_time = delay * repeater_spacing / deviceType->Vth; + } else if (wt == Low_swing) { + low_swing_model (); + repeater_spacing = wire_length; + repeater_size = 1; + } else { + assert(0); + } } @@ -218,51 +219,55 @@ Wire::calculate_wire_stats() * inverters connected in series (refer: CACTI 1 Technical report, * section 6.1.3) */ - double -Wire::signal_fall_time () -{ - - /* rise time of inverter 1's output */ - double rt; - /* fall time of inverter 2's output */ - double ft; - double timeconst; - - timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 
- gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(min_w_pmos, PCH, 1); - rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth); - timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(g_tp.min_w_nmos_, NCH, 1); - ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth; - return ft; +double +Wire::signal_fall_time () { + + /* rise time of inverter 1's output */ + double rt; + /* fall time of inverter 2's output */ + double ft; + double timeconst; + + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(min_w_pmos, PCH, 1); + rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL) / + (deviceType->Vdd - deviceType->Vth); + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(g_tp.min_w_nmos_, NCH, 1); + ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth; + return ft; } -double Wire::signal_rise_time () -{ - - /* rise time of inverter 1's output */ - double ft; - /* fall time of inverter 2's output */ - double rt; - double timeconst; - - timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(g_tp.min_w_nmos_, NCH, 1); - rt = horowitz (0, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, RISE) / deviceType->Vth; - timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, 
g_tp.cell_h_def) + - drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * - tr_R_on(min_w_pmos, PCH, 1); - ft = horowitz (rt, timeconst, deviceType->Vth/deviceType->Vdd, deviceType->Vth/deviceType->Vdd, FALL) / (deviceType->Vdd - deviceType->Vth); - return ft; //sec +double Wire::signal_rise_time () { + + /* rise time of inverter 1's output */ + double ft; + /* fall time of inverter 2's output */ + double rt; + double timeconst; + + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(g_tp.min_w_nmos_, NCH, 1); + rt = horowitz (0, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE) / deviceType->Vth; + timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(min_w_pmos + g_tp.min_w_nmos_, 0)) * + tr_R_on(min_w_pmos, PCH, 1); + ft = horowitz (rt, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL) / + (deviceType->Vdd - deviceType->Vth); + return ft; //sec } @@ -281,111 +286,110 @@ double Wire::signal_rise_time () * */ -double Wire::wire_cap (double len /* in m */, bool call_from_outside) -{ - //TODO: this should be consistent with the wire_res in technology file - double sidewall, adj, tot_cap; - double wire_height; - double epsilon0 = 8.8542e-12; - double aspect_ratio, horiz_dielectric_constant, vert_dielectric_constant, miller_value,ild_thickness; +double Wire::wire_cap (double len /* in m */, bool call_from_outside) { + //TODO: this should be consistent with the wire_res in technology file + double sidewall, adj, tot_cap; + double wire_height; + double epsilon0 = 8.8542e-12; + double aspect_ratio; + double horiz_dielectric_constant; + double vert_dielectric_constant; + double miller_value; + double ild_thickness; + + switch (wire_placement) { + case 
outside_mat: { + aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant; + miller_value = g_tp.wire_outside_mat.miller_value; + ild_thickness = g_tp.wire_outside_mat.ild_thickness; + break; + } + case inside_mat : { + aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant; + miller_value = g_tp.wire_inside_mat.miller_value; + ild_thickness = g_tp.wire_inside_mat.ild_thickness; + break; + } + default: { + aspect_ratio = g_tp.wire_local.aspect_ratio; + horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant; + vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant; + miller_value = g_tp.wire_local.miller_value; + ild_thickness = g_tp.wire_local.ild_thickness; + break; + } + } - switch (wire_placement) - { - case outside_mat: - { - aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant; - miller_value = g_tp.wire_outside_mat.miller_value; - ild_thickness = g_tp.wire_outside_mat.ild_thickness; - break; - } - case inside_mat : - { - aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant; - miller_value = g_tp.wire_inside_mat.miller_value; - ild_thickness = g_tp.wire_inside_mat.ild_thickness; - break; - } - default: - { - aspect_ratio = g_tp.wire_local.aspect_ratio; - horiz_dielectric_constant = g_tp.wire_local.horiz_dielectric_constant; - vert_dielectric_constant = g_tp.wire_local.vert_dielectric_constant; - miller_value = g_tp.wire_local.miller_value; - 
ild_thickness = g_tp.wire_local.ild_thickness; - break; - } - } - - if (call_from_outside) - { - wire_width *= 1e-6; - wire_spacing *= 1e-6; - } - wire_height = wire_width/w_scale*aspect_ratio; - /* - * assuming height does not change. wire_width = width_original*w_scale - * So wire_height does not change as wire width increases - */ + if (call_from_outside) { + wire_width *= 1e-6; + wire_spacing *= 1e-6; + } + wire_height = wire_width / w_scale * aspect_ratio; + /* + * assuming height does not change. wire_width = width_original*w_scale + * So wire_height does not change as wire width increases + */ // capacitance between wires in the same level // sidewall = 2*miller_value * horiz_dielectric_constant * (wire_height/wire_spacing) // * epsilon0; - sidewall = miller_value * horiz_dielectric_constant * (wire_height/wire_spacing) - * epsilon0; + sidewall = miller_value * horiz_dielectric_constant * + (wire_height / wire_spacing) + * epsilon0; - // capacitance between wires in adjacent levels - //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0; - //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; + // capacitance between wires in adjacent levels + //adj = miller_value * vert_dielectric_constant *w_scale * epsilon0; + //adj = 2*vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; - adj = miller_value *vert_dielectric_constant *wire_width/(ild_thickness*1e-6) * epsilon0; - //Change ild_thickness from micron to M + adj = miller_value * vert_dielectric_constant * wire_width / + (ild_thickness * 1e-6) * epsilon0; + //Change ild_thickness from micron to M - //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m - tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m + //tot_cap = (sidewall + adj + (deviceType->C_fringe * 1e6)); //F/m + tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)); //F/m - if (call_from_outside) - { - wire_width *= 1e6; - wire_spacing *= 1e6; - } - return (tot_cap*len); 
// (F) + if (call_from_outside) { + wire_width *= 1e6; + wire_spacing *= 1e6; + } + return (tot_cap*len); // (F) } - double -Wire::wire_res (double len /*(in m)*/) -{ - - double aspect_ratio,alpha_scatter =1.05, dishing_thickness=0, barrier_thickness=0; - //TODO: this should be consistent with the wire_res in technology file - //The whole computation should be consistent with the wire_res in technology.cc too! - - switch (wire_placement) - { - case outside_mat: - { - aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; - break; - } - case inside_mat : - { - aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; - break; - } - default: - { - aspect_ratio = g_tp.wire_local.aspect_ratio; - break; - } - } - return (alpha_scatter * resistivity * 1e-6 * len/((aspect_ratio*wire_width/w_scale-dishing_thickness - barrier_thickness)* - (wire_width-2*barrier_thickness))); +double +Wire::wire_res (double len /*(in m)*/) { + + double aspect_ratio; + double alpha_scatter = 1.05; + double dishing_thickness = 0; + double barrier_thickness = 0; + //TODO: this should be consistent with the wire_res in technology file + //The whole computation should be consistent with the wire_res in technology.cc too! + + switch (wire_placement) { + case outside_mat: { + aspect_ratio = g_tp.wire_outside_mat.aspect_ratio; + break; + } + case inside_mat : { + aspect_ratio = g_tp.wire_inside_mat.aspect_ratio; + break; + } + default: { + aspect_ratio = g_tp.wire_local.aspect_ratio; + break; + } + } + return (alpha_scatter * resistivity * 1e-6 * len / + ((aspect_ratio*wire_width / w_scale - dishing_thickness - + barrier_thickness)* + (wire_width - 2*barrier_thickness))); } /* @@ -395,438 +399,456 @@ Wire::wire_res (double len /*(in m)*/) * low swing nmos delay, and the wire delay * (ref: Technical report 6) */ - void -Wire::low_swing_model() -{ - double len = wire_length; - double beta = pmos_to_nmos_sz_ratio(); - - - double inputrise = (in_rise_time == 0) ? 
signal_rise_time() : in_rise_time; - - /* Final nmos low swing driver size calculation: - * Try to size the driver such that the delay - * is less than 8FO4. - * If the driver size is greater than - * the max allowable size, assume max size for the driver. - * In either case, recalculate the delay using - * the final driver size assuming slow input with - * finite rise time instead of ideal step input - * - * (ref: Technical report 6) - */ - double cwire = wire_cap(len); /* load capacitance */ - double rwire = wire_res(len); +void +Wire::low_swing_model() { + double len = wire_length; + double beta = pmos_to_nmos_sz_ratio(); + + + double inputrise = (in_rise_time == 0) ? signal_rise_time() : in_rise_time; + + /* Final nmos low swing driver size calculation: + * Try to size the driver such that the delay + * is less than 8FO4. + * If the driver size is greater than + * the max allowable size, assume max size for the driver. + * In either case, recalculate the delay using + * the final driver size assuming slow input with + * finite rise time instead of ideal step input + * + * (ref: Technical report 6) + */ + double cwire = wire_cap(len); /* load capacitance */ + double rwire = wire_res(len); #define RES_ADJ (8.6) // Increase in resistance due to low driving vol. - double driver_res = (-8*g_tp.FO4/(log(0.5) * cwire))/RES_ADJ; - double nsize = R_to_w(driver_res, NCH); - - nsize = MIN(nsize, g_tp.max_w_nmos_); - nsize = MAX(nsize, g_tp.min_w_nmos_); - - if(rwire*cwire > 8*g_tp.FO4) - { - nsize = g_tp.max_w_nmos_; - } - - // size the inverter appropriately to minimize the transmitter delay - // Note - In order to minimize leakage, we are not adding a set of inverters to - // bring down delay. Instead, we are sizing the single gate - // based on the logical effort. 
- double st_eff = sqrt((2+beta/1+beta)*gate_C(nsize, 0)/(gate_C(2*g_tp.min_w_nmos_, 0) - + gate_C(2*min_w_pmos, 0))); - double req_cin = ((2+beta/1+beta)*gate_C(nsize, 0))/st_eff; - double inv_size = req_cin/(gate_C(min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0)); - inv_size = MAX(inv_size, 1); - - /* nand gate delay */ - double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1)); - double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(2*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(inv_size*g_tp.min_w_nmos_, 0) + - gate_C(inv_size*min_w_pmos, 0); - - double timeconst = res_eq * cap_eq; - - delay = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, - deviceType->Vth/deviceType->Vdd, RISE); - double temp_power = cap_eq*deviceType->Vdd*deviceType->Vdd; - - inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */ - - /* Inverter delay: - * The load capacitance of this inv depends on - * the gate capacitance of the final stage nmos - * transistor which in turn depends on nsize - */ - res_eq = tr_R_on(inv_size*min_w_pmos, PCH, 1); - cap_eq = drain_C_(inv_size*min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(inv_size*g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(nsize, 0); - timeconst = res_eq * cap_eq; - - delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, - deviceType->Vth/deviceType->Vdd, FALL); - temp_power += cap_eq*deviceType->Vdd*deviceType->Vdd; - - - transmitter.delay = delay; - transmitter.power.readOp.dynamic = temp_power*2; /* since it is a diff. 
model*/ - transmitter.power.readOp.leakage = deviceType->Vdd * - (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + - 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); - - transmitter.power.readOp.gate_leakage = deviceType->Vdd * - (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + - 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); - - inputrise = delay / deviceType->Vth; - - /* nmos delay + wire delay */ - cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2 + - nsense * sense_amp_input_cap(); //+receiver cap - /* - * NOTE: nmos is used as both pull up and pull down transistor - * in the transmitter. This is because for low voltage swing, drive - * resistance of nmos is less than pmos - * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency) - */ - timeconst = (tr_R_on(nsize, NCH, 1)*RES_ADJ) * (cwire + - drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)*2) + - rwire*cwire/2 + - (tr_R_on(nsize, NCH, 1)*RES_ADJ + rwire) * - nsense * sense_amp_input_cap(); - - /* - * since we are pre-equalizing and overdriving the low - * swing wires, the net time constant is less - * than the actual value - */ - delay += horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, .25, 0); + double driver_res = (-8 * g_tp.FO4 / (log(0.5) * cwire)) / RES_ADJ; + double nsize = R_to_w(driver_res, NCH); + + nsize = MIN(nsize, g_tp.max_w_nmos_); + nsize = MAX(nsize, g_tp.min_w_nmos_); + + if (rwire*cwire > 8*g_tp.FO4) { + nsize = g_tp.max_w_nmos_; + } + + // size the inverter appropriately to minimize the transmitter delay + // Note - In order to minimize leakage, we are not adding a set of inverters to + // bring down delay. Instead, we are sizing the single gate + // based on the logical effort. 
+ double st_eff = sqrt((2 + beta / 1 + beta) * gate_C(nsize, 0) / + (gate_C(2 * g_tp.min_w_nmos_, 0) + + gate_C(2 * min_w_pmos, 0))); + double req_cin = ((2 + beta / 1 + beta) * gate_C(nsize, 0)) / st_eff; + double inv_size = req_cin / (gate_C(min_w_pmos, 0) + + gate_C(g_tp.min_w_nmos_, 0)); + inv_size = MAX(inv_size, 1); + + /* nand gate delay */ + double res_eq = (2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1)); + double cap_eq = 2 * drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(2 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(inv_size * g_tp.min_w_nmos_, 0) + + gate_C(inv_size * min_w_pmos, 0); + + double timeconst = res_eq * cap_eq; + + delay = horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, RISE); + double temp_power = cap_eq * deviceType->Vdd * deviceType->Vdd; + + inputrise = delay / (deviceType->Vdd - deviceType->Vth); /* for the next stage */ + + /* Inverter delay: + * The load capacitance of this inv depends on + * the gate capacitance of the final stage nmos + * transistor which in turn depends on nsize + */ + res_eq = tr_R_on(inv_size * min_w_pmos, PCH, 1); + cap_eq = drain_C_(inv_size * min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(inv_size * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + + gate_C(nsize, 0); + timeconst = res_eq * cap_eq; + + delay += horowitz(inputrise, timeconst, deviceType->Vth / deviceType->Vdd, + deviceType->Vth / deviceType->Vdd, FALL); + temp_power += cap_eq * deviceType->Vdd * deviceType->Vdd; + + + transmitter.delay = delay; + /* since it is a diff. 
model*/ + transmitter.power.readOp.dynamic = temp_power * 2; + transmitter.power.readOp.leakage = deviceType->Vdd * + (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + + 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); + + transmitter.power.readOp.gate_leakage = deviceType->Vdd * + (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 2, nand) + + 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, min_w_pmos, 1, inv)); + + inputrise = delay / deviceType->Vth; + + /* nmos delay + wire delay */ + cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2 + + nsense * sense_amp_input_cap(); //+receiver cap + /* + * NOTE: nmos is used as both pull up and pull down transistor + * in the transmitter. This is because for low voltage swing, drive + * resistance of nmos is less than pmos + * (for a detailed graph ref: On-Chip Wires: Scaling and Efficiency) + */ + timeconst = (tr_R_on(nsize, NCH, 1) * RES_ADJ) * (cwire + + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2) + + rwire * cwire / 2 + + (tr_R_on(nsize, NCH, 1) * RES_ADJ + rwire) * + nsense * sense_amp_input_cap(); + + /* + * since we are pre-equalizing and overdriving the low + * swing wires, the net time constant is less + * than the actual value + */ + delay += horowitz(inputrise, timeconst, deviceType->Vth / + deviceType->Vdd, .25, 0); #define VOL_SWING .1 - temp_power += cap_eq*VOL_SWING*.400; /* .4v is the over drive voltage */ - temp_power *= 2; /* differential wire */ - - l_wire.delay = delay - transmitter.delay; - l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic; - l_wire.power.readOp.leakage = deviceType->Vdd* - (4* cmos_Isub_leakage(nsize, 0, 1, nmos)); - - l_wire.power.readOp.gate_leakage = deviceType->Vdd* - (4* cmos_Ig_leakage(nsize, 0, 1, nmos)); - - //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, - // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth; - - delay += g_tp.sense_delay; - - sense_amp.delay = g_tp.sense_delay; - 
out_rise_time = g_tp.sense_delay/(deviceType->Vth); - sense_amp.power.readOp.dynamic = g_tp.sense_dy_power; - sense_amp.power.readOp.leakage = 0; //FIXME - sense_amp.power.readOp.gate_leakage = 0; - - power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic; - power.readOp.leakage = transmitter.power.readOp.leakage + - l_wire.power.readOp.leakage + - sense_amp.power.readOp.leakage; - power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage + - l_wire.power.readOp.gate_leakage + - sense_amp.power.readOp.gate_leakage; + temp_power += cap_eq * VOL_SWING * .400; /* .4v is the over drive voltage */ + temp_power *= 2; /* differential wire */ + + l_wire.delay = delay - transmitter.delay; + l_wire.power.readOp.dynamic = temp_power - transmitter.power.readOp.dynamic; + l_wire.power.readOp.leakage = deviceType->Vdd * + (4 * cmos_Isub_leakage(nsize, 0, 1, nmos)); + + l_wire.power.readOp.gate_leakage = deviceType->Vdd * + (4 * cmos_Ig_leakage(nsize, 0, 1, nmos)); + + //double rt = horowitz(inputrise, timeconst, deviceType->Vth/deviceType->Vdd, + // deviceType->Vth/deviceType->Vdd, RISE)/deviceType->Vth; + + delay += g_tp.sense_delay; + + sense_amp.delay = g_tp.sense_delay; + out_rise_time = g_tp.sense_delay / (deviceType->Vth); + sense_amp.power.readOp.dynamic = g_tp.sense_dy_power; + sense_amp.power.readOp.leakage = 0; //FIXME + sense_amp.power.readOp.gate_leakage = 0; + + power.readOp.dynamic = temp_power + sense_amp.power.readOp.dynamic; + power.readOp.leakage = transmitter.power.readOp.leakage + + l_wire.power.readOp.leakage + + sense_amp.power.readOp.leakage; + power.readOp.gate_leakage = transmitter.power.readOp.gate_leakage + + l_wire.power.readOp.gate_leakage + + sense_amp.power.readOp.gate_leakage; } - double -Wire::sense_amp_input_cap() -{ - return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + - drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.w_sense_p, PCH, 1, 1, 
g_tp.cell_h_def); +double +Wire::sense_amp_input_cap() { + return drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + + drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def); } -void Wire::delay_optimal_wire () -{ - double len = wire_length; - //double min_wire_width = wire_width; //m - double beta = pmos_to_nmos_sz_ratio(); - double switching = 0; // switching energy - double short_ckt = 0; // short-circuit energy - double tc = 0; // time constant - // input cap of min sized driver - double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); +void Wire::delay_optimal_wire () { + double len = wire_length; + //double min_wire_width = wire_width; //m + double beta = pmos_to_nmos_sz_ratio(); + double switching = 0; // switching energy + double short_ckt = 0; // short-circuit energy + double tc = 0; // time constant + // input cap of min sized driver + double input_cap = gate_C(g_tp.min_w_nmos_ + min_w_pmos, 0); - // output parasitic capacitance of - // the min. sized driver - double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); - // drive resistance - double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + - tr_R_on(min_w_pmos, PCH, 1))/2; - double wr = wire_res(len); //ohm + // output parasitic capacitance of + // the min. 
sized driver + double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); + // drive resistance + double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + + tr_R_on(min_w_pmos, PCH, 1)) / 2; + double wr = wire_res(len); //ohm - // wire cap /m - double wc = wire_cap(len); + // wire cap /m + double wc = wire_cap(len); - // size the repeater such that the delay of the wire is minimum - double repeater_scaling = sqrt(out_res*wc/(wr*input_cap)); // len will cancel + // size the repeater such that the delay of the wire is minimum + // len will cancel + double repeater_scaling = sqrt(out_res * wc / (wr * input_cap)); - // calc the optimum spacing between the repeaters (m) + // calc the optimum spacing between the repeaters (m) - repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap)/ - ((wr/len)*(wc/len))); - repeater_size = repeater_scaling; + repeater_spacing = sqrt(2 * out_res * (out_cap + input_cap) / + ((wr / len) * (wc / len))); + repeater_size = repeater_scaling; - switching = (repeater_scaling * (input_cap + out_cap) + - repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd; + switching = (repeater_scaling * (input_cap + out_cap) + + repeater_spacing * (wc / len)) * deviceType->Vdd * + deviceType->Vdd; - tc = out_res * (input_cap + out_cap) + - out_res * wc/len * repeater_spacing/repeater_scaling + - wr/len * repeater_spacing * input_cap * repeater_scaling + - 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing; + tc = out_res * (input_cap + out_cap) + + out_res * wc / len * repeater_spacing / repeater_scaling + + wr / len * repeater_spacing * input_cap * repeater_scaling + + 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing; - delay = 0.693 * tc * len/repeater_spacing; + delay = 0.693 * tc * len / repeater_spacing; #define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. 
{IEEE TED} */ - short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * - repeater_scaling * tc; - - area.set_area((len/repeater_spacing) * - compute_gate_area(INV, 1, min_w_pmos * repeater_scaling, - g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def)); - power.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt)); - power.readOp.leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv)); - power.readOp.gate_leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_scaling, beta*g_tp.min_w_nmos_*repeater_scaling, 1, inv)); + short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * + repeater_scaling * tc; + + area.set_area((len / repeater_spacing) * + compute_gate_area(INV, 1, min_w_pmos * repeater_scaling, + g_tp.min_w_nmos_ * repeater_scaling, + g_tp.cell_h_def)); + power.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt)); + power.readOp.leakage = ((len / repeater_spacing) * + deviceType->Vdd * + cmos_Isub_leakage(g_tp.min_w_nmos_ * + repeater_scaling, beta * + g_tp.min_w_nmos_ * + repeater_scaling, 1, inv)); + power.readOp.gate_leakage = ((len / repeater_spacing) * + deviceType->Vdd * + cmos_Ig_leakage(g_tp.min_w_nmos_ * + repeater_scaling, beta * + g_tp.min_w_nmos_ * + repeater_scaling, 1, inv)); } // calculate power/delay values for wires with suboptimal repeater sizing/spacing void -Wire::init_wire(){ - wire_length = 1; - delay_optimal_wire(); +Wire::init_wire() { + wire_length = 1; + delay_optimal_wire(); double sp, si; - powerDef pow; - si = repeater_size; - sp = repeater_spacing; - sp *= 1e6; // in microns - - double i, j, del; - repeated_wire.push_back(Component()); - for (j=sp; j < 4*sp; j+=100) { - for (i = si; i > 1; i--) { - pow = wire_model(j*1e-6, i, &del); - if (j == sp && i == si) { - global.delay = del; - global.power = pow; - global.area.h = si; - 
global.area.w = sp*1e-6; // m - } + powerDef pow; + si = repeater_size; + sp = repeater_spacing; + sp *= 1e6; // in microns + + double i, j, del; + repeated_wire.push_back(Component()); + for (j = sp; j < 4*sp; j += 100) { + for (i = si; i > 1; i--) { + pow = wire_model(j * 1e-6, i, &del); + if (j == sp && i == si) { + global.delay = del; + global.power = pow; + global.area.h = si; + global.area.w = sp * 1e-6; // m + } // cout << "Repeater size - "<< i << // " Repeater spacing - " << j << // " Delay - " << del << // " PowerD - " << pow.readOp.dynamic << // " PowerL - " << pow.readOp.leakage <delay; - low_swing.power = l_wire->power; - delete l_wire; + repeated_wire.pop_back(); + update_fullswing(); + Wire *l_wire = new Wire(Low_swing, 0.001/* 1 mm*/, 1); + low_swing.delay = l_wire->delay; + low_swing.power = l_wire->power; + delete l_wire; } -void Wire::update_fullswing() -{ - - list::iterator citer; - double del[4]; - del[3] = this->global.delay + this->global.delay*.3; - del[2] = global.delay + global.delay*.2; - del[1] = global.delay + global.delay*.1; - del[0] = global.delay + global.delay*.05; - double threshold; - double ncost; - double cost; - int i = 4; - while (i>0) { - threshold = del[i-1]; - cost = BIGNUM; - for (citer = repeated_wire.begin(); citer != repeated_wire.end(); citer++) - { - if (citer->delay > threshold) { - citer = repeated_wire.erase(citer); - citer --; - } - else { - ncost = citer->power.readOp.dynamic/global.power.readOp.dynamic + - citer->power.readOp.leakage/global.power.readOp.leakage; - if(ncost < cost) - { - cost = ncost; - if (i == 4) { - global_30.delay = citer->delay; - global_30.power = citer->power; - global_30.area = citer->area; - } - else if (i==3) { - global_20.delay = citer->delay; - global_20.power = citer->power; - global_20.area = citer->area; - } - else if(i==2) { - global_10.delay = citer->delay; - global_10.power = citer->power; - global_10.area = citer->area; - } - else if(i==1) { - global_5.delay = citer->delay; - 
global_5.power = citer->power; - global_5.area = citer->area; - } +void Wire::update_fullswing() { + + list::iterator citer; + double del[4]; + del[3] = this->global.delay + this->global.delay * .3; + del[2] = global.delay + global.delay * .2; + del[1] = global.delay + global.delay * .1; + del[0] = global.delay + global.delay * .05; + double threshold; + double ncost; + double cost; + int i = 4; + while (i > 0) { + threshold = del[i-1]; + cost = BIGNUM; + for (citer = repeated_wire.begin(); citer != repeated_wire.end(); + citer++) { + if (citer->delay > threshold) { + citer = repeated_wire.erase(citer); + citer --; + } else { + ncost = citer->power.readOp.dynamic / + global.power.readOp.dynamic + + citer->power.readOp.leakage / global.power.readOp.leakage; + if (ncost < cost) { + cost = ncost; + if (i == 4) { + global_30.delay = citer->delay; + global_30.power = citer->power; + global_30.area = citer->area; + } else if (i == 3) { + global_20.delay = citer->delay; + global_20.power = citer->power; + global_20.area = citer->area; + } else if (i == 2) { + global_10.delay = citer->delay; + global_10.power = citer->power; + global_10.area = citer->area; + } else if (i == 1) { + global_5.delay = citer->delay; + global_5.power = citer->power; + global_5.area = citer->area; + } + } + } } - } + i--; } - i--; - } } -powerDef Wire::wire_model (double space, double size, double *delay) -{ - powerDef ptemp; - double len = 1; - //double min_wire_width = wire_width; //m - double beta = pmos_to_nmos_sz_ratio(); - // switching energy - double switching = 0; - // short-circuit energy - double short_ckt = 0; - // time constant - double tc = 0; - // input cap of min sized driver - double input_cap = gate_C (g_tp.min_w_nmos_ + - min_w_pmos, 0); - - // output parasitic capacitance of - // the min. 
sized driver - double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); - // drive resistance - double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + - tr_R_on(min_w_pmos, PCH, 1))/2; - double wr = wire_res(len); //ohm - - // wire cap /m - double wc = wire_cap(len); - - repeater_spacing = space; - repeater_size = size; - - switching = (repeater_size * (input_cap + out_cap) + - repeater_spacing * (wc/len)) * deviceType->Vdd * deviceType->Vdd; - - tc = out_res * (input_cap + out_cap) + - out_res * wc/len * repeater_spacing/repeater_size + - wr/len * repeater_spacing * out_cap * repeater_size + - 0.5 * (wr/len) * (wc/len)* repeater_spacing * repeater_spacing; - - *delay = 0.693 * tc * len/repeater_spacing; +powerDef Wire::wire_model (double space, double size, double *delay) { + powerDef ptemp; + double len = 1; + //double min_wire_width = wire_width; //m + double beta = pmos_to_nmos_sz_ratio(); + // switching energy + double switching = 0; + // short-circuit energy + double short_ckt = 0; + // time constant + double tc = 0; + // input cap of min sized driver + double input_cap = gate_C (g_tp.min_w_nmos_ + + min_w_pmos, 0); + + // output parasitic capacitance of + // the min. 
sized driver + double out_cap = drain_C_(min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def); + // drive resistance + double out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + + tr_R_on(min_w_pmos, PCH, 1)) / 2; + double wr = wire_res(len); //ohm + + // wire cap /m + double wc = wire_cap(len); + + repeater_spacing = space; + repeater_size = size; + + switching = (repeater_size * (input_cap + out_cap) + + repeater_spacing * (wc / len)) * deviceType->Vdd * + deviceType->Vdd; + + tc = out_res * (input_cap + out_cap) + + out_res * wc / len * repeater_spacing / repeater_size + + wr / len * repeater_spacing * out_cap * repeater_size + + 0.5 * (wr / len) * (wc / len) * repeater_spacing * repeater_spacing; + + *delay = 0.693 * tc * len / repeater_spacing; #define Ishort_ckt 65e-6 /* across all tech Ref:Banerjee et al. {IEEE TED} */ - short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * - repeater_size * tc; - - ptemp.readOp.dynamic = ((len/repeater_spacing)*(switching + short_ckt)); - ptemp.readOp.leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Isub_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv)); - - ptemp.readOp.gate_leakage = ((len/repeater_spacing)* - deviceType->Vdd* - cmos_Ig_leakage(g_tp.min_w_nmos_*repeater_size, beta*g_tp.min_w_nmos_*repeater_size, 1, inv)); - - return ptemp; + short_ckt = deviceType->Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * + repeater_size * tc; + + ptemp.readOp.dynamic = ((len / repeater_spacing) * (switching + short_ckt)); + ptemp.readOp.leakage = ((len / repeater_spacing) * + deviceType->Vdd * + cmos_Isub_leakage(g_tp.min_w_nmos_ * + repeater_size, beta * + g_tp.min_w_nmos_ * + repeater_size, 1, inv)); + + ptemp.readOp.gate_leakage = ((len / repeater_spacing) * + deviceType->Vdd * + cmos_Ig_leakage(g_tp.min_w_nmos_ * + repeater_size, beta * + g_tp.min_w_nmos_ * + repeater_size, 1, inv)); + + return ptemp; } void 
-Wire::print_wire() -{ - - cout << "\nWire Properties:\n\n"; - cout << " Delay Optimal\n\tRepeater size - "<< global.area.h << - " \n\tRepeater spacing - " << global.area.w*1e3 << " (mm)" - " \n\tDelay - " << global.delay*1e6 << " (ns/mm)" - " \n\tPowerD - " << global.power.readOp.dynamic *1e6<< " (nJ/mm)" - " \n\tPowerL - " << global.power.readOp.leakage << " (mW/mm)" - " \n\tPowerLgate - " << global.power.readOp.gate_leakage << " (mW/mm)\n"; - cout << "\tWire width - " < + +#include "xmlParser.h" + +// Macro definitions to do string comparison to specific parameter/stat. +// Note: These macros assume node_name and value variables of type XMLCSTR +// to exist already. +#define STRCMP(var, str) else if (strcmp(var, str) == 0) + +#define ASSIGN_INT_IF(str, lhs) STRCMP(node_name, str) \ +lhs = atoi(value) + +#define ASSIGN_FP_IF(str, lhs) STRCMP(node_name, str) \ +lhs = atof(value) + +#define ASSIGN_STR_IF(str, lhs) STRCMP(node_name, str) \ +lhs = string(value) + +#define ASSIGN_ENUM_IF(str, lhs, etype) STRCMP(node_name, str) \ +lhs = (etype)atoi(value) + + +// Constants shared across many system components +#define BITS_PER_BYTE 8.0 +#define MIN_BUFFER_SIZE 64 +// CAM structures do not have any associativity +#define CAM_ASSOC 0 + +#endif // __COMMON_H__ diff --git a/ext/mcpat/core.cc b/ext/mcpat/core.cc index ba9106061..b25c23cac 100644 --- a/ext/mcpat/core.cc +++ b/ext/mcpat/core.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
* All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -33,491 +34,570 @@ #include #include #include +#include #include -#include "XML_Parse.h" #include "basic_circuit.h" +#include "basic_components.h" +#include "common.h" #include "const.h" #include "core.h" #include "io.h" #include "parameter.h" -//#include "globalvar.h" - -InstFetchU::InstFetchU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - IB (0), - BTB (0), - ID_inst (0), - ID_operand (0), - ID_misc (0), - exist(exist_) -{ - if (!exist) return; - int idx, tag, data, size, line, assoc, banks; - bool debug= false, is_default = true; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].icache.icache_config[7]; - //Assuming all L1 caches are virtually idxed physically tagged. 
- //cache - - size = (int)XML->sys.core[ithCore].icache.icache_config[0]; - line = (int)XML->sys.core[ithCore].icache.icache_config[1]; - assoc = (int)XML->sys.core[ithCore].icache.icache_config[2]; - banks = (int)XML->sys.core[ithCore].icache.icache_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:(int)XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].icache.icache_config[0]; - interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].icache.icache_config[1]; - interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].icache.icache_config[2]; - interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].icache.icache_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - // interface_ip.obj_func_dyn_energy = 0; - // interface_ip.obj_func_dyn_power = 0; - // interface_ip.obj_func_leak_power = 0; - // interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - icache.caches = new ArrayST(&interface_ip, "icache", Core_device, coredynp.opt_local, coredynp.core_ty); - scktRatio = g_tp.sckt_co_eff; - chip_PR_overhead = g_tp.chip_layout_overhead; - macro_PR_overhead = g_tp.macro_layout_overhead; - icache.area.set_area(icache.area.get_area()+ icache.caches->local_result.area); - area.set_area(area.get_area()+ 
icache.caches->local_result.area); - //output_data_csv(icache.caches.local_result); - - - /* - *iCache controllers - *miss buffer Each MSHR contains enough state - *to handle one or more accesses of any type to a single memory line. - *Due to the generality of the MSHR mechanism, - *the amount of state involved is non-trivial: - *including the address, pointers to the cache entry and destination register, - *written data, and various other pieces of state. - */ - interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + icache.caches->l_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate;//means cycle time - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.missb = new ArrayST(&interface_ip, "icacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.missb->local_result.area); - 
area.set_area(area.get_area()+ icache.missb->local_result.area); - //output_data_csv(icache.missb.local_result); - - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = icache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.core[ithCore].icache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.ifb = new ArrayST(&interface_ip, "icacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.ifb->local_result.area); - area.set_area(area.get_area()+ icache.ifb->local_result.area); - //output_data_csv(icache.ifb.local_result); - - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = icache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.core[ithCore].icache.buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].number_instruction_fetch_ports; - icache.prefetchb = new ArrayST(&interface_ip, "icacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - icache.area.set_area(icache.area.get_area()+ icache.prefetchb->local_result.area); - area.set_area(area.get_area()+ icache.prefetchb->local_result.area); - //output_data_csv(icache.prefetchb.local_result); - - //Instruction buffer - data = XML->sys.core[ithCore].instruction_length*XML->sys.core[ithCore].peak_issue_width;//icache.caches.l_ip.line_sz; //multiple threads timing sharing the instruction buffer. - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - interface_ip.pure_cam = false; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz>64? 
- XML->sys.core[ithCore].number_hardware_threads*XML->sys.core[ithCore].instruction_buffer_size*interface_ip.line_sz:64; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - //NOTE: Assuming IB is time slice shared among threads, every fetch op will at least fetch "fetch width" instructions. - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports;//XML->sys.core[ithCore].fetch_width; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - IB = new ArrayST(&interface_ip, "InstBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - IB->area.set_area(IB->area.get_area()+ IB->local_result.area); - area.set_area(area.get_area()+ IB->local_result.area); - //output_data_csv(IB.IB.local_result); - - // inst_decoder.opcode_length = XML->sys.core[ithCore].opcode_width; - // inst_decoder.init_decoder(is_default, &interface_ip); - // inst_decoder.full_decoder_power(); - - if (coredynp.predictionW>0) - { - /* - * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged - * It is only a cache without all the buffers in the cache controller since it is more like a - * look up table than a cache with cache controller. 
When access miss, no load from other places - * such as main memory (not actively fill the misses), it is passively updated under two circumstances: - * 1) when BPT@ID stage finds out current is a taken branch while BTB missed - * 2) When BPT@ID stage predicts differently than BTB - * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) - * 4) when EXEU find out wrong target has been provided from BTB. - * - */ - size = XML->sys.core[ithCore].BTB.BTB_config[0]; - line = XML->sys.core[ithCore].BTB.BTB_config[1]; - assoc = XML->sys.core[ithCore].BTB.BTB_config[2]; - banks = XML->sys.core[ithCore].BTB.BTB_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); -// tag = debug?51:XML->sys.virtual_address_width-idx-int(ceil(log2(line))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; - tag = debug?51:XML->sys.virtual_address_width + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) +EXTRA_TAG_BITS; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:size; - interface_ip.line_sz = debug?64:line; - interface_ip.assoc = debug?8:assoc; - interface_ip.nbanks = debug?1:banks; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].BTB.BTB_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - BTB = 
new ArrayST(&interface_ip, "Branch Target Buffer", Core_device, coredynp.opt_local, coredynp.core_ty); - BTB->area.set_area(BTB->area.get_area()+ BTB->local_result.area); - area.set_area(area.get_area()+ BTB->local_result.area); - ///cout<<"area="<area.get_area()); - } - - ID_inst = new inst_decoder(is_default, &interface_ip, - coredynp.opcode_length, 1/*Decoder should not know how many by itself*/, - coredynp.x86, - Core_device, coredynp.core_ty); - - ID_operand = new inst_decoder(is_default, &interface_ip, - coredynp.arch_ireg_width, 1, - coredynp.x86, - Core_device, coredynp.core_ty); - - ID_misc = new inst_decoder(is_default, &interface_ip, - 8/* Prefix field etc upto 14B*/, 1, - coredynp.x86, - Core_device, coredynp.core_ty); - //TODO: X86 decoder should decode the inst in cyclic mode under the control of squencer. - //So the dynamic power should be multiplied by a few times. - area.set_area(area.get_area()+ (ID_inst->area.get_area() - +ID_operand->area.get_area() - +ID_misc->area.get_area())*coredynp.decodeW); -} +int RegFU::RFWIN_ACCESS_MULTIPLIER = 16; + +// The five bits are: busy, Issued, Finished, speculative, valid +int SchedulerU::ROB_STATUS_BITS = 5; + +InstFetchU::InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), icache(NULL), IB(NULL), BTB(NULL), + BPT(NULL), ID_inst(NULL), ID_operand(NULL), ID_misc(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int idx, tag, data, size, line, assoc, banks; + bool is_default = true; + + clockRate = core_params.clockRate; + name = "Instruction Fetch Unit"; + // Check if there is an icache child: + int i; + icache = NULL; + for( i = 0; i < xml_data->nChildNode("component"); i++ ) { + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + + if 
(!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "CacheUnit") { + XMLCSTR name = childXML->getAttribute("name"); + if (strcmp(name, "Instruction Cache") == 0 || + strcmp(name, "icache") == 0) { + icache = new CacheUnit(childXML, &interface_ip); + children.push_back(icache); + } + } + } + set_params_stats(); -BranchPredictor::BranchPredictor(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - globalBPT(0), - localBPT(0), - L1_localBPT(0), - L2_localBPT(0), - chooser(0), - RAS(0), - exist(exist_) -{ + //Instruction buffer + data = core_params.instruction_length * core_params.peak_issueW; + line = int(ceil(data / BITS_PER_BYTE)); + size = core_params.num_hthreads * core_params.instruction_buffer_size * + line; + if (size < MIN_BUFFER_SIZE) { + size = MIN_BUFFER_SIZE; + } + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.instruction_buffer_assoc; + interface_ip.nbanks = core_params.instruction_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.instruction_buffer_tag_width > 0; + interface_ip.tag_w = core_params.instruction_buffer_tag_width; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = + core_params.number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + + IB = new ArrayST(xml_data, &interface_ip, "Instruction 
Buffer", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + IB->area.set_area(IB->area.get_area() + IB->local_result.area); + area.set_area(area.get_area() + IB->local_result.area); + + if (core_params.predictionW > 0) { /* - * Branch Predictor, accessed during ID stage. - * McPAT's branch predictor model is the tournament branch predictor used in Alpha 21264, - * including global predictor, local two level predictor, and Chooser. - * The Branch predictor also includes a RAS (return address stack) for function calls - * Branch predictors are tagged by thread ID and modeled as 1-way associative $ - * However RAS return address stacks are duplicated for each thread. - * TODO:Data Width need to be computed more precisely * + * BTB branch target buffer, accessed during IF stage. Virtually indexed and virtually tagged + * It is only a cache without all the buffers in the cache controller since it is more like a + * look up table than a cache with cache controller. When access miss, no load from other places + * such as main memory (not actively fill the misses), it is passively updated under two circumstances: + * 1) when BPT@ID stage finds out current is a taken branch while BTB missed + * 2) When BPT@ID stage predicts differently than BTB + * 3) When ID stage finds out current instruction is not a branch while BTB had a hit.(mark as invalid) + * 4) when EXEU find out wrong target has been provided from BTB. 
+ * */ - if (!exist) return; - int tag, data; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.assoc = 1; - interface_ip.pure_cam = false; - if (coredynp.multithreaded) - { - - tag = int(log2(coredynp.num_hthreads)+ EXTRA_TAG_BITS); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - } - else - { - interface_ip.is_cache = false; - interface_ip.pure_ram = true; - - } - //Global predictor - data = int(ceil(XML->sys.core[ithCore].predictor.global_predictor_bits/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.global_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + size = inst_fetch_params.btb_size; + line = inst_fetch_params.btb_block_size; + assoc = inst_fetch_params.btb_assoc; + banks = inst_fetch_params.btb_num_banks; + idx = int(ceil(log2(size / line / assoc))); + tag = virtual_address_width + int(ceil(log2(core_params.num_hthreads))) + + EXTRA_TAG_BITS; + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = assoc; + interface_ip.nbanks = banks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; 
interface_ip.num_se_rd_ports = 0; - globalBPT = new ArrayST(&interface_ip, "Global Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - globalBPT->area.set_area(globalBPT->area.get_area()+ globalBPT->local_result.area); - area.set_area(area.get_area()+ globalBPT->local_result.area); - - //Local BPT (Level 1) - data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[0]/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L1_localBPT = new ArrayST(&interface_ip, "L1 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L1_localBPT->area.set_area(L1_localBPT->area.get_area()+ L1_localBPT->local_result.area); - area.set_area(area.get_area()+ L1_localBPT->local_result.area); - - //Local BPT (Level 2) - data = int(ceil(XML->sys.core[ithCore].predictor.local_predictor_size[1]/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.local_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - 
interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - L2_localBPT = new ArrayST(&interface_ip, "L2 local Predictor", Core_device, coredynp.opt_local, coredynp.core_ty); - L2_localBPT->area.set_area(L2_localBPT->area.get_area()+ L2_localBPT->local_result.area); - area.set_area(area.get_area()+ L2_localBPT->local_result.area); - - //Chooser - data = int(ceil(XML->sys.core[ithCore].predictor.chooser_predictor_bits/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].predictor.chooser_predictor_entries; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; - interface_ip.num_se_rd_ports = 0; - chooser = new ArrayST(&interface_ip, "Predictor Chooser", Core_device, coredynp.opt_local, coredynp.core_ty); - chooser->area.set_area(chooser->area.get_area()+ chooser->local_result.area); - area.set_area(area.get_area()+ chooser->local_result.area); - - //RAS return address stacks are Duplicated for each thread. 
- interface_ip.is_cache = false; - interface_ip.pure_ram = true; - data = int(ceil(coredynp.pc_width/8.0)); - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].RAS_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = inst_fetch_params.btb_throughput / clockRate; + interface_ip.latency = inst_fetch_params.btb_latency / clockRate; + + BTB = new ArrayST(xml_data, &interface_ip, "Branch Target Buffer", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + BTB->local_result.area); + + BPT = new BranchPredictor(xml_data, &interface_ip, + core_params, core_stats); + area.set_area(area.get_area() + BPT->area.get_area()); + } + + ID_inst = new InstructionDecoder(xml_data, "Instruction Opcode Decoder", + is_default, &interface_ip, + core_params.opcode_width, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + + ID_operand = new InstructionDecoder(xml_data, + "Instruction Operand Decoder", + is_default, &interface_ip, + core_params.arch_ireg_width, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + + ID_misc = new InstructionDecoder(xml_data, "Instruction Microcode Decoder", + is_default, &interface_ip, + core_params.micro_opcode_length, + core_params.decodeW, + core_params.x86, clockRate, + Core_device, core_params.core_ty); + area.set_area(area.get_area()+ (ID_inst->area.get_area() + + ID_operand->area.get_area() + + ID_misc->area.get_area()) + * core_params.decodeW); +} + +void +InstFetchU::set_params_stats() { + int num_children = xml_data->nChildNode("component"); + int i; + 
memset(&inst_fetch_params,0,sizeof(InstFetchParameters)); + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); + + if (!type) + warnMissingComponentType(child->getAttribute("id")); + + STRCMP(type, "BranchTargetBuffer") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("size", inst_fetch_params.btb_size); + ASSIGN_INT_IF("block_size", inst_fetch_params.btb_block_size); + ASSIGN_INT_IF("assoc", inst_fetch_params.btb_assoc); + ASSIGN_INT_IF("num_banks", inst_fetch_params.btb_num_banks); + ASSIGN_INT_IF("latency", inst_fetch_params.btb_latency); + ASSIGN_INT_IF("throughput", inst_fetch_params.btb_throughput); + ASSIGN_INT_IF("rw_ports", inst_fetch_params.btb_rw_ports); + + else { + warnUnrecognizedParam(node_name); + } + } + + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("read_accesses", + inst_fetch_stats.btb_read_accesses); + ASSIGN_FP_IF("write_accesses", + inst_fetch_stats.btb_write_accesses); + else { + warnUnrecognizedStat(node_name); + } + } + } + } + + // Parameter sanity check + if (inst_fetch_params.btb_size <= 0) { + errorNonPositiveParam("size"); + } + + if (inst_fetch_params.btb_block_size <= 0) { + errorNonPositiveParam("block_size"); + } + + if (inst_fetch_params.btb_assoc <= 0) { + errorNonPositiveParam("assoc"); + } + + if (inst_fetch_params.btb_num_banks 
<= 0) { + errorNonPositiveParam("num_banks"); + } +} + +BranchPredictor::BranchPredictor(XMLNode* _xml_data, + InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_) + : McPATComponent(_xml_data), globalBPT(NULL), localBPT(NULL), + L1_localBPT(NULL), L2_localBPT(NULL), chooser(NULL), RAS(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int size; + + clockRate = core_params.clockRate; + name = "Branch Predictor"; + + // Common interface parameters for the branch predictor structures + interface_ip.pure_cam = false; + + if (core_params.multithreaded) { + tag = int(log2(core_params.num_hthreads) + EXTRA_TAG_BITS); + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + } else { + interface_ip.specific_tag = 0; + interface_ip.tag_w = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + } + + // Parse params and stats from XML + set_params_stats(); + + // Common interface parameters for the branch predictor structures + interface_ip.assoc = branch_pred_params.assoc; + interface_ip.nbanks = branch_pred_params.nbanks; + + //Global predictor + data = int(ceil(branch_pred_params.global_predictor_bits / BITS_PER_BYTE)); + size = data * branch_pred_params.global_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + 
interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + globalBPT = new ArrayST(xml_data, &interface_ip, "Global Predictor", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + globalBPT->local_result.area); + + //Local BPT (Level 1) + data = int(ceil(branch_pred_params.local_l1_predictor_size / + BITS_PER_BYTE)); + size = data * branch_pred_params.local_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + L1_localBPT = new ArrayST(xml_data, &interface_ip, + "Local Predictor, Level 1", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + L1_localBPT->area.set_area(L1_localBPT->area.get_area() + + L1_localBPT->local_result.area); + area.set_area(area.get_area()+ L1_localBPT->local_result.area); + + //Local BPT (Level 2) + data = int(ceil(branch_pred_params.local_l2_predictor_size / + BITS_PER_BYTE)); + size = data * branch_pred_params.local_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports 
= core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + L2_localBPT = new ArrayST(xml_data, &interface_ip, + "Local Predictor, Level 2", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + L2_localBPT->local_result.area); + + //Chooser + data = int(ceil(branch_pred_params.chooser_predictor_bits / + BITS_PER_BYTE)); + size = data * branch_pred_params.chooser_predictor_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + chooser = new ArrayST(xml_data, &interface_ip, "Predictor Chooser", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + area.set_area(area.get_area() + chooser->local_result.area); + + //RAS return address stacks are Duplicated for each thread. 
+ data = int(ceil(core_params.pc_width / BITS_PER_BYTE)); + size = data * core_params.RAS_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.predictionW; + interface_ip.num_wr_ports = core_params.predictionW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + RAS = new ArrayST(xml_data, &interface_ip, "RAS", Core_device, clockRate, + core_params.opt_local, core_params.core_ty); + RAS->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + RAS->local_result.area * + core_params.num_hthreads); + +} + +void +BranchPredictor::set_params_stats() { + int num_children = xml_data->nChildNode("component"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); + + if (!type) + warnMissingComponentType(child->getAttribute("id")); + + STRCMP(type, "BranchPredictor") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("assoc", branch_pred_params.assoc); + ASSIGN_INT_IF("nbanks", branch_pred_params.nbanks); + ASSIGN_INT_IF("local_l1_predictor_size", + branch_pred_params.local_l1_predictor_size); + 
ASSIGN_INT_IF("local_l2_predictor_size", + branch_pred_params.local_l2_predictor_size); + ASSIGN_INT_IF("local_predictor_entries", + branch_pred_params.local_predictor_entries); + ASSIGN_INT_IF("global_predictor_entries", + branch_pred_params.global_predictor_entries); + ASSIGN_INT_IF("global_predictor_bits", + branch_pred_params.global_predictor_bits); + ASSIGN_INT_IF("chooser_predictor_entries", + branch_pred_params.chooser_predictor_entries); + ASSIGN_INT_IF("chooser_predictor_bits", + branch_pred_params.chooser_predictor_bits); + + else { + warnUnrecognizedParam(node_name); + } + } + // The core reads in the number of branches and the number of + // function calls and these values are passed through the + // core_stats variable, so we don't need to read them in here + } + } +} + +SchedulerU::SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), int_inst_window(NULL), + fp_inst_window(NULL), ROB(NULL), int_instruction_selection(NULL), + fp_instruction_selection(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int size; + int line; + bool is_default = true; + string tmp_name; + + clockRate = core_params.clockRate; + name = "Instruction Scheduler"; + if ((core_params.core_ty == Inorder && core_params.multithreaded)) { + //Instruction issue queue, in-order multi-issue or multithreaded + //processor also has this structure. 
Unified window for Inorder + //processors + //This tag width is the normal thread state bits based on + //Niagara Design + tag = int(log2(core_params.num_hthreads) * core_params.perThreadState); + data = core_params.instruction_length; + line = int(ceil(data / BITS_PER_BYTE)); + size = core_params.instruction_window_size * line; + if (size < MIN_BUFFER_SIZE) { + size = MIN_BUFFER_SIZE; + } + + //NOTE: x86 inst can be very lengthy, up to 15B. + //Source: Intel® 64 and IA-32 Architectures + //Software Developer’s Manual + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.scheduler_assoc; + interface_ip.nbanks = core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.predictionW; - interface_ip.num_wr_ports = coredynp.predictionW; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_issueW; + interface_ip.num_wr_ports = core_params.peak_issueW; interface_ip.num_se_rd_ports = 0; - RAS = new ArrayST(&interface_ip, "RAS", Core_device, coredynp.opt_local, coredynp.core_ty); - RAS->area.set_area(RAS->area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); - area.set_area(area.get_area()+ RAS->local_result.area*coredynp.num_hthreads); + interface_ip.num_search_ports = core_params.peak_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + int_inst_window = new ArrayST(xml_data, &interface_ip, + "InstFetchQueue", 
Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + int_inst_window->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + int_inst_window->local_result.area * + core_params.num_pipelines); + Iw_height = int_inst_window->local_result.cache_ht; -} + /* + * selection logic + * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up + * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who + * at the issue stage. + */ -SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - int_inst_window(0), - fp_inst_window(0), - ROB(0), - instruction_selection(0), - exist(exist_) - { - if (!exist) return; - int tag, data; - bool is_default=true; - string tmp_name; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if ((coredynp.core_ty==Inorder && coredynp.multithreaded)) - { - //Instruction issue queue, in-order multi-issue or multithreaded processor also has this structure. Unified window for Inorder processors - tag = int(log2(XML->sys.core[ithCore].number_hardware_threads)*coredynp.perThreadState);//This is the normal thread state bits based on Niagara Design - data = XML->sys.core[ithCore].instruction_length; - //NOTE: x86 inst can be very lengthy, up to 15B. 
Source: Intel® 64 and IA-32 Architectures - //Software Developer’s Manual - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz>64?XML->sys.core[ithCore].instruction_window_size*interface_ip.line_sz:64; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, "InstFetchQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - //output_data_csv(iRS.RS.local_result); - Iw_height =int_inst_window->local_result.cache_ht; - - /* - * selection logic - * In a single-issue Inorder multithreaded processor like Niagara, issue width=1*number_of_threads since the processor does need to pick up - * instructions from multiple ready ones(although these ready ones are from different threads).While SMT processors do not distinguish which thread belongs to who - * at the issue stage. 
- */ - - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW*XML->sys.core[ithCore].number_hardware_threads, - &interface_ip, Core_device, coredynp.core_ty); + int_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.instruction_window_size, + core_params.peak_issueW * + core_params.num_hthreads, + &interface_ip, + "Int Instruction Selection Logic", + core_stats.inst_window_wakeup_accesses, + clockRate, Core_device, core_params.core_ty); + + if (core_params.fp_instruction_window_size > 0) { + fp_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.fp_instruction_window_size, + core_params.fp_issueW * + core_params.num_hthreads, + &interface_ip, + "FP Instruction Selection Logic", + core_stats.fp_inst_window_wakeup_accesses, + clockRate, Core_device, + core_params.core_ty); } + } - if (coredynp.core_ty==OOO) - { + if (core_params.core_ty == OOO) { /* * CAM based instruction window * For physicalRegFilebased OOO it is the instruction issue queue, where only tags of phy regs are stored @@ -525,3611 +605,3405 @@ SchedulerU::SchedulerU(ParseXML* XML_interface, int ithCore_, InputParameter* in * It is written once and read twice(two operands) before an instruction can be issued. * X86 instruction can be very long up to 15B. add instruction length in XML */ - if(coredynp.scheu_ty==PhysicalRegFile) - { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be stored. - // This underestimate the search power - data = int((ceil((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width))/2.0)/8.0)); - //Data width being divided by 2 means only after both operands available the whole data will be read out. 
- //This is modeled using two equivalent readouts with half of the data width - tmp_name = "InstIssueQueue"; - } - else - { - tag = coredynp.phy_ireg_width; - // Each time only half of the tag is compared, but two tag should be stored. - // This underestimate the search power - data = int(ceil(((coredynp.instruction_length+2*(coredynp.phy_ireg_width - coredynp.arch_ireg_width)+ - 2*coredynp.int_data_width)/2.0)/8.0)); - //Data width being divided by 2 means only after both operands available the whole data will be read out. - //This is modeled using two equivalent readouts with half of the data width - - tmp_name = "IntReservationStation"; - } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 2*1.0/clockRate; - interface_ip.latency = 2*1.0/clockRate; + if (core_params.scheu_ty == PhysicalRegFile) { + tag = core_params.phy_ireg_width; + data = int((ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_ireg_width - + core_params.arch_ireg_width)) / + (double)NUM_SOURCE_OPERANDS) / + BITS_PER_BYTE)); + tmp_name = "Integer Instruction Window"; + } else { + tag = core_params.phy_ireg_width; + data = int(ceil(((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_ireg_width - + core_params.arch_ireg_width) + + 2 * core_params.int_data_width) / + (double)NUM_SOURCE_OPERANDS) / + BITS_PER_BYTE)); + tmp_name = "Integer Reservation Station"; + } + + size = data * core_params.instruction_window_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = core_params.scheduler_assoc; + interface_ip.nbanks = 
core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_issueW; + interface_ip.num_wr_ports = core_params.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.peak_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = NUM_SOURCE_OPERANDS * 1.0 / clockRate; + interface_ip.latency = NUM_SOURCE_OPERANDS * 1.0 / clockRate; + int_inst_window = new ArrayST(xml_data, &interface_ip, tmp_name, + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + int_inst_window->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + int_inst_window->local_result.area * + core_params.num_pipelines); + Iw_height = int_inst_window->local_result.cache_ht; + + //FU inst window + if (core_params.scheu_ty == PhysicalRegFile) { + tag = NUM_SOURCE_OPERANDS * core_params.phy_freg_width; + data = int(ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_freg_width - + core_params.arch_freg_width)) / BITS_PER_BYTE)); + tmp_name = "FP Instruction Window"; + } else { + tag = NUM_SOURCE_OPERANDS * core_params.phy_ireg_width; + data = int(ceil((core_params.instruction_length + + NUM_SOURCE_OPERANDS * + (core_params.phy_freg_width - + core_params.arch_freg_width) + + NUM_SOURCE_OPERANDS * core_params.fp_data_width) / + BITS_PER_BYTE)); + tmp_name = "FP Reservation Station"; + } + + size = data * core_params.fp_instruction_window_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = 
core_params.scheduler_assoc; + interface_ip.nbanks = core_params.scheduler_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.peak_issueW; - int_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - int_inst_window->area.set_area(int_inst_window->area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ int_inst_window->local_result.area*coredynp.num_pipelines); - Iw_height =int_inst_window->local_result.cache_ht; - //FU inst window - if(coredynp.scheu_ty==PhysicalRegFile) - { - tag = 2*coredynp.phy_freg_width;// TODO: each time only half of the tag is compared - data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width))/8.0)); - tmp_name = "FPIssueQueue"; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.fp_issueW; + interface_ip.num_wr_ports = core_params.fp_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.fp_issueW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fp_inst_window = + new ArrayST(xml_data, &interface_ip, tmp_name, Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + fp_inst_window->output_data.area *= core_params.num_fp_pipelines; 
+ area.set_area(area.get_area() + fp_inst_window->local_result.area + *core_params.num_fp_pipelines); + fp_Iw_height = fp_inst_window->local_result.cache_ht; + + if (core_params.ROB_size > 0) { + /* + * if ROB_size = 0, then the target processor does not support hardware-based + * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which + * means branch must be resolved before instruction issued into instruction window, since + * there is no change to flush miss-predict branch path after instructions are issued in this situation. + * + * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. + * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. + * However, this approach is abandoned due to its high power and poor scalablility. + * McPAT uses current implementation of ROB as circular buffer. + * ROB is written once when instruction is issued and read once when the instruction is committed. * + */ + int robExtra = int(ceil(ROB_STATUS_BITS + + log2(core_params.num_hthreads))); + + if (core_params.scheu_ty == PhysicalRegFile) { + //PC is to id the instruction for recover exception. + //inst is used to map the renamed dest. registers. 
so that + //commit stage can know which reg/RRAT to update + data = int(ceil((robExtra + core_params.pc_width + + core_params.phy_ireg_width) / BITS_PER_BYTE)); + } else { + //in RS based OOO, ROB also contains value of destination reg + data = int(ceil((robExtra + core_params.pc_width + + core_params.phy_ireg_width + + core_params.fp_data_width) / BITS_PER_BYTE)); + } + + interface_ip.cache_sz = data * core_params.ROB_size; + interface_ip.line_sz = data; + interface_ip.assoc = core_params.ROB_assoc; + interface_ip.nbanks = core_params.ROB_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.ROB_tag_width > 0; + interface_ip.tag_w = core_params.ROB_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.peak_commitW; + interface_ip.num_wr_ports = core_params.peak_issueW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ROB = new ArrayST(xml_data, &interface_ip, "Reorder Buffer", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ROB->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + ROB->local_result.area * + core_params.num_pipelines); + ROB_height = ROB->local_result.cache_ht; + } + + int_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.instruction_window_size, + core_params.peak_issueW, &interface_ip, + "Int Instruction Selection Logic", + core_stats.inst_window_wakeup_accesses, + clockRate, Core_device, core_params.core_ty); + + if (core_params.fp_instruction_window_size > 0) { + 
fp_instruction_selection = + new selection_logic(xml_data, is_default, + core_params.fp_instruction_window_size, + core_params.fp_issueW, &interface_ip, + "FP Instruction Selection Logic", + core_stats.fp_inst_window_wakeup_accesses, + clockRate, Core_device, + core_params.core_ty); } - else - { - tag = 2*coredynp.phy_ireg_width; - data = int(ceil((coredynp.instruction_length+2*(coredynp.phy_freg_width - coredynp.arch_freg_width)+ - 2*coredynp.fp_data_width)/8.0)); - tmp_name = "FPReservationStation"; + + } +} + +LoadStoreU::LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), dcache(NULL), LSQ(NULL), LoadQ(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int line; + int size; + int ldst_opcode = core_params.opcode_width; + + clockRate = core_params.clockRate; + name = "Load/Store Unit"; + + // Check if there is a dcache child: + int i; + dcache = NULL; + for( i = 0; i < xml_data->nChildNode("component"); i++ ) { + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "CacheUnit") { + XMLCSTR name = childXML->getAttribute("name"); + if (strcmp(name, "Data Cache") == 0 || + strcmp(name, "dcache") == 0) { + dcache = new CacheUnit(childXML, &interface_ip); + children.push_back(dcache); + } } - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].fp_instruction_window_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - 
interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + } + + /* + * LSU--in-order processors do not have separate load queue: unified lsq + * partitioned among threads + * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ + */ + tag = ldst_opcode + virtual_address_width + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + line = int(ceil(data_path_width / BITS_PER_BYTE)); + size = core_params.store_buffer_size * line * core_params.num_hthreads; + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.store_buffer_assoc; + interface_ip.nbanks = core_params.store_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + LSQ = new ArrayST(xml_data, &interface_ip, "Store Queue", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + LSQ->local_result.area); + area.set_area(area.get_area()*cdb_overhead); + lsq_height = LSQ->local_result.cache_ht * sqrt(cdb_overhead); + + if ((core_params.core_ty == OOO) && (core_params.load_buffer_size > 0)) { + tag = ldst_opcode + virtual_address_width + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + line = int(ceil(data_path_width / BITS_PER_BYTE)); + size = 
core_params.load_buffer_size * line * core_params.num_hthreads; + + interface_ip.cache_sz = size; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.load_buffer_assoc; + interface_ip.nbanks = core_params.load_buffer_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_issueW; - interface_ip.num_wr_ports = coredynp.fp_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = coredynp.fp_issueW; - fp_inst_window = new ArrayST(&interface_ip, tmp_name, Core_device, coredynp.opt_local, coredynp.core_ty); - fp_inst_window->area.set_area(fp_inst_window->area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); - area.set_area(area.get_area()+ fp_inst_window->local_result.area*coredynp.num_fp_pipelines); - fp_Iw_height =fp_inst_window->local_result.cache_ht; - - if (XML->sys.core[ithCore].ROB_size >0) - { - /* - * if ROB_size = 0, then the target processor does not support hardware-based - * speculation, i.e. , the processor allow OOO issue as well as OOO completion, which - * means branch must be resolved before instruction issued into instruction window, since - * there is no change to flush miss-predict branch path after instructions are issued in this situation. - * - * ROB.ROB size = inflight inst. ROB is unified for int and fp inst. - * One old approach is to combine the RAT and ROB as a huge CAM structure as in AMD K7. - * However, this approach is abandoned due to its high power and poor scalablility. - * McPAT uses current implementation of ROB as circular buffer. 
- * ROB is written once when instruction is issued and read once when the instruction is committed. * - */ - int robExtra = int(ceil(5 + log2(coredynp.num_hthreads))); - //5 bits are: busy, Issued, Finished, speculative, valid - if(coredynp.scheu_ty==PhysicalRegFile) - { - //PC is to id the instruction for recover exception. - //inst is used to map the renamed dest. registers.so that commit stage can know which reg/RRAT to update -// data = int(ceil((robExtra+coredynp.pc_width + -// coredynp.instruction_length + 2*coredynp.phy_ireg_width)/8.0)); - data = int(ceil((robExtra+coredynp.pc_width + - coredynp.phy_ireg_width)/8.0)); - } - else - { - //in RS based OOO, ROB also contains value of destination reg -// data = int(ceil((robExtra+coredynp.pc_width + -// coredynp.instruction_length + 2*coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); - data = int(ceil((robExtra + coredynp.pc_width + - coredynp.phy_ireg_width + coredynp.fp_data_width)/8.0)); - } - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].ROB_size;//The XML ROB size is for all threads - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.peak_commitW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 0; - ROB = new ArrayST(&interface_ip, "ReorderBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - ROB->area.set_area(ROB->area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); - 
area.set_area(area.get_area()+ ROB->local_result.area*coredynp.num_pipelines); - ROB_height =ROB->local_result.cache_ht; - } - - instruction_selection = new selection_logic(is_default, XML->sys.core[ithCore].instruction_window_size, - coredynp.peak_issueW, &interface_ip, Core_device, coredynp.core_ty); + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + LoadQ = new ArrayST(xml_data, &interface_ip, "Load Queue", Core_device, + clockRate, core_params.opt_local, + core_params.core_ty); + LoadQ->area.set_area(LoadQ->area.get_area() + + LoadQ->local_result.area); + area.set_area(area.get_area()*cdb_overhead); + lsq_height = (LSQ->local_result.cache_ht + + LoadQ->local_result.cache_ht) * sqrt(cdb_overhead); } + } -LoadStoreU::LoadStoreU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - LSQ(0), - exist(exist_) -{ - if (!exist) return; - int idx, tag, data, size, line, assoc, banks; - bool debug= false; - int ldst_opcode = XML->sys.core[ithCore].opcode_width;//16; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - cache_p = (Cache_policy)XML->sys.core[ithCore].dcache.dcache_config[7]; - - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - //Dcache - size = (int)XML->sys.core[ithCore].dcache.dcache_config[0]; - line = (int)XML->sys.core[ithCore].dcache.dcache_config[1]; 
- assoc = (int)XML->sys.core[ithCore].dcache.dcache_config[2]; - banks = (int)XML->sys.core[ithCore].dcache.dcache_config[3]; - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = debug?32768:(int)XML->sys.core[ithCore].dcache.dcache_config[0]; - interface_ip.line_sz = debug?64:(int)XML->sys.core[ithCore].dcache.dcache_config[1]; - interface_ip.assoc = debug?8:(int)XML->sys.core[ithCore].dcache.dcache_config[2]; - interface_ip.nbanks = debug?1:(int)XML->sys.core[ithCore].dcache.dcache_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].dcache.dcache_config[5]; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?3.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;//usually In-order has 1 and OOO has 2 at least. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.caches = new ArrayST(&interface_ip, "dcache", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.caches->local_result.area); - area.set_area(area.get_area()+ dcache.caches->local_result.area); - //output_data_csv(dcache.caches.local_result); - - //dCache controllers - //miss buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + dcache.caches->l_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.missb = new ArrayST(&interface_ip, "dcacheMissBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.missb->local_result.area); - area.set_area(area.get_area()+ dcache.missb->local_result.area); - //output_data_csv(dcache.missb.local_result); - - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - 
interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.core[ithCore].dcache.buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.ifb = new ArrayST(&interface_ip, "dcacheFillBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.ifb->local_result.area); - area.set_area(area.get_area()+ dcache.ifb->local_result.area); - //output_data_csv(dcache.ifb.local_result); - - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = dcache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = debug?1:XML->sys.core[ithCore].memory_ports;; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.prefetchb = new ArrayST(&interface_ip, "dcacheprefetchBuffer", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.prefetchb->local_result.area); - area.set_area(area.get_area()+ dcache.prefetchb->local_result.area); - //output_data_csv(dcache.prefetchb.local_result); - - //WBB - - if (cache_p==Write_back) - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = dcache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.core[ithCore].dcache.buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 2; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - 
interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - dcache.wbb = new ArrayST(&interface_ip, "dcacheWBB", Core_device, coredynp.opt_local, coredynp.core_ty); - dcache.area.set_area(dcache.area.get_area()+ dcache.wbb->local_result.area); - area.set_area(area.get_area()+ dcache.wbb->local_result.area); - //output_data_csv(dcache.wbb.local_result); - } - - /* - * LSU--in-order processors do not have separate load queue: unified lsq - * partitioned among threads - * it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ - */ - tag = ldst_opcode+XML->sys.virtual_address_width +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads))) + EXTRA_TAG_BITS; - data = XML->sys.machine_bits; - interface_ip.is_cache = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].store_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; - LSQ = new ArrayST(&interface_ip, "Load(Store)Queue", Core_device, coredynp.opt_local, coredynp.core_ty); - LSQ->area.set_area(LSQ->area.get_area()+ 
LSQ->local_result.area); - area.set_area(area.get_area()+ LSQ->local_result.area); - area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(LSQ.LSQ.local_result); - lsq_height=LSQ->local_result.cache_ht*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ - - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.core[ithCore].load_buffer_size*interface_ip.line_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports =XML->sys.core[ithCore].memory_ports; - LoadQ = new ArrayST(&interface_ip, "LoadQueue", Core_device, coredynp.opt_local, coredynp.core_ty); - LoadQ->area.set_area(LoadQ->area.get_area()+ LoadQ->local_result.area); - area.set_area(area.get_area()+ LoadQ->local_result.area); - area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(LoadQ.LoadQ.local_result); - lsq_height=(LSQ->local_result.cache_ht + LoadQ->local_result.cache_ht)*sqrt(cdb_overhead);/*XML->sys.core[ithCore].number_hardware_threads*/ - } +MemManU::MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), itlb(NULL), dtlb(NULL), + interface_ip(*interface_ip_), + 
core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int line; + + clockRate = core_params.clockRate; + name = "Memory Management Unit"; + + set_params_stats(); + + // These are shared between ITLB and DTLB + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + //Itlb TLBs are partioned among threads according to Nigara and Nehalem + tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + data = physical_address_width - int(floor(log2(virtual_memory_page_size))); + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = mem_man_params.itlb_number_entries * line; + interface_ip.line_sz = line; + interface_ip.assoc = mem_man_params.itlb_assoc; + interface_ip.nbanks = mem_man_params.itlb_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.number_instruction_fetch_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.number_instruction_fetch_ports; + interface_ip.throughput = mem_man_params.itlb_throughput / clockRate; + interface_ip.latency = mem_man_params.itlb_latency / clockRate; + itlb = new ArrayST(xml_data, &interface_ip, "Instruction TLB", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + itlb->local_result.area); + + //dtlb + tag = virtual_address_width - int(floor(log2(virtual_memory_page_size))) + + int(ceil(log2(core_params.num_hthreads))) + EXTRA_TAG_BITS; + data = physical_address_width - 
int(floor(log2(virtual_memory_page_size))); + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = mem_man_params.dtlb_number_entries * line; + interface_ip.line_sz = line; + interface_ip.assoc = mem_man_params.dtlb_assoc; + interface_ip.nbanks = mem_man_params.dtlb_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.memory_ports; + interface_ip.num_wr_ports = core_params.memory_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.memory_ports; + interface_ip.throughput = mem_man_params.dtlb_throughput / clockRate; + interface_ip.latency = mem_man_params.dtlb_latency / clockRate; + dtlb = new ArrayST(xml_data, &interface_ip, "Data TLB", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + area.set_area(area.get_area() + dtlb->local_result.area); } -MemManU::MemManU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - itlb(0), - dtlb(0), - exist(exist_) -{ - if (!exist) return; - int tag, data; - bool debug= false; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.specific_tag = 1; - //Itlb TLBs are partioned among threads according to Nigara and Nehalem - tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) + int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; - data = XML->sys.physical_address_width- 
int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].itlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].icache.icache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = debug?1:XML->sys.core[ithCore].number_instruction_fetch_ports; - itlb = new ArrayST(&interface_ip, "ITLB", Core_device, coredynp.opt_local, coredynp.core_ty); - itlb->area.set_area(itlb->area.get_area()+ itlb->local_result.area); - area.set_area(area.get_area()+ itlb->local_result.area); - //output_data_csv(itlb.tlb.local_result); - - //dtlb - tag = XML->sys.virtual_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))) +int(ceil(log2(XML->sys.core[ithCore].number_hardware_threads)))+ EXTRA_TAG_BITS; - data = XML->sys.physical_address_width- int(floor(log2(XML->sys.virtual_memory_page_size))); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].dtlb.number_entries*interface_ip.line_sz;//*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - 
interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[4]/clockRate; - interface_ip.latency = debug?1.0/clockRate:XML->sys.core[ithCore].dcache.dcache_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = XML->sys.core[ithCore].memory_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.core[ithCore].memory_ports; - dtlb = new ArrayST(&interface_ip, "DTLB", Core_device, coredynp.opt_local, coredynp.core_ty); - dtlb->area.set_area(dtlb->area.get_area()+ dtlb->local_result.area); - area.set_area(area.get_area()+ dtlb->local_result.area); - //output_data_csv(dtlb.tlb.local_result); +void +MemManU::set_params_stats() { + memset(&mem_man_params, 0, sizeof(MemoryManagementParams)); + memset(&mem_man_stats, 0, sizeof(MemoryManagementStats)); + int num_children = xml_data->nChildNode("component"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* child = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = child->getAttribute("type"); + + if (!type) + warnMissingComponentType(child->getAttribute("id")); + + STRCMP(type, "InstructionTLB") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("number_entries", + mem_man_params.itlb_number_entries); + ASSIGN_FP_IF("latency", mem_man_params.itlb_latency); + ASSIGN_FP_IF("throughput", mem_man_params.itlb_throughput); + ASSIGN_FP_IF("assoc", 
mem_man_params.itlb_assoc); + ASSIGN_FP_IF("nbanks", mem_man_params.itlb_nbanks); + + else { + warnUnrecognizedParam(node_name); + } + } + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("total_accesses", + mem_man_stats.itlb_total_accesses); + ASSIGN_FP_IF("total_misses", mem_man_stats.itlb_total_misses); + ASSIGN_FP_IF("conflicts", mem_man_stats.itlb_conflicts); + else { + warnUnrecognizedStat(node_name); + } + } + } STRCMP(type, "DataTLB") { + int sub_num_children = child->nChildNode("param"); + int j; + for (j = 0; j < sub_num_children; j++) { + XMLNode* paramNode = child->getChildNodePtr("param", &j); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("number_entries", + mem_man_params.dtlb_number_entries); + ASSIGN_FP_IF("latency", mem_man_params.dtlb_latency); + ASSIGN_FP_IF("throughput", mem_man_params.dtlb_throughput); + ASSIGN_FP_IF("assoc", mem_man_params.dtlb_assoc); + ASSIGN_FP_IF("nbanks", mem_man_params.dtlb_nbanks); + + else { + warnUnrecognizedParam(node_name); + } + } + sub_num_children = child->nChildNode("stat"); + for (j = 0; j < sub_num_children; j++) { + XMLNode* statNode = child->getChildNodePtr("stat", &j); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("read_accesses", + mem_man_stats.dtlb_read_accesses); + ASSIGN_FP_IF("read_misses", mem_man_stats.dtlb_read_misses); + ASSIGN_FP_IF("write_accesses", + mem_man_stats.dtlb_write_accesses); + 
ASSIGN_FP_IF("write_misses", mem_man_stats.dtlb_write_misses); + ASSIGN_FP_IF("conflicts", mem_man_stats.dtlb_conflicts); + + else { + warnUnrecognizedStat(node_name); + } + } + } + } } -RegFU::RegFU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - IRF (0), - FRF (0), - RFWIN (0), - exist(exist_) - { - /* - * processors have separate architectural register files for each thread. - * therefore, the bypass buses need to travel across all the register files. - */ - if (!exist) return; - int data; - - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - //**********************************IRF*************************************** - data = coredynp.int_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.cache_sz = coredynp.num_IRF_entry*interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 
- interface_ip.num_rd_ports = 2*coredynp.peak_issueW; - interface_ip.num_wr_ports = coredynp.peak_issueW; - interface_ip.num_se_rd_ports = 0; - IRF = new ArrayST(&interface_ip, "Integer Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - IRF->area.set_area(IRF->area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead); - area.set_area(area.get_area()+ IRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_pipelines*cdb_overhead); - //area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(IRF.RF.local_result); - - //**********************************FRF*************************************** - data = coredynp.fp_data_width; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/32.0))*4; - interface_ip.cache_sz = coredynp.num_FRF_entry*interface_ip.line_sz; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; +RegFU::RegFU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), IRF(NULL), FRF(NULL), RFWIN(NULL), + interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + /* + * processors have separate architectural register files for each thread. + * therefore, the bypass buses need to travel across all the register files. 
+ */ + if (!exist) return; + int data; + int line; + + clockRate = core_params.clockRate; + name = "Register File Unit"; + + //**********************************IRF************************************ + data = core_params.int_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.num_IRF_entry * line; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.phy_Regs_IRF_assoc; + interface_ip.nbanks = core_params.phy_Regs_IRF_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.phy_Regs_IRF_tag_width > 0; + interface_ip.tag_w = core_params.phy_Regs_IRF_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.phy_Regs_IRF_rd_ports; + interface_ip.num_wr_ports = core_params.phy_Regs_IRF_wr_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + IRF = new ArrayST(xml_data, &interface_ip, "Integer Register File", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + IRF->output_data.area *= core_params.num_hthreads * + core_params.num_pipelines * cdb_overhead; + area.set_area(area.get_area() + IRF->local_result.area * + core_params.num_hthreads * core_params.num_pipelines * + cdb_overhead); + + //**********************************FRF************************************ + data = core_params.fp_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.num_FRF_entry * line; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.phy_Regs_FRF_assoc; + interface_ip.nbanks = 
core_params.phy_Regs_FRF_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.phy_Regs_FRF_tag_width > 0; + interface_ip.tag_w = core_params.phy_Regs_FRF_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = core_params.phy_Regs_FRF_rd_ports; + interface_ip.num_wr_ports = core_params.phy_Regs_FRF_wr_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + FRF = new ArrayST(xml_data, &interface_ip, "FP Register File", Core_device, + clockRate, core_params.opt_local, core_params.core_ty); + FRF->output_data.area *= core_params.num_hthreads * + core_params.num_fp_pipelines * cdb_overhead; + area.set_area(area.get_area() + FRF->local_result.area * + core_params.num_hthreads * core_params.num_fp_pipelines * + cdb_overhead); + int_regfile_height = IRF->local_result.cache_ht * + core_params.num_hthreads * sqrt(cdb_overhead); + fp_regfile_height = FRF->local_result.cache_ht * core_params.num_hthreads * + sqrt(cdb_overhead); + //since a EXU is associated with each pipeline, the cdb should not have + //longer length. 
+ + if (core_params.regWindowing) { + //*********************************REG_WIN***************************** + //ECC, and usually 2 regs are transfered together during window + //shifting.Niagara Mega cell + data = core_params.int_data_width; + line = int(ceil(data / BITS_PER_BYTE)); + + interface_ip.cache_sz = core_params.register_window_size * + IRF->l_ip.cache_sz * core_params.num_hthreads; + interface_ip.line_sz = line; + interface_ip.assoc = core_params.register_window_assoc; + interface_ip.nbanks = core_params.register_window_nbanks; + interface_ip.out_w = line * BITS_PER_BYTE; + interface_ip.specific_tag = core_params.register_window_tag_width > 0; + interface_ip.tag_w = core_params.register_window_tag_width; + interface_ip.access_mode = Sequential; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 
- interface_ip.num_rd_ports = 2*XML->sys.core[ithCore].issue_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].issue_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.register_window_rw_ports; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; interface_ip.num_se_rd_ports = 0; - FRF = new ArrayST(&interface_ip, "Floating point Register File", Core_device, coredynp.opt_local, coredynp.core_ty); - FRF->area.set_area(FRF->area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead); - area.set_area(area.get_area()+ FRF->local_result.area*XML->sys.core[ithCore].number_hardware_threads*coredynp.num_fp_pipelines*cdb_overhead); - //area.set_area(area.get_area()*cdb_overhead); - //output_data_csv(FRF.RF.local_result); - int_regfile_height= IRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead); - fp_regfile_height = FRF->local_result.cache_ht*XML->sys.core[ithCore].number_hardware_threads*sqrt(cdb_overhead); - //since a EXU is associated with each pipeline, the cdb should not have longer length. 
- if (coredynp.regWindowing) - { - //*********************************REG_WIN************************************ - data = coredynp.int_data_width; //ECC, and usually 2 regs are transfered together during window shifting.Niagara Mega cell - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = int(ceil(data/8.0)); - interface_ip.cache_sz = XML->sys.core[ithCore].register_windows_size*IRF->l_ip.cache_sz*XML->sys.core[ithCore].number_hardware_threads; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 4.0/clockRate; - interface_ip.latency = 4.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//this is the transfer port for saving/restoring states when exceptions happen. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - RFWIN = new ArrayST(&interface_ip, "RegWindow", Core_device, coredynp.opt_local, coredynp.core_ty); - RFWIN->area.set_area(RFWIN->area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); - area.set_area(area.get_area()+ RFWIN->local_result.area*coredynp.num_pipelines); - //output_data_csv(RFWIN.RF.local_result); - } + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = + core_params.register_window_throughput / clockRate; + interface_ip.latency = + core_params.register_window_latency / clockRate; + RFWIN = new ArrayST(xml_data, &interface_ip, "RegWindow", Core_device, + clockRate, core_params.opt_local, + core_params.core_ty); + RFWIN->output_data.area *= core_params.num_pipelines; + area.set_area(area.get_area() + RFWIN->local_result.area * + core_params.num_pipelines); + } +} +EXECU::EXECU(XMLNode* _xml_data, + InputParameter* interface_ip_, double lsq_height_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), rfu(NULL), scheu(NULL), fp_u(NULL), + exeu(NULL), mul(NULL), int_bypass(NULL), intTagBypass(NULL), + int_mul_bypass(NULL), intTag_mul_Bypass(NULL), fp_bypass(NULL), + fpTagBypass(NULL), interface_ip(*interface_ip_), + lsq_height(lsq_height_), core_params(_core_params), + core_stats(_core_stats), exist(exist_) { + if (!exist) return; + double fu_height = 0.0; + clockRate = core_params.clockRate; + name = "Execution Unit"; + rfu = new RegFU(xml_data, &interface_ip, core_params, core_stats); + if (core_params.core_ty == OOO || + (core_params.core_ty == Inorder && core_params.multithreaded)) { + scheu = new SchedulerU(xml_data, &interface_ip, core_params, + core_stats); + area.set_area(area.get_area() + scheu->area.get_area() ); + } + exeu = new FunctionalUnit(xml_data, 
&interface_ip, core_params, + core_stats, ALU); + area.set_area(area.get_area() + exeu->area.get_area() + + rfu->area.get_area()); + fu_height = exeu->FU_height; + if (core_params.num_fpus > 0) { + fp_u = new FunctionalUnit(xml_data, &interface_ip, + core_params, core_stats, FPU); + area.set_area(area.get_area() + fp_u->area.get_area()); + } + if (core_params.num_muls > 0) { + mul = new FunctionalUnit(xml_data, &interface_ip, + core_params, core_stats, MUL); + area.set_area(area.get_area() + mul->area.get_area()); + fu_height += mul->FU_height; + } + /* + * broadcast logic, including int-broadcast; int_tag-broadcast; + * fp-broadcast; fp_tag-broadcast + * integer by pass has two paths and fp has 3 paths. + * on the same bus there are multiple tri-state drivers and muxes that go + * to different components on the same bus + */ + interface_ip.wt = core_params.execu_broadcast_wt; + interface_ip.wire_is_mat_type = core_params.execu_wire_mat_type; + interface_ip.wire_os_mat_type = core_params.execu_wire_mat_type; + interface_ip.throughput = core_params.broadcast_numerator / clockRate; + interface_ip.latency = core_params.broadcast_numerator / clockRate; + double scheu_Iw_height = 0.0; + double scheu_ROB_height = 0.0; + double scheu_fp_Iw_height = 0.0; + if (scheu) { + scheu_Iw_height = scheu->Iw_height; + scheu_ROB_height = scheu->ROB_height; + scheu_fp_Iw_height = scheu->fp_Iw_height; + } - } - -EXECU::EXECU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_, const CoreDynParam & dyn_p_, bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - lsq_height(lsq_height_), - coredynp(dyn_p_), - rfu(0), - scheu(0), - fp_u(0), - exeu(0), - mul(0), - int_bypass(0), - intTagBypass(0), - int_mul_bypass(0), - intTag_mul_Bypass(0), - fp_bypass(0), - fpTagBypass(0), - exist(exist_) -{ - if (!exist) return; - double fu_height = 0.0; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - rfu 
= new RegFU(XML, ithCore, &interface_ip,coredynp); - scheu = new SchedulerU(XML, ithCore, &interface_ip,coredynp); - exeu = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, ALU); - area.set_area(area.get_area()+ exeu->area.get_area() + rfu->area.get_area() +scheu->area.get_area() ); - fu_height = exeu->FU_height; - if (coredynp.num_fpus >0) - { - fp_u = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, FPU); - area.set_area(area.get_area()+ fp_u->area.get_area()); - } - if (coredynp.num_muls >0) - { - mul = new FunctionalUnit(XML, ithCore,&interface_ip, coredynp, MUL); - area.set_area(area.get_area()+ mul->area.get_area()); - fu_height += mul->FU_height; - } - /* - * broadcast logic, including int-broadcast; int_tag-broadcast; fp-broadcast; fp_tag-broadcast - * integer by pass has two paths and fp has 3 paths. - * on the same bus there are multiple tri-state drivers and muxes that go to different components on the same bus - */ - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 0; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2;//start from semi-global since local wires are already used - interface_ip.wire_os_mat_type = 2; - interface_ip.throughput = 10.0/clockRate; //Do not care - interface_ip.latency = 10.0/clockRate; - } - - if (coredynp.core_ty==Inorder) - { - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() + int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 
3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); - - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(XML->sys.machine_bits/32.0)*32*1.5), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.perThreadState, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->Iw_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - else - {//OOO - if (coredynp.scheu_ty==PhysicalRegFile) - { - /* For physical register based OOO, - * data broadcast interconnects cover across functional units, lsq, inst windows and register files, - * while tag broadcast interconnects also cover across ROB - */ - int_bypass = new interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, 
coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - else - { - /* - * In RS based processor both data and tag are broadcast together, - * covering functional units, lsq, nst windows, register files, and ROBs - */ - int_bypass = new 
interconnect("Int Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTagBypass = new interconnect("Int Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTagBypass->area.get_area()); - if (coredynp.num_muls>0) - { - int_mul_bypass = new interconnect("Mul Bypass Data", Core_device, 1, 1, int(ceil(coredynp.int_data_width)), - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - intTag_mul_Bypass = new interconnect("Mul Bypass tag" , Core_device, 1, 1, coredynp.phy_ireg_width, - rfu->int_regfile_height + exeu->FU_height + mul->FU_height + lsq_height + scheu->Iw_height + scheu->ROB_height , &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +int_mul_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +intTag_mul_Bypass->area.get_area()); - } - - if (coredynp.num_fpus>0) - { - fp_bypass = new interconnect("FP Bypass Data" , Core_device, 1, 1, int(ceil(coredynp.fp_data_width)), - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, coredynp.opt_local, coredynp.core_ty); - fpTagBypass = new interconnect("FP Bypass tag" , Core_device, 1, 1, coredynp.phy_freg_width, - rfu->fp_regfile_height + fp_u->FU_height + lsq_height + scheu->fp_Iw_height + scheu->ROB_height, &interface_ip, 3, - false, 1.0, 
coredynp.opt_local, coredynp.core_ty); - bypass.area.set_area(bypass.area.get_area() +fp_bypass->area.get_area()); - bypass.area.set_area(bypass.area.get_area() +fpTagBypass->area.get_area()); - } - } - - - } - area.set_area(area.get_area()+ bypass.area.get_area()); -} + // Common bypass logic parameters + double base_w = core_params.execu_bypass_base_width; + double base_h = core_params.execu_bypass_base_height; + int level = core_params.execu_bypass_start_wiring_level; + double route_over_perc = core_params.execu_bypass_route_over_perc; + Wire_type wire_type = core_params.execu_bypass_wire_type; + int data_w; + double len; + + if (core_params.core_ty == Inorder) { + data_w = int(ceil(data_path_width / 32.0)*32); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, false, + route_over_perc, core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.perThreadState; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, false, + route_over_perc, core_params.opt_local, + core_params.core_ty, wire_type); + + if (core_params.num_muls > 0) { + data_w = int(ceil(data_path_width / 32.0)*32*1.5); + len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.perThreadState; + len = rfu->fp_regfile_height + exeu->FU_height + mul->FU_height + + lsq_height + scheu_Iw_height; + intTag_mul_Bypass = new Interconnect(xml_data, "Mul Bypass Tag", + Core_device, base_w, 
base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + } + + if (core_params.num_fpus > 0) { + data_w = int(ceil(data_path_width / 32.0)*32*1.5); + len = rfu->fp_regfile_height + fp_u->FU_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, + base_w, base_h, data_w, len, + &interface_ip, level, clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.perThreadState; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_Iw_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } else {//OOO + if (core_params.scheu_ty == PhysicalRegFile) { + /* For physical register based OOO, + * data broadcast interconnects cover across functional units, lsq, + * inst windows and register files, + * while tag broadcast interconnects also cover across ROB + */ + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + if (core_params.num_muls > 0) { + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + + 
mul->FU_height + lsq_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + intTag_mul_Bypass = new Interconnect(xml_data, + "Mul Bypass Tag", + Core_device, base_w, + base_h, data_w, len, + &interface_ip, level, + clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + } -RENAMINGU::RENAMINGU(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_,bool exist_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - iFRAT(0), - fFRAT(0), - iRRAT(0), - fRRAT(0), - ifreeL(0), - ffreeL(0), - idcl(0), - fdcl(0), - RAHT(0), - exist(exist_) - { - /* - * Although renaming logic maybe be used in in-order processors, - * McPAT assumes no renaming logic is used since the performance gain is very limited and - * the only major inorder processor with renaming logic is Itainium - * that is a VLIW processor and different from current McPAT's model. - * physical register base OOO must have Dual-RAT architecture or equivalent structure.FRAT:FrontRAT, RRAT:RetireRAT; - * i,f prefix mean int and fp - * RAT for all Renaming logic, random accessible checkpointing is used, but only update when instruction retires. 
- * FRAT will be read twice and written once per instruction; - * RRAT will be write once per instruction when committing and reads out all when context switch - * checkpointing is implicit - * Renaming logic is duplicated for each different hardware threads - * - * No Dual-RAT is needed in RS-based OOO processors, - * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry, - * to make sure all the renamings associated with the ROB to be released are updated at the same time. - * RAM scheme has # ARchi Reg entry with each entry hold phy reg tag, - * CAM scheme has # Phy Reg entry with each entry hold ARchi reg tag, - * - * Both RAM and CAM have same DCL - */ - if (!exist) return; - int tag, data, out_w; -// interface_ip.wire_is_mat_type = 0; -// interface_ip.wire_os_mat_type = 0; -// interface_ip.wt = Global_30; - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - if (coredynp.core_ty==OOO) - { - //integer pipeline - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { //FRAT with global checkpointing (GCs) please see paper tech report for detailed explaintions - data = 33;//int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); -// data = int(ceil(coredynp.phy_ireg_width/8.0)); - out_w = 1;//int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - 
interface_ip.num_rd_ports = 2*coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - -// //RAHT According to Intel, combine GC with FRAT is very costly. -// data = int(ceil(coredynp.phy_ireg_width/8.0)*coredynp.num_IRF_entry); -// out_w = data; -// interface_ip.is_cache = false; -// interface_ip.pure_cam = false; -// interface_ip.pure_ram = true; -// interface_ip.line_sz = data; -// interface_ip.cache_sz = data*coredynp.globalCheckpoint; -// interface_ip.assoc = 1; -// interface_ip.nbanks = 1; -// interface_ip.out_w = out_w*8; -// interface_ip.access_mode = 0; -// interface_ip.throughput = 1.0/clockRate; -// interface_ip.latency = 1.0/clockRate; -// interface_ip.obj_func_dyn_energy = 0; -// interface_ip.obj_func_dyn_power = 0; -// interface_ip.obj_func_leak_power = 0; -// interface_ip.obj_func_cycle_t = 1; -// interface_ip.num_rw_ports = 1;//the extra one port is for GCs -// interface_ip.num_rd_ports = 2*coredynp.decodeW; -// interface_ip.num_wr_ports = coredynp.decodeW; -// interface_ip.num_se_rd_ports = 0; -// iFRAT = new ArrayST(&interface_ip, "Int FrontRAT"); -// iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); -// area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT floating point - data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size; - interface_ip.assoc = 1; - 
interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + if (core_params.num_fpus > 0) { + data_w = int(ceil(core_params.fp_data_width)); + len = rfu->fp_regfile_height + fp_u->FU_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.phy_freg_width; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } else { + /* + * In RS based processor both data and tag are broadcast together, + * covering functional units, lsq, nst windows, register files, and ROBs + */ + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + int_bypass = new Interconnect(xml_data, "Int Bypass Data", + Core_device, base_w, base_h, data_w, + len, &interface_ip, level, 
clockRate, + false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + lsq_height + + scheu_Iw_height + scheu_ROB_height; + intTagBypass = new Interconnect(xml_data, "Int Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + if (core_params.num_muls > 0) { + data_w = int(ceil(core_params.int_data_width)); + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + int_mul_bypass = new Interconnect(xml_data, "Mul Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + + data_w = core_params.phy_ireg_width; + len = rfu->int_regfile_height + exeu->FU_height + + mul->FU_height + lsq_height + scheu_Iw_height + + scheu_ROB_height; + intTag_mul_Bypass = new Interconnect(xml_data, + "Mul Bypass Tag", + Core_device, base_w, + base_h, data_w, len, + &interface_ip, level, + clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, + wire_type); + } - } - else if ((coredynp.rm_ty ==CAMbased)) - { - //FRAT - tag = coredynp.arch_ireg_width; - data = int(ceil ((coredynp.arch_ireg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 
1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.decodeW; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT for FP - tag = coredynp.arch_freg_width; - data = int(ceil ((coredynp.arch_freg_width+1*coredynp.globalCheckpoint)/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ 
fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + if (core_params.num_fpus > 0) { + data_w = int(ceil(core_params.fp_data_width)); + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fp_bypass = new Interconnect(xml_data, "FP Bypass Data", + Core_device, base_w, base_h, + data_w, len, &interface_ip, level, + clockRate, false, route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + + data_w = core_params.phy_freg_width; + len = rfu->fp_regfile_height + fp_u->FU_height + lsq_height + + scheu_fp_Iw_height + scheu_ROB_height; + fpTagBypass = new Interconnect(xml_data, "FP Bypass Tag", + Core_device, base_w, base_h, + data_w, len, &interface_ip, + level, clockRate, false, + route_over_perc, + core_params.opt_local, + core_params.core_ty, wire_type); + } + } + } + if (int_bypass) { + children.push_back(int_bypass); + } + if (intTagBypass) { + children.push_back(intTagBypass); + } + if (int_mul_bypass) { + children.push_back(int_mul_bypass); + } + if (intTag_mul_Bypass) { + children.push_back(intTag_mul_Bypass); + } + if (fp_bypass) { + children.push_back(fp_bypass); + } + if (fpTagBypass) { + children.push_back(fpTagBypass); + } - } + area.set_area(area.get_area() + int_bypass->area.get_area() + + intTagBypass->area.get_area()); + if (core_params.num_muls > 0) { + area.set_area(area.get_area() + int_mul_bypass->area.get_area() + + intTag_mul_Bypass->area.get_area()); + } + if (core_params.num_fpus > 0) { + area.set_area(area.get_area() + fp_bypass->area.get_area() + + fpTagBypass->area.get_area()); + } +} - //RRAT is always RAM based, does not have GCs, and is used only for record latest non-speculative mapping - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = 
data*XML->sys.core[ithCore].archi_Regs_IRF_size*2;//HACK to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; +RENAMINGU::RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_) + : McPATComponent(_xml_data), iFRAT(NULL), fFRAT(NULL), iRRAT(NULL), + fRRAT(NULL), ifreeL(NULL), ffreeL(NULL), idcl(NULL), fdcl(NULL), + RAHT(NULL), interface_ip(*interface_ip_), + core_params(_core_params), core_stats(_core_stats), exist(exist_) { + if (!exist) return; + int tag; + int data; + int out_w; + int size; + + // Assumption: + // We make an implicit design assumption based on the specific structure + // that is being modeled. + // 1. RAM-based RATs are direct mapped. However, if the associated + // scheduler is a reservation station style, the RATs are fully + // associative. + // 2. Non-CAM based RATs and free lists do not have tags. + // 3. Free lists are direct mapped. 
+ + const int RAM_BASED_RAT_ASSOC = 1; + const int RS_RAT_ASSOC = 0; + const int NON_CAM_BASED_TAG_WIDTH = 0; + const int FREELIST_ASSOC = 1; + + clockRate = core_params.clockRate; + name = "Rename Unit"; + if (core_params.core_ty == OOO) { + //integer pipeline + if (core_params.scheu_ty == PhysicalRegFile) { + if (core_params.rm_ty == RAMbased) { + //FRAT with global checkpointing (GCs) please see paper tech + //report for detailed explaintions + + data = int(ceil(core_params.phy_ireg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + + size = data * core_params.archi_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.core[ithCore].commit_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; - iRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iRRAT->area.set_area(iRRAT->area.get_area()+ iRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iRRAT->area.get_area()); - - //RRAT for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - 
interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size*2;//HACK to make it as least 64B - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FRAT floating point + data = int(ceil(core_params.phy_freg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * 
core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; - fRRAT = new ArrayST(&interface_ip, "Int RetireRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fRRAT->area.set_area(fRRAT->area.get_area()+ fRRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fRRAT->area.get_area()); - - //Freelist of renaming unit always RAM based - //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist - // 2)When instruction commits the Phyregisters/ROB needed to be recycled. - //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if ((core_params.rm_ty == CAMbased)) { + //IRAT + tag = core_params.arch_ireg_width; + //the address of CAM needed to be sent out + data = int(ceil((core_params.arch_ireg_width + 1 * + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_ireg_width / 
BITS_PER_BYTE)); + size = data * core_params.phy_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//TODO - interface_ip.num_rd_ports = coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW -1 + XML->sys.core[ithCore].commit_width; - //every cycle, (coredynp.decodeW -1) inst may need to send back it dest tags, committW insts needs to update freelist buffers + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ifreeL->area.get_area()); - - //freelist for FP - data = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ffreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = 
false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FRAT for FP + tag = core_params.arch_freg_width; + //the address of CAM needed to be sent out + data = int(ceil((core_params.arch_freg_width + 1 * + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); + size = data * core_params.phy_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW -1 + XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; - ffreeL = new ArrayST(&interface_ip, "Int Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ffreeL->area.set_area(ffreeL->area.get_area()+ ffreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ffreeL->area.get_area()); - - idcl = new 
dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); - - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased){ - /* - * however, RAT needs to do associative search in RAT, when instruction commits and ROB release the entry, - * to make sure all the renamings associated with the ROB to be released are updated to ARF at the same time. - * RAM based RAT for RS base OOO does not save the search operations. Its advantage is to have less entries than - * CAM based RAT so that it is more scalable as number of ROB/physical regs increases. - */ - tag = coredynp.phy_ireg_width; - data = int(ceil(coredynp.phy_ireg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.decodeW; - interface_ip.num_wr_ports = coredynp.decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= coredynp.commitW;//TODO - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->local_result.adjust_area(); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ 
iFRAT->area.get_area()); - - //FP - tag = coredynp.phy_freg_width; - data = int(ceil(coredynp.phy_freg_width*(1+coredynp.globalCheckpoint)/8.0)); - out_w = int(ceil(coredynp.phy_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].archi_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//the extra one port is for GCs - interface_ip.num_rd_ports = 2*coredynp.fp_decodeW; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= coredynp.fp_decodeW;//actually is fp commit width - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->local_result.adjust_area(); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - //FRAT - tag = coredynp.arch_ireg_width; - data = int(ceil (coredynp.arch_ireg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_ireg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_IRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - 
interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO - interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*XML->sys.core[ithCore].decode_width; - iFRAT = new ArrayST(&interface_ip, "Int FrontRAT", Core_device, coredynp.opt_local, coredynp.core_ty); - iFRAT->area.set_area(iFRAT->area.get_area()+ iFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ iFRAT->area.get_area()); - - //FRAT - tag = coredynp.arch_freg_width; - data = int(ceil (coredynp.arch_freg_width+1*coredynp.globalCheckpoint/8.0));//the address of CAM needed to be sent out - out_w = int(ceil (coredynp.arch_freg_width/8.0)); - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*XML->sys.core[ithCore].phy_Regs_FRF_size; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = out_w*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 2; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//for GCs - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width;//0;TODO; - interface_ip.num_wr_ports = coredynp.fp_decodeW; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports= 2*coredynp.fp_decodeW; - fFRAT = new ArrayST(&interface_ip, "Int FrontRAT", 
Core_device, coredynp.opt_local, coredynp.core_ty); - fFRAT->area.set_area(fFRAT->area.get_area()+ fFRAT->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ fFRAT->area.get_area()); + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + } - } - //No RRAT for RS based OOO - //Freelist of renaming unit of RS based OOO is unifed for both int and fp renaming unit since the ROB is unified - data = int(ceil(coredynp.phy_ireg_width/8.0)); - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.line_sz = data; - interface_ip.cache_sz = data*coredynp.num_ifreelist_entries; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/clockRate; - interface_ip.latency = 1.0/clockRate; + //RRAT is always RAM based, does not have GCs, and is used only for + //record latest non-speculative mapping + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_IRF_size * + NUM_SOURCE_OPERANDS; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.retire_rat_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + 
interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; + interface_ip.num_rd_ports = core_params.commitW; + interface_ip.num_wr_ports = core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iRRAT = new ArrayST(xml_data, &interface_ip, "Int Retire RAT", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + iRRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iRRAT->area.get_area()); + + //RRAT for FP + data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size * + NUM_SOURCE_OPERANDS; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RAM_BASED_RAT_ASSOC; + interface_ip.nbanks = core_params.retire_rat_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.retire_rat_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fRRAT = new ArrayST(xml_data, &interface_ip, "FP Retire 
RAT", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + fRRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fRRAT->area.get_area()); + + //Freelist of renaming unit always RAM based + //Recycle happens at two places: 1)when DCL check there are WAW, the Phyregisters/ROB directly recycles into freelist + // 2)When instruction commits the Phyregisters/ROB needed to be recycled. + //therefore num_wr port = decode-1(-1 means at least one phy reg will be used for the current renaming group) + commit width + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.num_ifreelist_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = + core_params.decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ifreeL = new ArrayST(xml_data, &interface_ip, "Integer Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ifreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ifreeL->area.get_area()); + + //freelist for FP + data = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + 
size = data * core_params.num_ffreelist_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.fp_decodeW; + interface_ip.num_wr_ports = + core_params.fp_decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ffreeL = new ArrayST(xml_data, &interface_ip, "FP Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ffreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ffreeL->area.get_area()); + + } else if (core_params.scheu_ty == ReservationStation) { + if (core_params.rm_ty == RAMbased) { + tag = core_params.phy_ireg_width; + data = int(ceil(core_params.phy_ireg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RS_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; + 
interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.commitW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->local_result.adjust_area(); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FP + tag = core_params.phy_freg_width; + data = int(ceil(core_params.phy_freg_width * + (1 + core_params.globalCheckpoint) / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.phy_freg_width / BITS_PER_BYTE)); + size = data * core_params.archi_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = RS_RAT_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = core_params.fp_issueW; + interface_ip.is_cache = true; + 
interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->local_result.adjust_area(); + fFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + fFRAT->area.get_area()); + + } else if ((core_params.rm_ty == CAMbased)) { + //FRAT + //the address of CAM needed to be sent out + tag = core_params.arch_ireg_width; + data = int(ceil (core_params.arch_ireg_width + + 1 * core_params.globalCheckpoint / + BITS_PER_BYTE)); + out_w = int(ceil (core_params.arch_ireg_width / + BITS_PER_BYTE)); + size = data * core_params.phy_Regs_IRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.decodeW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + iFRAT = new ArrayST(xml_data, &interface_ip, "Int Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + iFRAT->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + iFRAT->area.get_area()); + + //FRAT + tag = 
core_params.arch_freg_width; + //the address of CAM needed to be sent out + data = int(ceil(core_params.arch_freg_width + + 1 * core_params.globalCheckpoint / + BITS_PER_BYTE)); + out_w = int(ceil(core_params.arch_freg_width / BITS_PER_BYTE)); + size = data * core_params.phy_Regs_FRF_size; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = CAM_ASSOC; + interface_ip.nbanks = core_params.front_rat_nbanks; + interface_ip.out_w = out_w * BITS_PER_BYTE; + interface_ip.specific_tag = tag > 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Fast; interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_dyn_power = 0; interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//TODO - interface_ip.num_rd_ports = XML->sys.core[ithCore].decode_width; - interface_ip.num_wr_ports = XML->sys.core[ithCore].decode_width -1 + XML->sys.core[ithCore].commit_width; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.front_rat_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = core_params.fp_decodeW; interface_ip.num_se_rd_ports = 0; - ifreeL = new ArrayST(&interface_ip, "Unified Free List", Core_device, coredynp.opt_local, coredynp.core_ty); - ifreeL->area.set_area(ifreeL->area.get_area()+ ifreeL->local_result.area*XML->sys.core[ithCore].number_hardware_threads); - area.set_area(area.get_area()+ ifreeL->area.get_area()); + interface_ip.num_search_ports = + NUM_SOURCE_OPERANDS * core_params.fp_decodeW; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + fFRAT = new ArrayST(xml_data, &interface_ip, "FP Front RAT", + Core_device, clockRate, + core_params.opt_local, + core_params.core_ty); + fFRAT->output_data.area *= core_params.num_hthreads; + 
area.set_area(area.get_area() + fFRAT->area.get_area()); - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); + } + //No RRAT for RS based OOO + //Freelist of renaming unit of RS based OOO is unified for both int and fp renaming unit since the ROB is unified + data = int(ceil(core_params.phy_ireg_width / BITS_PER_BYTE)); + size = data * core_params.num_ifreelist_entries; + + interface_ip.cache_sz = size; + interface_ip.line_sz = data; + interface_ip.assoc = FREELIST_ASSOC; + interface_ip.nbanks = core_params.freelist_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = NON_CAM_BASED_TAG_WIDTH > 0; + interface_ip.tag_w = NON_CAM_BASED_TAG_WIDTH; + interface_ip.access_mode = Fast; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = core_params.freelist_rw_ports; + interface_ip.num_rd_ports = core_params.decodeW; + interface_ip.num_wr_ports = + core_params.decodeW - 1 + core_params.commitW; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / clockRate; + interface_ip.latency = 1.0 / clockRate; + ifreeL = new ArrayST(xml_data, &interface_ip, "Unified Free List", + Core_device, clockRate, core_params.opt_local, + core_params.core_ty); + ifreeL->output_data.area *= core_params.num_hthreads; + area.set_area(area.get_area() + ifreeL->area.get_area()); } -} - if (coredynp.core_ty==Inorder&& coredynp.issueW>1) - { - /* Dependency check logic will only present when decode(issue) width>1. - * Multiple issue in order processor can do without renaming, but dcl is a must.
- */ - idcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_ireg_width);//TODO:Separate 2 sections See TR - fdcl = new dep_resource_conflict_check(&interface_ip,coredynp,coredynp.phy_freg_width); } + idcl = + new dep_resource_conflict_check(xml_data, + "Instruction Dependency Check?", + &interface_ip, core_params, + core_params.phy_ireg_width, + clockRate); + fdcl = + new dep_resource_conflict_check(xml_data, + "FP Dependency Check?", &interface_ip, + core_params, + core_params.phy_freg_width, clockRate); } -Core::Core(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - ifu (0), - lsu (0), - mmu (0), - exu (0), - rnu (0), - corepipe (0), - undiffCore (0), - l2cache (0) -{ - /* - * initialize, compute and optimize individual components. - */ - - double pipeline_area_per_unit; - if (XML->sys.Private_L2) - { - l2cache = new SharedCache(XML,ithCore, &interface_ip); - - } -// interface_ip.wire_is_mat_type = 2; -// interface_ip.wire_os_mat_type = 2; -// interface_ip.wt =Global_30; - set_core_param(); - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - ifu = new InstFetchU(XML, ithCore, &interface_ip,coredynp); - lsu = new LoadStoreU(XML, ithCore, &interface_ip,coredynp); - mmu = new MemManU (XML, ithCore, &interface_ip,coredynp); - exu = new EXECU (XML, ithCore, &interface_ip,lsu->lsq_height, coredynp); - undiffCore = new UndiffCore(XML, ithCore, &interface_ip,coredynp); - if (coredynp.core_ty==OOO) - { - rnu = new RENAMINGU(XML, ithCore, &interface_ip,coredynp); - } - corepipe = new Pipeline(&interface_ip,coredynp); - - if (coredynp.core_ty==OOO) - { - pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/5.0; - if (rnu->exist) - { - rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); - } - } - else { - pipeline_area_per_unit = (corepipe->area.get_area()*coredynp.num_pipelines)/4.0; - } - - 
//area.set_area(area.get_area()+ corepipe->area.get_area()); - if (ifu->exist) - { - ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + ifu->area.get_area()); - } - if (lsu->exist) - { - lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area() + lsu->area.get_area()); - } - if (exu->exist) - { - exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area()+exu->area.get_area()); - } - if (mmu->exist) - { - mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); - area.set_area(area.get_area()+mmu->area.get_area()); - } - - if (coredynp.core_ty==OOO) - { - if (rnu->exist) - { - - area.set_area(area.get_area() + rnu->area.get_area()); - } - } - - if (undiffCore->exist) - { - area.set_area(area.get_area() + undiffCore->area.get_area()); - } - - if (XML->sys.Private_L2) - { - area.set_area(area.get_area() + l2cache->area.get_area()); - - } -// //clock power -// clockNetwork.init_wire_external(is_default, &interface_ip); -// clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
rule of thumb -// clockNetwork.end_wiring_level =5;//toplevel metal -// clockNetwork.start_wiring_level =5;//toplevel metal -// clockNetwork.num_regs = corepipe.tot_stage_vector; -// clockNetwork.optimize_wire(); -} +Core::Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_) + : McPATComponent(_xml_data), ifu(NULL), lsu(NULL), mmu(NULL), + exu(NULL), rnu(NULL), corepipe (NULL), undiffCore(NULL), l2cache (NULL), + ithCore(_ithCore), interface_ip(*interface_ip_) { + + ostringstream os; + os << ithCore; + name = "Core " + os.str(); + + int i = 0; + XMLNode* childXML; + for (i = 0; i < xml_data->nChildNode("component"); i++) { + childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + if (!type) + warnMissingComponentType(childXML->getAttribute("id")); + + STRCMP(type, "CacheUnit") { + XMLCSTR comp_name = childXML->getAttribute("id"); + if (!comp_name) + continue; + + STRCMP(comp_name, "system.L20") { + l2cache = new CacheUnit(childXML, &interface_ip); + children.push_back(l2cache); + } + } + } + set_core_param(); + clockRate = core_params.clockRate; + + ifu = new InstFetchU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(ifu); + lsu = new LoadStoreU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(lsu); + mmu = new MemManU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(mmu); + exu = new EXECU(xml_data, &interface_ip, lsu->lsq_height, + core_params, core_stats); + children.push_back(exu); + undiffCore = new UndiffCore(xml_data, &interface_ip, core_params); + children.push_back(undiffCore); + if (core_params.core_ty == OOO) { + rnu = new RENAMINGU(xml_data, &interface_ip, core_params, + core_stats); + children.push_back(rnu); + } + corepipe = new Pipeline(xml_data, &interface_ip, core_params); + children.push_back(corepipe); + + double pipeline_area_per_unit; + if (core_params.core_ty == OOO) { + pipeline_area_per_unit = 
(corepipe->area.get_area() * + core_params.num_pipelines) / 5.0; + if (rnu->exist) { + rnu->area.set_area(rnu->area.get_area() + pipeline_area_per_unit); + } + } else { + pipeline_area_per_unit = (corepipe->area.get_area() * + core_params.num_pipelines) / 4.0; + } -void BranchPredictor::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double r_access; - double w_access; - if (is_tdp) - { - r_access = coredynp.predictionW*coredynp.BR_duty_cycle; - w_access = 0*coredynp.BR_duty_cycle; - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->tdp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->tdp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->tdp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access = r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->tdp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = r_access; - RAS->stats_t.writeAc.access = w_access; - RAS->tdp_stats = RAS->stats_t; - } - else - { - //The resolution of BPT accesses is coarse, but this is - //because most simulators cannot track finer grained details - r_access = XML->sys.core[ithCore].branch_instructions; - w_access = XML->sys.core[ithCore].branch_mispredictions + 0.1*XML->sys.core[ithCore].branch_instructions;//10% of BR will flip internal bits//0 - globalBPT->stats_t.readAc.access = r_access; - globalBPT->stats_t.writeAc.access = w_access; - globalBPT->rtp_stats = globalBPT->stats_t; - - L1_localBPT->stats_t.readAc.access = r_access; - L1_localBPT->stats_t.writeAc.access = w_access; - L1_localBPT->rtp_stats = L1_localBPT->stats_t; - - L2_localBPT->stats_t.readAc.access = r_access; - L2_localBPT->stats_t.writeAc.access = w_access; - L2_localBPT->rtp_stats = L2_localBPT->stats_t; - - chooser->stats_t.readAc.access 
= r_access; - chooser->stats_t.writeAc.access = w_access; - chooser->rtp_stats = chooser->stats_t; - - RAS->stats_t.readAc.access = XML->sys.core[ithCore].function_calls; - RAS->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls; - RAS->rtp_stats = RAS->stats_t; - } - - globalBPT->power_t.reset(); - L1_localBPT->power_t.reset(); - L2_localBPT->power_t.reset(); - chooser->power_t.reset(); - RAS->power_t.reset(); - - globalBPT->power_t.readOp.dynamic += globalBPT->local_result.power.readOp.dynamic*globalBPT->stats_t.readAc.access + - globalBPT->stats_t.writeAc.access*globalBPT->local_result.power.writeOp.dynamic; - L1_localBPT->power_t.readOp.dynamic += L1_localBPT->local_result.power.readOp.dynamic*L1_localBPT->stats_t.readAc.access + - L1_localBPT->stats_t.writeAc.access*L1_localBPT->local_result.power.writeOp.dynamic; - - L2_localBPT->power_t.readOp.dynamic += L2_localBPT->local_result.power.readOp.dynamic*L2_localBPT->stats_t.readAc.access + - L2_localBPT->stats_t.writeAc.access*L2_localBPT->local_result.power.writeOp.dynamic; - - chooser->power_t.readOp.dynamic += chooser->local_result.power.readOp.dynamic*chooser->stats_t.readAc.access + - chooser->stats_t.writeAc.access*chooser->local_result.power.writeOp.dynamic; - RAS->power_t.readOp.dynamic += RAS->local_result.power.readOp.dynamic*RAS->stats_t.readAc.access + - RAS->stats_t.writeAc.access*RAS->local_result.power.writeOp.dynamic; - - if (is_tdp) - { - globalBPT->power = globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; - L1_localBPT->power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; - L2_localBPT->power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; - chooser->power = chooser->power_t + chooser->local_result.power*pppm_lkg; - RAS->power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; - - power = power + globalBPT->power + L1_localBPT->power + chooser->power + RAS->power; - } - else - { - globalBPT->rt_power = 
globalBPT->power_t + globalBPT->local_result.power*pppm_lkg; - L1_localBPT->rt_power = L1_localBPT->power_t + L1_localBPT->local_result.power*pppm_lkg; - L2_localBPT->rt_power = L2_localBPT->power_t + L2_localBPT->local_result.power*pppm_lkg; - chooser->rt_power = chooser->power_t + chooser->local_result.power*pppm_lkg; - RAS->rt_power = RAS->power_t + RAS->local_result.power*coredynp.pppm_lkg_multhread; - rt_power = rt_power + globalBPT->rt_power + L1_localBPT->rt_power + chooser->rt_power + RAS->rt_power; + // Move all of this to computeArea + //area.set_area(area.get_area()+ corepipe->area.get_area()); + if (ifu->exist) { + ifu->area.set_area(ifu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + ifu->area.get_area()); + } + if (lsu->exist) { + lsu->area.set_area(lsu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + lsu->area.get_area()); + } + if (exu->exist) { + exu->area.set_area(exu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + exu->area.get_area()); + } + if (mmu->exist) { + mmu->area.set_area(mmu->area.get_area() + pipeline_area_per_unit); + area.set_area(area.get_area() + mmu->area.get_area()); } -} -void BranchPredictor::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - if (is_tdp) - { - cout << indent_str<< "Global Predictor:" << endl; - cout << indent_str_next << "Area = " << globalBPT->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << globalBPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
globalBPT->power.readOp.longer_channel_leakage:globalBPT->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << globalBPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << globalBPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <exist) { -} - -void InstFetchU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - icache.caches->stats_t.readAc.access = icache.caches->l_ip.num_rw_ports*coredynp.IFU_duty_cycle; - icache.caches->stats_t.readAc.miss = 0; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; - icache.caches->tdp_stats = icache.caches->stats_t; - - icache.missb->stats_t.readAc.access = icache.missb->stats_t.readAc.hit= icache.missb->l_ip.num_search_ports; - icache.missb->stats_t.writeAc.access = icache.missb->stats_t.writeAc.hit= icache.missb->l_ip.num_search_ports; - icache.missb->tdp_stats = icache.missb->stats_t; - - icache.ifb->stats_t.readAc.access = icache.ifb->stats_t.readAc.hit= icache.ifb->l_ip.num_search_ports; - icache.ifb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports; - icache.ifb->tdp_stats = icache.ifb->stats_t; - - icache.prefetchb->stats_t.readAc.access = icache.prefetchb->stats_t.readAc.hit= icache.prefetchb->l_ip.num_search_ports; - icache.prefetchb->stats_t.writeAc.access = icache.ifb->stats_t.writeAc.hit= icache.ifb->l_ip.num_search_ports; - icache.prefetchb->tdp_stats = icache.prefetchb->stats_t; - - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].peak_issue_width; - IB->tdp_stats = IB->stats_t; - - if (coredynp.predictionW>0) - { - BTB->stats_t.readAc.access = coredynp.predictionW;//XML->sys.core[ithCore].BTB.read_accesses; - BTB->stats_t.writeAc.access = 0;//XML->sys.core[ithCore].BTB.write_accesses; + area.set_area(area.get_area() + rnu->area.get_area()); } + 
} - ID_inst->stats_t.readAc.access = coredynp.decodeW; - ID_operand->stats_t.readAc.access = coredynp.decodeW; - ID_misc->stats_t.readAc.access = coredynp.decodeW; - ID_inst->tdp_stats = ID_inst->stats_t; - ID_operand->tdp_stats = ID_operand->stats_t; - ID_misc->tdp_stats = ID_misc->stats_t; - + if (undiffCore->exist) { + area.set_area(area.get_area() + undiffCore->area.get_area()); + } + if (l2cache) { + area.set_area(area.get_area() + l2cache->area.get_area()); } - else - { - //init stats for Runtime Dynamic (RTP) - icache.caches->stats_t.readAc.access = XML->sys.core[ithCore].icache.read_accesses; - icache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].icache.read_misses; - icache.caches->stats_t.readAc.hit = icache.caches->stats_t.readAc.access - icache.caches->stats_t.readAc.miss; - icache.caches->rtp_stats = icache.caches->stats_t; +} - icache.missb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.missb->rtp_stats = icache.missb->stats_t; - icache.ifb->stats_t.readAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.ifb->rtp_stats = icache.ifb->stats_t; +void BranchPredictor::computeEnergy() { + if (!exist) return; + + // ASSUMPTION: All instructions access the branch predictors at Fetch and + // only branch instructions update the predictors regardless + // of the correctness of the prediction.
+ double tdp_read_accesses = + core_params.predictionW * core_stats.BR_duty_cycle; + globalBPT->tdp_stats.reset(); + globalBPT->tdp_stats.readAc.access = tdp_read_accesses; + globalBPT->tdp_stats.writeAc.access = 0; + globalBPT->rtp_stats.reset(); + globalBPT->rtp_stats.readAc.access = core_stats.total_instructions; + globalBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + globalBPT->power_t.reset(); + globalBPT->power_t.readOp.dynamic += + globalBPT->local_result.power.readOp.dynamic * + globalBPT->tdp_stats.readAc.access + + globalBPT->local_result.power.writeOp.dynamic * + globalBPT->tdp_stats.writeAc.access; + globalBPT->power_t = globalBPT->power_t + + globalBPT->local_result.power * pppm_lkg; + globalBPT->rt_power.reset(); + globalBPT->rt_power.readOp.dynamic += + globalBPT->local_result.power.readOp.dynamic * + globalBPT->rtp_stats.readAc.access + + globalBPT->local_result.power.writeOp.dynamic * + globalBPT->rtp_stats.writeAc.access; + + L1_localBPT->tdp_stats.reset(); + L1_localBPT->tdp_stats.readAc.access = tdp_read_accesses; + L1_localBPT->tdp_stats.writeAc.access = 0; + L1_localBPT->rtp_stats.reset(); + L1_localBPT->rtp_stats.readAc.access = core_stats.total_instructions; + L1_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + L1_localBPT->power_t.reset(); + L1_localBPT->power_t.readOp.dynamic += + L1_localBPT->local_result.power.readOp.dynamic * + L1_localBPT->tdp_stats.readAc.access + + L1_localBPT->local_result.power.writeOp.dynamic * + L1_localBPT->tdp_stats.writeAc.access; + L1_localBPT->power_t = L1_localBPT->power_t + + L1_localBPT->local_result.power * pppm_lkg; + L1_localBPT->rt_power.reset(); + L1_localBPT->rt_power.readOp.dynamic += + L1_localBPT->local_result.power.readOp.dynamic * + L1_localBPT->rtp_stats.readAc.access + + L1_localBPT->local_result.power.writeOp.dynamic * + L1_localBPT->rtp_stats.writeAc.access; + + L2_localBPT->tdp_stats.reset(); + L2_localBPT->tdp_stats.readAc.access = 
tdp_read_accesses; + L2_localBPT->tdp_stats.writeAc.access = 0; + L2_localBPT->rtp_stats.reset(); + L2_localBPT->rtp_stats.readAc.access = core_stats.branch_instructions; + L2_localBPT->rtp_stats.writeAc.access = core_stats.branch_instructions; + L2_localBPT->power_t.reset(); + L2_localBPT->power_t.readOp.dynamic += + L2_localBPT->local_result.power.readOp.dynamic * + L2_localBPT->tdp_stats.readAc.access + + L2_localBPT->local_result.power.writeOp.dynamic * + L2_localBPT->tdp_stats.writeAc.access; + L2_localBPT->power_t = L2_localBPT->power_t + + L2_localBPT->local_result.power * pppm_lkg; + L2_localBPT->rt_power.reset(); + L2_localBPT->rt_power.readOp.dynamic += + L2_localBPT->local_result.power.readOp.dynamic * + L2_localBPT->rtp_stats.readAc.access + + L2_localBPT->local_result.power.writeOp.dynamic * + L2_localBPT->rtp_stats.writeAc.access; + + chooser->tdp_stats.reset(); + chooser->tdp_stats.readAc.access = tdp_read_accesses; + chooser->tdp_stats.writeAc.access = 0; + chooser->rtp_stats.reset(); + chooser->rtp_stats.readAc.access = core_stats.total_instructions; + chooser->rtp_stats.writeAc.access = core_stats.branch_instructions; + chooser->power_t.reset(); + chooser->power_t.readOp.dynamic += + chooser->local_result.power.readOp.dynamic * + chooser->tdp_stats.readAc.access + + chooser->local_result.power.writeOp.dynamic * + chooser->tdp_stats.writeAc.access; + chooser->power_t = + chooser->power_t + chooser->local_result.power * pppm_lkg; + chooser->rt_power.reset(); + chooser->rt_power.readOp.dynamic += + chooser->local_result.power.readOp.dynamic * + chooser->rtp_stats.readAc.access + + chooser->local_result.power.writeOp.dynamic * + chooser->rtp_stats.writeAc.access; + + RAS->tdp_stats.reset(); + RAS->tdp_stats.readAc.access = tdp_read_accesses; + RAS->tdp_stats.writeAc.access = 0; + RAS->rtp_stats.reset(); + RAS->rtp_stats.readAc.access = core_stats.function_calls; + RAS->rtp_stats.writeAc.access = core_stats.function_calls; + RAS->power_t.reset(); + 
RAS->power_t.readOp.dynamic += + RAS->local_result.power.readOp.dynamic * RAS->tdp_stats.readAc.access + + RAS->local_result.power.writeOp.dynamic * + RAS->tdp_stats.writeAc.access; + RAS->power_t = RAS->power_t + RAS->local_result.power * + core_params.pppm_lkg_multhread; + RAS->rt_power.reset(); + RAS->rt_power.readOp.dynamic += RAS->local_result.power.readOp.dynamic * + RAS->rtp_stats.readAc.access + + RAS->local_result.power.writeOp.dynamic * + RAS->rtp_stats.writeAc.access; + + output_data.reset(); + if (globalBPT) { + globalBPT->output_data.peak_dynamic_power = + globalBPT->power_t.readOp.dynamic * clockRate; + globalBPT->output_data.runtime_dynamic_energy = + globalBPT->rt_power.readOp.dynamic; + output_data += globalBPT->output_data; + } + if (L1_localBPT) { + L1_localBPT->output_data.peak_dynamic_power = + L1_localBPT->power_t.readOp.dynamic * clockRate; + L1_localBPT->output_data.runtime_dynamic_energy = + L1_localBPT->rt_power.readOp.dynamic; + output_data += L1_localBPT->output_data; + } + if (L2_localBPT) { + L2_localBPT->output_data.peak_dynamic_power = + L2_localBPT->power_t.readOp.dynamic * clockRate; + L2_localBPT->output_data.runtime_dynamic_energy = + L2_localBPT->rt_power.readOp.dynamic; + output_data += L2_localBPT->output_data; + } + if (chooser) { + chooser->output_data.peak_dynamic_power = + chooser->power_t.readOp.dynamic * clockRate; + chooser->output_data.runtime_dynamic_energy = + chooser->rt_power.readOp.dynamic; + output_data += chooser->output_data; + } + if (RAS) { + RAS->output_data.peak_dynamic_power = + RAS->power_t.readOp.dynamic * clockRate; + RAS->output_data.subthreshold_leakage_power = + RAS->power_t.readOp.leakage * core_params.num_hthreads; + RAS->output_data.gate_leakage_power = + RAS->power_t.readOp.gate_leakage * core_params.num_hthreads; + RAS->output_data.runtime_dynamic_energy = RAS->rt_power.readOp.dynamic; + output_data += RAS->output_data; + } +} - icache.prefetchb->stats_t.readAc.access = 
icache.caches->stats_t.readAc.miss; - icache.prefetchb->stats_t.writeAc.access = icache.caches->stats_t.readAc.miss; - icache.prefetchb->rtp_stats = icache.prefetchb->stats_t; +void BranchPredictor::displayData(uint32_t indent, int plevel) { + if (!exist) return; - IB->stats_t.readAc.access = IB->stats_t.writeAc.access = XML->sys.core[ithCore].total_instructions; - IB->rtp_stats = IB->stats_t; + McPATComponent::displayData(indent, plevel); - if (coredynp.predictionW>0) - { - BTB->stats_t.readAc.access = XML->sys.core[ithCore].BTB.read_accesses;//XML->sys.core[ithCore].branch_instructions; - BTB->stats_t.writeAc.access = XML->sys.core[ithCore].BTB.write_accesses;//XML->sys.core[ithCore].branch_mispredictions; - BTB->rtp_stats = BTB->stats_t; - } + globalBPT->displayData(indent + 4, plevel); + L1_localBPT->displayData(indent + 4, plevel); + L2_localBPT->displayData(indent + 4, plevel); + chooser->displayData(indent + 4, plevel); + RAS->displayData(indent + 4, plevel); +} - ID_inst->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_operand->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_misc->stats_t.readAc.access = XML->sys.core[ithCore].total_instructions; - ID_inst->rtp_stats = ID_inst->stats_t; - ID_operand->rtp_stats = ID_operand->stats_t; - ID_misc->rtp_stats = ID_misc->stats_t; +void InstFetchU::computeEnergy() { + if (!exist) return; + if (BPT) { + BPT->computeEnergy(); } - icache.power_t.reset(); + IB->tdp_stats.reset(); + IB->tdp_stats.readAc.access = core_params.peak_issueW; + IB->tdp_stats.writeAc.access = core_params.peak_issueW; + IB->rtp_stats.reset(); + IB->rtp_stats.readAc.access = core_stats.total_instructions; + IB->rtp_stats.writeAc.access = core_stats.total_instructions; IB->power_t.reset(); -// ID_inst->power_t.reset(); -// ID_operand->power_t.reset(); -// ID_misc->power_t.reset(); - if (coredynp.predictionW>0) - { + IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic * + 
IB->tdp_stats.readAc.access + + IB->local_result.power.writeOp.dynamic * IB->tdp_stats.writeAc.access; + IB->power_t = IB->power_t + IB->local_result.power * pppm_lkg; + IB->rt_power.reset(); + IB->rt_power.readOp.dynamic += IB->local_result.power.readOp.dynamic * + IB->rtp_stats.readAc.access + + IB->local_result.power.writeOp.dynamic * IB->rtp_stats.writeAc.access; + + if (core_params.predictionW > 0) { + BTB->tdp_stats.reset(); + BTB->tdp_stats.readAc.access = core_params.predictionW; + BTB->tdp_stats.writeAc.access = 0; + BTB->rtp_stats.reset(); + BTB->rtp_stats.readAc.access = inst_fetch_stats.btb_read_accesses; + BTB->rtp_stats.writeAc.access = inst_fetch_stats.btb_write_accesses; BTB->power_t.reset(); + BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic * + BTB->tdp_stats.readAc.access + + BTB->local_result.power.writeOp.dynamic * + BTB->tdp_stats.writeAc.access; + BTB->rt_power.reset(); + BTB->rt_power.readOp.dynamic += + BTB->local_result.power.readOp.dynamic * + BTB->rtp_stats.readAc.access + + BTB->local_result.power.writeOp.dynamic * + BTB->rtp_stats.writeAc.access; } - icache.power_t.readOp.dynamic += (icache.caches->stats_t.readAc.hit*icache.caches->local_result.power.readOp.dynamic+ - //icache.caches->stats_t.readAc.miss*icache.caches->local_result.tag_array2->power.readOp.dynamic+ - icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.readOp.dynamic+ //assume tag data accessed in parallel - icache.caches->stats_t.readAc.miss*icache.caches->local_result.power.writeOp.dynamic); //read miss in Icache cause a write to Icache - icache.power_t.readOp.dynamic += icache.missb->stats_t.readAc.access*icache.missb->local_result.power.searchOp.dynamic + - icache.missb->stats_t.writeAc.access*icache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - icache.power_t.readOp.dynamic += icache.ifb->stats_t.readAc.access*icache.ifb->local_result.power.searchOp.dynamic + - 
icache.ifb->stats_t.writeAc.access*icache.ifb->local_result.power.writeOp.dynamic; - icache.power_t.readOp.dynamic += icache.prefetchb->stats_t.readAc.access*icache.prefetchb->local_result.power.searchOp.dynamic + - icache.prefetchb->stats_t.writeAc.access*icache.prefetchb->local_result.power.writeOp.dynamic; + ID_inst->tdp_stats.reset(); + ID_inst->tdp_stats.readAc.access = core_params.decodeW; + ID_inst->power_t.reset(); + ID_inst->power_t = ID_misc->power; + ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic * + ID_inst->tdp_stats.readAc.access; + ID_inst->rtp_stats.reset(); + ID_inst->rtp_stats.readAc.access = core_stats.total_instructions; + ID_inst->rt_power.reset(); + ID_inst->rt_power.readOp.dynamic = ID_inst->power.readOp.dynamic * + ID_inst->rtp_stats.readAc.access; + + ID_operand->tdp_stats.reset(); + ID_operand->tdp_stats.readAc.access = core_params.decodeW; + ID_operand->power_t.reset(); + ID_operand->power_t = ID_misc->power; + ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic * + ID_operand->tdp_stats.readAc.access; + ID_operand->rtp_stats.reset(); + ID_operand->rtp_stats.readAc.access = core_stats.total_instructions; + ID_operand->rt_power.reset(); + ID_operand->rt_power.readOp.dynamic = ID_operand->power.readOp.dynamic * + ID_operand->rtp_stats.readAc.access; + + ID_misc->tdp_stats.reset(); + ID_misc->tdp_stats.readAc.access = core_params.decodeW; + ID_misc->power_t.reset(); + ID_misc->power_t = ID_misc->power; + ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic * + ID_misc->tdp_stats.readAc.access; + ID_misc->rtp_stats.reset(); + ID_misc->rtp_stats.readAc.access = core_stats.total_instructions; + ID_misc->rt_power.reset(); + ID_misc->rt_power.readOp.dynamic = ID_misc->power.readOp.dynamic * + ID_misc->rtp_stats.readAc.access; + + power.reset(); + rt_power.reset(); + McPATComponent::computeEnergy(); + + output_data.reset(); + if (icache) { + output_data += icache->output_data; + } + if (IB) { + 
IB->output_data.peak_dynamic_power = + IB->power_t.readOp.dynamic * clockRate; + IB->output_data.runtime_dynamic_energy = IB->rt_power.readOp.dynamic; + output_data += IB->output_data; + } + if (BTB) { + BTB->output_data.peak_dynamic_power = + BTB->power_t.readOp.dynamic * clockRate; + BTB->output_data.runtime_dynamic_energy = BTB->rt_power.readOp.dynamic; + output_data += BTB->output_data; + } + if (BPT) { + output_data += BPT->output_data; + } + if (ID_inst) { + ID_inst->output_data.peak_dynamic_power = + ID_inst->power_t.readOp.dynamic * clockRate; + ID_inst->output_data.runtime_dynamic_energy = + ID_inst->rt_power.readOp.dynamic; + output_data += ID_inst->output_data; + } + if (ID_operand) { + ID_operand->output_data.peak_dynamic_power = + ID_operand->power_t.readOp.dynamic * clockRate; + ID_operand->output_data.runtime_dynamic_energy = + ID_operand->rt_power.readOp.dynamic; + output_data += ID_operand->output_data; + } + if (ID_misc) { + ID_misc->output_data.peak_dynamic_power = + ID_misc->power_t.readOp.dynamic * clockRate; + ID_misc->output_data.runtime_dynamic_energy = + ID_misc->rt_power.readOp.dynamic; + output_data += ID_misc->output_data; + } +} - IB->power_t.readOp.dynamic += IB->local_result.power.readOp.dynamic*IB->stats_t.readAc.access + - IB->stats_t.writeAc.access*IB->local_result.power.writeOp.dynamic; +void InstFetchU::displayData(uint32_t indent, int plevel) { + if (!exist) return; - if (coredynp.predictionW>0) - { - BTB->power_t.readOp.dynamic += BTB->local_result.power.readOp.dynamic*BTB->stats_t.readAc.access + - BTB->stats_t.writeAc.access*BTB->local_result.power.writeOp.dynamic; + McPATComponent::displayData(indent, plevel); - BPT->computeEnergy(is_tdp); + if (core_params.predictionW > 0) { + BTB->displayData(indent + 4, plevel); + if (BPT->exist) { + BPT->displayData(indent + 4, plevel); } + } + IB->displayData(indent + 4, plevel); + ID_inst->displayData(indent + 4, plevel); + ID_operand->displayData(indent + 4, plevel); + 
ID_misc->displayData(indent + 4, plevel); +} - if (is_tdp) - { -// icache.power = icache.power_t + -// (icache.caches->local_result.power)*pppm_lkg + -// (icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power)*pppm_Isub; - icache.power = icache.power_t + - (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power)*pppm_lkg; - - IB->power = IB->power_t + IB->local_result.power*pppm_lkg; - power = power + icache.power + IB->power; - if (coredynp.predictionW>0) - { - BTB->power = BTB->power_t + BTB->local_result.power*pppm_lkg; - power = power + BTB->power + BPT->power; - } +void RENAMINGU::computeEnergy() { + if (!exist) return; + + idcl->tdp_stats.reset(); + idcl->rtp_stats.reset(); + idcl->power_t.reset(); + idcl->rt_power.reset(); + if (core_params.core_ty == OOO) { + idcl->tdp_stats.readAc.access = core_params.decodeW; + idcl->rtp_stats.readAc.access = 3 * core_params.decodeW * + core_params.decodeW * core_stats.rename_reads; + } else if (core_params.issueW > 1) { + idcl->tdp_stats.readAc.access = core_params.decodeW; + idcl->rtp_stats.readAc.access = 2 * core_stats.int_instructions; + } + idcl->power_t.readOp.dynamic = idcl->tdp_stats.readAc.access * + idcl->power.readOp.dynamic; + idcl->power_t.readOp.leakage = idcl->power.readOp.leakage * + core_params.num_hthreads; + idcl->power_t.readOp.gate_leakage = idcl->power.readOp.gate_leakage * + core_params.num_hthreads; + idcl->rt_power.readOp.dynamic = idcl->rtp_stats.readAc.access * + idcl->power.readOp.dynamic; + + fdcl->tdp_stats.reset(); + fdcl->rtp_stats.reset(); + fdcl->power_t.reset(); + fdcl->rt_power.reset(); + if (core_params.core_ty == OOO) { + fdcl->tdp_stats.readAc.access = core_params.decodeW; + fdcl->rtp_stats.readAc.access = 3 * core_params.fp_issueW * + core_params.fp_issueW * core_stats.fp_rename_writes; + } else if (core_params.issueW > 1) { + 
fdcl->tdp_stats.readAc.access = core_params.decodeW; + fdcl->rtp_stats.readAc.access = core_stats.fp_instructions; + } + fdcl->power_t.readOp.dynamic = fdcl->tdp_stats.readAc.access * + fdcl->power.readOp.dynamic; + fdcl->power_t.readOp.leakage = fdcl->power.readOp.leakage * + core_params.num_hthreads; + fdcl->power_t.readOp.gate_leakage = fdcl->power.readOp.gate_leakage * + core_params.num_hthreads; + fdcl->rt_power.readOp.dynamic = fdcl->rtp_stats.readAc.access * + fdcl->power.readOp.dynamic; + + if (iRRAT) { + iRRAT->tdp_stats.reset(); + iRRAT->tdp_stats.readAc.access = iRRAT->l_ip.num_rd_ports; + iRRAT->tdp_stats.writeAc.access = iRRAT->l_ip.num_wr_ports; + iRRAT->rtp_stats.reset(); + iRRAT->rtp_stats.readAc.access = core_stats.rename_writes; + iRRAT->rtp_stats.writeAc.access = core_stats.rename_writes; + iRRAT->power_t.reset(); + iRRAT->power_t.readOp.dynamic += + iRRAT->tdp_stats.readAc.access * iRRAT->power.readOp.dynamic + + iRRAT->tdp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; + iRRAT->rt_power.reset(); + iRRAT->rt_power.readOp.dynamic += + iRRAT->rtp_stats.readAc.access * iRRAT->power.readOp.dynamic + + iRRAT->rtp_stats.writeAc.access * iRRAT->power.writeOp.dynamic; + iRRAT->power_t.readOp.leakage = + iRRAT->power.readOp.leakage * core_params.num_hthreads; + iRRAT->power_t.readOp.gate_leakage = + iRRAT->power.readOp.gate_leakage * core_params.num_hthreads; + } - ID_inst->power_t.readOp.dynamic = ID_inst->power.readOp.dynamic; - ID_operand->power_t.readOp.dynamic = ID_operand->power.readOp.dynamic; - ID_misc->power_t.readOp.dynamic = ID_misc->power.readOp.dynamic; - - ID_inst->power.readOp.dynamic *= ID_inst->tdp_stats.readAc.access; - ID_operand->power.readOp.dynamic *= ID_operand->tdp_stats.readAc.access; - ID_misc->power.readOp.dynamic *= ID_misc->tdp_stats.readAc.access; - - power = power + (ID_inst->power + - ID_operand->power + - ID_misc->power); - } - else - { -// icache.rt_power = icache.power_t + -// 
(icache.caches->local_result.power)*pppm_lkg + -// (icache.missb->local_result.power + -// icache.ifb->local_result.power + -// icache.prefetchb->local_result.power)*pppm_Isub; - - icache.rt_power = icache.power_t + - (icache.caches->local_result.power + - icache.missb->local_result.power + - icache.ifb->local_result.power + - icache.prefetchb->local_result.power)*pppm_lkg; - - IB->rt_power = IB->power_t + IB->local_result.power*pppm_lkg; - rt_power = rt_power + icache.rt_power + IB->rt_power; - if (coredynp.predictionW>0) - { - BTB->rt_power = BTB->power_t + BTB->local_result.power*pppm_lkg; - rt_power = rt_power + BTB->rt_power + BPT->rt_power; - } + if (ifreeL) { + ifreeL->tdp_stats.reset(); + ifreeL->tdp_stats.readAc.access = core_params.decodeW; + ifreeL->tdp_stats.writeAc.access = core_params.decodeW; + ifreeL->rtp_stats.reset(); + if (core_params.scheu_ty == PhysicalRegFile) { + ifreeL->rtp_stats.readAc.access = core_stats.rename_reads; + ifreeL->rtp_stats.writeAc.access = 2 * core_stats.rename_writes; + } else if (core_params.scheu_ty == ReservationStation) { + ifreeL->rtp_stats.readAc.access = + core_stats.rename_reads + core_stats.fp_rename_reads; + ifreeL->rtp_stats.writeAc.access = + 2 * (core_stats.rename_writes + core_stats.fp_rename_writes); + } + ifreeL->power_t.reset(); + ifreeL->power_t.readOp.dynamic += + ifreeL->tdp_stats.readAc.access * ifreeL->power.readOp.dynamic + + ifreeL->tdp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; + ifreeL->rt_power.reset(); + ifreeL->rt_power.readOp.dynamic += + ifreeL->rtp_stats.readAc.access * ifreeL->power.readOp.dynamic + + ifreeL->rtp_stats.writeAc.access * ifreeL->power.writeOp.dynamic; + ifreeL->power_t.readOp.leakage = + ifreeL->power.readOp.leakage * core_params.num_hthreads; + ifreeL->power_t.readOp.gate_leakage = + ifreeL->power.readOp.gate_leakage * core_params.num_hthreads; + } - ID_inst->rt_power.readOp.dynamic = ID_inst->power_t.readOp.dynamic*ID_inst->rtp_stats.readAc.access; - 
ID_operand->rt_power.readOp.dynamic = ID_operand->power_t.readOp.dynamic * ID_operand->rtp_stats.readAc.access; - ID_misc->rt_power.readOp.dynamic = ID_misc->power_t.readOp.dynamic * ID_misc->rtp_stats.readAc.access; + if (fRRAT) { + fRRAT->tdp_stats.reset(); + fRRAT->tdp_stats.readAc.access = fRRAT->l_ip.num_rd_ports; + fRRAT->tdp_stats.writeAc.access = fRRAT->l_ip.num_wr_ports; + fRRAT->rtp_stats.reset(); + fRRAT->rtp_stats.readAc.access = core_stats.fp_rename_writes; + fRRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; + fRRAT->power_t.reset(); + fRRAT->power_t.readOp.dynamic += + fRRAT->tdp_stats.readAc.access * fRRAT->power.readOp.dynamic + + fRRAT->tdp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; + fRRAT->rt_power.reset(); + fRRAT->rt_power.readOp.dynamic += + fRRAT->rtp_stats.readAc.access * fRRAT->power.readOp.dynamic + + fRRAT->rtp_stats.writeAc.access * fRRAT->power.writeOp.dynamic; + fRRAT->power_t.readOp.leakage = + fRRAT->power.readOp.leakage * core_params.num_hthreads; + fRRAT->power_t.readOp.gate_leakage = + fRRAT->power.readOp.gate_leakage * core_params.num_hthreads; + } - rt_power = rt_power + (ID_inst->rt_power + - ID_operand->rt_power + - ID_misc->rt_power); + if (ffreeL) { + ffreeL->tdp_stats.reset(); + ffreeL->tdp_stats.readAc.access = core_params.decodeW; + ffreeL->tdp_stats.writeAc.access = core_params.decodeW; + ffreeL->rtp_stats.reset(); + ffreeL->rtp_stats.readAc.access = core_stats.fp_rename_reads; + ffreeL->rtp_stats.writeAc.access = 2 * core_stats.fp_rename_writes; + ffreeL->power_t.reset(); + ffreeL->power_t.readOp.dynamic += + ffreeL->tdp_stats.readAc.access * ffreeL->power.readOp.dynamic + + ffreeL->tdp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; + ffreeL->rt_power.reset(); + ffreeL->rt_power.readOp.dynamic += + ffreeL->rtp_stats.readAc.access * ffreeL->power.readOp.dynamic + + ffreeL->rtp_stats.writeAc.access * ffreeL->power.writeOp.dynamic; + ffreeL->power_t.readOp.leakage = + 
ffreeL->power.readOp.leakage * core_params.num_hthreads; + ffreeL->power_t.readOp.gate_leakage = + ffreeL->power.readOp.gate_leakage * core_params.num_hthreads; } -} -void InstFetchU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - if (is_tdp) - { - - cout << indent_str<< "Instruction Cache:" << endl; - cout << indent_str_next << "Area = " << icache.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << icache.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? icache.power.readOp.longer_channel_leakage:icache.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << icache.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << icache.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <0) - { - cout << indent_str<< "Branch Target Buffer:" << endl; - cout << indent_str_next << "Area = " << BTB->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << BTB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
BTB->power.readOp.longer_channel_leakage:BTB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << BTB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << BTB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <exist) - { - cout << indent_str<< "Branch Predictor:" << endl; - cout << indent_str_next << "Area = " << BPT->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << BPT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? BPT->power.readOp.longer_channel_leakage:BPT->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << BPT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << BPT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3) - { - BPT->displayEnergy(indent+4, plevel, is_tdp); - } - } - } - cout << indent_str<< "Instruction Buffer:" << endl; - cout << indent_str_next << "Area = " << IB->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << IB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
IB->power.readOp.longer_channel_leakage:IB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << IB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << IB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <tdp_stats.readAc.access = iFRAT->l_ip.num_rd_ports; + iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; + iFRAT->tdp_stats.searchAc.access = iFRAT->l_ip.num_search_ports; + } else if ((core_params.rm_ty == CAMbased)) { + iFRAT->tdp_stats.readAc.access = iFRAT->l_ip.num_search_ports; + iFRAT->tdp_stats.writeAc.access = iFRAT->l_ip.num_wr_ports; + } + rtp_stats.reset(); + iFRAT->rtp_stats.readAc.access = core_stats.rename_reads; + iFRAT->rtp_stats.writeAc.access = core_stats.rename_writes; + if (core_params.scheu_ty == ReservationStation && + core_params.rm_ty == RAMbased) { + iFRAT->rtp_stats.searchAc.access = + core_stats.committed_int_instructions; + } + iFRAT->power_t.reset(); + iFRAT->power_t.readOp.dynamic += iFRAT->tdp_stats.readAc.access + * (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->tdp_stats.writeAc.access + * iFRAT->local_result.power.writeOp.dynamic + + iFRAT->tdp_stats.searchAc.access + * iFRAT->local_result.power.searchOp.dynamic; + iFRAT->power_t.readOp.leakage = + iFRAT->power.readOp.leakage * core_params.num_hthreads; + iFRAT->power_t.readOp.gate_leakage = + iFRAT->power.readOp.gate_leakage * core_params.num_hthreads; + iFRAT->rt_power.reset(); + iFRAT->rt_power.readOp.dynamic += iFRAT->rtp_stats.readAc.access + * (iFRAT->local_result.power.readOp.dynamic + + idcl->power.readOp.dynamic) + + iFRAT->rtp_stats.writeAc.access + * iFRAT->local_result.power.writeOp.dynamic + + iFRAT->rtp_stats.searchAc.access + * iFRAT->local_result.power.searchOp.dynamic; + } + + if (fFRAT) { + tdp_stats.reset(); + fFRAT->tdp_stats.writeAc.access = fFRAT->l_ip.num_wr_ports; + if ((core_params.rm_ty == CAMbased)) { + 
fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_search_ports; + } else if (core_params.rm_ty == RAMbased) { + fFRAT->tdp_stats.readAc.access = fFRAT->l_ip.num_rd_ports; + if (core_params.scheu_ty == ReservationStation) { + fFRAT->tdp_stats.searchAc.access = fFRAT->l_ip.num_search_ports; + } } + rtp_stats.reset(); + fFRAT->rtp_stats.readAc.access = core_stats.fp_rename_reads; + fFRAT->rtp_stats.writeAc.access = core_stats.fp_rename_writes; + if (core_params.scheu_ty == ReservationStation && + core_params.rm_ty == RAMbased) { + fFRAT->rtp_stats.searchAc.access = + core_stats.committed_fp_instructions; + } + fFRAT->power_t.reset(); + fFRAT->power_t.readOp.dynamic += fFRAT->tdp_stats.readAc.access + * (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->tdp_stats.writeAc.access + * fFRAT->local_result.power.writeOp.dynamic + + fFRAT->tdp_stats.searchAc.access + * fFRAT->local_result.power.searchOp.dynamic; + fFRAT->power_t.readOp.leakage = + fFRAT->power.readOp.leakage * core_params.num_hthreads; + fFRAT->power_t.readOp.gate_leakage = + fFRAT->power.readOp.gate_leakage * core_params.num_hthreads; + fFRAT->rt_power.reset(); + fFRAT->rt_power.readOp.dynamic += fFRAT->rtp_stats.readAc.access + * (fFRAT->local_result.power.readOp.dynamic + + fdcl->power.readOp.dynamic) + + fFRAT->rtp_stats.writeAc.access + * fFRAT->local_result.power.writeOp.dynamic + + fFRAT->rtp_stats.searchAc.access + * fFRAT->local_result.power.searchOp.dynamic; + } + output_data.reset(); + if (iFRAT) { + iFRAT->output_data.peak_dynamic_power = + iFRAT->power_t.readOp.dynamic * clockRate; + iFRAT->output_data.subthreshold_leakage_power = + iFRAT->power_t.readOp.leakage; + iFRAT->output_data.gate_leakage_power = + iFRAT->power_t.readOp.gate_leakage; + iFRAT->output_data.runtime_dynamic_energy = + iFRAT->rt_power.readOp.dynamic; + output_data += iFRAT->output_data; + } + if (fFRAT) { + fFRAT->output_data.peak_dynamic_power = + fFRAT->power_t.readOp.dynamic * clockRate; + 
fFRAT->output_data.subthreshold_leakage_power = + fFRAT->power_t.readOp.leakage; + fFRAT->output_data.gate_leakage_power = + fFRAT->power_t.readOp.gate_leakage; + fFRAT->output_data.runtime_dynamic_energy = + fFRAT->rt_power.readOp.dynamic; + output_data += fFRAT->output_data; + } + if (iRRAT) { + iRRAT->output_data.peak_dynamic_power = + iRRAT->power_t.readOp.dynamic * clockRate; + iRRAT->output_data.subthreshold_leakage_power = + iRRAT->power_t.readOp.leakage; + iRRAT->output_data.gate_leakage_power = + iRRAT->power_t.readOp.gate_leakage; + iRRAT->output_data.runtime_dynamic_energy = + iRRAT->rt_power.readOp.dynamic; + output_data += iRRAT->output_data; + } + if (fRRAT) { + fRRAT->output_data.peak_dynamic_power = + fRRAT->power_t.readOp.dynamic * clockRate; + fRRAT->output_data.subthreshold_leakage_power = + fRRAT->power_t.readOp.leakage; + fRRAT->output_data.gate_leakage_power = + fRRAT->power_t.readOp.gate_leakage; + fRRAT->output_data.runtime_dynamic_energy = + fRRAT->rt_power.readOp.dynamic; + output_data += fRRAT->output_data; + } + if (ifreeL) { + ifreeL->output_data.peak_dynamic_power = + ifreeL->power_t.readOp.dynamic * clockRate; + ifreeL->output_data.subthreshold_leakage_power = + ifreeL->power_t.readOp.leakage; + ifreeL->output_data.gate_leakage_power = + ifreeL->power_t.readOp.gate_leakage; + ifreeL->output_data.runtime_dynamic_energy = + ifreeL->rt_power.readOp.dynamic; + output_data += ifreeL->output_data; + } + if (ffreeL) { + ffreeL->output_data.peak_dynamic_power = + ffreeL->power_t.readOp.dynamic * clockRate; + ffreeL->output_data.subthreshold_leakage_power = + ffreeL->power_t.readOp.leakage; + ffreeL->output_data.gate_leakage_power = + ffreeL->power_t.readOp.gate_leakage; + ffreeL->output_data.runtime_dynamic_energy = + ffreeL->rt_power.readOp.dynamic; + output_data += ffreeL->output_data; + } + if (idcl) { + idcl->output_data.peak_dynamic_power = + idcl->power_t.readOp.dynamic * clockRate; + idcl->output_data.subthreshold_leakage_power = + 
idcl->power_t.readOp.leakage; + idcl->output_data.gate_leakage_power = + idcl->power_t.readOp.gate_leakage; + idcl->output_data.runtime_dynamic_energy = + idcl->rt_power.readOp.dynamic; + output_data += idcl->output_data; + } + if (fdcl) { + fdcl->output_data.peak_dynamic_power = + fdcl->power_t.readOp.dynamic * clockRate; + fdcl->output_data.subthreshold_leakage_power = + fdcl->power_t.readOp.leakage; + fdcl->output_data.gate_leakage_power = + fdcl->power_t.readOp.gate_leakage; + fdcl->output_data.runtime_dynamic_energy = + fdcl->rt_power.readOp.dynamic; + output_data += fdcl->output_data; + } + if (RAHT) { + output_data += RAHT->output_data; + } } -void RENAMINGU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double pppm_t[4] = {1,1,1,1}; - if (is_tdp) - {//init stats for Peak - if (coredynp.core_ty==OOO){ - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - - iRRAT->stats_t.readAc.access = iRRAT->l_ip.num_rd_ports; - iRRAT->stats_t.writeAc.access = iRRAT->l_ip.num_wr_ports; - iRRAT->tdp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = fRRAT->l_ip.num_rd_ports; - fRRAT->stats_t.writeAc.access = fRRAT->l_ip.num_wr_ports; - fRRAT->tdp_stats = fRRAT->stats_t; - - ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports;; 
- ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = coredynp.decodeW;//ffreeL->l_ip.num_rd_ports; - ffreeL->stats_t.writeAc.access = coredynp.decodeW;//ffreeL->l_ip.num_wr_ports; - ffreeL->tdp_stats = ffreeL->stats_t; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_rd_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->stats_t.searchAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_rd_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->stats_t.searchAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = iFRAT->l_ip.num_search_ports; - iFRAT->stats_t.writeAc.access = iFRAT->l_ip.num_wr_ports; - iFRAT->tdp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = fFRAT->l_ip.num_search_ports; - fFRAT->stats_t.writeAc.access = fFRAT->l_ip.num_wr_ports; - fFRAT->tdp_stats = fFRAT->stats_t; - } - //Unified free list for both int and fp - ifreeL->stats_t.readAc.access = coredynp.decodeW;//ifreeL->l_ip.num_rd_ports; - ifreeL->stats_t.writeAc.access = coredynp.decodeW;//ifreeL->l_ip.num_wr_ports; - ifreeL->tdp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - else - { - if (coredynp.issueW>1) - { - idcl->stats_t.readAc.access = coredynp.decodeW; - fdcl->stats_t.readAc.access = coredynp.decodeW; - idcl->tdp_stats = idcl->stats_t; - fdcl->tdp_stats = fdcl->stats_t; - } - } +void RENAMINGU::displayData(uint32_t indent, int plevel) { + if (!exist) return; - } - else - {//init stats for Runtime 
Dynamic (RTP) - if (coredynp.core_ty==OOO){ - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - - iRRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - iRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iRRAT->rtp_stats = iRRAT->stats_t; - - fRRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_writes;//Hack, should be (context switch + branch mispredictions)*16 - fRRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fRRAT->rtp_stats = fRRAT->stats_t; - - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - ifreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].rename_writes; - ifreeL->rtp_stats = ifreeL->stats_t; - - ffreeL->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - ffreeL->stats_t.writeAc.access = 2*XML->sys.core[ithCore].fp_rename_writes; - ffreeL->rtp_stats = ffreeL->stats_t; - } - else if (coredynp.scheu_ty==ReservationStation){ - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = 
XML->sys.core[ithCore].rename_writes; - iFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_int_instructions;//hack: not all committed instructions use regs. - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->stats_t.searchAc.access = XML->sys.core[ithCore].committed_fp_instructions; - fFRAT->rtp_stats = fFRAT->stats_t; - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads; - iFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].rename_writes; - iFRAT->rtp_stats = iFRAT->stats_t; - - fFRAT->stats_t.readAc.access = XML->sys.core[ithCore].fp_rename_reads; - fFRAT->stats_t.writeAc.access = XML->sys.core[ithCore].fp_rename_writes; - fFRAT->rtp_stats = fFRAT->stats_t; - } - //Unified free list for both int and fp since the ROB act as physcial registers - ifreeL->stats_t.readAc.access = XML->sys.core[ithCore].rename_reads + - XML->sys.core[ithCore].fp_rename_reads; - ifreeL->stats_t.writeAc.access = 2*(XML->sys.core[ithCore].rename_writes + - XML->sys.core[ithCore].fp_rename_writes);//HACK: 2-> since some of renaming in the same group - //are terminated early - ifreeL->rtp_stats = ifreeL->stats_t; - } - idcl->stats_t.readAc.access = 3*coredynp.decodeW*coredynp.decodeW*XML->sys.core[ithCore].rename_reads; - fdcl->stats_t.readAc.access = 3*coredynp.fp_issueW*coredynp.fp_issueW*XML->sys.core[ithCore].fp_rename_writes; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - else - { - if (coredynp.issueW>1) - { - idcl->stats_t.readAc.access = 2*XML->sys.core[ithCore].int_instructions; - fdcl->stats_t.readAc.access = XML->sys.core[ithCore].fp_instructions; - idcl->rtp_stats = idcl->stats_t; - fdcl->rtp_stats = fdcl->stats_t; - } - } + McPATComponent::displayData(indent, plevel); + if (core_params.core_ty == OOO) { + 
iFRAT->displayData(indent + 4, plevel); + fFRAT->displayData(indent + 4, plevel); + ifreeL->displayData(indent + 4, plevel); + + if (core_params.scheu_ty == PhysicalRegFile) { + iRRAT->displayData(indent + 4, plevel); + fRRAT->displayData(indent + 4, plevel); + ffreeL->displayData(indent + 4, plevel); } - /* Compute engine */ - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - - iRRAT->power_t.reset(); - fRRAT->power_t.reset(); - ifreeL->power_t.reset(); - ffreeL->power_t.reset(); - - iRRAT->power_t.readOp.dynamic += (iRRAT->stats_t.readAc.access*iRRAT->local_result.power.readOp.dynamic - +iRRAT->stats_t.writeAc.access*iRRAT->local_result.power.writeOp.dynamic); - fRRAT->power_t.readOp.dynamic += (fRRAT->stats_t.readAc.access*fRRAT->local_result.power.readOp.dynamic - +fRRAT->stats_t.writeAc.access*fRRAT->local_result.power.writeOp.dynamic); - ifreeL->power_t.readOp.dynamic += 
(ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic - +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); - ffreeL->power_t.readOp.dynamic += (ffreeL->stats_t.readAc.access*ffreeL->local_result.power.readOp.dynamic - +ffreeL->stats_t.writeAc.access*ffreeL->local_result.power.writeOp.dynamic); + } + idcl->displayData(indent + 4, plevel); + fdcl->displayData(indent + 4, plevel); +} - } - else if (coredynp.scheu_ty==ReservationStation) - { - if (coredynp.rm_ty ==RAMbased) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.readOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic - +iFRAT->stats_t.searchAc.access*iFRAT->local_result.power.searchOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.readOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic - +fFRAT->stats_t.searchAc.access*fFRAT->local_result.power.searchOp.dynamic); - } - else if ((coredynp.rm_ty ==CAMbased)) - { - iFRAT->power_t.reset(); - fFRAT->power_t.reset(); - iFRAT->power_t.readOp.dynamic += (iFRAT->stats_t.readAc.access - *(iFRAT->local_result.power.searchOp.dynamic + idcl->power.readOp.dynamic) - +iFRAT->stats_t.writeAc.access*iFRAT->local_result.power.writeOp.dynamic); - fFRAT->power_t.readOp.dynamic += (fFRAT->stats_t.readAc.access - *(fFRAT->local_result.power.searchOp.dynamic + fdcl->power.readOp.dynamic) - +fFRAT->stats_t.writeAc.access*fFRAT->local_result.power.writeOp.dynamic); - } - ifreeL->power_t.reset(); - ifreeL->power_t.readOp.dynamic += (ifreeL->stats_t.readAc.access*ifreeL->local_result.power.readOp.dynamic - +ifreeL->stats_t.writeAc.access*ifreeL->local_result.power.writeOp.dynamic); - } +void SchedulerU::computeEnergy() { + if (!exist) return; - } - else - { 
- if (coredynp.issueW>1) - { - idcl->power_t.reset(); - fdcl->power_t.reset(); - set_pppm(pppm_t, idcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); - idcl->power_t = idcl->power * pppm_t; - set_pppm(pppm_t, fdcl->stats_t.readAc.access, coredynp.num_hthreads, coredynp.num_hthreads, idcl->stats_t.readAc.access); - fdcl->power_t = fdcl->power * pppm_t; - } + double ROB_duty_cycle; + ROB_duty_cycle = 1; - } + if (int_instruction_selection) { + int_instruction_selection->computeEnergy(); + } - //assign value to tpd and rtp - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - iRRAT->power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread; - fRRAT->power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - ffreeL->power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread; - power = power + (iFRAT->power + fFRAT->power) - + (iRRAT->power + fRRAT->power) - + (ifreeL->power + ffreeL->power); - } - else if (coredynp.scheu_ty==ReservationStation) - { - iFRAT->power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - ifreeL->power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - power = power + (iFRAT->power + fFRAT->power) - + ifreeL->power; - } - } - else - { - power = power + idcl->power_t + fdcl->power_t; - } + if (fp_instruction_selection) { + fp_instruction_selection->computeEnergy(); + } + if 
(int_inst_window) { + int_inst_window->tdp_stats.reset(); + int_inst_window->rtp_stats.reset(); + int_inst_window->power_t.reset(); + int_inst_window->rt_power.reset(); + if (core_params.core_ty == OOO) { + int_inst_window->tdp_stats.readAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.writeAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.searchAc.access = + core_params.issueW * core_params.num_pipelines; + + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->tdp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->tdp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->tdp_stats.writeAc.access; + + int_inst_window->rtp_stats.readAc.access = + core_stats.inst_window_reads; + int_inst_window->rtp_stats.writeAc.access = + core_stats.inst_window_writes; + int_inst_window->rtp_stats.searchAc.access = + core_stats.inst_window_wakeup_accesses; + + int_inst_window->rt_power.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->rtp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->rtp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->rtp_stats.writeAc.access; + } else if (core_params.multithreaded) { + int_inst_window->tdp_stats.readAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.writeAc.access = + core_params.issueW * core_params.num_pipelines; + int_inst_window->tdp_stats.searchAc.access = + core_params.issueW * core_params.num_pipelines; + + int_inst_window->power_t.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->tdp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + 
int_inst_window->tdp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->tdp_stats.writeAc.access; + + int_inst_window->rtp_stats.readAc.access = + core_stats.int_instructions + core_stats.fp_instructions; + int_inst_window->rtp_stats.writeAc.access = + core_stats.int_instructions + core_stats.fp_instructions; + int_inst_window->rtp_stats.searchAc.access = + 2 * (core_stats.int_instructions + core_stats.fp_instructions); + + int_inst_window->rt_power.readOp.dynamic += + int_inst_window->local_result.power.readOp.dynamic * + int_inst_window->rtp_stats.readAc.access + + int_inst_window->local_result.power.searchOp.dynamic * + int_inst_window->rtp_stats.searchAc.access + + int_inst_window->local_result.power.writeOp.dynamic * + int_inst_window->rtp_stats.writeAc.access; } - else - { - if (coredynp.core_ty==OOO) - { - if (coredynp.scheu_ty==PhysicalRegFile) - { - iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - iRRAT->rt_power = iRRAT->power_t + iRRAT->local_result.power * coredynp.pppm_lkg_multhread; - fRRAT->rt_power = fRRAT->power_t + fRRAT->local_result.power * coredynp.pppm_lkg_multhread; - ifreeL->rt_power = ifreeL->power_t + ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - ffreeL->rt_power = ffreeL->power_t + ffreeL->local_result.power * coredynp.pppm_lkg_multhread; - rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) - + (iRRAT->rt_power + fRRAT->rt_power) - + (ifreeL->rt_power + ffreeL->rt_power); - } - else if (coredynp.scheu_ty==ReservationStation) - { - iFRAT->rt_power = iFRAT->power_t + (iFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + idcl->power_t; - fFRAT->rt_power = fFRAT->power_t + (fFRAT->local_result.power ) * coredynp.pppm_lkg_multhread + fdcl->power_t; - ifreeL->rt_power = ifreeL->power_t + 
ifreeL->local_result.power * coredynp.pppm_lkg_multhread; - rt_power = rt_power + (iFRAT->rt_power + fFRAT->rt_power) - + ifreeL->rt_power; - } - } - else - { - rt_power = rt_power + idcl->power_t + fdcl->power_t; - } + } - } -} + if (fp_inst_window) { + fp_inst_window->tdp_stats.reset(); + fp_inst_window->tdp_stats.readAc.access = + fp_inst_window->l_ip.num_rd_ports * core_params.num_fp_pipelines; + fp_inst_window->tdp_stats.writeAc.access = + fp_inst_window->l_ip.num_wr_ports * core_params.num_fp_pipelines; + fp_inst_window->tdp_stats.searchAc.access = + fp_inst_window->l_ip.num_search_ports * + core_params.num_fp_pipelines; + + fp_inst_window->rtp_stats.reset(); + fp_inst_window->rtp_stats.readAc.access = + core_stats.fp_inst_window_reads; + fp_inst_window->rtp_stats.writeAc.access = + core_stats.fp_inst_window_writes; + fp_inst_window->rtp_stats.searchAc.access = + core_stats.fp_inst_window_wakeup_accesses; + + fp_inst_window->power_t.reset(); + fp_inst_window->power_t.readOp.dynamic += + fp_inst_window->power.readOp.dynamic * + fp_inst_window->tdp_stats.readAc.access + + fp_inst_window->power.searchOp.dynamic * + fp_inst_window->tdp_stats.searchAc.access + + fp_inst_window->power.writeOp.dynamic * + fp_inst_window->tdp_stats.writeAc.access; + + fp_inst_window->rt_power.reset(); + fp_inst_window->rt_power.readOp.dynamic += + fp_inst_window->power.readOp.dynamic * + fp_inst_window->rtp_stats.readAc.access + + fp_inst_window->power.searchOp.dynamic * + fp_inst_window->rtp_stats.searchAc.access + + fp_inst_window->power.writeOp.dynamic * + fp_inst_window->rtp_stats.writeAc.access; + } -void RENAMINGU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - if (is_tdp) - { - - if (coredynp.core_ty==OOO) - { - cout << indent_str<< "Int Front End RAT:" << endl; - cout << indent_str_next << "Area = " << 
iFRAT->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << iFRAT->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? iFRAT->power.readOp.longer_channel_leakage:iFRAT->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << iFRAT->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << iFRAT->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <tdp_stats.reset(); + ROB->tdp_stats.readAc.access = core_params.commitW * + core_params.num_pipelines * ROB_duty_cycle; + ROB->tdp_stats.writeAc.access = core_params.issueW * + core_params.num_pipelines * ROB_duty_cycle; + ROB->rtp_stats.reset(); + ROB->rtp_stats.readAc.access = core_stats.ROB_reads; + ROB->rtp_stats.writeAc.access = core_stats.ROB_writes; + ROB->power_t.reset(); + ROB->power_t.readOp.dynamic += + ROB->local_result.power.readOp.dynamic * + ROB->tdp_stats.readAc.access + + ROB->local_result.power.writeOp.dynamic * + ROB->tdp_stats.writeAc.access; + ROB->rt_power.reset(); + ROB->rt_power.readOp.dynamic += + ROB->local_result.power.readOp.dynamic * + ROB->rtp_stats.readAc.access + + ROB->local_result.power.writeOp.dynamic * + ROB->rtp_stats.writeAc.access; + } + + output_data.reset(); + if (int_inst_window) { + int_inst_window->output_data.subthreshold_leakage_power = + int_inst_window->power_t.readOp.leakage; + int_inst_window->output_data.gate_leakage_power = + int_inst_window->power_t.readOp.gate_leakage; + int_inst_window->output_data.peak_dynamic_power = + int_inst_window->power_t.readOp.dynamic * clockRate; + int_inst_window->output_data.runtime_dynamic_energy = + int_inst_window->rt_power.readOp.dynamic; + output_data += int_inst_window->output_data; + } + if (fp_inst_window) { + fp_inst_window->output_data.subthreshold_leakage_power = + fp_inst_window->power_t.readOp.leakage; + 
fp_inst_window->output_data.gate_leakage_power = + fp_inst_window->power_t.readOp.gate_leakage; + fp_inst_window->output_data.peak_dynamic_power = + fp_inst_window->power_t.readOp.dynamic * clockRate; + fp_inst_window->output_data.runtime_dynamic_energy = + fp_inst_window->rt_power.readOp.dynamic; + output_data += fp_inst_window->output_data; + } + if (ROB) { + ROB->output_data.peak_dynamic_power = + ROB->power_t.readOp.dynamic * clockRate; + ROB->output_data.runtime_dynamic_energy = + ROB->rt_power.readOp.dynamic; + output_data += ROB->output_data; + } + // Integer and FP instruction selection logic is not included in the + // roll-up due to the uninitialized area + /* + if (int_instruction_selection) { + output_data += int_instruction_selection->output_data; + } + if (fp_instruction_selection) { + output_data += fp_instruction_selection->output_data; + } + */ } +void SchedulerU::displayData(uint32_t indent, int plevel) { + if (!exist) return; -void SchedulerU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - double ROB_duty_cycle; -// ROB_duty_cycle = ((coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 -// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0))*1.1<1 ? 
(coredynp.ALU_duty_cycle + coredynp.num_muls>0?coredynp.MUL_duty_cycle:0 -// + coredynp.num_fpus>0?coredynp.FPU_duty_cycle:0)*1.1:1; - ROB_duty_cycle = 1; - //init stats - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = fp_inst_window->l_ip.num_rd_ports*coredynp.num_fp_pipelines; - fp_inst_window->stats_t.writeAc.access = fp_inst_window->l_ip.num_wr_ports*coredynp.num_fp_pipelines; - fp_inst_window->stats_t.searchAc.access = fp_inst_window->l_ip.num_search_ports*coredynp.num_fp_pipelines; - fp_inst_window->tdp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->stats_t.readAc.access = coredynp.commitW*coredynp.num_pipelines*ROB_duty_cycle; - ROB->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines*ROB_duty_cycle; - ROB->tdp_stats = ROB->stats_t; - - /* - * When inst commits, ROB must be read. - * Because for Physcial register based cores, physical register tag in ROB - * need to be read out and write into RRAT/CAM based RAT. - * For RS based cores, register content that stored in ROB must be - * read out and stored in architectural registers. - * - * if no-register is involved, the ROB read out operation when instruction commits can be ignored. - * assuming 20% insts. belong this type. 
- * TODO: ROB duty_cycle need to be revisited - */ - } + McPATComponent::displayData(indent, plevel); - } - else if (coredynp.multithreaded) - { - int_inst_window->stats_t.readAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_search_ports; - int_inst_window->stats_t.writeAc.access = coredynp.issueW*coredynp.num_pipelines;//int_inst_window->l_ip.num_wr_ports; - int_inst_window->stats_t.searchAc.access = coredynp.issueW*coredynp.num_pipelines; - int_inst_window->tdp_stats = int_inst_window->stats_t; - } + if (core_params.core_ty == OOO) { + int_inst_window->displayData(indent + 4, plevel); + fp_inst_window->displayData(indent + 4, plevel); + if (core_params.ROB_size > 0) { + ROB->displayData(indent + 4, plevel); + } + } else if (core_params.multithreaded) { + int_inst_window->displayData(indent + 4, plevel); + } - } - else - {//rtp - if (coredynp.core_ty==OOO) - { - int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].inst_window_reads; - int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].inst_window_writes; - int_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].inst_window_wakeup_accesses; - int_inst_window->rtp_stats = int_inst_window->stats_t; - fp_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].fp_inst_window_reads; - fp_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].fp_inst_window_writes; - fp_inst_window->stats_t.searchAc.access = XML->sys.core[ithCore].fp_inst_window_wakeup_accesses; - fp_inst_window->rtp_stats = fp_inst_window->stats_t; - - if (XML->sys.core[ithCore].ROB_size >0) - { - - ROB->stats_t.readAc.access = XML->sys.core[ithCore].ROB_reads; - ROB->stats_t.writeAc.access = XML->sys.core[ithCore].ROB_writes; - /* ROB need to be updated in RS based OOO when new values are produced, - * this update may happen before the commit stage when ROB entry is released - * 1. ROB write at instruction inserted in - * 2. 
ROB write as results produced (for RS based OOO only) - * 3. ROB read as instruction committed. For RS based OOO, data values are read out and sent to ARF - * For Physical reg based OOO, no data stored in ROB, but register tags need to be - * read out and used to set the RRAT and to recycle the register tag to free list buffer - */ - ROB->rtp_stats = ROB->stats_t; - } + // Integer and FP instruction selection logic is not included in the + // roll-up due to the uninitialized area + /* + if (int_instruction_selection) { + int_instruction_selection->displayData(indent + 4, plevel); + } + if (fp_instruction_selection) { + fp_instruction_selection->displayData(indent + 4, plevel); + } + */ +} - } - else if (coredynp.multithreaded) - { - int_inst_window->stats_t.readAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.writeAc.access = XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions; - int_inst_window->stats_t.searchAc.access = 2*(XML->sys.core[ithCore].int_instructions + XML->sys.core[ithCore].fp_instructions); - int_inst_window->rtp_stats = int_inst_window->stats_t; - } +void LoadStoreU::computeEnergy() { + if (!exist) return; + + LSQ->tdp_stats.reset(); + LSQ->tdp_stats.readAc.access = LSQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LSQ->tdp_stats.writeAc.access = LSQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LSQ->rtp_stats.reset(); + // Flush overhead conidered + LSQ->rtp_stats.readAc.access = (core_stats.load_instructions + + core_stats.store_instructions) * 2; + LSQ->rtp_stats.writeAc.access = (core_stats.load_instructions + + core_stats.store_instructions) * 2; + LSQ->power_t.reset(); + //every memory access invloves at least two operations on LSQ + LSQ->power_t.readOp.dynamic += LSQ->tdp_stats.readAc.access * + (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->tdp_stats.writeAc.access * 
LSQ->local_result.power.writeOp.dynamic; + LSQ->rt_power.reset(); + //every memory access invloves at least two operations on LSQ + LSQ->rt_power.readOp.dynamic += LSQ->rtp_stats.readAc.access * + (LSQ->local_result.power.searchOp.dynamic + + LSQ->local_result.power.readOp.dynamic) + + LSQ->rtp_stats.writeAc.access * LSQ->local_result.power.writeOp.dynamic; + + if (LoadQ) { + LoadQ->tdp_stats.reset(); + LoadQ->tdp_stats.readAc.access = LoadQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LoadQ->tdp_stats.writeAc.access = LoadQ->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + LoadQ->rtp_stats.reset(); + LoadQ->rtp_stats.readAc.access = core_stats.load_instructions + + core_stats.store_instructions; + LoadQ->rtp_stats.writeAc.access = core_stats.load_instructions + + core_stats.store_instructions; + LoadQ->power_t.reset(); + //every memory access invloves at least two operations on LoadQ + LoadQ->power_t.readOp.dynamic += + LoadQ->tdp_stats.readAc.access * + (LoadQ->local_result.power.searchOp.dynamic + + LoadQ->local_result.power.readOp.dynamic) + + LoadQ->tdp_stats.writeAc.access * + LoadQ->local_result.power.writeOp.dynamic; + LoadQ->rt_power.reset(); + //every memory access invloves at least two operations on LoadQ + LoadQ->rt_power.readOp.dynamic += LoadQ->rtp_stats.readAc.access * + (LoadQ->local_result.power.searchOp.dynamic + + LoadQ->local_result.power.readOp.dynamic) + + LoadQ->rtp_stats.writeAc.access * + LoadQ->local_result.power.writeOp.dynamic; } - //computation engine - if (coredynp.core_ty==OOO) - { - int_inst_window->power_t.reset(); - fp_inst_window->power_t.reset(); - - /* each instruction needs to write to scheduler, read out when all resources and source operands are ready - * two search ops with one for each source operand - * - */ - int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access - + int_inst_window->local_result.power.searchOp.dynamic * 
int_inst_window->stats_t.searchAc.access - + int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access - + int_inst_window->stats_t.readAc.access * instruction_selection->power.readOp.dynamic; - - fp_inst_window->power_t.readOp.dynamic += fp_inst_window->local_result.power.readOp.dynamic * fp_inst_window->stats_t.readAc.access - + fp_inst_window->local_result.power.searchOp.dynamic * fp_inst_window->stats_t.searchAc.access - + fp_inst_window->local_result.power.writeOp.dynamic * fp_inst_window->stats_t.writeAc.access - + fp_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; - - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->power_t.reset(); - ROB->power_t.readOp.dynamic += ROB->local_result.power.readOp.dynamic*ROB->stats_t.readAc.access + - ROB->stats_t.writeAc.access*ROB->local_result.power.writeOp.dynamic; - } + McPATComponent::computeEnergy(); + + output_data.reset(); + if (dcache) { + output_data += dcache->output_data; + } + if (LSQ) { + LSQ->output_data.peak_dynamic_power = + LSQ->power_t.readOp.dynamic * clockRate; + LSQ->output_data.runtime_dynamic_energy = LSQ->rt_power.readOp.dynamic; + output_data += LSQ->output_data; + } + if (LoadQ) { + LoadQ->output_data.peak_dynamic_power = + LoadQ->power_t.readOp.dynamic * clockRate; + LoadQ->output_data.runtime_dynamic_energy = + LoadQ->rt_power.readOp.dynamic; + output_data += LoadQ->output_data; + } +} +void LoadStoreU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + McPATComponent::displayData(indent, plevel); + if (LoadQ) { + LoadQ->displayData(indent + 4, plevel); + } + LSQ->displayData(indent + 4, plevel); - } - else if (coredynp.multithreaded) - { - int_inst_window->power_t.reset(); - int_inst_window->power_t.readOp.dynamic += int_inst_window->local_result.power.readOp.dynamic * int_inst_window->stats_t.readAc.access - + int_inst_window->local_result.power.searchOp.dynamic * int_inst_window->stats_t.searchAc.access - 
+ int_inst_window->local_result.power.writeOp.dynamic * int_inst_window->stats_t.writeAc.access - + int_inst_window->stats_t.writeAc.access * instruction_selection->power.readOp.dynamic; - } +} - //assign values - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - fp_inst_window->power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - power = power + int_inst_window->power + fp_inst_window->power; - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->power = ROB->power_t + ROB->local_result.power*pppm_lkg; - power = power + ROB->power; - } +void MemManU::computeEnergy() { + if (!exist) return; - } - else if (coredynp.multithreaded) - { - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - power = power + int_inst_window->power; - } + itlb->tdp_stats.reset(); + itlb->tdp_stats.readAc.access = itlb->l_ip.num_search_ports; + itlb->tdp_stats.readAc.miss = 0; + itlb->tdp_stats.readAc.hit = itlb->tdp_stats.readAc.access - + itlb->tdp_stats.readAc.miss; + itlb->rtp_stats.reset(); + itlb->rtp_stats.readAc.access = mem_man_stats.itlb_total_accesses; + itlb->rtp_stats.writeAc.access = mem_man_stats.itlb_total_misses; - } - else - {//rtp - if (coredynp.core_ty==OOO) - { - int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - fp_inst_window->rt_power = fp_inst_window->power_t + (fp_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power + fp_inst_window->rt_power; - if (XML->sys.core[ithCore].ROB_size >0) - { - ROB->rt_power = ROB->power_t + ROB->local_result.power*pppm_lkg; - rt_power = rt_power + 
ROB->rt_power; - } + itlb->power_t.reset(); + //FA spent most power in tag, so use total access not hits + itlb->power_t.readOp.dynamic += itlb->tdp_stats.readAc.access * + itlb->local_result.power.searchOp.dynamic + + itlb->tdp_stats.readAc.miss * + itlb->local_result.power.writeOp.dynamic; + itlb->rt_power.reset(); + //FA spent most power in tag, so use total access not hits + itlb->rt_power.readOp.dynamic += itlb->rtp_stats.readAc.access * + itlb->local_result.power.searchOp.dynamic + + itlb->rtp_stats.writeAc.access * + itlb->local_result.power.writeOp.dynamic; + + dtlb->tdp_stats.reset(); + dtlb->tdp_stats.readAc.access = dtlb->l_ip.num_search_ports * + core_stats.LSU_duty_cycle; + dtlb->tdp_stats.readAc.miss = 0; + dtlb->tdp_stats.readAc.hit = dtlb->tdp_stats.readAc.access - + dtlb->tdp_stats.readAc.miss; + dtlb->rtp_stats.reset(); + dtlb->rtp_stats.readAc.access = mem_man_stats.dtlb_read_accesses + + mem_man_stats.dtlb_write_misses; + dtlb->rtp_stats.writeAc.access = mem_man_stats.dtlb_write_accesses + + mem_man_stats.dtlb_read_misses; - } - else if (coredynp.multithreaded) - { - // set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); - int_inst_window->rt_power = int_inst_window->power_t + (int_inst_window->local_result.power +instruction_selection->power) *pppm_lkg; - rt_power = rt_power + int_inst_window->rt_power; - } + dtlb->power_t.reset(); + //FA spent most power in tag, so use total access not hits + dtlb->power_t.readOp.dynamic += dtlb->tdp_stats.readAc.access * + dtlb->local_result.power.searchOp.dynamic + + dtlb->tdp_stats.readAc.miss * + dtlb->local_result.power.writeOp.dynamic; + dtlb->rt_power.reset(); + //FA spent most power in tag, so use total access not hits + dtlb->rt_power.readOp.dynamic += dtlb->rtp_stats.readAc.access * + dtlb->local_result.power.searchOp.dynamic + + dtlb->rtp_stats.writeAc.access * + dtlb->local_result.power.writeOp.dynamic; + + output_data.reset(); + if (itlb) { + itlb->output_data.peak_dynamic_power = 
itlb->power_t.readOp.dynamic * + clockRate; + itlb->output_data.runtime_dynamic_energy = + itlb->rt_power.readOp.dynamic; + output_data += itlb->output_data; + } + if (dtlb) { + dtlb->output_data.peak_dynamic_power = + dtlb->power_t.readOp.dynamic * clockRate; + dtlb->output_data.runtime_dynamic_energy = + dtlb->rt_power.readOp.dynamic; + output_data += dtlb->output_data; } -// set_pppm(pppm_t, XML->sys.core[ithCore].issue_width,1, 1, 1); -// cout<<"Scheduler power="<power.readOp.dynamic<<"leakage"<power.readOp.leakage<sys.longer_channel_device; - - - if (is_tdp) - { - if (coredynp.core_ty==OOO) - { - cout << indent_str << "Instruction Window:" << endl; - cout << indent_str_next << "Area = " << int_inst_window->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << int_inst_window->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? int_inst_window->power.readOp.longer_channel_leakage:int_inst_window->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << int_inst_window->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << int_inst_window->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.core[ithCore].ROB_size >0) - { - cout << indent_str<<"ROB:" << endl; - cout << indent_str_next << "Area = " << ROB->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ROB->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
ROB->power.readOp.longer_channel_leakage:ROB->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << ROB->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ROB->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.core[ithCore].ROB_size >0) - { - cout << indent_str_next << "ROB Peak Dynamic = " << ROB->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "ROB Subthreshold Leakage = " << ROB->rt_power.readOp.leakage << " W" << endl; - cout << indent_str_next << "ROB Gate Leakage = " << ROB->rt_power.readOp.gate_leakage << " W" << endl; - } - } - else if (coredynp.multithreaded) - { - cout << indent_str_next << "Instruction Window Peak Dynamic = " << int_inst_window->rt_power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Instruction Window Subthreshold Leakage = " << int_inst_window->rt_power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Instruction Window Gate Leakage = " << int_inst_window->rt_power.readOp.gate_leakage << " W" << endl; - } - } +void MemManU::displayData(uint32_t indent, int plevel) { + if (!exist) return; + + McPATComponent::displayData(indent, plevel); + itlb->displayData(indent + 4, plevel); + dtlb->displayData(indent + 4, plevel); } -void LoadStoreU::computeEnergy(bool is_tdp) -{ - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - dcache.caches->stats_t.readAc.access = 0.67*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; - dcache.caches->stats_t.readAc.miss = 0; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = 0.33*dcache.caches->l_ip.num_rw_ports*coredynp.LSU_duty_cycle; - dcache.caches->stats_t.writeAc.miss = 0; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; - dcache.caches->tdp_stats = 
dcache.caches->stats_t; - - dcache.missb->stats_t.readAc.access = dcache.missb->l_ip.num_search_ports; - dcache.missb->stats_t.writeAc.access = dcache.missb->l_ip.num_search_ports; - dcache.missb->tdp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.ifb->l_ip.num_search_ports; - dcache.ifb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports; - dcache.ifb->tdp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.prefetchb->l_ip.num_search_ports; - dcache.prefetchb->stats_t.writeAc.access = dcache.ifb->l_ip.num_search_ports; - dcache.prefetchb->tdp_stats = dcache.prefetchb->stats_t; - if (cache_p==Write_back) - { - dcache.wbb->stats_t.readAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->stats_t.writeAc.access = dcache.wbb->l_ip.num_search_ports; - dcache.wbb->tdp_stats = dcache.wbb->stats_t; - } +void RegFU::computeEnergy() { + /* + * Architecture RF and physical RF cannot be present at the same time. + * Therefore, the RF stats can only refer to either ARF or PRF; + * And the same stats can be used for both. + */ + if (!exist) return; + + IRF->tdp_stats.reset(); + IRF->tdp_stats.readAc.access = + core_params.issueW * NUM_INT_INST_SOURCE_OPERANDS * + (core_stats.ALU_duty_cycle * 1.1 + + (core_params.num_muls > 0 ? core_stats.MUL_duty_cycle : 0)) * + core_params.num_pipelines; + IRF->tdp_stats.writeAc.access = + core_params.issueW * + (core_stats.ALU_duty_cycle * 1.1 + + (core_params.num_muls > 0 ? 
core_stats.MUL_duty_cycle : 0)) * + core_params.num_pipelines; + IRF->rtp_stats.reset(); + IRF->rtp_stats.readAc.access = core_stats.int_regfile_reads; + IRF->rtp_stats.writeAc.access = core_stats.int_regfile_writes; + if (core_params.regWindowing) { + IRF->rtp_stats.readAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + IRF->rtp_stats.writeAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + } + IRF->power_t.reset(); + IRF->power_t.readOp.dynamic += IRF->tdp_stats.readAc.access * + IRF->local_result.power.readOp.dynamic + + IRF->tdp_stats.writeAc.access * + IRF->local_result.power.writeOp.dynamic; + IRF->rt_power.reset(); + IRF->rt_power.readOp.dynamic += + IRF->rtp_stats.readAc.access * IRF->local_result.power.readOp.dynamic + + IRF->rtp_stats.writeAc.access * IRF->local_result.power.writeOp.dynamic; + + FRF->tdp_stats.reset(); + FRF->tdp_stats.readAc.access = + FRF->l_ip.num_rd_ports * core_stats.FPU_duty_cycle * 1.05 * + core_params.num_fp_pipelines; + FRF->tdp_stats.writeAc.access = + FRF->l_ip.num_wr_ports * core_stats.FPU_duty_cycle * 1.05 * + core_params.num_fp_pipelines; + FRF->rtp_stats.reset(); + FRF->rtp_stats.readAc.access = core_stats.float_regfile_reads; + FRF->rtp_stats.writeAc.access = core_stats.float_regfile_writes; + if (core_params.regWindowing) { + FRF->rtp_stats.readAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + FRF->rtp_stats.writeAc.access += core_stats.function_calls * + RFWIN_ACCESS_MULTIPLIER; + } + FRF->power_t.reset(); + FRF->power_t.readOp.dynamic += + FRF->tdp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + + FRF->tdp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; + FRF->rt_power.reset(); + FRF->rt_power.readOp.dynamic += + FRF->rtp_stats.readAc.access * FRF->local_result.power.readOp.dynamic + + FRF->rtp_stats.writeAc.access * FRF->local_result.power.writeOp.dynamic; + + if (core_params.regWindowing) { + RFWIN->tdp_stats.reset(); + 
RFWIN->tdp_stats.readAc.access = 0; + RFWIN->tdp_stats.writeAc.access = 0; + RFWIN->rtp_stats.reset(); + RFWIN->rtp_stats.readAc.access = + core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; + RFWIN->rtp_stats.writeAc.access = + core_stats.function_calls * RFWIN_ACCESS_MULTIPLIER; + RFWIN->power_t.reset(); + RFWIN->power_t.readOp.dynamic += + RFWIN->tdp_stats.readAc.access * + RFWIN->local_result.power.readOp.dynamic + + RFWIN->tdp_stats.writeAc.access * + RFWIN->local_result.power.writeOp.dynamic; + RFWIN->rt_power.reset(); + RFWIN->rt_power.readOp.dynamic += + RFWIN->rtp_stats.readAc.access * + RFWIN->local_result.power.readOp.dynamic + + RFWIN->rtp_stats.writeAc.access * + RFWIN->local_result.power.writeOp.dynamic; + } - LSQ->stats_t.readAc.access = LSQ->stats_t.writeAc.access = LSQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - LSQ->tdp_stats = LSQ->stats_t; - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->stats_t.readAc.access = LoadQ->stats_t.writeAc.access = LoadQ->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - LoadQ->tdp_stats = LoadQ->stats_t; - } - } - else - { - //init stats for Runtime Dynamic (RTP) - dcache.caches->stats_t.readAc.access = XML->sys.core[ithCore].dcache.read_accesses; - dcache.caches->stats_t.readAc.miss = XML->sys.core[ithCore].dcache.read_misses; - dcache.caches->stats_t.readAc.hit = dcache.caches->stats_t.readAc.access - dcache.caches->stats_t.readAc.miss; - dcache.caches->stats_t.writeAc.access = XML->sys.core[ithCore].dcache.write_accesses; - dcache.caches->stats_t.writeAc.miss = XML->sys.core[ithCore].dcache.write_misses; - dcache.caches->stats_t.writeAc.hit = dcache.caches->stats_t.writeAc.access - dcache.caches->stats_t.writeAc.miss; - dcache.caches->rtp_stats = dcache.caches->stats_t; - - if (cache_p==Write_back) - { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - 
dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - - dcache.wbb->stats_t.readAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->stats_t.writeAc.access = dcache.caches->stats_t.writeAc.miss; - dcache.wbb->rtp_stats = dcache.wbb->stats_t; - } - else - { - dcache.missb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.missb->rtp_stats = dcache.missb->stats_t; - - dcache.ifb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.ifb->rtp_stats = dcache.ifb->stats_t; - - dcache.prefetchb->stats_t.readAc.access = dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->stats_t.writeAc.access = dcache.caches->stats_t.readAc.miss; - dcache.prefetchb->rtp_stats = dcache.prefetchb->stats_t; - } + output_data.reset(); + if (IRF) { + IRF->output_data.peak_dynamic_power = + IRF->power_t.readOp.dynamic * clockRate; + IRF->output_data.subthreshold_leakage_power *= + core_params.num_hthreads; + IRF->output_data.gate_leakage_power *= core_params.num_hthreads; + IRF->output_data.runtime_dynamic_energy = IRF->rt_power.readOp.dynamic; + output_data += IRF->output_data; + } + if (FRF) { + FRF->output_data.peak_dynamic_power = + FRF->power_t.readOp.dynamic * clockRate; + FRF->output_data.subthreshold_leakage_power *= + core_params.num_hthreads; + FRF->output_data.gate_leakage_power *= core_params.num_hthreads; + FRF->output_data.runtime_dynamic_energy = 
FRF->rt_power.readOp.dynamic; + output_data += FRF->output_data; + } + if (RFWIN) { + RFWIN->output_data.peak_dynamic_power = + RFWIN->power_t.readOp.dynamic * clockRate; + RFWIN->output_data.runtime_dynamic_energy = + RFWIN->rt_power.readOp.dynamic; + output_data += RFWIN->output_data; + } +} - LSQ->stats_t.readAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2;//flush overhead considered - LSQ->stats_t.writeAc.access = (XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions)*2; - LSQ->rtp_stats = LSQ->stats_t; +void RegFU::displayData(uint32_t indent, int plevel) { + if (!exist) return; - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->stats_t.readAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->stats_t.writeAc.access = XML->sys.core[ithCore].load_instructions + XML->sys.core[ithCore].store_instructions; - LoadQ->rtp_stats = LoadQ->stats_t; - } + McPATComponent::displayData(indent, plevel); - } + IRF->displayData(indent + 4, plevel); + FRF->displayData(indent + 4, plevel); + if (core_params.regWindowing) { + RFWIN->displayData(indent + 4, plevel); + } +} - dcache.power_t.reset(); - LSQ->power_t.reset(); - dcache.power_t.readOp.dynamic += (dcache.caches->stats_t.readAc.hit*dcache.caches->local_result.power.readOp.dynamic+ - dcache.caches->stats_t.readAc.miss*dcache.caches->local_result.power.readOp.dynamic+ - dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.tag_array2->power.readOp.dynamic+ - dcache.caches->stats_t.writeAc.access*dcache.caches->local_result.power.writeOp.dynamic); +void EXECU::computeEnergy() { + if (!exist) return; - if (cache_p==Write_back) - {//write miss will generate a write later - dcache.power_t.readOp.dynamic += dcache.caches->stats_t.writeAc.miss*dcache.caches->local_result.power.writeOp.dynamic; - } + 
int_bypass->set_params_stats(core_params.execu_int_bypass_ports, + core_stats.ALU_cdb_duty_cycle, + core_stats.cdb_alu_accesses); - dcache.power_t.readOp.dynamic += dcache.missb->stats_t.readAc.access*dcache.missb->local_result.power.searchOp.dynamic + - dcache.missb->stats_t.writeAc.access*dcache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - dcache.power_t.readOp.dynamic += dcache.ifb->stats_t.readAc.access*dcache.ifb->local_result.power.searchOp.dynamic + - dcache.ifb->stats_t.writeAc.access*dcache.ifb->local_result.power.writeOp.dynamic; - dcache.power_t.readOp.dynamic += dcache.prefetchb->stats_t.readAc.access*dcache.prefetchb->local_result.power.searchOp.dynamic + - dcache.prefetchb->stats_t.writeAc.access*dcache.prefetchb->local_result.power.writeOp.dynamic; - if (cache_p==Write_back) - { - dcache.power_t.readOp.dynamic += dcache.wbb->stats_t.readAc.access*dcache.wbb->local_result.power.searchOp.dynamic - + dcache.wbb->stats_t.writeAc.access*dcache.wbb->local_result.power.writeOp.dynamic; - } + intTagBypass->set_params_stats(core_params.execu_int_bypass_ports, + core_stats.ALU_cdb_duty_cycle, + core_stats.cdb_alu_accesses); - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->power_t.reset(); - LoadQ->power_t.readOp.dynamic += LoadQ->stats_t.readAc.access*(LoadQ->local_result.power.searchOp.dynamic+ LoadQ->local_result.power.readOp.dynamic)+ - LoadQ->stats_t.writeAc.access*LoadQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LoadQ - - LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + LSQ->local_result.power.readOp.dynamic) - + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ - - } - else - { - LSQ->power_t.readOp.dynamic += LSQ->stats_t.readAc.access*(LSQ->local_result.power.searchOp.dynamic + 
LSQ->local_result.power.readOp.dynamic) - + LSQ->stats_t.writeAc.access*LSQ->local_result.power.writeOp.dynamic;//every memory access invloves at least two operations on LSQ - - } - - if (is_tdp) - { -// dcache.power = dcache.power_t + (dcache.caches->local_result.power)*pppm_lkg + -// (dcache.missb->local_result.power + -// dcache.ifb->local_result.power + -// dcache.prefetchb->local_result.power + -// dcache.wbb->local_result.power)*pppm_Isub; - dcache.power = dcache.power_t + (dcache.caches->local_result.power + - dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power) *pppm_lkg; - if (cache_p==Write_back) - { - dcache.power = dcache.power + dcache.wbb->local_result.power*pppm_lkg; - } + if (core_params.num_muls > 0) { + int_mul_bypass->set_params_stats(core_params.execu_mul_bypass_ports, + core_stats.MUL_cdb_duty_cycle, + core_stats.cdb_mul_accesses); - LSQ->power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; - power = power + dcache.power + LSQ->power; + intTag_mul_Bypass->set_params_stats(core_params.execu_mul_bypass_ports, + core_stats.MUL_cdb_duty_cycle, + core_stats.cdb_mul_accesses); + } - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; - power = power + LoadQ->power; - } + if (core_params.num_fpus > 0) { + fp_bypass->set_params_stats(core_params.execu_fp_bypass_ports, + core_stats.FPU_cdb_duty_cycle, + core_stats.cdb_fpu_accesses); + + fpTagBypass->set_params_stats(core_params.execu_fp_bypass_ports, + core_stats.FPU_cdb_duty_cycle, + core_stats.cdb_fpu_accesses); } - else - { -// dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + -// dcache.missb->local_result.power + -// dcache.ifb->local_result.power + -// dcache.prefetchb->local_result.power + -// dcache.wbb->local_result.power)*pppm_lkg; - dcache.rt_power = dcache.power_t + (dcache.caches->local_result.power + - 
dcache.missb->local_result.power + - dcache.ifb->local_result.power + - dcache.prefetchb->local_result.power )*pppm_lkg; - - if (cache_p==Write_back) - { - dcache.rt_power = dcache.rt_power + dcache.wbb->local_result.power*pppm_lkg; - } - LSQ->rt_power = LSQ->power_t + LSQ->local_result.power *pppm_lkg; - rt_power = rt_power + dcache.rt_power + LSQ->rt_power; + McPATComponent::computeEnergy(); - if ((coredynp.core_ty==OOO) && (XML->sys.core[ithCore].load_buffer_size >0)) - { - LoadQ->rt_power = LoadQ->power_t + LoadQ->local_result.power *pppm_lkg; - rt_power = rt_power + LoadQ->rt_power; - } + if (rfu) { + rfu->computeEnergy(); + output_data += rfu->output_data; + } + if (scheu) { + scheu->computeEnergy(); + output_data += scheu->output_data; + } + if (fp_u) { + fp_u->computeEnergy(); + output_data += fp_u->output_data; + } + if (exeu) { + exeu->computeEnergy(); + output_data += exeu->output_data; + } + if (mul) { + mul->computeEnergy(); + output_data += mul->output_data; } } +void EXECU::displayData(uint32_t indent, int plevel) { + if (!exist) return; -void LoadStoreU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - if (is_tdp) - { - cout << indent_str << "Data Cache:" << endl; - cout << indent_str_next << "Area = " << dcache.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << dcache.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
dcache.power.readOp.longer_channel_leakage:dcache.power.readOp.leakage )<<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << dcache.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << dcache.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.core[ithCore].load_buffer_size >0) - { - cout << indent_str << "LoadQ:" << endl; - cout << indent_str_next << "Area = " << LoadQ->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << LoadQ->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? LoadQ->power.readOp.longer_channel_leakage:LoadQ->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << LoadQ->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << LoadQ->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <displayData(indent + 4, plevel); + if (scheu) { + scheu->displayData(indent + 4, plevel); + } + exeu->displayData(indent + 4, plevel); + if (core_params.num_fpus > 0) { + fp_u->displayData(indent + 4, plevel); + } + if (core_params.num_muls > 0) { + mul->displayData(indent + 4, plevel); + } } -void MemManU::computeEnergy(bool is_tdp) -{ - - if (!exist) return; - if (is_tdp) - { - //init stats for Peak - itlb->stats_t.readAc.access = itlb->l_ip.num_search_ports; - itlb->stats_t.readAc.miss = 0; - itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->tdp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = dtlb->l_ip.num_search_ports*coredynp.LSU_duty_cycle; - dtlb->stats_t.readAc.miss = 0; - dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->tdp_stats = dtlb->stats_t; - } - else - { - //init stats for Runtime Dynamic (RTP) - itlb->stats_t.readAc.access = XML->sys.core[ithCore].itlb.total_accesses; - itlb->stats_t.readAc.miss 
= XML->sys.core[ithCore].itlb.total_misses; - itlb->stats_t.readAc.hit = itlb->stats_t.readAc.access - itlb->stats_t.readAc.miss; - itlb->rtp_stats = itlb->stats_t; - - dtlb->stats_t.readAc.access = XML->sys.core[ithCore].dtlb.total_accesses; - dtlb->stats_t.readAc.miss = XML->sys.core[ithCore].dtlb.total_misses; - dtlb->stats_t.readAc.hit = dtlb->stats_t.readAc.access - dtlb->stats_t.readAc.miss; - dtlb->rtp_stats = dtlb->stats_t; +void Core::computeEnergy() { + ifu->computeEnergy(); + lsu->computeEnergy(); + mmu->computeEnergy(); + exu->computeEnergy(); + if (core_params.core_ty == OOO) { + rnu->computeEnergy(); } - itlb->power_t.reset(); - dtlb->power_t.reset(); - itlb->power_t.readOp.dynamic += itlb->stats_t.readAc.access*itlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits - +itlb->stats_t.readAc.miss*itlb->local_result.power.writeOp.dynamic; - dtlb->power_t.readOp.dynamic += dtlb->stats_t.readAc.access*dtlb->local_result.power.searchOp.dynamic//FA spent most power in tag, so use total access not hits - +dtlb->stats_t.readAc.miss*dtlb->local_result.power.writeOp.dynamic; - - if (is_tdp) - { - itlb->power = itlb->power_t + itlb->local_result.power *pppm_lkg; - dtlb->power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; - power = power + itlb->power + dtlb->power; - } - else - { - itlb->rt_power = itlb->power_t + itlb->local_result.power *pppm_lkg; - dtlb->rt_power = dtlb->power_t + dtlb->local_result.power *pppm_lkg; - rt_power = rt_power + itlb->rt_power + dtlb->rt_power; - } + output_data.reset(); + if (ifu) { + output_data += ifu->output_data; + } + if (lsu) { + output_data += lsu->output_data; + } + if (mmu) { + output_data += mmu->output_data; + } + if (exu) { + output_data += exu->output_data; + } + if (rnu) { + output_data += rnu->output_data; + } + if (corepipe) { + output_data += corepipe->output_data; + } + if (undiffCore) { + output_data += undiffCore->output_data; + } + if (l2cache) { + 
output_data += l2cache->output_data; + } } -void MemManU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - - - - if (is_tdp) - { - cout << indent_str << "Itlb:" << endl; - cout << indent_str_next << "Area = " << itlb->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << itlb->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? itlb->power.readOp.longer_channel_leakage:itlb->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << itlb->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << itlb->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <stats_t.readAc.access = coredynp.issueW*2*(coredynp.ALU_duty_cycle*1.1+ - (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; - IRF->stats_t.writeAc.access = coredynp.issueW*(coredynp.ALU_duty_cycle*1.1+ - (coredynp.num_muls>0?coredynp.MUL_duty_cycle:0))*coredynp.num_pipelines; - //Rule of Thumb: about 10% RF related instructions do not need to access ALUs - IRF->tdp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = FRF->l_ip.num_rd_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; - FRF->stats_t.writeAc.access = FRF->l_ip.num_wr_ports*coredynp.FPU_duty_cycle*1.05*coredynp.num_fp_pipelines; - FRF->tdp_stats = FRF->stats_t; - if (coredynp.regWindowing) - { - RFWIN->stats_t.readAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->stats_t.writeAc.access = 0;//0.5*RFWIN->l_ip.num_rw_ports; - RFWIN->tdp_stats = RFWIN->stats_t; - } - } - else - { - //init stats for Runtime Dynamic (RTP) - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads;//TODO: no diff on archi and phy - IRF->stats_t.writeAc.access = 
XML->sys.core[ithCore].int_regfile_writes; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes; - FRF->rtp_stats = FRF->stats_t; - if (coredynp.regWindowing) - { - RFWIN->stats_t.readAc.access = XML->sys.core[ithCore].function_calls*16; - RFWIN->stats_t.writeAc.access = XML->sys.core[ithCore].function_calls*16; - RFWIN->rtp_stats = RFWIN->stats_t; - - IRF->stats_t.readAc.access = XML->sys.core[ithCore].int_regfile_reads + - XML->sys.core[ithCore].function_calls*16; - IRF->stats_t.writeAc.access = XML->sys.core[ithCore].int_regfile_writes + - XML->sys.core[ithCore].function_calls*16; - IRF->rtp_stats = IRF->stats_t; - - FRF->stats_t.readAc.access = XML->sys.core[ithCore].float_regfile_reads + - XML->sys.core[ithCore].function_calls*16;; - FRF->stats_t.writeAc.access = XML->sys.core[ithCore].float_regfile_writes+ - XML->sys.core[ithCore].function_calls*16;; - FRF->rtp_stats = FRF->stats_t; - } + if (!exist) return; + if (IB) { + delete IB; + IB = NULL; } - IRF->power_t.reset(); - FRF->power_t.reset(); - IRF->power_t.readOp.dynamic += (IRF->stats_t.readAc.access*IRF->local_result.power.readOp.dynamic - +IRF->stats_t.writeAc.access*IRF->local_result.power.writeOp.dynamic); - FRF->power_t.readOp.dynamic += (FRF->stats_t.readAc.access*FRF->local_result.power.readOp.dynamic - +FRF->stats_t.writeAc.access*FRF->local_result.power.writeOp.dynamic); - if (coredynp.regWindowing) - { - RFWIN->power_t.reset(); - RFWIN->power_t.readOp.dynamic += (RFWIN->stats_t.readAc.access*RFWIN->local_result.power.readOp.dynamic + - RFWIN->stats_t.writeAc.access*RFWIN->local_result.power.writeOp.dynamic); - } - - if (is_tdp) - { - IRF->power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread; - FRF->power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread; - power = power + (IRF->power + FRF->power); - if 
(coredynp.regWindowing) - { - RFWIN->power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; - power = power + RFWIN->power; - } + if (ID_inst) { + delete ID_inst; + ID_inst = NULL; + } + if (ID_operand) { + delete ID_operand; + ID_operand = NULL; + } + if (ID_misc) { + delete ID_misc; + ID_misc = NULL; + } + if (core_params.predictionW > 0) { + if (BTB) { + delete BTB; + BTB = NULL; } - else - { - IRF->rt_power = IRF->power_t + IRF->local_result.power *coredynp.pppm_lkg_multhread; - FRF->rt_power = FRF->power_t + FRF->local_result.power *coredynp.pppm_lkg_multhread; - rt_power = rt_power + (IRF->power_t + FRF->power_t); - if (coredynp.regWindowing) - { - RFWIN->rt_power = RFWIN->power_t + RFWIN->local_result.power *pppm_lkg; - rt_power = rt_power + RFWIN->rt_power; - } + if (BPT) { + delete BPT; + BPT = NULL; } + } + if (icache) { + delete icache; + } } +BranchPredictor ::~BranchPredictor() { -void RegFU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { cout << indent_str << "Integer RF:" << endl; - cout << indent_str_next << "Area = " << IRF->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << IRF->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
IRF->power.readOp.longer_channel_leakage:IRF->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << IRF->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << IRF->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <power.reset(); -// rfu->rt_power.reset(); -// scheu->power.reset(); -// scheu->rt_power.reset(); -// exeu->power.reset(); -// exeu->rt_power.reset(); - - rfu->computeEnergy(is_tdp); - scheu->computeEnergy(is_tdp); - exeu->computeEnergy(is_tdp); - if (coredynp.num_fpus >0) - { - fp_u->computeEnergy(is_tdp); - } - if (coredynp.num_muls >0) - { - mul->computeEnergy(is_tdp); - } - - if (is_tdp) - { - set_pppm(pppm_t, 2*coredynp.ALU_cdb_duty_cycle, 2, 2, 2*coredynp.ALU_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. - bypass.power = bypass.power + intTagBypass->power*pppm_t + int_bypass->power*pppm_t; - if (coredynp.num_muls >0) - { - set_pppm(pppm_t, 2*coredynp.MUL_cdb_duty_cycle, 2, 2, 2*coredynp.MUL_cdb_duty_cycle);//2 means two source operands needs to be passed for each int instruction. - bypass.power = bypass.power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; - power = power + mul->power; - } - if (coredynp.num_fpus>0) - { - set_pppm(pppm_t, 3*coredynp.FPU_cdb_duty_cycle, 3, 3, 3*coredynp.FPU_cdb_duty_cycle);//3 means three source operands needs to be passed for each fp instruction. 
- bypass.power = bypass.power + fp_bypass->power*pppm_t + fpTagBypass->power*pppm_t ; - power = power + fp_u->power; - } - - power = power + rfu->power + exeu->power + bypass.power + scheu->power; - } - else - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_alu_accesses, 2, 2, XML->sys.core[ithCore].cdb_alu_accesses); - bypass.rt_power = bypass.rt_power + intTagBypass->power*pppm_t; - bypass.rt_power = bypass.rt_power + int_bypass->power*pppm_t; - - if (coredynp.num_muls >0) - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_mul_accesses, 2, 2, XML->sys.core[ithCore].cdb_mul_accesses);//2 means two source operands needs to be passed for each int instruction. - bypass.rt_power = bypass.rt_power + intTag_mul_Bypass->power*pppm_t + int_mul_bypass->power*pppm_t; - rt_power = rt_power + mul->rt_power; - } - - if (coredynp.num_fpus>0) - { - set_pppm(pppm_t, XML->sys.core[ithCore].cdb_fpu_accesses, 3, 3, XML->sys.core[ithCore].cdb_fpu_accesses); - bypass.rt_power = bypass.rt_power + fp_bypass->power*pppm_t; - bypass.rt_power = bypass.rt_power + fpTagBypass->power*pppm_t; - rt_power = rt_power + fp_u->rt_power; - } - rt_power = rt_power + rfu->rt_power + exeu->rt_power + bypass.rt_power + scheu->rt_power; - } + if (!exist) return; + if (iFRAT) { + delete iFRAT; + iFRAT = NULL; + } + if (fFRAT) { + delete fFRAT; + fFRAT = NULL; + } + if (iRRAT) { + delete iRRAT; + iRRAT = NULL; + } + if (fRRAT) { + delete fRRAT; + fRRAT = NULL; + } + if (ifreeL) { + delete ifreeL; + ifreeL = NULL; + } + if (ffreeL) { + delete ffreeL; + ffreeL = NULL; + } + if (idcl) { + delete idcl; + idcl = NULL; + } + if (fdcl) { + delete fdcl; + fdcl = NULL; + } + if (RAHT) { + delete RAHT; + RAHT = NULL; + } } -void EXECU::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - if (!exist) return; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - -// cout << indent_str_next << "Results Broadcast Bus Area = " <<
bypass->area.get_area() *1e-6 << " mm^2" << endl; - if (is_tdp) - { - cout << indent_str << "Register Files:" << endl; - cout << indent_str_next << "Area = " << rfu->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << rfu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? rfu->power.readOp.longer_channel_leakage:rfu->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << rfu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rfu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3){ - rfu->displayEnergy(indent+4,is_tdp); - } - cout << indent_str << "Instruction Scheduler:" << endl; - cout << indent_str_next << "Area = " << scheu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << scheu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? scheu->power.readOp.longer_channel_leakage:scheu->power.readOp.leakage) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << scheu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << scheu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <3){ - scheu->displayEnergy(indent+4,is_tdp); - } - exeu->displayEnergy(indent,is_tdp); - if (coredynp.num_fpus>0) - { - fp_u->displayEnergy(indent,is_tdp); - } - if (coredynp.num_muls >0) - { - mul->displayEnergy(indent,is_tdp); - } - cout << indent_str << "Results Broadcast Bus:" << endl; - cout << indent_str_next << "Area Overhead = " << bypass.area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << bypass.power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
bypass.power.readOp.longer_channel_leakage:bypass.power.readOp.leakage ) << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << bypass.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << bypass.rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - - if (coredynp.core_ty==OOO) - { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - if (rnu->exist) - { - rnu->power = rnu->power + corepipe->power*pppm_t; - power = power + rnu->power; - } - } - - if (ifu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.IFU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); -// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; - ifu->power = ifu->power + corepipe->power*pppm_t; -// cout << "IFU = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; -// cout << "1/4 pipe = " << corepipe->power.readOp.dynamic*clockRate/num_units << " W" << endl; - power = power + ifu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (lsu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.LSU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - lsu->power = lsu->power + corepipe->power*pppm_t; -// cout << "LSU = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + lsu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (exu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*coredynp.ALU_duty_cycle, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, 
coredynp.num_pipelines/num_units); - exu->power = exu->power + corepipe->power*pppm_t; -// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + exu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } - if (mmu->exist) - { - set_pppm(pppm_t, coredynp.num_pipelines/num_units*(0.5+0.5*coredynp.LSU_duty_cycle), coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - mmu->power = mmu->power + corepipe->power*pppm_t; -// cout << "MMU = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; - power = power + mmu->power; -// cout << "core = " << power.readOp.dynamic*clockRate << " W" << endl; - } +MemManU ::~MemManU() { - power = power + undiffCore->power; + if (!exist) return; + if (itlb) { + delete itlb; + itlb = NULL; + } + if (dtlb) { + delete dtlb; + dtlb = NULL; + } +} - if (XML->sys.Private_L2) - { +RegFU ::~RegFU() { - l2cache->computeEnergy(is_tdp); - set_pppm(pppm_t,l2cache->cachep.clockRate/clockRate, 1,1,1); - //l2cache->power = l2cache->power*pppm_t; - power = power + l2cache->power*pppm_t; - } - } - else - { - ifu->computeEnergy(is_tdp); - lsu->computeEnergy(is_tdp); - mmu->computeEnergy(is_tdp); - exu->computeEnergy(is_tdp); - if (coredynp.core_ty==OOO) - { - num_units = 5.0; - rnu->computeEnergy(is_tdp); - set_pppm(pppm_t, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - if (rnu->exist) - { - rnu->rt_power = rnu->rt_power + corepipe->power*pppm_t; - - rt_power = rt_power + rnu->rt_power; - } - } - else - { - if (XML->sys.homogeneous_cores==1) - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * XML->sys.total_cycles * XML->sys.number_of_cores; - } - else - { - rtp_pipeline_coe = coredynp.pipeline_duty_cycle * coredynp.total_cycles; - } - set_pppm(pppm_t, coredynp.num_pipelines*rtp_pipeline_coe/num_units, coredynp.num_pipelines/num_units, 
coredynp.num_pipelines/num_units, coredynp.num_pipelines/num_units); - } + if (!exist) return; + if (IRF) { + delete IRF; + IRF = NULL; + } + if (FRF) { + delete FRF; + FRF = NULL; + } + if (RFWIN) { + delete RFWIN; + RFWIN = NULL; + } +} - if (ifu->exist) - { - ifu->rt_power = ifu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + ifu->rt_power ; - } - if (lsu->exist) - { - lsu->rt_power = lsu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + lsu->rt_power; - } - if (exu->exist) - { - exu->rt_power = exu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + exu->rt_power; - } - if (mmu->exist) - { - mmu->rt_power = mmu->rt_power + corepipe->power*pppm_t; - rt_power = rt_power + mmu->rt_power ; - } +SchedulerU ::~SchedulerU() { - rt_power = rt_power + undiffCore->power; -// cout << "EXE = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - if (XML->sys.Private_L2) - { + if (!exist) return; + if (int_inst_window) { + delete int_inst_window; + int_inst_window = NULL; + } + if (fp_inst_window) { + delete fp_inst_window; + fp_inst_window = NULL; + } + if (ROB) { + delete ROB; + ROB = NULL; + } + if (int_instruction_selection) { + delete int_instruction_selection; + int_instruction_selection = NULL; + } + if (fp_instruction_selection) { + delete fp_instruction_selection; + fp_instruction_selection = NULL; + } +} - l2cache->computeEnergy(is_tdp); - //set_pppm(pppm_t,1/l2cache->cachep.executionTime, 1,1,1); - //l2cache->rt_power = l2cache->rt_power*pppm_t; - rt_power = rt_power + l2cache->rt_power; - } - } +EXECU ::~EXECU() { + if (!exist) return; + if (int_bypass) { + delete int_bypass; + int_bypass = NULL; + } + if (intTagBypass) { + delete intTagBypass; + intTagBypass = NULL; + } + if (int_mul_bypass) { + delete int_mul_bypass; + int_mul_bypass = NULL; + } + if (intTag_mul_Bypass) { + delete intTag_mul_Bypass; + intTag_mul_Bypass = NULL; + } + if (fp_bypass) { + delete fp_bypass; + fp_bypass = NULL; + } + if (fpTagBypass) { + delete
fpTagBypass; + fpTagBypass = NULL; + } + if (fp_u) { + delete fp_u; + fp_u = NULL; + } + if (exeu) { + delete exeu; + exeu = NULL; + } + if (mul) { + delete mul; + mul = NULL; + } + if (rfu) { + delete rfu; + rfu = NULL; + } + if (scheu) { + delete scheu; + scheu = NULL; + } } -void Core::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - if (is_tdp) - { - cout << "Core:" << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout<exist) - { - cout << indent_str << "Instruction Fetch Unit:" << endl; - cout << indent_str_next << "Area = " << ifu->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << ifu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
ifu->power.readOp.longer_channel_leakage:ifu->power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << ifu->power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << ifu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << ifu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - ifu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (coredynp.core_ty==OOO) - { - if (rnu->exist) - { - cout << indent_str<< "Renaming Unit:" << endl; - cout << indent_str_next << "Area = " << rnu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << rnu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? rnu->power.readOp.longer_channel_leakage:rnu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << rnu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << rnu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rnu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - rnu->displayEnergy(indent+4,plevel,is_tdp); - } - } +Core::~Core() { - } - if (lsu->exist) - { - cout << indent_str<< "Load Store Unit:" << endl; - cout << indent_str_next << "Area = " << lsu->area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << lsu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
lsu->power.readOp.longer_channel_leakage:lsu->power.readOp.leakage ) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << lsu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << lsu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << lsu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - lsu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (mmu->exist) - { - cout << indent_str<< "Memory Management Unit:" << endl; - cout << indent_str_next << "Area = " << mmu->area.get_area() *1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << mmu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? mmu->power.readOp.longer_channel_leakage:mmu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << mmu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << mmu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << mmu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - mmu->displayEnergy(indent+4,plevel,is_tdp); - } - } - if (exu->exist) - { - cout << indent_str<< "Execution Unit:" << endl; - cout << indent_str_next << "Area = " << exu->area.get_area() *1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << exu->power.readOp.dynamic*clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
exu->power.readOp.longer_channel_leakage:exu->power.readOp.leakage) << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << exu->power.readOp.longer_channel_leakage << " W" << endl; - cout << indent_str_next << "Gate Leakage = " << exu->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << exu->rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <2){ - exu->displayEnergy(indent+4,plevel,is_tdp); - } - } -// if (plevel >2) -// { -// if (undiffCore->exist) -// { -// cout << indent_str << "Undifferentiated Core" << endl; -// cout << indent_str_next << "Area = " << undiffCore->area.get_area()*1e-6<< " mm^2" << endl; -// cout << indent_str_next << "Peak Dynamic = " << undiffCore->power.readOp.dynamic*clockRate << " W" << endl; -//// cout << indent_str_next << "Subthreshold Leakage = " << undiffCore->power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " -// << (long_channel? undiffCore->power.readOp.longer_channel_leakage:undiffCore->power.readOp.leakage) << " W" << endl; -// cout << indent_str_next << "Gate Leakage = " << undiffCore->power.readOp.gate_leakage << " W" << endl; -// // cout << indent_str_next << "Runtime Dynamic = " << undiffCore->rt_power.readOp.dynamic/executionTime << " W" << endl; -// cout <sys.Private_L2) - { - - l2cache->displayEnergy(4,is_tdp); - } - - } - else - { -// cout << indent_str_next << "Instruction Fetch Unit Peak Dynamic = " << ifu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Instruction Fetch Unit Subthreshold Leakage = " << ifu->rt_power.readOp.leakage <<" W" << endl; -// cout << indent_str_next << "Instruction Fetch Unit Gate Leakage = " << ifu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Load Store Unit Peak Dynamic = " << lsu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Load Store Unit Subthreshold Leakage = " << 
lsu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Load Store Unit Gate Leakage = " << lsu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Peak Dynamic = " << mmu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Subthreshold Leakage = " << mmu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Memory Management Unit Gate Leakage = " << mmu->rt_power.readOp.gate_leakage << " W" << endl; -// cout << indent_str_next << "Execution Unit Peak Dynamic = " << exu->rt_power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Execution Unit Subthreshold Leakage = " << exu->rt_power.readOp.leakage << " W" << endl; -// cout << indent_str_next << "Execution Unit Gate Leakage = " << exu->rt_power.readOp.gate_leakage << " W" << endl; - } -} -InstFetchU ::~InstFetchU(){ - - if (!exist) return; - if(IB) {delete IB; IB = 0;} - if(ID_inst) {delete ID_inst; ID_inst = 0;} - if(ID_operand) {delete ID_operand; ID_operand = 0;} - if(ID_misc) {delete ID_misc; ID_misc = 0;} - if (coredynp.predictionW>0) - { - if(BTB) {delete BTB; BTB = 0;} - if(BPT) {delete BPT; BPT = 0;} - } + if (ifu) { + delete ifu; + ifu = NULL; + } + if (lsu) { + delete lsu; + lsu = NULL; + } + if (rnu) { + delete rnu; + rnu = NULL; + } + if (mmu) { + delete mmu; + mmu = NULL; + } + if (exu) { + delete exu; + exu = NULL; + } + if (corepipe) { + delete corepipe; + corepipe = NULL; + } + if (undiffCore) { + delete undiffCore; + undiffCore = NULL; + } + if (l2cache) { + delete l2cache; + l2cache = NULL; + } } -BranchPredictor ::~BranchPredictor(){ +void Core::initialize_params() { + memset(&core_params, 0, sizeof(CoreParameters)); + core_params.peak_issueW = -1; + core_params.peak_commitW = -1; +} - if (!exist) return; - if(globalBPT) {delete globalBPT; globalBPT = 0;} - if(localBPT) {delete localBPT; localBPT = 0;} - if(L1_localBPT) {delete 
L1_localBPT; L1_localBPT = 0;} - if(L2_localBPT) {delete L2_localBPT; L2_localBPT = 0;} - if(chooser) {delete chooser; chooser = 0;} - if(RAS) {delete RAS; RAS = 0;} - } +void Core::initialize_stats() { + memset(&core_stats, 0, sizeof(CoreStatistics)); + core_stats.IFU_duty_cycle = 1.0; + core_stats.ALU_duty_cycle = 1.0; + core_stats.FPU_duty_cycle = 1.0; + core_stats.MUL_duty_cycle = 1.0; + core_stats.ALU_cdb_duty_cycle = 1.0; + core_stats.FPU_cdb_duty_cycle = 1.0; + core_stats.MUL_cdb_duty_cycle = 1.0; + core_stats.pipeline_duty_cycle = 1.0; + core_stats.IFU_duty_cycle = 1.0; + core_stats.LSU_duty_cycle = 1.0; + core_stats.MemManU_D_duty_cycle = 1.0; + core_stats.MemManU_I_duty_cycle = 1.0; +} -RENAMINGU ::~RENAMINGU(){ - - if (!exist) return; - if(iFRAT ) {delete iFRAT; iFRAT = 0;} - if(fFRAT ) {delete fFRAT; fFRAT =0;} - if(iRRAT) {delete iRRAT; iRRAT = 0;} - if(iFRAT) {delete iFRAT; iFRAT = 0;} - if(ifreeL) {delete ifreeL;ifreeL= 0;} - if(ffreeL) {delete ffreeL;ffreeL= 0;} - if(idcl) {delete idcl; idcl = 0;} - if(fdcl) {delete fdcl; fdcl = 0;} - if(RAHT) {delete RAHT; RAHT = 0;} +void Core::set_core_param() { + initialize_params(); + initialize_stats(); + + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_STR_IF("name", name); + ASSIGN_INT_IF("opt_local", core_params.opt_local); + ASSIGN_FP_IF("clock_rate", core_params.clockRate); + ASSIGN_INT_IF("instruction_length", core_params.instruction_length); + ASSIGN_INT_IF("opcode_width", core_params.opcode_width); + ASSIGN_INT_IF("x86", core_params.x86); + ASSIGN_INT_IF("Embedded", core_params.Embedded); + ASSIGN_ENUM_IF("machine_type", core_params.core_ty, Core_type); + ASSIGN_INT_IF("micro_opcode_width", 
core_params.micro_opcode_length); + ASSIGN_INT_IF("number_hardware_threads", core_params.num_hthreads); + ASSIGN_INT_IF("fetch_width", core_params.fetchW); + ASSIGN_INT_IF("decode_width", core_params.decodeW); + ASSIGN_INT_IF("issue_width", core_params.issueW); + ASSIGN_INT_IF("peak_issue_width", core_params.peak_issueW); + ASSIGN_INT_IF("commit_width", core_params.commitW); + ASSIGN_INT_IF("prediction_width", core_params.predictionW); + ASSIGN_INT_IF("ALU_per_core", core_params.num_alus); + ASSIGN_INT_IF("FPU_per_core", core_params.num_fpus); + ASSIGN_INT_IF("MUL_per_core", core_params.num_muls); + ASSIGN_INT_IF("fp_issue_width", core_params.fp_issueW); + ASSIGN_ENUM_IF("instruction_window_scheme", core_params.scheu_ty, + Scheduler_type); + ASSIGN_ENUM_IF("rename_scheme", core_params.rm_ty, Renaming_type); + ASSIGN_INT_IF("archi_Regs_IRF_size", core_params.archi_Regs_IRF_size); + ASSIGN_INT_IF("archi_Regs_FRF_size", core_params.archi_Regs_FRF_size); + ASSIGN_INT_IF("ROB_size", core_params.ROB_size); + ASSIGN_INT_IF("ROB_assoc", core_params.ROB_assoc); + ASSIGN_INT_IF("ROB_nbanks", core_params.ROB_nbanks); + ASSIGN_INT_IF("ROB_tag_width", core_params.ROB_tag_width); + ASSIGN_INT_IF("scheduler_assoc", core_params.scheduler_assoc); + ASSIGN_INT_IF("scheduler_nbanks", core_params.scheduler_nbanks); + ASSIGN_INT_IF("register_window_size", + core_params.register_window_size); + ASSIGN_INT_IF("register_window_throughput", + core_params.register_window_throughput); + ASSIGN_INT_IF("register_window_latency", + core_params.register_window_latency); + ASSIGN_INT_IF("register_window_assoc", + core_params.register_window_assoc); + ASSIGN_INT_IF("register_window_nbanks", + core_params.register_window_nbanks); + ASSIGN_INT_IF("register_window_tag_width", + core_params.register_window_tag_width); + ASSIGN_INT_IF("register_window_rw_ports", + core_params.register_window_rw_ports); + ASSIGN_INT_IF("phy_Regs_IRF_size", core_params.phy_Regs_IRF_size); + 
ASSIGN_INT_IF("phy_Regs_IRF_assoc", core_params.phy_Regs_IRF_assoc); + ASSIGN_INT_IF("phy_Regs_IRF_nbanks", core_params.phy_Regs_IRF_nbanks); + ASSIGN_INT_IF("phy_Regs_IRF_tag_width", + core_params.phy_Regs_IRF_tag_width); + ASSIGN_INT_IF("phy_Regs_IRF_rd_ports", + core_params.phy_Regs_IRF_rd_ports); + ASSIGN_INT_IF("phy_Regs_IRF_wr_ports", + core_params.phy_Regs_IRF_wr_ports); + ASSIGN_INT_IF("phy_Regs_FRF_size", core_params.phy_Regs_FRF_size); + ASSIGN_INT_IF("phy_Regs_FRF_assoc", core_params.phy_Regs_FRF_assoc); + ASSIGN_INT_IF("phy_Regs_FRF_nbanks", core_params.phy_Regs_FRF_nbanks); + ASSIGN_INT_IF("phy_Regs_FRF_tag_width", + core_params.phy_Regs_FRF_tag_width); + ASSIGN_INT_IF("phy_Regs_FRF_rd_ports", + core_params.phy_Regs_FRF_rd_ports); + ASSIGN_INT_IF("phy_Regs_FRF_wr_ports", + core_params.phy_Regs_FRF_wr_ports); + ASSIGN_INT_IF("front_rat_nbanks", core_params.front_rat_nbanks); + ASSIGN_INT_IF("front_rat_rw_ports", core_params.front_rat_rw_ports); + ASSIGN_INT_IF("retire_rat_nbanks", core_params.retire_rat_nbanks); + ASSIGN_INT_IF("retire_rat_rw_ports", core_params.retire_rat_rw_ports); + ASSIGN_INT_IF("freelist_nbanks", core_params.freelist_nbanks); + ASSIGN_INT_IF("freelist_rw_ports", core_params.freelist_rw_ports); + ASSIGN_INT_IF("memory_ports", core_params.memory_ports); + ASSIGN_INT_IF("load_buffer_size", core_params.load_buffer_size); + ASSIGN_INT_IF("load_buffer_assoc", core_params.load_buffer_assoc); + ASSIGN_INT_IF("load_buffer_nbanks", core_params.load_buffer_nbanks); + ASSIGN_INT_IF("store_buffer_size", core_params.store_buffer_size); + ASSIGN_INT_IF("store_buffer_assoc", core_params.store_buffer_assoc); + ASSIGN_INT_IF("store_buffer_nbanks", core_params.store_buffer_nbanks); + ASSIGN_INT_IF("instruction_window_size", + core_params.instruction_window_size); + ASSIGN_INT_IF("fp_instruction_window_size", + core_params.fp_instruction_window_size); + ASSIGN_INT_IF("instruction_buffer_size", + core_params.instruction_buffer_size); + 
ASSIGN_INT_IF("instruction_buffer_assoc", + core_params.instruction_buffer_assoc); + ASSIGN_INT_IF("instruction_buffer_nbanks", + core_params.instruction_buffer_nbanks); + ASSIGN_INT_IF("instruction_buffer_tag_width", + core_params.instruction_buffer_tag_width); + ASSIGN_INT_IF("number_instruction_fetch_ports", + core_params.number_instruction_fetch_ports); + ASSIGN_INT_IF("RAS_size", core_params.RAS_size); + ASSIGN_ENUM_IF("execu_broadcast_wt", core_params.execu_broadcast_wt, + Wire_type); + ASSIGN_INT_IF("execu_wire_mat_type", core_params.execu_wire_mat_type); + ASSIGN_INT_IF("execu_int_bypass_ports", + core_params.execu_int_bypass_ports); + ASSIGN_INT_IF("execu_mul_bypass_ports", + core_params.execu_mul_bypass_ports); + ASSIGN_INT_IF("execu_fp_bypass_ports", + core_params.execu_fp_bypass_ports); + ASSIGN_ENUM_IF("execu_bypass_wire_type", + core_params.execu_bypass_wire_type, Wire_type); + ASSIGN_FP_IF("execu_bypass_base_width", + core_params.execu_bypass_base_width); + ASSIGN_FP_IF("execu_bypass_base_height", + core_params.execu_bypass_base_height); + ASSIGN_INT_IF("execu_bypass_start_wiring_level", + core_params.execu_bypass_start_wiring_level); + ASSIGN_FP_IF("execu_bypass_route_over_perc", + core_params.execu_bypass_route_over_perc); + ASSIGN_FP_IF("broadcast_numerator", core_params.broadcast_numerator); + ASSIGN_INT_IF("int_pipeline_depth", core_params.pipeline_stages); + ASSIGN_INT_IF("fp_pipeline_depth", core_params.fp_pipeline_stages); + ASSIGN_INT_IF("int_pipelines", core_params.num_pipelines); + ASSIGN_INT_IF("fp_pipelines", core_params.num_fp_pipelines); + ASSIGN_INT_IF("globalCheckpoint", core_params.globalCheckpoint); + ASSIGN_INT_IF("perThreadState", core_params.perThreadState); + ASSIGN_INT_IF("instruction_length", core_params.instruction_length); + + else { + warnUnrecognizedParam(node_name); } + } -LoadStoreU ::~LoadStoreU(){ - - if (!exist) return; - if(LSQ) {delete LSQ; LSQ = 0;} + // Change from MHz to Hz + core_params.clockRate *= 1e6; + 
clockRate = core_params.clockRate; + + core_params.peak_commitW = core_params.peak_issueW; + core_params.fp_decodeW = core_params.fp_issueW; + + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("ALU_duty_cycle", core_stats.ALU_duty_cycle); + ASSIGN_FP_IF("FPU_duty_cycle", core_stats.FPU_duty_cycle); + ASSIGN_FP_IF("MUL_duty_cycle", core_stats.MUL_duty_cycle); + ASSIGN_FP_IF("ALU_cdb_duty_cycle", core_stats.ALU_cdb_duty_cycle); + ASSIGN_FP_IF("FPU_cdb_duty_cycle", core_stats.FPU_cdb_duty_cycle); + ASSIGN_FP_IF("MUL_cdb_duty_cycle", core_stats.MUL_cdb_duty_cycle); + ASSIGN_FP_IF("pipeline_duty_cycle", core_stats.pipeline_duty_cycle); + ASSIGN_FP_IF("total_cycles", core_stats.total_cycles); + ASSIGN_FP_IF("busy_cycles", core_stats.busy_cycles); + ASSIGN_FP_IF("idle_cycles", core_stats.idle_cycles); + ASSIGN_FP_IF("IFU_duty_cycle", core_stats.IFU_duty_cycle); + ASSIGN_FP_IF("BR_duty_cycle", core_stats.BR_duty_cycle); + ASSIGN_FP_IF("LSU_duty_cycle", core_stats.LSU_duty_cycle); + ASSIGN_FP_IF("MemManU_D_duty_cycle", core_stats.MemManU_D_duty_cycle); + ASSIGN_FP_IF("MemManU_I_duty_cycle", core_stats.MemManU_I_duty_cycle); + ASSIGN_FP_IF("cdb_fpu_accesses", core_stats.cdb_fpu_accesses); + ASSIGN_FP_IF("cdb_alu_accesses", core_stats.cdb_alu_accesses); + ASSIGN_FP_IF("cdb_mul_accesses", core_stats.cdb_mul_accesses); + ASSIGN_FP_IF("function_calls", core_stats.function_calls); + ASSIGN_FP_IF("total_instructions", core_stats.total_instructions); + ASSIGN_FP_IF("int_instructions", core_stats.int_instructions); + ASSIGN_FP_IF("fp_instructions", core_stats.fp_instructions); + ASSIGN_FP_IF("branch_instructions", core_stats.branch_instructions); + ASSIGN_FP_IF("branch_mispredictions", + 
core_stats.branch_mispredictions); + ASSIGN_FP_IF("load_instructions", core_stats.load_instructions); + ASSIGN_FP_IF("store_instructions", core_stats.store_instructions); + ASSIGN_FP_IF("committed_instructions", + core_stats.committed_instructions); + ASSIGN_FP_IF("committed_int_instructions", + core_stats.committed_int_instructions); + ASSIGN_FP_IF("committed_fp_instructions", + core_stats.committed_fp_instructions); + ASSIGN_FP_IF("ROB_reads", core_stats.ROB_reads); + ASSIGN_FP_IF("ROB_writes", core_stats.ROB_writes); + ASSIGN_FP_IF("rename_reads", core_stats.rename_reads); + ASSIGN_FP_IF("rename_writes", core_stats.rename_writes); + ASSIGN_FP_IF("fp_rename_reads", core_stats.fp_rename_reads); + ASSIGN_FP_IF("fp_rename_writes", core_stats.fp_rename_writes); + ASSIGN_FP_IF("inst_window_reads", core_stats.inst_window_reads); + ASSIGN_FP_IF("inst_window_writes", core_stats.inst_window_writes); + ASSIGN_FP_IF("inst_window_wakeup_accesses", + core_stats.inst_window_wakeup_accesses); + ASSIGN_FP_IF("fp_inst_window_reads", core_stats.fp_inst_window_reads); + ASSIGN_FP_IF("fp_inst_window_writes", + core_stats.fp_inst_window_writes); + ASSIGN_FP_IF("fp_inst_window_wakeup_accesses", + core_stats.fp_inst_window_wakeup_accesses); + ASSIGN_FP_IF("int_regfile_reads", core_stats.int_regfile_reads); + ASSIGN_FP_IF("float_regfile_reads", core_stats.float_regfile_reads); + ASSIGN_FP_IF("int_regfile_writes", core_stats.int_regfile_writes); + ASSIGN_FP_IF("float_regfile_writes", core_stats.float_regfile_writes); + ASSIGN_FP_IF("context_switches", core_stats.context_switches); + ASSIGN_FP_IF("ialu_accesses", core_stats.ialu_accesses); + ASSIGN_FP_IF("fpu_accesses", core_stats.fpu_accesses); + ASSIGN_FP_IF("mul_accesses", core_stats.mul_accesses); + + else { + warnUnrecognizedStat(node_name); } + } -MemManU ::~MemManU(){ + // Initialize a few variables + core_params.multithreaded = core_params.num_hthreads > 1 ? 
true : false; + core_params.pc_width = virtual_address_width; + core_params.v_address_width = virtual_address_width; + core_params.p_address_width = physical_address_width; + core_params.int_data_width = int(ceil(data_path_width / 32.0)) * 32; + core_params.fp_data_width = core_params.int_data_width; + core_params.arch_ireg_width = + int(ceil(log2(core_params.archi_Regs_IRF_size))); + core_params.arch_freg_width + = int(ceil(log2(core_params.archi_Regs_FRF_size))); + core_params.num_IRF_entry = core_params.archi_Regs_IRF_size; + core_params.num_FRF_entry = core_params.archi_Regs_FRF_size; + + if (core_params.instruction_length <= 0) { + errorNonPositiveParam("instruction_length"); + } - if (!exist) return; - if(itlb) {delete itlb; itlb = 0;} - if(dtlb) {delete dtlb; dtlb = 0;} - } + if (core_params.num_hthreads <= 0) { + errorNonPositiveParam("number_hardware_threads"); + } -RegFU ::~RegFU(){ + if (core_params.opcode_width <= 0) { + errorNonPositiveParam("opcode_width"); + } - if (!exist) return; - if(IRF) {delete IRF; IRF = 0;} - if(FRF) {delete FRF; FRF = 0;} - if(RFWIN) {delete RFWIN; RFWIN = 0;} - } + if (core_params.instruction_buffer_size <= 0) { + errorNonPositiveParam("instruction_buffer_size"); + } -SchedulerU ::~SchedulerU(){ + if (core_params.number_instruction_fetch_ports <= 0) { + errorNonPositiveParam("number_instruction_fetch_ports"); + } - if (!exist) return; - if(int_inst_window) {delete int_inst_window; int_inst_window = 0;} - if(fp_inst_window) {delete int_inst_window; int_inst_window = 0;} - if(ROB) {delete ROB; ROB = 0;} - if(instruction_selection) {delete instruction_selection;instruction_selection = 0;} - } + if (core_params.peak_issueW <= 0) { + errorNonPositiveParam("peak_issue_width"); + } else { + assert(core_params.peak_commitW > 0); + } -EXECU ::~EXECU(){ - - if (!exist) return; - if(int_bypass) {delete int_bypass; int_bypass = 0;} - if(intTagBypass) {delete intTagBypass; intTagBypass =0;} - if(int_mul_bypass) {delete int_mul_bypass; 
int_mul_bypass = 0;} - if(intTag_mul_Bypass) {delete intTag_mul_Bypass; intTag_mul_Bypass =0;} - if(fp_bypass) {delete fp_bypass;fp_bypass = 0;} - if(fpTagBypass) {delete fpTagBypass;fpTagBypass = 0;} - if(fp_u) {delete fp_u;fp_u = 0;} - if(exeu) {delete exeu;exeu = 0;} - if(mul) {delete mul;mul = 0;} - if(rfu) {delete rfu;rfu = 0;} - if(scheu) {delete scheu; scheu = 0;} + if (core_params.core_ty == OOO) { + if (core_params.scheu_ty == PhysicalRegFile) { + core_params.phy_ireg_width = + int(ceil(log2(core_params.phy_Regs_IRF_size))); + core_params.phy_freg_width = + int(ceil(log2(core_params.phy_Regs_FRF_size))); + core_params.num_ifreelist_entries = + core_params.num_IRF_entry = core_params.phy_Regs_IRF_size; + core_params.num_ffreelist_entries = + core_params.num_FRF_entry = core_params.phy_Regs_FRF_size; + } else if (core_params.scheu_ty == ReservationStation) { + core_params.phy_ireg_width = int(ceil(log2(core_params.ROB_size))); + core_params.phy_freg_width = int(ceil(log2(core_params.ROB_size))); + core_params.num_ifreelist_entries = core_params.ROB_size; + core_params.num_ffreelist_entries = core_params.ROB_size; } + } -Core ::~Core(){ + core_params.regWindowing = + (core_params.register_window_size > 0 && + core_params.core_ty == Inorder) ? 
true : false; - if(ifu) {delete ifu; ifu = 0;} - if(lsu) {delete lsu; lsu = 0;} - if(rnu) {delete rnu; rnu = 0;} - if(mmu) {delete mmu; mmu = 0;} - if(exu) {delete exu; exu = 0;} - if(corepipe) {delete corepipe; corepipe = 0;} - if(undiffCore) {delete undiffCore;undiffCore = 0;} - if(l2cache) {delete l2cache;l2cache = 0;} + if (core_params.regWindowing) { + if (core_params.register_window_throughput <= 0) { + errorNonPositiveParam("register_window_throughput"); + } else if (core_params.register_window_latency <= 0) { + errorNonPositiveParam("register_window_latency"); } + } -void Core::set_core_param() -{ - coredynp.opt_local = XML->sys.core[ithCore].opt_local; - coredynp.x86 = XML->sys.core[ithCore].x86; - coredynp.Embedded = XML->sys.Embedded; - coredynp.core_ty = (enum Core_type)XML->sys.core[ithCore].machine_type; - coredynp.rm_ty = (enum Renaming_type)XML->sys.core[ithCore].rename_scheme; - coredynp.fetchW = XML->sys.core[ithCore].fetch_width; - coredynp.decodeW = XML->sys.core[ithCore].decode_width; - coredynp.issueW = XML->sys.core[ithCore].issue_width; - coredynp.peak_issueW = XML->sys.core[ithCore].peak_issue_width; - coredynp.commitW = XML->sys.core[ithCore].commit_width; - coredynp.peak_commitW = XML->sys.core[ithCore].peak_issue_width; - coredynp.predictionW = XML->sys.core[ithCore].prediction_width; - coredynp.fp_issueW = XML->sys.core[ithCore].fp_issue_width; - coredynp.fp_decodeW = XML->sys.core[ithCore].fp_issue_width; - coredynp.num_alus = XML->sys.core[ithCore].ALU_per_core; - coredynp.num_fpus = XML->sys.core[ithCore].FPU_per_core; - coredynp.num_muls = XML->sys.core[ithCore].MUL_per_core; - - - coredynp.num_hthreads = XML->sys.core[ithCore].number_hardware_threads; - coredynp.multithreaded = coredynp.num_hthreads>1? 
true:false; - coredynp.instruction_length = XML->sys.core[ithCore].instruction_length; - coredynp.pc_width = XML->sys.virtual_address_width; - - coredynp.opcode_length = XML->sys.core[ithCore].opcode_width; - coredynp.micro_opcode_length = XML->sys.core[ithCore].micro_opcode_width; - coredynp.num_pipelines = XML->sys.core[ithCore].pipelines_per_core[0]; - coredynp.pipeline_stages = XML->sys.core[ithCore].pipeline_depth[0]; - coredynp.num_fp_pipelines = XML->sys.core[ithCore].pipelines_per_core[1]; - coredynp.fp_pipeline_stages = XML->sys.core[ithCore].pipeline_depth[1]; - coredynp.int_data_width = int(ceil(XML->sys.machine_bits/32.0))*32; - coredynp.fp_data_width = coredynp.int_data_width; - coredynp.v_address_width = XML->sys.virtual_address_width; - coredynp.p_address_width = XML->sys.physical_address_width; - - coredynp.scheu_ty = (enum Scheduler_type)XML->sys.core[ithCore].instruction_window_scheme; - coredynp.arch_ireg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_IRF_size))); - coredynp.arch_freg_width = int(ceil(log2(XML->sys.core[ithCore].archi_Regs_FRF_size))); - coredynp.num_IRF_entry = XML->sys.core[ithCore].archi_Regs_IRF_size; - coredynp.num_FRF_entry = XML->sys.core[ithCore].archi_Regs_FRF_size; - coredynp.pipeline_duty_cycle = XML->sys.core[ithCore].pipeline_duty_cycle; - coredynp.total_cycles = XML->sys.core[ithCore].total_cycles; - coredynp.busy_cycles = XML->sys.core[ithCore].busy_cycles; - coredynp.idle_cycles = XML->sys.core[ithCore].idle_cycles; - - //Max power duty cycle for peak power estimation -// if (coredynp.core_ty==OOO) -// { -// coredynp.IFU_duty_cycle = 1; -// coredynp.LSU_duty_cycle = 1; -// coredynp.MemManU_I_duty_cycle =1; -// coredynp.MemManU_D_duty_cycle =1; -// coredynp.ALU_duty_cycle =1; -// coredynp.MUL_duty_cycle =1; -// coredynp.FPU_duty_cycle =1; -// coredynp.ALU_cdb_duty_cycle =1; -// coredynp.MUL_cdb_duty_cycle =1; -// coredynp.FPU_cdb_duty_cycle =1; -// } -// else -// { - coredynp.IFU_duty_cycle = 
XML->sys.core[ithCore].IFU_duty_cycle; - coredynp.BR_duty_cycle = XML->sys.core[ithCore].BR_duty_cycle; - coredynp.LSU_duty_cycle = XML->sys.core[ithCore].LSU_duty_cycle; - coredynp.MemManU_I_duty_cycle = XML->sys.core[ithCore].MemManU_I_duty_cycle; - coredynp.MemManU_D_duty_cycle = XML->sys.core[ithCore].MemManU_D_duty_cycle; - coredynp.ALU_duty_cycle = XML->sys.core[ithCore].ALU_duty_cycle; - coredynp.MUL_duty_cycle = XML->sys.core[ithCore].MUL_duty_cycle; - coredynp.FPU_duty_cycle = XML->sys.core[ithCore].FPU_duty_cycle; - coredynp.ALU_cdb_duty_cycle = XML->sys.core[ithCore].ALU_cdb_duty_cycle; - coredynp.MUL_cdb_duty_cycle = XML->sys.core[ithCore].MUL_cdb_duty_cycle; - coredynp.FPU_cdb_duty_cycle = XML->sys.core[ithCore].FPU_cdb_duty_cycle; -// } - - - if (!((coredynp.core_ty==OOO)||(coredynp.core_ty==Inorder))) - { - cout<<"Invalid Core Type"<sys.core[ithCore].phy_Regs_IRF_size))); - coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].phy_Regs_FRF_size))); - coredynp.num_ifreelist_entries = coredynp.num_IRF_entry = XML->sys.core[ithCore].phy_Regs_IRF_size; - coredynp.num_ffreelist_entries = coredynp.num_FRF_entry = XML->sys.core[ithCore].phy_Regs_FRF_size; - } - else if (coredynp.scheu_ty==ReservationStation) - {//ROB serves as Phy RF in RS based OOO - coredynp.phy_ireg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.phy_freg_width = int(ceil(log2(XML->sys.core[ithCore].ROB_size))); - coredynp.num_ifreelist_entries = XML->sys.core[ithCore].ROB_size; - coredynp.num_ffreelist_entries = XML->sys.core[ithCore].ROB_size; + if (!((core_params.scheu_ty == PhysicalRegFile) || + (core_params.scheu_ty == ReservationStation))) { + cout << "Invalid OOO Scheduler Type" << endl; + exit(0); + } - } + if (!((core_params.rm_ty == RAMbased) || + (core_params.rm_ty == CAMbased))) { + cout << "Invalid OOO Renaming Type" << endl; + exit(0); + } -} - coredynp.globalCheckpoint = 32;//best check pointing entries for a 4~8 issue OOO should be 16~48;See 
TR for reference. - coredynp.perThreadState = 8; - coredynp.instruction_length = 32; - coredynp.clockRate = XML->sys.core[ithCore].clock_rate; - coredynp.clockRate *= 1e6; - coredynp.regWindowing= (XML->sys.core[ithCore].register_windows_size>0&&coredynp.core_ty==Inorder)?true:false; - coredynp.executionTime = XML->sys.total_cycles/coredynp.clockRate; - set_pppm(coredynp.pppm_lkg_multhread, 0, coredynp.num_hthreads, coredynp.num_hthreads, 0); } diff --git a/ext/mcpat/core.h b/ext/mcpat/core.h index 8ef3babdd..206fe6d58 100644 --- a/ext/mcpat/core.h +++ b/ext/mcpat/core.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -33,230 +34,305 @@ #ifndef CORE_H_ #define CORE_H_ -#include "XML_Parse.h" #include "array.h" #include "basic_components.h" +#include "cacheunit.h" #include "interconnect.h" #include "logic.h" #include "parameter.h" -#include "sharedcache.h" - -class BranchPredictor :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - ArrayST * globalBPT; - ArrayST * localBPT; - ArrayST * L1_localBPT; - ArrayST * L2_localBPT; - ArrayST * chooser; - ArrayST * RAS; - bool exist; - - BranchPredictor(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~BranchPredictor(); + +// Macros used in the various core-related classes +#define NUM_SOURCE_OPERANDS 2 +#define NUM_INT_INST_SOURCE_OPERANDS 2 + +class BranchPredictorParameters { +public: + int assoc; + int nbanks; + int local_l1_predictor_size; + int local_l2_predictor_size; + int local_predictor_entries; + int global_predictor_bits; + int global_predictor_entries; + int chooser_predictor_bits; + int chooser_predictor_entries; +}; + +class BranchPredictor : public McPATComponent { +public: + ArrayST* globalBPT; + ArrayST* localBPT; + ArrayST* L1_localBPT; + ArrayST* L2_localBPT; + ArrayST* chooser; + ArrayST* RAS; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + BranchPredictorParameters branch_pred_params; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + bool exist; + + BranchPredictor(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exsit = true); + void set_params_stats(); + 
void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~BranchPredictor(); +}; + +class InstFetchParameters { +public: + int btb_size; + int btb_block_size; + int btb_assoc; + int btb_num_banks; + int btb_latency; + int btb_throughput; + int btb_rw_ports; }; +class InstFetchStatistics { +public: + double btb_read_accesses; + double btb_write_accesses; +}; + +class InstFetchU : public McPATComponent { +public: + CacheUnit* icache; + ArrayST* IB; + ArrayST* BTB; + BranchPredictor* BPT; + InstructionDecoder* ID_inst; + InstructionDecoder* ID_operand; + InstructionDecoder* ID_misc; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + InstFetchParameters inst_fetch_params; + InstFetchStatistics inst_fetch_stats; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + enum Cache_policy cache_p; + bool exist; + + InstFetchU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exsit = true); + void set_params_stats(); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~InstFetchU(); +}; + + +class SchedulerU : public McPATComponent { +public: + static int ROB_STATUS_BITS; -class InstFetchU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - enum Cache_policy cache_p; - InstCache icache; - ArrayST * IB; - ArrayST * BTB; - BranchPredictor * BPT; - inst_decoder * ID_inst; - inst_decoder * ID_operand; - inst_decoder * ID_misc; - bool exist; - - InstFetchU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exsit=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~InstFetchU(); + ArrayST* int_inst_window; + 
ArrayST* fp_inst_window; + ArrayST* ROB; + selection_logic* int_instruction_selection; + selection_logic* fp_instruction_selection; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double Iw_height, fp_Iw_height, ROB_height; + bool exist; + + SchedulerU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~SchedulerU(); }; +class RENAMINGU : public McPATComponent { +public: + ArrayST* iFRAT; + ArrayST* fFRAT; + ArrayST* iRRAT; + ArrayST* fRRAT; + ArrayST* ifreeL; + ArrayST* ffreeL; + dep_resource_conflict_check* idcl; + dep_resource_conflict_check* fdcl; + ArrayST* RAHT; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + bool exist; -class SchedulerU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double Iw_height, fp_Iw_height,ROB_height; - ArrayST * int_inst_window; - ArrayST * fp_inst_window; - ArrayST * ROB; - selection_logic * instruction_selection; + RENAMINGU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~RENAMINGU(); +}; + +class LoadStoreU : public McPATComponent { +public: + CacheUnit* dcache; + ArrayST* LSQ; + ArrayST* LoadQ; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + enum Cache_policy cache_p; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double lsq_height; bool exist; - SchedulerU(ParseXML *XML_interface, int ithCore_, 
InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~SchedulerU(); + LoadStoreU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~LoadStoreU(); }; -class RENAMINGU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate,executionTime; - CoreDynParam coredynp; - ArrayST * iFRAT; - ArrayST * fFRAT; - ArrayST * iRRAT; - ArrayST * fRRAT; - ArrayST * ifreeL; - ArrayST * ffreeL; - dep_resource_conflict_check * idcl; - dep_resource_conflict_check * fdcl; - ArrayST * RAHT;//register alias history table Used to store GC - bool exist; - - - RENAMINGU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~RENAMINGU(); +class MemoryManagementParams { +public: + int itlb_number_entries; + double itlb_latency; + double itlb_throughput; + int itlb_assoc; + int itlb_nbanks; + int dtlb_number_entries; + double dtlb_latency; + double dtlb_throughput; + int dtlb_assoc; + int dtlb_nbanks; }; -class LoadStoreU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - enum Cache_policy cache_p; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double lsq_height; - DataCache dcache; - ArrayST * LSQ;//it is actually the store queue but for inorder processors it serves as both loadQ and StoreQ - ArrayST * LoadQ; - bool exist; - - LoadStoreU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const 
CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~LoadStoreU(); +class MemoryManagementStats { +public: + double itlb_total_accesses; + double itlb_total_misses; + double itlb_conflicts; + double dtlb_read_accesses; + double dtlb_read_misses; + double dtlb_write_accesses; + double dtlb_write_misses; + double dtlb_conflicts; }; -class MemManU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - ArrayST * itlb; - ArrayST * dtlb; - bool exist; - - MemManU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MemManU(); +class MemManU : public McPATComponent { +public: + ArrayST* itlb; + ArrayST* dtlb; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + MemoryManagementParams mem_man_params; + MemoryManagementStats mem_man_stats; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + bool exist; + + MemManU(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_ = true); + void set_params_stats(); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~MemManU(); }; -class RegFU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double int_regfile_height, fp_regfile_height; - ArrayST * IRF; - ArrayST * FRF; - ArrayST * RFWIN; - bool exist; - - RegFU(ParseXML *XML_interface, int ithCore_, InputParameter* 
interface_ip_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~RegFU(); +class RegFU : public McPATComponent { +public: + static int RFWIN_ACCESS_MULTIPLIER; + + ArrayST* IRF; + ArrayST* FRF; + ArrayST* RFWIN; + + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double int_regfile_height, fp_regfile_height; + bool exist; + + RegFU(XMLNode* _xml_data, + InputParameter* interface_ip_, const CoreParameters & _core_params, + const CoreStatistics & _core_stats, + bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~RegFU(); }; -class EXECU :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - double lsq_height; - CoreDynParam coredynp; - RegFU * rfu; - SchedulerU * scheu; - FunctionalUnit * fp_u; - FunctionalUnit * exeu; - FunctionalUnit * mul; - interconnect * int_bypass; - interconnect * intTagBypass; - interconnect * int_mul_bypass; - interconnect * intTag_mul_Bypass; - interconnect * fp_bypass; - interconnect * fpTagBypass; - - Component bypass; - bool exist; - - EXECU(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_, double lsq_height_,const CoreDynParam & dyn_p_, bool exist_=true); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~EXECU(); +class EXECU : public McPATComponent { +public: + RegFU* rfu; + SchedulerU* scheu; + FunctionalUnit* fp_u; + FunctionalUnit* exeu; + FunctionalUnit* mul; + Interconnect* int_bypass; + Interconnect* intTagBypass; + Interconnect* int_mul_bypass; + Interconnect* intTag_mul_Bypass; + Interconnect* fp_bypass; + Interconnect* fpTagBypass; + + 
InputParameter interface_ip; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + double lsq_height; + CoreParameters core_params; + CoreStatistics core_stats; + bool exist; + + EXECU(XMLNode* _xml_data, InputParameter* interface_ip_, + double lsq_height_, const CoreParameters & _core_params, + const CoreStatistics & _core_stats, bool exist_ = true); + void computeEnergy(); + void displayData(uint32_t indent = 0, int plevel = 100); + ~EXECU(); }; -class Core :public Component { - public: - - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - InstFetchU * ifu; - LoadStoreU * lsu; - MemManU * mmu; - EXECU * exu; - RENAMINGU * rnu; - Pipeline * corepipe; - UndiffCore * undiffCore; - SharedCache * l2cache; - CoreDynParam coredynp; - //full_decoder inst_decoder; - //clock_network clockNetwork; - Core(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_); - void set_core_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~Core(); +class Core : public McPATComponent { +public: + InstFetchU* ifu; + LoadStoreU* lsu; + MemManU* mmu; + EXECU* exu; + RENAMINGU* rnu; + Pipeline* corepipe; + UndiffCore* undiffCore; + CacheUnit* l2cache; + + int ithCore; + InputParameter interface_ip; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + CoreParameters core_params; + CoreStatistics core_stats; + + // TODO: Migrate component ID handling into the XML data to remove this + // ithCore variable + Core(XMLNode* _xml_data, int _ithCore, InputParameter* interface_ip_); + void initialize_params(); + void initialize_stats(); + void set_core_param(); + void computeEnergy(); + ~Core(); }; #endif /* CORE_H_ */ diff --git a/ext/mcpat/globalvar.h b/ext/mcpat/globalvar.h deleted file mode 100644 index 953257653..000000000 --- a/ext/mcpat/globalvar.h +++ /dev/null @@ -1,48 +0,0 @@ 
-/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - - -#ifndef GLOBALVAR_H_ -#define GLOBALVAR_H_ - -#ifdef GLOBALVAR -#define EXTERN -#else -#define EXTERN extern -#endif - -EXTERN bool opt_for_clk; - -#endif /* GLOBALVAR_H_ */ - - - - diff --git a/ext/mcpat/interconnect.cc b/ext/mcpat/interconnect.cc index ba502b6a8..98fbc3e54 100644 --- a/ext/mcpat/interconnect.cc +++ b/ext/mcpat/interconnect.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ @@ -33,173 +34,178 @@ #include #include -#include "globalvar.h" +#include "basic_components.h" #include "interconnect.h" #include "wire.h" -interconnect::interconnect( - string name_, - enum Device_ty device_ty_, - double base_w, double base_h, - int data_w, double len,const InputParameter *configure_interface, - int start_wiring_level_, - bool pipelinable_ , - double route_over_perc_ , - bool opt_local_, - enum Core_type core_ty_, - enum Wire_type wire_model, - double width_s, double space_s, - TechnologyParameter::DeviceType *dt -) - :name(name_), - device_ty(device_ty_), - in_rise_time(0), - out_rise_time(0), - base_width(base_w), - base_height(base_h), - data_width(data_w), - wt(wire_model), - width_scaling(width_s), - space_scaling(space_s), - start_wiring_level(start_wiring_level_), - length(len), - //interconnect_latency(1e-12), - //interconnect_throughput(1e-12), - opt_local(opt_local_), - core_ty(core_ty_), - pipelinable(pipelinable_), - route_over_perc(route_over_perc_), - deviceType(dt) -{ - - wt = Global; - l_ip=*configure_interface; - local_result = init_interface(&l_ip); - - - max_unpipelined_link_delay = 0; //TODO - min_w_nmos = g_tp.min_w_nmos_; - min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; - - - - latency = l_ip.latency; - throughput = l_ip.throughput; - latency_overflow=false; - throughput_overflow=false; - - /* - * TODO: Add wiring option from semi-global to global automatically - * And directly jump to global if semi-global cannot satisfy timing - * Fat wires only available for global wires, thus - * if signal wiring layer starts from semi-global, - * the next layer up will be global, i.e., semi-global does - * not have fat wires. 
- */ - if (pipelinable == false) - //Non-pipelinable wires, such as bypass logic, care latency - { - compute(); - if (opt_for_clk && opt_local) - { - while (delay > latency && width_scaling<3.0) - { - width_scaling *= 2; - space_scaling *= 2; - Wire winit(width_scaling, space_scaling); - compute(); - } - if (delay > latency) - { - latency_overflow=true; - } - } - } - else //Pipelinable wires, such as bus, does not care latency but throughput - { - /* - * TODO: Add pipe regs power, area, and timing; - * Pipelinable wires optimize latency first. - */ - compute(); - if (opt_for_clk && opt_local) - { - while (delay > throughput && width_scaling<3.0) - { - width_scaling *= 2; - space_scaling *= 2; - Wire winit(width_scaling, space_scaling); - compute(); - } - if (delay > throughput) - // insert pipeline stages - { - num_pipe_stages = (int)ceil(delay/throughput); - assert(num_pipe_stages>0); - delay = delay/num_pipe_stages + num_pipe_stages*0.05*delay; - } - } - } +double Interconnect::width_scaling_threshold = 3.0; + +Interconnect::Interconnect(XMLNode* _xml_data, string name_, + enum Device_ty device_ty_, double base_w, + double base_h, int data_w, + double len, + const InputParameter *configure_interface, + int start_wiring_level_, double _clockRate, + bool pipelinable_, double route_over_perc_, + bool opt_local_, enum Core_type core_ty_, + enum Wire_type wire_model, + double width_s, double space_s, + TechnologyParameter::DeviceType *dt) + : McPATComponent(_xml_data), device_ty(device_ty_), in_rise_time(0), + out_rise_time(0), base_width(base_w), base_height(base_h), + data_width(data_w), wt(wire_model), width_scaling(width_s), + space_scaling(space_s), start_wiring_level(start_wiring_level_), + length(len), opt_local(opt_local_), core_ty(core_ty_), + pipelinable(pipelinable_), route_over_perc(route_over_perc_), + deviceType(dt) { + name = name_; + clockRate = _clockRate; + l_ip = *configure_interface; + local_result = init_interface(&l_ip, name); + + 
max_unpipelined_link_delay = 0; + min_w_nmos = g_tp.min_w_nmos_; + min_w_pmos = deviceType->n_to_p_eff_curr_drv_ratio * min_w_nmos; + + + + latency = l_ip.latency; + throughput = l_ip.throughput; + latency_overflow = false; + throughput_overflow = false; + + if (pipelinable == false) { + //Non-pipelinable wires, such as bypass logic, care about latency + calcWireData(); + if (opt_for_clk && opt_local) { + while (delay > latency && + width_scaling < width_scaling_threshold) { + width_scaling *= 2; + space_scaling *= 2; + Wire winit(width_scaling, space_scaling); + calcWireData(); + } + if (delay > latency) { + latency_overflow = true; + } + } + } else { + //Pipelinable wires, such as buses, care about throughput rather than latency + calcWireData(); + if (opt_for_clk && opt_local) { + while (delay > throughput && + width_scaling < width_scaling_threshold) { + width_scaling *= 2; + space_scaling *= 2; + Wire winit(width_scaling, space_scaling); + calcWireData(); + } + if (delay > throughput) { + // insert pipeline stages + num_pipe_stages = (int)ceil(delay / throughput); + assert(num_pipe_stages > 0); + delay = delay / num_pipe_stages + num_pipe_stages * 0.05 * delay; + } + } + } + + power_bit = power; + power.readOp.dynamic *= data_width; + power.readOp.leakage *= data_width; + power.readOp.gate_leakage *= data_width; + area.set_area(area.get_area()*data_width); + no_device_under_wire_area.h *= data_width; + + if (latency_overflow == true) { + cout << "Warning: " << name + << " wire structure cannot satisfy latency constraint."
<< endl; + } + + assert(power.readOp.dynamic > 0); + assert(power.readOp.leakage > 0); + assert(power.readOp.gate_leakage > 0); + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + + //Only global wires have the option to choose whether to route over or not + if (pipelinable) + area.set_area(area.get_area() * route_over_perc + + no_device_under_wire_area.get_area() * + (1 - route_over_perc)); + + Wire wreset(); +} - power_bit = power; - power.readOp.dynamic *= data_width; - power.readOp.leakage *= data_width; - power.readOp.gate_leakage *= data_width; - area.set_area(area.get_area()*data_width); - no_device_under_wire_area.h *= data_width; - if (latency_overflow==true) - cout<< "Warning: "<< name <<" wire structure cannot satisfy latency constraint."
<< endl; +void +Interconnect::calcWireData() { - assert(power.readOp.dynamic > 0); - assert(power.readOp.leakage > 0); - assert(power.readOp.gate_leakage > 0); + Wire *wtemp1 = 0; + wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling); + delay = wtemp1->delay; + power.readOp.dynamic = wtemp1->power.readOp.dynamic; + power.readOp.leakage = wtemp1->power.readOp.leakage; + power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage; - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); + area.set_area(wtemp1->area.get_area()); + no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing); + no_device_under_wire_area.w = length; - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; + if (wtemp1) + delete wtemp1; - power.readOp.longer_channel_leakage = - power.readOp.leakage*long_channel_device_reduction; - - if (pipelinable)//Only global wires has the option to choose whether routing over or not - area.set_area(area.get_area()*route_over_perc + no_device_under_wire_area.get_area()*(1-route_over_perc)); - - Wire wreset(); } - - void -interconnect::compute() -{ - - Wire *wtemp1 = 0; - wtemp1 = new Wire(wt, length, 1, width_scaling, space_scaling); - delay = wtemp1->delay; - power.readOp.dynamic = wtemp1->power.readOp.dynamic; - power.readOp.leakage = wtemp1->power.readOp.leakage; - power.readOp.gate_leakage = wtemp1->power.readOp.gate_leakage; - - area.set_area(wtemp1->area.get_area()); - no_device_under_wire_area.h = (wtemp1->wire_width + wtemp1->wire_spacing); - no_device_under_wire_area.w = length; +Interconnect::computeEnergy() { + double pppm_t[4] = {1, 1, 1, 1}; + + // Compute TDP + power_t.reset(); + set_pppm(pppm_t, int_params.active_ports * int_stats.duty_cycle, + int_params.active_ports, int_params.active_ports, + int_params.active_ports * int_stats.duty_cycle); + power_t = power * pppm_t; + + 
rt_power.reset(); + set_pppm(pppm_t, int_stats.accesses, int_params.active_ports, + int_params.active_ports, int_stats.accesses); + rt_power = power * pppm_t; + + output_data.peak_dynamic_power = power_t.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power_t.readOp.leakage; + output_data.gate_leakage_power = power_t.readOp.gate_leakage; + output_data.runtime_dynamic_energy = rt_power.readOp.dynamic; +} - if (wtemp1) - delete wtemp1; +void +Interconnect::computeArea() { + output_data.area = area.get_area() / 1e6; +} +void +Interconnect::set_params_stats(double active_ports, + double duty_cycle, double accesses) { + int_params.active_ports = active_ports; + int_stats.duty_cycle = duty_cycle; + int_stats.accesses = accesses; } -void interconnect::leakage_feedback(double temperature) -{ +void Interconnect::leakage_feedback(double temperature) { l_ip.temp = (unsigned int)round(temperature/10.0)*10; - uca_org_t init_result = init_interface(&l_ip); // init_result is dummy + uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy - compute(); + calcWireData(); power_bit = power; power.readOp.dynamic *= data_width; @@ -210,13 +216,15 @@ void interconnect::leakage_feedback(double temperature) assert(power.readOp.leakage > 0); assert(power.readOp.gate_leakage > 0); - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty,core_ty); double sckRation = g_tp.sckt_co_eff; power.readOp.dynamic *= sckRation; power.writeOp.dynamic *= sckRation; power.searchOp.dynamic *= sckRation; - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; + power.readOp.longer_channel_leakage = + power.readOp.leakage*long_channel_device_reduction; } diff --git a/ext/mcpat/interconnect.h b/ext/mcpat/interconnect.h index 4cf42dafd..2ae39c5a2 100644 --- a/ext/mcpat/interconnect.h +++ 
b/ext/mcpat/interconnect.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -42,46 +43,31 @@ #include "subarray.h" #include "wire.h" -// leakge power includes entire htree in a bank (when uca_tree == false) -// leakge power includes only part to one bank when uca_tree == true +class InterconnectParameters { +public: + double active_ports; +}; -class interconnect : public Component -{ - public: - interconnect( - string name_, - enum Device_ty device_ty_, - double base_w, double base_h, int data_w, double len, - const InputParameter *configure_interface, int start_wiring_level_, - bool pipelinable_ = false, - double route_over_perc_ =0.5, - bool opt_local_=true, - enum Core_type core_ty_=Inorder, - enum Wire_type wire_model=Global, - double width_s=1.0, double space_s=1.0, - TechnologyParameter::DeviceType *dt = &(g_tp.peri_global) - ); +class InterconnectStatistics { +public: + double duty_cycle; + double accesses; +}; - ~interconnect() {}; +class Interconnect : public McPATComponent { +public: + static double width_scaling_threshold; - void compute(); - string name; - enum Device_ty device_ty; + enum Device_ty device_ty; double in_rise_time, out_rise_time; - InputParameter l_ip; - uca_org_t local_result; + InputParameter l_ip; + uca_org_t local_result; Area no_device_under_wire_area; - void 
set_in_rise_time(double rt) - { - in_rise_time = rt; - } - - void leakage_feedback(double temperature); double max_unpipelined_link_delay; powerDef power_bit; double wire_bw; - double init_wire_bw; // bus width at root + double init_wire_bw; double base_width; double base_height; int data_width; @@ -92,19 +78,39 @@ class interconnect : public Component double min_w_nmos; double min_w_pmos; double latency, throughput; - bool latency_overflow; - bool throughput_overflow; - double interconnect_latency; - double interconnect_throughput; + bool latency_overflow; + bool throughput_overflow; + double interconnect_latency; + double interconnect_throughput; bool opt_local; enum Core_type core_ty; bool pipelinable; double route_over_perc; - int num_pipe_stages; - - private: - TechnologyParameter::DeviceType *deviceType; + int num_pipe_stages; + TechnologyParameter::DeviceType* deviceType; + InterconnectParameters int_params; + InterconnectStatistics int_stats; + Interconnect(XMLNode* _xml_data, string name_, + enum Device_ty device_ty_, double base_w, + double base_h, int data_w, double len, + const InputParameter *configure_interface, + int start_wiring_level_, + double _clockRate = 0.0f, + bool pipelinable_ = false, double route_over_perc_ = 0.5, + bool opt_local_ = true, enum Core_type core_ty_ = Inorder, + enum Wire_type wire_model = Global, double width_s = 1.0, + double space_s = 1.0, + TechnologyParameter::DeviceType *dt = &(g_tp.peri_global)); +private: + void calcWireData(); +public: + void computeArea(); + void computeEnergy(); + void set_params_stats(double active_ports, + double duty_cycle, double accesses); + void leakage_feedback(double temperature); + ~Interconnect() {}; }; #endif diff --git a/ext/mcpat/iocontrollers.cc b/ext/mcpat/iocontrollers.cc index 70b0f2dcb..4a175d841 100644 --- a/ext/mcpat/iocontrollers.cc +++ b/ext/mcpat/iocontrollers.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. 
+ * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #include @@ -34,14 +35,12 @@ #include #include -#include "XML_Parse.h" #include "basic_circuit.h" -#include "basic_components.h" +#include "common.h" #include "const.h" #include "io.h" #include "iocontrollers.h" #include "logic.h" -#include "parameter.h" /* SUN Niagara 2 I/O power analysis: @@ -69,378 +68,473 @@ Further, if assuming I/O logic power is about 50% of I/Os then Total energy of F * */ -NIUController::NIUController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - local_result = init_interface(&interface_ip); - - double frontend_area, phy_area, mac_area, SerDer_area; - double frontend_dyn, mac_dyn, SerDer_dyn; - double frontend_gates, mac_gates, SerDer_gates; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - set_niu_param(); - - if (niup.type == 0) //high performance NIU - { - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate using 65nm. 
- mac_area = (1.53 + 0.3)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, ISSCC "An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" - //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" Frontend is PCS - frontend_area = (9.8 + (6 + 18)*65/130*65/130)/3 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm. - //SerDer is very hard to scale - SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065); - phy_area = frontend_area + SerDer_area; - //total area - area.set_area((mac_area + frontend_area + SerDer_area)*1e6); - //Power - //Cadence ChipEstimate using 65nm (mac, front_end are all energy. E=P*T = P/F = 1.37/1Ghz = 1.37e-9); - mac_dyn = 2.19e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm - //Cadence ChipEstimate using 65nm soft IP; - frontend_dyn = 0.27e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; - //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." 
ISSCC 2006 - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*10*sqrt(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; - SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU - - //Cadence ChipEstimate using 65nm - mac_gates = 111700; - frontend_gates = 320000; - SerDer_gates = 200000; - NMOS_sizing = 5*g_tp.min_w_nmos_; - PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - - } - else - {//Low power implementations are mostly from Cadence ChipEstimator; Ignore the multiple IP effect - // ---When there are multiple IP (same kind or not) selected, Cadence ChipEstimator results are not - // a simple summation of all IPs. Ignore this effect - mac_area = 0.24 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - frontend_area = 0.1 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065);//Frontend is the PCS layer - SerDer_area = 0.35 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface With Robust VCO Tuning Technique" - //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can scale perfectly with the technology - //total area - area.set_area((mac_area + frontend_area + SerDer_area)*1e6); - //Power - //Cadence ChipEstimate using 65nm (mac, front_end are all energy. 
E=P*T = P/F = 1.37/1Ghz = 1.37e-9); - mac_dyn = 1.257e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm - //Cadence ChipEstimate using 65nm soft IP; - frontend_dyn = 0.6e-9*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0);//niup.clockRate; - //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm - SerDer_dyn = 0.0216*10*(interface_ip.F_sz_um/0.13)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2; - SerDer_dyn /= niup.clockRate;//covert to energy per clock cycle of whole NIU - - mac_gates = 111700; - frontend_gates = 52000; - SerDer_gates = 199260; - - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - } - - power_t.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; - power_t.readOp.leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; - power_t.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - } - -void NIUController::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { +NIUController::NIUController(XMLNode* _xml_data,InputParameter* interface_ip_) + : McPATComponent(_xml_data, interface_ip_) { + name = "NIU"; + set_niu_param(); +} +void NIUController::computeArea() { + double mac_area; + double frontend_area; + double SerDer_area; + + if (niup.type == 0) { //high performance NIU + //Area estimation based on average of die photo from Niagara 2 and + //Cadence ChipEstimate using 65nm. 
+ mac_area = (1.53 + 0.3) / 2 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + //Area estimation based on average of die photo from Niagara 2, ISSCC + //"An 800mW 10Gb Ethernet Transceiver in 0.13μm CMOS" + //and"A 1.2-V-Only 900-mW 10 Gb Ethernet Transceiver and XAUI Interface + //With Robust VCO Tuning Technique" Frontend is PCS + frontend_area = (9.8 + (6 + 18) * 65 / 130 * 65 / 130) / 3 * + (interface_ip.F_sz_um / 0.065) * (interface_ip.F_sz_um / 0.065); + //Area estimation based on average of die photo from Niagara 2 and + //Cadence ChipEstimate hard IP @65nm. + //SerDer is very hard to scale + SerDer_area = (1.39 + 0.36) * (interface_ip.F_sz_um / + 0.065);//* (interface_ip.F_sz_um/0.065); + } else { + //Low power implementations are mostly from Cadence ChipEstimator; + //Ignore the multiple IP effect + // ---When there are multiple IP (same kind or not) selected, Cadence + //ChipEstimator results are not a simple summation of all IPs. + //Ignore this effect + mac_area = 0.24 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065); + frontend_area = 0.1 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um / 0.065);//Frontend is the PCS layer + SerDer_area = 0.35 * (interface_ip.F_sz_um / 0.065) * + (interface_ip.F_sz_um/0.065); + //Compare 130um implementation in "A 1.2-V-Only 900-mW 10 Gb Ethernet + //Transceiver and XAUI Interface With Robust VCO Tuning Technique" + //and the ChipEstimator XAUI PHY hard IP, confirm that even PHY can + //scale perfectly with the technology + } - power = power_t; - power.readOp.dynamic *= niup.duty_cycle; + //total area + output_data.area = (mac_area + frontend_area + SerDer_area) * 1e6; + } +void NIUController::computeEnergy() { + double mac_dyn; + double frontend_dyn; + double SerDer_dyn; + double frontend_gates; + double mac_gates; + double SerDer_gates; + double NMOS_sizing; + double PMOS_sizing; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + + if (niup.type == 0) { 
//high performance NIU + //Power + //Cadence ChipEstimate using 65nm (mac, front_end are all energy. + //E=P*T = P/F = 1.37/1Ghz = 1.37e-9); + //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm + mac_dyn = 2.19e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / + 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate; + //Cadence ChipEstimate using 65nm soft IP; + frontend_dyn = 0.27e-9 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); + //according to "A 100mW 9.6Gb/s Transceiver in 90nm CMOS..." ISSCC 2006 + //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + SerDer_dyn = 0.01 * 10 * sqrt(interface_ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; + + //Cadence ChipEstimate using 65nm + mac_gates = 111700; + frontend_gates = 320000; + SerDer_gates = 200000; + NMOS_sizing = 5 * g_tp.min_w_nmos_; + PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } else { + //Power + //Cadence ChipEstimate using 65nm (mac, front_end are all energy. 
+ ///E=P*T = P/F = 1.37/1Ghz = 1.37e-9); + //2.19W@1GHz fully active according to Cadence ChipEstimate @65nm + mac_dyn = 1.257e-9 * g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd + / 1.1 * (interface_ip.F_sz_nm / 65.0);//niup.clockRate; + //Cadence ChipEstimate using 65nm soft IP; + frontend_dyn = 0.6e-9 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); + //SerDer_dyn is power not energy, scaling from 216mw/10Gb/s @130nm + SerDer_dyn = 0.0216 * 10 * (interface_ip.F_sz_um / 0.13) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd / 1.2; + + mac_gates = 111700; + frontend_gates = 52000; + SerDer_gates = 199260; + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; } - else - { - rt_power = power_t; - rt_power.readOp.dynamic *= niup.perc_load; - } + + //convert to energy per clock cycle of whole NIU + SerDer_dyn /= niup.clockRate; + + power.readOp.dynamic = mac_dyn + frontend_dyn + SerDer_dyn; + power.readOp.leakage = (mac_gates + frontend_gates + frontend_gates) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + power.readOp.gate_leakage = (mac_gates + frontend_gates + frontend_gates) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + + // Output power + output_data.subthreshold_leakage_power = + longer_channel_device ? 
power.readOp.longer_channel_leakage : + power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + output_data.peak_dynamic_power = power.readOp.dynamic * nius.duty_cycle; + output_data.runtime_dynamic_energy = power.readOp.dynamic * nius.perc_load; } -void NIUController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << "NIU:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*niup.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*niup.clockRate << " W" << endl; - cout<nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); - } + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); -} + ASSIGN_FP_IF("niu_clockRate", niup.clockRate); + ASSIGN_INT_IF("num_units", niup.num_units); + ASSIGN_INT_IF("type", niup.type); -void NIUController::set_niu_param() -{ - niup.clockRate = XML->sys.niu.clockrate; - niup.clockRate *= 1e6; - niup.num_units = XML->sys.niu.number_units; - niup.duty_cycle = XML->sys.niu.duty_cycle; - niup.perc_load = XML->sys.niu.total_load_perc; - niup.type = XML->sys.niu.type; -// niup.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); -} + else { + 
warnUnrecognizedParam(node_name); + } + } -PCIeController::PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - local_result = init_interface(&interface_ip); - double frontend_area, phy_area, ctrl_area, SerDer_area; - double ctrl_dyn, frontend_dyn, SerDer_dyn; - double ctrl_gates,frontend_gates, SerDer_gates; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - /* Assuming PCIe is bit-slice based architecture - * This is the reason for /8 in both area and power calculation - * to get per lane numbers - */ - - set_pcie_param(); - if (pciep.type == 0) //high performance NIU - { - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate @ 65nm. - ctrl_area = (5.2 + 0.5)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm. - frontend_area = (5.2 + 0.1)/2 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2 and Cadence ChipEstimate hard IP @65nm. 
- //SerDer is very hard to scale - SerDer_area = (3.03 + 0.36) * (interface_ip.F_sz_um/0.065);//* (interface_ip.F_sz_um/0.065); - phy_area = frontend_area + SerDer_area; - //total area - //Power - //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer - ctrl_dyn = 3.75e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - // //Cadence ChipEstimate using 65nm soft IP; - // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s - SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle - - //power_t.readOp.dynamic = (ctrl_dyn)*pciep.num_channels; - //Cadence ChipEstimate using 65nm - ctrl_gates = 900000/8*pciep.num_channels; - // frontend_gates = 120000/8; - // SerDer_gates = 200000/8; - NMOS_sizing = 5*g_tp.min_w_nmos_; - PMOS_sizing = 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - } - else - { - ctrl_area = 0.412 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //Area estimation based on average of die photo from Niagara 2, and Cadence ChipEstimate @ 65nm. 
- SerDer_area = 0.36 * (interface_ip.F_sz_um/0.065)* (interface_ip.F_sz_um/0.065); - //total area - //Power - //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer - ctrl_dyn = 2.21e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - // //Cadence ChipEstimate using 65nm soft IP; - // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); - //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm - SerDer_dyn = 0.01*4*(interface_ip.F_sz_um/0.09)*g_tp.peri_global.Vdd/1.2*g_tp.peri_global.Vdd/1.2;//PCIe 2.0 max per lane speed is 4Gb/s - SerDer_dyn /= pciep.clockRate;//covert to energy per clock cycle - - //Cadence ChipEstimate using 65nm - ctrl_gates = 200000/8*pciep.num_channels; - // frontend_gates = 120000/8; - SerDer_gates = 200000/8*pciep.num_channels; - NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - - } - area.set_area(((ctrl_area + (pciep.withPHY? SerDer_area:0))/8*pciep.num_channels)*1e6); - power_t.readOp.dynamic = (ctrl_dyn + (pciep.withPHY? SerDer_dyn:0))*pciep.num_channels; - power_t.readOp.leakage = (ctrl_gates + (pciep.withPHY? SerDer_gates:0))*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - power_t.readOp.longer_channel_leakage = power_t.readOp.leakage * long_channel_device_reduction; - power_t.readOp.gate_leakage = (ctrl_gates + (pciep.withPHY? 
SerDer_gates:0))*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - } + // Change from MHz to Hz + niup.clockRate *= 1e6; -void PCIeController::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); - power = power_t; - power.readOp.dynamic *= pciep.duty_cycle; + ASSIGN_FP_IF("duty_cycle", nius.duty_cycle); + ASSIGN_FP_IF("perc_load", nius.perc_load); - } - else - { - rt_power = power_t; - rt_power.readOp.dynamic *= pciep.perc_load; + else { + warnUnrecognizedStat(node_name); + } } } -void PCIeController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << "PCIe:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*pciep.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? 
power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic*pciep.clockRate << " W" << endl; - cout<sys.pcie.clockrate; - pciep.clockRate *= 1e6; - pciep.num_units = XML->sys.pcie.number_units; - pciep.num_channels = XML->sys.pcie.num_channels; - pciep.duty_cycle = XML->sys.pcie.duty_cycle; - pciep.perc_load = XML->sys.pcie.total_load_perc; - pciep.type = XML->sys.pcie.type; - pciep.withPHY = XML->sys.pcie.withPHY; -// pciep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); +void PCIeController::computeEnergy() { + double ctrl_dyn; + double SerDer_dyn; + double ctrl_gates; + double SerDer_gates = 0; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + double NMOS_sizing; + double PMOS_sizing; + + /* Assuming PCIe is bit-slice based architecture + * This is the reason for /8 in both area and power calculation + * to get per lane numbers + */ + + if (pciep.type == 0) { //high performance PCIe + //Power + //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer + ctrl_dyn = 3.75e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); + // //Cadence ChipEstimate using 65nm soft IP; + // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); + //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + //PCIe 2.0 max per lane speed is 4Gb/s + SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um /0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2; + + //Cadence ChipEstimate using 65nm + ctrl_gates = 900000 / 8 * pciep.num_channels; + // frontend_gates = 120000/8; + // SerDer_gates = 200000/8; + 
NMOS_sizing = 5 * g_tp.min_w_nmos_; + PMOS_sizing = 5 * g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } else { + //Power + //Cadence ChipEstimate using 65nm the controller includes everything: the PHY, the data link and transaction layer + ctrl_dyn = 2.21e-9 / 8 * g_tp.peri_global.Vdd / 1.1 * + g_tp.peri_global.Vdd / 1.1 * (interface_ip.F_sz_nm / 65.0); + // //Cadence ChipEstimate using 65nm soft IP; + // frontend_dyn = 0.27e-9/8*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(interface_ip.F_sz_nm/65.0); + //SerDer_dyn is power not energy, scaling from 10mw/Gb/s @90nm + //PCIe 2.0 max per lane speed is 4Gb/s + SerDer_dyn = 0.01 * 4 * (interface_ip.F_sz_um / 0.09) * + g_tp.peri_global.Vdd / 1.2 * g_tp.peri_global.Vdd /1.2; + + //Cadence ChipEstimate using 65nm + ctrl_gates = 200000 / 8 * pciep.num_channels; + // frontend_gates = 120000/8; + SerDer_gates = 200000 / 8 * pciep.num_channels; + NMOS_sizing = g_tp.min_w_nmos_; + PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + } + + //convert to energy per clock cycle + SerDer_dyn /= pciep.clockRate; + + power.readOp.dynamic = (ctrl_dyn + (pciep.withPHY ? SerDer_dyn : 0)) * + pciep.num_channels; + power.readOp.leakage = (ctrl_gates + (pciep.withPHY ? SerDer_gates : 0)) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + double long_channel_device_reduction = + longer_channel_device_reduction(Uncore_device); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + power.readOp.gate_leakage = (ctrl_gates + + (pciep.withPHY ? SerDer_gates : 0)) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + + // Output power + output_data.subthreshold_leakage_power = + longer_channel_device ? 
power.readOp.longer_channel_leakage : + power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + output_data.peak_dynamic_power = power.readOp.dynamic * pcies.duty_cycle; + output_data.runtime_dynamic_energy = + power.readOp.dynamic * pcies.perc_load; } -FlashController::FlashController(ParseXML *XML_interface,InputParameter* interface_ip_) -:XML(XML_interface), - interface_ip(*interface_ip_) - { - local_result = init_interface(&interface_ip); - double frontend_area, phy_area, ctrl_area, SerDer_area; - double ctrl_dyn, frontend_dyn, SerDer_dyn; - double ctrl_gates,frontend_gates, SerDer_gates; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - /* Assuming PCIe is bit-slice based architecture - * This is the reason for /8 in both area and power calculation - * to get per lane numbers - */ - - set_fc_param(); - if (fcp.type == 0) //high performance NIU - { - cout<<"Current McPAT does not support high performance flash contorller since even low power designs are enough for maintain throughput"<nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_FP_IF("pcie_clockRate", pciep.clockRate); + ASSIGN_INT_IF("num_units", pciep.num_units); + ASSIGN_INT_IF("num_channels", pciep.num_channels); + ASSIGN_INT_IF("type", pciep.type); + ASSIGN_ENUM_IF("withPHY", pciep.withPHY, bool); + + else { + warnUnrecognizedParam(node_name); + } + } -void FlashController::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { + // Change from MHz to Hz + pciep.clockRate *= 1e6; + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = 
statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); - power = power_t; - power.readOp.dynamic *= fcp.duty_cycle; + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); - } - else - { - rt_power = power_t; - rt_power.readOp.dynamic *= fcp.perc_load; + ASSIGN_FP_IF("duty_cycle", pcies.duty_cycle); + ASSIGN_FP_IF("perc_load", pcies.perc_load); + + else { + warnUnrecognizedStat(node_name); + } } } -void FlashController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << "Flash Controller:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic << " W" << endl;//no multiply of clock since this is power already - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic << " W" << endl; - cout<sys.flashc.mc_clock; -// fcp.clockRate *= 1e6; - fcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; - fcp.num_channels = ceil(fcp.peakDataTransferRate/200); - fcp.num_mcs = XML->sys.flashc.number_mcs; - fcp.duty_cycle = XML->sys.flashc.duty_cycle; - fcp.perc_load = XML->sys.flashc.total_load_perc; - fcp.type = XML->sys.flashc.type; - fcp.withPHY = XML->sys.flashc.withPHY; -// flashcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + 
XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("num_channels", fcp.num_channels); + ASSIGN_INT_IF("type", fcp.type); + ASSIGN_ENUM_IF("withPHY", fcp.withPHY, bool); + + else { + warnUnrecognizedParam(node_name); + } + } + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + ASSIGN_FP_IF("duty_cycle", fcs.duty_cycle); + ASSIGN_FP_IF("perc_load", fcs.perc_load); + + else { + warnUnrecognizedStat(node_name); + } + } } diff --git a/ext/mcpat/iocontrollers.h b/ext/mcpat/iocontrollers.h index 818580abb..39cfb0eb3 100644 --- a/ext/mcpat/iocontrollers.h +++ b/ext/mcpat/iocontrollers.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,63 +26,52 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ #ifndef IOCONTROLLERS_H_ #define IOCONTROLLERS_H_ - -#endif /* IOCONTROLLERS_H_ */ - -#include "XML_Parse.h" -#include "parameter.h" -//#include "io.h" -#include "array.h" -//#include "Undifferentiated_Core_Area.h" #include +#include "array.h" #include "basic_components.h" +#include "parameter.h" -class NIUController : public Component { +class NIUController : public McPATComponent { public: - ParseXML *XML; - InputParameter interface_ip; - NIUParam niup; - powerDef power_t; - uca_org_t local_result; - NIUController(ParseXML *XML_interface,InputParameter* interface_ip_); + NIUParameters niup; + NIUStatistics nius; + + NIUController(XMLNode* _xml_data, InputParameter* interface_ip_); void set_niu_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + void computeArea(); + void computeEnergy(); ~NIUController(){}; }; -class PCIeController : public Component { +class PCIeController : public McPATComponent { public: - ParseXML *XML; - InputParameter interface_ip; - PCIeParam pciep; - powerDef power_t; - uca_org_t local_result; - PCIeController(ParseXML *XML_interface,InputParameter* interface_ip_); + PCIeParameters pciep; + PCIeStatistics pcies; + + PCIeController(XMLNode* _xml_data, InputParameter* interface_ip_); void set_pcie_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + void computeArea(); + void computeEnergy(); ~PCIeController(){}; }; -class FlashController : public Component { +class FlashController : public McPATComponent { public: - ParseXML *XML; - InputParameter interface_ip; - MCParam fcp; - powerDef power_t; - uca_org_t local_result; - FlashController(ParseXML *XML_interface,InputParameter* interface_ip_); + MCParameters fcp; + MCStatistics fcs; + + FlashController(XMLNode* _xml_data, InputParameter* interface_ip_); void 
set_fc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + void computeArea(); + void computeEnergy(); ~FlashController(){}; }; +#endif /* IOCONTROLLERS_H_ */ diff --git a/ext/mcpat/logic.cc b/ext/mcpat/logic.cc index 11519d863..43823e77b 100644 --- a/ext/mcpat/logic.cc +++ b/ext/mcpat/logic.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,416 +26,500 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ***************************************************************************/ +#include "common.h" #include "logic.h" - //selection_logic -selection_logic::selection_logic( - bool _is_default, - int win_entries_, - int issue_width_, - const InputParameter *configure_interface, - enum Device_ty device_ty_, - enum Core_type core_ty_) - //const ParseXML *_XML_interface) - :is_default(_is_default), - win_entries(win_entries_), - issue_width(issue_width_), - device_ty(device_ty_), - core_ty(core_ty_) - { - //uca_org_t result2; - l_ip=*configure_interface; - local_result = init_interface(&l_ip); - //init_tech_params(l_ip.F_sz_um, false); - //win_entries=numIBEntries;//IQentries; - //issue_width=issueWidth; - selection_power(); - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - } - -void selection_logic::selection_power() -{//based on cost effective superscalar processor TR pp27-31 - double Ctotal, Cor, Cpencode; - int num_arbiter; - double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp; - - //TODO: the 0.8um process data is used. 
- WSelORn = 12.5 * l_ip.F_sz_um;//this was 10 micron for the 0.8 micron process - WSelORprequ = 50 * l_ip.F_sz_um;//this was 40 micron for the 0.8 micron process - WSelPn = 12.5 * l_ip.F_sz_um;//this was 10mcron for the 0.8 micron process - WSelPp = 18.75 * l_ip.F_sz_um;//this was 15 micron for the 0.8 micron process - WSelEnn = 6.25 * l_ip.F_sz_um;//this was 5 micron for the 0.8 micron process - WSelEnp = 12.5 * l_ip.F_sz_um;//this was 10 micron for the 0.8 micron process - - - Ctotal=0; - num_arbiter=1; - while(win_entries > 4) - { - win_entries = (int)ceil((double)win_entries / 4.0); - num_arbiter += win_entries; - } - //the 4-input OR logic to generate anyreq - Cor = 4 * drain_C_(WSelORn,NCH,1,1, g_tp.cell_h_def) + drain_C_(WSelORprequ,PCH,1,1, g_tp.cell_h_def); - power.readOp.gate_leakage = cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor)*g_tp.peri_global.Vdd; - - //The total capacity of the 4-bit priority encoder - Cpencode = drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,1, 1, g_tp.cell_h_def) + - 2*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,2, 1, g_tp.cell_h_def) + - 3*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,3, 1, g_tp.cell_h_def) + - 4*drain_C_(WSelPn,NCH,1, 1, g_tp.cell_h_def) + drain_C_(WSelPp,PCH,4, 1, g_tp.cell_h_def) +//precompute priority logic - 2*4*gate_C(WSelEnn+WSelEnp,20.0)+ - 4*drain_C_(WSelEnn,NCH,1, 1, g_tp.cell_h_def) + 2*4*drain_C_(WSelEnp,PCH,1, 1, g_tp.cell_h_def)+//enable logic - (2*4+2*3+2*2+2)*gate_C(WSelPn+WSelPp,10.0);//requests signal - - Ctotal += issue_width * num_arbiter*(Cor+Cpencode); - - power.readOp.dynamic = Ctotal*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*2;//2 means the abitration signal need to travel round trip - power.readOp.leakage = issue_width * num_arbiter * - (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p - + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p - + cmos_Isub_leakage(WSelPn, WSelPp, 4, 
nor)//grant3p - + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic - + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals - )*g_tp.peri_global.Vdd; - power.readOp.gate_leakage = issue_width * num_arbiter * - (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p - + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p - + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p - + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic - + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals - )*g_tp.peri_global.Vdd; +selection_logic::selection_logic(XMLNode* _xml_data, bool _is_default, + int _win_entries, int issue_width_, + const InputParameter *configure_interface, + string _name, double _accesses, + double clockRate_, enum Device_ty device_ty_, + enum Core_type core_ty_) + : McPATComponent(_xml_data), is_default(_is_default), + win_entries(_win_entries), + issue_width(issue_width_), + accesses(_accesses), + device_ty(device_ty_), + core_ty(core_ty_) { + clockRate = clockRate_; + name = _name; + l_ip = *configure_interface; + local_result = init_interface(&l_ip, name); +} + +void selection_logic::computeArea() { + output_data.area = local_result.area; } +void selection_logic::computeEnergy() { + //based on cost effective superscalar processor TR pp27-31 + double Ctotal, Cor, Cpencode; + int num_arbiter; + double WSelORn, WSelORprequ, WSelPn, WSelPp, WSelEnn, WSelEnp; + + //the 0.8um process data is used. 
+ //this was 10 micron for the 0.8 micron process + WSelORn = 12.5 * l_ip.F_sz_um; + //this was 40 micron for the 0.8 micron process + WSelORprequ = 50 * l_ip.F_sz_um; + //this was 10mcron for the 0.8 micron process + WSelPn = 12.5 * l_ip.F_sz_um; + //this was 15 micron for the 0.8 micron process + WSelPp = 18.75 * l_ip.F_sz_um; + //this was 5 micron for the 0.8 micron process + WSelEnn = 6.25 * l_ip.F_sz_um; + //this was 10 micron for the 0.8 micron process + WSelEnp = 12.5 * l_ip.F_sz_um; + + Ctotal = 0; + num_arbiter = 1; + while (win_entries > 4) { + win_entries = (int)ceil((double)win_entries / 4.0); + num_arbiter += win_entries; + } + //the 4-input OR logic to generate anyreq + Cor = 4 * drain_C_(WSelORn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelORprequ, PCH, 1, 1, g_tp.cell_h_def); + power.readOp.gate_leakage = + cmos_Ig_leakage(WSelORn, WSelORprequ, 4, nor) * g_tp.peri_global.Vdd; + + //The total capacity of the 4-bit priority encoder + Cpencode = drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 2, 1, g_tp.cell_h_def) + + 3 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 3, 1, g_tp.cell_h_def) + + 4 * drain_C_(WSelPn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(WSelPp, PCH, 4, 1, g_tp.cell_h_def) +//precompute priority logic + 2 * 4 * gate_C(WSelEnn + WSelEnp, 20.0) + + 4 * drain_C_(WSelEnn, NCH, 1, 1, g_tp.cell_h_def) + + 2 * 4 * drain_C_(WSelEnp, PCH, 1, 1, g_tp.cell_h_def) +//enable logic + (2 * 4 + 2 * 3 + 2 * 2 + 2) * + gate_C(WSelPn + WSelPp, 10.0);//requests signal + + Ctotal += issue_width * num_arbiter * (Cor + Cpencode); + + //2 means the abitration signal need to travel round trip + power.readOp.dynamic = + Ctotal * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd * 2; + power.readOp.leakage = issue_width * num_arbiter * + (cmos_Isub_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor 
gate*///grant1p + + cmos_Isub_leakage(WSelPn, WSelPp, 3, nor)//grant2p + + cmos_Isub_leakage(WSelPn, WSelPp, 4, nor)//grant3p + + cmos_Isub_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic + + cmos_Isub_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant sIsubnals + ) * g_tp.peri_global.Vdd; + power.readOp.gate_leakage = issue_width * num_arbiter * + (cmos_Ig_leakage(WSelPn, WSelPp, 2, nor)/*approximate precompute with a nor gate*///grant1p + + cmos_Ig_leakage(WSelPn, WSelPp, 3, nor)//grant2p + + cmos_Ig_leakage(WSelPn, WSelPp, 4, nor)//grant3p + + cmos_Ig_leakage(WSelEnn, WSelEnp, 2, nor)*4//enable logic + + cmos_Ig_leakage(WSelEnn, WSelEnp, 1, inv)*2*3//for each grant there are two inverters, there are 3 grant signals + ) * g_tp.peri_global.Vdd; + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + + output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; + output_data.runtime_dynamic_energy = power.readOp.dynamic * accesses; +} dep_resource_conflict_check::dep_resource_conflict_check( - const InputParameter *configure_interface, - const CoreDynParam & dyn_p_, - int compare_bits_, - bool _is_default) - : l_ip(*configure_interface), - coredynp(dyn_p_), - compare_bits(compare_bits_), - is_default(_is_default) -{ - Wcompn = 25 * l_ip.F_sz_um;//this was 20.0 micron for the 0.8 micron process - Wevalinvp = 25 * l_ip.F_sz_um;//this was 20.0 micron for the 0.8 micron process - Wevalinvn = 100 * l_ip.F_sz_um;//this was 80.0 mcron for the 0.8 micron process - Wcomppreequ = 50 * l_ip.F_sz_um;//this was 
40.0 micron for the 0.8 micron process - WNORn = 6.75 * l_ip.F_sz_um;//this was 5.4 micron for the 0.8 micron process - WNORp = 38.125 * l_ip.F_sz_um;//this was 30.5 micron for the 0.8 micron process - - local_result = init_interface(&l_ip); - - if (coredynp.core_ty==Inorder) - compare_bits += 16 + 8 + 8;//TODO: opcode bits + log(shared resources) + REG TAG BITS-->opcode comparator - else - compare_bits += 16 + 8 + 8; - - conflict_check_power(); - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; + XMLNode* _xml_data, const string _name, + const InputParameter *configure_interface, + const CoreParameters & dyn_p_, int compare_bits_, + double clockRate_, bool _is_default) + : McPATComponent(_xml_data), l_ip(*configure_interface), + coredynp(dyn_p_), compare_bits(compare_bits_), is_default(_is_default) { + + name = _name; + clockRate = clockRate_; + //this was 20.0 micron for the 0.8 micron process + Wcompn = 25 * l_ip.F_sz_um; + //this was 20.0 micron for the 0.8 micron process + Wevalinvp = 25 * l_ip.F_sz_um; + //this was 80.0 mcron for the 0.8 micron process + Wevalinvn = 100 * l_ip.F_sz_um; + //this was 40.0 micron for the 0.8 micron process + Wcomppreequ = 50 * l_ip.F_sz_um; + //this was 5.4 micron for the 0.8 micron process + WNORn = 6.75 * l_ip.F_sz_um; + //this was 30.5 micron for the 0.8 micron process + WNORp = 38.125 * l_ip.F_sz_um; + + // To make CACTI happy. 
+ l_ip.cache_sz = MIN_BUFFER_SIZE; + local_result = init_interface(&l_ip, name); + + if (coredynp.core_ty == Inorder) + //TODO: opcode bits + log(shared resources) + REG TAG BITS --> + //opcode comparator + compare_bits += 16 + 8 + 8; + else + compare_bits += 16 + 8 + 8; + + conflict_check_power(); + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; } -void dep_resource_conflict_check::conflict_check_power() -{ - double Ctotal; - int num_comparators; - num_comparators = 3*((coredynp.decodeW) * (coredynp.decodeW)-coredynp.decodeW);//2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest to dest comparision. - //When decode-width ==1, no dcl logic +void dep_resource_conflict_check::conflict_check_power() { + double Ctotal; + int num_comparators; + //2(N*N-N) is used for source to dest comparison, (N*N-N) is used for + //dest to dest comparision. + num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) - + coredynp.decodeW); - Ctotal = num_comparators * compare_cap(); - //printf("%i,%s\n",XML_interface->sys.core[0].predictor.predictor_entries,XML_interface->sys.core[0].predictor.prediction_scheme); + Ctotal = num_comparators * compare_cap(); - power.readOp.dynamic=Ctotal*/*CLOCKRATE*/g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/*AF*/; - power.readOp.leakage=num_comparators*compare_bits*2*simplified_nmos_leakage(Wcompn, false); + power.readOp.dynamic = Ctotal * /*CLOCKRATE*/ g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd /*AF*/; + power.readOp.leakage = num_comparators * compare_bits * 2 * + simplified_nmos_leakage(Wcompn, false); - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - power.readOp.gate_leakage=num_comparators*compare_bits*2*cmos_Ig_leakage(Wcompn, 0, 2, nmos); + double 
long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + power.readOp.gate_leakage = num_comparators * compare_bits * 2 * + cmos_Ig_leakage(Wcompn, 0, 2, nmos); } /* estimate comparator power consumption (this comparator is similar to the tag-match structure in a CAM */ -double dep_resource_conflict_check::compare_cap() -{ - double c1, c2; - - WNORp = WNORp * compare_bits/2.0;//resize the big NOR gate at the DCL according to fan in. - /* bottom part of comparator */ - c2 = (compare_bits)*(drain_C_(Wcompn,NCH,1,1, g_tp.cell_h_def)+drain_C_(Wcompn,NCH,2,1, g_tp.cell_h_def))+ - drain_C_(Wevalinvp,PCH,1,1, g_tp.cell_h_def) + drain_C_(Wevalinvn,NCH,1,1, g_tp.cell_h_def); - - /* top part of comparator */ - c1 = (compare_bits)*(drain_C_(Wcompn,NCH,1,1, g_tp.cell_h_def)+drain_C_(Wcompn,NCH,2,1, g_tp.cell_h_def)+ - drain_C_(Wcomppreequ,NCH,1,1, g_tp.cell_h_def)) + gate_C(WNORn + WNORp,10.0) + - drain_C_(WNORp,NCH,2,1, g_tp.cell_h_def) + compare_bits*drain_C_(WNORn,NCH,2,1, g_tp.cell_h_def); - return(c1 + c2); +double dep_resource_conflict_check::compare_cap() { + double c1, c2; + + //resize the big NOR gate at the DCL according to fan in. 
+ WNORp = WNORp * compare_bits / 2.0; + /* bottom part of comparator */ + c2 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def)) + + drain_C_(Wevalinvp, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wevalinvn, NCH, 1, 1, g_tp.cell_h_def); + + /* top part of comparator */ + c1 = (compare_bits) * (drain_C_(Wcompn, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(Wcompn, NCH, 2, 1, g_tp.cell_h_def) + + drain_C_(Wcomppreequ, NCH, 1, 1, g_tp.cell_h_def)) + + gate_C(WNORn + WNORp, 10.0) + + drain_C_(WNORp, NCH, 2, 1, g_tp.cell_h_def) + compare_bits * + drain_C_(WNORn, NCH, 2, 1, g_tp.cell_h_def); + return(c1 + c2); } void dep_resource_conflict_check::leakage_feedback(double temperature) { l_ip.temp = (unsigned int)round(temperature/10.0)*10; - uca_org_t init_result = init_interface(&l_ip); // init_result is dummy + uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy // This is part of conflict_check_power() - int num_comparators = 3*((coredynp.decodeW) * (coredynp.decodeW)-coredynp.decodeW);//2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest to dest comparision. - power.readOp.leakage=num_comparators*compare_bits*2*simplified_nmos_leakage(Wcompn, false); - - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - power.readOp.gate_leakage=num_comparators*compare_bits*2*cmos_Ig_leakage(Wcompn, 0, 2, nmos); + // 2(N*N-N) is used for source to dest comparison, (N*N-N) is used for dest + // to dest comparison. 
+ int num_comparators = 3 * ((coredynp.decodeW) * (coredynp.decodeW) - + coredynp.decodeW); + power.readOp.leakage = num_comparators * compare_bits * 2 * + simplified_nmos_leakage(Wcompn, false); + + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, coredynp.core_ty); + power.readOp.longer_channel_leakage = power.readOp.leakage * + long_channel_device_reduction; + power.readOp.gate_leakage = num_comparators * compare_bits * 2 * + cmos_Ig_leakage(Wcompn, 0, 2, nmos); } -//TODO: add inverter and transmission gate base DFF. DFFCell::DFFCell( - bool _is_dram, - double _WdecNANDn, - double _WdecNANDp, - double _cell_load, - const InputParameter *configure_interface) -:is_dram(_is_dram), -cell_load(_cell_load), -WdecNANDn(_WdecNANDn), -WdecNANDp(_WdecNANDp) -{//this model is based on the NAND2 based DFF. - l_ip=*configure_interface; -// area.set_area(730*l_ip.F_sz_um*l_ip.F_sz_um); - area.set_area(5*compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp, g_tp.cell_h_def) - + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn, g_tp.cell_h_def)); + bool _is_dram, + double _WdecNANDn, + double _WdecNANDp, + double _cell_load, + const InputParameter *configure_interface) + : is_dram(_is_dram), + cell_load(_cell_load), + WdecNANDn(_WdecNANDn), + WdecNANDp(_WdecNANDp) { //this model is based on the NAND2 based DFF. 
+ l_ip = *configure_interface; + area.set_area(5 * compute_gate_area(NAND, 2,WdecNANDn,WdecNANDp, + g_tp.cell_h_def) + + compute_gate_area(NAND, 2,WdecNANDn,WdecNANDn, + g_tp.cell_h_def)); } -double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) -{ - double Ctotal = 0; - //printf("WdecNANDn = %E\n", WdecNANDn); +double DFFCell::fpfp_node_cap(unsigned int fan_in, unsigned int fan_out) { + double Ctotal = 0; - /* part 1: drain cap of NAND gate */ - Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram); + /* part 1: drain cap of NAND gate */ + Ctotal += drain_C_(WdecNANDn, NCH, 2, 1, g_tp.cell_h_def, is_dram) + fan_in * drain_C_(WdecNANDp, PCH, 1, 1, g_tp.cell_h_def, is_dram); - /* part 2: gate cap of NAND gates */ - Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); + /* part 2: gate cap of NAND gates */ + Ctotal += fan_out * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); - return Ctotal; + return Ctotal; } -void DFFCell::compute_DFF_cell() -{ - double c1, c2, c3, c4, c5, c6; - /* node 5 and node 6 are identical to node 1 in capacitance */ - c1 = c5 = c6 = fpfp_node_cap(2, 1); - c2 = fpfp_node_cap(2, 3); - c3 = fpfp_node_cap(3, 2); - c4 = fpfp_node_cap(2, 2); - - //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2 - clock_cap= 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); - e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2*cell_load)*0.5*g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; - - /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */ - e_keep_1.readOp.dynamic += c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; - e_keep_0.readOp.dynamic += c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; - e_clock.readOp.dynamic += clock_cap* g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; - - /* static power */ - e_switch.readOp.leakage += (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, 
nand)*5//5 NAND2 and 1 NAND3 in a DFF - + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand))*g_tp.peri_global.Vdd; - e_switch.readOp.gate_leakage += (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand)*5//5 NAND2 and 1 NAND3 in a DFF - + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand))*g_tp.peri_global.Vdd; - //printf("leakage =%E\n",cmos_Ileak(1, is_dram) ); +void DFFCell::compute_DFF_cell() { + double c1, c2, c3, c4, c5, c6; + /* node 5 and node 6 are identical to node 1 in capacitance */ + c1 = c5 = c6 = fpfp_node_cap(2, 1); + c2 = fpfp_node_cap(2, 3); + c3 = fpfp_node_cap(3, 2); + c4 = fpfp_node_cap(2, 2); + + //cap-load of the clock signal in each Dff, actually the clock signal only connected to one NAND2 + clock_cap = 2 * gate_C(WdecNANDn + WdecNANDp, 0, is_dram); + e_switch.readOp.dynamic += (c4 + c1 + c2 + c3 + c5 + c6 + 2 * cell_load) * + 0.5 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; + + /* no 1/2 for e_keep and e_clock because clock signal switches twice in one cycle */ + e_keep_1.readOp.dynamic += + c3 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; + e_keep_0.readOp.dynamic += + c2 * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd ; + e_clock.readOp.dynamic += + clock_cap * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd;; + + /* static power */ + e_switch.readOp.leakage += + (cmos_Isub_leakage(WdecNANDn, WdecNANDp, 2, nand) * + 5//5 NAND2 and 1 NAND3 in a DFF + + cmos_Isub_leakage(WdecNANDn, WdecNANDn, 3, nand)) * + g_tp.peri_global.Vdd; + e_switch.readOp.gate_leakage += + (cmos_Ig_leakage(WdecNANDn, WdecNANDp, 2, nand) * + 5//5 NAND2 and 1 NAND3 in a DFF + + cmos_Ig_leakage(WdecNANDn, WdecNANDn, 3, nand)) * + g_tp.peri_global.Vdd; } -Pipeline::Pipeline( - const InputParameter *configure_interface, - const CoreDynParam & dyn_p_, - enum Device_ty device_ty_, - bool _is_core_pipeline, - bool _is_default) -: l_ip(*configure_interface), - coredynp(dyn_p_), - device_ty(device_ty_), - is_core_pipeline(_is_core_pipeline), - is_default(_is_default), - num_piperegs(0.0) 
- - { - local_result = init_interface(&l_ip); - if (!coredynp.Embedded) - process_ind = true; - else - process_ind = false; - WNANDn = (process_ind)? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ;//this was 20 micron for the 0.8 micron process - WNANDp = (process_ind)? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_*pmos_to_nmos_sz_ratio();//this was 30 micron for the 0.8 micron process - load_per_pipeline_stage = 2*gate_C(WNANDn + WNANDp, 0, false); - compute(); +Pipeline::Pipeline(XMLNode* _xml_data, + const InputParameter *configure_interface, + const CoreParameters & dyn_p_, + enum Device_ty device_ty_, + bool _is_core_pipeline, + bool _is_default) + : McPATComponent(_xml_data), l_ip(*configure_interface), + coredynp(dyn_p_), device_ty(device_ty_), + is_core_pipeline(_is_core_pipeline), is_default(_is_default), + num_piperegs(0.0) { + name = "Pipeline?"; + + local_result = init_interface(&l_ip, name); + if (!coredynp.Embedded) { + process_ind = true; + } else { + process_ind = false; + } + //this was 20 micron for the 0.8 micron process + WNANDn = (process_ind) ? 25 * l_ip.F_sz_um : g_tp.min_w_nmos_ ; + //this was 30 micron for the 0.8 micron process + WNANDp = (process_ind) ? 37.5 * l_ip.F_sz_um : g_tp.min_w_nmos_ * + pmos_to_nmos_sz_ratio(); + load_per_pipeline_stage = 2 * gate_C(WNANDn + WNANDp, 0, false); + compute(); } -void Pipeline::compute() -{ - compute_stage_vector(); - DFFCell pipe_reg(false, WNANDn,WNANDp, load_per_pipeline_stage, &l_ip); - pipe_reg.compute_DFF_cell(); - - double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic; - //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider - //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power. 
- double pipe_reg_power = num_piperegs * (pipe_reg.e_switch.readOp.dynamic+pipe_reg.e_keep_0.readOp.dynamic+pipe_reg.e_keep_1.readOp.dynamic)/3+clock_power_pipereg; - double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage; - double pipe_reg_gate_leakage = num_piperegs * pipe_reg.e_switch.readOp.gate_leakage; - power.readOp.dynamic +=pipe_reg_power; - power.readOp.leakage +=pipe_reg_leakage; - power.readOp.gate_leakage +=pipe_reg_gate_leakage; - area.set_area(num_piperegs * pipe_reg.area.get_area()); - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - - - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - double macro_layout_overhead = g_tp.macro_layout_overhead; +void Pipeline::compute() { + compute_stage_vector(); + DFFCell pipe_reg(false, WNANDn, WNANDp, load_per_pipeline_stage, &l_ip); + pipe_reg.compute_DFF_cell(); + + double clock_power_pipereg = num_piperegs * pipe_reg.e_clock.readOp.dynamic; + //******************pipeline power: currently, we average all the possibilities of the states of DFFs in the pipeline. A better way to do it is to consider + //the harming distance of two consecutive signals, However McPAT does not have plan to do this in near future as it focuses on worst case power. 
+ double pipe_reg_power = num_piperegs * + (pipe_reg.e_switch.readOp.dynamic + pipe_reg.e_keep_0.readOp.dynamic + + pipe_reg.e_keep_1.readOp.dynamic) / 3 + clock_power_pipereg; + double pipe_reg_leakage = num_piperegs * pipe_reg.e_switch.readOp.leakage; + double pipe_reg_gate_leakage = num_piperegs * + pipe_reg.e_switch.readOp.gate_leakage; + power.readOp.dynamic += pipe_reg_power; + power.readOp.leakage += pipe_reg_leakage; + power.readOp.gate_leakage += pipe_reg_gate_leakage; + area.set_area(num_piperegs * pipe_reg.area.get_area()); + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, coredynp.core_ty); + power.readOp.longer_channel_leakage = power.readOp.leakage * + long_channel_device_reduction; + + + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + double macro_layout_overhead = g_tp.macro_layout_overhead; if (!coredynp.Embedded) - area.set_area(area.get_area()*macro_layout_overhead); -} - -void Pipeline::compute_stage_vector() -{ - double num_stages, tot_stage_vector, per_stage_vector; - int opcode_length = coredynp.x86? coredynp.micro_opcode_length:coredynp.opcode_length; - //Hthread = thread_clock_gated? 
1:num_thread; + area.set_area(area.get_area() * macro_layout_overhead); - if (!is_core_pipeline) - { - num_piperegs=l_ip.pipeline_stages*l_ip.per_stage_vector;//The number of pipeline stages are calculated based on the achievable throughput and required throughput - } - else - { - if (coredynp.core_ty==Inorder) - { - /* assume 6 pipe stages and try to estimate bits per pipe stage */ - /* pipe stage 0/IF */ - num_piperegs += coredynp.pc_width*2*coredynp.num_hthreads; - /* pipe stage IF/ID */ - num_piperegs += coredynp.fetchW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads; - /* pipe stage IF/ThreadSEL */ - if (coredynp.multithreaded) num_piperegs += coredynp.num_hthreads*coredynp.perThreadState; //8 bit thread states - /* pipe stage ID/EXE */ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width + pow(2.0,opcode_length)+ 2*coredynp.int_data_width)*coredynp.num_hthreads; - /* pipe stage EXE/MEM */ - num_piperegs += coredynp.issueW*(3 * coredynp.arch_ireg_width + pow(2.0,opcode_length) + 8*2*coredynp.int_data_width/*+2*powers (2,reg_length)*/); - /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal for the opcode*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length) + 8*2*coredynp.int_data_width/*+2*powers (2,reg_length)*/); -// /* pipe stage 5/6 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/*+2*powers (2,reg_length)*/); -// /* pipe stage 6/7 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/*+2*powers (2,reg_length)*/); -// /* pipe stage 7/8 */ -// num_piperegs += issueWidth*(data_width + powers (2,opcode_length)/**2*powers (2,reg_length)*/); -// /* assume 50% extra in control signals (rule of thumb) */ - num_stages=6; + output_data.area = area.get_area() / 1e6; + output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power.readOp.leakage; + 
output_data.gate_leakage_power = power.readOp.gate_leakage; + output_data.runtime_dynamic_energy = power.readOp.dynamic * total_cycles; +} - } - else - { - /* assume 12 stage pipe stages and try to estimate bits per pipe stage */ - /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */ - - /* pipe stage 0/1F*/ - num_piperegs += coredynp.pc_width*2*coredynp.num_hthreads ;//PC and Next PC - /* pipe stage IF/ID */ - num_piperegs += coredynp.fetchW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;//PC is used to feed branch predictor in ID - /* pipe stage 1D/Renaming*/ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width)*coredynp.num_hthreads;//PC is for branch exe in later stage. - /* pipe stage Renaming/wire_drive */ - num_piperegs += coredynp.decodeW*(coredynp.instruction_length + coredynp.pc_width); - /* pipe stage Renaming/IssueQ */ - num_piperegs += coredynp.issueW*(coredynp.instruction_length + coredynp.pc_width + 3*coredynp.phy_ireg_width)*coredynp.num_hthreads;//3*coredynp.phy_ireg_width means 2 sources and 1 dest - /* pipe stage IssueQ/Dispatch */ - num_piperegs += coredynp.issueW*(coredynp.instruction_length + 3 * coredynp.phy_ireg_width); - /* pipe stage Dispatch/EXE */ - - num_piperegs += coredynp.issueW*(3 * coredynp.phy_ireg_width + coredynp.pc_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /* 2^opcode_length means the total decoded signal for the opcode*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/ - num_piperegs += coredynp.issueW*(2*coredynp.int_data_width + pow(2.0,opcode_length)/*+2*powers (2,reg_length)*/); - /* pipe stage EXE/MEM, data need to be read/write, address*/ - num_piperegs += coredynp.issueW*(coredynp.int_data_width + coredynp.v_address_width + pow(2.0,opcode_length)/*+2*powers 
(2,reg_length)*/);//memory Opcode still need to be passed - /* pipe stage MEM/WB; result data, writeback regs */ - num_piperegs += coredynp.issueW*(coredynp.int_data_width + coredynp.phy_ireg_width /* powers (2,opcode_length) + (2,opcode_length)+2*powers (2,reg_length)*/); - /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/ - num_piperegs += coredynp.commitW*(coredynp.int_data_width + coredynp.v_address_width + coredynp.phy_ireg_width/*+ powers (2,opcode_length)*2*powers (2,reg_length)*/)*coredynp.num_hthreads; -// if (multithreaded) -// { -// -// } - num_stages=12; +void Pipeline::compute_stage_vector() { + double num_stages, tot_stage_vector, per_stage_vector; + int opcode_length = coredynp.x86 ? + coredynp.micro_opcode_length : coredynp.opcode_width; + + if (!is_core_pipeline) { + //The number of pipeline stages are calculated based on the achievable + //throughput and required throughput + num_piperegs = l_ip.pipeline_stages * l_ip.per_stage_vector; + } else { + if (coredynp.core_ty == Inorder) { + /* assume 6 pipe stages and try to estimate bits per pipe stage */ + /* pipe stage 0/IF */ + num_piperegs += coredynp.pc_width * 2 * coredynp.num_hthreads; + /* pipe stage IF/ID */ + num_piperegs += coredynp.fetchW * + (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads; + /* pipe stage IF/ThreadSEL */ + if (coredynp.multithreaded) { + num_piperegs += coredynp.num_hthreads * + coredynp.perThreadState; //8 bit thread states + } + /* pipe stage ID/EXE */ + num_piperegs += coredynp.decodeW * + (coredynp.instruction_length + coredynp.pc_width + + pow(2.0, opcode_length) + 2 * coredynp.int_data_width) * + coredynp.num_hthreads; + /* pipe stage EXE/MEM */ + num_piperegs += coredynp.issueW * + (3 * coredynp.arch_ireg_width + pow(2.0, opcode_length) + 8 * + 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/); + /* pipe stage MEM/WB the 2^opcode_length means the total decoded signal for the 
opcode*/ + num_piperegs += coredynp.issueW * + (2 * coredynp.int_data_width + pow(2.0, opcode_length) + 8 * + 2 * coredynp.int_data_width/*+2*powers (2,reg_length)*/); + num_stages = 6; + } else { + /* assume 12 stage pipe stages and try to estimate bits per pipe stage */ + /*OOO: Fetch, decode, rename, IssueQ, dispatch, regread, EXE, MEM, WB, CM */ + + /* pipe stage 0/1F*/ + num_piperegs += + coredynp.pc_width * 2 * coredynp.num_hthreads ;//PC and Next PC + /* pipe stage IF/ID */ + num_piperegs += coredynp.fetchW * + (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads;//PC is used to feed branch predictor in ID + /* pipe stage 1D/Renaming*/ + num_piperegs += coredynp.decodeW * + (coredynp.instruction_length + coredynp.pc_width) * + coredynp.num_hthreads;//PC is for branch exe in later stage. + /* pipe stage Renaming/wire_drive */ + num_piperegs += coredynp.decodeW * + (coredynp.instruction_length + coredynp.pc_width); + /* pipe stage Renaming/IssueQ */ + //3*coredynp.phy_ireg_width means 2 sources and 1 dest + num_piperegs += coredynp.issueW * + (coredynp.instruction_length + coredynp.pc_width + 3 * + coredynp.phy_ireg_width) * coredynp.num_hthreads; + /* pipe stage IssueQ/Dispatch */ + num_piperegs += coredynp.issueW * + (coredynp.instruction_length + 3 * coredynp.phy_ireg_width); + /* pipe stage Dispatch/EXE */ + + num_piperegs += coredynp.issueW * + (3 * coredynp.phy_ireg_width + coredynp.pc_width + + pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/); + /* 2^opcode_length means the total decoded signal for the opcode*/ + num_piperegs += coredynp.issueW * + (2 * coredynp.int_data_width + pow(2.0, opcode_length) + /*+2*powers (2,reg_length)*/); + /*2 source operands in EXE; Assume 2EXE stages* since we do not really distinguish OP*/ + num_piperegs += coredynp.issueW * + (2 * coredynp.int_data_width + pow(2.0, opcode_length) + /*+2*powers (2,reg_length)*/); + /* pipe stage EXE/MEM, data need to be read/write, address*/ + //memory Opcode 
still need to be passed + num_piperegs += coredynp.issueW * + (coredynp.int_data_width + coredynp.v_address_width + + pow(2.0, opcode_length)/*+2*powers (2,reg_length)*/); + /* pipe stage MEM/WB; result data, writeback regs */ + num_piperegs += coredynp.issueW * + (coredynp.int_data_width + coredynp.phy_ireg_width + /* powers (2,opcode_length) + + (2,opcode_length)+2*powers (2,reg_length)*/); + /* pipe stage WB/CM ; result data, regs need to be updated, address for resolve memory ops in ROB's top*/ + num_piperegs += coredynp.commitW * + (coredynp.int_data_width + coredynp.v_address_width + + coredynp.phy_ireg_width + /*+ powers (2,opcode_length)*2*powers (2,reg_length)*/) * + coredynp.num_hthreads; + num_stages = 12; } /* assume 50% extra in control registers and interrupt registers (rule of thumb) */ num_piperegs = num_piperegs * 1.5; - tot_stage_vector=num_piperegs; - per_stage_vector=tot_stage_vector/num_stages; - - if (coredynp.core_ty==Inorder) - { - if (coredynp.pipeline_stages>6) - num_piperegs= per_stage_vector*coredynp.pipeline_stages; + tot_stage_vector = num_piperegs; + per_stage_vector = tot_stage_vector / num_stages; + + if (coredynp.core_ty == Inorder) { + if (coredynp.pipeline_stages > 6) + num_piperegs = per_stage_vector * coredynp.pipeline_stages; + } else { //OOO + if (coredynp.pipeline_stages > 12) + num_piperegs = per_stage_vector * coredynp.pipeline_stages; } - else//OOO - { - if (coredynp.pipeline_stages>12) - num_piperegs= per_stage_vector*coredynp.pipeline_stages; - } - } + } } -FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type_) -:XML(XML_interface), - ithCore(ithCore_), - interface_ip(*interface_ip_), - coredynp(dyn_p_), - fu_type(fu_type_) -{ - double area_t;//, leakage, gate_leakage; +FunctionalUnit::FunctionalUnit(XMLNode* _xml_data, + InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & 
_core_stats, + enum FU_type fu_type_) + : McPATComponent(_xml_data), + interface_ip(*interface_ip_), core_params(_core_params), + core_stats(_core_stats), fu_type(fu_type_) { + double area_t; + double leakage; + double gate_leakage; double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - clockRate = coredynp.clockRate; - executionTime = coredynp.executionTime; - - //XML_interface=_XML_interface; - uca_org_t result2; - result2 = init_interface(&interface_ip); - if (XML->sys.Embedded) - { - if (fu_type == FPU) - { - num_fu=coredynp.num_fpus; + clockRate = core_params.clockRate; + + uca_org_t result2; + // Temp name for the following function call + name = "Functional Unit"; + + result2 = init_interface(&interface_ip, name); + + if (core_params.Embedded) { + if (fu_type == FPU) { + num_fu=core_params.num_fpus; //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 area_t = 4.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 The base number //4.47 contains both VFP and NEON processing unit, VFP is about 40% and NEON is about 60% @@ -449,10 +534,8 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per Hz energy(nJ) //FPU power from Sandia's processor sizing tech report FU_height=(18667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data - } - else if (fu_type == ALU) - { - num_fu=coredynp.num_alus; + } else if (fu_type == ALU) { + num_fu=core_params.num_alus; area_t = 280*260*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 
20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; @@ -462,10 +545,8 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam per_access_energy = 1.15/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU - } - else if (fu_type == MUL) - { - num_fu=coredynp.num_muls; + } else if (fu_type == MUL) { + num_fu=core_params.num_muls; area_t = 280*260*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; @@ -474,197 +555,117 @@ FunctionalUnit::FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParam base_energy = 0; per_access_energy = 1.15*2/3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data - } - else - { + } else { cout<<"Unknown Functional Unit Type"<F_sz_nm * g_ip->F_sz_nm / 90.0 / + 90.0);//this is um^2 + if (g_ip->F_sz_nm > 90) + area_t = 8.47 * 1e6 * + g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 + leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W + gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, 
inv)*g_tp.peri_global.Vdd/2;//unit W + //W The base energy of ALU average numbers from Intel 4G and + //773Mhz (Wattch) + base_energy = core_params.core_ty == Inorder ? 0 : 89e-3 * 3; + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / + 1.2); + per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ) + FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data + } else if (fu_type == ALU) { + name = "Integer ALU(s)"; + num_fu = core_params.num_alus; + //this is um^2 ALU + MUl + area_t = 280 * 260 * 2 * g_tp.scaling_factor.logic_scaling_co_eff; + leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W + gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; + //W The base energy of ALU average numbers from Intel 4G and 773Mhz + //(Wattch) + base_energy = core_params.core_ty == Inorder ? 
0 : 89e-3; + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / + 1.2); + per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) + FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU + } else if (fu_type == MUL) { + name = "Multiply/Divide Unit(s)"; + num_fu = core_params.num_muls; + //this is um^2 ALU + MUl + area_t = 280 * 260 * 2 * 3 * + g_tp.scaling_factor.logic_scaling_co_eff; + leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W + gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; + //W The base energy of ALU average numbers from Intel 4G and 773Mhz + //(Wattch) + base_energy = core_params.core_ty == Inorder ? 
0 : 89e-3 * 2; + base_energy *= (g_tp.peri_global.Vdd * g_tp.peri_global.Vdd / 1.2 / + 1.2); + per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch + FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data + } else { + cout << "Unknown Functional Unit Type" << endl; + exit(0); } - else - { - if (fu_type == FPU) - { - num_fu=coredynp.num_fpus; - //area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 - area_t = 8.47*1e6*(g_ip->F_sz_nm*g_ip->F_sz_nm/90.0/90.0);//this is um^2 - if (g_ip->F_sz_nm>90) - area_t = 8.47*1e6*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - //energy = 0.3529/10*1e-9;//this is the energy(nJ) for a FP instruction in FPU usually it can have up to 20 cycles. - base_energy = coredynp.core_ty==Inorder? 
0: 89e-3*3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15*3/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per op energy(nJ) - FU_height=(38667*num_fu)*interface_ip.F_sz_um;//FPU from Sun's data - } - else if (fu_type == ALU) - { - num_fu=coredynp.num_alus; - area_t = 280*260*2*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; - base_energy = coredynp.core_ty==Inorder? 0:89e-3; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ) - FU_height=(6222*num_fu)*interface_ip.F_sz_um;//integer ALU + } - } - else if (fu_type == MUL) - { - num_fu=coredynp.num_muls; - area_t = 280*260*2*3*g_tp.scaling_factor.logic_scaling_co_eff;//this is um^2 ALU + MUl - leakage = area_t *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2;//unit W - gate_leakage = area_t*(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(20*g_tp.min_w_nmos_, 20*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd/2; - base_energy = coredynp.core_ty==Inorder? 
0:89e-3*2; //W The base energy of ALU average numbers from Intel 4G and 773Mhz (Wattch) - base_energy *=(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2); - per_access_energy = 1.15*2/1e9/4/1.3/1.3*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(g_ip->F_sz_nm/90.0);//(g_tp.peri_global.Vdd*g_tp.peri_global.Vdd/1.2/1.2);//0.00649*1e-9; //This is per cycle energy(nJ), coefficient based on Wattch - FU_height=(9334*num_fu )*interface_ip.F_sz_um;//divider/mul from Sun's data - } - else - { - cout<<"Unknown Functional Unit Type"<sys.Embedded) - area.set_area(area.get_area()*macro_layout_overhead); -} - -void FunctionalUnit::computeEnergy(bool is_tdp) -{ - double pppm_t[4] = {1,1,1,1}; - double FU_duty_cycle; - if (is_tdp) - { - - - set_pppm(pppm_t, 2, 2, 2, 2);//2 means two source operands needs to be passed for each int instruction. - if (fu_type == FPU) - { - stats_t.readAc.access = num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.FPU_duty_cycle; - } - else if (fu_type == ALU) - { - stats_t.readAc.access = 1*num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.ALU_duty_cycle; - } - else if (fu_type == MUL) - { - stats_t.readAc.access = num_fu; - tdp_stats = stats_t; - FU_duty_cycle = coredynp.MUL_duty_cycle; - } - - //power.readOp.dynamic = base_energy/clockRate + energy*stats_t.readAc.access; - power.readOp.dynamic = per_access_energy*stats_t.readAc.access + base_energy/clockRate; - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation*FU_duty_cycle; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - power.readOp.leakage = leakage; - power.readOp.gate_leakage = gate_leakage; - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - - } - else - { - if (fu_type == FPU) - { - stats_t.readAc.access = XML->sys.core[ithCore].fpu_accesses; - rtp_stats = stats_t; - } - else if 
(fu_type == ALU) - { - stats_t.readAc.access = XML->sys.core[ithCore].ialu_accesses; - rtp_stats = stats_t; - } - else if (fu_type == MUL) - { - stats_t.readAc.access = XML->sys.core[ithCore].mul_accesses; - rtp_stats = stats_t; - } - - //rt_power.readOp.dynamic = base_energy*executionTime + energy*stats_t.readAc.access; - rt_power.readOp.dynamic = per_access_energy*stats_t.readAc.access + base_energy*executionTime; - double sckRation = g_tp.sckt_co_eff; - rt_power.readOp.dynamic *= sckRation; - rt_power.writeOp.dynamic *= sckRation; - rt_power.searchOp.dynamic *= sckRation; - - } - - + power.readOp.leakage = leakage * num_fu; + power.readOp.gate_leakage = gate_leakage * num_fu; + + double long_channel_device_reduction = + longer_channel_device_reduction(Core_device, core_params.core_ty); + power.readOp.longer_channel_leakage = + power.readOp.leakage * long_channel_device_reduction; + double macro_layout_overhead = g_tp.macro_layout_overhead; + area.set_area(area.get_area()*macro_layout_overhead); } -void FunctionalUnit::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - -// cout << indent_str_next << "Results Broadcast Bus Area = " << bypass->area.get_area() *1e-6 << " mm^2" << endl; - if (is_tdp) - { - if (fu_type == FPU) - { - cout << indent_str << "Floating Point Units (FPUs) (Count: "<< coredynp.num_fpus <<" ):" << endl; - cout << indent_str_next << "Area = " << area.get_area()*1e-6 << " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; -// cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage << " W" << endl; - cout << indent_str_next<< "Subthreshold Leakage = " - << (long_channel? 
power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout <sys.Embedded), - pipeline_stage(coredynp.pipeline_stages), - num_hthreads(coredynp.num_hthreads), - issue_width(coredynp.issueW), - exist(exist_) -// is_default(_is_default) -{ - if (!exist) return; - double undifferentiated_core=0; - double core_tx_density=0; - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); +UndiffCore::UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & dyn_p_, + bool exist_) + : McPATComponent(_xml_data), + interface_ip(*interface_ip_), coredynp(dyn_p_), + core_ty(coredynp.core_ty), embedded(coredynp.Embedded), + pipeline_stage(coredynp.pipeline_stages), + num_hthreads(coredynp.num_hthreads), issue_width(coredynp.issueW), + exist(exist_) { + if (!exist) return; + + name = "Undifferentiated Core"; + clockRate = coredynp.clockRate; + + double undifferentiated_core = 0; + double core_tx_density = 0; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); double undifferentiated_core_coe; - //XML_interface=_XML_interface; - uca_org_t result2; - result2 = init_interface(&interface_ip); - - //Compute undifferentiated core area at 90nm. 
- if (embedded==false) - { - //Based on the results of polynomial/log curve fitting based on undifferentiated core of Niagara, Niagara2, Merom, Penyrn, Prescott, Opteron die measurements - if (core_ty==OOO) - { - //undifferentiated_core = (0.0764*pipeline_stage*pipeline_stage -2.3685*pipeline_stage + 10.405);//OOO - undifferentiated_core = (3.57*log(pipeline_stage)-1.2643)>0?(3.57*log(pipeline_stage)-1.2643):0; - } - else if (core_ty==Inorder) - { - //undifferentiated_core = (0.1238*pipeline_stage + 7.2572)*0.9;//inorder - undifferentiated_core = (-2.19*log(pipeline_stage)+6.55)>0?(-2.19*log(pipeline_stage)+6.55):0; - } - else - { - cout<<"invalid core type"< 0 ? + (3.57 * log(pipeline_stage) - 1.2643) : 0; + } else if (core_ty == Inorder) { + undifferentiated_core = (-2.19 * log(pipeline_stage) + 6.55) > 0 ? + (-2.19 * log(pipeline_stage) + 6.55) : 0; + } else { + cout << "invalid core type" << endl; + exit(0); } - else - { - //Based on the results in paper "parametrized processor models" Sandia Labs - if (XML->sys.opt_clockrate) + undifferentiated_core *= (1 + logtwo(num_hthreads) * 0.0716); + } else { + //Based on the results in paper "parametrized processor models" Sandia Labs + if (opt_for_clk) undifferentiated_core_coe = 0.05; else undifferentiated_core_coe = 0; - undifferentiated_core = (0.4109* pipeline_stage - 0.776)*undifferentiated_core_coe; - undifferentiated_core *= (1+ logtwo(num_hthreads)* 0.0426); - } - - undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff*1e6;//change from mm^2 to um^2 - core_tx_density = g_tp.scaling_factor.core_tx_density; - //undifferentiated_core = 3*1e6; - //undifferentiated_core *= g_tp.scaling_factor.logic_scaling_co_eff;//(g_ip->F_sz_um*g_ip->F_sz_um/0.09/0.09)*; - power.readOp.leakage = undifferentiated_core*(core_tx_density)*cmos_Isub_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power.readOp.gate_leakage = 
undifferentiated_core*(core_tx_density)*cmos_Ig_leakage(5*g_tp.min_w_nmos_, 5*g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd; - - double long_channel_device_reduction = longer_channel_device_reduction(Core_device, coredynp.core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - area.set_area(undifferentiated_core); - - scktRatio = g_tp.sckt_co_eff; - power.readOp.dynamic *= scktRatio; - power.writeOp.dynamic *= scktRatio; - power.searchOp.dynamic *= scktRatio; - macro_PR_overhead = g_tp.macro_layout_overhead; - area.set_area(area.get_area()*macro_PR_overhead); - - - -// double vt=g_tp.peri_global.Vth; -// double velocity_index=1.1; -// double c_in=gate_C(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r , 0.0, false); -// double c_out= drain_C_(g_tp.min_w_nmos_, NCH, 2, 1, g_tp.cell_h_def, false) + drain_C_(g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, PCH, 1, 1, g_tp.cell_h_def, false) + c_in; -// double w_nmos=g_tp.min_w_nmos_; -// double w_pmos=g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; -// double i_on_n=1.0; -// double i_on_p=1.0; -// double i_on_n_in=1.0; -// double i_on_p_in=1; -// double vdd=g_tp.peri_global.Vdd; - -// power.readOp.sc=shortcircuit_simple(vt, velocity_index, c_in, c_out, w_nmos,w_pmos, i_on_n, i_on_p,i_on_n_in, i_on_p_in, vdd); -// power.readOp.dynamic=c_out*vdd*vdd/2; - -// cout<sys.longer_channel_device; - - if (is_tdp) - { - cout << indent_str << "UndiffCore:" << endl; - cout << indent_str_next << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << power.readOp.dynamic*clockRate << " W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << power.readOp.leakage <<" W" << endl; - cout << indent_str_next<< "Subthreshold Leakage = " - << (long_channel? 
power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - //cout << indent_str_next << "Runtime Dynamic = " << rt_power.readOp.dynamic/executionTime << " W" << endl; - cout < 18) opcode_length = 18; - num_decoded_signals= (int)pow(2.0,opcode_length); - pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - load_nmos_width=g_tp.max_w_nmos_ /2; - load_pmos_width= g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r; - C_driver_load = 1024*gate_C(load_nmos_width + load_pmos_width, 0, is_dram); //TODO: this number 1024 needs to be revisited - R_wire_load = 3000*l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; - - final_dec = new Decoder( - num_decoded_signals, - false, - C_driver_load, - R_wire_load, - false/*is_fa*/, - false/*is_dram*/, - false/*wl_tr*/, //to use peri device - cell); - - PredecBlk * predec_blk1 = new PredecBlk( - num_decoded_signals, - final_dec, - 0,//Assuming predec and dec are back to back - 0, - 1,//Each Predec only drives one final dec - false/*is_dram*/, - true); - PredecBlk * predec_blk2 = new PredecBlk( - num_decoded_signals, - final_dec, - 0,//Assuming predec and dec are back to back - 0, - 1,//Each Predec only drives one final dec - false/*is_dram*/, - false); - - PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); - PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false); - - pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2); - - double area_decoder = final_dec->area.get_area() * num_decoded_signals * num_decoder_segments*num_decoders; - //double w_decoder = area_decoder / area.get_h(); - double area_pre_dec = (predec_blk_drv1->area.get_area() + - predec_blk_drv2->area.get_area() + - predec_blk1->area.get_area() + - predec_blk2->area.get_area())* - num_decoder_segments*num_decoders; - area.set_area(area.get_area()+ area_decoder + area_pre_dec); - double macro_layout_overhead = g_tp.macro_layout_overhead; - 
double chip_PR_overhead = g_tp.chip_layout_overhead; - area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead); - - inst_decoder_delay_power(); - - double sckRation = g_tp.sckt_co_eff; - power.readOp.dynamic *= sckRation; - power.writeOp.dynamic *= sckRation; - power.searchOp.dynamic *= sckRation; - - double long_channel_device_reduction = longer_channel_device_reduction(device_ty,core_ty); - power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; - +InstructionDecoder::InstructionDecoder(XMLNode* _xml_data, const string _name, + bool _is_default, + const InputParameter *configure_interface, + int opcode_length_, int num_decoders_, + bool x86_, + double clockRate_, + enum Device_ty device_ty_, + enum Core_type core_ty_) + : McPATComponent(_xml_data), is_default(_is_default), + opcode_length(opcode_length_), num_decoders(num_decoders_), x86(x86_), + device_ty(device_ty_), core_ty(core_ty_) { + /* + * Instruction decoder is different from n to 2^n decoders + * that are commonly used in row decoders in memory arrays. + * The RISC instruction decoder is typically a very simple device. + * We can decode an instruction by simply + * separating the machine word into small parts using wire slices + * The RISC instruction decoder can be approximate by the n to 2^n decoders, + * although this approximation usually underestimate power since each decoded + * instruction normally has more than 1 active signal. + * + * However, decoding a CISC instruction word is much more difficult + * than the RISC case. A CISC decoder is typically set up as a state machine. + * The machine reads the opcode field to determine + * what type of instruction it is, + * and where the other data values are. + * The instruction word is read in piece by piece, + * and decisions are made at each stage as to + * how the remainder of the instruction word will be read. 
+ * (sequencer and ROM are usually needed) + * An x86 decoder can be even more complex since + * it involve both decoding instructions into u-ops and + * merge u-ops when doing micro-ops fusion. + */ + name = _name; + clockRate = clockRate_; + bool is_dram = false; + double pmos_to_nmos_sizing_r; + double load_nmos_width, load_pmos_width; + double C_driver_load, R_wire_load; + Area cell; + + l_ip = *configure_interface; + local_result = init_interface(&l_ip, name); + cell.h = g_tp.cell_h_def; + cell.w = g_tp.cell_h_def; + + num_decoder_segments = (int)ceil(opcode_length / 18.0); + if (opcode_length > 18) opcode_length = 18; + num_decoded_signals = (int)pow(2.0, opcode_length); + pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + load_nmos_width = g_tp.max_w_nmos_ / 2; + load_pmos_width = g_tp.max_w_nmos_ * pmos_to_nmos_sizing_r; + C_driver_load = 1024 * gate_C(load_nmos_width + load_pmos_width, 0, is_dram); + R_wire_load = 3000 * l_ip.F_sz_um * g_tp.wire_outside_mat.R_per_um; + + final_dec = new Decoder( + num_decoded_signals, + false, + C_driver_load, + R_wire_load, + false/*is_fa*/, + false/*is_dram*/, + false/*wl_tr*/, //to use peri device + cell); + + PredecBlk * predec_blk1 = new PredecBlk( + num_decoded_signals, + final_dec, + 0,//Assuming predec and dec are back to back + 0, + 1,//Each Predec only drives one final dec + false/*is_dram*/, + true); + PredecBlk * predec_blk2 = new PredecBlk( + num_decoded_signals, + final_dec, + 0,//Assuming predec and dec are back to back + 0, + 1,//Each Predec only drives one final dec + false/*is_dram*/, + false); + + PredecBlkDrv * predec_blk_drv1 = new PredecBlkDrv(0, predec_blk1, false); + PredecBlkDrv * predec_blk_drv2 = new PredecBlkDrv(0, predec_blk2, false); + + pre_dec = new Predec(predec_blk_drv1, predec_blk_drv2); + + double area_decoder = final_dec->area.get_area() * num_decoded_signals * + num_decoder_segments * num_decoders; + //double w_decoder = area_decoder / area.get_h(); + double area_pre_dec = 
(predec_blk_drv1->area.get_area() + + predec_blk_drv2->area.get_area() + + predec_blk1->area.get_area() + + predec_blk2->area.get_area()) * + num_decoder_segments * num_decoders; + area.set_area(area.get_area() + area_decoder + area_pre_dec); + double macro_layout_overhead = g_tp.macro_layout_overhead; + double chip_PR_overhead = g_tp.chip_layout_overhead; + area.set_area(area.get_area()*macro_layout_overhead*chip_PR_overhead); + + inst_decoder_delay_power(); + + double sckRation = g_tp.sckt_co_eff; + power.readOp.dynamic *= sckRation; + power.writeOp.dynamic *= sckRation; + power.searchOp.dynamic *= sckRation; + + double long_channel_device_reduction = + longer_channel_device_reduction(device_ty, core_ty); + power.readOp.longer_channel_leakage = power.readOp.leakage * + long_channel_device_reduction; + + output_data.area = area.get_area() / 1e6; + output_data.peak_dynamic_power = power.readOp.dynamic * clockRate; + output_data.subthreshold_leakage_power = power.readOp.leakage; + output_data.gate_leakage_power = power.readOp.gate_leakage; } -void inst_decoder::inst_decoder_delay_power() -{ +void InstructionDecoder::inst_decoder_delay_power() { - double dec_outrisetime; - double inrisetime=0, outrisetime; - double pppm_t[4] = {1,1,1,1}; - double squencer_passes = x86?2:1; + double dec_outrisetime; + double inrisetime = 0, outrisetime; + double pppm_t[4] = {1, 1, 1, 1}; + double squencer_passes = x86 ? 
2 : 1; - outrisetime = pre_dec->compute_delays(inrisetime); - dec_outrisetime = final_dec->compute_delays(outrisetime); - set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments); - power = power + pre_dec->power*pppm_t; + outrisetime = pre_dec->compute_delays(inrisetime); + dec_outrisetime = final_dec->compute_delays(outrisetime); + set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments, squencer_passes*num_decoder_segments, num_decoder_segments); + power = power + pre_dec->power * pppm_t; set_pppm(pppm_t, squencer_passes*num_decoder_segments, num_decoder_segments*num_decoded_signals, - num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments); - power = power + final_dec->power*pppm_t; + num_decoder_segments*num_decoded_signals, squencer_passes*num_decoder_segments); + power = power + final_dec->power * pppm_t; } -void inst_decoder::leakage_feedback(double temperature) -{ + +void InstructionDecoder::leakage_feedback(double temperature) { l_ip.temp = (unsigned int)round(temperature/10.0)*10; - uca_org_t init_result = init_interface(&l_ip); // init_result is dummy + uca_org_t init_result = init_interface(&l_ip, name); // init_result is dummy final_dec->leakage_feedback(temperature); pre_dec->leakage_feedback(temperature); @@ -1000,15 +945,14 @@ void inst_decoder::leakage_feedback(double temperature) power.readOp.longer_channel_leakage = power.readOp.leakage*long_channel_device_reduction; } -inst_decoder::~inst_decoder() -{ - local_result.cleanup(); +InstructionDecoder::~InstructionDecoder() { + local_result.cleanup(); - delete final_dec; + delete final_dec; - delete pre_dec->blk1; - delete pre_dec->blk2; - delete pre_dec->drv1; - delete pre_dec->drv2; - delete pre_dec; + delete pre_dec->blk1; + delete pre_dec->blk2; + delete pre_dec->drv1; + delete pre_dec->drv2; + delete pre_dec; } diff --git a/ext/mcpat/logic.h b/ext/mcpat/logic.h index 
e2a35e845..19c774ef9 100644 --- a/ext/mcpat/logic.h +++ b/ext/mcpat/logic.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,18 +26,16 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #ifndef LOGIC_H_ #define LOGIC_H_ -#include #include #include #include -#include "XML_Parse.h" #include "arch_const.h" #include "basic_circuit.h" #include "basic_components.h" @@ -49,185 +48,190 @@ using namespace std; -class selection_logic : public Component{ +class selection_logic : public McPATComponent { public: - selection_logic(bool _is_default, int win_entries_, - int issue_width_, const InputParameter *configure_interface, - enum Device_ty device_ty_=Core_device, - enum Core_type core_ty_=Inorder);//, const ParseXML *_XML_interface); - bool is_default; - InputParameter l_ip; - uca_org_t local_result; - const ParseXML *XML_interface; - int win_entries; - int issue_width; - int num_threads; - enum Device_ty device_ty; - enum Core_type core_ty; - - void selection_power(); + bool is_default; + InputParameter l_ip; + uca_org_t local_result; + int win_entries; + int issue_width; + double accesses; + int num_threads; + enum Device_ty device_ty; + enum Core_type core_ty; + + selection_logic(XMLNode* _xml_data, bool _is_default, int _win_entries, + int issue_width_, const InputParameter* configure_interface, + string _name, double _accesses, + double clockRate_ = 0.0f, + enum 
Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); + void computeArea(); + void computeEnergy(); void leakage_feedback(double temperature); // TODO + // TODO: Add a deconstructor }; -class dep_resource_conflict_check : public Component{ +class dep_resource_conflict_check : public McPATComponent { public: - dep_resource_conflict_check(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, int compare_bits_, bool _is_default=true); - InputParameter l_ip; - uca_org_t local_result; - double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ; - CoreDynParam coredynp; - int compare_bits; - bool is_default; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - - void conflict_check_power(); - double compare_cap(); - ~dep_resource_conflict_check(){ - local_result.cleanup(); - } + InputParameter l_ip; + uca_org_t local_result; + double WNORn, WNORp, Wevalinvp, Wevalinvn, Wcompn, Wcompp, Wcomppreequ; + CoreParameters coredynp; + int compare_bits; + bool is_default; + statsDef stats_t; + + dep_resource_conflict_check(XMLNode* _xml_data, const string _name, + const InputParameter *configure_interface, + const CoreParameters & dyn_p_, int compare_bits_, + double clockRate_ = 0.0f, + bool _is_default = true); + void conflict_check_power(); + double compare_cap(); + void computeEnergy() {}; + ~dep_resource_conflict_check() { + local_result.cleanup(); + } void leakage_feedback(double temperature); }; -class inst_decoder: public Component{ +class InstructionDecoder: public McPATComponent { public: - inst_decoder(bool _is_default, const InputParameter *configure_interface, - int opcode_length_, - int num_decoders_, - bool x86_, - enum Device_ty device_ty_=Core_device, - enum Core_type core_ty_=Inorder); - inst_decoder(); - bool is_default; - int opcode_length; - int num_decoders; - bool x86; - int num_decoder_segments; - int num_decoded_signals; - InputParameter l_ip; - uca_org_t local_result; - 
enum Device_ty device_ty; - enum Core_type core_ty; - - Decoder * final_dec; - Predec * pre_dec; - - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - void inst_decoder_delay_power(); - ~inst_decoder(); + Decoder* final_dec; + Predec* pre_dec; + + bool is_default; + int opcode_length; + int num_decoders; + bool x86; + int num_decoder_segments; + int num_decoded_signals; + InputParameter l_ip; + uca_org_t local_result; + enum Device_ty device_ty; + enum Core_type core_ty; + statsDef stats_t; + + InstructionDecoder(XMLNode* _xml_data, const string _name, bool _is_default, + const InputParameter *configure_interface, + int opcode_length_, int num_decoders_, bool x86_, + double clockRate_ = 0.0f, + enum Device_ty device_ty_ = Core_device, + enum Core_type core_ty_ = Inorder); + InstructionDecoder(); + void computeEnergy() {}; + void inst_decoder_delay_power(); + ~InstructionDecoder(); void leakage_feedback(double temperature); }; +// TODO: This should be defined elsewhere? 
This isn't a true McPATComponent class DFFCell : public Component { public: - DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp,double _cell_load, - const InputParameter *configure_interface); - InputParameter l_ip; - bool is_dram; - double cell_load; - double WdecNANDn; - double WdecNANDp; - double clock_cap; - int model; - int n_switch; - int n_keep_1; - int n_keep_0; - int n_clock; - powerDef e_switch; - powerDef e_keep_1; - powerDef e_keep_0; - powerDef e_clock; - - double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out); - void compute_DFF_cell(void); - }; - -class Pipeline : public Component{ + InputParameter l_ip; + bool is_dram; + double cell_load; + double WdecNANDn; + double WdecNANDp; + double clock_cap; + int model; + int n_switch; + int n_keep_1; + int n_keep_0; + int n_clock; + powerDef e_switch; + powerDef e_keep_1; + powerDef e_keep_0; + powerDef e_clock; + + DFFCell(bool _is_dram, double _WdecNANDn, double _WdecNANDp, double _cell_load, + const InputParameter *configure_interface); + double fpfp_node_cap(unsigned int fan_in, unsigned int fan_out); + void compute_DFF_cell(void); + ~DFFCell() {}; +}; + +// TODO: This is a very ambiguous component. Try to refactor it. 
+class Pipeline : public McPATComponent { public: - Pipeline(const InputParameter *configure_interface, const CoreDynParam & dyn_p_, enum Device_ty device_ty_=Core_device, bool _is_core_pipeline=true, bool _is_default=true); - InputParameter l_ip; - uca_org_t local_result; - CoreDynParam coredynp; - enum Device_ty device_ty; - bool is_core_pipeline, is_default; - double num_piperegs; -// int pipeline_stages; -// int tot_stage_vector, per_stage_vector; - bool process_ind; - double WNANDn ; - double WNANDp; - double load_per_pipeline_stage; -// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length; -// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width; -// bool thread_clock_gated; -// bool in_order, multithreaded; - void compute_stage_vector(); - void compute(); - ~Pipeline(){ - local_result.cleanup(); - }; + InputParameter l_ip; + uca_org_t local_result; + CoreParameters coredynp; + enum Device_ty device_ty; + bool is_core_pipeline, is_default; + double num_piperegs; + bool process_ind; + double WNANDn; + double WNANDp; + double load_per_pipeline_stage; + + Pipeline(XMLNode* _xml_data, const InputParameter *configure_interface, + const CoreParameters & dyn_p_, + enum Device_ty device_ty_ = Core_device, + bool _is_core_pipeline = true, bool _is_default = true); + void compute_stage_vector(); + /** + * TODO: compute() completes work that should be completed in computeArea() + * and computeEnergy() recursively. 
Consider shifting these calculations + * around to be consistent with rest of hierarchy + */ + void compute(); + void computeArea() {}; + // TODO: Move energy computation to this function to unify hierarchy + void computeEnergy() {}; + ~Pipeline() { + local_result.cleanup(); + }; }; -//class core_pipeline :public pipeline{ -//public: -// int Hthread, num_thread, fetchWidth, decodeWidth, issueWidth, commitWidth, instruction_length; -// int PC_width, opcode_length, num_arch_reg_tag, data_width,num_phsical_reg_tag, address_width; -// bool thread_clock_gated; -// bool in_order, multithreaded; -// core_pipeline(bool _is_default, const InputParameter *configure_interface); -// virtual void compute_stage_vector(); -// -//}; - -class FunctionalUnit :public Component{ +class FunctionalUnit : public McPATComponent { public: - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double FU_height; - double clockRate,executionTime; - double num_fu; - double energy, base_energy,per_access_energy, leakage, gate_leakage; - bool is_default; - enum FU_type fu_type; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - - FunctionalUnit(ParseXML *XML_interface, int ithCore_, InputParameter* interface_ip_,const CoreDynParam & dyn_p_, enum FU_type fu_type); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + InputParameter interface_ip; + CoreParameters core_params; + CoreStatistics core_stats; + double FU_height; + double num_fu; + double energy; + double base_energy; + double per_access_energy; + bool is_default; + enum FU_type fu_type; + statsDef stats_t; + + FunctionalUnit(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & _core_params, + const CoreStatistics & _core_stats, enum FU_type fu_type); + void computeEnergy(); void leakage_feedback(double temperature); - + ~FunctionalUnit() {}; }; -class UndiffCore :public 
Component{ +// TODO: This is a very ambiguous component. Try to refactor it. +class UndiffCore : public McPATComponent { public: - UndiffCore(ParseXML* XML_interface, int ithCore_, InputParameter* interface_ip_, const CoreDynParam & dyn_p_, bool exist_=true, bool embedded_=false); - ParseXML *XML; - int ithCore; - InputParameter interface_ip; - CoreDynParam coredynp; - double clockRate,executionTime; - double scktRatio, chip_PR_overhead, macro_PR_overhead; - enum Core_type core_ty; - bool opt_performance, embedded; - double pipeline_stage,num_hthreads,issue_width; - bool is_default; - - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~UndiffCore(){}; - bool exist; - - + InputParameter interface_ip; + CoreParameters coredynp; + double scktRatio; + double chip_PR_overhead; + double macro_PR_overhead; + enum Core_type core_ty; + bool opt_performance; + bool embedded; + double pipeline_stage; + double num_hthreads; + double issue_width; + bool is_default; + bool exist; + + UndiffCore(XMLNode* _xml_data, InputParameter* interface_ip_, + const CoreParameters & dyn_p_, + bool exist_ = true); + void computeArea() {}; + // TODO: Move energy computation to this function to unify hierarchy + void computeEnergy() {}; + ~UndiffCore() {}; }; #endif /* LOGIC_H_ */ diff --git a/ext/mcpat/main.cc b/ext/mcpat/main.cc index 8acce8d23..ec266f386 100644 --- a/ext/mcpat/main.cc +++ b/ext/mcpat/main.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
* All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,15 +26,17 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ +#include + +#include #include -#include "XML_Parse.h" -#include "globalvar.h" +#include "basic_components.h" #include "io.h" -#include "processor.h" +#include "system.h" #include "version.h" #include "xmlParser.h" @@ -41,61 +44,68 @@ using namespace std; void print_usage(char * argv0); -int main(int argc,char *argv[]) -{ - char * fb ; - bool infile_specified = false; - int plevel = 2; - opt_for_clk =true; - //cout.precision(10); - if (argc <= 1 || argv[1] == string("-h") || argv[1] == string("--help")) - { - print_usage(argv[0]); - } +int main(int argc, char *argv[]) { + char* xml_file = NULL; + int plevel = 2; - for (int32_t i = 0; i < argc; i++) - { - if (argv[i] == string("-infile")) - { - infile_specified = true; - i++; - fb = argv[ i]; - } - - if (argv[i] == string("-print_level")) - { - i++; - plevel = atoi(argv[i]); - } - - if (argv[i] == string("-opt_for_clk")) - { - i++; - opt_for_clk = (bool)atoi(argv[i]); - } - } - if (infile_specified == false) - { - print_usage(argv[0]); + for (int32_t i = 0; i < argc; i++) { + if (argv[i] == string("-infile")) { + xml_file = argv[++i]; + + } else if (argv[i] == string("-print_level")) { + plevel = atoi(argv[++i]); + + } else if (argv[i] == string("-opt_for_clk")) { + McPATComponent::opt_for_clk = (bool)atoi(argv[++i]); } + } + + // Ensure that the XML file was specified + if (xml_file == NULL) { + cerr << "ERROR: Please specify infile\n\n"; + print_usage(argv[0]); + } + + // 
Ensure that the XML file exists + struct stat file_info; + if (stat(xml_file, &file_info)) { + cerr << "ERROR: File not found: " << xml_file << endl << endl; + print_usage(argv[0]); + } + + cout << "McPAT (version " << VER_MAJOR << "." << VER_MINOR + << " of " << VER_UPDATE << ") is computing the target processor...\n " + << endl; + + // Parse the XML input file + XMLNode xml_data = XMLNode::openFileHelper(xml_file, "component"); + unsigned int num_children = xml_data.nChildNode("component"); + assert(num_children == 1); + XMLNode system_xml = xml_data.getChildNode("component"); + assert(strcmp(system_xml.getAttribute("type"), "System") == 0); + + // Recursively instantiate the system hierarchy + System* system = new System(&system_xml); + + // Recursively compute chip area + system->computeArea(); + + // Recursively compute the power consumed + system->computeEnergy(); + // Recursively output the computed values + system->displayData(2, plevel); - cout<<"McPAT (version "<< VER_MAJOR <<"."<< VER_MINOR - << " of " << VER_UPDATE << ") is computing the target processor...\n "<parse(fb); - Processor proc(p1); - proc.displayEnergy(2, plevel); - delete p1; - return 0; } -void print_usage(char * argv0) -{ +void print_usage(char * argv0) { cerr << "How to use McPAT:" << endl; - cerr << " mcpat -infile -print_level < level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P only)/1 (optimzed for target clock rate)>"<< endl; - //cerr << " Note:default print level is at processor level, please increase it to see the details" << endl; + cerr << " mcpat -infile -print_level < " + << "level of details 0~5 > -opt_for_clk < 0 (optimize for ED^2P " + << "only)/1 (optimzed for target clock rate)>" << endl; exit(1); } diff --git a/ext/mcpat/mcpat.mk b/ext/mcpat/mcpat.mk index f89f499a9..acb73211e 100644 --- a/ext/mcpat/mcpat.mk +++ b/ext/mcpat/mcpat.mk @@ -29,13 +29,16 @@ VPATH = cacti SRCS = \ Ucache.cc \ - XML_Parse.cc \ arbiter.cc \ area.cc \ array.cc \ bank.cc \ 
basic_circuit.cc \ basic_components.cc \ + bus_interconnect.cc \ + cachearray.cc \ + cachecontroller.cc \ + cacheunit.cc \ cacti_interface.cc \ component.cc \ core.cc \ @@ -52,14 +55,13 @@ SRCS = \ noc.cc \ nuca.cc \ parameter.cc \ - processor.cc \ router.cc \ - sharedcache.cc \ subarray.cc \ + system.cc \ technology.cc \ uca.cc \ wire.cc \ - xmlParser.cc + xmlParser.cc OBJS = $(patsubst %.cc,$(ODIR)/obj_$(TAG)/%.o,$(SRCS)) diff --git a/ext/mcpat/mcpatXeonCore.mk b/ext/mcpat/mcpatXeonCore.mk deleted file mode 100644 index 20cf0ddc8..000000000 --- a/ext/mcpat/mcpatXeonCore.mk +++ /dev/null @@ -1,81 +0,0 @@ -TARGET = mcpatXeonCore -SHELL = /bin/sh -.PHONY: all depend clean -.SUFFIXES: .cc .o - -ifndef NTHREADS - NTHREADS = 4 -endif - - -LIBS = -INCS = -lm - -ifeq ($(TAG),dbg) - DBG = -Wall - OPT = -ggdb -g -O0 -DNTHREADS=1 -Icacti -else - DBG = - OPT = -O3 -msse2 -mfpmath=sse -DNTHREADS=$(NTHREADS) -Icacti - #OPT = -O0 -DNTHREADS=$(NTHREADS) -endif - -#CXXFLAGS = -Wall -Wno-unknown-pragmas -Winline $(DBG) $(OPT) -CXXFLAGS = -Wno-unknown-pragmas $(DBG) $(OPT) -CXX = g++ -m32 -CC = gcc -m32 - -VPATH = cacti - -SRCS = \ - Ucache.cc \ - XML_Parse.cc \ - arbiter.cc \ - area.cc \ - array.cc \ - bank.cc \ - basic_circuit.cc \ - basic_components.cc \ - cacti_interface.cc \ - component.cc \ - core.cc \ - crossbar.cc \ - decoder.cc \ - htree2.cc \ - interconnect.cc \ - io.cc \ - iocontrollers.cc \ - logic.cc \ - main.cc \ - mat.cc \ - memoryctrl.cc \ - noc.cc \ - nuca.cc \ - parameter.cc \ - processor.cc \ - router.cc \ - sharedcache.cc \ - subarray.cc \ - technology_xeon_core.cc \ - uca.cc \ - wire.cc \ - xmlParser.cc - -OBJS = $(patsubst %.cc,obj_$(TAG)/%.o,$(SRCS)) - -all: obj_$(TAG)/$(TARGET) - cp -f obj_$(TAG)/$(TARGET) $(TARGET) - -obj_$(TAG)/$(TARGET) : $(OBJS) - $(CXX) $(OBJS) -o $@ $(INCS) $(CXXFLAGS) $(LIBS) -pthread - -#obj_$(TAG)/%.o : %.cc -# $(CXX) -c $(CXXFLAGS) $(INCS) -o $@ $< - -obj_$(TAG)/%.o : %.cc - $(CXX) $(CXXFLAGS) -c $< -o $@ - -clean: - -rm -f *.o 
$(TARGET) - - diff --git a/ext/mcpat/memoryctrl.cc b/ext/mcpat/memoryctrl.cc index ae3bc75ec..dec24512e 100644 --- a/ext/mcpat/memoryctrl.cc +++ b/ext/mcpat/memoryctrl.cc @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,18 +26,19 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ + #include #include #include #include #include -#include "XML_Parse.h" #include "basic_circuit.h" #include "basic_components.h" +#include "common.h" #include "const.h" #include "io.h" #include "logic.h" @@ -69,668 +71,543 @@ * */ -MCBackend::MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) -:l_ip(*interface_ip_), - mc_type(mc_type_), - mcp(mcp_) -{ - - local_result = init_interface(&l_ip); - compute(); - +MCBackend::MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_) + : McPATComponent(_xml_data), l_ip(*interface_ip_), mcp(mcp_), mcs(mcs_) { + name = "Transaction Engine"; + local_result = init_interface(&l_ip, name); + + // Set up stats for the power calculations + tdp_stats.reset(); + tdp_stats.readAc.access = 0.5 * mcp.num_channels * mcp.clockRate; + tdp_stats.writeAc.access = 0.5 * mcp.num_channels * mcp.clockRate; + rtp_stats.reset(); + rtp_stats.readAc.access = mcs.reads; + rtp_stats.writeAc.access = mcs.writes; } +void MCBackend::computeArea() { + // The area is in nm^2 + if 
(mcp.mc_type == MC) { + if (mcp.type == 0) { + output_data.area = (2.7927 * log(mcp.peak_transfer_rate * 2) - + 19.862) / 2.0 * mcp.dataBusWidth / 128.0 * + (l_ip.F_sz_um / 0.09) * mcp.num_channels; + } else { + output_data.area = 0.15 * mcp.dataBusWidth / 72.0 * + (l_ip.F_sz_um / 0.065) * (l_ip.F_sz_um / 0.065) * + mcp.num_channels; + } + } else { + //skip old model + cout << "Unknown memory controllers" << endl; + exit(0); + //area based on Cadence ChipEstimator for 8bit bus + output_data.area = 0.243 * mcp.dataBusWidth / 8; + } +} -void MCBackend::compute() -{ - //double max_row_addr_width = 20.0;//Current address 12~18bits - double C_MCB, mc_power, backend_dyn, backend_gates;//, refresh_period,refresh_freq;//Equivalent per bit Cap for backend, - double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - double NMOS_sizing, PMOS_sizing; - - if (mc_type == MC) - { - if (mcp.type == 0) - { - //area = (2.2927*log(peakDataTransferRate)-14.504)*memDataWidth/144.0*(l_ip.F_sz_um/0.09); - area.set_area((2.7927*log(mcp.peakDataTransferRate*2)-19.862)/2.0*mcp.dataBusWidth/128.0*(l_ip.F_sz_um/0.09)*mcp.num_channels*1e6);//um^2 - //assuming the approximately same scaling factor as seen in processors. - //C_MCB=0.2/1.3/1.3/266/64/0.09*g_ip.F_sz_um;//based on AMD Geode processor which has a very basic mc on chip. - //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process. 
- //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et - mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend - C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; - power_t.readOp.dynamic = C_MCB*g_tp.peri_global.Vdd*g_tp.peri_global.Vdd*(mcp.dataBusWidth/*+mcp.addressBusWidth*/);//per access energy in memory controller - power_t.readOp.leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Isub_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = area.get_area()/2 *(g_tp.scaling_factor.core_tx_density)*cmos_Ig_leakage(g_tp.min_w_nmos_, g_tp.min_w_nmos_*pmos_to_nmos_sizing_r, 1, inv)*g_tp.peri_global.Vdd;//unit W +void MCBackend::computeEnergy() { + double C_MCB, mc_power; + double backend_dyn; + double backend_gates; + double pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); + double NMOS_sizing = g_tp.min_w_nmos_; + double PMOS_sizing = g_tp.min_w_nmos_ * pmos_to_nmos_sizing_r; + double area_um2 = output_data.area * 1e6; + + if (mcp.mc_type == MC) { + if (mcp.type == 0) { + //assuming the approximately same scaling factor as seen in processors. + //C_MCB = 1.6/200/1e6/144/1.2/1.2*g_ip.F_sz_um/0.19;//Based on Niagara power numbers.The base power (W) is divided by device frequency and vdd and scale to target process. 
+ //mc_power = 0.0291*2;//29.1mW@200MHz @130nm From Power Analysis of SystemLevel OnChip Communication Architectures by Lahiri et + mc_power = 4.32*0.1;//4.32W@1GhzMHz @65nm Cadence ChipEstimator 10% for backend + C_MCB = mc_power/1e9/72/1.1/1.1*l_ip.F_sz_um/0.065; + //per access energy in memory controller + power.readOp.dynamic = C_MCB * g_tp.peri_global.Vdd * + g_tp.peri_global.Vdd * + (mcp.dataBusWidth/*+mcp.addressBusWidth*/); + power.readOp.leakage = area_um2 / 2 * + (g_tp.scaling_factor.core_tx_density) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * + g_tp.peri_global.Vdd;//unit W + power.readOp.gate_leakage = area_um2 / 2 * + (g_tp.scaling_factor.core_tx_density) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 1, inv) * + g_tp.peri_global.Vdd;//unit W + } else { + //Average on DDR2/3 protocol controller and DDRC 1600/800A in + //Cadence ChipEstimate + backend_dyn = 0.9e-9 / 800e6 * mcp.clockRate / 12800 * + mcp.peak_transfer_rate* mcp.dataBusWidth / 72.0 * + g_tp.peri_global.Vdd / 1.1 * g_tp.peri_global.Vdd / 1.1 * + (l_ip.F_sz_nm/65.0); + //Scaling to technology and DIMM feature. 
The base IP support + //DDR3-1600(PC3 12800) + //5000 is from Cadence ChipEstimator + backend_gates = 50000 * mcp.dataBusWidth / 64.0; + + power.readOp.dynamic = backend_dyn; + power.readOp.leakage = (backend_gates) * + cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W + power.readOp.gate_leakage = (backend_gates) * + cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand) * + g_tp.peri_global.Vdd;//unit W } - else - { NMOS_sizing = g_tp.min_w_nmos_; - PMOS_sizing = g_tp.min_w_nmos_*pmos_to_nmos_sizing_r; - area.set_area(0.15*mcp.dataBusWidth/72.0*(l_ip.F_sz_um/0.065)* (l_ip.F_sz_um/0.065)*mcp.num_channels*1e6);//um^2 - backend_dyn = 0.9e-9/800e6*mcp.clockRate/12800*mcp.peakDataTransferRate*mcp.dataBusWidth/72.0*g_tp.peri_global.Vdd/1.1*g_tp.peri_global.Vdd/1.1*(l_ip.F_sz_nm/65.0);//Average on DDR2/3 protocol controller and DDRC 1600/800A in Cadence ChipEstimate - //Scaling to technology and DIMM feature. The base IP support DDR3-1600(PC3 12800) - backend_gates = 50000*mcp.dataBusWidth/64.0;//5000 is from Cadence ChipEstimator - - power_t.readOp.dynamic = backend_dyn; - power_t.readOp.leakage = (backend_gates)*cmos_Isub_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W - power_t.readOp.gate_leakage = (backend_gates)*cmos_Ig_leakage(NMOS_sizing, PMOS_sizing, 2, nand)*g_tp.peri_global.Vdd;//unit W + } else { + //skip old model + cout<<"Unknown memory controllers"< 0; + interface_ip.tag_w = tag; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = mcp.num_channels; + interface_ip.is_cache = true; + interface_ip.pure_cam = false; + interface_ip.pure_ram = false; + 
interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + frontendBuffer = new CacheArray(xml_data, &interface_ip, "Reorder Buffer", + Uncore_device, mcp.clockRate); + children.push_back(frontendBuffer); + + frontendBuffer->tdp_stats.reset(); + frontendBuffer->tdp_stats.readAc.access = + frontendBuffer->l_ip.num_search_ports + + frontendBuffer->l_ip.num_wr_ports; + frontendBuffer->tdp_stats.writeAc.access = + frontendBuffer->l_ip.num_search_ports; + frontendBuffer->tdp_stats.searchAc.access = + frontendBuffer->l_ip.num_wr_ports; + frontendBuffer->rtp_stats.reset(); + // TODO: These stats assume that access power is calculated per buffer + // bit, which requires the stats to take into account the number of + // bits for each buffer slot. This should be revised... + //For each channel, each memory word need to check the address data to + //achieve best scheduling results. + //and this need to be done on all physical DIMMs in each logical memory + //DIMM *mcp.dataBusWidth/72 + frontendBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72; + frontendBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth * mcp.dataBusWidth / 72; + frontendBuffer->rtp_stats.searchAc.access = + frontendBuffer->rtp_stats.readAc.access + + frontendBuffer->rtp_stats.writeAc.access; + + // Read Buffers + //Support key words first operation + data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); + + interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = mcp.read_buffer_assoc; + interface_ip.nbanks = mcp.read_buffer_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = mcp.read_buffer_tag_width > 0; + interface_ip.tag_w = mcp.read_buffer_tag_width; + interface_ip.access_mode = Sequential; + interface_ip.obj_func_dyn_energy = 0; + 
interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + readBuffer = new CacheArray(xml_data, &interface_ip, "Read Buffer", + Uncore_device, mcp.clockRate); + children.push_back(readBuffer); + + readBuffer->tdp_stats.reset(); + readBuffer->tdp_stats.readAc.access = readBuffer->l_ip.num_rd_ports * + mcs.duty_cycle; + readBuffer->tdp_stats.writeAc.access = readBuffer->l_ip.num_wr_ports * + mcs.duty_cycle; + readBuffer->rtp_stats.reset(); + readBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + readBuffer->rtp_stats.writeAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + + // Write Buffer + //Support key words first operation + data = (int)ceil(mcp.dataBusWidth / BITS_PER_BYTE); + + interface_ip.cache_sz = data * mcp.IO_buffer_size_per_channel; + interface_ip.line_sz = data; + interface_ip.assoc = mcp.write_buffer_assoc; + interface_ip.nbanks = mcp.write_buffer_nbanks; + interface_ip.out_w = interface_ip.line_sz * BITS_PER_BYTE; + interface_ip.specific_tag = mcp.write_buffer_tag_width > 0; + interface_ip.tag_w = mcp.write_buffer_tag_width; + interface_ip.access_mode = Normal; + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 0; + interface_ip.num_rd_ports = mcp.num_channels; + interface_ip.num_wr_ports = interface_ip.num_rd_ports; + interface_ip.num_se_rd_ports = 0; + interface_ip.num_search_ports = 
0; + interface_ip.is_cache = false; + interface_ip.pure_cam = false; + interface_ip.pure_ram = true; + interface_ip.throughput = 1.0 / mcp.clockRate; + interface_ip.latency = 1.0 / mcp.clockRate; + writeBuffer = new CacheArray(xml_data, &interface_ip, "Write Buffer", + Uncore_device, mcp.clockRate); + children.push_back(writeBuffer); + + writeBuffer->tdp_stats.reset(); + writeBuffer->tdp_stats.readAc.access = writeBuffer->l_ip.num_rd_ports * + mcs.duty_cycle; + writeBuffer->tdp_stats.writeAc.access = writeBuffer->l_ip.num_wr_ports * + mcs.duty_cycle; + writeBuffer->rtp_stats.reset(); + writeBuffer->rtp_stats.readAc.access = mcs.reads * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + writeBuffer->rtp_stats.writeAc.access = mcs.writes * mcp.llcBlockSize * + BITS_PER_BYTE / mcp.dataBusWidth; + + // TODO: Set up selection logic as a leaf node in tree + //selection and arbitration logic + MC_arb = + new selection_logic(xml_data, is_default, + mcp.req_window_size_per_channel, 1, &interface_ip, + "Arbitration Logic", (mcs.reads + mcs.writes), + mcp.clockRate, Uncore_device); + // MC_arb is not included in the roll-up due to the uninitialized area + //children.push_back(MC_arb); +} -void MCPHY::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - //init stats for Peak - stats_t.readAc.access = 0.5*mcp.num_channels; //time share on buses - stats_t.writeAc.access = 0.5*mcp.num_channels; - tdp_stats = stats_t; - } - else - { - //init stats for runtime power (RTP) - stats_t.readAc.access = mcp.reads; - stats_t.writeAc.access = mcp.writes; - tdp_stats = stats_t; - } - - if (is_tdp) - { - double data_transfer_unit = (mc_type == MC)? 
72:16;/*DIMM data width*/ - power = power_t; - power.readOp.dynamic = power.readOp.dynamic * (mcp.peakDataTransferRate*8*1e6/1e9/*change to Gbs*/)*mcp.dataBusWidth/data_transfer_unit*mcp.num_channels/mcp.clockRate; - // divide by clock rate is for match the final computation where *clock is used - //(stats_t.readAc.access*power_t.readOp.dynamic+ -// stats_t.writeAc.access*power_t.readOp.dynamic); - - } - else - { - rt_power = power_t; -// rt_power.readOp.dynamic = (stats_t.readAc.access*power_t.readOp.dynamic+ -// stats_t.writeAc.access*power_t.readOp.dynamic); - - rt_power.readOp.dynamic=power_t.readOp.dynamic*(stats_t.readAc.access + stats_t.writeAc.access)*(mcp.llcBlockSize)*8/1e9/mcp.executionTime*(mcp.executionTime); - rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; +MemoryController::MemoryController(XMLNode* _xml_data, + InputParameter* interface_ip_) + : McPATComponent(_xml_data), interface_ip(*interface_ip_) { + name = "Memory Controller"; + set_mc_param(); + // TODO: Pass params and stats as pointers + children.push_back(new MCFrontEnd(xml_data, &interface_ip, mcp, mcs)); + children.push_back(new MCBackend(xml_data, &interface_ip, mcp, mcs)); + + if (mcp.type==0 || (mcp.type == 1 && mcp.withPHY)) { + children.push_back(new MCPHY(xml_data, &interface_ip, mcp, mcs)); } } -MCFrontEnd::MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_) -:XML(XML_interface), - interface_ip(*interface_ip_), - mc_type(mc_type_), - mcp(mcp_), - MC_arb(0), - frontendBuffer(0), - readBuffer(0), - writeBuffer(0) -{ - /* All computations are for a single MC - * - */ - - int tag, data; - bool is_default =true;//indication for default setup - - /* MC frontend engine channels share the same engines but logically partitioned - * For all hardware inside MC. different channels do not share resources. 
- * TODO: add docodeing/mux stage to steer memory requests to different channels. - */ - - //memory request reorder buffer - tag = mcp.addressBusWidth + EXTRA_TAG_BITS + mcp.opcodeW; - data = int(ceil((XML->sys.physical_address_width + mcp.opcodeW)/8.0)); - interface_ip.cache_sz = data*XML->sys.mc.req_window_size_per_channel; - interface_ip.line_sz = data; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.is_cache = true; - interface_ip.pure_cam = false; - interface_ip.pure_ram = false; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = XML->sys.mc.memory_channels_per_mc; - frontendBuffer = new ArrayST(&interface_ip, "MC ReorderBuffer", Uncore_device); - frontendBuffer->area.set_area(frontendBuffer->area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ frontendBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - - //selection and arbitration logic - MC_arb = new selection_logic(is_default, XML->sys.mc.req_window_size_per_channel,1,&interface_ip, Uncore_device); - - //read buffers. 
- data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte - interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 1; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.is_cache = false; - interface_ip.pure_cam = false; - interface_ip.pure_ram = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0;//XML->sys.mc.memory_channels_per_mc*2>2?2:XML->sys.mc.memory_channels_per_mc*2; - interface_ip.num_rd_ports = XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - readBuffer = new ArrayST(&interface_ip, "MC ReadBuffer", Uncore_device); - readBuffer->area.set_area(readBuffer->area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ readBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - - //write buffer - data = (int)ceil(mcp.dataBusWidth/8.0);//Support key words first operation //8 means converting bit to Byte - interface_ip.cache_sz = data*XML->sys.mc.IO_buffer_size_per_channel;//*llcBlockSize; - interface_ip.line_sz = data; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = 1.0/mcp.clockRate; - interface_ip.latency = 1.0/mcp.clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 0; - interface_ip.num_rd_ports = 
XML->sys.mc.memory_channels_per_mc; - interface_ip.num_wr_ports = interface_ip.num_rd_ports; - interface_ip.num_se_rd_ports = 0; - writeBuffer = new ArrayST(&interface_ip, "MC writeBuffer", Uncore_device); - writeBuffer->area.set_area(writeBuffer->area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); - area.set_area(area.get_area()+ writeBuffer->local_result.area*XML->sys.mc.memory_channels_per_mc); +void MemoryController::initialize_params() { + memset(&mcp, 0, sizeof(MCParameters)); } -void MCFrontEnd::computeEnergy(bool is_tdp) -{ - if (is_tdp) - { - //init stats for Peak - frontendBuffer->stats_t.readAc.access = frontendBuffer->l_ip.num_search_ports; - frontendBuffer->stats_t.writeAc.access = frontendBuffer->l_ip.num_wr_ports; - frontendBuffer->tdp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = readBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; - readBuffer->stats_t.writeAc.access = readBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; - readBuffer->tdp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = writeBuffer->l_ip.num_rd_ports*mcp.frontend_duty_cycle; - writeBuffer->stats_t.writeAc.access = writeBuffer->l_ip.num_wr_ports*mcp.frontend_duty_cycle; - writeBuffer->tdp_stats = writeBuffer->stats_t; - - } - else - { - //init stats for runtime power (RTP) - frontendBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads *mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; - //For each channel, each memory word need to check the address data to achieve best scheduling results. 
- //and this need to be done on all physical DIMMs in each logical memory DIMM *mcp.dataBusWidth/72 - frontendBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth*mcp.dataBusWidth/72; - frontendBuffer->rtp_stats = frontendBuffer->stats_t; - - readBuffer->stats_t.readAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first - readBuffer->stats_t.writeAc.access = XML->sys.mc.memory_reads*mcp.llcBlockSize*8.0/mcp.dataBusWidth;//support key word first - readBuffer->rtp_stats = readBuffer->stats_t; - - writeBuffer->stats_t.readAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; - writeBuffer->stats_t.writeAc.access = XML->sys.mc.memory_writes*mcp.llcBlockSize*8.0/mcp.dataBusWidth; - writeBuffer->rtp_stats = writeBuffer->stats_t; - } - - frontendBuffer->power_t.reset(); - readBuffer->power_t.reset(); - writeBuffer->power_t.reset(); - -// frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access* -// (frontendBuffer->local_result.power.searchOp.dynamic+frontendBuffer->local_result.power.readOp.dynamic)+ -// frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic); - - frontendBuffer->power_t.readOp.dynamic += (frontendBuffer->stats_t.readAc.access + - frontendBuffer->stats_t.writeAc.access)*frontendBuffer->local_result.power.searchOp.dynamic - + frontendBuffer->stats_t.readAc.access * frontendBuffer->local_result.power.readOp.dynamic - + frontendBuffer->stats_t.writeAc.access*frontendBuffer->local_result.power.writeOp.dynamic; - - readBuffer->power_t.readOp.dynamic += (readBuffer->stats_t.readAc.access* - readBuffer->local_result.power.readOp.dynamic+ - readBuffer->stats_t.writeAc.access*readBuffer->local_result.power.writeOp.dynamic); - writeBuffer->power_t.readOp.dynamic += (writeBuffer->stats_t.readAc.access* - writeBuffer->local_result.power.readOp.dynamic+ - 
writeBuffer->stats_t.writeAc.access*writeBuffer->local_result.power.writeOp.dynamic); - - if (is_tdp) - { - power = power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + - writeBuffer->local_result.power)*pppm_lkg; - +void MemoryController::set_mc_param() { + initialize_params(); + + int num_children = xml_data->nChildNode("param"); + int tech_type; + int mat_type; + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_FP_IF("mc_clock", mcp.clockRate); + ASSIGN_INT_IF("tech_type", tech_type); + ASSIGN_ENUM_IF("mc_type", mcp.mc_type, MemoryCtrl_type); + ASSIGN_FP_IF("num_mcs", mcp.num_mcs); + ASSIGN_INT_IF("llc_line_length", mcp.llc_line_length); + ASSIGN_INT_IF("databus_width", mcp.databus_width); + ASSIGN_INT_IF("memory_channels_per_mc", mcp.num_channels); + ASSIGN_INT_IF("req_window_size_per_channel", + mcp.req_window_size_per_channel); + ASSIGN_INT_IF("IO_buffer_size_per_channel", + mcp.IO_buffer_size_per_channel); + ASSIGN_INT_IF("addressbus_width", mcp.addressbus_width); + ASSIGN_INT_IF("opcode_width", mcp.opcodeW); + ASSIGN_INT_IF("type", mcp.type); + ASSIGN_ENUM_IF("LVDS", mcp.LVDS, bool); + ASSIGN_ENUM_IF("withPHY", mcp.withPHY, bool); + ASSIGN_INT_IF("peak_transfer_rate", mcp.peak_transfer_rate); + ASSIGN_INT_IF("number_ranks", mcp.number_ranks); + ASSIGN_INT_IF("reorder_buffer_assoc", mcp.reorder_buffer_assoc); + ASSIGN_INT_IF("reorder_buffer_nbanks", mcp.reorder_buffer_nbanks); + ASSIGN_INT_IF("read_buffer_assoc", mcp.read_buffer_assoc); + ASSIGN_INT_IF("read_buffer_nbanks", mcp.read_buffer_nbanks); + ASSIGN_INT_IF("read_buffer_tag_width", mcp.read_buffer_tag_width); + ASSIGN_INT_IF("write_buffer_assoc", 
mcp.write_buffer_assoc); + ASSIGN_INT_IF("write_buffer_nbanks", mcp.write_buffer_nbanks); + ASSIGN_INT_IF("write_buffer_tag_width", mcp.write_buffer_tag_width); + ASSIGN_INT_IF("wire_mat_type", mat_type); + ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); + + else { + warnUnrecognizedParam(node_name); + } } - else - { - rt_power = rt_power + frontendBuffer->power_t + readBuffer->power_t + writeBuffer->power_t + - (frontendBuffer->local_result.power + - readBuffer->local_result.power + - writeBuffer->local_result.power)*pppm_lkg; - rt_power.readOp.dynamic = rt_power.readOp.dynamic + power.readOp.dynamic*0.1*mcp.clockRate*mcp.num_mcs*mcp.executionTime; + + if (mcp.mc_type != MC) { + cout << "Unknown memory controller type: Only DRAM controller is " + << "supported for now" << endl; + exit(0); } -} -void MCFrontEnd::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - - if (is_tdp) - { - cout << indent_str << "Front End ROB:" << endl; - cout << indent_str_next << "Area = " << frontendBuffer->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << frontendBuffer->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " << frontendBuffer->power.readOp.leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << frontendBuffer->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << frontendBuffer->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - - cout <nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); -MemoryController::MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_) -:XML(XML_interface), - 
interface_ip(*interface_ip_), - mc_type(mc_type_), - frontend(0), - transecEngine(0), - PHY(0), - pipeLogic(0) -{ - /* All computations are for a single MC - * - */ - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - interface_ip.wt =Global; - set_mc_param(); - frontend = new MCFrontEnd(XML, &interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ frontend->area.get_area()); - transecEngine = new MCBackend(&interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ transecEngine->area.get_area()); - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - PHY = new MCPHY(&interface_ip, mcp, mc_type); - area.set_area(area.get_area()+ PHY->area.get_area()); - } - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. -// transecEngine.initialize(&interface_ip); -// transecEngine.peakDataTransferRate = XML->sys.mem.peak_transfer_rate; -// transecEngine.memDataWidth = dataBusWidth; -// transecEngine.memRank = XML->sys.mem.number_ranks; -// //transecEngine.memAccesses=XML->sys.mc.memory_accesses; -// //transecEngine.llcBlocksize=llcBlockSize; -// transecEngine.compute(); -// transecEngine.area.set_area(XML->sys.mc.memory_channels_per_mc*transecEngine.area.get_area()) ; -// area.set_area(area.get_area()+ transecEngine.area.get_area()); -// ///cout<<"area="<sys.mem.peak_transfer_rate; -// PHY.memDataWidth = dataBusWidth; -// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power -// //PHY.llcBlocksize=llcBlockSize; -// PHY.compute(); -// PHY.area.set_area(XML->sys.mc.memory_channels_per_mc*PHY.area.get_area()) ; -// area.set_area(area.get_area()+ PHY.area.get_area()); - ///cout<<"area="<sys.core[0].opcode_width + dataBusWidth; -// pipeLogic = new pipeline(is_default, &interface_ip); -// //pipeLogic.init_pipeline(is_default, &interface_ip); -// pipeLogic->compute_pipeline(); -// area.set_area(area.get_area()+ pipeLogic->area.get_area()*1e-6); -// 
area.set_area((area.get_area()+mc_area*1e-6)*1.1);//placement and routing overhead -// -// -//// //clock -//// clockNetwork.init_wire_external(is_default, &interface_ip); -//// clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb -//// clockNetwork.end_wiring_level =5;//toplevel metal -//// clockNetwork.start_wiring_level =5;//toplevel metal -//// clockNetwork.num_regs = pipeLogic.tot_stage_vector; -//// clockNetwork.optimize_wire(); + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + ASSIGN_FP_IF("duty_cycle", mcs.duty_cycle); + ASSIGN_FP_IF("perc_load", mcs.perc_load); + ASSIGN_FP_IF("memory_reads", mcs.reads); + ASSIGN_INT_IF("memory_writes", mcs.writes); -} -void MemoryController::computeEnergy(bool is_tdp) -{ - - frontend->computeEnergy(is_tdp); - transecEngine->computeEnergy(is_tdp); - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - PHY->computeEnergy(is_tdp); - } - if (is_tdp) - { - power = power + frontend->power + transecEngine->power; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - power = power + PHY->power; - } - } - else - { - rt_power = rt_power + frontend->rt_power + transecEngine->rt_power; - if (mcp.type==0 || (mcp.type==1&&mcp.withPHY)) - { - rt_power = rt_power + PHY->rt_power; - } - } -} - -void MemoryController::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << "Memory Controller:" << endl; - cout << indent_str<< "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str<< "Subthreshold Leakage = " - << (long_channel? 
power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str<< "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str<< "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout<2){ - frontend->displayEnergy(indent+4,is_tdp); - } - cout << indent_str << "Transaction Engine:" << endl; - cout << indent_str_next << "Area = " << transecEngine->area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << transecEngine->power.readOp.dynamic*mcp.clockRate << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? transecEngine->power.readOp.longer_channel_leakage:transecEngine->power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << transecEngine->power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << transecEngine->rt_power.readOp.dynamic/mcp.executionTime << " W" << endl; - cout <sys.mc.mc_clock*2;//DDR double pumped - mcp.clockRate *= 1e6; - mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - - mcp.llcBlockSize =int(ceil(XML->sys.mc.llc_line_length/8.0))+XML->sys.mc.llc_line_length;//ecc overhead - mcp.dataBusWidth =int(ceil(XML->sys.mc.databus_width/8.0)) + XML->sys.mc.databus_width; - mcp.addressBusWidth =int(ceil(XML->sys.mc.addressbus_width));//XML->sys.physical_address_width; - mcp.opcodeW =16; - mcp.num_mcs = XML->sys.mc.number_mcs; - mcp.num_channels = XML->sys.mc.memory_channels_per_mc; - mcp.reads = XML->sys.mc.memory_reads; - mcp.writes = XML->sys.mc.memory_writes; - //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. 
- mcp.peakDataTransferRate = XML->sys.mc.peak_transfer_rate; - mcp.memRank = XML->sys.mc.number_ranks; - //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers - //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power - //PHY.llcBlocksize=llcBlockSize; - mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared - mcp.LVDS = XML->sys.mc.LVDS; - mcp.type = XML->sys.mc.type; - mcp.withPHY = XML->sys.mc.withPHY; - } -// else if (mc_type==FLASHC) -// { -// mcp.clockRate =XML->sys.flashc.mc_clock*2;//DDR double pumped -// mcp.clockRate *= 1e6; -// mcp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); -// -// mcp.llcBlockSize =int(ceil(XML->sys.flashc.llc_line_length/8.0))+XML->sys.flashc.llc_line_length;//ecc overhead -// mcp.dataBusWidth =int(ceil(XML->sys.flashc.databus_width/8.0)) + XML->sys.flashc.databus_width; -// mcp.addressBusWidth =int(ceil(XML->sys.flashc.addressbus_width));//XML->sys.physical_address_width; -// mcp.opcodeW =16; -// mcp.num_mcs = XML->sys.flashc.number_mcs; -// mcp.num_channels = XML->sys.flashc.memory_channels_per_mc; -// mcp.reads = XML->sys.flashc.memory_reads; -// mcp.writes = XML->sys.flashc.memory_writes; -// //+++++++++Transaction engine +++++++++++++++++ ////TODO needs better numbers, Run the RTL code from OpenSparc. 
-// mcp.peakDataTransferRate = XML->sys.flashc.peak_transfer_rate; -// mcp.memRank = XML->sys.flashc.number_ranks; -// //++++++++++++++PHY ++++++++++++++++++++++++++ //TODO needs better numbers -// //PHY.memAccesses=PHY.peakDataTransferRate;//this is the max power -// //PHY.llcBlocksize=llcBlockSize; -// mcp.frontend_duty_cycle = 0.5;//for max power, the actual off-chip links is bidirectional but time shared -// mcp.LVDS = XML->sys.flashc.LVDS; -// mcp.type = XML->sys.flashc.type; -// } - else - { - cout<<"Unknown memory controller type: neither DRAM controller nor Flash controller" < - #include "basic_components.h" +#include "cachearray.h" +#include "parameter.h" -class MCBackend : public Component { - public: +class MCBackend : public McPATComponent { +public: InputParameter l_ip; uca_org_t local_result; - enum MemoryCtrl_type mc_type; - MCParam mcp; - statsDef tdp_stats; - statsDef rtp_stats; + MCParameters mcp; + MCStatistics mcs; statsDef stats_t; - powerDef power_t; - MCBackend(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void compute(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MCBackend(){}; + + MCBackend(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_); + void computeArea(); + void computeEnergy(); + ~MCBackend() {}; }; -class MCPHY : public Component { - public: +class MCPHY : public McPATComponent { +public: InputParameter l_ip; uca_org_t local_result; - enum MemoryCtrl_type mc_type; - MCParam mcp; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - MCPHY(InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void compute(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~MCPHY(){}; + MCParameters mcp; + MCStatistics mcs; + statsDef stats_t; 
+ + MCPHY(XMLNode* _xml_data, InputParameter* interface_ip_, + const MCParameters & mcp_, const MCStatistics & mcs_); + void computeArea(); + void computeEnergy(); + ~MCPHY() {}; }; -class MCFrontEnd : public Component { - public: - ParseXML *XML; - InputParameter interface_ip; - enum MemoryCtrl_type mc_type; - MCParam mcp; - selection_logic * MC_arb; - ArrayST * frontendBuffer; - ArrayST * readBuffer; - ArrayST * writeBuffer; +class MCFrontEnd : public McPATComponent { +public: + CacheArray* frontendBuffer; + CacheArray* readBuffer; + CacheArray* writeBuffer; + selection_logic* MC_arb; - MCFrontEnd(ParseXML *XML_interface,InputParameter* interface_ip_, const MCParam & mcp_, enum MemoryCtrl_type mc_type_); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); + InputParameter interface_ip; + MCParameters mcp; + MCStatistics mcs; + + MCFrontEnd(XMLNode* _xml_data, + InputParameter* interface_ip_, const MCParameters & mcp_, + const MCStatistics & mcs_); ~MCFrontEnd(); }; -class MemoryController : public Component { - public: - ParseXML *XML; - InputParameter interface_ip; - enum MemoryCtrl_type mc_type; - MCParam mcp; - MCFrontEnd * frontend; - MCBackend * transecEngine; - MCPHY * PHY; - Pipeline * pipeLogic; +class MemoryController : public McPATComponent { +public: + InputParameter interface_ip; + MCParameters mcp; + MCStatistics mcs; - //clock_network clockNetwork; - MemoryController(ParseXML *XML_interface,InputParameter* interface_ip_, enum MemoryCtrl_type mc_type_); + MemoryController(XMLNode* _xml_data, InputParameter* interface_ip_); + void initialize_params(); void set_mc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); ~MemoryController(); }; + #endif /* MEMORYCTRL_H_ */ diff --git a/ext/mcpat/noc.cc b/ext/mcpat/noc.cc index d5dfbb137..d6e309054 100644 --- a/ext/mcpat/noc.cc +++ b/ext/mcpat/noc.cc @@ -2,6 +2,7 @@ * 
McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,7 +26,7 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ @@ -35,321 +36,236 @@ #include #include -#include "XML_Parse.h" #include "basic_circuit.h" +#include "common.h" #include "const.h" #include "io.h" #include "noc.h" #include "parameter.h" -NoC::NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_, double link_len_) -:XML(XML_interface), -ithNoC(ithNoC_), -interface_ip(*interface_ip_), -router(0), -link_bus(0), -link_bus_exist(false), -router_exist(false), -M_traffic_pattern(M_traffic_pattern_) -{ - /* - * initialize, compute and optimize individual components. 
- */ - - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 1; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - } - set_noc_param(); - local_result=init_interface(&interface_ip); - scktRatio = g_tp.sckt_co_eff; - - if (nocdynp.type) - {/* - * if NOC compute router, router links must be computed separately - * and called from external - * since total chip area must be known first - */ - init_router(); - } - else - { - init_link_bus(link_len_); //if bus compute bus - } - - // //clock power - // clockNetwork.init_wire_external(is_default, &interface_ip); - // clockNetwork.clk_area =area*1.1;//10% of placement overhead. rule of thumb - // clockNetwork.end_wiring_level =5;//toplevel metal - // clockNetwork.start_wiring_level =5;//toplevel metal - // clockNetwork.num_regs = corepipe.tot_stage_vector; - // clockNetwork.optimize_wire(); +OnChipNetwork::OnChipNetwork(XMLNode* _xml_data, int ithNoC_, + InputParameter* interface_ip_) + : McPATComponent(_xml_data), router(NULL), link_bus(NULL), ithNoC(ithNoC_), + interface_ip(*interface_ip_), link_bus_exist(false), + router_exist(false) { + name = "On-Chip Network"; + set_param_stats(); + local_result = init_interface(&interface_ip, name); + scktRatio = g_tp.sckt_co_eff; + + // TODO: Routers and links should be children of the NOC component + if (noc_params.type) { + init_router(); + } else { + init_link_bus(); + } } -void NoC::init_router() -{ - router = new Router(nocdynp.flit_size, - nocdynp.virtual_channel_per_port*nocdynp.input_buffer_entries_per_vc, - nocdynp.virtual_channel_per_port, &(g_tp.peri_global), - nocdynp.input_ports,nocdynp.output_ports, M_traffic_pattern); - //router->print_router(); - area.set_area(area.get_area()+ router->area.get_area()*nocdynp.total_nodes); - - double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); - 
router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction; - router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction; - router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction; - router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction; - router_exist = true; +void OnChipNetwork::init_router() { + router = new Router(noc_params.flit_size, + noc_params.virtual_channel_per_port * + noc_params.input_buffer_entries_per_vc, + noc_params.virtual_channel_per_port, + &(g_tp.peri_global), + noc_params.input_ports, noc_params.output_ports, + noc_params.M_traffic_pattern); + // TODO: Make a router class within McPAT that descends from McPATComponent + // children.push_back(router); + area.set_area(area.get_area() + router->area.get_area() * + noc_params.total_nodes); + + double long_channel_device_reduction = longer_channel_device_reduction(Uncore_device); + router->power.readOp.longer_channel_leakage = router->power.readOp.leakage * long_channel_device_reduction; + router->buffer.power.readOp.longer_channel_leakage = router->buffer.power.readOp.leakage * long_channel_device_reduction; + router->crossbar.power.readOp.longer_channel_leakage = router->crossbar.power.readOp.leakage * long_channel_device_reduction; + router->arbiter.power.readOp.longer_channel_leakage = router->arbiter.power.readOp.leakage * long_channel_device_reduction; + router_exist = true; } -void NoC ::init_link_bus(double link_len_) -{ - - -// if (nocdynp.min_ports==1 ) - if (nocdynp.type) - link_name = "Links"; - else - link_name = "Bus"; - - link_len=link_len_; - assert(link_len>0); - - interface_ip.throughput = nocdynp.link_throughput/nocdynp.clockRate; - interface_ip.latency = nocdynp.link_latency/nocdynp.clockRate; - - link_len /= (nocdynp.horizontal_nodes 
+ nocdynp.vertical_nodes)/2; - - if (nocdynp.total_nodes >1) link_len /=2; //All links are shared by neighbors - link_bus = new interconnect(name, Uncore_device, 1, 1, nocdynp.flit_size, - link_len, &interface_ip, 3, true/*pipelinable*/, nocdynp.route_over_perc); - - link_bus_tot_per_Router.area.set_area(link_bus_tot_per_Router.area.get_area()+ link_bus->area.get_area() - * nocdynp.global_linked_ports); - - area.set_area(area.get_area()+ link_bus_tot_per_Router.area.get_area()* nocdynp.total_nodes); - link_bus_exist = true; -} -void NoC::computeEnergy(bool is_tdp) -{ - //power_point_product_masks - double pppm_t[4] = {1,1,1,1}; - double M=nocdynp.duty_cycle; - if (is_tdp) - { - //init stats for TDP - stats_t.readAc.access = M; - tdp_stats = stats_t; - if (router_exist) - { - set_pppm(pppm_t, 1*M, 1, 1, 1);//reset traffic pattern - router->power = router->power*pppm_t; - set_pppm(pppm_t, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes, nocdynp.total_nodes); - power = power + router->power*pppm_t; - } - if (link_bus_exist) - { - if (nocdynp.type) - set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports -1), nocdynp.global_linked_ports, - nocdynp.global_linked_ports, nocdynp.global_linked_ports); - //reset traffic pattern; local port do not have router links - else - set_pppm(pppm_t, 1*M_traffic_pattern*M*(nocdynp.min_ports), nocdynp.global_linked_ports, - nocdynp.global_linked_ports, nocdynp.global_linked_ports);//reset traffic pattern - - link_bus_tot_per_Router.power = link_bus->power*pppm_t; - - set_pppm(pppm_t, nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes, - nocdynp.total_nodes); - power = power + link_bus_tot_per_Router.power*pppm_t; - - } - } - else - { - //init stats for runtime power (RTP) - stats_t.readAc.access = XML->sys.NoC[ithNoC].total_accesses; - rtp_stats = stats_t; - set_pppm(pppm_t, 1, 0 , 0, 0); - if (router_exist) - { - router->buffer.rt_power.readOp.dynamic = (router->buffer.power.readOp.dynamic + 
router->buffer.power.writeOp.dynamic)*rtp_stats.readAc.access ; - router->crossbar.rt_power.readOp.dynamic = router->crossbar.power.readOp.dynamic*rtp_stats.readAc.access ; - router->arbiter.rt_power.readOp.dynamic = router->arbiter.power.readOp.dynamic*rtp_stats.readAc.access ; - - router->rt_power = router->rt_power + (router->buffer.rt_power + router->crossbar.rt_power + router->arbiter.rt_power)*pppm_t + - router->power*pppm_lkg;//TDP power must be calculated first! - rt_power = rt_power + router->rt_power; - } - if (link_bus_exist) - { - set_pppm(pppm_t, rtp_stats.readAc.access, 1 , 1, rtp_stats.readAc.access); - link_bus->rt_power = link_bus->power * pppm_t; - rt_power = rt_power + link_bus->rt_power; - } - - } +void OnChipNetwork::init_link_bus() { + if (noc_params.type) { + link_name = "Links"; + } else { + link_name = "Bus"; + } + + interface_ip.throughput = noc_params.link_throughput / + noc_params.clockRate; + interface_ip.latency = noc_params.link_latency / noc_params.clockRate; + + link_len /= (noc_params.horizontal_nodes + noc_params.vertical_nodes) / 2; + + if (noc_params.total_nodes > 1) { + //All links are shared by neighbors + link_len /= 2; + } + link_bus = new Interconnect(xml_data, "Link", Uncore_device, + noc_params.link_base_width, + noc_params.link_base_height, + noc_params.flit_size, link_len, &interface_ip, + noc_params.link_start_wiring_level, + noc_params.clockRate, true/*pipelinable*/, + noc_params.route_over_perc); + children.push_back(link_bus); + + link_bus_exist = true; } - -void NoC::displayEnergy(uint32_t indent,int plevel,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - double M =M_traffic_pattern*nocdynp.duty_cycle; - /*only router as a whole has been applied the M_traffic_pattern(0.6 by default) factor in router.cc; - * When power of crossbars, arbiters, etc need to be displayed, the M_traffic_pattern factor need to - * be 
applied together with McPAT's extra traffic pattern. - * */ - if (is_tdp) - { - cout << name << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str<< "Peak Dynamic = " << power.readOp.dynamic*nocdynp.clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str<< "Runtime Dynamic = " << rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl; - cout<2){ - cout << indent_str<< indent_str << "Virtual Channel Buffer:" << endl; - cout << indent_str<< indent_str_next << "Area = " << router->buffer.area.get_area()*1e-6*nocdynp.input_ports<< " mm^2" << endl; - cout << indent_str<< indent_str_next << "Peak Dynamic = " <<(router->buffer.power.readOp.dynamic + router->buffer.power.writeOp.dynamic) - *nocdynp.min_ports*M*nocdynp.clockRate << " W" << endl; - cout << indent_str<< indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
router->buffer.power.readOp.longer_channel_leakage*nocdynp.input_ports:router->buffer.power.readOp.leakage*nocdynp.input_ports) <<" W" << endl; - cout << indent_str<< indent_str_next << "Gate Leakage = " << router->buffer.power.readOp.gate_leakage*nocdynp.input_ports << " W" << endl; - cout << indent_str<< indent_str_next << "Runtime Dynamic = " << router->buffer.rt_power.readOp.dynamic/nocdynp.executionTime << " W" << endl; - cout <power = router->power * pppm_t; + set_pppm(pppm_t, noc_params.total_nodes, + noc_params.total_nodes, + noc_params.total_nodes, + noc_params.total_nodes); + } + if (link_bus_exist) { + if (noc_params.type) { + link_bus->int_params.active_ports = noc_params.min_ports - 1; + } else { + link_bus->int_params.active_ports = noc_params.min_ports; } + link_bus->int_stats.duty_cycle = + noc_params.M_traffic_pattern * noc_stats.duty_cycle; + + // TODO: Decide how to roll multiple routers into a single top-level + // NOC module. I would prefer not to, but it might be a nice feature + set_pppm(pppm_t, noc_params.total_nodes, + noc_params.total_nodes, + noc_params.total_nodes, + noc_params.total_nodes); + } + + // Initialize stats for runtime energy and power + rtp_stats.reset(); + rtp_stats.readAc.access = noc_stats.total_access; + set_pppm(pppm_t, 1, 0 , 0, 0); + if (router_exist) { + // TODO: Move this to a McPATComponent parent class of Router + router->buffer.rt_power.readOp.dynamic = + (router->buffer.power.readOp.dynamic + + router->buffer.power.writeOp.dynamic) * rtp_stats.readAc.access; + router->crossbar.rt_power.readOp.dynamic = + router->crossbar.power.readOp.dynamic * rtp_stats.readAc.access; + router->arbiter.rt_power.readOp.dynamic = + router->arbiter.power.readOp.dynamic * rtp_stats.readAc.access; + + router->rt_power = router->rt_power + + (router->buffer.rt_power + router->crossbar.rt_power + + router->arbiter.rt_power) * pppm_t + + router->power * pppm_lkg;//TDP power must be calculated first! 
+ } + if (link_bus_exist) { + link_bus->int_stats.accesses = noc_stats.total_access; + } + + // Recursively compute energy + McPATComponent::computeEnergy(); } -void NoC::set_noc_param() -{ - - nocdynp.type = XML->sys.NoC[ithNoC].type; - nocdynp.clockRate =XML->sys.NoC[ithNoC].clockrate; - nocdynp.clockRate *= 1e6; - nocdynp.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - - nocdynp.flit_size = XML->sys.NoC[ithNoC].flit_bits; - if (nocdynp.type) - { - nocdynp.input_ports = XML->sys.NoC[ithNoC].input_ports; - nocdynp.output_ports = XML->sys.NoC[ithNoC].output_ports;//later minus 1 - nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports); - nocdynp.global_linked_ports = (nocdynp.input_ports-1) + (nocdynp.output_ports-1); - /* - * Except local i/o ports, all ports needs links( global_linked_ports); - * However only min_ports can be fully active simultaneously - * since the fewer number of ports (input or output ) is the bottleneck. - */ - } - else - { - nocdynp.input_ports = 1; - nocdynp.output_ports = 1; - nocdynp.min_ports = min(nocdynp.input_ports,nocdynp.output_ports); - nocdynp.global_linked_ports = 1; +void OnChipNetwork::set_param_stats() { + // TODO: Remove this or move initialization elsewhere + memset(&noc_params, 0, sizeof(OnChipNetworkParameters)); + + int num_children = xml_data->nChildNode("param"); + int i; + int mat_type; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_INT_IF("type", noc_params.type); + ASSIGN_FP_IF("clockrate", noc_params.clockRate); + ASSIGN_INT_IF("flit_bits", noc_params.flit_size); + ASSIGN_FP_IF("link_len", link_len); + ASSIGN_FP_IF("link_throughput", noc_params.link_throughput); + ASSIGN_FP_IF("link_latency", noc_params.link_latency); + 
ASSIGN_INT_IF("input_ports", noc_params.input_ports); + ASSIGN_INT_IF("output_ports", noc_params.output_ports); + ASSIGN_INT_IF("global_linked_ports", noc_params.global_linked_ports); + ASSIGN_INT_IF("horizontal_nodes", noc_params.horizontal_nodes); + ASSIGN_INT_IF("vertical_nodes", noc_params.vertical_nodes); + ASSIGN_FP_IF("chip_coverage", noc_params.chip_coverage); + ASSIGN_FP_IF("link_routing_over_percentage", + noc_params.route_over_perc); + ASSIGN_INT_IF("has_global_link", noc_params.has_global_link); + ASSIGN_INT_IF("virtual_channel_per_port", + noc_params.virtual_channel_per_port); + ASSIGN_INT_IF("input_buffer_entries_per_vc", + noc_params.input_buffer_entries_per_vc); + ASSIGN_FP_IF("M_traffic_pattern", noc_params.M_traffic_pattern); + ASSIGN_FP_IF("link_base_width", noc_params.link_base_width); + ASSIGN_FP_IF("link_base_height", noc_params.link_base_height); + ASSIGN_INT_IF("link_start_wiring_level", + noc_params.link_start_wiring_level); + ASSIGN_INT_IF("wire_mat_type", mat_type); + ASSIGN_ENUM_IF("wire_type", interface_ip.wt, Wire_type); + + else { + warnUnrecognizedParam(node_name); } + } - nocdynp.virtual_channel_per_port = XML->sys.NoC[ithNoC].virtual_channel_per_port; - nocdynp.input_buffer_entries_per_vc = XML->sys.NoC[ithNoC].input_buffer_entries_per_vc; + // Change from MHz to Hz + noc_params.clockRate *= 1e6; - nocdynp.horizontal_nodes = XML->sys.NoC[ithNoC].horizontal_nodes; - nocdynp.vertical_nodes = XML->sys.NoC[ithNoC].vertical_nodes; - nocdynp.total_nodes = nocdynp.horizontal_nodes*nocdynp.vertical_nodes; - nocdynp.duty_cycle = XML->sys.NoC[ithNoC].duty_cycle; - nocdynp.has_global_link = XML->sys.NoC[ithNoC].has_global_link; - nocdynp.link_throughput = XML->sys.NoC[ithNoC].link_throughput; - nocdynp.link_latency = XML->sys.NoC[ithNoC].link_latency; - nocdynp.chip_coverage = XML->sys.NoC[ithNoC].chip_coverage; - nocdynp.route_over_perc = XML->sys.NoC[ithNoC].route_over_perc; + interface_ip.wire_is_mat_type = mat_type; + 
interface_ip.wire_os_mat_type = mat_type; - assert (nocdynp.chip_coverage <=1); - assert (nocdynp.route_over_perc <=1); + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); - if (nocdynp.type) - name = "NOC"; - else - name = "BUSES"; + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); -} + ASSIGN_FP_IF("duty_cycle", noc_stats.duty_cycle); + ASSIGN_FP_IF("total_accesses", noc_stats.total_access); + else { + warnUnrecognizedStat(node_name); + } + } + + clockRate = noc_params.clockRate; + noc_params.min_ports = + min(noc_params.input_ports, noc_params.output_ports); + if (noc_params.type) { + noc_params.global_linked_ports = (noc_params.input_ports - 1) + + (noc_params.output_ports - 1); + } + noc_params.total_nodes = + noc_params.horizontal_nodes * noc_params.vertical_nodes; + + assert(noc_params.chip_coverage <= 1); + assert(noc_params.route_over_perc <= 1); + assert(link_len > 0); +} -NoC ::~NoC(){ +OnChipNetwork ::~OnChipNetwork() { - if(router) {delete router; router = 0;} - if(link_bus) {delete link_bus; link_bus = 0;} + if (router) { + delete router; + router = 0; + } + if (link_bus) { + delete link_bus; + link_bus = 0; + } } diff --git a/ext/mcpat/noc.h b/ext/mcpat/noc.h index 31b5b3b2e..291712b9d 100644 --- a/ext/mcpat/noc.h +++ b/ext/mcpat/noc.h @@ -2,6 +2,7 @@ * McPAT * SOFTWARE LICENSE AGREEMENT * Copyright 2012 Hewlett-Packard Development Company, L.P. + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
* All Rights Reserved * * Redistribution and use in source and binary forms, with or without @@ -25,13 +26,13 @@ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ***************************************************************************/ #ifndef NOC_H_ #define NOC_H_ -#include "XML_Parse.h" + #include "array.h" #include "basic_components.h" #include "interconnect.h" @@ -39,37 +40,62 @@ #include "parameter.h" #include "router.h" -class NoC :public Component { - public: +class OnChipNetworkParameters { +public: + double clockRate; + int flit_size; + int input_ports; + int output_ports; + int min_ports; + int global_linked_ports; + int virtual_channel_per_port; + int input_buffer_entries_per_vc; + int horizontal_nodes; + int vertical_nodes; + int total_nodes; + double link_throughput; + double link_latency; + double chip_coverage; + double route_over_perc; + bool has_global_link; + bool type; + double M_traffic_pattern; + double link_base_width; + double link_base_height; + int link_start_wiring_level; +}; + +class OnChipNetworkStatistics { +public: + double duty_cycle; + double total_access; +}; + +class OnChipNetwork : public McPATComponent { +public: + Router* router; + Interconnect* link_bus; + Component link_bus_tot_per_Router; + + int ithNoC; + InputParameter interface_ip; + double link_len; + double scktRatio, chip_PR_overhead, macro_PR_overhead; + OnChipNetworkParameters noc_params; + OnChipNetworkStatistics noc_stats; + uca_org_t local_result; + statsDef stats_t; + bool link_bus_exist; + bool router_exist; + string link_name; - ParseXML *XML; - int ithNoC; - InputParameter interface_ip; - double link_len; - double executionTime; - double scktRatio, 
chip_PR_overhead, macro_PR_overhead; - Router * router; - interconnect * link_bus; - NoCParam nocdynp; - uca_org_t local_result; - statsDef tdp_stats; - statsDef rtp_stats; - statsDef stats_t; - powerDef power_t; - Component link_bus_tot_per_Router; - bool link_bus_exist; - bool router_exist; - string name, link_name; - double M_traffic_pattern; - NoC(ParseXML *XML_interface, int ithNoC_, InputParameter* interface_ip_, double M_traffic_pattern_ = 0.6,double link_len_=0); - void set_noc_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - void init_link_bus(double link_len_); - void init_router(); - void computeEnergy_link_bus(bool is_tdp=true); - void displayEnergy_link_bus(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - ~NoC(); + OnChipNetwork(XMLNode* _xml_data, int ithNoC_, + InputParameter* interface_ip_); + void set_param_stats(); + void computeEnergy(); + void init_link_bus(); + void init_router(); + ~OnChipNetwork(); }; #endif /* NOC_H_ */ diff --git a/ext/mcpat/processor.cc b/ext/mcpat/processor.cc deleted file mode 100644 index 8520c9633..000000000 --- a/ext/mcpat/processor.cc +++ /dev/null @@ -1,839 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. 
- * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ -#include -#include -#include -#include -#include -#include -#include - -#include "XML_Parse.h" -#include "array.h" -#include "basic_circuit.h" -#include "const.h" -#include "parameter.h" -#include "processor.h" -#include "version.h" - -Processor::Processor(ParseXML *XML_interface) -:XML(XML_interface),//TODO: using one global copy may have problems. 
- mc(0), - niu(0), - pcie(0), - flashcontroller(0) -{ - /* - * placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - * There is no point to have heterogeneous memory controller on chip, - * thus McPAT only support homogeneous memory controllers. - */ - int i; - double pppm_t[4] = {1,1,1,1}; - set_proc_param(); - if (procdynp.homoCore) - numCore = procdynp.numCore==0? 0:1; - else - numCore = procdynp.numCore; - - if (procdynp.homoL2) - numL2 = procdynp.numL2==0? 0:1; - else - numL2 = procdynp.numL2; - - if (XML->sys.Private_L2 && numCore != numL2) - { - cout<<"Number of private L2 does not match number of cores"<computeEnergy(); - cores[i]->computeEnergy(false); - if (procdynp.homoCore){ - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()*procdynp.numCore); - set_pppm(pppm_t,cores[i]->clockRate*procdynp.numCore, procdynp.numCore,procdynp.numCore,procdynp.numCore); - core.power = core.power + cores[i]->power*pppm_t; - set_pppm(pppm_t,1/cores[i]->executionTime, procdynp.numCore,procdynp.numCore,procdynp.numCore); - core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t; - area.set_area(area.get_area() + core.area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - power = power + core.power; - rt_power = rt_power + core.rt_power; - } - else{ - core.area.set_area(core.area.get_area() + cores[i]->area.get_area()); - area.set_area(area.get_area() + cores[i]->area.get_area());//placement and routing overhead is 10%, core scales worse than cache 40% is accumulated from 90 to 22nm - - set_pppm(pppm_t,cores[i]->clockRate, 1, 1, 1); - core.power = core.power + cores[i]->power*pppm_t; - power = power + cores[i]->power*pppm_t; - - set_pppm(pppm_t,1/cores[i]->executionTime, 1, 1, 1); - core.rt_power = core.rt_power + cores[i]->rt_power*pppm_t; - rt_power = rt_power + cores[i]->rt_power*pppm_t; - } - } - - if (!XML->sys.Private_L2) - { - if 
(numL2 >0) - for (i = 0;i < numL2; i++) - { - l2array.push_back(new SharedCache(XML,i, &interface_ip)); - l2array[i]->computeEnergy(); - l2array[i]->computeEnergy(false); - if (procdynp.homoL2){ - l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()*procdynp.numL2); - set_pppm(pppm_t,l2array[i]->cachep.clockRate*procdynp.numL2, procdynp.numL2,procdynp.numL2,procdynp.numL2); - l2.power = l2.power + l2array[i]->power*pppm_t; - set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, procdynp.numL2,procdynp.numL2,procdynp.numL2); - l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l2.area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l2.power; - rt_power = rt_power + l2.rt_power; - } - else{ - l2.area.set_area(l2.area.get_area() + l2array[i]->area.get_area()); - area.set_area(area.get_area() + l2array[i]->area.get_area());//placement and routing overhead is 10%, l2 scales worse than cache 40% is accumulated from 90 to 22nm - - set_pppm(pppm_t,l2array[i]->cachep.clockRate, 1, 1, 1); - l2.power = l2.power + l2array[i]->power*pppm_t; - power = power + l2array[i]->power*pppm_t;; - set_pppm(pppm_t,1/l2array[i]->cachep.executionTime, 1, 1, 1); - l2.rt_power = l2.rt_power + l2array[i]->rt_power*pppm_t; - rt_power = rt_power + l2array[i]->rt_power*pppm_t; - } - } - } - - if (numL3 >0) - for (i = 0;i < numL3; i++) - { - l3array.push_back(new SharedCache(XML,i, &interface_ip, L3)); - l3array[i]->computeEnergy(); - l3array[i]->computeEnergy(false); - if (procdynp.homoL3){ - l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()*procdynp.numL3); - set_pppm(pppm_t,l3array[i]->cachep.clockRate*procdynp.numL3, procdynp.numL3,procdynp.numL3,procdynp.numL3); - l3.power = l3.power + l3array[i]->power*pppm_t; - set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, procdynp.numL3,procdynp.numL3,procdynp.numL3); - l3.rt_power = l3.rt_power + 
l3array[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l3.area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l3.power; - rt_power = rt_power + l3.rt_power; - - } - else{ - l3.area.set_area(l3.area.get_area() + l3array[i]->area.get_area()); - area.set_area(area.get_area() + l3array[i]->area.get_area());//placement and routing overhead is 10%, l3 scales worse than cache 40% is accumulated from 90 to 22nm - set_pppm(pppm_t,l3array[i]->cachep.clockRate, 1, 1, 1); - l3.power = l3.power + l3array[i]->power*pppm_t; - power = power + l3array[i]->power*pppm_t; - set_pppm(pppm_t,1/l3array[i]->cachep.executionTime, 1, 1, 1); - l3.rt_power = l3.rt_power + l3array[i]->rt_power*pppm_t; - rt_power = rt_power + l3array[i]->rt_power*pppm_t; - - } - } - if (numL1Dir >0) - for (i = 0;i < numL1Dir; i++) - { - l1dirarray.push_back(new SharedCache(XML,i, &interface_ip, L1Directory)); - l1dirarray[i]->computeEnergy(); - l1dirarray[i]->computeEnergy(false); - if (procdynp.homoL1Dir){ - l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()*procdynp.numL1Dir); - set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate*procdynp.numL1Dir, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir); - l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, procdynp.numL1Dir,procdynp.numL1Dir,procdynp.numL1Dir); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l1dir.area.get_area());//placement and routing overhead is 10%, l1dir scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l1dir.power; - rt_power = rt_power + l1dir.rt_power; - - } - else{ - l1dir.area.set_area(l1dir.area.get_area() + l1dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l1dirarray[i]->area.get_area()); - set_pppm(pppm_t,l1dirarray[i]->cachep.clockRate, 1, 1, 1); - 
l1dir.power = l1dir.power + l1dirarray[i]->power*pppm_t; - power = power + l1dirarray[i]->power; - set_pppm(pppm_t,1/l1dirarray[i]->cachep.executionTime, 1, 1, 1); - l1dir.rt_power = l1dir.rt_power + l1dirarray[i]->rt_power*pppm_t; - rt_power = rt_power + l1dirarray[i]->rt_power; - } - } - - if (numL2Dir >0) - for (i = 0;i < numL2Dir; i++) - { - l2dirarray.push_back(new SharedCache(XML,i, &interface_ip, L2Directory)); - l2dirarray[i]->computeEnergy(); - l2dirarray[i]->computeEnergy(false); - if (procdynp.homoL2Dir){ - l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()*procdynp.numL2Dir); - set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate*procdynp.numL2Dir, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir); - l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, procdynp.numL2Dir,procdynp.numL2Dir,procdynp.numL2Dir); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t; - area.set_area(area.get_area() + l2dir.area.get_area());//placement and routing overhead is 10%, l2dir scales worse than cache 40% is accumulated from 90 to 22nm - power = power + l2dir.power; - rt_power = rt_power + l2dir.rt_power; - - } - else{ - l2dir.area.set_area(l2dir.area.get_area() + l2dirarray[i]->area.get_area()); - area.set_area(area.get_area() + l2dirarray[i]->area.get_area()); - set_pppm(pppm_t,l2dirarray[i]->cachep.clockRate, 1, 1, 1); - l2dir.power = l2dir.power + l2dirarray[i]->power*pppm_t; - power = power + l2dirarray[i]->power*pppm_t; - set_pppm(pppm_t,1/l2dirarray[i]->cachep.executionTime, 1, 1, 1); - l2dir.rt_power = l2dir.rt_power + l2dirarray[i]->rt_power*pppm_t; - rt_power = rt_power + l2dirarray[i]->rt_power*pppm_t; - } - } - - if (XML->sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - mc = new MemoryController(XML, &interface_ip, MC); - mc->computeEnergy(); - mc->computeEnergy(false); - 
mcs.area.set_area(mcs.area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs); - area.set_area(area.get_area()+mc->area.get_area()*XML->sys.mc.number_mcs); - set_pppm(pppm_t,XML->sys.mc.number_mcs*mc->mcp.clockRate, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs); - mcs.power = mc->power*pppm_t; - power = power + mcs.power; - set_pppm(pppm_t,1/mc->mcp.executionTime, XML->sys.mc.number_mcs,XML->sys.mc.number_mcs,XML->sys.mc.number_mcs); - mcs.rt_power = mc->rt_power*pppm_t; - rt_power = rt_power + mcs.rt_power; - - } - - if (XML->sys.flashc.number_mcs >0 )//flash controller - { - flashcontroller = new FlashController(XML, &interface_ip); - flashcontroller->computeEnergy(); - flashcontroller->computeEnergy(false); - double number_fcs = flashcontroller->fcp.num_mcs; - flashcontrollers.area.set_area(flashcontrollers.area.get_area()+flashcontroller->area.get_area()*number_fcs); - area.set_area(area.get_area()+flashcontrollers.area.get_area()); - set_pppm(pppm_t,number_fcs, number_fcs ,number_fcs, number_fcs ); - flashcontrollers.power = flashcontroller->power*pppm_t; - power = power + flashcontrollers.power; - set_pppm(pppm_t,number_fcs , number_fcs ,number_fcs ,number_fcs ); - flashcontrollers.rt_power = flashcontroller->rt_power*pppm_t; - rt_power = rt_power + flashcontrollers.rt_power; - - } - - if (XML->sys.niu.number_units >0) - { - niu = new NIUController(XML, &interface_ip); - niu->computeEnergy(); - niu->computeEnergy(false); - nius.area.set_area(nius.area.get_area()+niu->area.get_area()*XML->sys.niu.number_units); - area.set_area(area.get_area()+niu->area.get_area()*XML->sys.niu.number_units); - set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units); - nius.power = niu->power*pppm_t; - power = power + nius.power; - set_pppm(pppm_t,XML->sys.niu.number_units*niu->niup.clockRate, 
XML->sys.niu.number_units,XML->sys.niu.number_units,XML->sys.niu.number_units); - nius.rt_power = niu->rt_power*pppm_t; - rt_power = rt_power + nius.rt_power; - - } - - if (XML->sys.pcie.number_units >0 && XML->sys.pcie.num_channels >0) - { - pcie = new PCIeController(XML, &interface_ip); - pcie->computeEnergy(); - pcie->computeEnergy(false); - pcies.area.set_area(pcies.area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units); - area.set_area(area.get_area()+pcie->area.get_area()*XML->sys.pcie.number_units); - set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units); - pcies.power = pcie->power*pppm_t; - power = power + pcies.power; - set_pppm(pppm_t,XML->sys.pcie.number_units*pcie->pciep.clockRate, XML->sys.pcie.number_units,XML->sys.pcie.number_units,XML->sys.pcie.number_units); - pcies.rt_power = pcie->rt_power*pppm_t; - rt_power = rt_power + pcies.rt_power; - - } - - if (numNOC >0) - { - for (i = 0;i < numNOC; i++) - { - if (XML->sys.NoC[i].type) - {//First add up area of routers if NoC is used - nocs.push_back(new NoC(XML,i, &interface_ip, 1)); - if (procdynp.homoNOC) - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } - else - {//Bus based interconnect - nocs.push_back(new NoC(XML,i, &interface_ip, 1, sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage))); - if (procdynp.homoNOC){ - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()*procdynp.numNOC); - area.set_area(area.get_area() + noc.area.get_area()); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->area.get_area()); - area.set_area(area.get_area() + nocs[i]->area.get_area()); - } - } - } - - /* - * Compute global links associated with 
each NOC, if any. This must be done at the end (even after the NOC router part) since the total chip - * area must be obtain to decide the link routing - */ - for (i = 0;i < numNOC; i++) - { - if (nocs[i]->nocdynp.has_global_link && XML->sys.NoC[i].type) - { - nocs[i]->init_link_bus(sqrt(area.get_area()*XML->sys.NoC[i].chip_coverage));//compute global links - if (procdynp.homoNOC) - { - noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes - * procdynp.numNOC); - area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes - * procdynp.numNOC); - } - else - { - noc.area.set_area(noc.area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes); - area.set_area(area.get_area() + nocs[i]->link_bus_tot_per_Router.area.get_area() - * nocs[i]->nocdynp.total_nodes); - } - } - } - //Compute energy of NoC (w or w/o links) or buses - for (i = 0;i < numNOC; i++) - { - nocs[i]->computeEnergy(); - nocs[i]->computeEnergy(false); - if (procdynp.homoNOC){ - set_pppm(pppm_t,procdynp.numNOC*nocs[i]->nocdynp.clockRate, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC); - noc.power = noc.power + nocs[i]->power*pppm_t; - set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, procdynp.numNOC,procdynp.numNOC,procdynp.numNOC); - noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t; - power = power + noc.power; - rt_power = rt_power + noc.rt_power; - } - else - { - set_pppm(pppm_t,nocs[i]->nocdynp.clockRate, 1, 1, 1); - noc.power = noc.power + nocs[i]->power*pppm_t; - power = power + nocs[i]->power*pppm_t; - set_pppm(pppm_t,1/nocs[i]->nocdynp.executionTime, 1, 1, 1); - noc.rt_power = noc.rt_power + nocs[i]->rt_power*pppm_t; - rt_power = rt_power + nocs[i]->rt_power*pppm_t; - - - } - } - } - -// //clock power -// globalClock.init_wire_external(is_default, &interface_ip); -// globalClock.clk_area =area*1e6; //change it from mm^2 to um^2 
-// globalClock.end_wiring_level =5;//toplevel metal -// globalClock.start_wiring_level =5;//toplevel metal -// globalClock.l_ip.with_clock_grid=false;//global clock does not drive local final nodes -// globalClock.optimize_wire(); - -} - -void Processor::displayDeviceType(int device_type_, uint32_t indent) -{ - string indent_str(indent, ' '); - - switch ( device_type_ ) { - - case 0 : - cout <sys.longer_channel_device; - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - if (is_tdp) - { - - if (plevel<5) - { - cout<<"\nMcPAT (version "<< VER_MAJOR <<"."<< VER_MINOR - << " of " << VER_UPDATE << ") results (current print level is "<< plevel - <<", please increase print level to see the details in components): "<sys.core_tech_node<<" nm"<sys.core[0].clock_rate<0){ - cout <sys.number_of_cores << " cores "<sys.device_type,indent); - cout << indent_str_next << "Area = " << core.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << core.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? core.power.readOp.longer_channel_leakage:core.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << core.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << core.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << core.rt_power.readOp.dynamic << " W" << endl; - cout <sys.Private_L2) - { - if (numL2 >0){ - cout <sys.L2[0].device_type,indent); - cout << indent_str_next << "Area = " << l2.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l2.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
l2.power.readOp.longer_channel_leakage:l2.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << l2.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << l2.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l2.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L3[0].device_type, indent); - cout << indent_str_next << "Area = " << l3.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l3.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l3.power.readOp.longer_channel_leakage:l3.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << l3.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << l3.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l3.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L1Directory[0].device_type, indent); - cout << indent_str_next << "Area = " << l1dir.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l1dir.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
l1dir.power.readOp.longer_channel_leakage:l1dir.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << l1dir.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << l1dir.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l1dir.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.L1Directory[0].device_type, indent); - cout << indent_str_next << "Area = " << l2dir.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << l2dir.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? l2dir.power.readOp.longer_channel_leakage:l2dir.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << l2dir.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << l2dir.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << l2dir.rt_power.readOp.dynamic << " W" << endl; - cout <0){ - cout <sys.device_type, indent); - cout << indent_str_next << "Area = " << noc.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << noc.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
noc.power.readOp.longer_channel_leakage:noc.power.readOp.leakage) <<" W" << endl; - //cout << indent_str_next << "Subthreshold Leakage = " << noc.power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << noc.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << noc.rt_power.readOp.dynamic << " W" << endl; - cout <sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - cout <sys.mc.number_mcs << " Memory Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << mcs.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << mcs.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? mcs.power.readOp.longer_channel_leakage:mcs.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << mcs.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << mcs.rt_power.readOp.dynamic << " W" << endl; - cout <sys.flashc.number_mcs >0) - { - cout <fcp.num_mcs << " Flash/SSD Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << flashcontrollers.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << flashcontrollers.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
flashcontrollers.power.readOp.longer_channel_leakage:flashcontrollers.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << flashcontrollers.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << flashcontrollers.rt_power.readOp.dynamic << " W" << endl; - cout <sys.niu.number_units >0 ) - { - cout <niup.num_units << " Network Interface Units "<sys.device_type, indent); - cout << indent_str_next << "Area = " << nius.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << nius.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? nius.power.readOp.longer_channel_leakage:nius.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << nius.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << nius.rt_power.readOp.dynamic << " W" << endl; - cout <sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0) - { - cout <pciep.num_units << " PCIe Controllers "<sys.device_type, indent); - cout << indent_str_next << "Area = " << pcies.area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str_next << "Peak Dynamic = " << pcies.power.readOp.dynamic << " W" << endl; - cout << indent_str_next << "Subthreshold Leakage = " - << (long_channel? 
pcies.power.readOp.longer_channel_leakage:pcies.power.readOp.leakage) <<" W" << endl; - cout << indent_str_next << "Gate Leakage = " << pcies.power.readOp.gate_leakage << " W" << endl; - cout << indent_str_next << "Runtime Dynamic = " << pcies.rt_power.readOp.dynamic << " W" << endl; - cout <1) - { - for (i = 0;i < numCore; i++) - { - cores[i]->displayEnergy(indent+4,plevel,is_tdp); - cout <<"*****************************************************************************************"<sys.Private_L2) - { - for (i = 0;i < numL2; i++) - { - l2array[i]->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.mc.number_mcs >0 && XML->sys.mc.memory_channels_per_mc>0) - { - mc->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.flashc.number_mcs >0 && XML->sys.flashc.memory_channels_per_mc>0) - { - flashcontroller->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.niu.number_units >0 ) - { - niu->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<sys.pcie.number_units >0 && XML->sys.pcie.num_channels>0) - { - pcie->displayEnergy(indent+4,is_tdp); - cout <<"*****************************************************************************************"<displayEnergy(indent+4,plevel,is_tdp); - cout 
<<"*****************************************************************************************"<sys.homogeneous_cores); - procdynp.homoL2 = bool(debug?1:XML->sys.homogeneous_L2s); - procdynp.homoL3 = bool(debug?1:XML->sys.homogeneous_L3s); - procdynp.homoNOC = bool(debug?1:XML->sys.homogeneous_NoCs); - procdynp.homoL1Dir = bool(debug?1:XML->sys.homogeneous_L1Directories); - procdynp.homoL2Dir = bool(debug?1:XML->sys.homogeneous_L2Directories); - - procdynp.numCore = XML->sys.number_of_cores; - procdynp.numL2 = XML->sys.number_of_L2s; - procdynp.numL3 = XML->sys.number_of_L3s; - procdynp.numNOC = XML->sys.number_of_NoCs; - procdynp.numL1Dir = XML->sys.number_of_L1Directories; - procdynp.numL2Dir = XML->sys.number_of_L2Directories; - procdynp.numMC = XML->sys.mc.number_mcs; - procdynp.numMCChannel = XML->sys.mc.memory_channels_per_mc; - -// if (procdynp.numCore<1) -// { -// cout<<" The target processor should at least have one core on chip." <2) - // { - // cout <<"number of NOCs must be 1 (only global NOCs) or 2 (both global and local NOCs)"<sys.device_type; - interface_ip.data_arr_peri_global_tech_type = debug?0:XML->sys.device_type; - interface_ip.tag_arr_ram_cell_tech_type = debug?0:XML->sys.device_type; - interface_ip.tag_arr_peri_global_tech_type = debug?0:XML->sys.device_type; - - interface_ip.ic_proj_type = debug?0:XML->sys.interconnect_projection_type; - interface_ip.delay_wt = 100;//Fixed number, make sure timing can be satisfied. - interface_ip.area_wt = 0;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.dynamic_power_wt = 100;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.leakage_power_wt = 0; - interface_ip.cycle_time_wt = 0; - - interface_ip.delay_dev = 10000;//Fixed number, make sure timing can be satisfied. - interface_ip.area_dev = 10000;//Fixed number, This is used to exhaustive search for individual components. 
- interface_ip.dynamic_power_dev = 10000;//Fixed number, This is used to exhaustive search for individual components. - interface_ip.leakage_power_dev = 10000; - interface_ip.cycle_time_dev = 10000; - - interface_ip.ed = 2; - interface_ip.burst_len = 1;//parameters are fixed for processor section, since memory is processed separately - interface_ip.int_prefetch_w = 1; - interface_ip.page_sz_bits = 0; - interface_ip.temp = debug?360: XML->sys.temperature; - interface_ip.F_sz_nm = debug?90:XML->sys.core_tech_node;//XML->sys.core_tech_node; - interface_ip.F_sz_um = interface_ip.F_sz_nm / 1000; - - //***********This section of code does not have real meaning, they are just to ensure all data will have initial value to prevent errors. - //They will be overridden during each components initialization - interface_ip.cache_sz =64; - interface_ip.line_sz = 1; - interface_ip.assoc = 1; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.specific_tag = 1; - interface_ip.tag_w = 64; - interface_ip.access_mode = 2; - - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - - interface_ip.is_main_mem = false; - interface_ip.rpters_in_htree = true ; - interface_ip.ver_htree_wires_over_array = 0; - interface_ip.broadcast_addr_din_over_ver_htrees = 0; - - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 1; - interface_ip.nuca = 0; - interface_ip.nuca_bank_count = 0; - interface_ip.is_cache =true; - interface_ip.pure_ram =false; - interface_ip.pure_cam =false; - interface_ip.force_cache_config =false; - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 0; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - 
interface_ip.wire_os_mat_type = 2; - } - interface_ip.force_wiretype = false; - interface_ip.print_detail = 1; - interface_ip.add_ecc_b_ =true; -} - -Processor::~Processor(){ - while (!cores.empty()) - { - delete cores.back(); - cores.pop_back(); - } - while (!l2array.empty()) - { - delete l2array.back(); - l2array.pop_back(); - } - while (!l3array.empty()) - { - delete l3array.back(); - l3array.pop_back(); - } - while (!nocs.empty()) - { - delete nocs.back(); - nocs.pop_back(); - } - if (!mc) - { - delete mc; - } - if (!niu) - { - delete niu; - } - if (!pcie) - { - delete pcie; - } - if (!flashcontroller) - { - delete flashcontroller; - } -}; diff --git a/ext/mcpat/processor.h b/ext/mcpat/processor.h deleted file mode 100644 index 5a7a2f7f5..000000000 --- a/ext/mcpat/processor.h +++ /dev/null @@ -1,79 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ -#ifndef PROCESSOR_H_ -#define PROCESSOR_H_ - -#include - -#include "XML_Parse.h" -#include "arbiter.h" -#include "area.h" -#include "array.h" -#include "basic_components.h" -#include "core.h" -#include "decoder.h" -#include "iocontrollers.h" -#include "memoryctrl.h" -#include "noc.h" -#include "parameter.h" -#include "router.h" -#include "sharedcache.h" - -class Processor : public Component -{ - public: - ParseXML *XML; - vector cores; - vector l2array; - vector l3array; - vector l1dirarray; - vector l2dirarray; - vector nocs; - MemoryController * mc; - NIUController * niu; - PCIeController * pcie; - FlashController * flashcontroller; - InputParameter interface_ip; - ProcParam procdynp; - //wire globalInterconnect; - //clock_network globalClock; - Component core, l2, l3, l1dir, l2dir, noc, mcs, cc, nius, pcies,flashcontrollers; - int numCore, numL2, numL3, numNOC, numL1Dir, numL2Dir; - Processor(ParseXML *XML_interface); - void compute(); - void set_proc_param(); - void displayEnergy(uint32_t indent = 0,int plevel = 100, bool is_tdp=true); - void displayDeviceType(int device_type_, uint32_t indent = 0); - void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); - ~Processor(); -}; - -#endif /* PROCESSOR_H_ */ diff --git a/ext/mcpat/sharedcache.cc b/ext/mcpat/sharedcache.cc deleted file mode 100644 index 3a61e1b6d..000000000 --- 
a/ext/mcpat/sharedcache.cc +++ /dev/null @@ -1,1162 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. - * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - -#include -#include -#include -#include -#include - -#include "XML_Parse.h" -#include "arbiter.h" -#include "array.h" -#include "basic_circuit.h" -#include "const.h" -#include "io.h" -#include "logic.h" -#include "parameter.h" -#include "sharedcache.h" - -SharedCache::SharedCache(ParseXML* XML_interface, int ithCache_, InputParameter* interface_ip_, enum cache_level cacheL_) -:XML(XML_interface), - ithCache(ithCache_), - interface_ip(*interface_ip_), - cacheL(cacheL_), - dir_overhead(0) -{ - int idx; - int tag, data; - bool is_default, debug; - enum Device_ty device_t; - enum Core_type core_t; - double size, line, assoc, banks; - if (cacheL==L2 && XML->sys.Private_L2) - { - device_t=Core_device; - core_t = (enum Core_type)XML->sys.core[ithCache].machine_type; - } - else - { - device_t=LLC_device; - core_t = Inorder; - } - - debug = false; - is_default=true;//indication for default setup - if (XML->sys.Embedded) - { - interface_ip.wt =Global_30; - interface_ip.wire_is_mat_type = 0; - interface_ip.wire_os_mat_type = 1; - } - else - { - interface_ip.wt =Global; - interface_ip.wire_is_mat_type = 2; - interface_ip.wire_os_mat_type = 2; - } - set_cache_param(); - - //All lower level cache are physically indexed and tagged. 
- size = cachep.capacity; - line = cachep.blockW; - assoc = cachep.assoc; - banks = cachep.nbanks; - if ((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory)) - { - assoc = 0; - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - interface_ip.num_search_ports = 1; - } - else - { - idx = debug?9:int(ceil(log2(size/line/assoc))); - tag = debug?51:XML->sys.physical_address_width-idx-int(ceil(log2(line))) + EXTRA_TAG_BITS; - interface_ip.num_search_ports = 0; - if (cachep.dir_ty==SBT) - { - dir_overhead = ceil(XML->sys.number_of_cores/8.0)*8/(cachep.blockW*8); - line = cachep.blockW*(1+ dir_overhead) ; - size = cachep.capacity*(1+ dir_overhead); - - } - } -// if (XML->sys.first_level_dir==2) -// tag += int(XML->sys.domain_size + 5); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = (int)size; - interface_ip.line_sz = (int)line; - interface_ip.assoc = (int)assoc; - interface_ip.nbanks = (int)banks; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 1; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; -// interface_ip.force_cache_config =true; -// interface_ip.ndwl = 4; -// interface_ip.ndbl = 8; -// interface_ip.nspd = 1; -// interface_ip.ndcm =1 ; -// interface_ip.ndsam1 =1; -// interface_ip.ndsam2 =1; - unicache.caches = new ArrayST(&interface_ip, cachep.name + "cache", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.caches->local_result.area); - area.set_area(area.get_area()+ unicache.caches->local_result.area); - interface_ip.force_cache_config =false; - - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = cachep.missb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.is_cache = true; - interface_ip.pure_ram = false; - interface_ip.pure_cam = false; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput;//means cycle time - interface_ip.latency = cachep.latency;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - interface_ip.num_search_ports = 1; - unicache.missb = new ArrayST(&interface_ip, cachep.name + "MissB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.missb->local_result.area); - area.set_area(area.get_area()+ 
unicache.missb->local_result.area); - //fill buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*cachep.fu_size ; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.ifb = new ArrayST(&interface_ip, cachep.name + "FillB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.ifb->local_result.area); - area.set_area(area.get_area()+ unicache.ifb->local_result.area); - //prefetch buffer - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = unicache.caches->l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = cachep.prefetchb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.prefetchb = new ArrayST(&interface_ip, cachep.name + "PrefetchB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.prefetchb->local_result.area); - area.set_area(area.get_area()+ unicache.prefetchb->local_result.area); - //WBB - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = unicache.caches->l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = cachep.wbb_size*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8/2; - interface_ip.access_mode = 0; - interface_ip.throughput = cachep.throughput; - interface_ip.latency = cachep.latency; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - unicache.wbb = new ArrayST(&interface_ip, cachep.name + "WBB", device_t, true, core_t); - unicache.area.set_area(unicache.area.get_area()+ unicache.wbb->local_result.area); - area.set_area(area.get_area()+ unicache.wbb->local_result.area); - } - // 
//pipeline -// interface_ip.pipeline_stages = int(ceil(llCache.caches.local_result.access_time/llCache.caches.local_result.cycle_time)); -// interface_ip.per_stage_vector = llCache.caches.l_ip.out_w + llCache.caches.l_ip.tag_w ; -// pipeLogicCache.init_pipeline(is_default, &interface_ip); -// pipeLogicCache.compute_pipeline(); - - /* - if (!((XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==1) - ||(XML->sys.number_of_dir_levels==1 && XML->sys.first_level_dir ==2)))//not single level IC and DIC - { - //directory Now assuming one directory per bank, TODO:should change it later - size = XML->sys.L2directory.L2Dir_config[0]; - line = XML->sys.L2directory.L2Dir_config[1]; - assoc = XML->sys.L2directory.L2Dir_config[2]; - banks = XML->sys.L2directory.L2Dir_config[3]; - tag = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit over estimate - interface_ip.specific_tag = 0; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0]; - interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1]; - interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2]; - interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = XML->sys.L2directory.L2Dir_config[4]/clockRate; - interface_ip.latency = XML->sys.L2directory.L2Dir_config[5]/clockRate; - interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. 
- interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - - strcpy(directory.caches.name,"L2 Directory"); - directory.caches.init_cache(&interface_ip); - directory.caches.optimize_array(); - directory.area += directory.caches.local_result.area; - //output_data_csv(directory.caches.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.missb.name,"directoryMissB"); - directory.missb.init_cache(&interface_ip); - directory.missb.optimize_array(); - directory.area += directory.missb.local_result.area; - //output_data_csv(directory.missb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - 
interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.ifb.name,"directoryFillB"); - directory.ifb.init_cache(&interface_ip); - directory.ifb.optimize_array(); - directory.area += directory.ifb.local_result.area; - //output_data_csv(directory.ifb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = directory.caches.l_ip.line_sz;//separate queue to prevent from cache polution. - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.prefetchb.name,"directoryPrefetchB"); - directory.prefetchb.init_cache(&interface_ip); - directory.prefetchb.optimize_array(); - directory.area += directory.prefetchb.local_result.area; - //output_data_csv(directory.prefetchb.local_result); - ///cout<<"area="<sys.physical_address_width + 
EXTRA_TAG_BITS; - data = directory.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory.wbb.name,"directoryWBB"); - directory.wbb.init_cache(&interface_ip); - directory.wbb.optimize_array(); - directory.area += directory.wbb.local_result.area; - } - - if (XML->sys.number_of_dir_levels ==2 && XML->sys.first_level_dir==0) - { - //first level directory - size = XML->sys.L2directory.L2Dir_config[0]*XML->sys.domain_size/128; - line = int(ceil(XML->sys.domain_size/8.0)); - assoc = XML->sys.L2directory.L2Dir_config[2]; - banks = XML->sys.L2directory.L2Dir_config[3]; - tag = debug?51:XML->sys.physical_address_width + EXTRA_TAG_BITS;//TODO: a little bit over estimate - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.cache_sz = XML->sys.L2directory.L2Dir_config[0]; - interface_ip.line_sz = XML->sys.L2directory.L2Dir_config[1]; - interface_ip.assoc = XML->sys.L2directory.L2Dir_config[2]; - interface_ip.nbanks = XML->sys.L2directory.L2Dir_config[3]; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0;//debug?0:XML->sys.core[ithCore].icache.icache_config[5]; - interface_ip.throughput = XML->sys.L2directory.L2Dir_config[4]/clockRate; - interface_ip.latency = XML->sys.L2directory.L2Dir_config[5]/clockRate; - 
interface_ip.is_cache = true; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1;//lower level cache usually has one port. - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - - strcpy(directory1.caches.name,"first level Directory"); - directory1.caches.init_cache(&interface_ip); - directory1.caches.optimize_array(); - directory1.area += directory1.caches.local_result.area; - //output_data_csv(directory.caches.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = (XML->sys.physical_address_width) + int(ceil(log2(size/line))) + directory1.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = int(ceil(data/8.0));//int(ceil(pow(2.0,ceil(log2(data)))/8.0)); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[0]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate;//means cycle time - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate;//means access time - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.missb.name,"directory1MissB"); - directory1.missb.init_cache(&interface_ip); - directory1.missb.optimize_array(); - directory1.area += directory1.missb.local_result.area; - //output_data_csv(directory.missb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory1.caches.l_ip.line_sz; - 
interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = data*XML->sys.L2[ithCache].buffer_sizes[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.ifb.name,"directory1FillB"); - directory1.ifb.init_cache(&interface_ip); - directory1.ifb.optimize_array(); - directory1.area += directory1.ifb.local_result.area; - //output_data_csv(directory.ifb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS;//check with previous entries to decide wthether to merge. - data = directory1.caches.l_ip.line_sz;//separate queue to prevent from cache polution. 
- interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data;//int(pow(2.0,ceil(log2(data)))); - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[2]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.prefetchb.name,"directory1PrefetchB"); - directory1.prefetchb.init_cache(&interface_ip); - directory1.prefetchb.optimize_array(); - directory1.area += directory1.prefetchb.local_result.area; - //output_data_csv(directory.prefetchb.local_result); - ///cout<<"area="<sys.physical_address_width + EXTRA_TAG_BITS; - data = directory1.caches.l_ip.line_sz; - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.L2[ithCache].buffer_sizes[3]*interface_ip.line_sz; - interface_ip.assoc = 0; - interface_ip.nbanks = 1; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(directory1.wbb.name,"directoryWBB"); - directory1.wbb.init_cache(&interface_ip); - 
directory1.wbb.optimize_array(); - directory1.area += directory1.wbb.local_result.area; - } - - if (XML->sys.first_level_dir==1)//IC - { - tag = XML->sys.physical_address_width + EXTRA_TAG_BITS; - data = int(ceil(XML->sys.domain_size/8.0)); - interface_ip.specific_tag = 1; - interface_ip.tag_w = tag; - interface_ip.line_sz = data; - interface_ip.cache_sz = XML->sys.domain_size*data*XML->sys.L2[ithCache].L2_config[0]/XML->sys.L2[ithCache].L2_config[1]; - interface_ip.assoc = 0; - interface_ip.nbanks = 1024; - interface_ip.out_w = interface_ip.line_sz*8; - interface_ip.access_mode = 0; - interface_ip.throughput = XML->sys.L2[ithCache].L2_config[4]/clockRate; - interface_ip.latency = XML->sys.L2[ithCache].L2_config[5]/clockRate; - interface_ip.obj_func_dyn_energy = 0; - interface_ip.obj_func_dyn_power = 0; - interface_ip.obj_func_leak_power = 0; - interface_ip.obj_func_cycle_t = 1; - interface_ip.num_rw_ports = 1; - interface_ip.num_rd_ports = 0; - interface_ip.num_wr_ports = 0; - interface_ip.num_se_rd_ports = 0; - strcpy(inv_dir.caches.name,"inv_dir"); - inv_dir.caches.init_cache(&interface_ip); - inv_dir.caches.optimize_array(); - inv_dir.area = inv_dir.caches.local_result.area; - - } -*/ -// //pipeline -// interface_ip.pipeline_stages = int(ceil(directory.caches.local_result.access_time/directory.caches.local_result.cycle_time)); -// interface_ip.per_stage_vector = directory.caches.l_ip.out_w + directory.caches.l_ip.tag_w ; -// pipeLogicDirectory.init_pipeline(is_default, &interface_ip); -// pipeLogicDirectory.compute_pipeline(); -// -// //clock power -// clockNetwork.init_wire_external(is_default, &interface_ip); -// clockNetwork.clk_area =area*1.1;//10% of placement overhead. 
rule of thumb -// clockNetwork.end_wiring_level =5;//toplevel metal -// clockNetwork.start_wiring_level =5;//toplevel metal -// clockNetwork.num_regs = pipeLogicCache.tot_stage_vector + pipeLogicDirectory.tot_stage_vector; -// clockNetwork.optimize_wire(); - -} - - -void SharedCache::computeEnergy(bool is_tdp) -{ - double homenode_data_access = (cachep.dir_ty==SBT)? 0.9:1.0; - if (is_tdp) - { - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - //init stats for Peak - unicache.caches->stats_t.readAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.duty_cycle*homenode_data_access; - unicache.caches->stats_t.readAc.miss = 0; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = .33*unicache.caches->l_ip.num_rw_ports*cachep.duty_cycle*homenode_data_access; - unicache.caches->stats_t.writeAc.miss = 0; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->tdp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_stats_t.readAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.dir_duty_cycle*(1-homenode_data_access); - homenode_stats_t.readAc.miss = 0; - homenode_stats_t.readAc.hit = homenode_stats_t.readAc.access - homenode_stats_t.readAc.miss; - homenode_stats_t.writeAc.access = .67*unicache.caches->l_ip.num_rw_ports*cachep.dir_duty_cycle*(1-homenode_data_access); - homenode_stats_t.writeAc.miss = 0; - homenode_stats_t.writeAc.hit = homenode_stats_t.writeAc.access - homenode_stats_t.writeAc.miss; - homenode_tdp_stats = homenode_stats_t; - } - - unicache.missb->stats_t.readAc.access = unicache.missb->l_ip.num_search_ports; - unicache.missb->stats_t.writeAc.access = unicache.missb->l_ip.num_search_ports; - unicache.missb->tdp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access 
= unicache.ifb->l_ip.num_search_ports; - unicache.ifb->stats_t.writeAc.access = unicache.ifb->l_ip.num_search_ports; - unicache.ifb->tdp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access = unicache.prefetchb->l_ip.num_search_ports; - unicache.prefetchb->stats_t.writeAc.access = unicache.ifb->l_ip.num_search_ports; - unicache.prefetchb->tdp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access = unicache.wbb->l_ip.num_search_ports; - unicache.wbb->stats_t.writeAc.access = unicache.wbb->l_ip.num_search_ports; - unicache.wbb->tdp_stats = unicache.wbb->stats_t; - } - else - { - unicache.caches->stats_t.readAc.access = unicache.caches->l_ip.num_search_ports*cachep.duty_cycle; - unicache.caches->stats_t.readAc.miss = 0; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = 0; - unicache.caches->stats_t.writeAc.miss = 0; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->tdp_stats = unicache.caches->stats_t; - - } - - } - else - { - //init stats for runtime power (RTP) - if (cacheL==L2) - { - unicache.caches->stats_t.readAc.access = XML->sys.L2[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L2[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L2[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L2[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_rtp_stats.readAc.access = XML->sys.L2[ithCache].homenode_read_accesses; - 
homenode_rtp_stats.readAc.miss = XML->sys.L2[ithCache].homenode_read_misses; - homenode_rtp_stats.readAc.hit = homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss; - homenode_rtp_stats.writeAc.access = XML->sys.L2[ithCache].homenode_write_accesses; - homenode_rtp_stats.writeAc.miss = XML->sys.L2[ithCache].homenode_write_misses; - homenode_rtp_stats.writeAc.hit = homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; - } - } - else if (cacheL==L3) - { - unicache.caches->stats_t.readAc.access = XML->sys.L3[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L3[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L3[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L3[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - - if (cachep.dir_ty==SBT) - { - homenode_rtp_stats.readAc.access = XML->sys.L3[ithCache].homenode_read_accesses; - homenode_rtp_stats.readAc.miss = XML->sys.L3[ithCache].homenode_read_misses; - homenode_rtp_stats.readAc.hit = homenode_rtp_stats.readAc.access - homenode_rtp_stats.readAc.miss; - homenode_rtp_stats.writeAc.access = XML->sys.L3[ithCache].homenode_write_accesses; - homenode_rtp_stats.writeAc.miss = XML->sys.L3[ithCache].homenode_write_misses; - homenode_rtp_stats.writeAc.hit = homenode_rtp_stats.writeAc.access - homenode_rtp_stats.writeAc.miss; - } - } - else if (cacheL==L1Directory) - { - unicache.caches->stats_t.readAc.access = XML->sys.L1Directory[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L1Directory[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; 
- unicache.caches->stats_t.writeAc.access = XML->sys.L1Directory[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L1Directory[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - } - else if (cacheL==L2Directory) - { - unicache.caches->stats_t.readAc.access = XML->sys.L2Directory[ithCache].read_accesses; - unicache.caches->stats_t.readAc.miss = XML->sys.L2Directory[ithCache].read_misses; - unicache.caches->stats_t.readAc.hit = unicache.caches->stats_t.readAc.access - unicache.caches->stats_t.readAc.miss; - unicache.caches->stats_t.writeAc.access = XML->sys.L2Directory[ithCache].write_accesses; - unicache.caches->stats_t.writeAc.miss = XML->sys.L2Directory[ithCache].write_misses; - unicache.caches->stats_t.writeAc.hit = unicache.caches->stats_t.writeAc.access - unicache.caches->stats_t.writeAc.miss; - unicache.caches->rtp_stats = unicache.caches->stats_t; - } - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { //Assuming write back and write-allocate cache - - unicache.missb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss ; - unicache.missb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.ifb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.ifb->rtp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.prefetchb->stats_t.writeAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access = unicache.caches->stats_t.writeAc.miss; - unicache.wbb->stats_t.writeAc.access = 
unicache.caches->stats_t.writeAc.miss; - if (cachep.dir_ty==SBT) - { - unicache.missb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.missb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.missb->rtp_stats = unicache.missb->stats_t; - - unicache.ifb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.ifb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.ifb->rtp_stats = unicache.ifb->stats_t; - - unicache.prefetchb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.prefetchb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - unicache.prefetchb->rtp_stats = unicache.prefetchb->stats_t; - - unicache.wbb->stats_t.readAc.access += homenode_rtp_stats.writeAc.miss; - unicache.wbb->stats_t.writeAc.access += homenode_rtp_stats.writeAc.miss; - } - unicache.wbb->rtp_stats = unicache.wbb->stats_t; - - } - - } - - unicache.power_t.reset(); - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - unicache.power_t.readOp.dynamic += (unicache.caches->stats_t.readAc.hit*unicache.caches->local_result.power.readOp.dynamic+ - unicache.caches->stats_t.readAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic+ - unicache.caches->stats_t.writeAc.miss*unicache.caches->local_result.tag_array2->power.writeOp.dynamic+ - unicache.caches->stats_t.writeAc.access*unicache.caches->local_result.power.writeOp.dynamic);//write miss will also generate a write later - - if (cachep.dir_ty==SBT) - { - unicache.power_t.readOp.dynamic += homenode_stats_t.readAc.hit * (unicache.caches->local_result.data_array2->power.readOp.dynamic*dir_overhead + - unicache.caches->local_result.tag_array2->power.readOp.dynamic) + - 
homenode_stats_t.readAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic + - homenode_stats_t.writeAc.miss*unicache.caches->local_result.tag_array2->power.readOp.dynamic + - homenode_stats_t.writeAc.hit*(unicache.caches->local_result.data_array2->power.writeOp.dynamic*dir_overhead + - unicache.caches->local_result.tag_array2->power.readOp.dynamic+ - homenode_stats_t.writeAc.miss*unicache.caches->local_result.power.writeOp.dynamic);//write miss on dynamic home node will generate a replacement write on whole cache block - - - } - - unicache.power_t.readOp.dynamic += unicache.missb->stats_t.readAc.access*unicache.missb->local_result.power.searchOp.dynamic + - unicache.missb->stats_t.writeAc.access*unicache.missb->local_result.power.writeOp.dynamic;//each access to missb involves a CAM and a write - unicache.power_t.readOp.dynamic += unicache.ifb->stats_t.readAc.access*unicache.ifb->local_result.power.searchOp.dynamic + - unicache.ifb->stats_t.writeAc.access*unicache.ifb->local_result.power.writeOp.dynamic; - unicache.power_t.readOp.dynamic += unicache.prefetchb->stats_t.readAc.access*unicache.prefetchb->local_result.power.searchOp.dynamic + - unicache.prefetchb->stats_t.writeAc.access*unicache.prefetchb->local_result.power.writeOp.dynamic; - unicache.power_t.readOp.dynamic += unicache.wbb->stats_t.readAc.access*unicache.wbb->local_result.power.searchOp.dynamic + - unicache.wbb->stats_t.writeAc.access*unicache.wbb->local_result.power.writeOp.dynamic; - } - else - { - unicache.power_t.readOp.dynamic += (unicache.caches->stats_t.readAc.access*unicache.caches->local_result.power.searchOp.dynamic+ - unicache.caches->stats_t.writeAc.access*unicache.caches->local_result.power.writeOp.dynamic); - } - - if (is_tdp) - { - unicache.power = unicache.power_t + (unicache.caches->local_result.power)*pppm_lkg; - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - unicache.power = unicache.power+ - 
(unicache.missb->local_result.power + - unicache.ifb->local_result.power + - unicache.prefetchb->local_result.power + - unicache.wbb->local_result.power)*pppm_lkg; - } - power = power + unicache.power; -// cout<<"unicache.caches->local_result.power.readOp.dynamic"<local_result.power.readOp.dynamic<local_result.power.writeOp.dynamic"<local_result.power.writeOp.dynamic<local_result.power)*pppm_lkg; - if (!((cachep.dir_ty==ST&& cacheL==L1Directory)||(cachep.dir_ty==ST&& cacheL==L2Directory))) - { - (unicache.rt_power = unicache.rt_power + - unicache.missb->local_result.power + - unicache.ifb->local_result.power + - unicache.prefetchb->local_result.power + - unicache.wbb->local_result.power)*pppm_lkg; - } - rt_power = rt_power + unicache.rt_power; - } -} - -void SharedCache::displayEnergy(uint32_t indent,bool is_tdp) -{ - string indent_str(indent, ' '); - string indent_str_next(indent+2, ' '); - bool long_channel = XML->sys.longer_channel_device; - - if (is_tdp) - { - cout << (XML->sys.Private_L2? indent_str:"")<< cachep.name << endl; - cout << indent_str << "Area = " << area.get_area()*1e-6<< " mm^2" << endl; - cout << indent_str << "Peak Dynamic = " << power.readOp.dynamic*cachep.clockRate << " W" << endl; - cout << indent_str << "Subthreshold Leakage = " - << (long_channel? 
power.readOp.longer_channel_leakage:power.readOp.leakage) <<" W" << endl; - //cout << indent_str << "Subthreshold Leakage = " << power.readOp.longer_channel_leakage <<" W" << endl; - cout << indent_str << "Gate Leakage = " << power.readOp.gate_leakage << " W" << endl; - cout << indent_str << "Runtime Dynamic = " << rt_power.readOp.dynamic/cachep.executionTime << " W" << endl; - cout <sys.first_level_dir==1) -// { -// inv_dir.maxPower = inv_dir.caches.local_result.power.searchOp.dynamic*clockRate*XML->sys.domain_size; -// cc.power.readOp.dynamic = inv_dir.maxPower*scktRatio*64/XML->sys.domain_size; -// cc.power.readOp.leakage = inv_dir.caches.local_result.power.readOp.leakage*inv_dir.caches.l_ip.nbanks*64/XML->sys.domain_size; -// -// cc.area.set_area(inv_dir.area*64/XML->sys.domain_size); -// cout<<"CC area="<sys.number_of_dir_levels==2) -// { -// -// directory.maxPower = 0.0; -// directory.maxPower += (directory.caches.l_ip.num_rw_ports*(0.67*directory.caches.local_result.power.readOp.dynamic+0.33*directory.caches.local_result.power.writeOp.dynamic) -// +directory.caches.l_ip.num_rd_ports*directory.caches.local_result.power.readOp.dynamic+directory.caches.l_ip.num_wr_ports*directory.caches.local_result.power.writeOp.dynamic -// +directory.caches.l_ip.num_se_rd_ports*directory.caches.local_result.power.readOp.dynamic)*clockRate; -// ///cout<<"directory.maxPower=" <sys.L2[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L2[ithCache].device_type; - cachep.capacity = XML->sys.L2[ithCache].L2_config[0]; - cachep.blockW = XML->sys.L2[ithCache].L2_config[1]; - 
cachep.assoc = XML->sys.L2[ithCache].L2_config[2]; - cachep.nbanks = XML->sys.L2[ithCache].L2_config[3]; - cachep.throughput = XML->sys.L2[ithCache].L2_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L2[ithCache].L2_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L2[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L2[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L2[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L2[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L2[ithCache].duty_cycle; - if (!XML->sys.L2[ithCache].merged_dir) - { - cachep.dir_ty = NonDir; - } - else - { - cachep.dir_ty = SBT; - cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; - } - } - else if (cacheL==L3) - { - cachep.name = "L3"; - cachep.clockRate = XML->sys.L3[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L3[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L3[ithCache].device_type; - cachep.capacity = XML->sys.L3[ithCache].L3_config[0]; - cachep.blockW = XML->sys.L3[ithCache].L3_config[1]; - cachep.assoc = XML->sys.L3[ithCache].L3_config[2]; - cachep.nbanks = XML->sys.L3[ithCache].L3_config[3]; - cachep.throughput = XML->sys.L3[ithCache].L3_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L3[ithCache].L3_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L3[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L3[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L3[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L3[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L3[ithCache].duty_cycle; - if (!XML->sys.L2[ithCache].merged_dir) 
- { - cachep.dir_ty = NonDir; - } - else - { - cachep.dir_ty = SBT; - cachep.dir_duty_cycle = XML->sys.L2[ithCache].dir_duty_cycle; - } - } - else if (cacheL==L1Directory) - { - cachep.name = "First Level Directory"; - cachep.dir_ty = (enum Dir_type) XML->sys.L1Directory[ithCache].Directory_type; - cachep.clockRate = XML->sys.L1Directory[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - interface_ip.data_arr_ram_cell_tech_type = XML->sys.L1Directory[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L1Directory[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L1Directory[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L1Directory[ithCache].device_type; - cachep.capacity = XML->sys.L1Directory[ithCache].Dir_config[0]; - cachep.blockW = XML->sys.L1Directory[ithCache].Dir_config[1]; - cachep.assoc = XML->sys.L1Directory[ithCache].Dir_config[2]; - cachep.nbanks = XML->sys.L1Directory[ithCache].Dir_config[3]; - cachep.throughput = XML->sys.L1Directory[ithCache].Dir_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L1Directory[ithCache].Dir_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L1Directory[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L1Directory[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L1Directory[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L1Directory[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L1Directory[ithCache].duty_cycle; - } - else if (cacheL==L2Directory) - { - cachep.name = "Second Level Directory"; - cachep.dir_ty = (enum Dir_type) XML->sys.L2Directory[ithCache].Directory_type; - cachep.clockRate = XML->sys.L2Directory[ithCache].clockrate; - cachep.clockRate *= 1e6; - cachep.executionTime = XML->sys.total_cycles/(XML->sys.target_core_clockrate*1e6); - 
interface_ip.data_arr_ram_cell_tech_type = XML->sys.L2Directory[ithCache].device_type;//long channel device LSTP - interface_ip.data_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type; - interface_ip.tag_arr_ram_cell_tech_type = XML->sys.L2Directory[ithCache].device_type; - interface_ip.tag_arr_peri_global_tech_type = XML->sys.L2Directory[ithCache].device_type; - cachep.capacity = XML->sys.L2Directory[ithCache].Dir_config[0]; - cachep.blockW = XML->sys.L2Directory[ithCache].Dir_config[1]; - cachep.assoc = XML->sys.L2Directory[ithCache].Dir_config[2]; - cachep.nbanks = XML->sys.L2Directory[ithCache].Dir_config[3]; - cachep.throughput = XML->sys.L2Directory[ithCache].Dir_config[4]/cachep.clockRate; - cachep.latency = XML->sys.L2Directory[ithCache].Dir_config[5]/cachep.clockRate; - cachep.missb_size = XML->sys.L2Directory[ithCache].buffer_sizes[0]; - cachep.fu_size = XML->sys.L2Directory[ithCache].buffer_sizes[1]; - cachep.prefetchb_size= XML->sys.L2Directory[ithCache].buffer_sizes[2]; - cachep.wbb_size = XML->sys.L2Directory[ithCache].buffer_sizes[3]; - cachep.duty_cycle = XML->sys.L2Directory[ithCache].duty_cycle; - } - //cachep.cache_duty_cycle=cachep.dir_duty_cycle = 0.35; -} - diff --git a/ext/mcpat/sharedcache.h b/ext/mcpat/sharedcache.h deleted file mode 100644 index 923408482..000000000 --- a/ext/mcpat/sharedcache.h +++ /dev/null @@ -1,89 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. 
- * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - -#ifndef SHAREDCACHE_H_ -#define SHAREDCACHE_H_ -#include - -#include "XML_Parse.h" -#include "area.h" -#include "array.h" -#include "basic_components.h" -#include "logic.h" -#include "parameter.h" - -class SharedCache :public Component{ - public: - ParseXML * XML; - int ithCache; - InputParameter interface_ip; - enum cache_level cacheL; - DataCache unicache;//Shared cache - 
CacheDynParam cachep; - statsDef homenode_tdp_stats; - statsDef homenode_rtp_stats; - statsDef homenode_stats_t; - double dir_overhead; - // cache_processor llCache,directory, directory1, inv_dir; - - //pipeline pipeLogicCache, pipeLogicDirectory; - //clock_network clockNetwork; - double scktRatio, executionTime; - // Component L2Tot, cc, cc1, ccTot; - - SharedCache(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_,enum cache_level cacheL_ =L2); - void set_cache_param(); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,bool is_tdp=true); - ~SharedCache(){}; -}; - -class CCdir :public Component{ - public: - ParseXML * XML; - int ithCache; - InputParameter interface_ip; - DataCache dc;//Shared cache - ArrayST * shadow_dir; -// cache_processor llCache,directory, directory1, inv_dir; - - //pipeline pipeLogicCache, pipeLogicDirectory; - //clock_network clockNetwork; - double scktRatio, clockRate, executionTime; - Component L2Tot, cc, cc1, ccTot; - - CCdir(ParseXML *XML_interface, int ithCache_, InputParameter* interface_ip_); - void computeEnergy(bool is_tdp=true); - void displayEnergy(uint32_t indent = 0,bool is_tdp=true); - ~CCdir(); -}; - -#endif /* SHAREDCACHE_H_ */ diff --git a/ext/mcpat/system.cc b/ext/mcpat/system.cc new file mode 100644 index 000000000..657f7f38d --- /dev/null +++ b/ext/mcpat/system.cc @@ -0,0 +1,350 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include "array.h" +#include "basic_circuit.h" +#include "common.h" +#include "const.h" +#include "parameter.h" +#include "system.h" +#include "version.h" + +// TODO: Fix this constructor to default initialize all pointers to NULL +System::System(XMLNode* _xml_data) + : McPATComponent(_xml_data) { + int i; + int currCore = 0; + int currNOC = 0; + name = "System"; + set_proc_param(); + + // TODO: This loop can (and should) be called by every component in + // the hierarchy. Consider moving it to McPATComponent + int numChildren = xml_data->nChildNode("component"); + for (i = 0; i < numChildren; i++ ) { + // For each child node of the system, + XMLNode* childXML = xml_data->getChildNodePtr("component", &i); + XMLCSTR type = childXML->getAttribute("type"); + + if (!type) { + warnMissingComponentType(childXML->getAttribute("id")); + + } STRCMP(type, "Core") { + // TODO: If homogeneous cores, and currCore > 0, just copy core 0 + children.push_back(new Core(childXML, currCore, &interface_ip)); + currCore++; + } STRCMP(type, "CacheUnit") { + children.push_back(new CacheUnit(childXML, &interface_ip)); + } STRCMP(type, "CacheController") { + // TODO: Remove reliance on interface_ip - there should be a better + // way to share global variables than passing, copying + children.push_back(new CacheController(childXML, &interface_ip)); + } STRCMP(type, "MemoryController") { + children.push_back(new MemoryController(childXML, &interface_ip)); + } STRCMP(type, "FlashController") { + children.push_back(new FlashController(childXML, &interface_ip)); + } STRCMP(type, "NIUController") { + children.push_back(new NIUController(childXML, &interface_ip)); + } STRCMP(type, "PCIeController") { + children.push_back(new PCIeController(childXML, &interface_ip)); + } STRCMP(type, "Memory") { + // 
TODO: + warnIncompleteComponentType(type); + } STRCMP(type, "OnChipNetwork") { + // TODO: Many of the parameters to this constructor should be + // handled in another way + children.push_back(new OnChipNetwork(childXML, currNOC, + &interface_ip)); + currNOC++; + warnIncompleteComponentType(type); + } STRCMP(type, "BusInterconnect") { + // TODO: Many of the parameters to this constructor should be + // handled in another way + children.push_back(new BusInterconnect(childXML, &interface_ip)); + warnIncompleteComponentType(type); + + // TODO: Add a directory data type that can handle the directories + // as defined by certain McScript output + } else { + warnUnrecognizedComponent(type); + } + } +} + +void System::displayDeviceType(int device_type_, uint32_t indent) { + string indent_str(indent, ' '); + cout << indent_str << "Device Type = "; + + switch ( device_type_ ) { + case 0: + cout << "ITRS high performance device type" << endl; + break; + case 1: + cout << "ITRS low standby power device type" << endl; + break; + case 2: + cout << "ITRS low operating power device type" << endl; + break; + case 3: + cout << "LP-DRAM device type" << endl; + break; + case 4: + cout << "COMM-DRAM device type" << endl; + break; + default: + cout << indent_str << "Unknown!" << endl; + exit(0); + } +} + +void System::displayInterconnectType(int interconnect_type_, uint32_t indent) { + string indent_str(indent, ' '); + cout << indent_str << "Interconnect metal projection = "; + + switch ( interconnect_type_ ) { + case 0: + cout << "aggressive interconnect technology projection" << endl; + break; + case 1: + cout << "conservative interconnect technology projection" << endl; + break; + default: + cout << indent_str << "Unknown!" 
<< endl; + exit(0); + } +} + +// TODO: Migrate this down to the McPATComponent::displayData function +void System::displayData(uint32_t indent, int plevel) { + string indent_str(indent, ' '); + string indent_str_next(indent + 2, ' '); + if (plevel < 5) { + cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR + << " of " << VER_UPDATE << ") results (current print level is " + << plevel + << ", please increase print level to see the details in " + << "components) " << endl; + } else { + cout << "\nMcPAT (version " << VER_MAJOR << "." << VER_MINOR + << " of " << VER_UPDATE << ") results (current print level is 5)" + << endl; + } + + cout << "*****************************************************************" + << "************************" << endl; + cout << indent_str << "Technology " << core_tech_node << " nm" << endl; + if (longer_channel_device) + cout << indent_str << "Using Long Channel Devices When Appropriate" << endl; + displayInterconnectType(interconnect_projection_type, indent); + cout << indent_str << "Target Clock Rate (MHz) " << target_core_clockrate / 1e6 << endl; + cout << endl; + + cout << "*****************************************************************" + << "************************" << endl; + + McPATComponent::displayData(indent, plevel); +} + +void System::set_proc_param() { + // TODO: Consider creating a SystemParams class that tracks system-wide + // parameters like these + longer_channel_device = false; + core_tech_node = -1; + temperature = -1; + interconnect_projection_type = -1; + device_type = -1; + physical_address_width = -1; + + int num_children = xml_data->nChildNode("param"); + int i; + for (i = 0; i < num_children; i++) { + XMLNode* paramNode = xml_data->getChildNodePtr("param", &i); + XMLCSTR node_name = paramNode->getAttribute("name"); + XMLCSTR value = paramNode->getAttribute("value"); + + if (!node_name) + warnMissingParamName(paramNode->getAttribute("id")); + + ASSIGN_FP_IF("core_tech_node", core_tech_node); + 
ASSIGN_INT_IF("target_core_clockrate", target_core_clockrate); + ASSIGN_INT_IF("temperature", temperature); + ASSIGN_INT_IF("device_type", device_type); + ASSIGN_INT_IF("longer_channel_device", longer_channel_device); + ASSIGN_INT_IF("interconnect_projection_type", + interconnect_projection_type); + ASSIGN_INT_IF("machine_bits", data_path_width); + ASSIGN_INT_IF("virtual_address_width", virtual_address_width); + ASSIGN_INT_IF("physical_address_width", physical_address_width); + ASSIGN_INT_IF("virtual_memory_page_size", virtual_memory_page_size); + ASSIGN_INT_IF("wire_is_mat_type", interface_ip.wire_is_mat_type); + ASSIGN_INT_IF("wire_os_mat_type", interface_ip.wire_os_mat_type); + ASSIGN_INT_IF("delay_wt", interface_ip.delay_wt); + ASSIGN_INT_IF("area_wt", interface_ip.area_wt); + ASSIGN_INT_IF("dynamic_power_wt", interface_ip.dynamic_power_wt); + ASSIGN_INT_IF("leakage_power_wt", interface_ip.leakage_power_wt); + ASSIGN_INT_IF("cycle_time_wt", interface_ip.cycle_time_wt); + ASSIGN_INT_IF("delay_dev", interface_ip.delay_dev); + ASSIGN_INT_IF("area_dev", interface_ip.area_dev); + ASSIGN_INT_IF("dynamic_power_dev", interface_ip.dynamic_power_dev); + ASSIGN_INT_IF("leakage_power_dev", interface_ip.leakage_power_dev); + ASSIGN_INT_IF("cycle_time_dev", interface_ip.cycle_time_dev); + ASSIGN_INT_IF("ed", interface_ip.ed); + ASSIGN_INT_IF("burst_len", interface_ip.burst_len); + ASSIGN_INT_IF("int_prefetch_w", interface_ip.int_prefetch_w); + ASSIGN_INT_IF("page_sz_bits", interface_ip.page_sz_bits); + ASSIGN_ENUM_IF("rpters_in_htree", interface_ip.rpters_in_htree, bool); + ASSIGN_INT_IF("ver_htree_wires_over_array", + interface_ip.ver_htree_wires_over_array); + ASSIGN_INT_IF("broadcast_addr_din_over_ver_htrees", + interface_ip.broadcast_addr_din_over_ver_htrees); + ASSIGN_INT_IF("nuca", interface_ip.nuca); + ASSIGN_INT_IF("nuca_bank_count", interface_ip.nuca_bank_count); + ASSIGN_ENUM_IF("force_cache_config", + interface_ip.force_cache_config, bool); + ASSIGN_ENUM_IF("wt", 
interface_ip.wt, Wire_type); + ASSIGN_INT_IF("force_wiretype", interface_ip.force_wiretype); + ASSIGN_INT_IF("print_detail", interface_ip.print_detail); + ASSIGN_ENUM_IF("add_ecc_b_", interface_ip.add_ecc_b_, bool); + + else { + warnUnrecognizedParam(node_name); + } + } + + // Change from MHz to Hz + target_core_clockrate *= 1e6; + interconnect_projection_type = + (interconnect_projection_type == 0) ? 0 : 1; + + num_children = xml_data->nChildNode("stat"); + for (i = 0; i < num_children; i++) { + XMLNode* statNode = xml_data->getChildNodePtr("stat", &i); + XMLCSTR node_name = statNode->getAttribute("name"); + XMLCSTR value = statNode->getAttribute("value"); + + if (!node_name) + warnMissingStatName(statNode->getAttribute("id")); + + ASSIGN_FP_IF("total_cycles", total_cycles); + + else { + warnUnrecognizedStat(node_name); + } + } + + if (temperature < 0) { + errorUnspecifiedParam("temperature"); + } + + if (core_tech_node < 0) { + errorUnspecifiedParam("core_tech_node"); + } + + if (interconnect_projection_type < 0) { + errorUnspecifiedParam("interconnect_projection_type"); + } + + if (device_type < 0) { + errorUnspecifiedParam("device_type"); + } + + if (physical_address_width <= 0) { + errorNonPositiveParam("physical_address_width"); + } + + if (data_path_width <= 0) { + errorNonPositiveParam("machine_bits"); + } + + if (total_cycles <= 0) { + fprintf(stderr, "WARNING: total_cycles <= 0 in system component, " + "power numbers will be funky...\n"); + } + + clockRate = target_core_clockrate; + execution_time = total_cycles / (target_core_clockrate); + + /* Basic parameters*/ + interface_ip.data_arr_ram_cell_tech_type = device_type; + interface_ip.data_arr_peri_global_tech_type = device_type; + interface_ip.tag_arr_ram_cell_tech_type = device_type; + interface_ip.tag_arr_peri_global_tech_type = device_type; + + interface_ip.ic_proj_type = interconnect_projection_type; + interface_ip.temp = temperature; + interface_ip.F_sz_nm = core_tech_node; + interface_ip.F_sz_um 
= interface_ip.F_sz_nm / 1000; + interface_ip.is_main_mem = false; + + // These are there just to make CACTI's error_checking() happy. + // They are either not actually used or overwritten by each component. + interface_ip.cache_sz = MIN_BUFFER_SIZE; + interface_ip.nbanks = 1; + interface_ip.out_w = 0; + interface_ip.line_sz = 1; + interface_ip.assoc = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_search_ports = 1; + interface_ip.is_cache = true; + interface_ip.pure_ram = false; + interface_ip.pure_cam = false; + + + //This section of code does not have real meaning; it is just to ensure + //all data will have initial value to prevent errors. + //They will be overridden during each component's initialization + interface_ip.specific_tag = 1; + interface_ip.tag_w = 64; + interface_ip.access_mode = 2; + + interface_ip.obj_func_dyn_energy = 0; + interface_ip.obj_func_dyn_power = 0; + interface_ip.obj_func_leak_power = 0; + interface_ip.obj_func_cycle_t = 1; + interface_ip.num_rw_ports = 1; + interface_ip.num_rd_ports = 0; + interface_ip.num_wr_ports = 0; + interface_ip.num_se_rd_ports = 0; +} + +System::~System() { + // TODO: Delete children... do this in McPATComponent +} diff --git a/ext/mcpat/system.h b/ext/mcpat/system.h new file mode 100644 index 000000000..d2e263720 --- /dev/null +++ b/ext/mcpat/system.h @@ -0,0 +1,71 @@ +/***************************************************************************** + * McPAT + * SOFTWARE LICENSE AGREEMENT + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. 
+ * All Rights Reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Joel Hestness + * Yasuko Eckert + * + ***************************************************************************/ + +#ifndef SYSTEM_H_ +#define SYSTEM_H_ + +#include "arbiter.h" +#include "area.h" +#include "array.h" +#include "basic_components.h" +#include "bus_interconnect.h" +#include "cachecontroller.h" +#include "cacheunit.h" +#include "core.h" +#include "decoder.h" +#include "iocontrollers.h" +#include "memoryctrl.h" +#include "noc.h" +#include "parameter.h" +#include "router.h" + +class System : public McPATComponent { +public: + InputParameter interface_ip; + + int device_type; + double core_tech_node; + int interconnect_projection_type; + int temperature; + + System(XMLNode* _xml_data); + void set_proc_param(); + // TODO: make this recursively compute energy on subcomponents + void displayData(uint32_t indent = 0, int plevel = 100); + void displayDeviceType(int device_type_, uint32_t indent = 0); + void displayInterconnectType(int interconnect_type_, uint32_t indent = 0); + ~System(); +}; + +#endif /* SYSTEM_H_ */ diff --git a/ext/mcpat/technology_xeon_core.cc b/ext/mcpat/technology_xeon_core.cc deleted file mode 100644 index 4e60edc1b..000000000 --- a/ext/mcpat/technology_xeon_core.cc +++ /dev/null @@ -1,2772 +0,0 @@ -/***************************************************************************** - * McPAT - * SOFTWARE LICENSE AGREEMENT - * Copyright 2012 Hewlett-Packard Development Company, L.P. 
- * All Rights Reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer; - * redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution; - * neither the name of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.” - * - ***************************************************************************/ - - -#include "basic_circuit.h" - -#include "parameter.h" - -double wire_resistance(double resistivity, double wire_width, double wire_thickness, - double barrier_thickness, double dishing_thickness, double alpha_scatter) -{ - double resistance; - resistance = alpha_scatter * resistivity /((wire_thickness - barrier_thickness - dishing_thickness)*(wire_width - 2 * barrier_thickness)); 
- return(resistance); -} - -double wire_capacitance(double wire_width, double wire_thickness, double wire_spacing, - double ild_thickness, double miller_value, double horiz_dielectric_constant, - double vert_dielectric_constant, double fringe_cap) -{ - double vertical_cap, sidewall_cap, total_cap; - vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness; - sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing; - total_cap = vertical_cap + sidewall_cap + fringe_cap; - return(total_cap); -} - - -void init_tech_params(double technology, bool is_tag) -{ - int iter, tech, tech_lo, tech_hi; - double curr_alpha, curr_vpp; - double wire_width, wire_thickness, wire_spacing, - fringe_cap, pmos_to_nmos_sizing_r; -// double aspect_ratio,ild_thickness, miller_value = 1.5, horiz_dielectric_constant, vert_dielectric_constant; - double barrier_thickness, dishing_thickness, alpha_scatter; - double curr_vdd_dram_cell, curr_v_th_dram_access_transistor, curr_I_on_dram_cell, curr_c_dram_cell; - - uint32_t ram_cell_tech_type = (is_tag) ? g_ip->tag_arr_ram_cell_tech_type : g_ip->data_arr_ram_cell_tech_type; - uint32_t peri_global_tech_type = (is_tag) ? 
g_ip->tag_arr_peri_global_tech_type : g_ip->data_arr_peri_global_tech_type; - - technology = technology * 1000.0; // in the unit of nm - - // initialize parameters - g_tp.reset(); - double gmp_to_gmn_multiplier_periph_global = 0; - - double curr_Wmemcella_dram, curr_Wmemcellpmos_dram, curr_Wmemcellnmos_dram, - curr_area_cell_dram, curr_asp_ratio_cell_dram, curr_Wmemcella_sram, - curr_Wmemcellpmos_sram, curr_Wmemcellnmos_sram, curr_area_cell_sram, - curr_asp_ratio_cell_sram, curr_I_off_dram_cell_worst_case_length_temp; - double curr_Wmemcella_cam, curr_Wmemcellpmos_cam, curr_Wmemcellnmos_cam, curr_area_cell_cam,//Sheng: CAM data - curr_asp_ratio_cell_cam; - double SENSE_AMP_D, SENSE_AMP_P; // J - double area_cell_dram = 0; - double asp_ratio_cell_dram = 0; - double area_cell_sram = 0; - double asp_ratio_cell_sram = 0; - double area_cell_cam = 0; - double asp_ratio_cell_cam = 0; - double mobility_eff_periph_global = 0; - double Vdsat_periph_global = 0; - double nmos_effective_resistance_multiplier; - double width_dram_access_transistor; - - double curr_logic_scaling_co_eff = 0;//This is based on the reported numbers of Intel Merom 65nm, Penryn45nm and IBM cell 90/65/45 date - double curr_core_tx_density = 0;//this is density per um^2; 90, ...22nm based on Intel Penryn - double curr_chip_layout_overhead = 0; - double curr_macro_layout_overhead = 0; - double curr_sckt_co_eff = 0; - - if (technology < 91 && technology > 89) - { - tech_lo = 90; - tech_hi = 90; - } - else if (technology < 66 && technology > 64) - { - tech_lo = 65; - tech_hi = 65; - } - else if (technology < 46 && technology > 44) - { - tech_lo = 45; - tech_hi = 45; - } - else if (technology < 33 && technology > 31) - { - tech_lo = 32; - tech_hi = 32; - } - else if (technology < 23 && technology > 21) - { - tech_lo = 22; - tech_hi = 22; - if (ram_cell_tech_type == 3) - { - cout<<"current version does not support eDRAM technologies at 22nm"< 15) -// { -// tech_lo = 16; -// tech_hi = 16; -// } - else if 
(technology < 90 && technology > 65) - { - tech_lo = 90; - tech_hi = 65; - } - else if (technology < 65 && technology > 45) - { - tech_lo = 65; - tech_hi = 45; - } - else if (technology < 45 && technology > 32) - { - tech_lo = 45; - tech_hi = 32; - } - else if (technology < 32 && technology > 22) - { - tech_lo = 32; - tech_hi = 22; - } -// else if (technology < 22 && technology > 16) -// { -// tech_lo = 22; -// tech_hi = 16; -// } - else - { - cout<<"Invalid technology nodes"<F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um;//360 - curr_asp_ratio_cell_cam = 2.92;//2.5 - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 1; - curr_core_tx_density = 1.25*0.7*0.7; - curr_sckt_co_eff = 1.1539; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - - - } - - if (tech == 65) - { //65nm technology-node. 
Corresponds to year 2007 in ITRS - //ITRS HP device type -// SENSE_AMP_D = .2e-9; // s -// SENSE_AMP_P = 5.7e-15; // J -// vdd[0] = 1.1; -// Lphy[0] = 0.025; -// Lelec[0] = 0.019; -// t_ox[0] = 1.1e-3; -// v_th[0] = .19491; -// c_ox[0] = 1.88e-14; -// mobility_eff[0] = 436.24 * (1e-2 * 1e6 * 1e-2 * 1e6); -// Vdsat[0] = 7.71e-2; -// c_g_ideal[0] = 4.69e-16; -// c_fringe[0] = 0.077e-15; -// c_junc[0] = 1e-15; -// I_on_n[0] = 1197.2e-6; -// I_on_p[0] = 870.8e-6; -// nmos_effective_resistance_multiplier = 1.50; -// n_to_p_eff_curr_drv_ratio[0] = 2.41; -// gmp_to_gmn_multiplier[0] = 1.38; -// Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; -// Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; -// long_channel_leakage_reduction[0] = 1/3.74; -// //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first -// //Ioff(Lgate normal)/Ioff(Lgate long)= 3.74. -// I_off_n[0][0] = 1.96e-7; -// I_off_n[0][10] = 2.29e-7; -// I_off_n[0][20] = 2.66e-7; -// I_off_n[0][30] = 3.05e-7; -// I_off_n[0][40] = 3.49e-7; -// I_off_n[0][50] = 3.95e-7; -// I_off_n[0][60] = 4.45e-7; -// I_off_n[0][70] = 4.97e-7; -// I_off_n[0][80] = 5.48e-7; -// I_off_n[0][90] = 5.94e-7; -// I_off_n[0][100] = 6.3e-7; -// I_g_on_n[0][0] = 4.09e-8;//A/micron -// I_g_on_n[0][10] = 4.09e-8; -// I_g_on_n[0][20] = 4.09e-8; -// I_g_on_n[0][30] = 4.09e-8; -// I_g_on_n[0][40] = 4.09e-8; -// I_g_on_n[0][50] = 4.09e-8; -// I_g_on_n[0][60] = 4.09e-8; -// I_g_on_n[0][70] = 4.09e-8; -// I_g_on_n[0][80] = 4.09e-8; -// I_g_on_n[0][90] = 4.09e-8; -// I_g_on_n[0][100] = 4.09e-8; - - SENSE_AMP_D = .2e-9; // s - SENSE_AMP_P = 5.7e-15; // J - vdd[0] = 1.25; - Lphy[0] = 0.025; - Lelec[0] = 0.019; - t_ox[0] = 1.1e-3; - v_th[0] = .12491; - c_ox[0] = 1.88e-14; - mobility_eff[0] = 409.31 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 9.08e-2; - c_g_ideal[0] = 4.72e-16; - c_fringe[0] = 0.08e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 1486.4e-6; - I_on_p[0] = 
1131.5e-6; - nmos_effective_resistance_multiplier = 1.57; - n_to_p_eff_curr_drv_ratio[0] = 2; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1.0/4.97; - //Using MASTAR, @380K, increase Lgate until Ion reduces to 90% or Lgate increase by 10%, whichever comes first - //Ioff(Lgate normal)/Ioff(Lgate long)= 4.97@Vdd=1.25; (3.74@Vdd=1.1), however, Intel paper suggest the reduction factor is 3. - I_off_n[0][0] = 8.62e-7; - I_off_n[0][10] = 9.08e-7; - I_off_n[0][20] = 9.55e-7; - I_off_n[0][30] = 1.00e-6; - I_off_n[0][40] = 1.05e-6; - I_off_n[0][50] = 1.09e-6; - I_off_n[0][60] = 1.14e-6; - I_off_n[0][70] = 1.18e-6; - I_off_n[0][80] = 1.23e-6; - I_off_n[0][90] = 1.27e-6; - I_off_n[0][100] = 1.31e-6; - - - I_g_on_n[0][0] = 7.02e-8;//A/micron - I_g_on_n[0][10] = 7.02e-8; - I_g_on_n[0][20] = 7.02e-8; - I_g_on_n[0][30] = 7.02e-8; - I_g_on_n[0][40] = 7.02e-8; - I_g_on_n[0][50] = 7.02e-8; - I_g_on_n[0][60] = 7.02e-8; - I_g_on_n[0][70] = 7.02e-8; - I_g_on_n[0][80] = 7.02e-8; - I_g_on_n[0][90] = 7.02e-8; - I_g_on_n[0][100] = 7.02e-8; - - //ITRS LSTP device type - vdd[1] = 1.2; - Lphy[1] = 0.045; - Lelec[1] = 0.0298; - t_ox[1] = 1.9e-3; - v_th[1] = 0.52354; - c_ox[1] = 1.36e-14; - mobility_eff[1] = 341.21 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 0.128; - c_g_ideal[1] = 6.14e-16; - c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 519.2e-6; - I_on_p[1] = 266e-6; - nmos_effective_resistance_multiplier = 1.96; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.82; - I_off_n[1][0] = 9.12e-12; - I_off_n[1][10] = 1.49e-11; - I_off_n[1][20] = 2.36e-11; - I_off_n[1][30] = 3.64e-11; - I_off_n[1][40] = 
5.48e-11; - I_off_n[1][50] = 8.05e-11; - I_off_n[1][60] = 1.15e-10; - I_off_n[1][70] = 1.59e-10; - I_off_n[1][80] = 2.1e-10; - I_off_n[1][90] = 2.62e-10; - I_off_n[1][100] = 3.21e-10; - - I_g_on_n[1][0] = 1.09e-10;//A/micron - I_g_on_n[1][10] = 1.09e-10; - I_g_on_n[1][20] = 1.09e-10; - I_g_on_n[1][30] = 1.09e-10; - I_g_on_n[1][40] = 1.09e-10; - I_g_on_n[1][50] = 1.09e-10; - I_g_on_n[1][60] = 1.09e-10; - I_g_on_n[1][70] = 1.09e-10; - I_g_on_n[1][80] = 1.09e-10; - I_g_on_n[1][90] = 1.09e-10; - I_g_on_n[1][100] = 1.09e-10; - - //ITRS LOP device type - vdd[2] = 0.8; - Lphy[2] = 0.032; - Lelec[2] = 0.0216; - t_ox[2] = 1.2e-3; - v_th[2] = 0.28512; - c_ox[2] = 1.87e-14; - mobility_eff[2] = 495.19 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 0.292; - c_g_ideal[2] = 6e-16; - c_fringe[2] = 0.08e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 573.1e-6; - I_on_p[2] = 340.6e-6; - nmos_effective_resistance_multiplier = 1.82; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/2.05; - I_off_n[2][0] = 4.9e-9; - I_off_n[2][10] = 6.49e-9; - I_off_n[2][20] = 8.45e-9; - I_off_n[2][30] = 1.08e-8; - I_off_n[2][40] = 1.37e-8; - I_off_n[2][50] = 1.71e-8; - I_off_n[2][60] = 2.09e-8; - I_off_n[2][70] = 2.48e-8; - I_off_n[2][80] = 2.84e-8; - I_off_n[2][90] = 3.13e-8; - I_off_n[2][100] = 3.42e-8; - - I_g_on_n[2][0] = 9.61e-9;//A/micron - I_g_on_n[2][10] = 9.61e-9; - I_g_on_n[2][20] = 9.61e-9; - I_g_on_n[2][30] = 9.61e-9; - I_g_on_n[2][40] = 9.61e-9; - I_g_on_n[2][50] = 9.61e-9; - I_g_on_n[2][60] = 9.61e-9; - I_g_on_n[2][70] = 9.61e-9; - I_g_on_n[2][80] = 9.61e-9; - I_g_on_n[2][90] = 9.61e-9; - I_g_on_n[2][100] = 9.61e-9; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.2; - Lphy[3] = 0.12; - Lelec[3] = 0.0756; - 
curr_v_th_dram_access_transistor = 0.43806; - width_dram_access_transistor = 0.09; - curr_I_on_dram_cell = 36e-6; - curr_I_off_dram_cell_worst_case_length_temp = 19.6e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 0.11; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.6; - t_ox[3] = 2.2e-3; - v_th[3] = 0.43806; - c_ox[3] = 1.22e-14; - mobility_eff[3] = 328.32 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.43806; - c_g_ideal[3] = 1.46e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; - I_on_n[3] = 399.8e-6; - I_on_p[3] = 243.4e-6; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.23e-11; - I_off_n[3][10] = 3.46e-11; - I_off_n[3][20] = 5.24e-11; - I_off_n[3][30] = 7.75e-11; - I_off_n[3][40] = 1.12e-10; - I_off_n[3][50] = 1.58e-10; - I_off_n[3][60] = 2.18e-10; - I_off_n[3][70] = 2.88e-10; - I_off_n[3][80] = 3.63e-10; - I_off_n[3][90] = 4.41e-10; - I_off_n[3][100] = 5.36e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.3; - Lphy[3] = 0.065; - Lelec[3] = 0.0426; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.065; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.065*0.065; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 3.3; - t_ox[3] = 5e-3; - v_th[3] = 1.0; - 
c_ox[3] = 6.16e-15; - mobility_eff[3] = 303.44 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.385; - c_g_ideal[3] = 4e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15 ; - I_on_n[3] = 1031e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 2.39; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.80e-14; - I_off_n[3][10] = 3.64e-14; - I_off_n[3][20] = 7.03e-14; - I_off_n[3][30] = 1.31e-13; - I_off_n[3][40] = 2.35e-13; - I_off_n[3][50] = 4.09e-13; - I_off_n[3][60] = 6.89e-13; - I_off_n[3][70] = 1.13e-12; - I_off_n[3][80] = 1.78e-12; - I_off_n[3][90] = 2.71e-12; - I_off_n[3][100] = 3.99e-12; - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7; - curr_core_tx_density = 1.25*0.7; - curr_sckt_co_eff = 1.1359; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if (tech == 45) - { //45nm technology-node. 
Corresponds to year 2010 in ITRS - //ITRS HP device type - SENSE_AMP_D = .04e-9; // s - SENSE_AMP_P = 2.7e-15; // J - vdd[0] = 1.0; - Lphy[0] = 0.018; - Lelec[0] = 0.01345; - t_ox[0] = 0.65e-3; - v_th[0] = .18035; - c_ox[0] = 3.77e-14; - mobility_eff[0] = 266.68 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 9.38E-2; - c_g_ideal[0] = 6.78e-16; - c_fringe[0] = 0.05e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 2046.6e-6; - //There are certain problems with the ITRS PMOS numbers in MASTAR for 45nm. So we are using 65nm values of - //n_to_p_eff_curr_drv_ratio and gmp_to_gmn_multiplier for 45nm - I_on_p[0] = I_on_n[0] / 2;//This value is fixed arbitrarily but I_on_p is not being used in CACTI - nmos_effective_resistance_multiplier = 1.51; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0]; - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0]; - long_channel_leakage_reduction[0] = 1/3.546;//Using MASTAR, @380K, increase Lgate until Ion reduces to 90%, Ioff(Lgate normal)/Ioff(Lgate long)= 3.74 - I_off_n[0][0] = 2.8e-7; - I_off_n[0][10] = 3.28e-7; - I_off_n[0][20] = 3.81e-7; - I_off_n[0][30] = 4.39e-7; - I_off_n[0][40] = 5.02e-7; - I_off_n[0][50] = 5.69e-7; - I_off_n[0][60] = 6.42e-7; - I_off_n[0][70] = 7.2e-7; - I_off_n[0][80] = 8.03e-7; - I_off_n[0][90] = 8.91e-7; - I_off_n[0][100] = 9.84e-7; - - I_g_on_n[0][0] = 3.59e-8;//A/micron - I_g_on_n[0][10] = 3.59e-8; - I_g_on_n[0][20] = 3.59e-8; - I_g_on_n[0][30] = 3.59e-8; - I_g_on_n[0][40] = 3.59e-8; - I_g_on_n[0][50] = 3.59e-8; - I_g_on_n[0][60] = 3.59e-8; - I_g_on_n[0][70] = 3.59e-8; - I_g_on_n[0][80] = 3.59e-8; - I_g_on_n[0][90] = 3.59e-8; - I_g_on_n[0][100] = 3.59e-8; - - //ITRS LSTP device type - vdd[1] = 1.1; - Lphy[1] = 0.028; - Lelec[1] = 0.0212; - t_ox[1] = 1.4e-3; - v_th[1] = 0.50245; - c_ox[1] = 2.01e-14; - mobility_eff[1] = 363.96 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 9.12e-2; - c_g_ideal[1] = 5.18e-16; - 
c_fringe[1] = 0.08e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 666.2e-6; - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/2.08; - I_off_n[1][0] = 1.01e-11; - I_off_n[1][10] = 1.65e-11; - I_off_n[1][20] = 2.62e-11; - I_off_n[1][30] = 4.06e-11; - I_off_n[1][40] = 6.12e-11; - I_off_n[1][50] = 9.02e-11; - I_off_n[1][60] = 1.3e-10; - I_off_n[1][70] = 1.83e-10; - I_off_n[1][80] = 2.51e-10; - I_off_n[1][90] = 3.29e-10; - I_off_n[1][100] = 4.1e-10; - - I_g_on_n[1][0] = 9.47e-12;//A/micron - I_g_on_n[1][10] = 9.47e-12; - I_g_on_n[1][20] = 9.47e-12; - I_g_on_n[1][30] = 9.47e-12; - I_g_on_n[1][40] = 9.47e-12; - I_g_on_n[1][50] = 9.47e-12; - I_g_on_n[1][60] = 9.47e-12; - I_g_on_n[1][70] = 9.47e-12; - I_g_on_n[1][80] = 9.47e-12; - I_g_on_n[1][90] = 9.47e-12; - I_g_on_n[1][100] = 9.47e-12; - - //ITRS LOP device type - vdd[2] = 0.7; - Lphy[2] = 0.022; - Lelec[2] = 0.016; - t_ox[2] = 0.9e-3; - v_th[2] = 0.22599; - c_ox[2] = 2.82e-14;//F/micron2 - mobility_eff[2] = 508.9 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 5.71e-2; - c_g_ideal[2] = 6.2e-16; - c_fringe[2] = 0.073e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 748.9e-6; - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.76; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.92; - I_off_n[2][0] = 4.03e-9; - I_off_n[2][10] = 5.02e-9; - I_off_n[2][20] = 6.18e-9; - I_off_n[2][30] = 7.51e-9; - I_off_n[2][40] = 9.04e-9; - I_off_n[2][50] = 1.08e-8; - I_off_n[2][60] = 1.27e-8; - I_off_n[2][70] = 1.47e-8; - I_off_n[2][80] = 1.66e-8; - 
I_off_n[2][90] = 1.84e-8; - I_off_n[2][100] = 2.03e-8; - - I_g_on_n[2][0] = 3.24e-8;//A/micron - I_g_on_n[2][10] = 4.01e-8; - I_g_on_n[2][20] = 4.90e-8; - I_g_on_n[2][30] = 5.92e-8; - I_g_on_n[2][40] = 7.08e-8; - I_g_on_n[2][50] = 8.38e-8; - I_g_on_n[2][60] = 9.82e-8; - I_g_on_n[2][70] = 1.14e-7; - I_g_on_n[2][80] = 1.29e-7; - I_g_on_n[2][90] = 1.43e-7; - I_g_on_n[2][100] = 1.54e-7; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.1; - Lphy[3] = 0.078; - Lelec[3] = 0.0504;// Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. - curr_v_th_dram_access_transistor = 0.44559; - width_dram_access_transistor = 0.079; - curr_I_on_dram_cell = 36e-6;//A - curr_I_off_dram_cell_worst_case_length_temp = 19.5e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.5; - t_ox[3] = 2.1e-3; - v_th[3] = 0.44559; - c_ox[3] = 1.41e-14; - mobility_eff[3] = 426.30 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.181; - c_g_ideal[3] = 1.10e-15; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 456e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 2.54e-11; - I_off_n[3][10] = 3.94e-11; - I_off_n[3][20] = 5.95e-11; - I_off_n[3][30] = 8.79e-11; - I_off_n[3][40] = 1.27e-10; - I_off_n[3][50] = 1.79e-10; - I_off_n[3][60] = 2.47e-10; - I_off_n[3][70] = 3.31e-10; - I_off_n[3][80] = 4.26e-10; - I_off_n[3][90] = 5.27e-10; - 
I_off_n[3][100] = 6.46e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.1; - Lphy[3] = 0.045; - Lelec[3] = 0.0298; - curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.045; - curr_I_on_dram_cell = 20e-6;//A - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.045*0.045; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 2.7; - t_ox[3] = 4e-3; - v_th[3] = 1.0; - c_ox[3] = 7.98e-15; - mobility_eff[3] = 368.58 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.147; - c_g_ideal[3] = 3.59e-16; - c_fringe[3] = 0.08e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 999.4e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.31e-14; - I_off_n[3][10] = 2.68e-14; - I_off_n[3][20] = 5.25e-14; - I_off_n[3][30] = 9.88e-14; - I_off_n[3][40] = 1.79e-13; - I_off_n[3][50] = 3.15e-13; - I_off_n[3][60] = 5.36e-13; - I_off_n[3][70] = 8.86e-13; - I_off_n[3][80] = 1.42e-12; - I_off_n[3][90] = 2.20e-12; - I_off_n[3][100] = 3.29e-12; - } - - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 
* g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7; - curr_core_tx_density = 1.25; - curr_sckt_co_eff = 1.1387; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if (tech == 32) - { - SENSE_AMP_D = .03e-9; // s - SENSE_AMP_P = 2.16e-15; // J - //For 2013, MPU/ASIC stagger-contacted M1 half-pitch is 32 nm (so this is 32 nm - //technology i.e. FEATURESIZE = 0.032). Using the SOI process numbers for - //HP and LSTP. - vdd[0] = 0.9; - Lphy[0] = 0.013; - Lelec[0] = 0.01013; - t_ox[0] = 0.5e-3; - v_th[0] = 0.21835; - c_ox[0] = 4.11e-14; - mobility_eff[0] = 361.84 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[0] = 5.09E-2; - c_g_ideal[0] = 5.34e-16; - c_fringe[0] = 0.04e-15; - c_junc[0] = 1e-15; - I_on_n[0] = 2211.7e-6; - I_on_p[0] = I_on_n[0] / 2; - nmos_effective_resistance_multiplier = 1.49; - n_to_p_eff_curr_drv_ratio[0] = 2.41; - gmp_to_gmn_multiplier[0] = 1.38; - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.706; - //Using MASTAR, @300K (380K does not work in MASTAR), increase Lgate until Ion reduces to 95% or Lgate increase by 5% (DG device can only increase by 5%), - //whichever comes first - I_off_n[0][0] = 1.52e-7; - I_off_n[0][10] = 1.55e-7; - I_off_n[0][20] = 1.59e-7; - I_off_n[0][30] = 1.68e-7; - I_off_n[0][40] = 1.90e-7; - I_off_n[0][50] = 2.69e-7; - I_off_n[0][60] = 5.32e-7; - I_off_n[0][70] = 1.02e-6; - I_off_n[0][80] = 1.62e-6; - I_off_n[0][90] = 2.73e-6; - I_off_n[0][100] = 6.1e-6; - - I_g_on_n[0][0] = 6.55e-8;//A/micron - I_g_on_n[0][10] = 6.55e-8; - I_g_on_n[0][20] = 6.55e-8; - I_g_on_n[0][30] = 6.55e-8; - I_g_on_n[0][40] = 6.55e-8; - 
I_g_on_n[0][50] = 6.55e-8; - I_g_on_n[0][60] = 6.55e-8; - I_g_on_n[0][70] = 6.55e-8; - I_g_on_n[0][80] = 6.55e-8; - I_g_on_n[0][90] = 6.55e-8; - I_g_on_n[0][100] = 6.55e-8; - -// 32 DG -// I_g_on_n[0][0] = 2.71e-9;//A/micron -// I_g_on_n[0][10] = 2.71e-9; -// I_g_on_n[0][20] = 2.71e-9; -// I_g_on_n[0][30] = 2.71e-9; -// I_g_on_n[0][40] = 2.71e-9; -// I_g_on_n[0][50] = 2.71e-9; -// I_g_on_n[0][60] = 2.71e-9; -// I_g_on_n[0][70] = 2.71e-9; -// I_g_on_n[0][80] = 2.71e-9; -// I_g_on_n[0][90] = 2.71e-9; -// I_g_on_n[0][100] = 2.71e-9; - - //LSTP device type - vdd[1] = 1; - Lphy[1] = 0.020; - Lelec[1] = 0.0173; - t_ox[1] = 1.2e-3; - v_th[1] = 0.513; - c_ox[1] = 2.29e-14; - mobility_eff[1] = 347.46 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[1] = 8.64e-2; - c_g_ideal[1] = 4.58e-16; - c_fringe[1] = 0.053e-15; - c_junc[1] = 1e-15; - I_on_n[1] = 683.6e-6; - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2.23; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1]; - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1]; - long_channel_leakage_reduction[1] = 1/1.93; - I_off_n[1][0] = 2.06e-11; - I_off_n[1][10] = 3.30e-11; - I_off_n[1][20] = 5.15e-11; - I_off_n[1][30] = 7.83e-11; - I_off_n[1][40] = 1.16e-10; - I_off_n[1][50] = 1.69e-10; - I_off_n[1][60] = 2.40e-10; - I_off_n[1][70] = 3.34e-10; - I_off_n[1][80] = 4.54e-10; - I_off_n[1][90] = 5.96e-10; - I_off_n[1][100] = 7.44e-10; - - I_g_on_n[1][0] = 3.73e-11;//A/micron - I_g_on_n[1][10] = 3.73e-11; - I_g_on_n[1][20] = 3.73e-11; - I_g_on_n[1][30] = 3.73e-11; - I_g_on_n[1][40] = 3.73e-11; - I_g_on_n[1][50] = 3.73e-11; - I_g_on_n[1][60] = 3.73e-11; - I_g_on_n[1][70] = 3.73e-11; - I_g_on_n[1][80] = 3.73e-11; - I_g_on_n[1][90] = 3.73e-11; - I_g_on_n[1][100] = 3.73e-11; - - - //LOP device type - vdd[2] = 0.6; - Lphy[2] = 0.016; - Lelec[2] = 0.01232; - t_ox[2] = 0.9e-3; - v_th[2] = 0.24227; - c_ox[2] = 2.84e-14; - 
mobility_eff[2] = 513.52 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[2] = 4.64e-2; - c_g_ideal[2] = 4.54e-16; - c_fringe[2] = 0.057e-15; - c_junc[2] = 1e-15; - I_on_n[2] = 827.8e-6; - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2.28; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2]; - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2]; - long_channel_leakage_reduction[2] = 1/1.89; - I_off_n[2][0] = 5.94e-8; - I_off_n[2][10] = 7.23e-8; - I_off_n[2][20] = 8.7e-8; - I_off_n[2][30] = 1.04e-7; - I_off_n[2][40] = 1.22e-7; - I_off_n[2][50] = 1.43e-7; - I_off_n[2][60] = 1.65e-7; - I_off_n[2][70] = 1.90e-7; - I_off_n[2][80] = 2.15e-7; - I_off_n[2][90] = 2.39e-7; - I_off_n[2][100] = 2.63e-7; - - I_g_on_n[2][0] = 2.93e-9;//A/micron - I_g_on_n[2][10] = 2.93e-9; - I_g_on_n[2][20] = 2.93e-9; - I_g_on_n[2][30] = 2.93e-9; - I_g_on_n[2][40] = 2.93e-9; - I_g_on_n[2][50] = 2.93e-9; - I_g_on_n[2][60] = 2.93e-9; - I_g_on_n[2][70] = 2.93e-9; - I_g_on_n[2][80] = 2.93e-9; - I_g_on_n[2][90] = 2.93e-9; - I_g_on_n[2][100] = 2.93e-9; - - if (ram_cell_tech_type == lp_dram) - { - //LP-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.0; - Lphy[3] = 0.056; - Lelec[3] = 0.0419;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. 
- curr_v_th_dram_access_transistor = 0.44129; - width_dram_access_transistor = 0.056; - curr_I_on_dram_cell = 36e-6; - curr_I_off_dram_cell_worst_case_length_temp = 18.9e-12; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = width_dram_access_transistor * Lphy[3] * 10.0; - curr_asp_ratio_cell_dram = 1.46; - curr_c_dram_cell = 20e-15; - - //LP-DRAM wordline transistor parameters - curr_vpp = 1.5; - t_ox[3] = 2e-3; - v_th[3] = 0.44467; - c_ox[3] = 1.48e-14; - mobility_eff[3] = 408.12 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.174; - c_g_ideal[3] = 7.45e-16; - c_fringe[3] = 0.053e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1055.4e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.65; - n_to_p_eff_curr_drv_ratio[3] = 2.05; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.57e-11; - I_off_n[3][10] = 5.51e-11; - I_off_n[3][20] = 8.27e-11; - I_off_n[3][30] = 1.21e-10; - I_off_n[3][40] = 1.74e-10; - I_off_n[3][50] = 2.45e-10; - I_off_n[3][60] = 3.38e-10; - I_off_n[3][70] = 4.53e-10; - I_off_n[3][80] = 5.87e-10; - I_off_n[3][90] = 7.29e-10; - I_off_n[3][100] = 8.87e-10; - } - else if (ram_cell_tech_type == comm_dram) - { - //COMM-DRAM cell access transistor technology parameters - curr_vdd_dram_cell = 1.0; - Lphy[3] = 0.032; - Lelec[3] = 0.0205;//Assume Lelec is 30% lesser than Lphy for DRAM access and wordline transistors. 
- curr_v_th_dram_access_transistor = 1; - width_dram_access_transistor = 0.032; - curr_I_on_dram_cell = 20e-6; - curr_I_off_dram_cell_worst_case_length_temp = 1e-15; - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.032*0.032; - curr_asp_ratio_cell_dram = 1.5; - curr_c_dram_cell = 30e-15; - - //COMM-DRAM wordline transistor parameters - curr_vpp = 2.6; - t_ox[3] = 4e-3; - v_th[3] = 1.0; - c_ox[3] = 7.99e-15; - mobility_eff[3] = 380.76 * (1e-2 * 1e6 * 1e-2 * 1e6); - Vdsat[3] = 0.129; - c_g_ideal[3] = 2.56e-16; - c_fringe[3] = 0.053e-15; - c_junc[3] = 1e-15; - I_on_n[3] = 1024.5e-6; - I_on_p[3] = I_on_n[3] / 2; - nmos_effective_resistance_multiplier = 1.69; - n_to_p_eff_curr_drv_ratio[3] = 1.95; - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3]; - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3]; - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 3.63e-14; - I_off_n[3][10] = 7.18e-14; - I_off_n[3][20] = 1.36e-13; - I_off_n[3][30] = 2.49e-13; - I_off_n[3][40] = 4.41e-13; - I_off_n[3][50] = 7.55e-13; - I_off_n[3][60] = 1.26e-12; - I_off_n[3][70] = 2.03e-12; - I_off_n[3][80] = 3.19e-12; - I_off_n[3][90] = 4.87e-12; - I_off_n[3][100] = 7.16e-12; - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7; - 
curr_core_tx_density = 1.25/0.7; - curr_sckt_co_eff = 1.1111; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if(tech == 22){ - //For 2016, MPU/ASIC stagger-contacted M1 half-pitch is 22 nm (so this is 22 nm - //technology i.e. FEATURESIZE = 0.022). Using the DG process numbers for HP. - //22 nm HP - vdd[0] = 0.8; - Lphy[0] = 0.009;//Lphy is the physical gate-length. - Lelec[0] = 0.00468;//Lelec is the electrical gate-length. - t_ox[0] = 0.55e-3;//micron - v_th[0] = 0.1395;//V - c_ox[0] = 3.63e-14;//F/micron2 - mobility_eff[0] = 426.07 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 2.33e-2; //V/micron - c_g_ideal[0] = 3.27e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron - c_junc[0] = 0;//F/micron2 - I_on_n[0] = 2626.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.45; - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/3.274; - I_off_n[0][0] = 1.52e-7/1.5*1.2;//From 22nm, leakage current are directly from ITRS report rather than MASTAR, since MASTAR has serious bugs there. 
- I_off_n[0][10] = 1.55e-7/1.5*1.2; - I_off_n[0][20] = 1.59e-7/1.5*1.2; - I_off_n[0][30] = 1.68e-7/1.5*1.2; - I_off_n[0][40] = 1.90e-7/1.5*1.2; - I_off_n[0][50] = 2.69e-7/1.5*1.2; - I_off_n[0][60] = 5.32e-7/1.5*1.2; - I_off_n[0][70] = 1.02e-6/1.5*1.2; - I_off_n[0][80] = 1.62e-6/1.5*1.2; - I_off_n[0][90] = 2.73e-6/1.5*1.2; - I_off_n[0][100] = 6.1e-6/1.5*1.2; - //for 22nm DG HP - I_g_on_n[0][0] = 1.81e-9;//A/micron - I_g_on_n[0][10] = 1.81e-9; - I_g_on_n[0][20] = 1.81e-9; - I_g_on_n[0][30] = 1.81e-9; - I_g_on_n[0][40] = 1.81e-9; - I_g_on_n[0][50] = 1.81e-9; - I_g_on_n[0][60] = 1.81e-9; - I_g_on_n[0][70] = 1.81e-9; - I_g_on_n[0][80] = 1.81e-9; - I_g_on_n[0][90] = 1.81e-9; - I_g_on_n[0][100] = 1.81e-9; - - //22 nm LSTP DG - vdd[1] = 0.8; - Lphy[1] = 0.014; - Lelec[1] = 0.008;//Lelec is the electrical gate-length. - t_ox[1] = 1.1e-3;//micron - v_th[1] = 0.40126;//V - c_ox[1] = 2.30e-14;//F/micron2 - mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[1] = 6.64e-2; //V/micron - c_g_ideal[1] = 3.22e-16;//F/micron - c_fringe[1] = 0.08e-15; - c_junc[1] = 0;//F/micron2 - I_on_n[1] = 727.6e-6;//A/micron - I_on_p[1] = I_on_n[1] / 2; - nmos_effective_resistance_multiplier = 1.99; - n_to_p_eff_curr_drv_ratio[1] = 2; - gmp_to_gmn_multiplier[1] = 0.99; - Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron - Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron - long_channel_leakage_reduction[1] = 1/1.89; - I_off_n[1][0] = 2.43e-11; - I_off_n[1][10] = 4.85e-11; - I_off_n[1][20] = 9.68e-11; - I_off_n[1][30] = 1.94e-10; - I_off_n[1][40] = 3.87e-10; - I_off_n[1][50] = 7.73e-10; - I_off_n[1][60] = 3.55e-10; - I_off_n[1][70] = 3.09e-9; - I_off_n[1][80] = 6.19e-9; - I_off_n[1][90] = 1.24e-8; - I_off_n[1][100]= 2.48e-8; - - I_g_on_n[1][0] = 4.51e-10;//A/micron - I_g_on_n[1][10] = 4.51e-10; - I_g_on_n[1][20] = 4.51e-10; - I_g_on_n[1][30] = 4.51e-10; - I_g_on_n[1][40] = 4.51e-10; - I_g_on_n[1][50] = 
4.51e-10; - I_g_on_n[1][60] = 4.51e-10; - I_g_on_n[1][70] = 4.51e-10; - I_g_on_n[1][80] = 4.51e-10; - I_g_on_n[1][90] = 4.51e-10; - I_g_on_n[1][100] = 4.51e-10; - - //22 nm LOP - vdd[2] = 0.6; - Lphy[2] = 0.011; - Lelec[2] = 0.00604;//Lelec is the electrical gate-length. - t_ox[2] = 0.8e-3;//micron - v_th[2] = 0.2315;//V - c_ox[2] = 2.87e-14;//F/micron2 - mobility_eff[2] = 698.37 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[2] = 1.81e-2; //V/micron - c_g_ideal[2] = 3.16e-16;//F/micron - c_fringe[2] = 0.08e-15; - c_junc[2] = 0;//F/micron2 This is Cj0 not Cjunc in MASTAR results->Dynamic Tab - I_on_n[2] = 916.1e-6;//A/micron - I_on_p[2] = I_on_n[2] / 2; - nmos_effective_resistance_multiplier = 1.73; - n_to_p_eff_curr_drv_ratio[2] = 2; - gmp_to_gmn_multiplier[2] = 1.11; - Rnchannelon[2] = nmos_effective_resistance_multiplier * vdd[2] / I_on_n[2];//ohm-micron - Rpchannelon[2] = n_to_p_eff_curr_drv_ratio[2] * Rnchannelon[2];//ohm-micron - long_channel_leakage_reduction[2] = 1/2.38; - - I_off_n[2][0] = 1.31e-8; - I_off_n[2][10] = 2.60e-8; - I_off_n[2][20] = 5.14e-8; - I_off_n[2][30] = 1.02e-7; - I_off_n[2][40] = 2.02e-7; - I_off_n[2][50] = 3.99e-7; - I_off_n[2][60] = 7.91e-7; - I_off_n[2][70] = 1.09e-6; - I_off_n[2][80] = 2.09e-6; - I_off_n[2][90] = 4.04e-6; - I_off_n[2][100]= 4.48e-6; - - I_g_on_n[2][0] = 2.74e-9;//A/micron - I_g_on_n[2][10] = 2.74e-9; - I_g_on_n[2][20] = 2.74e-9; - I_g_on_n[2][30] = 2.74e-9; - I_g_on_n[2][40] = 2.74e-9; - I_g_on_n[2][50] = 2.74e-9; - I_g_on_n[2][60] = 2.74e-9; - I_g_on_n[2][70] = 2.74e-9; - I_g_on_n[2][80] = 2.74e-9; - I_g_on_n[2][90] = 2.74e-9; - I_g_on_n[2][100] = 2.74e-9; - - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. - //parameters - curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. 
In - //2005 ITRS, the value was about twice the value in 2007 ITRS - Lphy[3] = 0.022;//micron - Lelec[3] = 0.0181;//micron. - curr_v_th_dram_access_transistor = 1;//V - width_dram_access_transistor = 0.022;//micron - curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always - //kept constant. In reality this could perhaps be lower - curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. - curr_asp_ratio_cell_dram = 0.667; - curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus - //kept constant. - - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. - curr_vpp = 2.3;//vpp. V - t_ox[3] = 3.5e-3;//micron - v_th[3] = 1.0;//V - c_ox[3] = 9.06e-15;//F/micron2 - mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs - Vdsat[3] = 0.0972; //V/micron - c_g_ideal[3] = 1.99e-16;//F/micron - c_fringe[3] = 0.053e-15;//F/micron - c_junc[3] = 1e-15;//F/micron2 - I_on_n[3] = 910.5e-6;//A/micron - I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm. 
- // - n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.1e-13; //A/micron - I_off_n[3][10] = 2.11e-13; - I_off_n[3][20] = 3.88e-13; - I_off_n[3][30] = 6.9e-13; - I_off_n[3][40] = 1.19e-12; - I_off_n[3][50] = 1.98e-12; - I_off_n[3][60] = 3.22e-12; - I_off_n[3][70] = 5.09e-12; - I_off_n[3][80] = 7.85e-12; - I_off_n[3][90] = 1.18e-11; - I_off_n[3][100] = 1.72e-11; - - } - else - { - //some error handler - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - if(tech == 16){ - //For 2019, MPU/ASIC stagger-contacted M1 half-pitch is 16 nm (so this is 16 nm - //technology i.e. FEATURESIZE = 0.016). Using the DG process numbers for HP. - //16 nm HP - vdd[0] = 0.7; - Lphy[0] = 0.006;//Lphy is the physical gate-length. - Lelec[0] = 0.00315;//Lelec is the electrical gate-length. 
- t_ox[0] = 0.5e-3;//micron - v_th[0] = 0.1489;//V - c_ox[0] = 3.83e-14;//F/micron2 Cox_elec in MASTAR - mobility_eff[0] = 476.15 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs - Vdsat[0] = 1.42e-2; //V/micron calculated in spreadsheet - c_g_ideal[0] = 2.30e-16;//F/micron - c_fringe[0] = 0.06e-15;//F/micron MASTAR inputdynamic/3 - c_junc[0] = 0;//F/micron2 MASTAR result dynamic - I_on_n[0] = 2768.4e-6;//A/micron - I_on_p[0] = I_on_n[0] / 2;//A/micron //This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.48;//nmos_effective_resistance_multiplier is the ratio of Ieff to Idsat where Ieff is the effective NMOS current and Idsat is the saturation current. - n_to_p_eff_curr_drv_ratio[0] = 2; //Wpmos/Wnmos = 2 in 2007 MASTAR. Look in - //"Dynamic" tab of Device workspace. - gmp_to_gmn_multiplier[0] = 1.38; //Just using the 32nm SOI value. - Rnchannelon[0] = nmos_effective_resistance_multiplier * vdd[0] / I_on_n[0];//ohm-micron - Rpchannelon[0] = n_to_p_eff_curr_drv_ratio[0] * Rnchannelon[0];//ohm-micron - long_channel_leakage_reduction[0] = 1/2.655; - I_off_n[0][0] = 1.52e-7/1.5*1.2*1.07; - I_off_n[0][10] = 1.55e-7/1.5*1.2*1.07; - I_off_n[0][20] = 1.59e-7/1.5*1.2*1.07; - I_off_n[0][30] = 1.68e-7/1.5*1.2*1.07; - I_off_n[0][40] = 1.90e-7/1.5*1.2*1.07; - I_off_n[0][50] = 2.69e-7/1.5*1.2*1.07; - I_off_n[0][60] = 5.32e-7/1.5*1.2*1.07; - I_off_n[0][70] = 1.02e-6/1.5*1.2*1.07; - I_off_n[0][80] = 1.62e-6/1.5*1.2*1.07; - I_off_n[0][90] = 2.73e-6/1.5*1.2*1.07; - I_off_n[0][100] = 6.1e-6/1.5*1.2*1.07; - //for 16nm DG HP - I_g_on_n[0][0] = 1.07e-9;//A/micron - I_g_on_n[0][10] = 1.07e-9; - I_g_on_n[0][20] = 1.07e-9; - I_g_on_n[0][30] = 1.07e-9; - I_g_on_n[0][40] = 1.07e-9; - I_g_on_n[0][50] = 1.07e-9; - I_g_on_n[0][60] = 1.07e-9; - I_g_on_n[0][70] = 1.07e-9; - I_g_on_n[0][80] = 1.07e-9; - I_g_on_n[0][90] = 1.07e-9; - I_g_on_n[0][100] = 1.07e-9; - -// //16 nm LSTP DG -// vdd[1] = 0.8; -// Lphy[1] = 0.014; -// Lelec[1] = 0.008;//Lelec is the electrical 
gate-length. -// t_ox[1] = 1.1e-3;//micron -// v_th[1] = 0.40126;//V -// c_ox[1] = 2.30e-14;//F/micron2 -// mobility_eff[1] = 738.09 * (1e-2 * 1e6 * 1e-2 * 1e6); //micron2 / Vs -// Vdsat[1] = 6.64e-2; //V/micron -// c_g_ideal[1] = 3.22e-16;//F/micron -// c_fringe[1] = 0.008e-15; -// c_junc[1] = 0;//F/micron2 -// I_on_n[1] = 727.6e-6;//A/micron -// I_on_p[1] = I_on_n[1] / 2; -// nmos_effective_resistance_multiplier = 1.99; -// n_to_p_eff_curr_drv_ratio[1] = 2; -// gmp_to_gmn_multiplier[1] = 0.99; -// Rnchannelon[1] = nmos_effective_resistance_multiplier * vdd[1] / I_on_n[1];//ohm-micron -// Rpchannelon[1] = n_to_p_eff_curr_drv_ratio[1] * Rnchannelon[1];//ohm-micron -// I_off_n[1][0] = 2.43e-11; -// I_off_n[1][10] = 4.85e-11; -// I_off_n[1][20] = 9.68e-11; -// I_off_n[1][30] = 1.94e-10; -// I_off_n[1][40] = 3.87e-10; -// I_off_n[1][50] = 7.73e-10; -// I_off_n[1][60] = 3.55e-10; -// I_off_n[1][70] = 3.09e-9; -// I_off_n[1][80] = 6.19e-9; -// I_off_n[1][90] = 1.24e-8; -// I_off_n[1][100]= 2.48e-8; -// -// // for 22nm LSTP HP -// I_g_on_n[1][0] = 4.51e-10;//A/micron -// I_g_on_n[1][10] = 4.51e-10; -// I_g_on_n[1][20] = 4.51e-10; -// I_g_on_n[1][30] = 4.51e-10; -// I_g_on_n[1][40] = 4.51e-10; -// I_g_on_n[1][50] = 4.51e-10; -// I_g_on_n[1][60] = 4.51e-10; -// I_g_on_n[1][70] = 4.51e-10; -// I_g_on_n[1][80] = 4.51e-10; -// I_g_on_n[1][90] = 4.51e-10; -// I_g_on_n[1][100] = 4.51e-10; - - - if (ram_cell_tech_type == 3) - {} - else if (ram_cell_tech_type == 4) - { - //22 nm commodity DRAM cell access transistor technology parameters. - //parameters - curr_vdd_dram_cell = 0.9;//0.45;//This value has reduced greatly in 2007 ITRS for all technology nodes. In - //2005 ITRS, the value was about twice the value in 2007 ITRS - Lphy[3] = 0.022;//micron - Lelec[3] = 0.0181;//micron. - curr_v_th_dram_access_transistor = 1;//V - width_dram_access_transistor = 0.022;//micron - curr_I_on_dram_cell = 20e-6; //This is a typical value that I have always - //kept constant. 
In reality this could perhaps be lower - curr_I_off_dram_cell_worst_case_length_temp = 1e-15;//A - curr_Wmemcella_dram = width_dram_access_transistor; - curr_Wmemcellpmos_dram = 0; - curr_Wmemcellnmos_dram = 0; - curr_area_cell_dram = 6*0.022*0.022;//micron2. - curr_asp_ratio_cell_dram = 0.667; - curr_c_dram_cell = 30e-15;//This is a typical value that I have alwaus - //kept constant. - - //22 nm commodity DRAM wordline transistor parameters obtained using MASTAR. - curr_vpp = 2.3;//vpp. V - t_ox[3] = 3.5e-3;//micron - v_th[3] = 1.0;//V - c_ox[3] = 9.06e-15;//F/micron2 - mobility_eff[3] = 367.29 * (1e-2 * 1e6 * 1e-2 * 1e6);//micron2 / Vs - Vdsat[3] = 0.0972; //V/micron - c_g_ideal[3] = 1.99e-16;//F/micron - c_fringe[3] = 0.053e-15;//F/micron - c_junc[3] = 1e-15;//F/micron2 - I_on_n[3] = 910.5e-6;//A/micron - I_on_p[3] = I_on_n[3] / 2;//This value for I_on_p is not really used. - nmos_effective_resistance_multiplier = 1.69;//Using the value from 32nm. - // - n_to_p_eff_curr_drv_ratio[3] = 1.95;//Using the value from 32nm - gmp_to_gmn_multiplier[3] = 0.90; - Rnchannelon[3] = nmos_effective_resistance_multiplier * curr_vpp / I_on_n[3];//ohm-micron - Rpchannelon[3] = n_to_p_eff_curr_drv_ratio[3] * Rnchannelon[3];//ohm-micron - long_channel_leakage_reduction[3] = 1; - I_off_n[3][0] = 1.1e-13; //A/micron - I_off_n[3][10] = 2.11e-13; - I_off_n[3][20] = 3.88e-13; - I_off_n[3][30] = 6.9e-13; - I_off_n[3][40] = 1.19e-12; - I_off_n[3][50] = 1.98e-12; - I_off_n[3][60] = 3.22e-12; - I_off_n[3][70] = 5.09e-12; - I_off_n[3][80] = 7.85e-12; - I_off_n[3][90] = 1.18e-11; - I_off_n[3][100] = 1.72e-11; - - } - else - { - //some error handler - } - - //SRAM cell properties - curr_Wmemcella_sram = 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_sram = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_sram = 2.08 * g_ip->F_sz_um; - curr_area_cell_sram = 146 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_sram = 1.46; - //CAM cell properties //TODO: data need to be revisited - curr_Wmemcella_cam 
= 1.31 * g_ip->F_sz_um; - curr_Wmemcellpmos_cam = 1.23 * g_ip->F_sz_um; - curr_Wmemcellnmos_cam = 2.08 * g_ip->F_sz_um; - curr_area_cell_cam = 292 * g_ip->F_sz_um * g_ip->F_sz_um; - curr_asp_ratio_cell_cam = 2.92; - //Empirical undifferetiated core/FU coefficient - curr_logic_scaling_co_eff = 0.7*0.7*0.7*0.7*0.7; - curr_core_tx_density = 1.25/0.7/0.7/0.7; - curr_sckt_co_eff = 1.1296; - curr_chip_layout_overhead = 1.2;//die measurement results based on Niagara 1 and 2 - curr_macro_layout_overhead = 1.1;//EDA placement and routing tool rule of thumb - } - - - g_tp.peri_global.Vdd += curr_alpha * vdd[peri_global_tech_type]; - g_tp.peri_global.t_ox += curr_alpha * t_ox[peri_global_tech_type]; - g_tp.peri_global.Vth += curr_alpha * v_th[peri_global_tech_type]; - g_tp.peri_global.C_ox += curr_alpha * c_ox[peri_global_tech_type]; - g_tp.peri_global.C_g_ideal += curr_alpha * c_g_ideal[peri_global_tech_type]; - g_tp.peri_global.C_fringe += curr_alpha * c_fringe[peri_global_tech_type]; - g_tp.peri_global.C_junc += curr_alpha * c_junc[peri_global_tech_type]; - g_tp.peri_global.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.peri_global.l_phy += curr_alpha * Lphy[peri_global_tech_type]; - g_tp.peri_global.l_elec += curr_alpha * Lelec[peri_global_tech_type]; - g_tp.peri_global.I_on_n += curr_alpha * I_on_n[peri_global_tech_type]; - g_tp.peri_global.R_nch_on += curr_alpha * Rnchannelon[peri_global_tech_type]; - g_tp.peri_global.R_pch_on += curr_alpha * Rpchannelon[peri_global_tech_type]; - g_tp.peri_global.n_to_p_eff_curr_drv_ratio - += curr_alpha * n_to_p_eff_curr_drv_ratio[peri_global_tech_type]; - g_tp.peri_global.long_channel_leakage_reduction - += curr_alpha * long_channel_leakage_reduction[peri_global_tech_type]; - g_tp.peri_global.I_off_n += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_off_p += curr_alpha * I_off_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_n += curr_alpha * 
I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - g_tp.peri_global.I_g_on_p += curr_alpha * I_g_on_n[peri_global_tech_type][g_ip->temp - 300]; - gmp_to_gmn_multiplier_periph_global += curr_alpha * gmp_to_gmn_multiplier[peri_global_tech_type]; - - g_tp.sram_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.sram_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.sram_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.sram_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.sram_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.sram_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.sram_cell.C_fringe += curr_alpha * c_fringe[ram_cell_tech_type]; - g_tp.sram_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.sram_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.sram_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.sram_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.sram_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.sram_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.sram_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.sram_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.sram_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram_cell_Vdd += curr_alpha * curr_vdd_dram_cell; - g_tp.dram_acc.Vth += curr_alpha * curr_v_th_dram_access_transistor; - g_tp.dram_acc.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_acc.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_acc.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - 
g_tp.dram_acc.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_acc.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_cell_I_on += curr_alpha * curr_I_on_dram_cell; - g_tp.dram_cell_I_off_worst_case_len_temp += curr_alpha * curr_I_off_dram_cell_worst_case_length_temp; - g_tp.dram_acc.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_cell_C += curr_alpha * curr_c_dram_cell; - g_tp.vpp += curr_alpha * curr_vpp; - g_tp.dram_wl.l_phy += curr_alpha * Lphy[dram_cell_tech_flavor]; - g_tp.dram_wl.l_elec += curr_alpha * Lelec[dram_cell_tech_flavor]; - g_tp.dram_wl.C_g_ideal += curr_alpha * c_g_ideal[dram_cell_tech_flavor]; - g_tp.dram_wl.C_fringe += curr_alpha * c_fringe[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc += curr_alpha * c_junc[dram_cell_tech_flavor]; - g_tp.dram_wl.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.dram_wl.I_on_n += curr_alpha * I_on_n[dram_cell_tech_flavor]; - g_tp.dram_wl.R_nch_on += curr_alpha * Rnchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.R_pch_on += curr_alpha * Rpchannelon[dram_cell_tech_flavor]; - g_tp.dram_wl.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[dram_cell_tech_flavor]; - g_tp.dram_wl.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[dram_cell_tech_flavor]; - g_tp.dram_wl.I_off_n += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - g_tp.dram_wl.I_off_p += curr_alpha * I_off_n[dram_cell_tech_flavor][g_ip->temp - 300]; - - g_tp.cam_cell.Vdd += curr_alpha * vdd[ram_cell_tech_type]; - g_tp.cam_cell.l_phy += curr_alpha * Lphy[ram_cell_tech_type]; - g_tp.cam_cell.l_elec += curr_alpha * Lelec[ram_cell_tech_type]; - g_tp.cam_cell.t_ox += curr_alpha * t_ox[ram_cell_tech_type]; - g_tp.cam_cell.Vth += curr_alpha * v_th[ram_cell_tech_type]; - g_tp.cam_cell.C_g_ideal += curr_alpha * c_g_ideal[ram_cell_tech_type]; - g_tp.cam_cell.C_fringe += curr_alpha * 
c_fringe[ram_cell_tech_type]; - g_tp.cam_cell.C_junc += curr_alpha * c_junc[ram_cell_tech_type]; - g_tp.cam_cell.C_junc_sidewall = 0.25e-15; // F/micron - g_tp.cam_cell.I_on_n += curr_alpha * I_on_n[ram_cell_tech_type]; - g_tp.cam_cell.R_nch_on += curr_alpha * Rnchannelon[ram_cell_tech_type]; - g_tp.cam_cell.R_pch_on += curr_alpha * Rpchannelon[ram_cell_tech_type]; - g_tp.cam_cell.n_to_p_eff_curr_drv_ratio += curr_alpha * n_to_p_eff_curr_drv_ratio[ram_cell_tech_type]; - g_tp.cam_cell.long_channel_leakage_reduction += curr_alpha * long_channel_leakage_reduction[ram_cell_tech_type]; - g_tp.cam_cell.I_off_n += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_off_p += curr_alpha * I_off_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_n += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - g_tp.cam_cell.I_g_on_p += curr_alpha * I_g_on_n[ram_cell_tech_type][g_ip->temp - 300]; - - g_tp.dram.cell_a_w += curr_alpha * curr_Wmemcella_dram; - g_tp.dram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_dram; - g_tp.dram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_dram; - area_cell_dram += curr_alpha * curr_area_cell_dram; - asp_ratio_cell_dram += curr_alpha * curr_asp_ratio_cell_dram; - - g_tp.sram.cell_a_w += curr_alpha * curr_Wmemcella_sram; - g_tp.sram.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_sram; - g_tp.sram.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_sram; - area_cell_sram += curr_alpha * curr_area_cell_sram; - asp_ratio_cell_sram += curr_alpha * curr_asp_ratio_cell_sram; - - g_tp.cam.cell_a_w += curr_alpha * curr_Wmemcella_cam;//sheng - g_tp.cam.cell_pmos_w += curr_alpha * curr_Wmemcellpmos_cam; - g_tp.cam.cell_nmos_w += curr_alpha * curr_Wmemcellnmos_cam; - area_cell_cam += curr_alpha * curr_area_cell_cam; - asp_ratio_cell_cam += curr_alpha * curr_asp_ratio_cell_cam; - - //Sense amplifier latch Gm calculation - mobility_eff_periph_global += curr_alpha * mobility_eff[peri_global_tech_type]; - 
Vdsat_periph_global += curr_alpha * Vdsat[peri_global_tech_type]; - - //Empirical undifferetiated core/FU coefficient - g_tp.scaling_factor.logic_scaling_co_eff += curr_alpha * curr_logic_scaling_co_eff; - g_tp.scaling_factor.core_tx_density += curr_alpha * curr_core_tx_density; - g_tp.chip_layout_overhead += curr_alpha * curr_chip_layout_overhead; - g_tp.macro_layout_overhead += curr_alpha * curr_macro_layout_overhead; - g_tp.sckt_co_eff += curr_alpha * curr_sckt_co_eff; - } - - - //Currently we are not modeling the resistance/capacitance of poly anywhere. - //Continuous function (or date have been processed) does not need linear interpolation - g_tp.w_comp_inv_p1 = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_inv_n1 = 7.5 * g_ip->F_sz_um;//this was 6 micron for the 0.8 micron process - g_tp.w_comp_inv_p2 = 25 * g_ip->F_sz_um;//this was 20 micron for the 0.8 micron process - g_tp.w_comp_inv_n2 = 15 * g_ip->F_sz_um;//this was 12 micron for the 0.8 micron process - g_tp.w_comp_inv_p3 = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_inv_n3 = 30 * g_ip->F_sz_um;//this was 24 micron for the 0.8 micron process - g_tp.w_eval_inv_p = 100 * g_ip->F_sz_um;//this was 80 micron for the 0.8 micron process - g_tp.w_eval_inv_n = 50 * g_ip->F_sz_um;//this was 40 micron for the 0.8 micron process - g_tp.w_comp_n = 12.5 * g_ip->F_sz_um;//this was 10 micron for the 0.8 micron process - g_tp.w_comp_p = 37.5 * g_ip->F_sz_um;//this was 30 micron for the 0.8 micron process - - g_tp.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip->F_sz_um; - g_tp.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip->F_sz_um; - g_tp.HPOWERRAIL = 2 * g_ip->F_sz_um; - g_tp.cell_h_def = 50 * g_ip->F_sz_um; - g_tp.w_poly_contact = g_ip->F_sz_um; - g_tp.spacing_poly_to_contact = g_ip->F_sz_um; - g_tp.spacing_poly_to_poly = 1.5 * g_ip->F_sz_um; - g_tp.ram_wl_stitching_overhead_ = 7.5 * g_ip->F_sz_um; - - g_tp.min_w_nmos_ = 3 * g_ip->F_sz_um / 2; - 
g_tp.max_w_nmos_ = 100 * g_ip->F_sz_um; - g_tp.w_iso = 12.5*g_ip->F_sz_um;//was 10 micron for the 0.8 micron process - g_tp.w_sense_n = 3.75*g_ip->F_sz_um; // sense amplifier N-trans; was 3 micron for the 0.8 micron process - g_tp.w_sense_p = 7.5*g_ip->F_sz_um; // sense amplifier P-trans; was 6 micron for the 0.8 micron process - g_tp.w_sense_en = 5*g_ip->F_sz_um; // Sense enable transistor of the sense amplifier; was 4 micron for the 0.8 micron process - g_tp.w_nmos_b_mux = 6 * g_tp.min_w_nmos_; - g_tp.w_nmos_sa_mux = 6 * g_tp.min_w_nmos_; - - if (ram_cell_tech_type == comm_dram) - { - g_tp.max_w_nmos_dec = 8 * g_ip->F_sz_um; - g_tp.h_dec = 8; // in the unit of memory cell height - } - else - { - g_tp.max_w_nmos_dec = g_tp.max_w_nmos_; - g_tp.h_dec = 4; // in the unit of memory cell height - } - - g_tp.peri_global.C_overlap = 0.2 * g_tp.peri_global.C_g_ideal; - g_tp.sram_cell.C_overlap = 0.2 * g_tp.sram_cell.C_g_ideal; - g_tp.cam_cell.C_overlap = 0.2 * g_tp.cam_cell.C_g_ideal; - - g_tp.dram_acc.C_overlap = 0.2 * g_tp.dram_acc.C_g_ideal; - g_tp.dram_acc.R_nch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_n; - //g_tp.dram_acc.R_pch_on = g_tp.dram_cell_Vdd / g_tp.dram_acc.I_on_p; - - g_tp.dram_wl.C_overlap = 0.2 * g_tp.dram_wl.C_g_ideal; - - double gmn_sense_amp_latch = (mobility_eff_periph_global / 2) * g_tp.peri_global.C_ox * (g_tp.w_sense_n / g_tp.peri_global.l_elec) * Vdsat_periph_global; - double gmp_sense_amp_latch = gmp_to_gmn_multiplier_periph_global * gmn_sense_amp_latch; - g_tp.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch; - - g_tp.dram.b_w = sqrt(area_cell_dram / (asp_ratio_cell_dram)); - g_tp.dram.b_h = asp_ratio_cell_dram * g_tp.dram.b_w; - g_tp.sram.b_w = sqrt(area_cell_sram / (asp_ratio_cell_sram)); - g_tp.sram.b_h = asp_ratio_cell_sram * g_tp.sram.b_w; - g_tp.cam.b_w = sqrt(area_cell_cam / (asp_ratio_cell_cam));//Sheng - g_tp.cam.b_h = asp_ratio_cell_cam * g_tp.cam.b_w; - - g_tp.dram.Vbitpre = g_tp.dram_cell_Vdd; - g_tp.sram.Vbitpre = 
vdd[ram_cell_tech_type]; - g_tp.cam.Vbitpre = vdd[ram_cell_tech_type];//Sheng - pmos_to_nmos_sizing_r = pmos_to_nmos_sz_ratio(); - g_tp.w_pmos_bl_precharge = 6 * pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - g_tp.w_pmos_bl_eq = pmos_to_nmos_sizing_r * g_tp.min_w_nmos_; - - - double wire_pitch [NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_r_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - wire_c_per_micron[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - horiz_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - vert_dielectric_constant[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - aspect_ratio[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - miller_value[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES], - ild_thickness[NUMBER_INTERCONNECT_PROJECTION_TYPES][NUMBER_WIRE_TYPES]; - - for (iter=0; iter<=1; ++iter) - { - // linear interpolation - if (iter == 0) - { - tech = tech_lo; - if (tech_lo == tech_hi) - { - curr_alpha = 1; - } - else - { - curr_alpha = (technology - tech_hi)/(tech_lo - tech_hi); - } - } - else - { - tech = tech_hi; - if (tech_lo == tech_hi) - { - break; - } - else - { - curr_alpha = (tech_lo - technology)/(tech_lo - tech_hi); - } - } - - if (tech == 90) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//micron - aspect_ratio[0][0] = 2.4; - wire_width = wire_pitch[0][0] / 2; //micron - wire_thickness = aspect_ratio[0][0] * wire_width;//micron - wire_spacing = wire_pitch[0][0] - wire_width;//micron - barrier_thickness = 0.01;//micron - dishing_thickness = 0;//micron - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter);//ohm/micron - ild_thickness[0][0] = 0.48;//micron - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.709; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; 
//F/micron - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], - vert_dielectric_constant[0][0], - fringe_cap);//F/micron. - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.4; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.48;//micron - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.709; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.7; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.96; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.709; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.008; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = 
wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.48; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 3.038; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], - vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.48; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 3.038; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], - vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 1.1; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 3.038; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2] , miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM 
wordline/bitline - wire_pitch[1][3] = 2 * 0.09; - wire_c_per_micron[1][3] = 60e-15 / (256 * 2 * 0.09); - wire_r_per_micron[1][3] = 12 / 0.09; - } - else if (tech == 65) - { - //Aggressive projections - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 2.7; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.405; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 2.303; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 2.7; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.405; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 2.303; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], - vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 2.8; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, 
wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.81; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 2.303; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.006; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.405; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.734; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.405; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.734; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * 
g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.77; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.734; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.065; - wire_c_per_micron[1][3] = 52.5e-15 / (256 * 2 * 0.065); - wire_r_per_micron[1][3] = 12 / 0.065; - } - else if (tech == 45) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.315; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.958; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0] , miller_value[0][0] , horiz_dielectric_constant[0][0] , vert_dielectric_constant[0][0] , - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - 
wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.315; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.958; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.63; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.958; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.004; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.315; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.46; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], 
vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.315; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.46; - vert_dielectric_constant[1][1] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.55; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.46; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.045; - wire_c_per_micron[1][3] = 37.5e-15 / (256 * 2 * 0.045); - wire_r_per_micron[1][3] = 12 / 0.045; - } - else if (tech == 32) - { - //Aggressive projections. 
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.21; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.664; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.21; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.664; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um; - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.42; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.664; - 
vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.21; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.214; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - aspect_ratio[1][1] = 2.0; - wire_width = wire_pitch[1][1] / 2; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.21; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.214; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - 
dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.385; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.214; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.032;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.032);//F/micron - wire_r_per_micron[1][3] = 12 / 0.032;//ohm/micron - } - else if (tech == 22) - { - //Aggressive projections. - wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.15; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.414; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - wire_width = wire_pitch[0][1] / 2; - aspect_ratio[0][1] = 3.0; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - 
ild_thickness[0][1] = 0.15; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.414; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.3; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.414; - vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, 
barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.003; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.15; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 2.104; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, 
wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.15; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 2.104; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][2] = 0.275; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 2.104; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.022;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.022);//F/micron - wire_r_per_micron[1][3] = 12 / 0.022;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, 
vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - } - - else if (tech == 16) - { - //Aggressive projections. 
- wire_pitch[0][0] = 2.5 * g_ip->F_sz_um;//local - aspect_ratio[0][0] = 3.0; - wire_width = wire_pitch[0][0] / 2; - wire_thickness = aspect_ratio[0][0] * wire_width; - wire_spacing = wire_pitch[0][0] - wire_width; - barrier_thickness = 0; - dishing_thickness = 0; - alpha_scatter = 1; - wire_r_per_micron[0][0] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][0] = 0.108; - miller_value[0][0] = 1.5; - horiz_dielectric_constant[0][0] = 1.202; - vert_dielectric_constant[0][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[0][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][0], miller_value[0][0], horiz_dielectric_constant[0][0], vert_dielectric_constant[0][0], - fringe_cap); - - wire_pitch[0][1] = 4 * g_ip->F_sz_um;//semi-global - aspect_ratio[0][1] = 3.0; - wire_width = wire_pitch[0][1] / 2; - wire_thickness = aspect_ratio[0][1] * wire_width; - wire_spacing = wire_pitch[0][1] - wire_width; - wire_r_per_micron[0][1] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][1] = 0.108; - miller_value[0][1] = 1.5; - horiz_dielectric_constant[0][1] = 1.202; - vert_dielectric_constant[0][1] = 3.9; - wire_c_per_micron[0][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][1], miller_value[0][1], horiz_dielectric_constant[0][1], vert_dielectric_constant[0][1], - fringe_cap); - - wire_pitch[0][2] = 8 * g_ip->F_sz_um;//global - aspect_ratio[0][2] = 3.0; - wire_width = wire_pitch[0][2] / 2; - wire_thickness = aspect_ratio[0][2] * wire_width; - wire_spacing = wire_pitch[0][2] - wire_width; - wire_r_per_micron[0][2] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[0][2] = 0.216; - miller_value[0][2] = 1.5; - horiz_dielectric_constant[0][2] = 1.202; 
- vert_dielectric_constant[0][2] = 3.9; - wire_c_per_micron[0][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[0][2], miller_value[0][2], horiz_dielectric_constant[0][2], vert_dielectric_constant[0][2], - fringe_cap); - -// //************************* -// wire_pitch[0][4] = 16 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][4] - wire_width; -// wire_r_per_micron[0][4] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][5] = 24 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][5] - wire_width; -// wire_r_per_micron[0][5] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[0][6] = 32 * g_ip.F_sz_um;//global -// aspect_ratio = 3.0; -// wire_width = wire_pitch[0][6] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[0][6] - wire_width; -// wire_r_per_micron[0][6] = wire_resistance(BULK_CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.3; -// wire_c_per_micron[0][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, 
vert_dielectric_constant, -// fringe_cap); - //************************* - - //Conservative projections - wire_pitch[1][0] = 2.5 * g_ip->F_sz_um; - aspect_ratio[1][0] = 2.0; - wire_width = wire_pitch[1][0] / 2; - wire_thickness = aspect_ratio[1][0] * wire_width; - wire_spacing = wire_pitch[1][0] - wire_width; - barrier_thickness = 0.002; - dishing_thickness = 0; - alpha_scatter = 1.05; - wire_r_per_micron[1][0] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][0] = 0.108; - miller_value[1][0] = 1.5; - horiz_dielectric_constant[1][0] = 1.998; - vert_dielectric_constant[1][0] = 3.9; - fringe_cap = 0.115e-15; - wire_c_per_micron[1][0] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][0], miller_value[1][0], horiz_dielectric_constant[1][0], vert_dielectric_constant[1][0], - fringe_cap); - - wire_pitch[1][1] = 4 * g_ip->F_sz_um; - wire_width = wire_pitch[1][1] / 2; - aspect_ratio[1][1] = 2.0; - wire_thickness = aspect_ratio[1][1] * wire_width; - wire_spacing = wire_pitch[1][1] - wire_width; - wire_r_per_micron[1][1] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); - ild_thickness[1][1] = 0.108; - miller_value[1][1] = 1.5; - horiz_dielectric_constant[1][1] = 1.998; - vert_dielectric_constant[1][1] = 3.9; - wire_c_per_micron[1][1] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][1], miller_value[1][1], horiz_dielectric_constant[1][1], vert_dielectric_constant[1][1], - fringe_cap); - - wire_pitch[1][2] = 8 * g_ip->F_sz_um; - aspect_ratio[1][2] = 2.2; - wire_width = wire_pitch[1][2] / 2; - wire_thickness = aspect_ratio[1][2] * wire_width; - wire_spacing = wire_pitch[1][2] - wire_width; - dishing_thickness = 0.1 * wire_thickness; - wire_r_per_micron[1][2] = wire_resistance(CU_RESISTIVITY, wire_width, - wire_thickness, barrier_thickness, dishing_thickness, 
alpha_scatter); - ild_thickness[1][2] = 0.198; - miller_value[1][2] = 1.5; - horiz_dielectric_constant[1][2] = 1.998; - vert_dielectric_constant[1][2] = 3.9; - wire_c_per_micron[1][2] = wire_capacitance(wire_width, wire_thickness, wire_spacing, - ild_thickness[1][2], miller_value[1][2], horiz_dielectric_constant[1][2], vert_dielectric_constant[1][2], - fringe_cap); - //Nominal projections for commodity DRAM wordline/bitline - wire_pitch[1][3] = 2 * 0.016;//micron - wire_c_per_micron[1][3] = 31e-15 / (256 * 2 * 0.016);//F/micron - wire_r_per_micron[1][3] = 12 / 0.016;//ohm/micron - - //****************** -// wire_pitch[1][4] = 16 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][4] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][4] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][4] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][4] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][5] = 24 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][5] / 2; -// wire_thickness = aspect_ratio * wire_width; -// wire_spacing = wire_pitch[1][5] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][5] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][5] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); -// -// wire_pitch[1][6] = 32 * g_ip.F_sz_um; -// aspect_ratio = 2.2; -// wire_width = wire_pitch[1][6] / 2; -// wire_thickness = aspect_ratio * 
wire_width; -// wire_spacing = wire_pitch[1][6] - wire_width; -// dishing_thickness = 0.1 * wire_thickness; -// wire_r_per_micron[1][6] = wire_resistance(CU_RESISTIVITY, wire_width, -// wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter); -// ild_thickness = 0.275; -// wire_c_per_micron[1][6] = wire_capacitance(wire_width, wire_thickness, wire_spacing, -// ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, -// fringe_cap); - } - g_tp.wire_local.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.R_per_um += curr_alpha * wire_r_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.C_per_um += curr_alpha * wire_c_per_micron[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - g_tp.wire_local.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][(ram_cell_tech_type == comm_dram)?3:0]; - - g_tp.wire_inside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.R_per_um += curr_alpha* wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.C_per_um += curr_alpha* wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.ild_thickness += curr_alpha * 
ild_thickness[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - g_tp.wire_inside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_is_mat_type]; - - g_tp.wire_outside_mat.pitch += curr_alpha * wire_pitch[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.R_per_um += curr_alpha*wire_r_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.C_per_um += curr_alpha*wire_c_per_micron[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.aspect_ratio += curr_alpha * aspect_ratio[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.ild_thickness += curr_alpha * ild_thickness[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.miller_value += curr_alpha * miller_value[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.horiz_dielectric_constant += curr_alpha* horiz_dielectric_constant[g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - g_tp.wire_outside_mat.vert_dielectric_constant += curr_alpha* vert_dielectric_constant [g_ip->ic_proj_type][g_ip->wire_os_mat_type]; - - g_tp.unit_len_wire_del = g_tp.wire_inside_mat.R_per_um * g_tp.wire_inside_mat.C_per_um / 2; - - g_tp.sense_delay += curr_alpha *SENSE_AMP_D; - g_tp.sense_dy_power += curr_alpha *SENSE_AMP_P; -// g_tp.horiz_dielectric_constant += horiz_dielectric_constant; -// g_tp.vert_dielectric_constant += vert_dielectric_constant; -// g_tp.aspect_ratio += aspect_ratio; -// g_tp.miller_value += miller_value; -// g_tp.ild_thickness += ild_thickness; - - } - g_tp.fringe_cap = fringe_cap; - - double rd = tr_R_on(g_tp.min_w_nmos_, NCH, 1); - double p_to_n_sizing_r = pmos_to_nmos_sz_ratio(); - double c_load = gate_C(g_tp.min_w_nmos_ * (1 + 
p_to_n_sizing_r), 0.0); - double tf = rd * c_load; - g_tp.kinv = horowitz(0, tf, 0.5, 0.5, RISE); - double KLOAD = 1; - c_load = KLOAD * (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(g_tp.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)); - tf = rd * c_load; - g_tp.FO4 = horowitz(0, tf, 0.5, 0.5, RISE); -} - diff --git a/ext/mcpat/xmlParser.cc b/ext/mcpat/xmlParser.cc index 5ac45edae..97532d506 100644 --- a/ext/mcpat/xmlParser.cc +++ b/ext/mcpat/xmlParser.cc @@ -75,6 +75,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (c) 2002, Business-Insight + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * Business-Insight * All rights reserved. * @@ -91,7 +92,7 @@ //#endif #define WIN32_LEAN_AND_MEAN #include // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files - // to have "MessageBoxA" to display error messages for openFilHelper +// to have "MessageBoxA" to display error messages for openFilHelper #endif #include @@ -101,37 +102,49 @@ #include #include -XMLCSTR XMLNode::getVersion() { return _CXML("v2.39"); } -void freeXMLString(XMLSTR t){if(t)free(t);} +XMLCSTR XMLNode::getVersion() { + return _CXML("v2.39"); +} +void freeXMLString(XMLSTR t) { + if (t)free(t); +} static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8; static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1; -inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; } +inline int mmin( const int t1, const int t2 ) { + return t1 < t2 ? t1 : t2; +} // You can modify the initialization of the variable "XMLClearTags" below // to change the clearTags that are currently recognized by the library. // The number on the second columns is the length of the string inside the // first column. The "") }, - { _CXML("") }, - { _CXML("") }, - { _CXML("
")    ,5,  _CXML("
") }, +typedef struct { + XMLCSTR lpszOpen; + int openTagLen; + XMLCSTR lpszClose; +} ALLXMLClearTag; +static ALLXMLClearTag XMLClearTags[] = { + { _CXML("") }, + { _CXML("") }, + { _CXML("") }, + { _CXML("
")    , 5,  _CXML("
") }, // { _CXML("")}, - { NULL ,0, NULL } + { NULL , 0, NULL } }; // You can modify the initialization of the variable "XMLEntities" below // to change the character entities that are currently recognized by the library. // The number on the second columns is the length of the string inside the // first column. Additionally, the syntaxes " " and " " are recognized. -typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity; -static XMLCharacterEntity XMLEntities[] = -{ +typedef struct { + XMLCSTR s; + int l; + XMLCHAR c; +} XMLCharacterEntity; +static XMLCharacterEntity XMLEntities[] = { { _CXML("&" ), 5, _CXML('&' )}, { _CXML("<" ), 4, _CXML('<' )}, { _CXML(">" ), 4, _CXML('>' )}, @@ -147,32 +160,51 @@ static XMLCharacterEntity XMLEntities[] = // The following function parses the XML errors into a user friendly string. // You can edit this to change the output language of the library to something else. -XMLCSTR XMLNode::getError(XMLError xerror) -{ - switch (xerror) - { - case eXMLErrorNone: return _CXML("No error"); - case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag"); - case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found"); - case eXMLErrorEmpty: return _CXML("Error: No XML data"); - case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name"); - case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name"); - case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag"); - case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end"); - case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found"); - case eXMLErrorNoElements: return _CXML("Error: No elements found"); - case eXMLErrorFileNotFound: return _CXML("Error: File not found"); - case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found"); - case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity"); - case eXMLErrorCharacterCodeAbove255: 
return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode."); - case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars"); - case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing"); - case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file"); - - case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4"); - case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated"); - case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character"); - case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small"); +XMLCSTR XMLNode::getError(XMLError xerror) { + switch (xerror) { + case eXMLErrorNone: + return _CXML("No error"); + case eXMLErrorMissingEndTag: + return _CXML("Warning: Unmatched end tag"); + case eXMLErrorNoXMLTagFound: + return _CXML("Warning: No XML tag found"); + case eXMLErrorEmpty: + return _CXML("Error: No XML data"); + case eXMLErrorMissingTagName: + return _CXML("Error: Missing start tag name"); + case eXMLErrorMissingEndTagName: + return _CXML("Error: Missing end tag name"); + case eXMLErrorUnmatchedEndTag: + return _CXML("Error: Unmatched end tag"); + case eXMLErrorUnmatchedEndClearTag: + return _CXML("Error: Unmatched clear tag end"); + case eXMLErrorUnexpectedToken: + return _CXML("Error: Unexpected token found"); + case eXMLErrorNoElements: + return _CXML("Error: No elements found"); + case eXMLErrorFileNotFound: + return _CXML("Error: File not found"); + case eXMLErrorFirstTagNotFound: + return _CXML("Error: First Tag not found"); + case eXMLErrorUnknownCharacterEntity: + return _CXML("Error: Unknown character entity"); + case eXMLErrorCharacterCodeAbove255: + return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode."); + 
case eXMLErrorCharConversionError: + return _CXML("Error: unable to convert between WideChar and MultiByte chars"); + case eXMLErrorCannotOpenWriteFile: + return _CXML("Error: unable to open file for writing"); + case eXMLErrorCannotWriteFile: + return _CXML("Error: cannot write into file"); + + case eXMLErrorBase64DataSizeIsNotMultipleOf4: + return _CXML("Warning: Base64-string length is not a multiple of 4"); + case eXMLErrorBase64DecodeTruncatedData: + return _CXML("Warning: Base64-string is truncated"); + case eXMLErrorBase64DecodeIllegalCharacter: + return _CXML("Error: Base64-string contains an illegal character"); + case eXMLErrorBase64DecodeBufferTooSmall: + return _CXML("Error: Base64 decode output buffer is too small"); }; return _CXML("Unknown"); } @@ -187,168 +219,244 @@ XMLCSTR XMLNode::getError(XMLError xerror) // If you plan to "port" the library to a new system/compiler, all you have to do is // to edit the following lines. #ifdef XML_NO_WIDE_CHAR -char myIsTextWideChar(const void *b, int len) { return FALSE; } +char myIsTextWideChar(const void *b, int len) { + return FALSE; +} #else - #if defined (UNDER_CE) || !defined(_XMLWINDOWS) - char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode - { +#if defined (UNDER_CE) || !defined(_XMLWINDOWS) +// inspired by the Wine API: RtlIsTextUnicode +char myIsTextWideChar(const void *b, int len) { #ifdef sun - // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. - if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE; + // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. 
+ if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE; #endif - const wchar_t *s=(const wchar_t*)b; + const wchar_t *s = (const wchar_t*)b; - // buffer too small: - if (len<(int)sizeof(wchar_t)) return FALSE; + // buffer too small: + if (len < (int)sizeof(wchar_t)) return FALSE; - // odd length test - if (len&1) return FALSE; + // odd length test + if (len&1) return FALSE; - /* only checks the first 256 characters */ - len=mmin(256,len/sizeof(wchar_t)); + /* only checks the first 256 characters */ + len = mmin(256, len / sizeof(wchar_t)); - // Check for the special byte order: - if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; - if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE + // Check for the special byte order: + if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; + if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE - // checks for ASCII characters in the UNICODE stream - int i,stats=0; - for (i=0; ilen/2) return TRUE; + // checks for ASCII characters in the UNICODE stream + int i, stats=0; + for (i=0; ilen/2) return TRUE; - // Check for UNICODE NULL chars - for (i=0; i - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); } - #else - // for gcc - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); } - #endif - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } - static inline FILE *xfopen(XMLCSTR 
filename,XMLCSTR mode) - { - char *filenameAscii=myWideCharToMultiByte(filename); - FILE *f; - if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb"); - else f=fopen(filenameAscii,"wb"); - free(filenameAscii); - return f; - } - #else - static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } - static inline int xstrlen(XMLCSTR c) { return strlen(c); } - static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);} - static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} - static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); } - static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } - static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } - #endif - static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);} +#ifdef XML_NO_WIDE_CHAR +char *myWideCharToMultiByte(const wchar_t *s) { + return NULL; +} +#else +char *myWideCharToMultiByte(const wchar_t *s) { + const wchar_t *ss = s; + int i = (int)wcsrtombs(NULL, &ss, 0, NULL); + if (i < 0) return NULL; + char *d = (char *)malloc(i + 1); + wcsrtombs(d, &s, i, NULL); + d[i] = 0; + return d; +} +#endif +#ifdef _XMLWIDECHAR +wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) { + const char *ss = s; + int i = (int)mbsrtowcs(NULL, &ss, 0, NULL); + if (i < 0) return NULL; + wchar_t *d = (wchar_t *)malloc((i + 1) * sizeof(wchar_t)); + mbsrtowcs(d, &s, i, NULL); + d[i] = 0; + return d; +} +int xstrlen(XMLCSTR c) { + return wcslen(c); +} +#ifdef sun +// for CC +#include +static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wsncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wsncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { + return wscasecmp(c1, c2); +} +#else +// for gcc +static inline int 
xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wcsncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return wcsncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { + return wcscasecmp(c1, c2); +} +#endif +static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { + return (XMLSTR)wcsstr(c1, c2); +} +static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { + return (XMLSTR)wcscpy(c1, c2); +} +static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) { + char *filenameAscii = myWideCharToMultiByte(filename); + FILE *f; + if (mode[0] == _CXML('r')) f = fopen(filenameAscii, "rb"); + else f = fopen(filenameAscii, "wb"); + free(filenameAscii); + return f; +} +#else +static inline FILE *xfopen(XMLCSTR filename, XMLCSTR mode) { + return fopen(filename, mode); +} +static inline int xstrlen(XMLCSTR c) { + return strlen(c); +} +static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { + return strncasecmp(c1, c2, l); +} +static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { + return strncmp(c1, c2, l); +} +static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { + return strcasecmp(c1, c2); +} +static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { + return (XMLSTR)strstr(c1, c2); +} +static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { + return (XMLSTR)strcpy(c1, c2); +} +#endif +static inline int _strnicmp(const char *c1, const char *c2, int l) { + return strncasecmp(c1, c2, l); +} #endif @@ -359,35 +467,86 @@ char myIsTextWideChar(const void *b, int len) { return FALSE; } // There are only here as "convenience" functions for the user. // If you don't need them, you can delete them without any trouble. 
#ifdef _XMLWIDECHAR - #ifdef _XMLWINDOWS - // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 - char xmltob(XMLCSTR t,int v){ if (t&&(*t)) return (char)_wtoi(t); return v; } - int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; } - long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; } - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; } - #else - #ifdef sun - // for CC - #include - char xmltob(XMLCSTR t,int v){ if (t) return (char)wstol(t,NULL,10); return v; } - int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; } - long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; } - #else - // for gcc - char xmltob(XMLCSTR t,int v){ if (t) return (char)wcstol(t,NULL,10); return v; } - int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; } - long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; } - #endif - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; } - #endif +#ifdef _XMLWINDOWS +// for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 +char xmltob(XMLCSTR t, int v) { + if (t && (*t)) return (char)_wtoi(t); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t && (*t)) return _wtoi(t); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t && (*t)) return _wtol(t); + return v; +} +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ + return v; +} #else - char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; } - int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; } - long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; } - double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; } +#ifdef sun +// for CC +#include +char xmltob(XMLCSTR t, int v) { + if (t) return 
(char)wstol(t, NULL, 10); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t) return (int)wstol(t, NULL, 10); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t) return wstol(t, NULL, 10); + return v; +} +#else +// for gcc +char xmltob(XMLCSTR t, int v) { + if (t) return (char)wcstol(t, NULL, 10); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t) return (int)wcstol(t, NULL, 10); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t) return wcstol(t, NULL, 10); + return v; +} +#endif +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ + return v; +} +#endif +#else +char xmltob(XMLCSTR t, char v) { + if (t && (*t)) return (char)atoi(t); + return v; +} +int xmltoi(XMLCSTR t, int v) { + if (t && (*t)) return atoi(t); + return v; +} +long xmltol(XMLCSTR t, long v) { + if (t && (*t)) return atol(t); + return v; +} +double xmltof(XMLCSTR t, double v) { + if (t && (*t)) return atof(t); + return v; +} #endif -XMLCSTR xmltoa(XMLCSTR t,XMLCSTR v){ if (t) return t; return v; } -XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; } +XMLCSTR xmltoa(XMLCSTR t, XMLCSTR v) { + if (t) return t; + return v; +} +XMLCHAR xmltoc(XMLCSTR t, XMLCHAR v) { + if (t && (*t)) return *t; + return v; +} ///////////////////////////////////////////////////////////////////////// // the "openFileHelper" function // @@ -395,42 +554,47 @@ XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; } // Since each application has its own way to report and deal with errors, you should modify & rewrite // the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs. -XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) -{ +XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) { // guess the value of the global parameter "characterEncoding" // (the guess is based on the first 200 bytes of the file). 
- FILE *f=xfopen(filename,_CXML("rb")); - if (f) - { + FILE *f = xfopen(filename, _CXML("rb")); + if (f) { char bb[205]; - int l=(int)fread(bb,1,200,f); - setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText); + int l = (int)fread(bb, 1, 200, f); + setGlobalOptions(guessCharEncoding(bb, l), guessWideCharChars, + dropWhiteSpace, removeCommentsInMiddleOfText); fclose(f); } // parse the file XMLResults pResults; - XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults); + XMLNode xnode = XMLNode::parseFile(filename, tag, &pResults); // display error message (if any) - if (pResults.error != eXMLErrorNone) - { + if (pResults.error != eXMLErrorNone) { // create message - char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML(""); - if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; } + char message[2000], *s1 = (char*)"", *s3 = (char*)""; + XMLCSTR s2 = _CXML(""); + if (pResults.error == eXMLErrorFirstTagNotFound) { + s1 = (char*)"First Tag should be '"; + s2 = tag; + s3 = (char*)"'.\n"; + } sprintf(message, #ifdef _XMLWIDECHAR - "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s" + "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s" #else - "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s" + "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s" #endif - ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3); + , filename, XMLNode::getError(pResults.error), pResults.nLine, + pResults.nColumn, s1, s2, s3); // display message #if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_) - MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST); + MessageBoxA(NULL, message, "XML Parsing error", MB_OK | MB_ICONERROR | + MB_TOPMOST); #else - printf("%s",message); + printf("%s", message); #endif 
exit(255); } @@ -450,106 +614,101 @@ XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) // This table is used as lookup-table to know the length of a character (in byte) based on the // content of the first byte of the character. // (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ). -static const char XML_utf8ByteTable[256] = -{ +static const char XML_utf8ByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 - 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte - 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 End of ASCII range + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x80 0x80 to 0xc1 invalid + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x90 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xa0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xb0 + 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2,// 0xc0 0xc2 to 0xdf 2 byte + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0 + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,// 0xe0 0xe0 to 0xef 3 byte + 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid }; -static const char XML_legacyByteTable[256] = -{ - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 +static const char XML_legacyByteTable[256] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; -static const char XML_sjisByteTable[256] = -{ +static const char XML_sjisByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x80 0x81 to 0x9F 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x90 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xa0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xb0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xc0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0 0xe0 to 0xef 2 bytes + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 }; -static const char XML_gb2312ByteTable[256] = -{ +static const char XML_gb2312ByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 - 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x80 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x90 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xa0 0xa1 to 0xf7 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xb0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xc0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0 + 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 }; -static const char XML_gbk_big5_ByteTable[256] = -{ +static const char XML_gbk_big5_ByteTable[256] = { // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 - 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 - 
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0 + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x00 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x10 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x20 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x30 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x40 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x50 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x60 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,// 0x70 + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x80 0x81 to 0xfe 2 bytes + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0x90 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xa0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xb0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xc0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xd0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,// 0xe0 + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 // 0xf0 }; -static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8" +// the default is "characterEncoding=XMLNode::encoding_UTF8" +static const char *XML_ByteTable = (const char *)XML_utf8ByteTable; #endif XMLNode XMLNode::emptyXMLNode; -XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL}; -XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL}; +XMLClear XMLNode::emptyXMLClear = { NULL, NULL, NULL}; +XMLAttribute XMLNode::emptyXMLAttribute = { NULL, NULL}; // Enumeration used to decipher what type a token is -typedef enum XMLTokenTypeTag -{ +typedef enum XMLTokenTypeTag { eTokenText = 0, eTokenQuotedText, eTokenTagStart, /* "<" */ @@ -563,8 +722,7 @@ typedef enum XMLTokenTypeTag } XMLTokenType; // Main structure used for parsing XML -typedef struct XML -{ +typedef struct XML { XMLCSTR lpXML; XMLCSTR lpszText; int nIndex,nIndexMissigEndTag; @@ -576,15 +734,13 @@ 
typedef struct XML int nFirst; } XML; -typedef struct -{ +typedef struct { ALLXMLClearTag *pClr; XMLCSTR pStr; } NextToken; // Enumeration used when parsing attributes -typedef enum Attrib -{ +typedef enum Attrib { eAttribName = 0, eAttribEquals, eAttribValue @@ -592,118 +748,126 @@ typedef enum Attrib // Enumeration used when parsing elements to dictate whether we are currently // inside a tag -typedef enum Status -{ +typedef enum Status { eInsideTag = 0, eOutsideTag } Status; -XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const -{ +XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const { if (!d) return eXMLErrorNone; - FILE *f=xfopen(filename,_CXML("wb")); + FILE *f = xfopen(filename, _CXML("wb")); if (!f) return eXMLErrorCannotOpenWriteFile; #ifdef _XMLWIDECHAR - unsigned char h[2]={ 0xFF, 0xFE }; - if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile; - if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) - { - if (!fwrite(L"\n",sizeof(wchar_t)*40,1,f)) + unsigned char h[2] = { 0xFF, 0xFE }; + if (!fwrite(h, 2, 1, f)) return eXMLErrorCannotWriteFile; + if ((!isDeclaration()) && ((d->lpszName) || + (!getChildNode().isDeclaration()))) { + if (!fwrite(L"\n", + sizeof(wchar_t)*40, 1, f)) return eXMLErrorCannotWriteFile; } #else - if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) - { - if (characterEncoding==char_encoding_UTF8) - { + if ((!isDeclaration()) && ((d->lpszName) || + (!getChildNode().isDeclaration()))) { + if (characterEncoding == char_encoding_UTF8) { // header so that windows recognize the file as UTF-8: - unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile; - encoding="utf-8"; - } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS"; - - if (!encoding) encoding="ISO-8859-1"; - if (fprintf(f,"\n",encoding)<0) return eXMLErrorCannotWriteFile; - } else - { - if 
(characterEncoding==char_encoding_UTF8) - { - unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile; + unsigned char h[3] = {0xEF, 0xBB, 0xBF}; + if (!fwrite(h, 3, 1, f)) return eXMLErrorCannotWriteFile; + encoding = "utf-8"; + } else if (characterEncoding == char_encoding_ShiftJIS) + encoding = "SHIFT-JIS"; + + if (!encoding) encoding = "ISO-8859-1"; + if (fprintf(f, "\n", encoding) + < 0) + return eXMLErrorCannotWriteFile; + } else { + if (characterEncoding == char_encoding_UTF8) { + unsigned char h[3] = {0xEF, 0xBB, 0xBF}; + if (!fwrite(h, 3, 1, f)) return eXMLErrorCannotWriteFile; } } #endif int i; - XMLSTR t=createXMLString(nFormat,&i); - if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile; - if (fclose(f)!=0) return eXMLErrorCannotWriteFile; + XMLSTR t = createXMLString(nFormat, &i); + if (!fwrite(t, sizeof(XMLCHAR)*i, 1, f)) return eXMLErrorCannotWriteFile; + if (fclose(f) != 0) return eXMLErrorCannotWriteFile; free(t); return eXMLErrorNone; } // Duplicate a given string. 
-XMLSTR stringDup(XMLCSTR lpszData, int cbData) -{ - if (lpszData==NULL) return NULL; +XMLSTR stringDup(XMLCSTR lpszData, int cbData) { + if (lpszData == NULL) return NULL; XMLSTR lpszNew; - if (cbData==-1) cbData=(int)xstrlen(lpszData); - lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR)); - if (lpszNew) - { + if (cbData == -1) cbData = (int)xstrlen(lpszData); + lpszNew = (XMLSTR)malloc((cbData + 1) * sizeof(XMLCHAR)); + if (lpszNew) { memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR)); lpszNew[cbData] = (XMLCHAR)NULL; } return lpszNew; } -XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source) -{ - XMLSTR dd=dest; +XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest, XMLCSTR source) { + XMLSTR dd = dest; XMLCHAR ch; XMLCharacterEntity *entity; - while ((ch=*source)) - { - entity=XMLEntities; - do - { - if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; } + while ((ch = *source)) { + entity = XMLEntities; + do { + if (ch == entity->c) { + xstrcpy(dest, entity->s); + dest += entity->l; + source++; + goto out_of_loop1; + } entity++; - } while(entity->s); + } while (entity->s); #ifdef _XMLWIDECHAR - *(dest++)=*(source++); + *(dest++) = *(source++); #else - switch(XML_ByteTable[(unsigned char)ch]) - { - case 4: *(dest++)=*(source++); - case 3: *(dest++)=*(source++); - case 2: *(dest++)=*(source++); - case 1: *(dest++)=*(source++); + switch (XML_ByteTable[(unsigned char)ch]) { + case 4: + *(dest++) = *(source++); + case 3: + *(dest++) = *(source++); + case 2: + *(dest++) = *(source++); + case 1: + *(dest++) = *(source++); } #endif out_of_loop1: ; } - *dest=0; + *dest = 0; return dd; } // private (used while rendering): -int ToXMLStringTool::lengthXMLString(XMLCSTR source) -{ - int r=0; +int ToXMLStringTool::lengthXMLString(XMLCSTR source) { + int r = 0; XMLCharacterEntity *entity; XMLCHAR ch; - while ((ch=*source)) - { - entity=XMLEntities; - do - { - if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; } + 
while ((ch = *source)) { + entity = XMLEntities; + do { + if (ch == entity->c) { + r += entity->l; + source++; + goto out_of_loop1; + } entity++; - } while(entity->s); + } while (entity->s); #ifdef _XMLWIDECHAR - r++; source++; + r++; + source++; #else - ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch; + ch = XML_ByteTable[(unsigned char)ch]; + r += ch; + source += ch; #endif out_of_loop1: ; @@ -711,18 +875,25 @@ out_of_loop1: return r; } -ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); } -void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; } -XMLSTR ToXMLStringTool::toXML(XMLCSTR source) -{ - int l=lengthXMLString(source)+1; - if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); } - return toXMLUnSafe(buf,source); +ToXMLStringTool::~ToXMLStringTool() { + freeBuffer(); +} +void ToXMLStringTool::freeBuffer() { + if (buf) free(buf); + buf = NULL; + buflen = 0; +} +XMLSTR ToXMLStringTool::toXML(XMLCSTR source) { + int l = lengthXMLString(source) + 1; + if (l > buflen) { + buflen = l; + buf = (XMLSTR)realloc(buf, l * sizeof(XMLCHAR)); + } + return toXMLUnSafe(buf, source); } // private: -XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) -{ +XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) { // This function is the opposite of the function "toXMLString". It decodes the escape // sequences &, ", ', <, > and replace them by the characters // &,",',<,>. This function is used internally by the XML Parser. 
All the calls to @@ -732,108 +903,134 @@ XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) // out: new allocated string converted from xml if (!s) return NULL; - int ll=0,j; + int ll = 0, j; XMLSTR d; - XMLCSTR ss=s; + XMLCSTR ss = s; XMLCharacterEntity *entity; - while ((lo>0)&&(*s)) - { - if (*s==_CXML('&')) - { - if ((lo>2)&&(s[1]==_CXML('#'))) - { - s+=2; lo-=2; - if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; } - while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++; - if (*s!=_CXML(';')) - { - pXML->error=eXMLErrorUnknownCharacterEntity; + while ((lo > 0) && (*s)) { + if (*s == _CXML('&')) { + if ((lo > 2) && (s[1] == _CXML('#'))) { + s += 2; + lo -= 2; + if ((*s == _CXML('X')) || (*s == _CXML('x'))) { + s++; + lo--; + } + while ((*s) && (*s != _CXML(';')) && ((lo--) > 0)) { + s++; + } + if (*s != _CXML(';')) { + pXML->error = eXMLErrorUnknownCharacterEntity; return NULL; } - s++; lo--; - } else - { - entity=XMLEntities; - do - { - if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; } + s++; + lo--; + } else { + entity = XMLEntities; + do { + if ((lo >= entity->l) && + (xstrnicmp(s, entity->s, entity->l) == 0)) { + s += entity->l; + lo -= entity->l; + break; + } entity++; - } while(entity->s); - if (!entity->s) - { - pXML->error=eXMLErrorUnknownCharacterEntity; + } while (entity->s); + if (!entity->s) { + pXML->error = eXMLErrorUnknownCharacterEntity; return NULL; } } - } else - { + } else { #ifdef _XMLWIDECHAR - s++; lo--; + s++; + lo--; #else - j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1; + j = XML_ByteTable[(unsigned char)*s]; + s += j; + lo -= j; + ll += j - 1; #endif } ll++; } - d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR)); - s=d; - while (ll-->0) - { - if (*ss==_CXML('&')) - { - if (ss[1]==_CXML('#')) - { - ss+=2; j=0; - if ((*ss==_CXML('X'))||(*ss==_CXML('x'))) - { + d = (XMLSTR)malloc((ll + 1) * sizeof(XMLCHAR)); + s = d; + while (ll-- > 0) { + if (*ss == _CXML('&')) { + if (ss[1] == _CXML('#')) { 
+ ss += 2; + j = 0; + if ((*ss == _CXML('X')) || (*ss == _CXML('x'))) { ss++; - while (*ss!=_CXML(';')) - { - if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0'); - else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10; - else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10; - else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} + while (*ss != _CXML(';')) { + if ((*ss >= _CXML('0')) && (*ss <= _CXML('9'))) { + j = (j << 4) + *ss - _CXML('0'); + } else if ((*ss >= _CXML('A')) && (*ss <= _CXML('F'))) { + j = (j << 4) + *ss - _CXML('A') + 10; + } else if ((*ss >= _CXML('a')) && (*ss <= _CXML('f'))) { + j = (j << 4) + *ss - _CXML('a') + 10; + } else { + free((void*)s); + pXML->error = eXMLErrorUnknownCharacterEntity; + return NULL; + } ss++; } - } else - { - while (*ss!=_CXML(';')) - { - if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0'); - else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} + } else { + while (*ss != _CXML(';')) { + if ((*ss >= _CXML('0')) && (*ss <= _CXML('9'))) { + j = (j * 10) + *ss - _CXML('0'); + } else { + free((void*)s); + pXML->error = eXMLErrorUnknownCharacterEntity; + return NULL; + } ss++; } } #ifndef _XMLWIDECHAR - if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;} + if (j > 255) { + free((void*)s); + pXML->error = eXMLErrorCharacterCodeAbove255; + return NULL; + } #endif - (*d++)=(XMLCHAR)j; ss++; - } else - { - entity=XMLEntities; - do - { - if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; } + (*d++) = (XMLCHAR)j; + ss++; + } else { + entity = XMLEntities; + do { + if (xstrnicmp(ss, entity->s, entity->l) == 0) { + *(d++) = entity->c; + ss += entity->l; + break; + } entity++; - } while(entity->s); + } while (entity->s); } - } else - { + } else { #ifdef _XMLWIDECHAR - *(d++)=*(ss++); + *(d++) = *(ss++); #else - switch(XML_ByteTable[(unsigned 
char)*ss]) - { - case 4: *(d++)=*(ss++); ll--; - case 3: *(d++)=*(ss++); ll--; - case 2: *(d++)=*(ss++); ll--; - case 1: *(d++)=*(ss++); + switch (XML_ByteTable[(unsigned char)*ss]) { + case 4: + *(d++) = *(ss++); + ll--; + case 3: + *(d++) = *(ss++); + ll--; + case 2: + *(d++) = *(ss++); + ll--; + case 1: + *(d++) = *(ss++); } #endif } } - *d=0; + *d = 0; return (XMLSTR)s; } @@ -846,66 +1043,64 @@ char myTagCompare(XMLCSTR cclose, XMLCSTR copen) // return 1 if different { if (!cclose) return 1; - int l=(int)xstrlen(cclose); - if (xstrnicmp(cclose, copen, l)!=0) return 1; - const XMLCHAR c=copen[l]; - if (XML_isSPACECHAR(c)|| - (c==_CXML('/' ))|| - (c==_CXML('<' ))|| - (c==_CXML('>' ))|| - (c==_CXML('=' ))) return 0; + int l = (int)xstrlen(cclose); + if (xstrnicmp(cclose, copen, l) != 0) return 1; + const XMLCHAR c = copen[l]; + if (XML_isSPACECHAR(c) || + (c == _CXML('/' )) || + (c == _CXML('<' )) || + (c == _CXML('>' )) || + (c == _CXML('=' ))) return 0; return 1; } // Obtain the next character from the string. -static inline XMLCHAR getNextChar(XML *pXML) -{ +static inline XMLCHAR getNextChar(XML *pXML) { XMLCHAR ch = pXML->lpXML[pXML->nIndex]; #ifdef _XMLWIDECHAR - if (ch!=0) pXML->nIndex++; + if (ch != 0) pXML->nIndex++; #else - pXML->nIndex+=XML_ByteTable[(unsigned char)ch]; + pXML->nIndex += XML_ByteTable[(unsigned char)ch]; #endif return ch; } // Find the next token in a string. // pcbToken contains the number of characters that have been read. 
-static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType) -{ +static NextToken GetNextToken(XML *pXML, int *pcbToken, + enum XMLTokenTypeTag *pType) { NextToken result; XMLCHAR ch; XMLCHAR chTemp; - int indexStart,nFoundMatch,nIsText=FALSE; - result.pClr=NULL; // prevent warning + int indexStart, nFoundMatch, nIsText = FALSE; + result.pClr = NULL; // prevent warning // Find next non-white space character - do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch); + do { + indexStart = pXML->nIndex; + ch = getNextChar(pXML); + } while XML_isSPACECHAR(ch); - if (ch) - { + if (ch) { // Cache the current string pointer result.pStr = &pXML->lpXML[indexStart]; // First check whether the token is in the clear tag list (meaning it // does not need formatting). - ALLXMLClearTag *ctag=XMLClearTags; - do - { - if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0) - { - result.pClr=ctag; - pXML->nIndex+=ctag->openTagLen-1; - *pType=eTokenClear; + ALLXMLClearTag *ctag = XMLClearTags; + do { + if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen) == 0) { + result.pClr = ctag; + pXML->nIndex += ctag->openTagLen - 1; + *pType = eTokenClear; return result; } ctag++; - } while(ctag->lpszOpen); + } while (ctag->lpszOpen); // If we didn't find a clear tag then check for standard tokens - switch(ch) - { - // Check for quotes + switch (ch) { + // Check for quotes case _CXML('\''): case _CXML('\"'): // Type of token @@ -916,17 +1111,20 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT nFoundMatch = FALSE; // Search through the string to find a matching quote - while((ch = getNextChar(pXML))) - { - if (ch==chTemp) { nFoundMatch = TRUE; break; } - if (ch==_CXML('<')) break; + while ((ch = getNextChar(pXML))) { + if (ch == chTemp) { + nFoundMatch = TRUE; + break; + } + if (ch == _CXML('<')) { + break; + } } // If we failed to find a matching quote - if (nFoundMatch == FALSE) - { - 
pXML->nIndex=indexStart+1; - nIsText=TRUE; + if (nFoundMatch == FALSE) { + pXML->nIndex = indexStart + 1; + nIsText = TRUE; break; } @@ -935,17 +1133,17 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT break; - // Equals (used with attribute values) + // Equals (used with attribute values) case _CXML('='): *pType = eTokenEquals; break; - // Close tag + // Close tag case _CXML('>'): *pType = eTokenCloseTag; break; - // Check for tag start and tag end + // Check for tag start and tag end case _CXML('<'): // Peek at the next character to see if we have an end tag 'lpXML[pXML->nIndex]; // If we have a tag end... - if (chTemp == _CXML('/')) - { + if (chTemp == _CXML('/')) { // Set the type and ensure we point at the next character getNextChar(pXML); *pType = eTokenTagEnd; } // If we have an XML declaration tag - else if (chTemp == _CXML('?')) - { + else if (chTemp == _CXML('?')) { // Set the type and ensure we point at the next character getNextChar(pXML); @@ -970,21 +1166,19 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT } // Otherwise we must have a start tag - else - { + else { *pType = eTokenTagStart; } break; - // Check to see if we have a short hand type end tag ('/>'). + // Check to see if we have a short hand type end tag ('/>'). case _CXML('/'): // Peek at the next character to see if we have a short end tag '/>' chTemp = pXML->lpXML[pXML->nIndex]; // If we have a short hand end tag... 
- if (chTemp == _CXML('>')) - { + if (chTemp == _CXML('>')) { // Set the type and ensure we point at the next character getNextChar(pXML); *pType = eTokenShortHandClose; @@ -994,65 +1188,69 @@ static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pT // If we haven't found a short hand closing tag then drop into the // text process - // Other characters + // Other characters default: nIsText = TRUE; } // If this is a TEXT node - if (nIsText) - { + if (nIsText) { // Indicate we are dealing with text *pType = eTokenText; - while((ch = getNextChar(pXML))) - { - if XML_isSPACECHAR(ch) - { - indexStart++; break; - - } else if (ch==_CXML('/')) - { + while ((ch = getNextChar(pXML))) { + if XML_isSPACECHAR(ch) { + indexStart++; + break; + + } else if (ch == _CXML('/')) { // If we find a slash then this maybe text or a short hand end tag // Peek at the next character to see it we have short hand end tag - ch=pXML->lpXML[pXML->nIndex]; + ch = pXML->lpXML[pXML->nIndex]; // If we found a short hand end tag then we need to exit the loop - if (ch==_CXML('>')) { pXML->nIndex--; break; } + if (ch == _CXML('>')) { + pXML->nIndex--; + break; + } - } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('='))) - { - pXML->nIndex--; break; + } else if ((ch == _CXML('<')) || (ch == _CXML('>')) || + (ch == _CXML('='))) { + pXML->nIndex--; + break; } } } - *pcbToken = pXML->nIndex-indexStart; - } else - { + *pcbToken = pXML->nIndex - indexStart; + } else { // If we failed to obtain a valid character *pcbToken = 0; *pType = eTokenError; - result.pStr=NULL; + result.pStr = NULL; } return result; } -XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) -{ - if (!d) { free(lpszName); return NULL; } - if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName); - d->lpszName=lpszName; +XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) { + if (!d) { + free(lpszName); + return NULL; + } + if (d->lpszName && (lpszName != d->lpszName)) free((void*)d->lpszName); + 
d->lpszName = lpszName; return lpszName; } // private: -XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; } -XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) -{ - d=(XMLNodeData*)malloc(sizeof(XMLNodeData)); - d->ref_count=1; +XMLNode::XMLNode(struct XMLNodeDataTag *p) { + d = p; + (p->ref_count)++; +} +XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) { + d = (XMLNodeData*)malloc(sizeof(XMLNodeData)); + d->ref_count = 1; - d->lpszName=NULL; - d->nChild= 0; + d->lpszName = NULL; + d->nChild = 0; d->nText = 0; d->nClear = 0; d->nAttribute = 0; @@ -1060,25 +1258,35 @@ XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) d->isDeclaration = isDeclaration; d->pParent = pParent; - d->pChild= NULL; - d->pText= NULL; - d->pClear= NULL; - d->pAttribute= NULL; - d->pOrder= NULL; + d->pChild = NULL; + d->pText = NULL; + d->pClear = NULL; + d->pAttribute = NULL; + d->pOrder = NULL; updateName_WOSD(lpszName); } -XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); } -XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); } +XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { + return XMLNode(NULL, lpszName, isDeclaration); +} +XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { + return XMLNode(NULL, stringDup(lpszName), isDeclaration); +} #define MEMORYINCREASE 50 -static inline void myFree(void *p) { if (p) free(p); } -static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) -{ - if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); } - if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem); +static inline void myFree(void *p) { + if (p) free(p); +} +static inline void *myRealloc(void *p, int newsize, int memInc, 
int sizeofElem) { + if (p == NULL) { + if (memInc) return malloc(memInc*sizeofElem); + return malloc(sizeofElem); + } + if ((memInc == 0) || ((newsize % memInc) == 0)) { + p = realloc(p, (newsize + memInc) * sizeofElem); + } // if (!p) // { // printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220); @@ -1087,20 +1295,23 @@ static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) } // private: -XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype) -{ - if (index<0) return -1; - int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i; +XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, + XMLElementType xxtype) { + if (index < 0) return -1; + int i = 0, j = (int)((index << 2) + xxtype), *o = d->pOrder; + while (o[i] != j) i++; + return i; } // private: // update "order" information when deleting a content of a XMLNode -int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) -{ - int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t); - memmove(o+i, o+i+1, (n-i)*sizeof(int)); - for (;inChild + d->nText + d->nClear; + int *o = d->pOrder; + int i = findPosition(d, index, t); + memmove(o + i, o + i + 1, (n - i)*sizeof(int)); + for (; i < n; i++) + if ((o[i]&3) == (int)t) o[i] -= 4; // We should normally do: // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int)); // but we skip reallocation because it's too time consuming. 
@@ -1108,51 +1319,67 @@ int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) return i; } -void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype) -{ +void *XMLNode::addToOrder(int memoryIncrease, int *_pos, int nc, void *p, + int size, XMLElementType xtype) { // in: *_pos is the position inside d->pOrder ("-1" means "EndOf") // out: *_pos is the index inside p - p=myRealloc(p,(nc+1),memoryIncrease,size); - int n=d->nChild+d->nText+d->nClear; - d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int)); - int pos=*_pos,*o=d->pOrder; - - if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; } + p = myRealloc(p, (nc + 1), memoryIncrease, size); + int n = d->nChild + d->nText + d->nClear; + d->pOrder = (int*)myRealloc(d->pOrder, n + 1, memoryIncrease * 3, + sizeof(int)); + int pos = *_pos, *o = d->pOrder; + + if ((pos < 0) || (pos >= n)) { + *_pos = nc; + o[n] = (int)((nc << 2) + xtype); + return p; + } - int i=pos; - memmove(o+i+1, o+i, (n-i)*sizeof(int)); + int i = pos; + memmove(o + i + 1, o + i, (n - i)*sizeof(int)); - while ((pos>2; - memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size); + *_pos = pos = o[pos] >> 2; + memmove(((char*)p) + (pos + 1)*size, ((char*)p) + pos*size, (nc - pos)*size); return p; } // Add a child node to the given element. 
-XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos) -{ +XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, + char isDeclaration, int pos) { if (!lpszName) return emptyXMLNode; - d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); - d->pChild[pos].d=NULL; - d->pChild[pos]=XMLNode(d,lpszName,isDeclaration); + d->pChild = (XMLNode*)addToOrder(memoryIncrease, &pos, d->nChild, + d->pChild, sizeof(XMLNode), eNodeChild); + d->pChild[pos].d = NULL; + d->pChild[pos] = XMLNode(d, lpszName, isDeclaration); d->nChild++; return d->pChild[pos]; } // Add an attribute to an element. -XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev) -{ +XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease, XMLSTR lpszName, + XMLSTR lpszValuev) { if (!lpszName) return &emptyXMLAttribute; - if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; } - int nc=d->nAttribute; - d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute)); - XMLAttribute *pAttr=d->pAttribute+nc; + if (!d) { + myFree(lpszName); + myFree(lpszValuev); + return &emptyXMLAttribute; + } + int nc = d->nAttribute; + d->pAttribute = (XMLAttribute*)myRealloc(d->pAttribute, (nc + 1), + memoryIncrease, + sizeof(XMLAttribute)); + XMLAttribute *pAttr = d->pAttribute + nc; pAttr->lpszName = lpszName; pAttr->lpszValue = lpszValuev; d->nAttribute++; @@ -1160,26 +1387,35 @@ XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XML } // Add text to the element. 
-XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) -{ +XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) { if (!lpszValue) return NULL; - if (!d) { myFree(lpszValue); return NULL; } - d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText); - d->pText[pos]=lpszValue; + if (!d) { + myFree(lpszValue); + return NULL; + } + d->pText = (XMLCSTR*)addToOrder(memoryIncrease, &pos, d->nText, d->pText, + sizeof(XMLSTR), eNodeText); + d->pText[pos] = lpszValue; d->nText++; return lpszValue; } // Add clear (unformatted) text to the element. -XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos) -{ +XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, + XMLCSTR lpszOpen, XMLCSTR lpszClose, + int pos) { if (!lpszValue) return &emptyXMLClear; - if (!d) { myFree(lpszValue); return &emptyXMLClear; } - d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear); - XMLClear *pNewClear=d->pClear+pos; + if (!d) { + myFree(lpszValue); + return &emptyXMLClear; + } + d->pClear = (XMLClear *)addToOrder(memoryIncrease, &pos, d->nClear, + d->pClear, sizeof(XMLClear), + eNodeClear); + XMLClear *pNewClear = d->pClear + pos; pNewClear->lpszValue = lpszValue; - if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen; - if (!lpszClose) lpszClose=XMLClearTags->lpszClose; + if (!lpszOpen) lpszOpen = XMLClearTags->lpszOpen; + if (!lpszClose) lpszClose = XMLClearTags->lpszClose; pNewClear->lpszOpenTag = lpszOpen; pNewClear->lpszCloseTag = lpszClose; d->nClear++; @@ -1188,41 +1424,44 @@ XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR l // private: // Parse a clear (unformatted) type node. 
-char XMLNode::parseClearTag(void *px, void *_pClear) -{ - XML *pXML=(XML *)px; - ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear); - int cbTemp=0; - XMLCSTR lpszTemp=NULL; - XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex]; - static XMLCSTR docTypeEnd=_CXML("]>"); +char XMLNode::parseClearTag(void *px, void *_pClear) { + XML *pXML = (XML *)px; + ALLXMLClearTag pClear = *((ALLXMLClearTag*)_pClear); + int cbTemp = 0; + XMLCSTR lpszTemp = NULL; + XMLCSTR lpXML = &pXML->lpXML[pXML->nIndex]; + static XMLCSTR docTypeEnd = _CXML("]>"); // Find the closing tag // Seems the ')) { lpszTemp=pCh; break; } + if (pClear.lpszOpen == XMLClearTags[1].lpszOpen) { + XMLCSTR pCh = lpXML; + while (*pCh) { + if (*pCh == _CXML('<')) { + pClear.lpszClose = docTypeEnd; + lpszTemp = xstrstr(lpXML, docTypeEnd); + break; + } else if (*pCh == _CXML('>')) { + lpszTemp = pCh; + break; + } #ifdef _XMLWIDECHAR pCh++; #else - pCh+=XML_ByteTable[(unsigned char)(*pCh)]; + pCh += XML_ByteTable[(unsigned char)(*pCh)]; #endif } - } else lpszTemp=xstrstr(lpXML, pClear.lpszClose); + } else lpszTemp = xstrstr(lpXML, pClear.lpszClose); - if (lpszTemp) - { + if (lpszTemp) { // Cache the size and increment the index cbTemp = (int)(lpszTemp - lpXML); - pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose); + pXML->nIndex += cbTemp + (int)xstrlen(pClear.lpszClose); // Add the clear node to the current element - addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1); + addClear_priv(MEMORYINCREASE, stringDup(lpXML, cbTemp), + pClear.lpszOpen, pClear.lpszClose, -1); return 0; } @@ -1231,63 +1470,81 @@ char XMLNode::parseClearTag(void *px, void *_pClear) return 1; } -void XMLNode::exactMemory(XMLNodeData *d) -{ - if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int)); - if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode)); - if (d->pAttribute) 
d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute)); - if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR)); - if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear)); +void XMLNode::exactMemory(XMLNodeData *d) { + if (d->pOrder) { + d->pOrder = (int*)realloc(d->pOrder, (d->nChild + d->nText + d->nClear) + * sizeof(int)); + } + if (d->pChild) { + d->pChild = (XMLNode*)realloc(d->pChild, d->nChild * sizeof(XMLNode)); + } + if (d->pAttribute) { + d->pAttribute = (XMLAttribute*)realloc(d->pAttribute, d->nAttribute * + sizeof(XMLAttribute)); + } + if (d->pText) { + d->pText = (XMLCSTR*)realloc(d->pText, d->nText * sizeof(XMLSTR)); + } + if (d->pClear) { + d->pClear = (XMLClear *)realloc(d->pClear, d->nClear * sizeof(XMLClear)); + } } -char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) -{ - XML *pXML=(XML *)pa; - XMLCSTR lpszText=pXML->lpszText; +char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) { + XML *pXML = (XML *)pa; + XMLCSTR lpszText = pXML->lpszText; if (!lpszText) return 0; - if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++; + if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText) && + (lpszText != tokenPStr)) lpszText++; int cbText = (int)(tokenPStr - lpszText); - if (!cbText) { pXML->lpszText=NULL; return 0; } - if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; } - if (!cbText) { pXML->lpszText=NULL; return 0; } - XMLSTR lpt=fromXMLString(lpszText,cbText,pXML); + if (!cbText) { + pXML->lpszText = NULL; + return 0; + } + if (dropWhiteSpace) { + cbText--; + while ((cbText) && XML_isSPACECHAR(lpszText[cbText])) cbText--; + cbText++; + } + if (!cbText) { + pXML->lpszText = NULL; + return 0; + } + XMLSTR lpt = fromXMLString(lpszText, cbText, pXML); if (!lpt) return 1; - pXML->lpszText=NULL; - if (removeCommentsInMiddleOfText && d->nText && d->nClear) - { + pXML->lpszText = NULL; + 
if (removeCommentsInMiddleOfText && d->nText && d->nClear) { // if the previous insertion was a comment () AND // if the previous previous insertion was a text then, delete the comment and append the text - int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder; - if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText)) - { - int i=o[n]>>2; - if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen) - { + int n = d->nChild + d->nText + d->nClear - 1, *o = d->pOrder; + if (((o[n]&3) == eNodeClear) && ((o[n-1]&3) == eNodeText)) { + int i = o[n] >> 2; + if (d->pClear[i].lpszOpenTag == XMLClearTags[2].lpszOpen) { deleteClear(i); - i=o[n-1]>>2; - n=xstrlen(d->pText[i]); - int n2=xstrlen(lpt)+1; - d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR)); + i = o[n-1] >> 2; + n = xstrlen(d->pText[i]); + int n2 = xstrlen(lpt) + 1; + d->pText[i] = (XMLSTR)realloc((void*)d->pText[i], (n + n2) * + sizeof(XMLCHAR)); if (!d->pText[i]) return 1; - memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR)); + memcpy((void*)(d->pText[i] + n), lpt, n2*sizeof(XMLCHAR)); free(lpt); return 0; } } } - addText_priv(MEMORYINCREASE,lpt,-1); + addText_priv(MEMORYINCREASE, lpt, -1); return 0; } // private: // Recursively parse an XML element. -int XMLNode::ParseXMLElement(void *pa) -{ - XML *pXML=(XML *)pa; +int XMLNode::ParseXMLElement(void *pa) { + XML *pXML = (XML *)pa; int cbToken; enum XMLTokenTypeTag xtype; NextToken token; - XMLCSTR lpszTemp=NULL; - int cbTemp=0; + XMLCSTR lpszTemp = NULL; + int cbTemp = 0; char nDeclaration; XMLNode pNew; enum Status status; // inside or outside a tag @@ -1296,36 +1553,30 @@ int XMLNode::ParseXMLElement(void *pa) assert(pXML); // If this is the first call to the function - if (pXML->nFirst) - { + if (pXML->nFirst) { // Assume we are outside of a tag definition pXML->nFirst = FALSE; status = eOutsideTag; - } else - { + } else { // If this is not the first call then we should only be called when inside a tag. 
status = eInsideTag; } // Iterate through the tokens in the document - for(;;) - { + for (;;) { // Obtain the next token token = GetNextToken(pXML, &cbToken, &xtype); - if (xtype != eTokenError) - { + if (xtype != eTokenError) { // Check the current status - switch(status) - { + switch (status) { - // If we are outside of a tag definition + // If we are outside of a tag definition case eOutsideTag: // Check what type of token we obtained - switch(xtype) - { - // If we have found text or quoted text + switch (xtype) { + // If we have found text or quoted text case eTokenText: case eTokenCloseTag: /* '>' */ case eTokenShortHandClose: /* '/>' */ @@ -1333,7 +1584,7 @@ int XMLNode::ParseXMLElement(void *pa) case eTokenEquals: break; - // If we found a start tag '<' and declarations 'error = eXMLErrorMissingTagName; return FALSE; } @@ -1359,8 +1609,7 @@ int XMLNode::ParseXMLElement(void *pa) #ifdef APPROXIMATE_PARSING if (d->lpszName && - myTagCompare(d->lpszName, token.pStr) == 0) - { + myTagCompare(d->lpszName, token.pStr) == 0) { // Indicate to the caller that it needs to create a // new element. pXML->lpNewElement = token.pStr; @@ -1372,30 +1621,28 @@ int XMLNode::ParseXMLElement(void *pa) // If the name of the new element differs from the name of // the current element we need to add the new element to // the current one and recurse - pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1); + pNew = addChild_priv(MEMORYINCREASE, + stringDup(token.pStr, cbToken), + nDeclaration, -1); - while (!pNew.isEmpty()) - { + while (!pNew.isEmpty()) { // Callself to process the new node. If we return // FALSE this means we dont have any more // processing to do... 
if (!pNew.ParseXMLElement(pXML)) return FALSE; - else - { + else { // If the call to recurse this function // evented in a end tag specified in XML then // we need to unwind the calls to this // function until we find the appropriate node // (the element name and end tag name must // match) - if (pXML->cbEndTag) - { + if (pXML->cbEndTag) { // If we are back at the root node then we // have an unmatched end tag - if (!d->lpszName) - { - pXML->error=eXMLErrorUnmatchedEndTag; + if (!d->lpszName) { + pXML->error = eXMLErrorUnmatchedEndTag; return FALSE; } @@ -1403,55 +1650,56 @@ int XMLNode::ParseXMLElement(void *pa) // element then we only need to unwind // once more... - if (myTagCompare(d->lpszName, pXML->lpEndTag)==0) - { + if (myTagCompare(d->lpszName, + pXML->lpEndTag) == 0) { pXML->cbEndTag = 0; } return TRUE; - } else - if (pXML->cbNewElement) - { - // If the call indicated a new element is to - // be created on THIS element. - - // If the name of this element matches the - // name of the element we need to create - // then we need to return to the caller - // and let it process the element. - - if (myTagCompare(d->lpszName, pXML->lpNewElement)==0) - { - return TRUE; - } - - // Add the new element and recurse - pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1); - pXML->cbNewElement = 0; + } else if (pXML->cbNewElement) { + // If the call indicated a new element is to + // be created on THIS element. + + // If the name of this element matches the + // name of the element we need to create + // then we need to return to the caller + // and let it process the element. 
+ + if (myTagCompare(d->lpszName, + pXML->lpNewElement) == 0) { + return TRUE; } - else - { - // If we didn't have a new element to create - pNew = emptyXMLNode; - } + // Add the new element and recurse + pNew = + addChild_priv(MEMORYINCREASE, + stringDup(pXML-> + lpNewElement, + pXML-> + cbNewElement), + 0, -1); + pXML->cbNewElement = 0; + } else { + // If we didn't have a new element to create + pNew = emptyXMLNode; + + } } } } break; - // If we found an end tag + // If we found an end tag case eTokenTagEnd: // If we have node text then add this to the element - if (maybeAddTxT(pXML,token.pStr)) return FALSE; + if (maybeAddTxT(pXML, token.pStr)) return FALSE; // Find the name of the end tag token = GetNextToken(pXML, &cbTemp, &xtype); // The end tag should be text - if (xtype != eTokenText) - { + if (xtype != eTokenText) { pXML->error = eXMLErrorMissingEndTagName; return FALSE; } @@ -1459,12 +1707,11 @@ int XMLNode::ParseXMLElement(void *pa) // After the end tag we should find a closing tag token = GetNextToken(pXML, &cbToken, &xtype); - if (xtype != eTokenCloseTag) - { + if (xtype != eTokenCloseTag) { pXML->error = eXMLErrorMissingEndTagName; return FALSE; } - pXML->lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; // We need to return to the previous caller. 
If the name // of the tag cannot be found we need to keep returning to @@ -1472,14 +1719,14 @@ int XMLNode::ParseXMLElement(void *pa) if (myTagCompare(d->lpszName, lpszTemp) != 0) #ifdef STRICT_PARSING { - pXML->error=eXMLErrorUnmatchedEndTag; - pXML->nIndexMissigEndTag=pXML->nIndex; + pXML->error = eXMLErrorUnmatchedEndTag; + pXML->nIndexMissigEndTag = pXML->nIndex; return FALSE; } #else { - pXML->error=eXMLErrorMissingEndTag; - pXML->nIndexMissigEndTag=pXML->nIndex; + pXML->error = eXMLErrorMissingEndTag; + pXML->nIndexMissigEndTag = pXML->nIndex; pXML->lpEndTag = lpszTemp; pXML->cbEndTag = cbTemp; } @@ -1489,12 +1736,12 @@ int XMLNode::ParseXMLElement(void *pa) exactMemory(d); return TRUE; - // If we found a clear (unformatted) token + // If we found a clear (unformatted) token case eTokenClear: // If we have node text then add this to the element - if (maybeAddTxT(pXML,token.pStr)) return FALSE; + if (maybeAddTxT(pXML, token.pStr)) return FALSE; if (parseClearTag(pXML, token.pClr)) return FALSE; - pXML->lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; break; default: @@ -1502,21 +1749,19 @@ int XMLNode::ParseXMLElement(void *pa) } break; - // If we are inside a tag definition we need to search for attributes + // If we are inside a tag definition we need to search for attributes case eInsideTag: // Check what part of the attribute (name, equals, value) we // are looking for. - switch(attrib) - { - // If we are looking for a new attribute + switch (attrib) { + // If we are looking for a new attribute case eAttribName: // Check what the current token type is - switch(xtype) - { - // If the current type is text... - // Eg. 'attribute' + switch (xtype) { + // If the current type is text... + // Eg. 'attribute' case eTokenText: // Cache the token then indicate that we are next to // look for the equals @@ -1525,22 +1770,22 @@ int XMLNode::ParseXMLElement(void *pa) attrib = eAttribEquals; break; - // If we found a closing tag... 
- // Eg. '>' + // If we found a closing tag... + // Eg. '>' case eTokenCloseTag: // We are now outside the tag status = eOutsideTag; - pXML->lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; break; - // If we found a short hand '/>' closing tag then we can - // return to the caller + // If we found a short hand '/>' closing tag then we can + // return to the caller case eTokenShortHandClose: exactMemory(d); - pXML->lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; return TRUE; - // Errors... + // Errors... case eTokenQuotedText: /* '"SomeText"' */ case eTokenTagStart: /* '<' */ case eTokenTagEnd: /* 'error = eXMLErrorUnexpectedToken; return FALSE; - default: break; + default: + break; } break; - // If we are looking for an equals + // If we are looking for an equals case eAttribEquals: // Check what the current token type is - switch(xtype) - { - // If the current type is text... - // Eg. 'Attribute AnotherAttribute' + switch (xtype) { + // If the current type is text... + // Eg. 'Attribute AnotherAttribute' case eTokenText: // Add the unvalued attribute to the list - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL); + addAttribute_priv(MEMORYINCREASE, + stringDup(lpszTemp, cbTemp), NULL); // Cache the token then indicate. 
We are next to // look for the equals attribute lpszTemp = token.pStr; cbTemp = cbToken; break; - // If we found a closing tag 'Attribute >' or a short hand - // closing tag 'Attribute />' + // If we found a closing tag 'Attribute >' or a short hand + // closing tag 'Attribute />' case eTokenShortHandClose: case eTokenCloseTag: // If we are a declaration element 'lpszText=pXML->lpXML+pXML->nIndex; + pXML->lpszText = pXML->lpXML + pXML->nIndex; if (d->isDeclaration && - (lpszTemp[cbTemp-1]) == _CXML('?')) - { + (lpszTemp[cbTemp-1]) == _CXML('?')) { cbTemp--; - if (d->pParent && d->pParent->pParent) xtype = eTokenShortHandClose; + if (d->pParent && d->pParent->pParent) { + xtype = eTokenShortHandClose; + } } - if (cbTemp) - { + if (cbTemp) { // Add the unvalued attribute to the list - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL); + addAttribute_priv(MEMORYINCREASE, + stringDup(lpszTemp, cbTemp), NULL); } // If this is the end of the tag then return to the caller - if (xtype == eTokenShortHandClose) - { + if (xtype == eTokenShortHandClose) { exactMemory(d); return TRUE; } @@ -1601,15 +1847,15 @@ int XMLNode::ParseXMLElement(void *pa) status = eOutsideTag; break; - // If we found the equals token... - // Eg. 'Attribute =' + // If we found the equals token... + // Eg. 'Attribute =' case eTokenEquals: // Indicate that we next need to search for the value // for the attribute attrib = eAttribValue; break; - // Errors... + // Errors... case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/ case eTokenTagStart: /* 'Attribute <' */ case eTokenTagEnd: /* 'Attribute error = eXMLErrorUnexpectedToken; return FALSE; - default: break; + default: + break; } break; - // If we are looking for an attribute value + // If we are looking for an attribute value case eAttribValue: // Check what the current token type is - switch(xtype) - { - // If the current type is text or quoted text... - // Eg. 
'Attribute = "Value"' or 'Attribute = Value' or - // 'Attribute = 'Value''. + switch (xtype) { + // If the current type is text or quoted text... + // Eg. 'Attribute = "Value"' or 'Attribute = Value' or + // 'Attribute = 'Value''. case eTokenText: case eTokenQuotedText: // If we are a declaration element 'isDeclaration && - (token.pStr[cbToken-1]) == _CXML('?')) - { + (token.pStr[cbToken-1]) == _CXML('?')) { cbToken--; } - if (cbTemp) - { + if (cbTemp) { // Add the valued attribute to the list - if (xtype==eTokenQuotedText) { token.pStr++; cbToken-=2; } - XMLSTR attrVal=(XMLSTR)token.pStr; - if (attrVal) - { - attrVal=fromXMLString(attrVal,cbToken,pXML); + if (xtype == eTokenQuotedText) { + token.pStr++; + cbToken -= 2; + } + XMLSTR attrVal = (XMLSTR)token.pStr; + if (attrVal) { + attrVal = fromXMLString(attrVal, cbToken, pXML); if (!attrVal) return FALSE; } - addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp),attrVal); + addAttribute_priv(MEMORYINCREASE, + stringDup(lpszTemp, cbTemp), + attrVal); } // Indicate we are searching for a new attribute attrib = eAttribName; break; - // Errors... + // Errors... case eTokenTagStart: /* 'Attr = <' */ case eTokenTagEnd: /* 'Attr = ' */ @@ -1667,109 +1915,104 @@ int XMLNode::ParseXMLElement(void *pa) pXML->error = eXMLErrorUnexpectedToken; return FALSE; break; - default: break; + default: + break; } } } } // If we failed to obtain the next token - else - { - if ((!d->isDeclaration)&&(d->pParent)) - { + else { + if ((!d->isDeclaration) && (d->pParent)) { #ifdef STRICT_PARSING - pXML->error=eXMLErrorUnmatchedEndTag; + pXML->error = eXMLErrorUnmatchedEndTag; #else - pXML->error=eXMLErrorMissingEndTag; + pXML->error = eXMLErrorMissingEndTag; #endif - pXML->nIndexMissigEndTag=pXML->nIndex; + pXML->nIndexMissigEndTag = pXML->nIndex; } - maybeAddTxT(pXML,pXML->lpXML+pXML->nIndex); + maybeAddTxT(pXML, pXML->lpXML + pXML->nIndex); return FALSE; } } } // Count the number of lines and columns in an XML string. 
-static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults) -{ +static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, + XMLResults *pResults) { XMLCHAR ch; assert(lpXML); assert(pResults); - struct XML xml={ lpXML,lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; + struct XML xml = { lpXML, lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, + TRUE }; pResults->nLine = 1; pResults->nColumn = 1; - while (xml.nIndexnColumn++; - else - { + else { pResults->nLine++; - pResults->nColumn=1; + pResults->nColumn = 1; } } } // Parse XML and return the root element. -XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults) -{ - if (!lpszXML) - { - if (pResults) - { - pResults->error=eXMLErrorNoElements; - pResults->nLine=0; - pResults->nColumn=0; +XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, + XMLResults *pResults) { + if (!lpszXML) { + if (pResults) { + pResults->error = eXMLErrorNoElements; + pResults->nLine = 0; + pResults->nColumn = 0; } return emptyXMLNode; } - XMLNode xnode(NULL,NULL,FALSE); - struct XML xml={ lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; + XMLNode xnode(NULL, NULL, FALSE); + struct XML xml = { lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, + TRUE }; // Create header element xnode.ParseXMLElement(&xml); enum XMLError error = xml.error; - if (!xnode.nChildNode()) error=eXMLErrorNoXMLTagFound; - if ((xnode.nChildNode()==1)&&(xnode.nElement()==1)) xnode=xnode.getChildNode(); // skip the empty node + if (!xnode.nChildNode()) error = eXMLErrorNoXMLTagFound; + if ((xnode.nChildNode() == 1) && (xnode.nElement() == 1)) { + xnode = xnode.getChildNode(); // skip the empty node + } // If no error occurred - if ((error==eXMLErrorNone)||(error==eXMLErrorMissingEndTag)||(error==eXMLErrorNoXMLTagFound)) - { - XMLCSTR name=xnode.getName(); - if (tag&&(*tag)&&((!name)||(xstricmp(name,tag)))) - { - xnode=xnode.getChildNode(tag); - if (xnode.isEmpty()) - { - if (pResults) - { - 
pResults->error=eXMLErrorFirstTagNotFound; - pResults->nLine=0; - pResults->nColumn=0; + if ((error == eXMLErrorNone) || (error == eXMLErrorMissingEndTag) || + (error == eXMLErrorNoXMLTagFound)) { + XMLCSTR name = xnode.getName(); + if (tag && (*tag) && ((!name) || (xstricmp(name, tag)))) { + xnode = xnode.getChildNode(tag); + if (xnode.isEmpty()) { + if (pResults) { + pResults->error = eXMLErrorFirstTagNotFound; + pResults->nLine = 0; + pResults->nColumn = 0; } return emptyXMLNode; } } - } else - { + } else { // Cleanup: this will destroy all the nodes xnode = emptyXMLNode; } // If we have been given somewhere to place results - if (pResults) - { + if (pResults) { pResults->error = error; // If we have an error - if (error!=eXMLErrorNone) - { - if (error==eXMLErrorMissingEndTag) xml.nIndex=xml.nIndexMissigEndTag; + if (error != eXMLErrorNone) { + if (error == eXMLErrorMissingEndTag) { + xml.nIndex = xml.nIndexMissigEndTag; + } // Find which line and column it starts on. CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults); } @@ -1777,72 +2020,95 @@ XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults) return xnode; } -XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) -{ - if (pResults) { pResults->nLine=0; pResults->nColumn=0; } - FILE *f=xfopen(filename,_CXML("rb")); - if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; } - fseek(f,0,SEEK_END); - int l=ftell(f),headerSz=0; - if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; fclose(f); return emptyXMLNode; } - fseek(f,0,SEEK_SET); - unsigned char *buf=(unsigned char*)malloc(l+4); - l=fread(buf,1,l,f); +XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) { + if (pResults) { + pResults->nLine = 0; + pResults->nColumn = 0; + } + FILE *f = xfopen(filename, _CXML("rb")); + if (f == NULL) { + if (pResults) pResults->error = eXMLErrorFileNotFound; + return emptyXMLNode; + } + fseek(f, 0, 
SEEK_END); + int l = ftell(f), headerSz = 0; + if (!l) { + if (pResults) pResults->error = eXMLErrorEmpty; + fclose(f); + return emptyXMLNode; + } + fseek(f, 0, SEEK_SET); + unsigned char *buf = (unsigned char*)malloc(l + 4); + l = fread(buf, 1, l, f); fclose(f); - buf[l]=0;buf[l+1]=0;buf[l+2]=0;buf[l+3]=0; + buf[l] = 0; + buf[l+1] = 0; + buf[l+2] = 0; + buf[l+3] = 0; #ifdef _XMLWIDECHAR - if (guessWideCharChars) - { - if (!myIsTextWideChar(buf,l)) - { - XMLNode::XMLCharEncoding ce=XMLNode::char_encoding_legacy; - if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) { headerSz=3; ce=XMLNode::char_encoding_UTF8; } - XMLSTR b2=myMultiByteToWideChar((const char*)(buf+headerSz),ce); - free(buf); buf=(unsigned char*)b2; headerSz=0; - } else - { - if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; - if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; + if (guessWideCharChars) { + if (!myIsTextWideChar(buf, l)) { + XMLNode::XMLCharEncoding ce = XMLNode::char_encoding_legacy; + if ((buf[0] == 0xef) && (buf[1] == 0xbb) && (buf[2] == 0xbf)) { + headerSz = 3; + ce = XMLNode::char_encoding_UTF8; + } + XMLSTR b2 = myMultiByteToWideChar((const char*)(buf + headerSz), ce); + free(buf); + buf = (unsigned char*)b2; + headerSz = 0; + } else { + if ((buf[0] == 0xef) && (buf[1] == 0xff)) headerSz = 2; + if ((buf[0] == 0xff) && (buf[1] == 0xfe)) headerSz = 2; } } #else - if (guessWideCharChars) - { - if (myIsTextWideChar(buf,l)) - { - if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; - if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; - char *b2=myWideCharToMultiByte((const wchar_t*)(buf+headerSz)); - free(buf); buf=(unsigned char*)b2; headerSz=0; - } else - { - if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; + if (guessWideCharChars) { + if (myIsTextWideChar(buf, l)) { + if ((buf[0] == 0xef) && (buf[1] == 0xff)) headerSz = 2; + if ((buf[0] == 0xff) && (buf[1] == 0xfe)) headerSz = 2; + char *b2 = myWideCharToMultiByte((const wchar_t*)(buf + headerSz)); + free(buf); + buf = 
(unsigned char*)b2; + headerSz = 0; + } else { + if ((buf[0] == 0xef) && (buf[1] == 0xbb) && (buf[2] == 0xbf)) { + headerSz = 3; + } } } #endif - if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; } - XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults); + if (!buf) { + if (pResults) pResults->error = eXMLErrorCharConversionError; + return emptyXMLNode; + } + XMLNode x = parseString((XMLSTR)(buf + headerSz), tag, pResults); free(buf); return x; } -static inline void charmemset(XMLSTR dest,XMLCHAR c,int l) { while (l--) *(dest++)=c; } +static inline void charmemset(XMLSTR dest, XMLCHAR c, int l) { + while (l--) *(dest++) = c; +} // private: // Creates an user friendly XML string from a given element with // appropriate white space and carriage returns. // // This recurses through all subnodes then adds contents of the nodes to the // string. -int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat) -{ +int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, + int nFormat) { int nResult = 0; - int cb=nFormat<0?0:nFormat; + int cb = nFormat < 0 ? 0 : nFormat; int cbElement; - int nChildFormat=-1; - int nElementI=pEntry->nChild+pEntry->nText+pEntry->nClear; - int i,j; - if ((nFormat>=0)&&(nElementI==1)&&(pEntry->nText==1)&&(!pEntry->isDeclaration)) nFormat=-2; + int nChildFormat = -1; + int nElementI = pEntry->nChild + pEntry->nText + pEntry->nClear; + int i, j; + if ((nFormat >= 0) && (nElementI == 1) && (pEntry->nText == 1) && + (!pEntry->isDeclaration)) { + nFormat = -2; + } assert(pEntry); @@ -1851,47 +2117,43 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma // If the element has no name then assume this is the head node. 
cbElement = (int)LENSTR(pEntry->lpszName); - if (cbElement) - { + if (cbElement) { // "isDeclaration) lpszMarker[nResult++]=_CXML('?'); + lpszMarker[nResult++] = _CXML('<'); + if (pEntry->isDeclaration) lpszMarker[nResult++] = _CXML('?'); xstrcpy(&lpszMarker[nResult], pEntry->lpszName); - nResult+=cbElement; - lpszMarker[nResult++]=_CXML(' '); + nResult += cbElement; + lpszMarker[nResult++] = _CXML(' '); - } else - { - nResult+=cbElement+2+cb; + } else { + nResult += cbElement + 2 + cb; if (pEntry->isDeclaration) nResult++; } // Enumerate attributes and add them to the string - XMLAttribute *pAttr=pEntry->pAttribute; - for (i=0; inAttribute; i++) - { + XMLAttribute *pAttr = pEntry->pAttribute; + for (i = 0; i < pEntry->nAttribute; i++) { // "Attrib cb = (int)LENSTR(pAttr->lpszName); - if (cb) - { + if (cb) { if (lpszMarker) xstrcpy(&lpszMarker[nResult], pAttr->lpszName); nResult += cb; // "Attrib=Value " - if (pAttr->lpszValue) - { - cb=(int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue); - if (lpszMarker) - { - lpszMarker[nResult]=_CXML('='); - lpszMarker[nResult+1]=_CXML('"'); - if (cb) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2],pAttr->lpszValue); - lpszMarker[nResult+cb+2]=_CXML('"'); + if (pAttr->lpszValue) { + cb = (int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue); + if (lpszMarker) { + lpszMarker[nResult] = _CXML('='); + lpszMarker[nResult+1] = _CXML('"'); + if (cb) { + ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2], + pAttr->lpszValue); + } + lpszMarker[nResult+cb+2] = _CXML('"'); } - nResult+=cb+3; + nResult += cb + 3; } if (lpszMarker) lpszMarker[nResult] = _CXML(' '); nResult++; @@ -1899,27 +2161,22 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma pAttr++; } - if (pEntry->isDeclaration) - { - if (lpszMarker) - { - lpszMarker[nResult-1]=_CXML('?'); - lpszMarker[nResult]=_CXML('>'); + if (pEntry->isDeclaration) { + if (lpszMarker) { + lpszMarker[nResult-1] = _CXML('?'); + lpszMarker[nResult] = 
_CXML('>'); } nResult++; - if (nFormat!=-1) - { - if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); + if (nFormat != -1) { + if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); nResult++; } } else // If there are child nodes we need to terminate the start tag - if (nElementI) - { - if (lpszMarker) lpszMarker[nResult-1]=_CXML('>'); - if (nFormat>=0) - { - if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); + if (nElementI) { + if (lpszMarker) lpszMarker[nResult-1] = _CXML('>'); + if (nFormat >= 0) { + if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); nResult++; } } else nResult--; @@ -1927,145 +2184,137 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma // Calculate the child format for when we recurse. This is used to // determine the number of spaces used for prefixes. - if (nFormat!=-1) - { - if (cbElement&&(!pEntry->isDeclaration)) nChildFormat=nFormat+1; - else nChildFormat=nFormat; + if (nFormat != -1) { + if (cbElement && (!pEntry->isDeclaration)) nChildFormat = nFormat + 1; + else nChildFormat = nFormat; } // Enumerate through remaining children - for (i=0; ipOrder[i]; - switch((XMLElementType)(j&3)) - { - // Text nodes - case eNodeText: - { - // "Text" - XMLCSTR pChild=pEntry->pText[j>>2]; - cb = (int)ToXMLStringTool::lengthXMLString(pChild); - if (cb) - { - if (nFormat>=0) - { - if (lpszMarker) - { - charmemset(&lpszMarker[nResult],INDENTCHAR,nFormat+1); - ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+nFormat+1],pChild); - lpszMarker[nResult+nFormat+1+cb]=_CXML('\n'); - } - nResult+=cb+nFormat+2; - } else - { - if (lpszMarker) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], pChild); - nResult += cb; + for (i = 0; i < nElementI; i++) { + j = pEntry->pOrder[i]; + switch ((XMLElementType)(j&3)) { + // Text nodes + case eNodeText: { + // "Text" + XMLCSTR pChild = pEntry->pText[j>>2]; + cb = (int)ToXMLStringTool::lengthXMLString(pChild); + if (cb) { + if (nFormat >= 0) { + if (lpszMarker) { + charmemset(&lpszMarker[nResult], 
INDENTCHAR, + nFormat + 1); + ToXMLStringTool::toXMLUnSafe( + &lpszMarker[nResult+nFormat+1], pChild); + lpszMarker[nResult+nFormat+1+cb] = _CXML('\n'); + } + nResult += cb + nFormat + 2; + } else { + if (lpszMarker) { + ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], + pChild); } + nResult += cb; } - break; } + break; + } // Clear type nodes - case eNodeClear: - { - XMLClear *pChild=pEntry->pClear+(j>>2); - // "OpenTag" - cb = (int)LENSTR(pChild->lpszOpenTag); - if (cb) - { - if (nFormat!=-1) - { - if (lpszMarker) - { - charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat+1); - xstrcpy(&lpszMarker[nResult+nFormat+1], pChild->lpszOpenTag); - } - nResult+=cb+nFormat+1; + case eNodeClear: { + XMLClear *pChild = pEntry->pClear + (j >> 2); + // "OpenTag" + cb = (int)LENSTR(pChild->lpszOpenTag); + if (cb) { + if (nFormat != -1) { + if (lpszMarker) { + charmemset(&lpszMarker[nResult], INDENTCHAR, + nFormat + 1); + xstrcpy(&lpszMarker[nResult+nFormat+1], + pChild->lpszOpenTag); } - else - { - if (lpszMarker)xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag); - nResult += cb; + nResult += cb + nFormat + 1; + } else { + if (lpszMarker) { + xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag); } - } - - // "OpenTag Value" - cb = (int)LENSTR(pChild->lpszValue); - if (cb) - { - if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszValue); nResult += cb; } + } - // "OpenTag Value CloseTag" - cb = (int)LENSTR(pChild->lpszCloseTag); - if (cb) - { - if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag); - nResult += cb; + // "OpenTag Value" + cb = (int)LENSTR(pChild->lpszValue); + if (cb) { + if (lpszMarker) { + xstrcpy(&lpszMarker[nResult], pChild->lpszValue); } + nResult += cb; + } - if (nFormat!=-1) - { - if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); - nResult++; + // "OpenTag Value CloseTag" + cb = (int)LENSTR(pChild->lpszCloseTag); + if (cb) { + if (lpszMarker) { + xstrcpy(&lpszMarker[nResult], pChild->lpszCloseTag); } - break; + nResult += cb; } - // 
Element nodes - case eNodeChild: - { - // Recursively add child nodes - nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, lpszMarker ? lpszMarker + nResult : 0, nChildFormat); - break; + if (nFormat != -1) { + if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); + nResult++; } - default: break; + break; + } + + // Element nodes + case eNodeChild: { + // Recursively add child nodes + nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, + lpszMarker ? lpszMarker + nResult : 0, + nChildFormat); + break; + } + default: + break; } } - if ((cbElement)&&(!pEntry->isDeclaration)) - { + if ((cbElement) && (!pEntry->isDeclaration)) { // If we have child entries we need to use long XML notation for // closing the element - "blah blah blah" - if (nElementI) - { + if (nElementI) { // "\0" - if (lpszMarker) - { - if (nFormat >=0) - { - charmemset(&lpszMarker[nResult], INDENTCHAR,nFormat); - nResult+=nFormat; + if (lpszMarker) { + if (nFormat >= 0) { + charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat); + nResult += nFormat; } - lpszMarker[nResult]=_CXML('<'); lpszMarker[nResult+1]=_CXML('/'); + lpszMarker[nResult] = _CXML('<'); + lpszMarker[nResult+1] = _CXML('/'); nResult += 2; xstrcpy(&lpszMarker[nResult], pEntry->lpszName); nResult += cbElement; - lpszMarker[nResult]=_CXML('>'); + lpszMarker[nResult] = _CXML('>'); if (nFormat == -1) nResult++; - else - { - lpszMarker[nResult+1]=_CXML('\n'); - nResult+=2; + else { + lpszMarker[nResult+1] = _CXML('\n'); + nResult += 2; } - } else - { - if (nFormat>=0) nResult+=cbElement+4+nFormat; - else if (nFormat==-1) nResult+=cbElement+3; - else nResult+=cbElement+4; + } else { + if (nFormat >= 0) nResult += cbElement + 4 + nFormat; + else if (nFormat == -1) nResult += cbElement + 3; + else nResult += cbElement + 4; } - } else - { + } else { // If there are no children we can use shorthand XML notation - // "" // "/>\0" - if (lpszMarker) - { - lpszMarker[nResult]=_CXML('/'); lpszMarker[nResult+1]=_CXML('>'); - if (nFormat != -1) 
lpszMarker[nResult+2]=_CXML('\n'); + if (lpszMarker) { + lpszMarker[nResult] = _CXML('/'); + lpszMarker[nResult+1] = _CXML('>'); + if (nFormat != -1) lpszMarker[nResult+2] = _CXML('\n'); } nResult += nFormat == -1 ? 2 : 3; } @@ -2085,342 +2334,401 @@ int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nForma // NULL terminator. // @return XMLSTR - Allocated XML string, you must free // this with free(). -XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const -{ - if (!d) { if (pnSize) *pnSize=0; return NULL; } +XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const { + if (!d) { + if (pnSize) *pnSize = 0; + return NULL; + } XMLSTR lpszResult = NULL; int cbStr; // Recursively Calculate the size of the XML string - if (!dropWhiteSpace) nFormat=0; + if (!dropWhiteSpace) nFormat = 0; nFormat = nFormat ? 0 : -1; cbStr = CreateXMLStringR(d, 0, nFormat); // Alllocate memory for the XML string + the NULL terminator and // create the recursively XML string. 
- lpszResult=(XMLSTR)malloc((cbStr+1)*sizeof(XMLCHAR)); + lpszResult = (XMLSTR)malloc((cbStr + 1) * sizeof(XMLCHAR)); CreateXMLStringR(d, lpszResult, nFormat); - lpszResult[cbStr]=_CXML('\0'); + lpszResult[cbStr] = _CXML('\0'); if (pnSize) *pnSize = cbStr; return lpszResult; } -int XMLNode::detachFromParent(XMLNodeData *d) -{ - XMLNode *pa=d->pParent->pChild; - int i=0; - while (((void*)(pa[i].d))!=((void*)d)) i++; +int XMLNode::detachFromParent(XMLNodeData *d) { + XMLNode *pa = d->pParent->pChild; + int i = 0; + while (((void*)(pa[i].d)) != ((void*)d)) i++; d->pParent->nChild--; - if (d->pParent->nChild) memmove(pa+i,pa+i+1,(d->pParent->nChild-i)*sizeof(XMLNode)); - else { free(pa); d->pParent->pChild=NULL; } - return removeOrderElement(d->pParent,eNodeChild,i); + if (d->pParent->nChild) { + memmove(pa + i, pa + i + 1, (d->pParent->nChild - i)*sizeof(XMLNode)); + } else { + free(pa); + d->pParent->pChild = NULL; + } + return removeOrderElement(d->pParent, eNodeChild, i); } -XMLNode::~XMLNode() -{ +XMLNode::~XMLNode() { if (!d) return; d->ref_count--; emptyTheNode(0); } -void XMLNode::deleteNodeContent() -{ +void XMLNode::deleteNodeContent() { if (!d) return; - if (d->pParent) { detachFromParent(d); d->pParent=NULL; d->ref_count--; } + if (d->pParent) { + detachFromParent(d); + d->pParent = NULL; + d->ref_count--; + } emptyTheNode(1); } -void XMLNode::emptyTheNode(char force) -{ - XMLNodeData *dd=d; // warning: must stay this way! - if ((dd->ref_count==0)||force) - { +void XMLNode::emptyTheNode(char force) { + XMLNodeData *dd = d; // warning: must stay this way! 
+ if ((dd->ref_count == 0) || force) { if (d->pParent) detachFromParent(d); int i; XMLNode *pc; - for(i=0; inChild; i++) - { - pc=dd->pChild+i; - pc->d->pParent=NULL; + for (i = 0; i < dd->nChild; i++) { + pc = dd->pChild + i; + pc->d->pParent = NULL; pc->d->ref_count--; pc->emptyTheNode(force); } myFree(dd->pChild); - for(i=0; inText; i++) free((void*)dd->pText[i]); + for (i = 0; i < dd->nText; i++) free((void*)dd->pText[i]); myFree(dd->pText); - for(i=0; inClear; i++) free((void*)dd->pClear[i].lpszValue); + for (i = 0; i < dd->nClear; i++) free((void*)dd->pClear[i].lpszValue); myFree(dd->pClear); - for(i=0; inAttribute; i++) - { + for (i = 0; i < dd->nAttribute; i++) { free((void*)dd->pAttribute[i].lpszName); - if (dd->pAttribute[i].lpszValue) free((void*)dd->pAttribute[i].lpszValue); + if (dd->pAttribute[i].lpszValue) { + free((void*)dd->pAttribute[i].lpszValue); + } } myFree(dd->pAttribute); myFree(dd->pOrder); myFree((void*)dd->lpszName); - dd->nChild=0; dd->nText=0; dd->nClear=0; dd->nAttribute=0; - dd->pChild=NULL; dd->pText=NULL; dd->pClear=NULL; dd->pAttribute=NULL; - dd->pOrder=NULL; dd->lpszName=NULL; dd->pParent=NULL; + dd->nChild = 0; + dd->nText = 0; + dd->nClear = 0; + dd->nAttribute = 0; + dd->pChild = NULL; + dd->pText = NULL; + dd->pClear = NULL; + dd->pAttribute = NULL; + dd->pOrder = NULL; + dd->lpszName = NULL; + dd->pParent = NULL; } - if (dd->ref_count==0) - { + if (dd->ref_count == 0) { free(dd); - d=NULL; + d = NULL; } } -XMLNode& XMLNode::operator=( const XMLNode& A ) -{ +XMLNode& XMLNode::operator=( const XMLNode & A ) { // shallow copy - if (this != &A) - { - if (d) { d->ref_count--; emptyTheNode(0); } - d=A.d; + if (this != &A) { + if (d) { + d->ref_count--; + emptyTheNode(0); + } + d = A.d; if (d) (d->ref_count) ++ ; } return *this; } -XMLNode::XMLNode(const XMLNode &A) -{ +XMLNode::XMLNode(const XMLNode &A) { // shallow copy - d=A.d; + d = A.d; if (d) (d->ref_count)++ ; } -XMLNode XMLNode::deepCopy() const -{ +XMLNode 
XMLNode::deepCopy() const { if (!d) return XMLNode::emptyXMLNode; - XMLNode x(NULL,stringDup(d->lpszName),d->isDeclaration); - XMLNodeData *p=x.d; - int n=d->nAttribute; - if (n) - { - p->nAttribute=n; p->pAttribute=(XMLAttribute*)malloc(n*sizeof(XMLAttribute)); - while (n--) - { - p->pAttribute[n].lpszName=stringDup(d->pAttribute[n].lpszName); - p->pAttribute[n].lpszValue=stringDup(d->pAttribute[n].lpszValue); + XMLNode x(NULL, stringDup(d->lpszName), d->isDeclaration); + XMLNodeData *p = x.d; + int n = d->nAttribute; + if (n) { + p->nAttribute = n; + p->pAttribute = (XMLAttribute*)malloc(n * sizeof(XMLAttribute)); + while (n--) { + p->pAttribute[n].lpszName = stringDup(d->pAttribute[n].lpszName); + p->pAttribute[n].lpszValue = stringDup(d->pAttribute[n].lpszValue); } } - if (d->pOrder) - { - n=(d->nChild+d->nText+d->nClear)*sizeof(int); p->pOrder=(int*)malloc(n); memcpy(p->pOrder,d->pOrder,n); - } - n=d->nText; - if (n) - { - p->nText=n; p->pText=(XMLCSTR*)malloc(n*sizeof(XMLCSTR)); - while(n--) p->pText[n]=stringDup(d->pText[n]); - } - n=d->nClear; - if (n) - { - p->nClear=n; p->pClear=(XMLClear*)malloc(n*sizeof(XMLClear)); - while (n--) - { - p->pClear[n].lpszCloseTag=d->pClear[n].lpszCloseTag; - p->pClear[n].lpszOpenTag=d->pClear[n].lpszOpenTag; - p->pClear[n].lpszValue=stringDup(d->pClear[n].lpszValue); + if (d->pOrder) { + n = (d->nChild + d->nText + d->nClear) * sizeof(int); + p->pOrder = (int*)malloc(n); + memcpy(p->pOrder, d->pOrder, n); + } + n = d->nText; + if (n) { + p->nText = n; + p->pText = (XMLCSTR*)malloc(n * sizeof(XMLCSTR)); + while (n--) p->pText[n] = stringDup(d->pText[n]); + } + n = d->nClear; + if (n) { + p->nClear = n; + p->pClear = (XMLClear*)malloc(n * sizeof(XMLClear)); + while (n--) { + p->pClear[n].lpszCloseTag = d->pClear[n].lpszCloseTag; + p->pClear[n].lpszOpenTag = d->pClear[n].lpszOpenTag; + p->pClear[n].lpszValue = stringDup(d->pClear[n].lpszValue); } } - n=d->nChild; - if (n) - { - p->nChild=n; 
p->pChild=(XMLNode*)malloc(n*sizeof(XMLNode)); - while (n--) - { - p->pChild[n].d=NULL; - p->pChild[n]=d->pChild[n].deepCopy(); - p->pChild[n].d->pParent=p; + n = d->nChild; + if (n) { + p->nChild = n; + p->pChild = (XMLNode*)malloc(n * sizeof(XMLNode)); + while (n--) { + p->pChild[n].d = NULL; + p->pChild[n] = d->pChild[n].deepCopy(); + p->pChild[n].d->pParent = p; } } return x; } -XMLNode XMLNode::addChild(XMLNode childNode, int pos) -{ - XMLNodeData *dc=childNode.d; - if ((!dc)||(!d)) return childNode; - if (!dc->lpszName) - { +XMLNode XMLNode::addChild(XMLNode childNode, int pos) { + XMLNodeData *dc = childNode.d; + if ((!dc) || (!d)) return childNode; + if (!dc->lpszName) { // this is a root node: todo: correct fix - int j=pos; - while (dc->nChild) - { - addChild(dc->pChild[0],j); - if (pos>=0) j++; + int j = pos; + while (dc->nChild) { + addChild(dc->pChild[0], j); + if (pos >= 0) j++; } return childNode; } - if (dc->pParent) { if ((detachFromParent(dc)<=pos)&&(dc->pParent==d)) pos--; } else dc->ref_count++; - dc->pParent=d; + if (dc->pParent) { + if ((detachFromParent(dc) <= pos) && (dc->pParent == d)) pos--; + } else dc->ref_count++; + dc->pParent = d; // int nc=d->nChild; // d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode)); - d->pChild=(XMLNode*)addToOrder(0,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); - d->pChild[pos].d=dc; + d->pChild = (XMLNode*)addToOrder(0, &pos, d->nChild, d->pChild, + sizeof(XMLNode), eNodeChild); + d->pChild[pos].d = dc; d->nChild++; return childNode; } -void XMLNode::deleteAttribute(int i) -{ - if ((!d)||(i<0)||(i>=d->nAttribute)) return; +void XMLNode::deleteAttribute(int i) { + if ((!d) || (i < 0) || (i >= d->nAttribute)) return; d->nAttribute--; - XMLAttribute *p=d->pAttribute+i; + XMLAttribute *p = d->pAttribute + i; free((void*)p->lpszName); if (p->lpszValue) free((void*)p->lpszValue); - if (d->nAttribute) memmove(p,p+1,(d->nAttribute-i)*sizeof(XMLAttribute)); else { free(p); 
d->pAttribute=NULL; } + if (d->nAttribute) { + memmove(p, p + 1, (d->nAttribute - i)*sizeof(XMLAttribute)); + } + else { + free(p); + d->pAttribute = NULL; + } } -void XMLNode::deleteAttribute(XMLAttribute *a){ if (a) deleteAttribute(a->lpszName); } -void XMLNode::deleteAttribute(XMLCSTR lpszName) -{ - int j=0; - getAttribute(lpszName,&j); - if (j) deleteAttribute(j-1); +void XMLNode::deleteAttribute(XMLAttribute *a) { + if (a) deleteAttribute(a->lpszName); +} +void XMLNode::deleteAttribute(XMLCSTR lpszName) { + int j = 0; + getAttribute(lpszName, &j); + if (j) deleteAttribute(j - 1); } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,int i) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); if (lpszNewName) free(lpszNewName); return NULL; } - if (i>=d->nAttribute) - { - if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, + XMLSTR lpszNewName, int i) { + if (!d) { + if (lpszNewValue) free(lpszNewValue); + if (lpszNewName) free(lpszNewName); + return NULL; + } + if (i >= d->nAttribute) { + if (lpszNewName) return addAttribute_WOSD(lpszNewName, lpszNewValue); return NULL; } - XMLAttribute *p=d->pAttribute+i; - if (p->lpszValue&&p->lpszValue!=lpszNewValue) free((void*)p->lpszValue); - p->lpszValue=lpszNewValue; - if (lpszNewName&&p->lpszName!=lpszNewName) { free((void*)p->lpszName); p->lpszName=lpszNewName; }; + XMLAttribute *p = d->pAttribute + i; + if (p->lpszValue && p->lpszValue != lpszNewValue) { + free((void*)p->lpszValue); + } + p->lpszValue = lpszNewValue; + if (lpszNewName && p->lpszName != lpszNewName) { + free((void*)p->lpszName); + p->lpszName = lpszNewName; + }; return p; } -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) -{ - if (oldAttribute) return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue,(XMLSTR)newAttribute->lpszName,oldAttribute->lpszName); - return 
addAttribute_WOSD((XMLSTR)newAttribute->lpszName,(XMLSTR)newAttribute->lpszValue); -} - -XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName) -{ - int j=0; - getAttribute(lpszOldName,&j); - if (j) return updateAttribute_WOSD(lpszNewValue,lpszNewName,j-1); - else - { - if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); - else return addAttribute_WOSD(stringDup(lpszOldName),lpszNewValue); +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, + XMLAttribute *oldAttribute) { + if (oldAttribute) { + return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue, + (XMLSTR)newAttribute->lpszName, + oldAttribute->lpszName); + } + return addAttribute_WOSD((XMLSTR)newAttribute->lpszName, + (XMLSTR)newAttribute->lpszValue); +} + +XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, + XMLSTR lpszNewName, + XMLCSTR lpszOldName) { + int j = 0; + getAttribute(lpszOldName, &j); + if (j) return updateAttribute_WOSD(lpszNewValue, lpszNewName, j - 1); + else { + if (lpszNewName) { + return addAttribute_WOSD(lpszNewName, lpszNewValue); + } else { + return addAttribute_WOSD(stringDup(lpszOldName), lpszNewValue); + } } } -int XMLNode::indexText(XMLCSTR lpszValue) const -{ +int XMLNode::indexText(XMLCSTR lpszValue) const { if (!d) return -1; - int i,l=d->nText; - if (!lpszValue) { if (l) return 0; return -1; } - XMLCSTR *p=d->pText; - for (i=0; inText; + if (!lpszValue) { + if (l) return 0; + return -1; + } + XMLCSTR *p = d->pText; + for (i = 0; i < l; i++) if (lpszValue == p[i]) return i; return -1; } -void XMLNode::deleteText(int i) -{ - if ((!d)||(i<0)||(i>=d->nText)) return; +void XMLNode::deleteText(int i) { + if ((!d) || (i < 0) || (i >= d->nText)) return; d->nText--; - XMLCSTR *p=d->pText+i; + XMLCSTR *p = d->pText + i; free((void*)*p); - if (d->nText) memmove(p,p+1,(d->nText-i)*sizeof(XMLCSTR)); else { free(p); d->pText=NULL; } - removeOrderElement(d,eNodeText,i); + if (d->nText) 
memmove(p, p + 1, (d->nText - i)*sizeof(XMLCSTR)); + else { + free(p); + d->pText = NULL; + } + removeOrderElement(d, eNodeText, i); } -void XMLNode::deleteText(XMLCSTR lpszValue) { deleteText(indexText(lpszValue)); } +void XMLNode::deleteText(XMLCSTR lpszValue) { + deleteText(indexText(lpszValue)); +} -XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } - if (i>=d->nText) return addText_WOSD(lpszNewValue); - XMLCSTR *p=d->pText+i; - if (*p!=lpszNewValue) { free((void*)*p); *p=lpszNewValue; } +XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) { + if (!d) { + if (lpszNewValue) free(lpszNewValue); + return NULL; + } + if (i >= d->nText) return addText_WOSD(lpszNewValue); + XMLCSTR *p = d->pText + i; + if (*p != lpszNewValue) { + free((void*)*p); + *p = lpszNewValue; + } return lpszNewValue; } -XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) -{ - if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } - int i=indexText(lpszOldValue); - if (i>=0) return updateText_WOSD(lpszNewValue,i); +XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) { + if (!d) { + if (lpszNewValue) free(lpszNewValue); + return NULL; + } + int i = indexText(lpszOldValue); + if (i >= 0) return updateText_WOSD(lpszNewValue, i); return addText_WOSD(lpszNewValue); } -void XMLNode::deleteClear(int i) -{ - if ((!d)||(i<0)||(i>=d->nClear)) return; +void XMLNode::deleteClear(int i) { + if ((!d) || (i < 0) || (i >= d->nClear)) return; d->nClear--; - XMLClear *p=d->pClear+i; + XMLClear *p = d->pClear + i; free((void*)p->lpszValue); - if (d->nClear) memmove(p,p+1,(d->nClear-i)*sizeof(XMLClear)); else { free(p); d->pClear=NULL; } - removeOrderElement(d,eNodeClear,i); + if (d->nClear) memmove(p, p + 1, (d->nClear - i)*sizeof(XMLClear)); + else { + free(p); + d->pClear = NULL; + } + removeOrderElement(d, eNodeClear, i); } -int XMLNode::indexClear(XMLCSTR lpszValue) 
const -{ +int XMLNode::indexClear(XMLCSTR lpszValue) const { if (!d) return -1; - int i,l=d->nClear; - if (!lpszValue) { if (l) return 0; return -1; } - XMLClear *p=d->pClear; - for (i=0; inClear; + if (!lpszValue) { + if (l) return 0; + return -1; + } + XMLClear *p = d->pClear; + for (i = 0; i < l; i++) if (lpszValue == p[i].lpszValue) return i; return -1; } -void XMLNode::deleteClear(XMLCSTR lpszValue) { deleteClear(indexClear(lpszValue)); } -void XMLNode::deleteClear(XMLClear *a) { if (a) deleteClear(a->lpszValue); } +void XMLNode::deleteClear(XMLCSTR lpszValue) { + deleteClear(indexClear(lpszValue)); +} +void XMLNode::deleteClear(XMLClear *a) { + if (a) deleteClear(a->lpszValue); +} -XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i) -{ - if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } - if (i>=d->nClear) return addClear_WOSD(lpszNewContent); - XMLClear *p=d->pClear+i; - if (lpszNewContent!=p->lpszValue) { free((void*)p->lpszValue); p->lpszValue=lpszNewContent; } +XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, int i) { + if (!d) { + if (lpszNewContent) free(lpszNewContent); + return NULL; + } + if (i >= d->nClear) return addClear_WOSD(lpszNewContent); + XMLClear *p = d->pClear + i; + if (lpszNewContent != p->lpszValue) { + free((void*)p->lpszValue); + p->lpszValue = lpszNewContent; + } return p; } -XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, XMLCSTR lpszOldValue) -{ - if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } - int i=indexClear(lpszOldValue); - if (i>=0) return updateClear_WOSD(lpszNewContent,i); +XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, + XMLCSTR lpszOldValue) { + if (!d) { + if (lpszNewContent) free(lpszNewContent); + return NULL; + } + int i = indexClear(lpszOldValue); + if (i >= 0) return updateClear_WOSD(lpszNewContent, i); return addClear_WOSD(lpszNewContent); } -XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP,XMLClear *oldP) -{ - if (oldP) return 
updateClear_WOSD((XMLSTR)newP->lpszValue,(XMLSTR)oldP->lpszValue); +XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP, XMLClear *oldP) { + if (oldP) { + return updateClear_WOSD((XMLSTR)newP->lpszValue, + (XMLSTR)oldP->lpszValue); + } return NULL; } -int XMLNode::nChildNode(XMLCSTR name) const -{ +int XMLNode::nChildNode(XMLCSTR name) const { if (!d) return 0; - int i,j=0,n=d->nChild; - XMLNode *pc=d->pChild; - for (i=0; id->lpszName, name)==0) j++; + int i, j = 0, n = d->nChild; + XMLNode *pc = d->pChild; + for (i = 0; i < n; i++) { + if (xstricmp(pc->d->lpszName, name) == 0) j++; pc++; } return j; } -XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const -{ +XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const { if (!d) return emptyXMLNode; - int i=0,n=d->nChild; - if (j) i=*j; - XMLNode *pc=d->pChild+i; - for (; id->lpszName, name)) - { - if (j) *j=i+1; + int i = 0, n = d->nChild; + if (j) i = *j; + XMLNode *pc = d->pChild + i; + for (; i < n; i++) { + if (!xstricmp(pc->d->lpszName, name)) { + if (j) *j = i + 1; return *pc; } pc++; @@ -2428,117 +2736,149 @@ XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const return emptyXMLNode; } -XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const -{ +XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const { if (!d) return emptyXMLNode; - if (j>=0) - { - int i=0; - while (j-->0) getChildNode(name,&i); - return getChildNode(name,&i); - } - int i=d->nChild; - while (i--) if (!xstricmp(name,d->pChild[i].d->lpszName)) break; - if (i<0) return emptyXMLNode; + if (j >= 0) { + int i = 0; + while (j-- > 0) getChildNode(name, &i); + return getChildNode(name, &i); + } + int i = d->nChild; + while (i--) if (!xstricmp(name, d->pChild[i].d->lpszName)) break; + if (i < 0) return emptyXMLNode; return getChildNode(i); } -XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, XMLCHAR sep) -{ - XMLSTR path=stringDup(_path); - XMLNode x=getChildNodeByPathNonConst(path,createMissing,sep); +XMLNode* 
XMLNode::getChildNodePtr(XMLCSTR name, int *j) const { + if (!d) return &emptyXMLNode; + int i = 0, n = d->nChild; + int foundIndex = 0; + XMLNode *pc = d->pChild + i; + for (; i < n; i++) { + if (!xstricmp(pc->d->lpszName, name)) { + if (*j == foundIndex) return pc; + foundIndex++; + } + pc++; + } + return &emptyXMLNode; +} + +XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, + XMLCHAR sep) { + XMLSTR path = stringDup(_path); + XMLNode x = getChildNodeByPathNonConst(path, createMissing, sep); if (path) free(path); return x; } -XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, XMLCHAR sep) -{ - if ((!path)||(!(*path))) return *this; - XMLNode xn,xbase=*this; - XMLCHAR *tend1,sepString[2]; sepString[0]=sep; sepString[1]=0; - tend1=xstrstr(path,sepString); - while(tend1) - { - *tend1=0; - xn=xbase.getChildNode(path); - if (xn.isEmpty()) - { - if (createIfMissing) xn=xbase.addChild(path); - else { *tend1=sep; return XMLNode::emptyXMLNode; } +XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, + char createIfMissing, XMLCHAR sep) { + if ((!path) || (!(*path))) return *this; + XMLNode xn, xbase = *this; + XMLCHAR *tend1, sepString[2]; + sepString[0] = sep; + sepString[1] = 0; + tend1 = xstrstr(path, sepString); + while (tend1) { + *tend1 = 0; + xn = xbase.getChildNode(path); + if (xn.isEmpty()) { + if (createIfMissing) xn = xbase.addChild(path); + else { + *tend1 = sep; + return XMLNode::emptyXMLNode; + } } - *tend1=sep; - xbase=xn; - path=tend1+1; - tend1=xstrstr(path,sepString); + *tend1 = sep; + xbase = xn; + path = tend1 + 1; + tend1 = xstrstr(path, sepString); } - xn=xbase.getChildNode(path); - if (xn.isEmpty()&&createIfMissing) xn=xbase.addChild(path); + xn = xbase.getChildNode(path); + if (xn.isEmpty() && createIfMissing) xn = xbase.addChild(path); return xn; } -XMLElementPosition XMLNode::positionOfText (int i) const { if (i>=d->nText ) i=d->nText-1; return findPosition(d,i,eNodeText ); } -XMLElementPosition 
XMLNode::positionOfClear (int i) const { if (i>=d->nClear) i=d->nClear-1; return findPosition(d,i,eNodeClear); } -XMLElementPosition XMLNode::positionOfChildNode(int i) const { if (i>=d->nChild) i=d->nChild-1; return findPosition(d,i,eNodeChild); } -XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { return positionOfText (indexText (lpszValue)); } -XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { return positionOfClear(indexClear(lpszValue)); } -XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { if (a) return positionOfClear(a->lpszValue); return positionOfClear(); } -XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const -{ - if ((!d)||(!x.d)) return -1; - XMLNodeData *dd=x.d; - XMLNode *pc=d->pChild; - int i=d->nChild; - while (i--) if (pc[i].d==dd) return findPosition(d,i,eNodeChild); +XMLElementPosition XMLNode::positionOfText (int i) const { + if (i >= d->nText ) i = d->nText - 1; + return findPosition(d, i, eNodeText ); +} +XMLElementPosition XMLNode::positionOfClear (int i) const { + if (i >= d->nClear) i = d->nClear - 1; + return findPosition(d, i, eNodeClear); +} +XMLElementPosition XMLNode::positionOfChildNode(int i) const { + if (i >= d->nChild) i = d->nChild - 1; + return findPosition(d, i, eNodeChild); +} +XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { + return positionOfText (indexText (lpszValue)); +} +XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { + return positionOfClear(indexClear(lpszValue)); +} +XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { + if (a) return positionOfClear(a->lpszValue); + return positionOfClear(); +} +XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const { + if ((!d) || (!x.d)) return -1; + XMLNodeData *dd = x.d; + XMLNode *pc = d->pChild; + int i = d->nChild; + while (i--) if (pc[i].d == dd) return findPosition(d, i, eNodeChild); return -1; } -XMLElementPosition 
XMLNode::positionOfChildNode(XMLCSTR name, int count) const -{ +XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const { if (!name) return positionOfChildNode(count); - int j=0; - do { getChildNode(name,&j); if (j<0) return -1; } while (count--); - return findPosition(d,j-1,eNodeChild); -} - -XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name,XMLCSTR attributeName,XMLCSTR attributeValue, int *k) const -{ - int i=0,j; - if (k) i=*k; - XMLNode x; - XMLCSTR t; - do - { - x=getChildNode(name,&i); - if (!x.isEmpty()) - { - if (attributeValue) - { - j=0; - do - { - t=x.getAttribute(attributeName,&j); - if (t&&(xstricmp(attributeValue,t)==0)) { if (k) *k=i; return x; } - } while (t); - } else - { - if (x.isAttributeSet(attributeName)) { if (k) *k=i; return x; } - } - } - } while (!x.isEmpty()); - return emptyXMLNode; + int j = 0; + do { + getChildNode(name, &j); + if (j < 0) return -1; + } while (count--); + return findPosition(d, j - 1, eNodeChild); +} + +XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name, XMLCSTR attributeName, + XMLCSTR attributeValue, + int *k) const { + int i = 0, j; + if (k) i = *k; + XMLNode x; + XMLCSTR t; + do { + x = getChildNode(name, &i); + if (!x.isEmpty()) { + if (attributeValue) { + j = 0; + do { + t = x.getAttribute(attributeName, &j); + if (t && (xstricmp(attributeValue, t) == 0)) { + if (k) *k = i; + return x; + } + } while (t); + } else { + if (x.isAttributeSet(attributeName)) { + if (k) *k = i; + return x; + } + } + } + } while (!x.isEmpty()); + return emptyXMLNode; } // Find an attribute on an node. 
-XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const -{ +XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const { if (!d) return NULL; - int i=0,n=d->nAttribute; - if (j) i=*j; - XMLAttribute *pAttr=d->pAttribute+i; - for (; ilpszName, lpszAttrib)==0) - { - if (j) *j=i+1; + int i = 0, n = d->nAttribute; + if (j) i = *j; + XMLAttribute *pAttr = d->pAttribute + i; + for (; i < n; i++) { + if (xstricmp(pAttr->lpszName, lpszAttrib) == 0) { + if (j) *j = i + 1; return pAttr->lpszValue; } pAttr++; @@ -2546,15 +2886,12 @@ XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const return NULL; } -char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const -{ +char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const { if (!d) return FALSE; - int i,n=d->nAttribute; - XMLAttribute *pAttr=d->pAttribute; - for (i=0; ilpszName, lpszAttrib)==0) - { + int i, n = d->nAttribute; + XMLAttribute *pAttr = d->pAttribute; + for (i = 0; i < n; i++) { + if (xstricmp(pAttr->lpszName, lpszAttrib) == 0) { return TRUE; } pAttr++; @@ -2562,159 +2899,283 @@ char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const return FALSE; } -XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const -{ +XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const { if (!d) return NULL; - int i=0; - while (j-->0) getAttribute(name,&i); - return getAttribute(name,&i); + int i = 0; + while (j-- > 0) getAttribute(name, &i); + return getAttribute(name, &i); } -XMLNodeContents XMLNode::enumContents(int i) const -{ +XMLNodeContents XMLNode::enumContents(int i) const { XMLNodeContents c; - if (!d) { c.etype=eNodeNULL; return c; } - if (inAttribute) - { - c.etype=eNodeAttribute; - c.attrib=d->pAttribute[i]; + if (!d) { + c.etype = eNodeNULL; + return c; + } + if (i < d->nAttribute) { + c.etype = eNodeAttribute; + c.attrib = d->pAttribute[i]; return c; } - i-=d->nAttribute; - c.etype=(XMLElementType)(d->pOrder[i]&3); - i=(d->pOrder[i])>>2; - switch (c.etype) - { - case eNodeChild: c.child = d->pChild[i]; 
break; - case eNodeText: c.text = d->pText[i]; break; - case eNodeClear: c.clear = d->pClear[i]; break; - default: break; + i -= d->nAttribute; + c.etype = (XMLElementType)(d->pOrder[i] & 3); + i = (d->pOrder[i]) >> 2; + switch (c.etype) { + case eNodeChild: + c.child = d->pChild[i]; + break; + case eNodeText: + c.text = d->pText[i]; + break; + case eNodeClear: + c.clear = d->pClear[i]; + break; + default: + break; } return c; } -XMLCSTR XMLNode::getName() const { if (!d) return NULL; return d->lpszName; } -int XMLNode::nText() const { if (!d) return 0; return d->nText; } -int XMLNode::nChildNode() const { if (!d) return 0; return d->nChild; } -int XMLNode::nAttribute() const { if (!d) return 0; return d->nAttribute; } -int XMLNode::nClear() const { if (!d) return 0; return d->nClear; } -int XMLNode::nElement() const { if (!d) return 0; return d->nAttribute+d->nChild+d->nText+d->nClear; } -XMLClear XMLNode::getClear (int i) const { if ((!d)||(i>=d->nClear )) return emptyXMLClear; return d->pClear[i]; } -XMLAttribute XMLNode::getAttribute (int i) const { if ((!d)||(i>=d->nAttribute)) return emptyXMLAttribute; return d->pAttribute[i]; } -XMLCSTR XMLNode::getAttributeName (int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszName; } -XMLCSTR XMLNode::getAttributeValue(int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszValue; } -XMLCSTR XMLNode::getText (int i) const { if ((!d)||(i>=d->nText )) return NULL; return d->pText[i]; } -XMLNode XMLNode::getChildNode (int i) const { if ((!d)||(i>=d->nChild )) return emptyXMLNode; return d->pChild[i]; } -XMLNode XMLNode::getParentNode ( ) const { if ((!d)||(!d->pParent )) return emptyXMLNode; return XMLNode(d->pParent); } -char XMLNode::isDeclaration ( ) const { if (!d) return 0; return d->isDeclaration; } -char XMLNode::isEmpty ( ) const { return (d==NULL); } -XMLNode XMLNode::emptyNode ( ) { return XMLNode::emptyXMLNode; } - -XMLNode XMLNode::addChild(XMLCSTR 
lpszName, char isDeclaration, XMLElementPosition pos) - { return addChild_priv(0,stringDup(lpszName),isDeclaration,pos); } -XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, XMLElementPosition pos) - { return addChild_priv(0,lpszName,isDeclaration,pos); } -XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) - { return addAttribute_priv(0,stringDup(lpszName),stringDup(lpszValue)); } -XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) - { return addAttribute_priv(0,lpszName,lpszValuev); } -XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) - { return addText_priv(0,stringDup(lpszValue),pos); } -XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) - { return addText_priv(0,lpszValue,pos); } -XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) - { return addClear_priv(0,stringDup(lpszValue),lpszOpen,lpszClose,pos); } -XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) - { return addClear_priv(0,lpszValue,lpszOpen,lpszClose,pos); } -XMLCSTR XMLNode::updateName(XMLCSTR lpszName) - { return updateName_WOSD(stringDup(lpszName)); } -XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) - { return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),stringDup(newAttribute->lpszName),oldAttribute->lpszName); } -XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i) - { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),i); } -XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName) - { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),lpszOldName); } -XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) - { return updateText_WOSD(stringDup(lpszNewValue),i); } -XMLCSTR 
XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) - { return updateText_WOSD(stringDup(lpszNewValue),lpszOldValue); } -XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i) - { return updateClear_WOSD(stringDup(lpszNewContent),i); } -XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) - { return updateClear_WOSD(stringDup(lpszNewValue),lpszOldValue); } -XMLClear *XMLNode::updateClear(XMLClear *newP,XMLClear *oldP) - { return updateClear_WOSD(stringDup(newP->lpszValue),oldP->lpszValue); } - -char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, char _guessWideCharChars, - char _dropWhiteSpace, char _removeCommentsInMiddleOfText) -{ - guessWideCharChars=_guessWideCharChars; dropWhiteSpace=_dropWhiteSpace; removeCommentsInMiddleOfText=_removeCommentsInMiddleOfText; +XMLCSTR XMLNode::getName() const { + if (!d) return NULL; + return d->lpszName; +} +int XMLNode::nText() const { + if (!d) return 0; + return d->nText; +} +int XMLNode::nChildNode() const { + if (!d) return 0; + return d->nChild; +} +int XMLNode::nAttribute() const { + if (!d) return 0; + return d->nAttribute; +} +int XMLNode::nClear() const { + if (!d) return 0; + return d->nClear; +} +int XMLNode::nElement() const { + if (!d) return 0; + return d->nAttribute + d->nChild + d->nText + d->nClear; +} +XMLClear XMLNode::getClear (int i) const { + if ((!d) || (i >= d->nClear )) return emptyXMLClear; + return d->pClear[i]; +} +XMLAttribute XMLNode::getAttribute (int i) const { + if ((!d) || (i >= d->nAttribute)) return emptyXMLAttribute; + return d->pAttribute[i]; +} +XMLCSTR XMLNode::getAttributeName (int i) const { + if ((!d) || (i >= d->nAttribute)) return NULL; + return d->pAttribute[i].lpszName; +} +XMLCSTR XMLNode::getAttributeValue(int i) const { + if ((!d) || (i >= d->nAttribute)) return NULL; + return d->pAttribute[i].lpszValue; +} +XMLCSTR XMLNode::getText (int i) const { + if ((!d) || (i >= d->nText )) return NULL; + return d->pText[i]; +} 
+XMLNode XMLNode::getChildNode (int i) const { + if ((!d) || (i >= d->nChild )) return emptyXMLNode; + return d->pChild[i]; +} +XMLNode XMLNode::getParentNode ( ) const { + if ((!d) || (!d->pParent )) return emptyXMLNode; + return XMLNode(d->pParent); +} +char XMLNode::isDeclaration ( ) const { + if (!d) return 0; + return d->isDeclaration; +} +char XMLNode::isEmpty ( ) const { + return (d == NULL); +} +XMLNode XMLNode::emptyNode ( ) { + return XMLNode::emptyXMLNode; +} + +XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, + XMLElementPosition pos) { + return addChild_priv(0, stringDup(lpszName), isDeclaration, pos); +} +XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, + XMLElementPosition pos) { + return addChild_priv(0, lpszName, isDeclaration, pos); +} +XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) { + return addAttribute_priv(0, stringDup(lpszName), stringDup(lpszValue)); +} +XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) { + return addAttribute_priv(0, lpszName, lpszValuev); +} +XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) { + return addText_priv(0, stringDup(lpszValue), pos); +} +XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) { + return addText_priv(0, lpszValue, pos); +} +XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, + XMLCSTR lpszClose, XMLElementPosition pos) { + return addClear_priv(0, stringDup(lpszValue), lpszOpen, lpszClose, pos); +} +XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, + XMLCSTR lpszClose, XMLElementPosition pos) { + return addClear_priv(0, lpszValue, lpszOpen, lpszClose, pos); +} +XMLCSTR XMLNode::updateName(XMLCSTR lpszName) { + return updateName_WOSD(stringDup(lpszName)); +} +XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, + XMLAttribute *oldAttribute) { + return updateAttribute_WOSD(stringDup(newAttribute->lpszValue), + 
stringDup(newAttribute->lpszName), + oldAttribute->lpszName); +} +XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, + XMLCSTR lpszNewName, int i) { + return updateAttribute_WOSD(stringDup(lpszNewValue), + stringDup(lpszNewName), i); +} +XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, + XMLCSTR lpszNewName, + XMLCSTR lpszOldName) { + return updateAttribute_WOSD(stringDup(lpszNewValue), + stringDup(lpszNewName), lpszOldName); +} +XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) { + return updateText_WOSD(stringDup(lpszNewValue), i); +} +XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) { + return updateText_WOSD(stringDup(lpszNewValue), lpszOldValue); +} +XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i) { + return updateClear_WOSD(stringDup(lpszNewContent), i); +} +XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) { + return updateClear_WOSD(stringDup(lpszNewValue), lpszOldValue); +} +XMLClear *XMLNode::updateClear(XMLClear *newP, XMLClear *oldP) { + return updateClear_WOSD(stringDup(newP->lpszValue), oldP->lpszValue); +} + +char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, + char _guessWideCharChars, + char _dropWhiteSpace, + char _removeCommentsInMiddleOfText) { + guessWideCharChars = _guessWideCharChars; + dropWhiteSpace = _dropWhiteSpace; + removeCommentsInMiddleOfText = _removeCommentsInMiddleOfText; #ifdef _XMLWIDECHAR - if (_characterEncoding) characterEncoding=_characterEncoding; + if (_characterEncoding) characterEncoding = _characterEncoding; #else - switch(_characterEncoding) - { - case char_encoding_UTF8: characterEncoding=_characterEncoding; XML_ByteTable=XML_utf8ByteTable; break; - case char_encoding_legacy: characterEncoding=_characterEncoding; XML_ByteTable=XML_legacyByteTable; break; - case char_encoding_ShiftJIS: characterEncoding=_characterEncoding; XML_ByteTable=XML_sjisByteTable; break; - case char_encoding_GB2312: 
characterEncoding=_characterEncoding; XML_ByteTable=XML_gb2312ByteTable; break; + switch (_characterEncoding) { + case char_encoding_UTF8: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_utf8ByteTable; + break; + case char_encoding_legacy: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_legacyByteTable; + break; + case char_encoding_ShiftJIS: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_sjisByteTable; + break; + case char_encoding_GB2312: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_gb2312ByteTable; + break; case char_encoding_Big5: - case char_encoding_GBK: characterEncoding=_characterEncoding; XML_ByteTable=XML_gbk_big5_ByteTable; break; - default: return 1; + case char_encoding_GBK: + characterEncoding = _characterEncoding; + XML_ByteTable = XML_gbk_big5_ByteTable; + break; + default: + return 1; } #endif return 0; } -XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf,int l, char useXMLEncodingAttribute) -{ +XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf, int l, + char useXMLEncodingAttribute) { #ifdef _XMLWIDECHAR return (XMLCharEncoding)0; #else - if (l<25) return (XMLCharEncoding)0; - if (guessWideCharChars&&(myIsTextWideChar(buf,l))) return (XMLCharEncoding)0; - unsigned char *b=(unsigned char*)buf; - if ((b[0]==0xef)&&(b[1]==0xbb)&&(b[2]==0xbf)) return char_encoding_UTF8; + if (l < 25) return (XMLCharEncoding)0; + if (guessWideCharChars && (myIsTextWideChar(buf, l))) { + return (XMLCharEncoding)0; + } + unsigned char *b = (unsigned char*)buf; + if ((b[0] == 0xef) && (b[1] == 0xbb) && (b[2] == 0xbf)) { + return char_encoding_UTF8; + } // Match utf-8 model ? 
- XMLCharEncoding bestGuess=char_encoding_UTF8; - int i=0; - while (i>2 ]; - *(curr++)=base64EncodeTable[(inbuf[0]<<4)&0x3F]; - *(curr++)=base64Fillchar; - *(curr++)=base64Fillchar; - } else if (eLen==2) - { - j=(inbuf[0]<<8)|inbuf[1]; - *(curr++)=base64EncodeTable[ j>>10 ]; - *(curr++)=base64EncodeTable[(j>> 4)&0x3f]; - *(curr++)=base64EncodeTable[(j<< 2)&0x3f]; - *(curr++)=base64Fillchar; - } - *(curr++)=0; + *(curr++) = base64EncodeTable[ j>>18 ]; + *(curr++) = base64EncodeTable[(j>>12)&0x3f]; + *(curr++) = base64EncodeTable[(j>> 6)&0x3f]; + *(curr++) = base64EncodeTable[(j )&0x3f]; + if (formatted) { + if (!k) { + *(curr++) = _CXML('\n'); + k = 18; + } + k--; + } + } + eLen = inlen - eLen * 3; // 0 - 2. + if (eLen == 1) { + *(curr++) = base64EncodeTable[ inbuf[0] >> 2 ]; + *(curr++) = base64EncodeTable[(inbuf[0] << 4) & 0x3F]; + *(curr++) = base64Fillchar; + *(curr++) = base64Fillchar; + } else if (eLen == 2) { + j = (inbuf[0] << 8) | inbuf[1]; + *(curr++) = base64EncodeTable[ j>>10 ]; + *(curr++) = base64EncodeTable[(j>> 4)&0x3f]; + *(curr++) = base64EncodeTable[(j<< 2)&0x3f]; + *(curr++) = base64Fillchar; + } + *(curr++) = 0; return (XMLSTR)buf; } -unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data,XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - int size=0; +unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data, XMLError *xe) { + if (xe) *xe = eXMLErrorNone; + int size = 0; unsigned char c; //skip any extra characters (e.g. 
newlines or spaces) - while (*data) - { + while (*data) { #ifdef _XMLWIDECHAR - if (*data>255) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } + if (*data > 255) { + if (xe) *xe = eXMLErrorBase64DecodeIllegalCharacter; + return 0; + } #endif - c=base64DecodeTable[(unsigned char)(*data)]; - if (c<97) size++; - else if (c==98) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } + c = base64DecodeTable[(unsigned char)(*data)]; + if (c < 97) size++; + else if (c == 98) { + if (xe) *xe = eXMLErrorBase64DecodeIllegalCharacter; + return 0; + } data++; } - if (xe&&(size%4!=0)) *xe=eXMLErrorBase64DataSizeIsNotMultipleOf4; - if (size==0) return 0; - do { data--; size--; } while(*data==base64Fillchar); size++; - return (unsigned int)((size*3)/4); -} - -unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int len, XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - int i=0,p=0; - unsigned char d,c; - for(;;) - { + if (xe && (size % 4 != 0)) *xe = eXMLErrorBase64DataSizeIsNotMultipleOf4; + if (size == 0) return 0; + do { + data--; + size--; + } while (*data == base64Fillchar); + size++; + return (unsigned int)((size*3) / 4); +} + +unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, + int len, XMLError *xe) { + if (xe) *xe = eXMLErrorNone; + int i = 0, p = 0; + unsigned char d, c; + for (;;) { #ifdef _XMLWIDECHAR #define BASE64DECODE_READ_NEXT_CHAR(c) \ @@ -2834,58 +3312,82 @@ unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int #endif BASE64DECODE_READ_NEXT_CHAR(c) - if (c==99) { return 2; } - if (c==96) - { - if (p==(int)len) return 2; - if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; + if (c == 99) { + return 2; + } + if (c == 96) { + if (p == (int)len) return 2; + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; return 1; } BASE64DECODE_READ_NEXT_CHAR(d) - if ((d==99)||(d==96)) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) { if (xe) 
*xe=eXMLErrorBase64DecodeBufferTooSmall; return 0; } - buf[p++]=(unsigned char)((c<<2)|((d>>4)&0x3)); + if ((d == 99) || (d == 96)) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + if (p == (int)len) { + if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall; + return 0; + } + buf[p++] = (unsigned char)((c << 2) | ((d >> 4) & 0x3)); BASE64DECODE_READ_NEXT_CHAR(c) - if (c==99) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) - { - if (c==96) return 2; - if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; + if (c == 99) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + if (p == (int)len) { + if (c == 96) return 2; + if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall; return 0; } - if (c==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - buf[p++]=(unsigned char)(((d<<4)&0xf0)|((c>>2)&0xf)); + if (c == 96) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + buf[p++] = (unsigned char)(((d << 4) & 0xf0) | ((c >> 2) & 0xf)); BASE64DECODE_READ_NEXT_CHAR(d) - if (d==99 ) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - if (p==(int)len) - { - if (d==96) return 2; - if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; + if (d == 99 ) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + if (p == (int)len) { + if (d == 96) return 2; + if (xe) *xe = eXMLErrorBase64DecodeBufferTooSmall; return 0; } - if (d==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } - buf[p++]=(unsigned char)(((c<<6)&0xc0)|d); + if (d == 96) { + if (xe) *xe = eXMLErrorBase64DecodeTruncatedData; + return 1; + } + buf[p++] = (unsigned char)(((c << 6) & 0xc0) | d); } } #undef BASE64DECODE_READ_NEXT_CHAR -void XMLParserBase64Tool::alloc(int newsize) -{ - if ((!buf)&&(newsize)) { buf=malloc(newsize); buflen=newsize; return; } - if (newsize>buflen) { buf=realloc(buf,newsize); buflen=newsize; } +void XMLParserBase64Tool::alloc(int newsize) { + if ((!buf) && 
(newsize)) { + buf = malloc(newsize); + buflen = newsize; + return; + } + if (newsize > buflen) { + buf = realloc(buf, newsize); + buflen = newsize; + } } -unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) -{ - if (xe) *xe=eXMLErrorNone; - unsigned int len=decodeSize(data,xe); - if (outlen) *outlen=len; +unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) { + if (xe) *xe = eXMLErrorNone; + unsigned int len = decodeSize(data, xe); + if (outlen) *outlen = len; if (!len) return NULL; - alloc(len+1); - if(!decode(data,(unsigned char*)buf,len,xe)){ return NULL; } + alloc(len + 1); + if (!decode(data, (unsigned char*)buf, len, xe)) { + return NULL; + } return (unsigned char*)buf; } diff --git a/ext/mcpat/xmlParser.h b/ext/mcpat/xmlParser.h index e29136cb9..dd43694bb 100644 --- a/ext/mcpat/xmlParser.h +++ b/ext/mcpat/xmlParser.h @@ -42,6 +42,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Copyright (c) 2002, Business-Insight + * Copyright (c) 2010-2013 Advanced Micro Devices, Inc. * Business-Insight * All rights reserved. 
* @@ -160,33 +161,32 @@ #define XMLDLLENTRY #ifndef XML_NO_WIDE_CHAR #include // to have 'wcsrtombs' for ANSI version - // to have 'mbsrtowcs' for WIDECHAR version +// to have 'mbsrtowcs' for WIDECHAR version #endif #endif // Some common types for char set portable code #ifdef _XMLWIDECHAR - #define _CXML(c) L ## c - #define XMLCSTR const wchar_t * - #define XMLSTR wchar_t * - #define XMLCHAR wchar_t +#define _CXML(c) L ## c +#define XMLCSTR const wchar_t * +#define XMLSTR wchar_t * +#define XMLCHAR wchar_t #else - #define _CXML(c) c - #define XMLCSTR const char * - #define XMLSTR char * - #define XMLCHAR char +#define _CXML(c) c +#define XMLCSTR const char * +#define XMLSTR char * +#define XMLCHAR char #endif #ifndef FALSE - #define FALSE 0 +#define FALSE 0 #endif /* FALSE */ #ifndef TRUE - #define TRUE 1 +#define TRUE 1 #endif /* TRUE */ /// Enumeration for XML parse errors. -typedef enum XMLError -{ +typedef enum XMLError { eXMLErrorNone = 0, eXMLErrorMissingEndTag, eXMLErrorNoXMLTagFound, @@ -213,30 +213,32 @@ typedef enum XMLError /// Enumeration used to manage type of data. Use in conjunction with structure XMLNodeContents -typedef enum XMLElementType -{ - eNodeChild=0, - eNodeAttribute=1, - eNodeText=2, - eNodeClear=3, - eNodeNULL=4 +typedef enum XMLElementType { + eNodeChild = 0, + eNodeAttribute = 1, + eNodeText = 2, + eNodeClear = 3, + eNodeNULL = 4 } XMLElementType; /// Structure used to obtain error details if the parse fails. -typedef struct XMLResults -{ +typedef struct XMLResults { enum XMLError error; - int nLine,nColumn; + int nLine; + int nColumn; } XMLResults; /// Structure for XML clear (unformatted) node (usually comments) typedef struct XMLClear { - XMLCSTR lpszValue; XMLCSTR lpszOpenTag; XMLCSTR lpszCloseTag; + XMLCSTR lpszValue; + XMLCSTR lpszOpenTag; + XMLCSTR lpszCloseTag; } XMLClear; /// Structure for XML attribute. 
typedef struct XMLAttribute { - XMLCSTR lpszName; XMLCSTR lpszValue; + XMLCSTR lpszName; + XMLCSTR lpszValue; } XMLAttribute; /// XMLElementPosition are not interchangeable with simple indexes @@ -256,9 +258,8 @@ struct XMLNodeContents; *
  • XMLNode::openFileHelper
  • *
  • XMLNode::createXMLTopNode (or XMLNode::createXMLTopNode_WOSD)
  • * */ -typedef struct XMLDLLENTRY XMLNode -{ - private: +typedef struct XMLDLLENTRY XMLNode { +private: struct XMLNodeDataTag; @@ -267,7 +268,7 @@ typedef struct XMLDLLENTRY XMLNode /// Constructors are protected, so use instead one of: XMLNode::parseString, XMLNode::parseFile, XMLNode::openFileHelper, XMLNode::createXMLTopNode XMLNode(struct XMLNodeDataTag *p); - public: +public: static XMLCSTR getVersion();///< Return the XMLParser library version number /** @defgroup conversions Parsing XML files/strings to an XMLNode structure and Rendering XMLNode's to files/string. @@ -275,7 +276,8 @@ typedef struct XMLDLLENTRY XMLNode * @{ */ /// Parse an XML string and return the root of a XMLNode tree representing the string. - static XMLNode parseString (XMLCSTR lpXMLString, XMLCSTR tag=NULL, XMLResults *pResults=NULL); + static XMLNode parseString(XMLCSTR lpXMLString, XMLCSTR tag = NULL, + XMLResults *pResults = NULL); /**< The "parseString" function parse an XML string and return the root of a XMLNode tree. The "opposite" of this function is * the function "createXMLString" that re-creates an XML string from an XMLNode tree. If the XML document is corrupted, the * "parseString" method will initialize the "pResults" variable with some information that can be used to trace the error. @@ -288,7 +290,8 @@ typedef struct XMLDLLENTRY XMLNode */ /// Parse an XML file and return the root of a XMLNode tree representing the file. - static XMLNode parseFile (XMLCSTR filename, XMLCSTR tag=NULL, XMLResults *pResults=NULL); + static XMLNode parseFile(XMLCSTR filename, XMLCSTR tag = NULL, + XMLResults *pResults = NULL); /**< The "parseFile" function parse an XML file and return the root of a XMLNode tree. The "opposite" of this function is * the function "writeToFile" that re-creates an XML file from an XMLNode tree. If the XML document is corrupted, the * "parseFile" method will initialize the "pResults" variable with some information that can be used to trace the error. 
@@ -301,7 +304,7 @@ typedef struct XMLDLLENTRY XMLNode */ /// Parse an XML file and return the root of a XMLNode tree representing the file. A very crude error checking is made. An attempt to guess the Char Encoding used in the file is made. - static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag=NULL); + static XMLNode openFileHelper(XMLCSTR filename, XMLCSTR tag = NULL); /**< The "openFileHelper" function reports to the screen all the warnings and errors that occurred during parsing of the XML file. * This function also tries to guess char Encoding (UTF-8, ASCII or SHIT-JIS) based on the first 200 bytes of the file. Since each * application has its own way to report and deal with errors, you should rather use the "parseFile" function to parse XML files @@ -322,7 +325,7 @@ typedef struct XMLDLLENTRY XMLNode static XMLCSTR getError(XMLError error); ///< this gives you a user-friendly explanation of the parsing error /// Create an XML string starting from the current XMLNode. - XMLSTR createXMLString(int nFormat=1, int *pnSize=NULL) const; + XMLSTR createXMLString(int nFormat = 1, int *pnSize = NULL) const; /**< The returned string should be free'd using the "freeXMLString" function. * * If nFormat==0, no formatting is required otherwise this returns an user friendly XML string from a given element @@ -330,8 +333,8 @@ typedef struct XMLDLLENTRY XMLNode /// Save the content of an xmlNode inside a file XMLError writeToFile(XMLCSTR filename, - const char *encoding=NULL, - char nFormat=1) const; + const char *encoding = NULL, + char nFormat = 1) const; /**< If nFormat==0, no formatting is required otherwise this returns an user friendly XML string from a given element with appropriate white spaces and carriage returns. * If the global parameter "characterEncoding==encoding_UTF8", then the "encoding" parameter is ignored and always set to "utf-8". 
* If the global parameter "characterEncoding==encoding_ShiftJIS", then the "encoding" parameter is ignored and always set to "SHIFT-JIS". @@ -349,14 +352,15 @@ typedef struct XMLDLLENTRY XMLNode XMLNode getChildNode(int i=0) const; ///< return ith child node XMLNode getChildNode(XMLCSTR name, int i) const; ///< return ith child node with specific name (return an empty node if failing). If i==-1, this returns the last XMLNode with the given name. XMLNode getChildNode(XMLCSTR name, int *i=NULL) const; ///< return next child node with specific name (return an empty node if failing) + XMLNode* getChildNodePtr(XMLCSTR name, int *j) const; XMLNode getChildNodeWithAttribute(XMLCSTR tagName, XMLCSTR attributeName, XMLCSTR attributeValue=NULL, int *i=NULL) const; ///< return child node with specific name/attribute (return an empty node if failing) XMLNode getChildNodeByPath(XMLCSTR path, char createNodeIfMissing=0, XMLCHAR sep='/'); - ///< return the first child node with specific path + ///< return the first child node with specific path XMLNode getChildNodeByPathNonConst(XMLSTR path, char createNodeIfMissing=0, XMLCHAR sep='/'); - ///< return the first child node with specific path. + ///< return the first child node with specific path. 
int nChildNode(XMLCSTR name) const; ///< return the number of child node with specific name int nChildNode() const; ///< nbr of child node @@ -418,12 +422,12 @@ typedef struct XMLDLLENTRY XMLNode */ XMLCSTR updateName(XMLCSTR lpszName); ///< change node's name XMLAttribute *updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added + XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added + XMLAttribute *updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName, XMLCSTR lpszOldName);///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added XMLCSTR updateText(XMLCSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added XMLCSTR updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added XMLClear *updateClear(XMLCSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added - XMLClear *updateClear(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added + XMLClear *updateClear(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added XMLClear *updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added /** @} */ @@ -482,12 +486,12 @@ typedef struct 
XMLDLLENTRY XMLNode XMLCSTR updateName_WOSD(XMLSTR lpszName); ///< change node's name XMLAttribute *updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL,int i=0); ///< if the attribute to update is missing, a new one will be added - XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added + XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName=NULL, int i=0); ///< if the attribute to update is missing, a new one will be added + XMLAttribute *updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName, XMLCSTR lpszOldName); ///< set lpszNewName=NULL if you don't want to change the name of the attribute if the attribute to update is missing, a new one will be added XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, int i=0); ///< if the text to update is missing, a new one will be added XMLCSTR updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the text to update is missing, a new one will be added XMLClear *updateClear_WOSD(XMLSTR lpszNewContent, int i=0); ///< if the clearTag to update is missing, a new one will be added - XMLClear *updateClear_WOSD(XMLClear *newP,XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added + XMLClear *updateClear_WOSD(XMLClear *newP, XMLClear *oldP); ///< if the clearTag to update is missing, a new one will be added XMLClear *updateClear_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue); ///< if the clearTag to update is missing, a new one will be added /** @} */ @@ -508,15 +512,14 @@ typedef struct XMLDLLENTRY XMLNode /** @} */ /// Enumeration for XML character encoding. 
- typedef enum XMLCharEncoding - { - char_encoding_error=0, - char_encoding_UTF8=1, - char_encoding_legacy=2, - char_encoding_ShiftJIS=3, - char_encoding_GB2312=4, - char_encoding_Big5=5, - char_encoding_GBK=6 // this is actually the same as Big5 + typedef enum XMLCharEncoding { + char_encoding_error = 0, + char_encoding_UTF8 = 1, + char_encoding_legacy = 2, + char_encoding_ShiftJIS = 3, + char_encoding_GB2312 = 4, + char_encoding_Big5 = 5, + char_encoding_GBK = 6 // this is actually the same as Big5 } XMLCharEncoding; /** \addtogroup conversions @@ -589,48 +592,46 @@ typedef struct XMLDLLENTRY XMLNode * If an inconsistency in the encoding is detected, then the return value is "0". */ /** @} */ - private: - // these are functions and structures used internally by the XMLNode class (don't bother about them): - - typedef struct XMLNodeDataTag // to allow shallow copy and "intelligent/smart" pointers (automatic delete): - { - XMLCSTR lpszName; // Element name (=NULL if root) - int nChild, // Number of child nodes - nText, // Number of text fields - nClear, // Number of Clear fields (comments) - nAttribute; // Number of attributes - char isDeclaration; // Whether node is an XML declaration - '' - struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root) - XMLNode *pChild; // Array of child nodes - XMLCSTR *pText; // Array of text fields - XMLClear *pClear; // Array of clear fields - XMLAttribute *pAttribute; // Array of attributes - int *pOrder; // order of the child_nodes,text_fields,clear_fields - int ref_count; // for garbage collection (smart pointers) - } XMLNodeData; - XMLNodeData *d; - - char parseClearTag(void *px, void *pa); - char maybeAddTxT(void *pa, XMLCSTR tokenPStr); - int ParseXMLElement(void *pXML); - void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype); - int indexText(XMLCSTR lpszValue) const; - int indexClear(XMLCSTR lpszValue) const; - XMLNode addChild_priv(int,XMLSTR,char,int); - XMLAttribute 
*addAttribute_priv(int,XMLSTR,XMLSTR); - XMLCSTR addText_priv(int,XMLSTR,int); - XMLClear *addClear_priv(int,XMLSTR,XMLCSTR,XMLCSTR,int); - void emptyTheNode(char force); - static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype); - static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat); - static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index); - static void exactMemory(XMLNodeData *d); - static int detachFromParent(XMLNodeData *d); +private: + // these are functions and structures used internally by the XMLNode class (don't bother about them): + + typedef struct XMLNodeDataTag { // to allow shallow copy and "intelligent/smart" pointers (automatic delete): + XMLCSTR lpszName; // Element name (=NULL if root) + int nChild, // Number of child nodes + nText, // Number of text fields + nClear, // Number of Clear fields (comments) + nAttribute; // Number of attributes + char isDeclaration; // Whether node is an XML declaration - '' + struct XMLNodeDataTag *pParent; // Pointer to parent element (=NULL if root) + XMLNode *pChild; // Array of child nodes + XMLCSTR *pText; // Array of text fields + XMLClear *pClear; // Array of clear fields + XMLAttribute *pAttribute; // Array of attributes + int *pOrder; // order of the child_nodes,text_fields,clear_fields + int ref_count; // for garbage collection (smart pointers) + } XMLNodeData; + XMLNodeData *d; + + char parseClearTag(void *px, void *pa); + char maybeAddTxT(void *pa, XMLCSTR tokenPStr); + int ParseXMLElement(void *pXML); + void *addToOrder(int memInc, int *_pos, int nc, void *p, int size, XMLElementType xtype); + int indexText(XMLCSTR lpszValue) const; + int indexClear(XMLCSTR lpszValue) const; + XMLNode addChild_priv(int, XMLSTR, char, int); + XMLAttribute *addAttribute_priv(int, XMLSTR, XMLSTR); + XMLCSTR addText_priv(int, XMLSTR, int); + XMLClear *addClear_priv(int, XMLSTR, XMLCSTR, XMLCSTR, int); + void emptyTheNode(char force); + 
static inline XMLElementPosition findPosition(XMLNodeData *d, int index, XMLElementType xtype); + static int CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat); + static int removeOrderElement(XMLNodeData *d, XMLElementType t, int index); + static void exactMemory(XMLNodeData *d); + static int detachFromParent(XMLNodeData *d); } XMLNode; /// This structure is given by the function XMLNode::enumContents. -typedef struct XMLNodeContents -{ +typedef struct XMLNodeContents { /// This dictates what's the content of the XMLNodeContent enum XMLElementType etype; /**< should be an union to access the appropriate data. Compiler does not allow union of object with constructor... too bad. */ @@ -664,12 +665,12 @@ XMLDLLENTRY void freeXMLString(XMLSTR t); // {free(t);} * delete them without any trouble. * * @{ */ -XMLDLLENTRY char xmltob(XMLCSTR xmlString,char defautValue=0); -XMLDLLENTRY int xmltoi(XMLCSTR xmlString,int defautValue=0); -XMLDLLENTRY long xmltol(XMLCSTR xmlString,long defautValue=0); -XMLDLLENTRY double xmltof(XMLCSTR xmlString,double defautValue=.0); -XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString,XMLCSTR defautValue=_CXML("")); -XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0')); +XMLDLLENTRY char xmltob(XMLCSTR xmlString, char defautValue=0); +XMLDLLENTRY int xmltoi(XMLCSTR xmlString, int defautValue=0); +XMLDLLENTRY long xmltol(XMLCSTR xmlString, long defautValue=0); +XMLDLLENTRY double xmltof(XMLCSTR xmlString, double defautValue=.0); +XMLDLLENTRY XMLCSTR xmltoa(XMLCSTR xmlString, XMLCSTR defautValue=_CXML("")); +XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString, XMLCHAR defautValue=_CXML('\0')); /** @} */ /** @defgroup ToXMLStringTool Helper class to create XML files using "printf", "fprintf", "cout",... functions. 
@@ -685,10 +686,9 @@ XMLDLLENTRY XMLCHAR xmltoc(XMLCSTR xmlString,XMLCHAR defautValue=_CXML('\0')); * \note If you are creating from scratch an XML file using the provided XMLNode class * you must not use the "ToXMLStringTool" class (because the "XMLNode" class does the * processing job for you during rendering).*/ -typedef struct XMLDLLENTRY ToXMLStringTool -{ +typedef struct XMLDLLENTRY ToXMLStringTool { public: - ToXMLStringTool(): buf(NULL),buflen(0){} + ToXMLStringTool(): buf(NULL), buflen(0){} ~ToXMLStringTool(); void freeBuffer();///