ext: McPAT interface changes and fixes
[gem5.git] / ext / mcpat / cacti / cacti_interface.h
1 /*****************************************************************************
2 * McPAT/CACTI
3 * SOFTWARE LICENSE AGREEMENT
4 * Copyright 2012 Hewlett-Packard Development Company, L.P.
5 * Copyright (c) 2010-2013 Advanced Micro Devices, Inc.
6 * All Rights Reserved
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are
10 * met: redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer;
12 * redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution;
15 * neither the name of the copyright holders nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 ***************************************************************************/
32
33
34
35 #ifndef __CACTI_INTERFACE_H__
36 #define __CACTI_INTERFACE_H__
37
38 #include <iostream>
39 #include <list>
40 #include <map>
41 #include <string>
42 #include <vector>
43
44 #include "const.h"
45
46 using namespace std;
47
48
49 class min_values_t;
50 class mem_array;
51 class uca_org_t;
52
53
/**
 * A power figure broken down into five double-valued components.
 *
 * A default-constructed object has every component equal to zero.
 */
class powerComponents {
public:
    double dynamic;
    double leakage;
    double gate_leakage;
    double short_circuit;
    double longer_channel_leakage;

    /** Create a record with every component set to zero. */
    powerComponents()
        : dynamic(0),
          leakage(0),
          gate_leakage(0),
          short_circuit(0),
          longer_channel_leakage(0)
    {
    }

    /** Create a record holding the same five component values as @p obj. */
    powerComponents(const powerComponents & obj)
        : dynamic(obj.dynamic),
          leakage(obj.leakage),
          gate_leakage(obj.gate_leakage),
          short_circuit(obj.short_circuit),
          longer_channel_leakage(obj.longer_channel_leakage)
    {
    }

    /**
     * Copy all five components from @p rhs.
     *
     * @return reference to this object, so assignments can be chained.
     */
    powerComponents & operator=(const powerComponents & rhs)
    {
        dynamic = rhs.dynamic;
        leakage = rhs.leakage;
        gate_leakage = rhs.gate_leakage;
        short_circuit = rhs.short_circuit;
        longer_channel_leakage = rhs.longer_channel_leakage;
        return *this;
    }

    /** Put every component back to zero. */
    void reset()
    {
        // A default-constructed record is all zeros, so assigning one
        // clears all five fields.
        *this = powerComponents();
    }

    // Arithmetic operators; only declared here, their definitions are not
    // part of this class body. Note that operator* takes its right-hand
    // operand as a pointer to double.
    friend powerComponents operator+(const powerComponents & x,
                                     const powerComponents & y);
    friend powerComponents operator*(const powerComponents & x,
                                     double const * const y);
};
85
86
87
88 class powerDef {
89 public:
90 powerComponents readOp;
91 powerComponents writeOp;
92 powerComponents searchOp;//Sheng: for CAM and FA
93
94 powerDef() : readOp(), writeOp(), searchOp() { }
95 void reset() {
96 readOp.reset();
97 writeOp.reset();
98 searchOp.reset();
99 }
100
101 friend powerDef operator+(const powerDef & x, const powerDef & y);
102 friend powerDef operator*(const powerDef & x, double const * const y);
103 };
104
/**
 * Wire flavours selectable through this interface.
 *
 * Enumerators take the implicit values 0..9 in declaration order, so the
 * order must not be changed.
 */
enum Wire_type {
    Global,         // global wires with repeaters
    Global_5,       // 5% delay penalty
    Global_10,      // 10% delay penalty
    Global_20,      // 20% delay penalty
    Global_30,      // 30% delay penalty
    Low_swing,      // differential low power wires with high area overhead
    Semi_global,    // mid-level wires with repeaters
    Transmission,   // transmission lines with high area overhead
    Optical,        // optical wires
    Invalid_wtype
};
117
118
119
120 class InputParameter {
121 public:
122 void parse_cfg(const string & infile);
123
124 // return false if the input parameters are problematic
125 bool error_checking(string name = "CACTI");
126 void display_ip();
127
128 unsigned int cache_sz; // in bytes
129 unsigned int line_sz;
130 unsigned int assoc;
131 unsigned int nbanks;
132 unsigned int out_w;// == nr_bits_out
133 bool specific_tag;
134 unsigned int tag_w;
135 unsigned int access_mode;
136 unsigned int obj_func_dyn_energy;
137 unsigned int obj_func_dyn_power;
138 unsigned int obj_func_leak_power;
139 unsigned int obj_func_cycle_t;
140
141 double F_sz_nm; // feature size in nm
142 double F_sz_um; // feature size in um
143 unsigned int num_rw_ports;
144 unsigned int num_rd_ports;
145 unsigned int num_wr_ports;
146 unsigned int num_se_rd_ports; // number of single ended read ports
147 unsigned int num_search_ports; // Sheng: number of search ports for CAM
148 bool is_main_mem;
149 bool is_cache;
150 bool pure_ram;
151 bool pure_cam;
152 bool rpters_in_htree; // if there are repeaters in htree segment
153 unsigned int ver_htree_wires_over_array;
154 unsigned int broadcast_addr_din_over_ver_htrees;
155 unsigned int temp;
156
157 unsigned int ram_cell_tech_type;
158 unsigned int peri_global_tech_type;
159 unsigned int data_arr_ram_cell_tech_type;
160 unsigned int data_arr_peri_global_tech_type;
161 unsigned int tag_arr_ram_cell_tech_type;
162 unsigned int tag_arr_peri_global_tech_type;
163
164 unsigned int burst_len;
165 unsigned int int_prefetch_w;
166 unsigned int page_sz_bits;
167
168 unsigned int ic_proj_type; // interconnect_projection_type
169 unsigned int wire_is_mat_type; // wire_inside_mat_type
170 unsigned int wire_os_mat_type; // wire_outside_mat_type
171 enum Wire_type wt;
172 int force_wiretype;
173 bool print_input_args;
174 unsigned int nuca_cache_sz; // TODO
175 int ndbl, ndwl, nspd, ndsam1, ndsam2, ndcm;
176 bool force_cache_config;
177
178 int cache_level;
179 int cores;
180 int nuca_bank_count;
181 int force_nuca_bank;
182
183 int delay_wt, dynamic_power_wt, leakage_power_wt,
184 cycle_time_wt, area_wt;
185 int delay_wt_nuca, dynamic_power_wt_nuca, leakage_power_wt_nuca,
186 cycle_time_wt_nuca, area_wt_nuca;
187
188 int delay_dev, dynamic_power_dev, leakage_power_dev,
189 cycle_time_dev, area_dev;
190 int delay_dev_nuca, dynamic_power_dev_nuca, leakage_power_dev_nuca,
191 cycle_time_dev_nuca, area_dev_nuca;
192 int ed; //ED or ED2 optimization
193 int nuca;
194
195 bool fast_access;
196 unsigned int block_sz; // bytes
197 unsigned int tag_assoc;
198 unsigned int data_assoc;
199 bool is_seq_acc;
200 bool fully_assoc;
201 unsigned int nsets; // == number_of_sets
202 int print_detail;
203
204
205 bool add_ecc_b_;
206 //parameters for design constraint
207 double throughput;
208 double latency;
209 bool pipelinable;
210 int pipeline_stages;
211 int per_stage_vector;
212 bool with_clock_grid;
213 };
214
215
216 typedef struct {
217 int Ndwl;
218 int Ndbl;
219 double Nspd;
220 int deg_bl_muxing;
221 int Ndsam_lev_1;
222 int Ndsam_lev_2;
223 int number_activated_mats_horizontal_direction;
224 int number_subbanks;
225 int page_size_in_bits;
226 double delay_route_to_bank;
227 double delay_crossbar;
228 double delay_addr_din_horizontal_htree;
229 double delay_addr_din_vertical_htree;
230 double delay_row_predecode_driver_and_block;
231 double delay_row_decoder;
232 double delay_bitlines;
233 double delay_sense_amp;
234 double delay_subarray_output_driver;
235 double delay_bit_mux_predecode_driver_and_block;
236 double delay_bit_mux_decoder;
237 double delay_senseamp_mux_lev_1_predecode_driver_and_block;
238 double delay_senseamp_mux_lev_1_decoder;
239 double delay_senseamp_mux_lev_2_predecode_driver_and_block;
240 double delay_senseamp_mux_lev_2_decoder;
241 double delay_input_htree;
242 double delay_output_htree;
243 double delay_dout_vertical_htree;
244 double delay_dout_horizontal_htree;
245 double delay_comparator;
246 double access_time;
247 double cycle_time;
248 double multisubbank_interleave_cycle_time;
249 double delay_request_network;
250 double delay_inside_mat;
251 double delay_reply_network;
252 double trcd;
253 double cas_latency;
254 double precharge_delay;
255 powerDef power_routing_to_bank;
256 powerDef power_addr_input_htree;
257 powerDef power_data_input_htree;
258 powerDef power_data_output_htree;
259 powerDef power_addr_horizontal_htree;
260 powerDef power_datain_horizontal_htree;
261 powerDef power_dataout_horizontal_htree;
262 powerDef power_addr_vertical_htree;
263 powerDef power_datain_vertical_htree;
264 powerDef power_row_predecoder_drivers;
265 powerDef power_row_predecoder_blocks;
266 powerDef power_row_decoders;
267 powerDef power_bit_mux_predecoder_drivers;
268 powerDef power_bit_mux_predecoder_blocks;
269 powerDef power_bit_mux_decoders;
270 powerDef power_senseamp_mux_lev_1_predecoder_drivers;
271 powerDef power_senseamp_mux_lev_1_predecoder_blocks;
272 powerDef power_senseamp_mux_lev_1_decoders;
273 powerDef power_senseamp_mux_lev_2_predecoder_drivers;
274 powerDef power_senseamp_mux_lev_2_predecoder_blocks;
275 powerDef power_senseamp_mux_lev_2_decoders;
276 powerDef power_bitlines;
277 powerDef power_sense_amps;
278 powerDef power_prechg_eq_drivers;
279 powerDef power_output_drivers_at_subarray;
280 powerDef power_dataout_vertical_htree;
281 powerDef power_comparators;
282 powerDef power_crossbar;
283 powerDef total_power;
284 double area;
285 double all_banks_height;
286 double all_banks_width;
287 double bank_height;
288 double bank_width;
289 double subarray_memory_cell_area_height;
290 double subarray_memory_cell_area_width;
291 double mat_height;
292 double mat_width;
293 double routing_area_height_within_bank;
294 double routing_area_width_within_bank;
295 double area_efficiency;
296 double refresh_power;
297 double dram_refresh_period;
298 double dram_array_availability;
299 double dyn_read_energy_from_closed_page;
300 double dyn_read_energy_from_open_page;
301 double leak_power_subbank_closed_page;
302 double leak_power_subbank_open_page;
303 double leak_power_request_and_reply_networks;
304 double activate_energy;
305 double read_energy;
306 double write_energy;
307 double precharge_energy;
308 } results_mem_array;
309
310
311 class uca_org_t {
312 public:
313 mem_array * tag_array2;
314 mem_array * data_array2;
315 double access_time;
316 double cycle_time;
317 double area;
318 double area_efficiency;
319 powerDef power;
320 double leak_power_with_sleep_transistors_in_mats;
321 double cache_ht;
322 double cache_len;
323 char file_n[100];
324 double vdd_periph_global;
325 bool valid;
326 results_mem_array tag_array;
327 results_mem_array data_array;
328
329 uca_org_t();
330 void find_delay();
331 void find_energy();
332 void find_area();
333 void find_cyc();
334 void adjust_area();//for McPAT only to adjust routing overhead
335 void cleanup();
336 ~uca_org_t() {};
337 };
338
339 void reconfigure(InputParameter *local_interface, uca_org_t *fin_res);
340
341 uca_org_t cacti_interface(const string & infile_name);
342 //McPAT's plain interface, please keep !!!
343 uca_org_t cacti_interface(InputParameter * const local_interface);
344 //McPAT's plain interface, please keep !!!
345 uca_org_t init_interface(InputParameter * const local_interface,
346 const string &name);
347 //McPAT's plain interface, please keep !!!
348 uca_org_t cacti_interface(
349 int cache_size,
350 int line_size,
351 int associativity,
352 int rw_ports,
353 int excl_read_ports,
354 int excl_write_ports,
355 int single_ended_read_ports,
356 int search_ports,
357 int banks,
358 double tech_node,
359 int output_width,
360 int specific_tag,
361 int tag_width,
362 int access_mode,
363 int cache,
364 int main_mem,
365 int obj_func_delay,
366 int obj_func_dynamic_power,
367 int obj_func_leakage_power,
368 int obj_func_cycle_time,
369 int obj_func_area,
370 int dev_func_delay,
371 int dev_func_dynamic_power,
372 int dev_func_leakage_power,
373 int dev_func_area,
374 int dev_func_cycle_time,
375 int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
376 int temp,
377 int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
378 int data_arr_ram_cell_tech_flavor_in,
379 int data_arr_peri_global_tech_flavor_in,
380 int tag_arr_ram_cell_tech_flavor_in,
381 int tag_arr_peri_global_tech_flavor_in,
382 int interconnect_projection_type_in,
383 int wire_inside_mat_type_in,
384 int wire_outside_mat_type_in,
385 int REPEATERS_IN_HTREE_SEGMENTS_in,
386 int VERTICAL_HTREE_WIRES_OVER_THE_ARRAY_in,
387 int BROADCAST_ADDR_DATAIN_OVER_VERTICAL_HTREES_in,
388 int PAGE_SIZE_BITS_in,
389 int BURST_LENGTH_in,
390 int INTERNAL_PREFETCH_WIDTH_in,
391 int force_wiretype,
392 int wiretype,
393 int force_config,
394 int ndwl,
395 int ndbl,
396 int nspd,
397 int ndcm,
398 int ndsam1,
399 int ndsam2,
400 int ecc);
401
402 //Naveen's interface
403 uca_org_t cacti_interface(
404 int cache_size,
405 int line_size,
406 int associativity,
407 int rw_ports,
408 int excl_read_ports,
409 int excl_write_ports,
410 int single_ended_read_ports,
411 int banks,
412 double tech_node,
413 int page_sz,
414 int burst_length,
415 int pre_width,
416 int output_width,
417 int specific_tag,
418 int tag_width,
419 int access_mode, //0 normal, 1 seq, 2 fast
420 int cache, //scratch ram or cache
421 int main_mem,
422 int obj_func_delay,
423 int obj_func_dynamic_power,
424 int obj_func_leakage_power,
425 int obj_func_area,
426 int obj_func_cycle_time,
427 int dev_func_delay,
428 int dev_func_dynamic_power,
429 int dev_func_leakage_power,
430 int dev_func_area,
431 int dev_func_cycle_time,
432 int ed_ed2_none, // 0 - ED, 1 - ED^2, 2 - use weight and deviate
433 int temp,
434 int wt, //0 - default(search across everything), 1 - global, 2 - 5% delay penalty, 3 - 10%, 4 - 20 %, 5 - 30%, 6 - low-swing
435 int data_arr_ram_cell_tech_flavor_in,
436 int data_arr_peri_global_tech_flavor_in,
437 int tag_arr_ram_cell_tech_flavor_in,
438 int tag_arr_peri_global_tech_flavor_in,
439 int interconnect_projection_type_in, // 0 - aggressive, 1 - normal
440 int wire_inside_mat_type_in,
441 int wire_outside_mat_type_in,
442 int is_nuca, // 0 - UCA, 1 - NUCA
443 int core_count,
444 int cache_level, // 0 - L2, 1 - L3
445 int nuca_bank_count,
446 int nuca_obj_func_delay,
447 int nuca_obj_func_dynamic_power,
448 int nuca_obj_func_leakage_power,
449 int nuca_obj_func_area,
450 int nuca_obj_func_cycle_time,
451 int nuca_dev_func_delay,
452 int nuca_dev_func_dynamic_power,
453 int nuca_dev_func_leakage_power,
454 int nuca_dev_func_area,
455 int nuca_dev_func_cycle_time,
456 int REPEATERS_IN_HTREE_SEGMENTS_in,//TODO for now only wires with repeaters are supported
457 int p_input);
458
459 class mem_array {
460 public:
461 int Ndcm;
462 int Ndwl;
463 int Ndbl;
464 double Nspd;
465 int deg_bl_muxing;
466 int Ndsam_lev_1;
467 int Ndsam_lev_2;
468 double access_time;
469 double cycle_time;
470 double multisubbank_interleave_cycle_time;
471 double area_ram_cells;
472 double area;
473 powerDef power;
474 double delay_senseamp_mux_decoder;
475 double delay_before_subarray_output_driver;
476 double delay_from_subarray_output_driver_to_output;
477 double height;
478 double width;
479
480 double mat_height;
481 double mat_length;
482 double subarray_length;
483 double subarray_height;
484
485 double delay_route_to_bank,
486 delay_input_htree,
487 delay_row_predecode_driver_and_block,
488 delay_row_decoder,
489 delay_bitlines,
490 delay_sense_amp,
491 delay_subarray_output_driver,
492 delay_dout_htree,
493 delay_comparator,
494 delay_matchlines;
495
496 double all_banks_height,
497 all_banks_width,
498 area_efficiency;
499
500 powerDef power_routing_to_bank;
501 powerDef power_addr_input_htree;
502 powerDef power_data_input_htree;
503 powerDef power_data_output_htree;
504 powerDef power_htree_in_search;
505 powerDef power_htree_out_search;
506 powerDef power_row_predecoder_drivers;
507 powerDef power_row_predecoder_blocks;
508 powerDef power_row_decoders;
509 powerDef power_bit_mux_predecoder_drivers;
510 powerDef power_bit_mux_predecoder_blocks;
511 powerDef power_bit_mux_decoders;
512 powerDef power_senseamp_mux_lev_1_predecoder_drivers;
513 powerDef power_senseamp_mux_lev_1_predecoder_blocks;
514 powerDef power_senseamp_mux_lev_1_decoders;
515 powerDef power_senseamp_mux_lev_2_predecoder_drivers;
516 powerDef power_senseamp_mux_lev_2_predecoder_blocks;
517 powerDef power_senseamp_mux_lev_2_decoders;
518 powerDef power_bitlines;
519 powerDef power_sense_amps;
520 powerDef power_prechg_eq_drivers;
521 powerDef power_output_drivers_at_subarray;
522 powerDef power_dataout_vertical_htree;
523 powerDef power_comparators;
524
525 powerDef power_cam_bitline_precharge_eq_drv;
526 powerDef power_searchline;
527 powerDef power_searchline_precharge;
528 powerDef power_matchlines;
529 powerDef power_matchline_precharge;
530 powerDef power_matchline_to_wordline_drv;
531
532 min_values_t *arr_min;
533 enum Wire_type wt;
534
535 // dram stats
536 double activate_energy, read_energy, write_energy, precharge_energy,
537 refresh_power, leak_power_subbank_closed_page, leak_power_subbank_open_page,
538 leak_power_request_and_reply_networks;
539
540 double precharge_delay;
541
542 static bool lt(const mem_array * m1, const mem_array * m2);
543 };
544
545
546 #endif