src/mem/ruby/config/rubyconfig.defaults

   1 //
   2 // This file has been modified by Kevin Moore and Dan Nussbaum of the
   3 // Scalable Systems Research Group at Sun Microsystems Laboratories
   4 // (http://research.sun.com/scalable/) to support the Adaptive
   5 // Transactional Memory Test Platform (ATMTP).  For information about
   6 // ATMTP, see the GEMS website: http://www.cs.wisc.edu/gems/.
   7 //
   8 // Please send email to atmtp-interest@sun.com with feedback, questions, or
   9 // to request future announcements about ATMTP.
  10 //
  11 // ----------------------------------------------------------------------
  12 //
  13 // File modification date: 2008-02-23
  14 //
  15 // ----------------------------------------------------------------------
  16 //
  17 // ATMTP is distributed as part of the GEMS software toolset and is
  18 // available for use and modification under the terms of version 2 of the
  19 // GNU General Public License.  The GNU General Public License is contained
  20 // in the file $GEMS/LICENSE.
  21 //
  22 // Multifacet GEMS is free software; you can redistribute it and/or modify
  23 // it under the terms of version 2 of the GNU General Public License as
  24 // published by the Free Software Foundation.
  25 //
  26 // Multifacet GEMS is distributed in the hope that it will be useful, but
  27 // WITHOUT ANY WARRANTY; without even the implied warranty of
  28 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  29 // General Public License for more details.
  30 //
  31 // You should have received a copy of the GNU General Public License along
  32 // with the Multifacet GEMS; if not, write to the Free Software Foundation,
  33 // Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
  34 //
  35 // ----------------------------------------------------------------------
  36 //
  37
  38 g_RANDOM_SEED: 1
  39
  40 g_DEADLOCK_THRESHOLD: 500000
  41
  42 // determines how many Simics cycles advance for every Ruby cycle
  43 //  (does not apply when running Opal)
  44 SIMICS_RUBY_MULTIPLIER: 4
  45
  46 // Ruby cycles between when a sequencer issues a request and it arrives at
  47 // the L1 cache controller
  48 //
  49 // ** important ** this parameter determines the L2 hit latency when
  50 //  using the SMP protocols with a combined L1/L2 controller (-cache.sm)
  51 //
  52 SEQUENCER_TO_CONTROLLER_LATENCY: 4
  53
  54
  55 // When set to false, the L1 cache structures are probed for a hit in Sequencer.C
  56 //  If a request hits, it is *not* issued to the cache controller
  57 // When set to true, all processor data requests issue to cache controller
  58 //
  59 // ** important ** this parameter must be set to false for proper L1/L2 hit timing
  60 //  for the SMP protocols with combined L1/L2 controllers (-cache.sm)
  61 //
  62 REMOVE_SINGLE_CYCLE_DCACHE_FAST_PATH: false
  63
  64
  65 // When running with Opal in SMT configurations, this indicates the number of threads per physical processor
  66 g_NUM_SMT_THREADS: 1
  67
  68
  69 // Maximum number of requests (including SW prefetches) outstanding from
  70 // the sequencer (Note: this also include items buffered in the store
  71 // buffer)
  72 g_SEQUENCER_OUTSTANDING_REQUESTS: 16
  73
  74
  75 PROTOCOL_DEBUG_TRACE: true
  76 DEBUG_FILTER_STRING: none
  77 DEBUG_VERBOSITY_STRING: none
  78 DEBUG_START_TIME: 0
  79 DEBUG_OUTPUT_FILENAME: none
  80
  81
  82 TRANSACTION_TRACE_ENABLED: false
  83 USER_MODE_DATA_ONLY: false
  84 PROFILE_HOT_LINES: false
  85
  86 PROFILE_ALL_INSTRUCTIONS: false
  87 PRINT_INSTRUCTION_TRACE: false
  88 g_DEBUG_CYCLE: 0
  89 BLOCK_STC: false
  90 PERFECT_MEMORY_SYSTEM: false
  91 PERFECT_MEMORY_SYSTEM_LATENCY: 0
  92 DATA_BLOCK: false
  93
  94
  95 // *********************************************
  96 // CACHE & MEMORY PARAMETERS
  97 // *********************************************
  98
  99
 100 L1_CACHE_ASSOC: 4
 101 L1_CACHE_NUM_SETS_BITS: 8
 102 L2_CACHE_ASSOC: 4
 103 L2_CACHE_NUM_SETS_BITS: 16
 104
 105 // 32 bits = 4 GB address space
 106 g_MEMORY_SIZE_BYTES: 1073741824 //4294967296
 107 g_DATA_BLOCK_BYTES: 64
 108 g_PAGE_SIZE_BYTES: 4096
 109 g_REPLACEMENT_POLICY: PSEDUO_LRU // currently, only other option is LRU
 110
 111 g_PROCS_PER_CHIP: 1
 112
 113
 114 // set automatically
 115 g_NUM_PROCESSORS: 0
 116 g_NUM_L2_BANKS: 0
 117 g_NUM_MEMORIES: 0
 118
 119 // The following group of parameters are calculated.  They must
 120 // _always_ be left at zero.
 121 g_NUM_CHIPS: 0
 122 g_NUM_CHIP_BITS: 0
 123 g_MEMORY_SIZE_BITS: 0
 124 g_DATA_BLOCK_BITS: 0
 125 g_PAGE_SIZE_BITS: 0
 126 g_NUM_PROCESSORS_BITS: 0
 127 g_PROCS_PER_CHIP_BITS: 0
 128 g_NUM_L2_BANKS_BITS: 0
 129 g_NUM_L2_BANKS_PER_CHIP: 0
 130 g_NUM_L2_BANKS_PER_CHIP_BITS: 0
 131 g_NUM_MEMORIES_BITS: 0
 132 g_NUM_MEMORIES_PER_CHIP: 0
 133 g_MEMORY_MODULE_BITS: 0
 134 g_MEMORY_MODULE_BLOCKS: 0
 135
 136
 137 // For certain CMP protocols, determines whether the lowest bits of a block address
 138 // are used to index to a L2 cache bank or into the sets of a
 139 // single bank
 140 //        lowest                                                             highest
 141 // true:   g_DATA_BLOCK_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS | L2_CACHE_NUM_SETS_BITS
 142 // false:  g_DATA_BLOCK_BITS | L2_CACHE_NUM_SETS_BITS | g_NUM_L2_BANKS_PER_CHIP_BITS
 143 MAP_L2BANKS_TO_LOWEST_BITS: false
 144
 145
 146
 147 // TIMING PARAMETERS  -- many of these are protocol specific.  See SLICC files
 148 //                       to determine where they apply
 149
 150 MEMORY_RESPONSE_LATENCY_MINUS_2: 158  // determines memory response latency
 151 DIRECTORY_CACHE_LATENCY: 6
 152 NULL_LATENCY: 1
 153 ISSUE_LATENCY: 2
 154 CACHE_RESPONSE_LATENCY: 12
 155 L1_RESPONSE_LATENCY: 3
 156 L2_RESPONSE_LATENCY: 6
 157 L2_TAG_LATENCY: 6
 158 DIRECTORY_LATENCY: 80
 159 NETWORK_LINK_LATENCY: 1
 160 COPY_HEAD_LATENCY: 4
 161 ON_CHIP_LINK_LATENCY: 1
 162 RECYCLE_LATENCY: 10
 163 L2_RECYCLE_LATENCY: 5
 164 TIMER_LATENCY: 10000
 165 TBE_RESPONSE_LATENCY: 1
 166 PERIODIC_TIMER_WAKEUPS: true
 167
 168
 169 // constants used by CMP protocols
 170 // cache bank access times
 171 L1_REQUEST_LATENCY: 2
 172 L2_REQUEST_LATENCY: 4
 173
 174
 175 // Number of transitions each controller state machines can complete per cycle
 176 // i.e. the number of ports to each controller
 177 // L1cache is the sum of the L1I and L1D cache ports
 178 L1CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
 179 // Note: if SINGLE_ACCESS_L2_BANKS is enabled, this will probably enforce a
 180 // much greater constraint on the concurrency of a L2 cache bank
 181 L2CACHE_TRANSITIONS_PER_RUBY_CYCLE: 32
 182 DIRECTORY_TRANSITIONS_PER_RUBY_CYCLE: 32
 183 DMA_TRANSITIONS_PER_RUBY_CYCLE: 1
 184
 185
 186 // Number of TBEs available for demand misses, ALL prefetches, and replacements
 187 // used by one-level protocols
 188 NUMBER_OF_TBES: 128
 189 // two-level protocols
 190 NUMBER_OF_L1_TBES: 32
 191 NUMBER_OF_L2_TBES: 32
 192
 193 // ** INTERCONECT PARAMETERS **
 194 //
 195 g_PRINT_TOPOLOGY: true
 196 g_NETWORK_TOPOLOGY: HIERARCHICAL_SWITCH
 197 g_CACHE_DESIGN: NUCA  // specifies file prefix for FILE_SPECIFIED topology
 198 FAN_OUT_DEGREE: 4  // for HIERARCHICAL SWITCH topology
 199
 200 g_adaptive_routing: true
 201 NUMBER_OF_VIRTUAL_NETWORKS: 6
 202
 203 // bandwidth unit is 1/1000 byte per cycle.  the following parameter is multiplied by
 204 //  topology specific link weights
 205 g_endpoint_bandwidth: 10000
 206
 207
 208 // ** finite buffering parameters
 209 //
 210 // note: Finite buffering allows us to simulate a realistic virtual cut-through
 211 // routed network with idealized flow control.  this feature is NOT heavily tested
 212 FINITE_BUFFERING: false
 213 // All message buffers within the network (i.e. the switch's input and
 214 // output buffers) are set to the size specified below by the FINITE_BUFFER_SIZE
 215 FINITE_BUFFER_SIZE: 3
 216 // g_SEQUENCER_OUTSTANDING_REQUESTS (above) controlls the number of demand requests
 217 // issued by the sequencer.  The PROCESSOR_BUFFER_SIZE controlls the
 218 // number of requests in the mandatory queue
 219 // Only effects the simualtion when FINITE_BUFFERING is enabled
 220 PROCESSOR_BUFFER_SIZE: 10
 221 // The PROTOCOL_BUFFER_SIZE limits the size of all other buffers connecting to
 222 // Controllers.  Controlls the number of request issued by the L2 HW Prefetcher
 223 PROTOCOL_BUFFER_SIZE: 32
 224 // ** end finite buffering parameters
 225
 226
 227 // (deprecated)
 228 // Allows on a single accesses to a multi-cycle L2 bank.
 229 // Ensures the cache array is only accessed once for every L2_REQUEST_LATENCY
 230 // number of cycles.  However the TBE table can be accessed in parallel.
 231 SINGLE_ACCESS_L2_BANKS: true
 232
 233
 234 // MOESI_CMP_token parameters (some might be deprecated)
 235 g_FILTERING_ENABLED: false
 236 g_DISTRIBUTED_PERSISTENT_ENABLED: true
 237 g_RETRY_THRESHOLD: 1
 238 g_DYNAMIC_TIMEOUT_ENABLED: true
 239 g_FIXED_TIMEOUT_LATENCY: 300
 240
 241
 242 // tester parameters (overridden by testerconfig.defaults)
 243 //
 244 //  injects random message delays to excite protocol races
 245 RANDOMIZATION: false
 246 g_SYNTHETIC_DRIVER: false
 247 g_DETERMINISTIC_DRIVER: false
 248 g_trace_warmup_length: 1000000
 249 g_bash_bandwidth_adaptive_threshold: 0.75
 250
 251 g_tester_length: 0
 252 // # of synthetic locks == 16 * 128
 253 g_synthetic_locks: 2048
 254 g_deterministic_addrs: 1
 255 g_SpecifiedGenerator: DetermInvGenerator
 256 g_callback_counter: 0
 257 g_NUM_COMPLETIONS_BEFORE_PASS: 0
 258 // parameters used by locking synthetic tester
 259 g_think_time: 5
 260 g_hold_time:  5
 261 g_wait_time:  5
 262
 263 // Princeton Network (Garnet)
 264 g_GARNET_NETWORK: true
 265 g_DETAIL_NETWORK: false
 266 g_NETWORK_TESTING: false
 267 g_FLIT_SIZE: 16
 268 g_NUM_PIPE_STAGES: 4
 269 g_VCS_PER_CLASS: 4
 270 g_BUFFER_SIZE: 4
 271
 272 ///////////////////////////////////////////////////////////////////////////////
 273 //
 274 // MemoryControl:
 275
 276 // Basic cycle time of the memory controller.  This defines the period which is
 277 // used as the memory channel clock period, the address bus bit time, and the
 278 // memory controller cycle time.
 279 // Assuming a 200 MHz memory channel (DDR-400, which has 400 bits/sec data),
 280 // and a 2 GHz Ruby clock:
 281 MEM_BUS_CYCLE_MULTIPLIER: 10
 282
 283 // How many internal banks in each DRAM chip:
 284 BANKS_PER_RANK: 8
 285
 286 // How many sets of DRAM chips per DIMM.
 287 RANKS_PER_DIMM: 2
 288
 289 // How many DIMMs per channel.  (Currently the only thing that
 290 // matters is the number of ranks per channel, i.e. the product
 291 // of this parameter and RANKS_PER_DIMM.  But if and when this is
 292 // expanded to do FB-DIMMs, the distinction between the two
 293 // will matter.)
 294 DIMMS_PER_CHANNEL: 2
 295
 296 // Which bits to use to find the bank, rank, and DIMM numbers.
 297 // You could choose to have the bank bits, rank bits, and DIMM bits
 298 // in any order; here they are in that order.
 299 // For these defaults, we assume this format for addresses:
 300 //    Offset within line:     [5:0]
 301 //    Memory controller #:    [7:6]
 302 //    Bank:                  [10:8]
 303 //    Rank:                    [11]
 304 //    DIMM:                    [12]
 305 //    Row addr / Col addr: [top:13]
 306 // If you get these bits wrong, then some banks won't see any
 307 // requests; you need to check for this in the .stats output.
 308 BANK_BIT_0: 8
 309 RANK_BIT_0: 11
 310 DIMM_BIT_0: 12
 311
 312 // Number of entries max in each bank queues; set to whatever you want.
 313 // If it is too small, you will see in the .stats file a lot of delay
 314 // time spent in the common input queue.
 315 BANK_QUEUE_SIZE: 12
 316
 317 // Bank cycle time (tRC) measured in memory cycles:
 318 BANK_BUSY_TIME: 11
 319
 320 // This is how many memory address cycles to delay between reads to
 321 // different ranks of DRAMs to allow for clock skew:
 322 RANK_RANK_DELAY: 1
 323
 324 // This is how many memory address cycles to delay between a read
 325 // and a write.  This is based on two things:  (1) the data bus is
 326 // used one cycle earlier in the operation; (2) a round-trip wire
 327 // delay from the controller to the DIMM that did the reading.
 328 READ_WRITE_DELAY: 2
 329
 330 // Basic address and data bus occupancy.  If you are assuming a
 331 // 16-byte-wide data bus (pairs of DIMMs side-by-side), then
 332 // the data bus occupancy matches the address bus occupancy at
 333 // two cycles.  But if the channel is only 8 bytes wide, you
 334 // need to increase this bus occupancy time to 4 cycles.
 335 BASIC_BUS_BUSY_TIME: 2
 336
 337 // Latency to returning read request or writeback acknowledgement.
 338 // Measured in memory address cycles.
 339 // This equals tRCD + CL + AL + (four bit times)
 340 //                            + (round trip on channel)
 341 //                            + (memory control internal delays)
 342 // It's going to be an approximation, so pick what you like.
 343 // Note:  The fact that latency is a constant, and does not depend on two
 344 // low-order address bits, implies that our memory controller either:
 345 // (a) tells the DRAM to read the critical word first, and sends the
 346 // critical word first back to the CPU, or (b) waits until it has
 347 // seen all four bit times on the data wires before sending anything
 348 // back.  Either is plausible.  If (a), remove the "four bit times"
 349 // term from the calculation above.
 350 MEM_CTL_LATENCY: 12
 351
 352 // refresh_period is the number of memory cycles between refresh
 353 // of row x in bank n and refresh of row x+1 in bank n.  For DDR-400,
 354 // this is typically 7.8 usec for commercial systems; after 8192 such
 355 // refreshes, this will have refreshed the whole chip in 64 msec.  If
 356 // we have a 5 nsec memory clock, 7800 / 5 = 1560 cycles.  The memory
 357 // controller will divide this by the total number of banks, and kick
 358 // off a refresh to *somebody* every time that amount is counted
 359 // down to zero. (There will be some rounding error there, but it
 360 // should have minimal effect.)
 361 REFRESH_PERIOD: 1560
 362
 363 // tFAW is a DRAM chip parameter which restricts the number of
 364 // activates that can be done within a certain window of time.
 365 // The window is specified here in terms of number of memory
 366 // controller cycles.  At most four activates may be done during
 367 // any such sliding window.  If this number is set to be no more
 368 // than 4 * BASIC_BUS_BUSY_TIME, it will have no effect.
 369 // It is typical in real systems for tFAW to have no effect, but
 370 // it may be useful in throttling power.  Set to zero to ignore.
 371 TFAW: 0
 372
 373 // By default, the memory controller uses round-robin to arbitrate
 374 // between ready bank queues for use of the address bus.  If you
 375 // wish to add randomness to the system, set this parameter to
 376 // one instead, and it will restart the round-robin pointer at a
 377 // random bank number each cycle.  If you want additional
 378 // nondeterminism, set the parameter to some integer n >= 2, and
 379 // it will in addition add a n% chance each cycle that a ready bank
 380 // will be delayed an additional cycle.  Note that if you are
 381 // in MEM_FIXED_DELAY mode (see below), MEM_RANDOM_ARBITRATE=1 will
 382 // have no effect, but MEM_RANDOM_ARBITRATE=2 or more will.
 383 MEM_RANDOM_ARBITRATE: 0
 384
 385 // The following parameter, if nonzero, will disable the memory
 386 // controller and instead give every request a fixed latency.  The
 387 // nonzero value specified here is measured in memory cycles and is
 388 // just added to MEM_CTL_LATENCY.  It will also show up in the stats
 389 // file as a contributor to memory_delays_stalled_at_head_of_bank_queue.
 390 MEM_FIXED_DELAY: 0
 391
 392 // If instead of DDR-400, you wanted DDR-800, the channel gets faster
 393 // but the basic operation of the DRAM core is unchanged.
 394 // Busy times appear to double just because they are measured
 395 // in smaller clock cycles.  The performance advantage comes because
 396 // the bus busy times don't actually quite double.
 397 // You would use something like these values:
 398 //
 399 // MEM_BUS_CYCLE_MULTIPLIER: 5
 400 // BANK_BUSY_TIME: 22
 401 // RANK_RANK_DELAY: 2
 402 // READ_WRITE_DELAY: 3
 403 // BASIC_BUS_BUSY_TIME: 3
 404 // MEM_CTL_LATENCY: 20
 405 // REFRESH_PERIOD: 3120